[to #9285266] tts pipeline using model id params

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9285266
2026-02-24 20:19:51 +01:00 · 2022-07-06 13:20:04 +08:00
parent 793b35d56d
commit a84c80cd57
2 changed files with 17 additions and 34 deletions
--- a/modelscope/pipelines/audio/text_to_speech_pipeline.py
+++ b/modelscope/pipelines/audio/text_to_speech_pipeline.py
@@ -9,7 +9,8 @@ from modelscope.models.audio.tts.am import SambertNetHifi16k
 from modelscope.models.audio.tts.vocoder import Hifigan16k
 from modelscope.pipelines.base import Pipeline
 from modelscope.pipelines.builder import PIPELINES
-from modelscope.preprocessors import TextToTacotronSymbols, build_preprocessor
+from modelscope.preprocessors import (Preprocessor, TextToTacotronSymbols,
+                                      build_preprocessor)
 from modelscope.utils.constant import Fields, Tasks

 __all__ = ['TextToSpeechSambertHifigan16kPipeline']
@@ -20,19 +21,19 @@ __all__ = ['TextToSpeechSambertHifigan16kPipeline']
 class TextToSpeechSambertHifigan16kPipeline(Pipeline):

    def __init__(self,
-                 config_file: str = None,
-                 model: List[Model] = None,
-                 preprocessor: TextToTacotronSymbols = None,
+                 model: List[str] = None,
+                 preprocessor: Preprocessor = None,
                 **kwargs):
-        super().__init__(
-            config_file=config_file,
-            model=model,
-            preprocessor=preprocessor,
-            **kwargs)
-        assert len(model) == 2, 'model number should be 2'
-        self._am = model[0]
-        self._vocoder = model[1]
-        self._preprocessor = preprocessor
+        assert len(model) == 3, 'model number should be 3'
+        if preprocessor is None:
+            lang_type = 'pinyin'
+            if 'lang_type' in kwargs:
+                lang_type = kwargs['lang_type']  # kwargs is a dict
+            preprocessor = TextToTacotronSymbols(model[0], lang_type=lang_type)
+        models = [model[1], model[2]]  # model[0] feeds the preprocessor
+        super().__init__(model=models, preprocessor=preprocessor, **kwargs)
+        self._am = self.models[0]
+        self._vocoder = self.models[1]

    def forward(self, inputs: Dict[str, Any]) -> Dict[str, np.ndarray]:
        texts = inputs['texts']
--- a/tests/pipelines/test_text_to_speech.py
+++ b/tests/pipelines/test_text_to_speech.py
@@ -1,6 +1,5 @@
 import unittest

-import tensorflow as tf
 # NOTICE: Tensorflow 1.15 seems not so compatible with pytorch.
 #         A segmentation fault may be raise by pytorch cpp library
 #         if 'import tensorflow' in front of 'import torch'.
@@ -16,6 +15,8 @@ from modelscope.utils.constant import Fields, Tasks
 from modelscope.utils.logger import get_logger
 from modelscope.utils.test_utils import test_level

+import tensorflow as tf  # isort:skip
+
 logger = get_logger()


@@ -23,33 +24,14 @@ class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase):

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_pipeline(self):
-        lang_type = 'pinyin'
        text = '明天天气怎么样'
        preprocessor_model_id = 'damo/speech_binary_tts_frontend_resource'
        am_model_id = 'damo/speech_sambert16k_tts_zhitian_emo'
        voc_model_id = 'damo/speech_hifigan16k_tts_zhitian_emo'
-
-        cfg_preprocessor = dict(
-            type=Preprocessors.text_to_tacotron_symbols,
-            model_name=preprocessor_model_id,
-            lang_type=lang_type)
-        preprocessor = build_preprocessor(cfg_preprocessor, Fields.audio)
-        self.assertTrue(preprocessor is not None)
-
-        am = Model.from_pretrained(am_model_id)
-        self.assertTrue(am is not None)
-
-        voc = Model.from_pretrained(voc_model_id)
-        self.assertTrue(voc is not None)
-
        sambert_tts = pipeline(
            task=Tasks.text_to_speech,
-            pipeline_name=Pipelines.sambert_hifigan_16k_tts,
-            config_file='',
-            model=[am, voc],
-            preprocessor=preprocessor)
+            model=[preprocessor_model_id, am_model_id, voc_model_id])
        self.assertTrue(sambert_tts is not None)
-
        output = sambert_tts(text)
        self.assertTrue(len(output['output']) > 0)
        write('output.wav', 16000, output['output'])