modify format of itn_pipeline

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11257394

    * dev for asr itn inference pipeline

* add task interface

* add pipeline input

* add modelscope/pipelines/audio/itn_inference_pipeline.py

* add modelscope/pipelines/audio/itn_inference_pipeline.py

* modelscope/pipelines/audio/itn_inference_pipeline.py

* update modelscope/pipelines/audio/itn_inference_pipeline.py

* modify itn_inference_pipeline.py

* modify itn_inference_pipeline.py

* modify itn_inference_pipeline.py

* remove itn.py

* modify some names

* add modify itn_inference_pipeline.py

* modify itn_inference_pipeline.py

* modify itn_inference_pipeline.py

* modify itn_inference_pipeline.py

* modify itn

* add tests/pipelines/test_inverse_text_processing.py

* modify asr_inference_pipeline.py for the original files

* modify format

* add commits files

* Merge remote-tracking branch 'origin' into remotes/origin/asr/itn_nichongjia

* Merge remote-tracking branch 'origin' into remotes/origin/asr/itn_nichongjia

* modify the pipelines

* Merge branch 'master' into remotes/origin/asr/itn_nichongjia

* [to #47031187]fix: hub test suites cannot run in parallel
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11276872

    * [to #47031187]fix: hub test suites can not parallel

* google style docs and selected file generator 

ref: https://yuque.alibaba-inc.com/pai/rwqgvl/go8sc8tqzeqqfmsz
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11150212

    * google style docs and selected file generator

* merge

* Merge remote-tracking branch 'origin' into remotes/origin/asr/itn_nichongjia

* Merge branch 'master' into remotes/origin/asr/itn_nichongjia

* add requirements for fun_text_processing
This commit is contained in:
ni.chongjia
2023-01-05 16:36:17 +08:00
parent 2260dd45fa
commit ac53ce3e36
11 changed files with 267 additions and 3 deletions

View File

@@ -119,6 +119,7 @@ class Models(object):
kws_kwsbp = 'kws-kwsbp'
generic_asr = 'generic-asr'
wenet_asr = 'wenet-asr'
generic_itn = 'generic-itn'
# multi-modal models
ofa = 'ofa'
@@ -327,6 +328,7 @@ class Pipelines(object):
kws_kwsbp = 'kws-kwsbp'
asr_inference = 'asr-inference'
asr_wenet_inference = 'asr-wenet-inference'
itn_inference = 'itn-inference'
# multi-modal tasks
image_captioning = 'image-captioning'

View File

@@ -1,3 +1,3 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from . import ans, asr, kws, tts
from . import ans, asr, itn, kws, tts

View File

@@ -0,0 +1,22 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
    # Static type checkers see the real symbol directly.
    from .generic_inverse_text_processing import GenericInverseTextProcessing
else:
    # At runtime the submodule is loaded lazily on first attribute access.
    _import_structure = {
        'generic_inverse_text_processing': ['GenericInverseTextProcessing'],
    }

    import sys

    sys.modules[__name__] = LazyImportModule(
        __name__,
        globals()['__file__'],
        _import_structure,
        module_spec=__spec__,
        extra_objects={},
    )

View File

@@ -0,0 +1,44 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
from typing import Any, Dict
from modelscope.metainfo import Models
from modelscope.models.base import Model
from modelscope.models.builder import MODELS
from modelscope.utils.constant import Frameworks, Tasks
@MODELS.register_module(
    Tasks.inverse_text_processing, module_name=Models.generic_itn)
class GenericInverseTextProcessing(Model):
    """Generic inverse text normalization (ITN) model wrapper.

    Holds only the on-disk locations and configuration of the ITN grammar;
    `forward` exposes that configuration dict so the pipeline can run the
    actual inference itself.
    """

    def __init__(self, model_dir: str, itn_model_name: str,
                 model_config: Dict[str, Any], *args, **kwargs):
        """Initialize the model info.

        Args:
            model_dir (str): the model path.
            itn_model_name (str): the ITN model name from configuration.json.
            model_config (Dict[str, Any]): the detailed model config from
                configuration.json.
        """
        super().__init__(model_dir, itn_model_name, model_config, *args,
                         **kwargs)
        # No network is loaded here — this dict is the whole model state.
        self.model_cfg = {
            # the recognition model dir path
            'model_workspace': model_dir,
            # the itn model name
            'itn_model': itn_model_name,
            # absolute path of the grammar (.far) file
            'itn_model_path': os.path.join(model_dir, itn_model_name),
            # the recognition model config dict
            'model_config': model_config,
        }

    def forward(self) -> Dict[str, Any]:
        """Return the model configuration dict; performs no computation."""
        return self.model_cfg

View File

@@ -222,6 +222,8 @@ TASK_INPUTS = {
InputType.TEXT,
Tasks.keyword_spotting:
InputType.AUDIO,
Tasks.inverse_text_processing:
InputType.TEXT,
# ============ multi-modal tasks ===================
Tasks.image_captioning: [InputType.IMAGE, {

View File

@@ -10,7 +10,7 @@ if TYPE_CHECKING:
from .kws_kwsbp_pipeline import KeyWordSpottingKwsbpPipeline
from .linear_aec_pipeline import LinearAECPipeline
from .text_to_speech_pipeline import TextToSpeechSambertHifiganPipeline
from .inverse_text_processing_pipeline import InverseTextProcessingPipeline
else:
_import_structure = {
'ans_pipeline': ['ANSPipeline'],
@@ -19,6 +19,7 @@ else:
'kws_kwsbp_pipeline': ['KeyWordSpottingKwsbpPipeline'],
'linear_aec_pipeline': ['LinearAECPipeline'],
'text_to_speech_pipeline': ['TextToSpeechSambertHifiganPipeline'],
'itn_inference_pipeline': ['InverseTextProcessingPipeline']
}
import sys

View File

@@ -167,7 +167,6 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
if 'sampled_lengths' in inputs['model_config']:
cmd['sampled_lengths'] = inputs['model_config'][
'sampled_lengths']
else:
raise ValueError('model type is mismatching')
@@ -275,6 +274,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
token_num_relax=cmd['token_num_relax'],
decoding_ind=cmd['decoding_ind'],
decoding_mode=cmd['decoding_mode'])
elif self.framework == Frameworks.torch:
from easyasr import asr_inference_paraformer_espnet

View File

@@ -0,0 +1,121 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import shutil
from typing import Any, Dict, List, Sequence, Tuple, Union
import yaml
from modelscope.metainfo import Pipelines
from modelscope.models import Model
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Pipeline
from modelscope.pipelines.builder import PIPELINES
from modelscope.utils.constant import Frameworks, Tasks
from modelscope.utils.logger import get_logger
logger = get_logger()
__all__ = ['InverseTextProcessingPipeline']
@PIPELINES.register_module(
    Tasks.inverse_text_processing, module_name=Pipelines.itn_inference)
class InverseTextProcessingPipeline(Pipeline):
    """Inverse Text Processing Inference Pipeline.

    Use `model` to create an inverse text normalization (ITN) pipeline
    that rewrites spoken-form text into written form
    (e.g. 'one hundred and twenty three' -> '123').

    Args:
        model (GenericInverseTextProcessing): A model instance, or a model
            local dir, or a model id in the model hub.
        kwargs (dict, `optional`):
            Extra kwargs passed into the pipeline's constructor.

    Example:
        >>> from modelscope.pipelines import pipeline
        >>> pipeline_itn = pipeline(
        >>>     task=Tasks.inverse_text_processing,
        >>>     model='damo/speech_inverse_text_processing_fun-text-processing-itn-id')
        >>> sentence = 'sembilan ribu sembilan ratus sembilan puluh sembilan'
        >>> print(pipeline_itn(sentence))

    To view other examples please check
    tests/pipelines/test_inverse_text_processing.py.
    """

    def __init__(self, model: Union[Model, str] = None, **kwargs):
        """Use `model` to create an ITN pipeline for prediction."""
        super().__init__(model=model, **kwargs)
        # The model's forward() only returns its configuration dict;
        # the actual FST inference happens in run_inference below.
        self.model_cfg = self.model.forward()

    def __call__(self, text_in: str = None) -> str:
        """Run ITN on `text_in` and return the written-form string.

        Raises:
            ValueError: if `text_in` is None or empty.
        """
        # `if not text_in` also covers the default None; the previous
        # `len(text_in) == 0` check raised TypeError on None instead of
        # the intended ValueError.
        if not text_in:
            raise ValueError('The input of ITN should not be null.')
        self.text_in = text_in
        return self.forward(self.text_in)

    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Postprocessing: the decoded string needs no further work."""
        return inputs

    def forward(self, text_in: str = None) -> str:
        """Decode `text_in` with the language-specific ITN grammar."""
        logger.info('Inverse Text Normalization: {0} ...'.format(text_in))

        lang = self.model_cfg['model_config']['lang']
        model_dir = self.model_cfg['model_workspace']
        itn_model_path = self.model_cfg['itn_model_path']

        # make directory recursively; exist_ok already makes a prior
        # isdir check redundant
        cache_dir = os.path.join(model_dir, lang, '.cache')
        os.makedirs(cache_dir, mode=0o777, exist_ok=True)

        # copy the grammar archive into cache_dir under the name the
        # InverseNormalizer expects: '_<lang>_itn.far'
        name = '_{0}_itn.far'.format(lang)
        far_file = os.path.join(cache_dir, name)
        shutil.copy(itn_model_path, far_file)

        # generate itn inference command
        cmd = {
            'ngpu': 0,
            'log_level': 'ERROR',
            'text_in': text_in,
            'itn_model_file': far_file,
            'cache_dir': cache_dir,
            'overwrite_cache': False,
            'enable_standalone_number': True,
            'enable_0_to_9': True,
            'lang': lang,
            'verbose': False,
        }

        return self.run_inference(cmd)

    def run_inference(self, cmd):
        """Instantiate the fun_text_processing InverseNormalizer and decode.

        Only the torch framework is supported. Japanese takes two extra
        grammar switches that the other languages' constructors do not
        accept — TODO confirm against fun_text_processing.
        """
        itn_result = ''
        if self.framework == Frameworks.torch:
            from fun_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer
            if cmd['lang'] == 'ja':
                itn_normalizer = InverseNormalizer(
                    lang=cmd['lang'],
                    cache_dir=cmd['cache_dir'],
                    overwrite_cache=cmd['overwrite_cache'],
                    enable_standalone_number=cmd['enable_standalone_number'],
                    enable_0_to_9=cmd['enable_0_to_9'])
            else:
                itn_normalizer = InverseNormalizer(
                    lang=cmd['lang'],
                    cache_dir=cmd['cache_dir'],
                    overwrite_cache=cmd['overwrite_cache'])
            itn_result = itn_normalizer.inverse_normalize(
                cmd['text_in'], verbose=cmd['verbose'])
        else:
            raise ValueError('model type is mismatching')
        return itn_result

View File

@@ -162,6 +162,7 @@ class AudioTasks(object):
acoustic_echo_cancellation = 'acoustic-echo-cancellation'
acoustic_noise_suppression = 'acoustic-noise-suppression'
keyword_spotting = 'keyword-spotting'
inverse_text_processing = 'inverse-text-processing'
class MultiModalTasks(object):

View File

@@ -1,6 +1,7 @@
easyasr>=0.0.2
espnet==202204
funasr>=0.1.4
funtextprocessing>=0.1.1
greenlet>=1.1.2
h5py
inflect

View File

@@ -0,0 +1,70 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
class InverseTextProcessingTest(unittest.TestCase, DemoCompatibilityCheck):
    """Multi-language smoke tests for the ITN inference pipeline."""

    def setUp(self) -> None:
        # Bug fix: the original assignment carried a trailing comma
        # (`... = Tasks.inverse_text_processing,`), which silently made
        # self.task a 1-tuple instead of the plain task string and broke
        # the DemoCompatibilityCheck lookup.
        self.task = Tasks.inverse_text_processing
        # language code -> hub id of the matching ITN grammar model
        self.model_dict = {
            'en':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-en',
            'de':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-de',
            'es':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-es',
            'fr':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-fr',
            'id':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-id',
            'ko':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-ko',
            'ja':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-ja',
            'pt':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-pt',
            'ru':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-ru',
            'vi':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-vi',
            'tl':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-tl',
        }
        # language code -> spoken-form input sentence
        self.text_in_dict = {
            'en':
            'on december second, we paid one hundred and twenty three dollars for christmas tree.',
            'de': 'einhundertdreiundzwanzig',
            'es': 'ciento veintitrés',
            'fr': 'cent vingt-trois',
            'id': 'seratus dua puluh tiga',
            'ko': '삼백오 독일 마',
            'ja': '百二十三',
            'pt': 'cento e vinte e três',
            'ru': 'сто двадцать три',
            'vi': 'một trăm hai mươi ba',
            'tl': "ika-lima mayo dalawang libo dalawampu't dalawa",
        }

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_model_name_for_multi_language_itn(self):
        # Unpack items directly instead of re-assigning key/value locals.
        for lang, model_name in self.model_dict.items():
            itn_inference_pipeline = pipeline(
                task=Tasks.inverse_text_processing, model=model_name)
            itn_result = itn_inference_pipeline(
                text_in=self.text_in_dict[lang])
            print(itn_result)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_demo_compatibility(self):
        self.compatibility_check()


if __name__ == '__main__':
    unittest.main()