modify format of itn_pipeline

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11257394

    * dev for asr itn inference pipeline

* add task interface

* add pipeline input

* add modelscope/pipelines/audio/itn_inference_pipeline.py

* add modelscope/pipelines/audio/itn_inference_pipeline.py

* modelscope/pipelines/audio/itn_inference_pipeline.py

* update modelscope/pipelines/audio/itn_inference_pipeline.py

* modify itn_inference_pipeline.py

* modify itn_inference_pipeline.py

* modify itn_inference_pipeline.py

* remove itn.py

* modify some names

* add modify itn_inference_pipeline.py

* modify itn_inference_pipeline.py

* modify itn_inference_pipeline.py

* modify itn_inference_pipeline.py

* modify itn

* add tests/pipelines/test_inverse_text_processing.py

* modify asr_inference_pipeline.py for the original files

* modify format

* add commits files

* Merge remote-tracking branch 'origin' into remotes/origin/asr/itn_nichongjia

* Merge remote-tracking branch 'origin' into remotes/origin/asr/itn_nichongjia

* modify the pipelines

* Merge branch 'master' into remotes/origin/asr/itn_nichongjia

* [to #47031187]fix: hub test suites cannot run in parallel
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11276872

    * [to #47031187]fix: hub test suites can not parallel

* google style docs and selected file generator 

ref: https://yuque.alibaba-inc.com/pai/rwqgvl/go8sc8tqzeqqfmsz
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11150212

    * google style docs and selected file generator

* merge

* Merge remote-tracking branch 'origin' into remotes/origin/asr/itn_nichongjia

* Merge branch 'master' into remotes/origin/asr/itn_nichongjia

* add requirements for fun_text_processing
This commit is contained in:
ni.chongjia
2023-01-05 16:36:17 +08:00
parent 2260dd45fa
commit ac53ce3e36
11 changed files with 267 additions and 3 deletions

View File

@@ -119,6 +119,7 @@ class Models(object):
kws_kwsbp = 'kws-kwsbp'
generic_asr = 'generic-asr'
wenet_asr = 'wenet-asr'
generic_itn = 'generic-itn'
# multi-modal models
ofa = 'ofa'
@@ -327,6 +328,7 @@ class Pipelines(object):
kws_kwsbp = 'kws-kwsbp'
asr_inference = 'asr-inference'
asr_wenet_inference = 'asr-wenet-inference'
itn_inference = 'itn-inference'
# multi-modal tasks
image_captioning = 'image-captioning'

View File

@@ -1,3 +1,3 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from . import ans, asr, kws, tts
from . import ans, asr, itn, kws, tts

View File

@@ -0,0 +1,22 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
    # Static type checkers see the real symbol directly.
    from .generic_inverse_text_processing import GenericInverseTextProcessing
else:
    # At runtime the submodule is loaded lazily on first attribute access.
    _import_structure = {
        'generic_inverse_text_processing': ['GenericInverseTextProcessing'],
    }

    import sys

    sys.modules[__name__] = LazyImportModule(
        __name__,
        globals()['__file__'],
        _import_structure,
        module_spec=__spec__,
        extra_objects={},
    )

View File

@@ -0,0 +1,44 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
from typing import Any, Dict
from modelscope.metainfo import Models
from modelscope.models.base import Model
from modelscope.models.builder import MODELS
from modelscope.utils.constant import Frameworks, Tasks
@MODELS.register_module(
    Tasks.inverse_text_processing, module_name=Models.generic_itn)
class GenericInverseTextProcessing(Model):
    """Generic inverse text normalization (ITN) model wrapper.

    Holds only the on-disk locations and configuration of the ITN grammar;
    `forward` exposes that configuration dict so the pipeline can run the
    actual inference itself.
    """

    def __init__(self, model_dir: str, itn_model_name: str,
                 model_config: Dict[str, Any], *args, **kwargs):
        """Initialize the model info.

        Args:
            model_dir (str): the model path.
            itn_model_name (str): the ITN model name from configuration.json.
            model_config (Dict[str, Any]): the detailed model config from
                configuration.json.
        """
        super().__init__(model_dir, itn_model_name, model_config, *args,
                         **kwargs)
        # No network is loaded here — this dict is the whole model state.
        self.model_cfg = {
            # the recognition model dir path
            'model_workspace': model_dir,
            # the itn model name
            'itn_model': itn_model_name,
            # absolute path of the grammar (.far) file
            'itn_model_path': os.path.join(model_dir, itn_model_name),
            # the recognition model config dict
            'model_config': model_config,
        }

    def forward(self) -> Dict[str, Any]:
        """Return the model configuration dict; performs no computation."""
        return self.model_cfg

View File

@@ -222,6 +222,8 @@ TASK_INPUTS = {
InputType.TEXT,
Tasks.keyword_spotting:
InputType.AUDIO,
Tasks.inverse_text_processing:
InputType.TEXT,
# ============ multi-modal tasks ===================
Tasks.image_captioning: [InputType.IMAGE, {

View File

@@ -10,7 +10,7 @@ if TYPE_CHECKING:
from .kws_kwsbp_pipeline import KeyWordSpottingKwsbpPipeline
from .linear_aec_pipeline import LinearAECPipeline
from .text_to_speech_pipeline import TextToSpeechSambertHifiganPipeline
from .inverse_text_processing_pipeline import InverseTextProcessingPipeline
else:
_import_structure = {
'ans_pipeline': ['ANSPipeline'],
@@ -19,6 +19,7 @@ else:
'kws_kwsbp_pipeline': ['KeyWordSpottingKwsbpPipeline'],
'linear_aec_pipeline': ['LinearAECPipeline'],
'text_to_speech_pipeline': ['TextToSpeechSambertHifiganPipeline'],
'itn_inference_pipeline': ['InverseTextProcessingPipeline']
}
import sys

View File

@@ -167,7 +167,6 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
if 'sampled_lengths' in inputs['model_config']:
cmd['sampled_lengths'] = inputs['model_config'][
'sampled_lengths']
else:
raise ValueError('model type is mismatching')
@@ -275,6 +274,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
token_num_relax=cmd['token_num_relax'],
decoding_ind=cmd['decoding_ind'],
decoding_mode=cmd['decoding_mode'])
elif self.framework == Frameworks.torch:
from easyasr import asr_inference_paraformer_espnet

View File

@@ -0,0 +1,121 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import shutil
from typing import Any, Dict, List, Sequence, Tuple, Union
import yaml
from modelscope.metainfo import Pipelines
from modelscope.models import Model
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Pipeline
from modelscope.pipelines.builder import PIPELINES
from modelscope.utils.constant import Frameworks, Tasks
from modelscope.utils.logger import get_logger
logger = get_logger()
__all__ = ['InverseTextProcessingPipeline']
@PIPELINES.register_module(
    Tasks.inverse_text_processing, module_name=Pipelines.itn_inference)
class InverseTextProcessingPipeline(Pipeline):
    """Inverse Text Processing Inference Pipeline.

    Use `model` to create an inverse text normalization (ITN) pipeline
    that rewrites spoken-form text into written form
    (e.g. 'one hundred and twenty three' -> '123').

    Args:
        model (GenericInverseTextProcessing): A model instance, or a model
            local dir, or a model id in the model hub.
        kwargs (dict, `optional`):
            Extra kwargs passed into the pipeline's constructor.

    Example:
        >>> from modelscope.pipelines import pipeline
        >>> pipeline_itn = pipeline(
        >>>     task=Tasks.inverse_text_processing,
        >>>     model='damo/speech_inverse_text_processing_fun-text-processing-itn-id')
        >>> sentence = 'sembilan ribu sembilan ratus sembilan puluh sembilan'
        >>> print(pipeline_itn(sentence))

    To view other examples please check
    tests/pipelines/test_inverse_text_processing.py.
    """

    def __init__(self, model: Union[Model, str] = None, **kwargs):
        """Use `model` to create an ITN pipeline for prediction."""
        super().__init__(model=model, **kwargs)
        # The model's forward() only returns its configuration dict;
        # the actual FST inference happens in run_inference below.
        self.model_cfg = self.model.forward()

    def __call__(self, text_in: str = None) -> str:
        """Run ITN on `text_in` and return the written-form string.

        Raises:
            ValueError: if `text_in` is None or empty.
        """
        # `if not text_in` also covers the default None; the previous
        # `len(text_in) == 0` check raised TypeError on None instead of
        # the intended ValueError.
        if not text_in:
            raise ValueError('The input of ITN should not be null.')
        self.text_in = text_in
        return self.forward(self.text_in)

    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Postprocessing: the decoded string needs no further work."""
        return inputs

    def forward(self, text_in: str = None) -> str:
        """Decode `text_in` with the language-specific ITN grammar."""
        logger.info('Inverse Text Normalization: {0} ...'.format(text_in))

        lang = self.model_cfg['model_config']['lang']
        model_dir = self.model_cfg['model_workspace']
        itn_model_path = self.model_cfg['itn_model_path']

        # make directory recursively; exist_ok already makes a prior
        # isdir check redundant
        cache_dir = os.path.join(model_dir, lang, '.cache')
        os.makedirs(cache_dir, mode=0o777, exist_ok=True)

        # copy the grammar archive into cache_dir under the name the
        # InverseNormalizer expects: '_<lang>_itn.far'
        name = '_{0}_itn.far'.format(lang)
        far_file = os.path.join(cache_dir, name)
        shutil.copy(itn_model_path, far_file)

        # generate itn inference command
        cmd = {
            'ngpu': 0,
            'log_level': 'ERROR',
            'text_in': text_in,
            'itn_model_file': far_file,
            'cache_dir': cache_dir,
            'overwrite_cache': False,
            'enable_standalone_number': True,
            'enable_0_to_9': True,
            'lang': lang,
            'verbose': False,
        }

        return self.run_inference(cmd)

    def run_inference(self, cmd):
        """Instantiate the fun_text_processing InverseNormalizer and decode.

        Only the torch framework is supported. Japanese takes two extra
        grammar switches that the other languages' constructors do not
        accept — TODO confirm against fun_text_processing.
        """
        itn_result = ''
        if self.framework == Frameworks.torch:
            from fun_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer
            if cmd['lang'] == 'ja':
                itn_normalizer = InverseNormalizer(
                    lang=cmd['lang'],
                    cache_dir=cmd['cache_dir'],
                    overwrite_cache=cmd['overwrite_cache'],
                    enable_standalone_number=cmd['enable_standalone_number'],
                    enable_0_to_9=cmd['enable_0_to_9'])
            else:
                itn_normalizer = InverseNormalizer(
                    lang=cmd['lang'],
                    cache_dir=cmd['cache_dir'],
                    overwrite_cache=cmd['overwrite_cache'])
            itn_result = itn_normalizer.inverse_normalize(
                cmd['text_in'], verbose=cmd['verbose'])
        else:
            raise ValueError('model type is mismatching')
        return itn_result

View File

@@ -162,6 +162,7 @@ class AudioTasks(object):
acoustic_echo_cancellation = 'acoustic-echo-cancellation'
acoustic_noise_suppression = 'acoustic-noise-suppression'
keyword_spotting = 'keyword-spotting'
inverse_text_processing = 'inverse-text-processing'
class MultiModalTasks(object):

View File

@@ -1,6 +1,7 @@
easyasr>=0.0.2
espnet==202204
funasr>=0.1.4
funtextprocessing>=0.1.1
greenlet>=1.1.2
h5py
inflect

View File

@@ -0,0 +1,70 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
class InverseTextProcessingTest(unittest.TestCase, DemoCompatibilityCheck):
    """Multi-language smoke tests for the ITN inference pipeline."""

    def setUp(self) -> None:
        # Bug fix: the original assignment carried a trailing comma
        # (`... = Tasks.inverse_text_processing,`), which silently made
        # self.task a 1-tuple instead of the plain task string and broke
        # the DemoCompatibilityCheck lookup.
        self.task = Tasks.inverse_text_processing
        # language code -> hub id of the matching ITN grammar model
        self.model_dict = {
            'en':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-en',
            'de':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-de',
            'es':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-es',
            'fr':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-fr',
            'id':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-id',
            'ko':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-ko',
            'ja':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-ja',
            'pt':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-pt',
            'ru':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-ru',
            'vi':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-vi',
            'tl':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-tl',
        }
        # language code -> spoken-form input sentence
        self.text_in_dict = {
            'en':
            'on december second, we paid one hundred and twenty three dollars for christmas tree.',
            'de': 'einhundertdreiundzwanzig',
            'es': 'ciento veintitrés',
            'fr': 'cent vingt-trois',
            'id': 'seratus dua puluh tiga',
            'ko': '삼백오 독일 마',
            'ja': '百二十三',
            'pt': 'cento e vinte e três',
            'ru': 'сто двадцать три',
            'vi': 'một trăm hai mươi ba',
            'tl': "ika-lima mayo dalawang libo dalawampu't dalawa",
        }

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_model_name_for_multi_language_itn(self):
        # Unpack items directly instead of re-assigning key/value locals.
        for lang, model_name in self.model_dict.items():
            itn_inference_pipeline = pipeline(
                task=Tasks.inverse_text_processing, model=model_name)
            itn_result = itn_inference_pipeline(
                text_in=self.text_in_dict[lang])
            print(itn_result)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_demo_compatibility(self):
        self.compatibility_check()


if __name__ == '__main__':
    unittest.main()