mirror of
https://github.com/modelscope/modelscope.git
synced 2025-12-16 16:27:45 +01:00
modify format of itn_pipeline
Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11257394 * dev for asr itn inference pipeline * add task interface * add pipeline input * add modemodelscope/pipelines/audio/itn_inference_pipeline.py * add modelscope/pipelines/audio/itn_inference_pipeline.py * modelscope/pipelines/audio/itn_inference_pipeline.py * update modelscope/pipelines/audio/itn_inference_pipeline.py * modify itn_inference_pipeline.py * modify itn_inference_pipeline.py * modify itn_inference_pipeline.py * remove itn.py * modify some names * add modify itn_inference_pipeline.py * modify itn_inference_pipeline.py * modify itn_inference_pipeline.py * modify itn_inference_pipeline.py * modify itn * add tests/pipelines/test_inverse_text_processing.py * modify asr_inference_pipeline.py for the original files * modify format * add commits files * Merge remote-tracking branch 'origin' into remotes/origin/asr/itn_nichongjia * Merge remote-tracking branch 'origin' into remotes/origin/asr/itn_nichongjia * modify the pipelines * Merge branch 'master' into remotes/origin/asr/itn_nichongjia * [to #47031187]fix: hub test suites can not parallel Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11276872 * [to #47031187]fix: hub test suites can not parallel * google style docs and selected file generator ref: https://yuque.alibaba-inc.com/pai/rwqgvl/go8sc8tqzeqqfmsz Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11150212 * google style docs and selected file generator * merge * Merge remote-tracking branch 'origin' into remotes/origin/asr/itn_nichongjia * Merge branch 'master' into remotes/origin/asr/itn_nichongjia * add requirements for fun_text_processing
This commit is contained in:
@@ -119,6 +119,7 @@ class Models(object):
|
||||
kws_kwsbp = 'kws-kwsbp'
|
||||
generic_asr = 'generic-asr'
|
||||
wenet_asr = 'wenet-asr'
|
||||
generic_itn = 'generic-itn'
|
||||
|
||||
# multi-modal models
|
||||
ofa = 'ofa'
|
||||
@@ -327,6 +328,7 @@ class Pipelines(object):
|
||||
kws_kwsbp = 'kws-kwsbp'
|
||||
asr_inference = 'asr-inference'
|
||||
asr_wenet_inference = 'asr-wenet-inference'
|
||||
itn_inference = 'itn-inference'
|
||||
|
||||
# multi-modal tasks
|
||||
image_captioning = 'image-captioning'
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
|
||||
from . import ans, asr, kws, tts
|
||||
from . import ans, asr, itn, kws, tts
|
||||
|
||||
22
modelscope/models/audio/itn/__init__.py
Normal file
22
modelscope/models/audio/itn/__init__.py
Normal file
@@ -0,0 +1,22 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
import sys
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
    # Static type checkers see the real import.
    from .generic_inverse_text_processing import GenericInverseTextProcessing
else:
    # At runtime, defer the import until the name is first accessed.
    _import_structure = {
        'generic_inverse_text_processing': ['GenericInverseTextProcessing'],
    }

    sys.modules[__name__] = LazyImportModule(
        __name__,
        globals()['__file__'],
        _import_structure,
        module_spec=__spec__,
        extra_objects={},
    )
|
||||
@@ -0,0 +1,44 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
|
||||
import os
|
||||
from typing import Any, Dict
|
||||
|
||||
from modelscope.metainfo import Models
|
||||
from modelscope.models.base import Model
|
||||
from modelscope.models.builder import MODELS
|
||||
from modelscope.utils.constant import Frameworks, Tasks
|
||||
|
||||
|
||||
@MODELS.register_module(
    Tasks.inverse_text_processing, module_name=Models.generic_itn)
class GenericInverseTextProcessing(Model):
    """Inverse text normalization (ITN) model wrapper.

    Collects the model directory, the ITN model file name and the detail
    config into a single dict; the actual decoding is performed by the
    pipeline, which reads this dict via ``forward()``.
    """

    def __init__(self, model_dir: str, itn_model_name: str,
                 model_config: Dict[str, Any], *args, **kwargs):
        """Initialize the info of the model.

        Args:
            model_dir (str): the local model directory path.
            itn_model_name (str): the ITN model file name from configuration.json.
            model_config (Dict[str, Any]): the detail config about the model
                from configuration.json.
        """
        super().__init__(model_dir, itn_model_name, model_config, *args,
                         **kwargs)
        # Everything the inference pipeline needs, in one place.
        self.model_cfg = {
            # the model workspace (directory) path
            'model_workspace': model_dir,
            # the itn model file name
            'itn_model': itn_model_name,
            # absolute path of the itn model file
            'itn_model_path': os.path.join(model_dir, itn_model_name),
            # the detail model config dict
            'model_config': model_config,
        }

    def forward(self) -> Dict[str, Any]:
        """Return the model configuration dict assembled in ``__init__``."""
        return self.model_cfg
|
||||
@@ -222,6 +222,8 @@ TASK_INPUTS = {
|
||||
InputType.TEXT,
|
||||
Tasks.keyword_spotting:
|
||||
InputType.AUDIO,
|
||||
Tasks.inverse_text_processing:
|
||||
InputType.TEXT,
|
||||
|
||||
# ============ multi-modal tasks ===================
|
||||
Tasks.image_captioning: [InputType.IMAGE, {
|
||||
|
||||
@@ -10,7 +10,7 @@ if TYPE_CHECKING:
|
||||
from .kws_kwsbp_pipeline import KeyWordSpottingKwsbpPipeline
|
||||
from .linear_aec_pipeline import LinearAECPipeline
|
||||
from .text_to_speech_pipeline import TextToSpeechSambertHifiganPipeline
|
||||
|
||||
from .inverse_text_processing_pipeline import InverseTextProcessingPipeline
|
||||
else:
|
||||
_import_structure = {
|
||||
'ans_pipeline': ['ANSPipeline'],
|
||||
@@ -19,6 +19,7 @@ else:
|
||||
'kws_kwsbp_pipeline': ['KeyWordSpottingKwsbpPipeline'],
|
||||
'linear_aec_pipeline': ['LinearAECPipeline'],
|
||||
'text_to_speech_pipeline': ['TextToSpeechSambertHifiganPipeline'],
|
||||
'itn_inference_pipeline': ['InverseTextProcessingPipeline']
|
||||
}
|
||||
|
||||
import sys
|
||||
|
||||
@@ -167,7 +167,6 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
|
||||
if 'sampled_lengths' in inputs['model_config']:
|
||||
cmd['sampled_lengths'] = inputs['model_config'][
|
||||
'sampled_lengths']
|
||||
|
||||
else:
|
||||
raise ValueError('model type is mismatching')
|
||||
|
||||
@@ -275,6 +274,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
|
||||
token_num_relax=cmd['token_num_relax'],
|
||||
decoding_ind=cmd['decoding_ind'],
|
||||
decoding_mode=cmd['decoding_mode'])
|
||||
|
||||
elif self.framework == Frameworks.torch:
|
||||
from easyasr import asr_inference_paraformer_espnet
|
||||
|
||||
|
||||
121
modelscope/pipelines/audio/inverse_text_processing_pipeline.py
Normal file
121
modelscope/pipelines/audio/inverse_text_processing_pipeline.py
Normal file
@@ -0,0 +1,121 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
import os
|
||||
import shutil
|
||||
from typing import Any, Dict, List, Sequence, Tuple, Union
|
||||
|
||||
import yaml
|
||||
|
||||
from modelscope.metainfo import Pipelines
|
||||
from modelscope.models import Model
|
||||
from modelscope.outputs import OutputKeys
|
||||
from modelscope.pipelines.base import Pipeline
|
||||
from modelscope.pipelines.builder import PIPELINES
|
||||
from modelscope.utils.constant import Frameworks, Tasks
|
||||
from modelscope.utils.logger import get_logger
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
__all__ = ['InverseTextProcessingPipeline']
|
||||
|
||||
|
||||
@PIPELINES.register_module(
    Tasks.inverse_text_processing, module_name=Pipelines.itn_inference)
class InverseTextProcessingPipeline(Pipeline):
    """Inverse Text Processing (ITN) inference pipeline.

    Use `model` to create an inverse text normalization pipeline, which
    rewrites spoken-form text (e.g. number words) into written form.

    Args:
        model (GenericInverseTextProcessing): A model instance, or a model
            local dir, or a model id in the model hub.
        kwargs (dict, `optional`):
            Extra kwargs passed into the pipeline's constructor.
    Example:
        >>> from modelscope.pipelines import pipeline
        >>> pipeline_itn = pipeline(
        >>>     task=Tasks.inverse_text_processing, model='damo/speech_inverse_text_processing_fun-text-processing-itn-id')
        >>> sentence = 'sembilan ribu sembilan ratus sembilan puluh sembilan'
        >>> print(pipeline_itn(sentence))

    To view other examples please check tests/pipelines/test_inverse_text_processing.py.
    """

    def __init__(self, model: Union[Model, str] = None, **kwargs):
        """Use `model` to create an ITN pipeline for prediction."""
        super().__init__(model=model, **kwargs)
        # GenericInverseTextProcessing.forward() returns the model config
        # dict (workspace dir, model file path, lang, ...).
        self.model_cfg = self.model.forward()

    def __call__(self, text_in: str = None) -> str:
        # BUGFIX: the original `len(text_in) == 0` raised a TypeError when
        # text_in was None (the declared default); a truthiness check
        # rejects both None and the empty string with the intended error.
        if not text_in:
            raise ValueError('The input of ITN should not be null.')
        self.text_in = text_in

        return self.forward(self.text_in)

    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Postprocessing: the decoded text is already final."""
        return inputs

    def forward(self, text_in: str = None) -> str:
        """Decode `text_in` with the ITN model and return the result."""
        logger.info('Inverse Text Normalization: {0} ...'.format(text_in))
        lang = self.model_cfg['model_config']['lang']
        model_dir = self.model_cfg['model_workspace']
        itn_model_path = self.model_cfg['itn_model_path']

        # make the cache directory recursively
        cache_dir = os.path.join(model_dir, lang, '.cache')
        if not os.path.isdir(cache_dir):
            os.makedirs(cache_dir, mode=0o777, exist_ok=True)

        # the normalizer expects '_<lang>_itn.far' inside cache_dir
        name = '_{0}_itn.far'.format(lang)
        far_file = os.path.join(cache_dir, name)

        # copy the model file into cache_dir
        shutil.copy(itn_model_path, far_file)

        # generate the itn inference command
        cmd = {
            'ngpu': 0,
            'log_level': 'ERROR',
            'text_in': text_in,
            'itn_model_file': far_file,
            'cache_dir': cache_dir,
            'overwrite_cache': False,
            'enable_standalone_number': True,
            'enable_0_to_9': True,
            'lang': lang,
            'verbose': False,
        }

        itn_result = self.run_inference(cmd)

        return itn_result

    def run_inference(self, cmd):
        """Run the fun_text_processing inverse normalizer described by `cmd`.

        Raises:
            ValueError: if the pipeline framework is not torch.
        """
        if self.framework != Frameworks.torch:
            raise ValueError('model type is mismatching')

        from fun_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer
        if cmd['lang'] == 'ja':
            # Japanese takes extra switches for standalone-number handling.
            itn_normalizer = InverseNormalizer(
                lang=cmd['lang'],
                cache_dir=cmd['cache_dir'],
                overwrite_cache=cmd['overwrite_cache'],
                enable_standalone_number=cmd['enable_standalone_number'],
                enable_0_to_9=cmd['enable_0_to_9'])
        else:
            itn_normalizer = InverseNormalizer(
                lang=cmd['lang'],
                cache_dir=cmd['cache_dir'],
                overwrite_cache=cmd['overwrite_cache'])
        itn_result = itn_normalizer.inverse_normalize(
            cmd['text_in'], verbose=cmd['verbose'])

        return itn_result
|
||||
@@ -162,6 +162,7 @@ class AudioTasks(object):
|
||||
acoustic_echo_cancellation = 'acoustic-echo-cancellation'
|
||||
acoustic_noise_suppression = 'acoustic-noise-suppression'
|
||||
keyword_spotting = 'keyword-spotting'
|
||||
inverse_text_processing = 'inverse-text-processing'
|
||||
|
||||
|
||||
class MultiModalTasks(object):
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
easyasr>=0.0.2
|
||||
espnet==202204
|
||||
funasr>=0.1.4
|
||||
funtextprocessing>=0.1.1
|
||||
greenlet>=1.1.2
|
||||
h5py
|
||||
inflect
|
||||
|
||||
70
tests/pipelines/test_inverse_text_processing.py
Normal file
70
tests/pipelines/test_inverse_text_processing.py
Normal file
@@ -0,0 +1,70 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
import unittest
|
||||
|
||||
from modelscope.pipelines import pipeline
|
||||
from modelscope.utils.constant import Tasks
|
||||
from modelscope.utils.demo_utils import DemoCompatibilityCheck
|
||||
from modelscope.utils.test_utils import test_level
|
||||
|
||||
|
||||
class InverseTextProcessingTest(unittest.TestCase, DemoCompatibilityCheck):
    """Smoke tests for the multi-language inverse text processing pipeline."""

    def setUp(self) -> None:
        # BUGFIX: the original line ended with a trailing comma, which made
        # self.task a one-element tuple instead of the task string.
        self.task = Tasks.inverse_text_processing
        # model-hub id per language code
        self.model_dict = {
            'en':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-en',
            'de':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-de',
            'es':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-es',
            'fr':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-fr',
            'id':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-id',
            'ko':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-ko',
            'ja':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-ja',
            'pt':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-pt',
            'ru':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-ru',
            'vi':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-vi',
            'tl':
            'damo/speech_inverse_text_processing_fun-text-processing-itn-tl',
        }
        # spoken-form sample input per language code
        self.text_in_dict = {
            'en':
            'on december second, we paid one hundred and twenty three dollars for christmas tree.',
            'de': 'einhundertdreiundzwanzig',
            'es': 'ciento veintitrés',
            'fr': 'cent vingt-trois',
            'id': 'seratus dua puluh tiga',
            'ko': '삼백오 독일 마',
            'ja': '百二十三',
            'pt': 'cento e vinte e três',
            'ru': 'сто двадцать три',
            'vi': 'một trăm hai mươi ba',
            'tl': "ika-lima mayo dalawang libo dalawampu't dalawa",
        }

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_model_name_for_multi_language_itn(self):
        """Run every language model on its sample sentence."""
        for lang, model_name in self.model_dict.items():
            itn_inference_pipline = pipeline(
                task=Tasks.inverse_text_processing, model=model_name)
            lang_text_in = self.text_in_dict[lang]
            itn_result = itn_inference_pipline(text_in=lang_text_in)
            print(itn_result)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_demo_compatibility(self):
        self.compatibility_check()
|
||||
Reference in New Issue
Block a user