From 2bec9603b190c76a9db1674194a978e82cfb2dcc Mon Sep 17 00:00:00 2001 From: "wucong.lyb" Date: Mon, 13 Feb 2023 11:46:32 +0000 Subject: [PATCH] [to #42322933] fix bugs: audio fs, asr & sv demo services (cherry picked from commit 1b300b2f3e2a942d39b136c7284e0f51c6320f43) --- .../pipelines/audio/asr_inference_pipeline.py | 10 ++++++---- modelscope/pipelines/audio/lm_infer_pipeline.py | 2 +- .../audio/punctuation_processing_pipeline.py | 2 +- .../audio/speaker_verification_pipeline.py | 9 +++++++-- modelscope/utils/audio/audio_utils.py | 15 +++------------ requirements/audio/audio_asr.txt | 2 +- 6 files changed, 19 insertions(+), 21 deletions(-) diff --git a/modelscope/pipelines/audio/asr_inference_pipeline.py b/modelscope/pipelines/audio/asr_inference_pipeline.py index e259bc44..f0288f27 100644 --- a/modelscope/pipelines/audio/asr_inference_pipeline.py +++ b/modelscope/pipelines/audio/asr_inference_pipeline.py @@ -203,13 +203,11 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): code_base = self.cmd['code_base'] self.recog_type = recog_type self.audio_format = audio_format - self.audio_fs = audio_fs + self.audio_fs = None checking_audio_fs = None self.raw_inputs = None if output_dir is not None: self.cmd['output_dir'] = output_dir - if audio_fs is not None: - self.cmd['fs']['audio_fs'] = audio_fs self.cmd['param_dict'] = param_dict if code_base == 'funasr': @@ -254,6 +252,10 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): self.audio_in, self.audio_format) if checking_audio_fs is not None: self.audio_fs = checking_audio_fs + if audio_fs is not None: + self.cmd['fs']['audio_fs'] = audio_fs + else: + self.cmd['fs']['audio_fs'] = self.audio_fs output = self.preprocessor.forward(self.model_cfg, self.recog_type, self.audio_format, self.audio_in, @@ -293,7 +295,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): 'seed': 0, 'ngram_weight': 0.9, 'nbest': 1, - 'num_workers': 1, + 'num_workers': 0, 'vad_infer_config': None, 'vad_model_file': None, 'vad_cmvn_file': None, diff --git a/modelscope/pipelines/audio/lm_infer_pipeline.py b/modelscope/pipelines/audio/lm_infer_pipeline.py index 1a4c6225..d7275b6b 100644 --- a/modelscope/pipelines/audio/lm_infer_pipeline.py +++ b/modelscope/pipelines/audio/lm_infer_pipeline.py @@ -156,7 +156,7 @@ class LanguageModelPipeline(Pipeline): 'dtype': 'float32', 'ngpu': 1, # 0: only CPU, ngpu>=1: gpu number if cuda is available 'seed': 0, - 'num_workers': 1, + 'num_workers': 0, 'log_level': 'ERROR', 'key_file': None, 'train_config': lm_model_config, diff --git a/modelscope/pipelines/audio/punctuation_processing_pipeline.py b/modelscope/pipelines/audio/punctuation_processing_pipeline.py index 0ef6087a..ec1532ea 100644 --- a/modelscope/pipelines/audio/punctuation_processing_pipeline.py +++ b/modelscope/pipelines/audio/punctuation_processing_pipeline.py @@ -107,7 +107,7 @@ class PunctuationProcessingPipeline(Pipeline): 'dtype': 'float32', 'ngpu': 1, # 0: only CPU, ngpu>=1: gpu number if cuda is available 'seed': 0, - 'num_workers': 1, + 'num_workers': 0, 'log_level': 'ERROR', 'key_file': None, 'train_config': punc_model_config, diff --git a/modelscope/pipelines/audio/speaker_verification_pipeline.py b/modelscope/pipelines/audio/speaker_verification_pipeline.py index 19633872..51f01543 100644 --- a/modelscope/pipelines/audio/speaker_verification_pipeline.py +++ b/modelscope/pipelines/audio/speaker_verification_pipeline.py @@ -168,8 +168,13 @@ class SpeakerVerificationPipeline(Pipeline): else: audio_scp_1, audio_scp_2 = generate_sv_scp_from_url( audio_in) - data_cmd = [(audio_scp_1, 'speech', 'sound'), - (audio_scp_2, 'ref_speech', 'sound')] + if isinstance(audio_scp_1, bytes) and isinstance( + audio_scp_2, bytes): + data_cmd = [(audio_scp_1, 'speech', 'bytes'), + (audio_scp_2, 'ref_speech', 'bytes')] + else: + data_cmd = [(audio_scp_1, 'speech', 'sound'), + (audio_scp_2, 'ref_speech', 'sound')] # for raw bytes inputs elif isinstance(audio_in[0], bytes): data_cmd = [(audio_in[0], 'speech', 'bytes'), diff --git a/modelscope/utils/audio/audio_utils.py b/modelscope/utils/audio/audio_utils.py index c5fbe8c5..c0189171 100644 --- a/modelscope/utils/audio/audio_utils.py +++ b/modelscope/utils/audio/audio_utils.py @@ -179,7 +179,7 @@ def generate_scp_from_url(url: str, key: str = None): if os.path.exists(url) and (url.lower().endswith('.wav')): wav_scp_path = url return wav_scp_path, raw_inputs - # for wav url, download and generate wav.scp + # for wav url, download bytes data result = urlparse(url) if result.scheme is not None and len(result.scheme) > 0: storage = HTTPStorage() @@ -243,17 +243,8 @@ def generate_scp_for_sv(url: str, key: str = None): result = urlparse(url) if result.scheme is not None and len(result.scheme) > 0: storage = HTTPStorage() - data = storage.read(url) - work_dir = tempfile.TemporaryDirectory().name - if not os.path.exists(work_dir): - os.makedirs(work_dir) - wav_path = os.path.join(work_dir, os.path.basename(url)) - with open(wav_path, 'wb') as fb: - fb.write(data) - wav_scp_path = os.path.join(work_dir, 'wav.scp') - with open(wav_scp_path, 'w') as ft: - scp_content = '\t'.join([wav_name, wav_path]) + '\n' - ft.writelines(scp_content) + wav_scp_path = storage.read(url) + return wav_scp_path return wav_scp_path diff --git a/requirements/audio/audio_asr.txt b/requirements/audio/audio_asr.txt index 204363d0..2dc2f9b7 100644 --- a/requirements/audio/audio_asr.txt +++ b/requirements/audio/audio_asr.txt @@ -1,2 +1,2 @@ easyasr>=0.0.2 -funasr>=0.1.7 +funasr>=0.2.0