Mirror of https://github.com/modelscope/modelscope.git (synced 2026-02-25 04:30:48 +01:00)
[to #42322933] fix bugs: audio fs, asr & sv demo services
(cherry picked from commit 1b300b2f3e)
This commit is contained in:
Committed by: Zhicheng Zhang
Parent: 38268a8048
Commit: 2bec9603b1
@@ -203,13 +203,11 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
|
||||
code_base = self.cmd['code_base']
|
||||
self.recog_type = recog_type
|
||||
self.audio_format = audio_format
|
||||
self.audio_fs = audio_fs
|
||||
self.audio_fs = None
|
||||
checking_audio_fs = None
|
||||
self.raw_inputs = None
|
||||
if output_dir is not None:
|
||||
self.cmd['output_dir'] = output_dir
|
||||
if audio_fs is not None:
|
||||
self.cmd['fs']['audio_fs'] = audio_fs
|
||||
self.cmd['param_dict'] = param_dict
|
||||
|
||||
if code_base == 'funasr':
|
||||
@@ -254,6 +252,10 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
|
||||
self.audio_in, self.audio_format)
|
||||
if checking_audio_fs is not None:
|
||||
self.audio_fs = checking_audio_fs
|
||||
if audio_fs is not None:
|
||||
self.cmd['fs']['audio_fs'] = audio_fs
|
||||
else:
|
||||
self.cmd['fs']['audio_fs'] = self.audio_fs
|
||||
|
||||
output = self.preprocessor.forward(self.model_cfg, self.recog_type,
|
||||
self.audio_format, self.audio_in,
|
||||
@@ -293,7 +295,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
|
||||
'seed': 0,
|
||||
'ngram_weight': 0.9,
|
||||
'nbest': 1,
|
||||
'num_workers': 1,
|
||||
'num_workers': 0,
|
||||
'vad_infer_config': None,
|
||||
'vad_model_file': None,
|
||||
'vad_cmvn_file': None,
|
||||
|
||||
@@ -156,7 +156,7 @@ class LanguageModelPipeline(Pipeline):
|
||||
'dtype': 'float32',
|
||||
'ngpu': 1, # 0: only CPU, ngpu>=1: gpu number if cuda is available
|
||||
'seed': 0,
|
||||
'num_workers': 1,
|
||||
'num_workers': 0,
|
||||
'log_level': 'ERROR',
|
||||
'key_file': None,
|
||||
'train_config': lm_model_config,
|
||||
|
||||
@@ -107,7 +107,7 @@ class PunctuationProcessingPipeline(Pipeline):
|
||||
'dtype': 'float32',
|
||||
'ngpu': 1, # 0: only CPU, ngpu>=1: gpu number if cuda is available
|
||||
'seed': 0,
|
||||
'num_workers': 1,
|
||||
'num_workers': 0,
|
||||
'log_level': 'ERROR',
|
||||
'key_file': None,
|
||||
'train_config': punc_model_config,
|
||||
|
||||
@@ -168,8 +168,13 @@ class SpeakerVerificationPipeline(Pipeline):
|
||||
else:
|
||||
audio_scp_1, audio_scp_2 = generate_sv_scp_from_url(
|
||||
audio_in)
|
||||
data_cmd = [(audio_scp_1, 'speech', 'sound'),
|
||||
(audio_scp_2, 'ref_speech', 'sound')]
|
||||
if isinstance(audio_scp_1, bytes) and isinstance(
|
||||
audio_scp_2, bytes):
|
||||
data_cmd = [(audio_scp_1, 'speech', 'bytes'),
|
||||
(audio_scp_2, 'ref_speech', 'bytes')]
|
||||
else:
|
||||
data_cmd = [(audio_scp_1, 'speech', 'sound'),
|
||||
(audio_scp_2, 'ref_speech', 'sound')]
|
||||
# for raw bytes inputs
|
||||
elif isinstance(audio_in[0], bytes):
|
||||
data_cmd = [(audio_in[0], 'speech', 'bytes'),
|
||||
|
||||
@@ -179,7 +179,7 @@ def generate_scp_from_url(url: str, key: str = None):
|
||||
if os.path.exists(url) and (url.lower().endswith('.wav')):
|
||||
wav_scp_path = url
|
||||
return wav_scp_path, raw_inputs
|
||||
# for wav url, download and generate wav.scp
|
||||
# for wav url, download bytes data
|
||||
result = urlparse(url)
|
||||
if result.scheme is not None and len(result.scheme) > 0:
|
||||
storage = HTTPStorage()
|
||||
@@ -243,17 +243,8 @@ def generate_scp_for_sv(url: str, key: str = None):
|
||||
result = urlparse(url)
|
||||
if result.scheme is not None and len(result.scheme) > 0:
|
||||
storage = HTTPStorage()
|
||||
data = storage.read(url)
|
||||
work_dir = tempfile.TemporaryDirectory().name
|
||||
if not os.path.exists(work_dir):
|
||||
os.makedirs(work_dir)
|
||||
wav_path = os.path.join(work_dir, os.path.basename(url))
|
||||
with open(wav_path, 'wb') as fb:
|
||||
fb.write(data)
|
||||
wav_scp_path = os.path.join(work_dir, 'wav.scp')
|
||||
with open(wav_scp_path, 'w') as ft:
|
||||
scp_content = '\t'.join([wav_name, wav_path]) + '\n'
|
||||
ft.writelines(scp_content)
|
||||
wav_scp_path = storage.read(url)
|
||||
return wav_scp_path
|
||||
|
||||
return wav_scp_path
|
||||
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
easyasr>=0.0.2
|
||||
funasr>=0.1.7
|
||||
funasr>=0.2.0
|
||||
|
||||
Reference in New Issue
Block a user