[to #42322933] fix bugs in audio fs handling and in the ASR & SV demo services

(cherry picked from commit 1b300b2f3e)
This commit is contained in:
wucong.lyb
2023-02-13 11:46:32 +00:00
committed by Zhicheng Zhang
parent 38268a8048
commit 2bec9603b1
6 changed files with 19 additions and 21 deletions

View File

@@ -203,13 +203,11 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
code_base = self.cmd['code_base']
self.recog_type = recog_type
self.audio_format = audio_format
self.audio_fs = audio_fs
self.audio_fs = None
checking_audio_fs = None
self.raw_inputs = None
if output_dir is not None:
self.cmd['output_dir'] = output_dir
if audio_fs is not None:
self.cmd['fs']['audio_fs'] = audio_fs
self.cmd['param_dict'] = param_dict
if code_base == 'funasr':
@@ -254,6 +252,10 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
self.audio_in, self.audio_format)
if checking_audio_fs is not None:
self.audio_fs = checking_audio_fs
if audio_fs is not None:
self.cmd['fs']['audio_fs'] = audio_fs
else:
self.cmd['fs']['audio_fs'] = self.audio_fs
output = self.preprocessor.forward(self.model_cfg, self.recog_type,
self.audio_format, self.audio_in,
@@ -293,7 +295,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
'seed': 0,
'ngram_weight': 0.9,
'nbest': 1,
'num_workers': 1,
'num_workers': 0,
'vad_infer_config': None,
'vad_model_file': None,
'vad_cmvn_file': None,

View File

@@ -156,7 +156,7 @@ class LanguageModelPipeline(Pipeline):
'dtype': 'float32',
'ngpu': 1, # 0: only CPU, ngpu>=1: gpu number if cuda is available
'seed': 0,
'num_workers': 1,
'num_workers': 0,
'log_level': 'ERROR',
'key_file': None,
'train_config': lm_model_config,

View File

@@ -107,7 +107,7 @@ class PunctuationProcessingPipeline(Pipeline):
'dtype': 'float32',
'ngpu': 1, # 0: only CPU, ngpu>=1: gpu number if cuda is available
'seed': 0,
'num_workers': 1,
'num_workers': 0,
'log_level': 'ERROR',
'key_file': None,
'train_config': punc_model_config,

View File

@@ -168,8 +168,13 @@ class SpeakerVerificationPipeline(Pipeline):
else:
audio_scp_1, audio_scp_2 = generate_sv_scp_from_url(
audio_in)
data_cmd = [(audio_scp_1, 'speech', 'sound'),
(audio_scp_2, 'ref_speech', 'sound')]
if isinstance(audio_scp_1, bytes) and isinstance(
audio_scp_2, bytes):
data_cmd = [(audio_scp_1, 'speech', 'bytes'),
(audio_scp_2, 'ref_speech', 'bytes')]
else:
data_cmd = [(audio_scp_1, 'speech', 'sound'),
(audio_scp_2, 'ref_speech', 'sound')]
# for raw bytes inputs
elif isinstance(audio_in[0], bytes):
data_cmd = [(audio_in[0], 'speech', 'bytes'),

View File

@@ -179,7 +179,7 @@ def generate_scp_from_url(url: str, key: str = None):
if os.path.exists(url) and (url.lower().endswith('.wav')):
wav_scp_path = url
return wav_scp_path, raw_inputs
# for wav url, download and generate wav.scp
# for a wav URL, download the audio as raw bytes
result = urlparse(url)
if result.scheme is not None and len(result.scheme) > 0:
storage = HTTPStorage()
@@ -243,17 +243,8 @@ def generate_scp_for_sv(url: str, key: str = None):
result = urlparse(url)
if result.scheme is not None and len(result.scheme) > 0:
storage = HTTPStorage()
data = storage.read(url)
work_dir = tempfile.TemporaryDirectory().name
if not os.path.exists(work_dir):
os.makedirs(work_dir)
wav_path = os.path.join(work_dir, os.path.basename(url))
with open(wav_path, 'wb') as fb:
fb.write(data)
wav_scp_path = os.path.join(work_dir, 'wav.scp')
with open(wav_scp_path, 'w') as ft:
scp_content = '\t'.join([wav_name, wav_path]) + '\n'
ft.writelines(scp_content)
wav_scp_path = storage.read(url)
return wav_scp_path
return wav_scp_path

View File

@@ -1,2 +1,2 @@
easyasr>=0.0.2
funasr>=0.1.7
funasr>=0.2.0