Files
modelscope/tests/pipelines/test_speech_signal_process.py
Mashiro e71057cacf feat(audio/ans): Add ZipEnhancer and related layers for acoustic noise suppression (#1019)
* feat(audio): Add acoustic noise suppression pipeline and tests for zipenhancer model

* Introduce `ZipEnhancer` module and associated layers (`ZipEnhancerLayer`, `Generator`, `ZipFormer`, ...).
* Add `speech_zipenhancer_ans_multiloss_16k_base` pipeline for `ZipEnhancer` module.
* Add new test cases and update metainfo.


Co-authored-by: Haoxu Wang <wanghaoxu.whx@alibaba-inc.com>
2024-10-24 15:38:09 +08:00

186 lines
8.1 KiB
Python

# Copyright (c) Alibaba, Inc. and its affiliates.
import os.path
import unittest
from modelscope.metainfo import Pipelines
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.test_utils import test_level
# Local fixtures for the echo-cancellation tests; the tests join these onto
# the current working directory.
NEAREND_MIC_FILE = 'data/test/audios/nearend_mic.wav'
FAREND_SPEECH_FILE = 'data/test/audios/farend_speech.wav'

# URL inputs used by the echo-cancellation URL test.
NEAREND_MIC_URL = ('https://modelscope.oss-cn-beijing.aliyuncs.com/'
                   'test/audios/nearend_mic.wav')
FAREND_SPEECH_URL = ('https://modelscope.oss-cn-beijing.aliyuncs.com/'
                     'test/audios/farend_speech.wav')

# Fixtures for the noise-suppression tests: two wav files, a raw PCM file
# and a URL input.
NOISE_SPEECH_FILE = 'data/test/audios/speech_with_noise.wav'
NOISE_SPEECH_FILE_48K = 'data/test/audios/speech_with_noise_48k.wav'
NOISE_SPEECH_FILE_48K_PCM = 'data/test/audios/speech_with_noise_48k.PCM'
NOISE_SPEECH_URL = ('https://modelscope.oss-cn-beijing.aliyuncs.com/'
                    'test/audios/speech_with_noise.wav')
@unittest.skip('For librosa numpy compatible')
class SpeechSignalProcessTest(unittest.TestCase):
    """Smoke tests for the speech signal processing pipelines.

    Exercises acoustic echo cancellation (AEC) and acoustic noise
    suppression (ANS) pipelines with local file paths, URLs, raw bytes and
    a block-wise PCM stream. Each test only runs the pipeline and prints
    the output path it passed in; no assertion is made on the audio.

    NOTE: the class-level ``unittest.skip`` disables every test in this
    class regardless of what ``test_level()`` returns.
    """

    def setUp(self) -> None:
        pass

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_aec(self):
        """Run AEC with both inputs given as local file paths."""
        model_id = 'damo/speech_dfsmn_aec_psm_16k'
        # Named ``inputs`` (not ``input``) so the builtin is not shadowed.
        inputs = {
            'nearend_mic': os.path.join(os.getcwd(), NEAREND_MIC_FILE),
            'farend_speech': os.path.join(os.getcwd(), FAREND_SPEECH_FILE)
        }
        aec = pipeline(Tasks.acoustic_echo_cancellation, model=model_id)
        output_path = os.path.abspath('output.wav')
        aec(inputs, output_path=output_path)
        print(f'Processed audio saved to {output_path}')

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_aec_url(self):
        """Run AEC with both inputs given as URLs."""
        model_id = 'damo/speech_dfsmn_aec_psm_16k'
        inputs = {
            'nearend_mic': NEAREND_MIC_URL,
            'farend_speech': FAREND_SPEECH_URL
        }
        aec = pipeline(Tasks.acoustic_echo_cancellation, model=model_id)
        output_path = os.path.abspath('output.wav')
        aec(inputs, output_path=output_path)
        print(f'Processed audio saved to {output_path}')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_aec_bytes(self):
        """Run AEC with both inputs given as raw file bytes in a dict."""
        model_id = 'damo/speech_dfsmn_aec_psm_16k'
        inputs = {}
        with open(os.path.join(os.getcwd(), NEAREND_MIC_FILE), 'rb') as f:
            inputs['nearend_mic'] = f.read()
        with open(os.path.join(os.getcwd(), FAREND_SPEECH_FILE), 'rb') as f:
            inputs['farend_speech'] = f.read()
        # The pipeline implementation is selected explicitly by name here.
        aec = pipeline(
            Tasks.acoustic_echo_cancellation,
            model=model_id,
            pipeline_name=Pipelines.speech_dfsmn_aec_psm_16k)
        output_path = os.path.abspath('output.wav')
        aec(inputs, output_path=output_path)
        print(f'Processed audio saved to {output_path}')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_aec_tuple_bytes(self):
        """Run AEC with inputs given as a (nearend, farend) bytes tuple."""
        model_id = 'damo/speech_dfsmn_aec_psm_16k'
        with open(os.path.join(os.getcwd(), NEAREND_MIC_FILE), 'rb') as f:
            nearend_bytes = f.read()
        with open(os.path.join(os.getcwd(), FAREND_SPEECH_FILE), 'rb') as f:
            farend_bytes = f.read()
        inputs = (nearend_bytes, farend_bytes)
        aec = pipeline(
            Tasks.acoustic_echo_cancellation,
            model=model_id,
            pipeline_name=Pipelines.speech_dfsmn_aec_psm_16k)
        output_path = os.path.abspath('output.wav')
        aec(inputs, output_path=output_path)
        print(f'Processed audio saved to {output_path}')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_frcrn_ans(self):
        """Run the FRCRN ANS model on a local wav file path."""
        model_id = 'damo/speech_frcrn_ans_cirm_16k'
        ans = pipeline(Tasks.acoustic_noise_suppression, model=model_id)
        output_path = os.path.abspath('output.wav')
        ans(os.path.join(os.getcwd(), NOISE_SPEECH_FILE),
            output_path=output_path)
        print(f'Processed audio saved to {output_path}')

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_ans_url(self):
        """Run the FRCRN ANS model on a URL input."""
        model_id = 'damo/speech_frcrn_ans_cirm_16k'
        ans = pipeline(Tasks.acoustic_noise_suppression, model=model_id)
        output_path = os.path.abspath('output.wav')
        ans(NOISE_SPEECH_URL, output_path=output_path)
        print(f'Processed audio saved to {output_path}')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_ans_bytes(self):
        """Run the FRCRN ANS model on the raw bytes of a local wav file."""
        model_id = 'damo/speech_frcrn_ans_cirm_16k'
        ans = pipeline(
            Tasks.acoustic_noise_suppression,
            model=model_id,
            pipeline_name=Pipelines.speech_frcrn_ans_cirm_16k)
        output_path = os.path.abspath('output.wav')
        with open(os.path.join(os.getcwd(), NOISE_SPEECH_FILE), 'rb') as f:
            data = f.read()
            ans(data, output_path=output_path)
        print(f'Processed audio saved to {output_path}')

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_dfsmn_ans(self):
        """Run the DFSMN ANS model on a local wav file path."""
        model_id = 'damo/speech_dfsmn_ans_psm_48k_causal'
        ans = pipeline(Tasks.acoustic_noise_suppression, model=model_id)
        output_path = os.path.abspath('output.wav')
        ans(os.path.join(os.getcwd(), NOISE_SPEECH_FILE_48K),
            output_path=output_path)
        print(f'Processed audio saved to {output_path}')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_dfsmn_ans_bytes(self):
        """Run the DFSMN ANS model on the raw bytes of a local wav file."""
        model_id = 'damo/speech_dfsmn_ans_psm_48k_causal'
        ans = pipeline(Tasks.acoustic_noise_suppression, model=model_id)
        output_path = os.path.abspath('output.wav')
        with open(os.path.join(os.getcwd(), NOISE_SPEECH_FILE_48K), 'rb') as f:
            data = f.read()
            ans(data, output_path=output_path)
        print(f'Processed audio saved to {output_path}')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_dfsmn_ans_stream(self):
        """Feed a PCM file to the DFSMN ANS model block by block.

        The pipeline is created with ``stream_mode=True``; each result's
        ``OutputKeys.OUTPUT_PCM`` payload is appended to ``output.pcm`` in
        the current working directory.
        """
        model_id = 'damo/speech_dfsmn_ans_psm_48k_causal'
        ans = pipeline(
            Tasks.acoustic_noise_suppression, model=model_id, stream_mode=True)
        with open(os.path.join(os.getcwd(), NOISE_SPEECH_FILE_48K_PCM),
                  'rb') as f:
            # NOTE(review): 3840 bytes per block; presumably tied to the
            # model's frame size -- confirm against the pipeline.
            block_size = 3840
            audio = f.read(block_size)
            with open('output.pcm', 'wb') as w:
                # Only full blocks are processed; a shorter trailing read
                # ends the loop and is not passed to the pipeline.
                while len(audio) >= block_size:
                    result = ans(audio)
                    pcm = result[OutputKeys.OUTPUT_PCM]
                    w.write(pcm)
                    audio = f.read(block_size)

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_zipenhancer_ans(self):
        """Run the ZipEnhancer ANS model on a local wav file path."""
        model_id = 'damo/speech_zipenhancer_ans_multiloss_16k_base'
        ans = pipeline(Tasks.acoustic_noise_suppression, model=model_id)
        output_path = os.path.abspath('output.wav')
        ans(os.path.join(os.getcwd(), NOISE_SPEECH_FILE),
            output_path=output_path)
        print(f'Processed audio saved to {output_path}')

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_zipenhancer_ans_url(self):
        """Run the ZipEnhancer ANS model on a URL input."""
        model_id = 'damo/speech_zipenhancer_ans_multiloss_16k_base'
        ans = pipeline(Tasks.acoustic_noise_suppression, model=model_id)
        output_path = os.path.abspath('output.wav')
        ans(NOISE_SPEECH_URL, output_path=output_path)
        print(f'Processed audio saved to {output_path}')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_zipenhancer_ans_bytes(self):
        """Run the ZipEnhancer ANS model on the raw bytes of a wav file."""
        model_id = 'damo/speech_zipenhancer_ans_multiloss_16k_base'
        ans = pipeline(
            Tasks.acoustic_noise_suppression,
            model=model_id,
            pipeline_name=Pipelines.speech_zipenhancer_ans_multiloss_16k_base)
        output_path = os.path.abspath('output.wav')
        with open(os.path.join(os.getcwd(), NOISE_SPEECH_FILE), 'rb') as f:
            data = f.read()
            ans(data, output_path=output_path)
        print(f'Processed audio saved to {output_path}')
# Run this module's tests when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()