import io
from typing import Any, Dict
import librosa
import numpy as np
import soundfile as sf
import torch
from modelscope.metainfo import Pipelines
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Input, Pipeline
from modelscope.pipelines.builder import PIPELINES
from modelscope.utils.constant import Tasks


def audio_norm(x):
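    """Normalize the signal to roughly -25 dBFS in two passes.

    The first pass scales the whole signal to -25 dB RMS; the second
    re-estimates the RMS over only the above-average-power samples, so
    silence does not bias the level estimate, and rescales accordingly.
    """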
    rms = (x**2).mean()**0.5
    scalar = 10**(-25 / 20) / rms
    x = x * scalar
    pow_x = x**2
    avg_pow_x = pow_x.mean()
    rmsx = pow_x[pow_x > avg_pow_x].mean()**0.5
    scalarx = 10**(-25 / 20) / rmsx
    x = x * scalarx
    return x


@PIPELINES.register_module(
    Tasks.acoustic_noise_suppression,
    module_name=Pipelines.speech_frcrn_ans_cirm_16k)
class ANSPipeline(Pipeline):
    r"""ANS (Acoustic Noise Suppression) inference pipeline.

    When the pipeline is invoked via ``__call__()``, it accepts a single
    positional parameter:
        inputs (str or bytes): the path of a wav file, or the raw bytes
            of a wav file.
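
    Example (usage sketch; the model id below is illustrative, check the
    ModelScope hub for the published FRCRN ANS model):
        >>> from modelscope.pipelines import pipeline
        >>> from modelscope.utils.constant import Tasks
        >>> ans = pipeline(
        >>>     Tasks.acoustic_noise_suppression,
        >>>     model='damo/speech_frcrn_ans_cirm_16k')
        >>> result = ans('speech_with_noise.wav', output_path='enhanced.wav')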
    """

    SAMPLE_RATE = 16000

    def __init__(self, model, **kwargs):
        """Use `model` to create an ANS pipeline for prediction.

        Args:
            model: model id on the ModelScope hub.
        """
        super().__init__(model=model, **kwargs)
        self.model.eval()

    def preprocess(self, inputs: Input) -> Dict[str, Any]:
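        """Load, normalize and shape the input audio.

        Accepts a wav file path or raw wav bytes, keeps the first channel,
        resamples to 16 kHz if needed, level-normalizes, and returns a
        [1, nsamples] float32 array plus the original sample count.
        """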
        if isinstance(inputs, bytes):
            data1, fs = sf.read(io.BytesIO(inputs))
        elif isinstance(inputs, str):
            data1, fs = sf.read(inputs)
        else:
            raise TypeError(f'Unsupported type {type(inputs)}.')
        if len(data1.shape) > 1:
            # Multi-channel input: keep the first channel only.
            data1 = data1[:, 0]
        if fs != self.SAMPLE_RATE:
            # Keyword arguments: the positional orig_sr/target_sr form was
            # removed in librosa 0.10.
            data1 = librosa.resample(
                data1, orig_sr=fs, target_sr=self.SAMPLE_RATE)
        data1 = audio_norm(data1)
        data = data1.astype(np.float32)
        inputs = np.reshape(data, [1, data.shape[0]])
        return {'ndarray': inputs, 'nsamples': data.shape[0]}

    def forward(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
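        """Denoise the waveform with the FRCRN model.

        Long inputs are decoded in overlapping segments; the result is
        returned as 16-bit PCM bytes under `OutputKeys.OUTPUT_PCM`.
        """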
        ndarray = inputs['ndarray']
        if isinstance(ndarray, torch.Tensor):
            ndarray = ndarray.cpu().numpy()
        nsamples = inputs['nsamples']
        decode_do_segment = False
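        # Segmented decoding kicks in for inputs longer than 120 windows of
        # 16000 samples each, i.e. about 2 minutes of audio at 16 kHz.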
        window = 16000
        stride = int(window * 0.75)
        print('inputs:{}'.format(ndarray.shape))
        b, t = ndarray.shape
        if t > window * 120:
            decode_do_segment = True
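
        # Zero-pad on the right so the strided windows tile the signal; the
        # padded tail is trimmed off again via outputs[:nsamples] below.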
        if t < window:
            ndarray = np.concatenate(
                [ndarray, np.zeros((ndarray.shape[0], window - t))], 1)
        elif t < window + stride:
            padding = window + stride - t
            print('padding: {}'.format(padding))
            ndarray = np.concatenate(
                [ndarray, np.zeros((ndarray.shape[0], padding))], 1)
        else:
            if (t - window) % stride != 0:
                padding = t - (t - window) // stride * stride
                print('padding: {}'.format(padding))
                ndarray = np.concatenate(
                    [ndarray, np.zeros((ndarray.shape[0], padding))], 1)
        print('inputs after padding:{}'.format(ndarray.shape))

        with torch.no_grad():
            ndarray = torch.from_numpy(np.float32(ndarray)).to(self.device)
            b, t = ndarray.shape
            if decode_do_segment:
                outputs = np.zeros(t)
                # Give up (window - stride) // 2 samples at each segment
                # edge to avoid boundary artifacts between segments.
                give_up_length = (window - stride) // 2
                current_idx = 0
                while current_idx + window <= t:
                    print('current_idx: {}'.format(current_idx))
                    tmp_input = ndarray[:, current_idx:current_idx + window]
                    tmp_output = self.model(
                        tmp_input)['wav_l2'][0].cpu().numpy()
                    end_index = current_idx + window - give_up_length
                    if current_idx == 0:
                        outputs[current_idx:end_index] = (
                            tmp_output[:-give_up_length])
                    else:
                        outputs[current_idx + give_up_length:end_index] = (
                            tmp_output[give_up_length:-give_up_length])
                    current_idx += stride
            else:
                outputs = self.model(ndarray)['wav_l2'][0].cpu().numpy()

        # Trim the padding back to the original length and convert the
        # float waveform to 16-bit PCM bytes.
        outputs = (outputs[:nsamples] * 32768).astype(np.int16).tobytes()
        return {OutputKeys.OUTPUT_PCM: outputs}

    def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]:
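        """Optionally write the denoised PCM to ``output_path`` as wav."""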
        if 'output_path' in kwargs:
            sf.write(
                kwargs['output_path'],
                np.frombuffer(inputs[OutputKeys.OUTPUT_PCM], dtype=np.int16),
                self.SAMPLE_RATE)
        return inputs