From 3a78e32eb25eda188aa508e8f2a103d032cf2ae6 Mon Sep 17 00:00:00 2001 From: "wenmeng.zwm" Date: Mon, 13 Jun 2022 18:30:29 +0800 Subject: [PATCH 01/16] Revert "[to #42322933]formalize image matting" This reverts commit de3ea0db5414872ef4262195e1f10c634b5a6226. --- modelscope/pipelines/cv/image_matting_pipeline.py | 4 ++-- modelscope/utils/constant.py | 9 --------- tests/pipelines/test_image_matting.py | 5 ++--- 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/modelscope/pipelines/cv/image_matting_pipeline.py b/modelscope/pipelines/cv/image_matting_pipeline.py index 3e962d85..6f3ff5f5 100644 --- a/modelscope/pipelines/cv/image_matting_pipeline.py +++ b/modelscope/pipelines/cv/image_matting_pipeline.py @@ -7,7 +7,7 @@ import PIL from modelscope.pipelines.base import Input from modelscope.preprocessors import load_image -from modelscope.utils.constant import TF_GRAPH_FILE, Tasks +from modelscope.utils.constant import Tasks from modelscope.utils.logger import get_logger from ..base import Pipeline from ..builder import PIPELINES @@ -24,7 +24,7 @@ class ImageMattingPipeline(Pipeline): import tensorflow as tf if tf.__version__ >= '2.0': tf = tf.compat.v1 - model_path = osp.join(self.model, TF_GRAPH_FILE) + model_path = osp.join(self.model, 'matting_person.pb') config = tf.ConfigProto(allow_soft_placement=True) config.gpu_options.allow_growth = True diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index c51e2445..0d0f2492 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -75,12 +75,3 @@ class Hubs(object): # in order to avoid conflict with huggingface # config file we use maas_config instead CONFIGFILE = 'maas_config.json' - -README_FILE = 'README.md' -TF_SAVED_MODEL_FILE = 'saved_model.pb' -TF_GRAPH_FILE = 'tf_graph.pb' -TF_CHECKPOINT_FOLDER = 'tf_ckpts' -TF_CHECKPOINT_FILE = 'checkpoint' -TORCH_MODEL_FILE = 'pytorch_model.bin' -TENSORFLOW = 'tensorflow' -PYTORCH = 'pytorch' diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py index 69195bd1..53006317 100644 --- a/tests/pipelines/test_image_matting.py +++ b/tests/pipelines/test_image_matting.py @@ -16,15 +16,14 @@ from modelscope.utils.hub import get_model_cache_dir class ImageMattingTest(unittest.TestCase): def setUp(self) -> None: - self.model_id = 'damo/cv_unet_image-matting_damo' + self.model_id = 'damo/image-matting-person' # switch to False if downloading everytime is not desired purge_cache = True if purge_cache: shutil.rmtree( get_model_cache_dir(self.model_id), ignore_errors=True) - @unittest.skip('deprecated, download model from model hub instead') - def test_run_with_direct_file_download(self): + def test_run(self): model_path = 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs' \ '.com/data/test/maas/image_matting/matting_person.pb' with tempfile.TemporaryDirectory() as tmp_dir: From 8a030ead7271a3d65508ee1f6cf0404e629bcff6 Mon Sep 17 00:00:00 2001 From: "wenmeng.zwm" Date: Mon, 13 Jun 2022 19:44:34 +0800 Subject: [PATCH 02/16] [to #42362853] feat: rename config to configuration and remove repeated task fields 1. rename maas_config to configuration 2. 
remove task fields image and video, using cv instead Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9010802 --- .../{config.json => configuration.json} | 0 .../examples/{config.py => configuration.py} | 0 .../{config.yaml => configuration.yaml} | 0 modelscope/pipelines/util.py | 35 +++++++++++-------- modelscope/preprocessors/image.py | 2 +- modelscope/utils/config.py | 12 +++---- modelscope/utils/constant.py | 8 ++--- tests/utils/test_config.py | 10 +++--- 8 files changed, 36 insertions(+), 31 deletions(-) rename configs/examples/{config.json => configuration.json} (100%) rename configs/examples/{config.py => configuration.py} (100%) rename configs/examples/{config.yaml => configuration.yaml} (100%) diff --git a/configs/examples/config.json b/configs/examples/configuration.json similarity index 100% rename from configs/examples/config.json rename to configs/examples/configuration.json diff --git a/configs/examples/config.py b/configs/examples/configuration.py similarity index 100% rename from configs/examples/config.py rename to configs/examples/configuration.py diff --git a/configs/examples/config.yaml b/configs/examples/configuration.yaml similarity index 100% rename from configs/examples/config.yaml rename to configs/examples/configuration.yaml diff --git a/modelscope/pipelines/util.py b/modelscope/pipelines/util.py index caef6b22..43a7ac5a 100644 --- a/modelscope/pipelines/util.py +++ b/modelscope/pipelines/util.py @@ -5,8 +5,22 @@ from typing import List, Union import json from maas_hub.file_download import model_file_download +from matplotlib.pyplot import get +from modelscope.utils.config import Config from modelscope.utils.constant import CONFIGFILE +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +def is_config_has_model(cfg_file): + try: + cfg = Config.from_file(cfg_file) + return hasattr(cfg, 'model') + except Exception as e: + logger.error(f'parse config file {cfg_file} failed: {e}') + return False def is_model_name(model: Union[str, List]): @@ -15,24 +29,17 @@ def is_model_name(model: Union[str, List]): def is_model_name_impl(model): if osp.exists(model): - if osp.exists(osp.join(model, CONFIGFILE)): - return True + cfg_file = osp.join(model, CONFIGFILE) + if osp.exists(cfg_file): + return is_config_has_model(cfg_file) else: return False else: - # try: - # cfg_file = model_file_download(model, CONFIGFILE) - # except Exception: - # cfg_file = None - # TODO @wenmeng.zwm use exception instead of - # following tricky logic - cfg_file = model_file_download(model, CONFIGFILE) - with open(cfg_file, 'r') as infile: - cfg = json.load(infile) - if 'Code' in cfg: + try: + cfg_file = model_file_download(model, CONFIGFILE) + return is_config_has_model(cfg_file) + except Exception: return False - else: - return True if isinstance(model, str): return is_model_name_impl(model) diff --git a/modelscope/preprocessors/image.py b/modelscope/preprocessors/image.py index 142f9484..6bd8aed5 100644 --- a/modelscope/preprocessors/image.py +++ b/modelscope/preprocessors/image.py @@ -9,7 +9,7 @@ from modelscope.utils.constant import Fields from .builder import PREPROCESSORS -@PREPROCESSORS.register_module(Fields.image) +@PREPROCESSORS.register_module(Fields.cv) class LoadImage: """Load an image from file or url. 
Added or updated keys are "filename", "img", "img_shape", diff --git a/modelscope/utils/config.py b/modelscope/utils/config.py index d0f3f657..df9e38fd 100644 --- a/modelscope/utils/config.py +++ b/modelscope/utils/config.py @@ -74,17 +74,17 @@ class Config: {'c': [1, 2, 3], 'd': 'dd'} >>> cfg.b.d 'dd' - >>> cfg = Config.from_file('configs/examples/config.json') + >>> cfg = Config.from_file('configs/examples/configuration.json') >>> cfg.filename - 'configs/examples/config.json' + 'configs/examples/configuration.json' >>> cfg.b {'c': [1, 2, 3], 'd': 'dd'} - >>> cfg = Config.from_file('configs/examples/config.py') + >>> cfg = Config.from_file('configs/examples/configuration.py') >>> cfg.filename - "configs/examples/config.py" - >>> cfg = Config.from_file('configs/examples/config.yaml') + "configs/examples/configuration.py" + >>> cfg = Config.from_file('configs/examples/configuration.yaml') >>> cfg.filename - "configs/examples/config.yaml" + "configs/examples/configuration.yaml" """ @staticmethod diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index 0d0f2492..fa30dd2a 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -4,8 +4,8 @@ class Fields(object): """ Names for different application fields """ - image = 'image' - video = 'video' + # image = 'image' + # video = 'video' cv = 'cv' nlp = 'nlp' audio = 'audio' @@ -72,6 +72,4 @@ class Hubs(object): # configuration filename -# in order to avoid conflict with huggingface -# config file we use maas_config instead -CONFIGFILE = 'maas_config.json' +CONFIGFILE = 'configuration.json' diff --git a/tests/utils/test_config.py b/tests/utils/test_config.py index 48f1d4a8..fb7044e8 100644 --- a/tests/utils/test_config.py +++ b/tests/utils/test_config.py @@ -14,25 +14,25 @@ obj = {'a': 1, 'b': {'c': [1, 2, 3], 'd': 'dd'}} class ConfigTest(unittest.TestCase): def test_json(self): - config_file = 'configs/examples/config.json' + config_file = 'configs/examples/configuration.json' cfg = Config.from_file(config_file) self.assertEqual(cfg.a, 1) self.assertEqual(cfg.b, obj['b']) def test_yaml(self): - config_file = 'configs/examples/config.yaml' + config_file = 'configs/examples/configuration.yaml' cfg = Config.from_file(config_file) self.assertEqual(cfg.a, 1) self.assertEqual(cfg.b, obj['b']) def test_py(self): - config_file = 'configs/examples/config.py' + config_file = 'configs/examples/configuration.py' cfg = Config.from_file(config_file) self.assertEqual(cfg.a, 1) self.assertEqual(cfg.b, obj['b']) def test_dump(self): - config_file = 'configs/examples/config.py' + config_file = 'configs/examples/configuration.py' cfg = Config.from_file(config_file) self.assertEqual(cfg.a, 1) self.assertEqual(cfg.b, obj['b']) @@ -53,7 +53,7 @@ class ConfigTest(unittest.TestCase): self.assertEqual(yaml_str, infile.read()) def test_to_dict(self): - config_file = 'configs/examples/config.json' + config_file = 'configs/examples/configuration.json' cfg = Config.from_file(config_file) d = cfg.to_dict() print(d) From 67086e26f94edad73ef0dc40ecc56ed96a361233 Mon Sep 17 00:00:00 2001 From: "wenmeng.zwm" Date: Tue, 14 Jun 2022 16:29:12 +0800 Subject: [PATCH 03/16] [to #42362932] feat: docker_support * add dockerfile * uninstall opencv-python-headless * update develop doc Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9020302 --- Makefile.docker | 66 ++++++++++++++++++++++++++++++ docker/.dockerignore | 4 ++ docker/pytorch.dockerfile | 37 +++++++++++++++++ docker/rcfiles/pip.conf.tsinghua | 2 + 
docker/rcfiles/sources.list.aliyun | 25 +++++++++++ docker/rcfiles/user.vimrc | 10 +++++ docker/scripts/install_libs.sh | 12 ++++++ docs/source/develop.md | 19 +++++++++ 8 files changed, 175 insertions(+) create mode 100644 Makefile.docker create mode 100644 docker/.dockerignore create mode 100644 docker/pytorch.dockerfile create mode 100644 docker/rcfiles/pip.conf.tsinghua create mode 100644 docker/rcfiles/sources.list.aliyun create mode 100644 docker/rcfiles/user.vimrc create mode 100644 docker/scripts/install_libs.sh diff --git a/Makefile.docker b/Makefile.docker new file mode 100644 index 00000000..bbac840e --- /dev/null +++ b/Makefile.docker @@ -0,0 +1,66 @@ +DOCKER_REGISTRY = registry.cn-shanghai.aliyuncs.com +DOCKER_ORG = modelscope +DOCKER_IMAGE = modelscope +DOCKER_FULL_NAME = $(DOCKER_REGISTRY)/$(DOCKER_ORG)/$(DOCKER_IMAGE) + +# CUDA_VERSION = 11.3 +# CUDNN_VERSION = 8 +BASE_RUNTIME = reg.docker.alibaba-inc.com/pai-dlc/pytorch-training:1.10PAI-gpu-py36-cu113-ubuntu18.04 +BASE_DEVEL = reg.docker.alibaba-inc.com/pai-dlc/pytorch-training:1.10PAI-gpu-py36-cu113-ubuntu18.04 + + +MODELSCOPE_VERSION = $(shell git describe --tags --always) + +# Can be either official / dev +BUILD_TYPE = dev +BUILD_PROGRESS = auto +BUILD_ARGS = --build-arg BASE_IMAGE=$(BASE_IMAGE) + +EXTRA_DOCKER_BUILD_FLAGS ?= --network=host +# DOCKER_BUILD = DOCKER_BUILDKIT=1 \ +# docker build \ +# --progress=$(BUILD_PROGRESS) \ +# $(EXTRA_DOCKER_BUILD_FLAGS) \ +# --target $(BUILD_TYPE) \ +# -t $(DOCKER_FULL_NAME):$(DOCKER_TAG) \ +# $(BUILD_ARGS) \ +# -f docker/pytorch.dockerfile . +DOCKER_BUILD = DOCKER_BUILDKIT=1 \ + docker build \ + $(EXTRA_DOCKER_BUILD_FLAGS) \ + -t $(DOCKER_FULL_NAME):$(DOCKER_TAG) \ + $(BUILD_ARGS) \ + -f docker/pytorch.dockerfile . +DOCKER_PUSH = docker push $(DOCKER_FULL_NAME):$(DOCKER_TAG) + +.PHONY: all +all: devel-image + +.PHONY: devel-image +devel-image: BASE_IMAGE := $(BASE_DEVEL) +devel-image: DOCKER_TAG := $(MODELSCOPE_VERSION)-devel +devel-image: + $(DOCKER_BUILD) + +.PHONY: devel-push +devel-push: BASE_IMAGE := $(BASE_DEVEL) +devel-push: DOCKER_TAG := $(MODELSCOPE_VERSION)-devel +devel-push: + $(DOCKER_PUSH) + +.PHONY: runtime-image +runtime-image: BASE_IMAGE := $(BASE_RUNTIME) +runtime-image: DOCKER_TAG := $(MODELSCOPE_VERSION)-runtime +runtime-image: + $(DOCKER_BUILD) + docker tag $(DOCKER_FULL_NAME):$(DOCKER_TAG) $(DOCKER_FULL_NAME):latest + +.PHONY: runtime-push +runtime-push: BASE_IMAGE := $(BASE_RUNTIME) +runtime-push: DOCKER_TAG := $(MODELSCOPE_VERSION)-runtime +runtime-push: + $(DOCKER_PUSH) + +.PHONY: clean +clean: + -docker rmi -f $(shell docker images -q $(DOCKER_FULL_NAME)) diff --git a/docker/.dockerignore b/docker/.dockerignore new file mode 100644 index 00000000..14284cb6 --- /dev/null +++ b/docker/.dockerignore @@ -0,0 +1,4 @@ +*.sh +*.md +*.dockerfile +*.zip diff --git a/docker/pytorch.dockerfile b/docker/pytorch.dockerfile new file mode 100644 index 00000000..73c35af1 --- /dev/null +++ b/docker/pytorch.dockerfile @@ -0,0 +1,37 @@ +# syntax = docker/dockerfile:experimental +# +# NOTE: To build this you will need a docker version > 18.06 with +# experimental enabled and DOCKER_BUILDKIT=1 +# +# If you do not use buildkit you are not going to have a good time +# +# For reference: +# https://docs.docker.com/develop/develop-images/build_enhancements/ + +#ARG BASE_IMAGE=reg.docker.alibaba-inc.com/pai-dlc/pytorch-training:1.10PAI-gpu-py36-cu113-ubuntu18.04 +#FROM ${BASE_IMAGE} as dev-base + +FROM 
reg.docker.alibaba-inc.com/pai-dlc/pytorch-training:1.10PAI-gpu-py36-cu113-ubuntu18.04 as dev-base +# config pip source +RUN mkdir /root/.pip +COPY docker/rcfiles/pip.conf.tsinghua /root/.pip/pip.conf + +# install modelscope and its python env +WORKDIR /opt/modelscope +COPY . . +RUN pip install -r requirements.txt +# RUN --mount=type=cache,target=/opt/ccache \ +# python setup.py install + +# opencv-python-headless conflicts with the installed opencv-python +RUN python setup.py install \ + && pip uninstall -y opencv-python-headless + +# prepare modelscope libs +COPY docker/scripts/install_libs.sh /tmp/ +RUN bash /tmp/install_libs.sh && \ + rm -rf /tmp/install_libs.sh + +ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/modelscope/lib64 + +WORKDIR /workspace diff --git a/docker/rcfiles/pip.conf.tsinghua b/docker/rcfiles/pip.conf.tsinghua new file mode 100644 index 00000000..4242075a --- /dev/null +++ b/docker/rcfiles/pip.conf.tsinghua @@ -0,0 +1,2 @@ +[global] +index-url=https://pypi.tuna.tsinghua.edu.cn/simple diff --git a/docker/rcfiles/sources.list.aliyun b/docker/rcfiles/sources.list.aliyun new file mode 100644 index 00000000..120bb1f1 --- /dev/null +++ b/docker/rcfiles/sources.list.aliyun @@ -0,0 +1,25 @@ +deb http://mirrors.aliyun.com/ubuntu/ bionic main restricted +# deb-src http://mirrors.aliyun.com/ubuntu/ bionic main restricted + +deb http://mirrors.aliyun.com/ubuntu/ bionic-updates main restricted +# deb-src http://mirrors.aliyun.com/ubuntu/ bionic-updates main restricted + +deb http://mirrors.aliyun.com/ubuntu/ bionic universe +# deb-src http://mirrors.aliyun.com/ubuntu/ bionic universe +deb http://mirrors.aliyun.com/ubuntu/ bionic-updates universe +# deb-src http://mirrors.aliyun.com/ubuntu/ bionic-updates universe + +deb http://mirrors.aliyun.com/ubuntu/ bionic multiverse +# deb-src http://mirrors.aliyun.com/ubuntu/ bionic multiverse +deb http://mirrors.aliyun.com/ubuntu/ bionic-updates multiverse +# deb-src http://mirrors.aliyun.com/ubuntu/ bionic-updates multiverse + +deb http://mirrors.aliyun.com/ubuntu/ bionic-backports main restricted universe multiverse +# deb-src http://mirrors.aliyun.com/ubuntu/ bionic-backports main restricted universe multiverse + +deb http://mirrors.aliyun.com/ubuntu bionic-security main restricted +# deb-src http://mirrors.aliyun.com/ubuntu bionic-security main restricted +deb http://mirrors.aliyun.com/ubuntu bionic-security universe +# deb-src http://mirrors.aliyun.com/ubuntu bionic-security universe +deb http://mirrors.aliyun.com/ubuntu bionic-security multiverse +# deb-src http://mirrors.aliyun.com/ubuntu bionic-security multiverse diff --git a/docker/rcfiles/user.vimrc b/docker/rcfiles/user.vimrc new file mode 100644 index 00000000..590aca43 --- /dev/null +++ b/docker/rcfiles/user.vimrc @@ -0,0 +1,10 @@ +set nocompatible +set encoding=utf-8 +set hlsearch +set smartindent +set ruler +set number +set ts=2 +set sw=2 +set expandtab +autocmd FileType make setlocal noexpandtab diff --git a/docker/scripts/install_libs.sh b/docker/scripts/install_libs.sh new file mode 100644 index 00000000..dea0dc19 --- /dev/null +++ b/docker/scripts/install_libs.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -eo pipefail + +ModelScopeLib=/usr/local/modelscope/lib64 + +if [ ! 
-d /usr/local/modelscope ]; then + mkdir -p $ModelScopeLib +fi + +# audio libs +wget "http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/release/maas/libs/audio/libmitaec_pyio.so" -O ${ModelScopeLib}/libmitaec_pyio.so diff --git a/docs/source/develop.md b/docs/source/develop.md index c048bef7..f0c8b8b0 100644 --- a/docs/source/develop.md +++ b/docs/source/develop.md @@ -93,3 +93,22 @@ TODO ```bash make whl ``` + +## Build docker + +Build the develop docker image: +```bash +sudo make -f Makefile.docker devel-image +``` + +Push the develop docker image; for the registry password, please ask wenmeng.zwm: +```bash +sudo docker login --username=mass_test@test.aliyunid.com registry.cn-shanghai.aliyuncs.com +Password: +sudo make -f Makefile.docker devel-push +``` + +To build the runtime image, just replace `devel` with `runtime` in the commands above. +```bash +sudo make -f Makefile.docker runtime-image runtime-push +``` From b4fc38e1b9b8debb5bd1bd5c1840aba1eccab835 Mon Sep 17 00:00:00 2001 From: "wenmeng.zwm" Date: Wed, 15 Jun 2022 11:43:52 +0800 Subject: [PATCH 04/16] [to #42461396] add Pillow version constraint and update hub version 1. use Pillow >= 6.2.0 2. change skip test msg for image caption Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9041194 --- requirements/runtime.txt | 4 ++-- tests/pipelines/test_image_captioning.py | 2 +- tests/preprocessors/test_image.py | 22 ++++++++++++++++++++++ 3 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 tests/preprocessors/test_image.py diff --git a/requirements/runtime.txt b/requirements/runtime.txt index 47a11cbc..43684a06 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -1,10 +1,10 @@ addict datasets easydict -https://maashub.oss-cn-hangzhou.aliyuncs.com/releases/maas_hub-0.1.0.dev0-py2.py3-none-any.whl +https://mindscope.oss-cn-hangzhou.aliyuncs.com/sdklib/maas_hub-0.2.2.dev0-py3-none-any.whl numpy opencv-python-headless -Pillow +Pillow>=6.2.0 pyyaml requests tokenizers<=0.10.3 diff --git a/tests/pipelines/test_image_captioning.py b/tests/pipelines/test_image_captioning.py index 5584d0e2..76ffc79d 100644 --- a/tests/pipelines/test_image_captioning.py +++ b/tests/pipelines/test_image_captioning.py @@ -11,7 +11,7 @@ from modelscope.utils.constant import Tasks class ImageCaptionTest(unittest.TestCase): - @unittest.skip('skip long test') + @unittest.skip('skip before model is restored in model hub') def test_run(self): model = 'https://ofa-beijing.oss-cn-beijing.aliyuncs.com/checkpoints/caption_large_best_clean.pt' diff --git a/tests/preprocessors/test_image.py b/tests/preprocessors/test_image.py new file mode 100644 index 00000000..cfa7b11d --- /dev/null +++ b/tests/preprocessors/test_image.py @@ -0,0 +1,22 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+ +import unittest + +import PIL + +from modelscope.preprocessors import load_image +from modelscope.utils.logger import get_logger + + +class ImagePreprocessorTest(unittest.TestCase): + + def test_load(self): + img = load_image( + 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png' + ) + self.assertTrue(isinstance(img, PIL.Image.Image)) + self.assertEqual(img.size, (948, 533)) + + +if __name__ == '__main__': + unittest.main() From 5786b9a0a1ba0507862a4225726d03ddfcac735c Mon Sep 17 00:00:00 2001 From: "yingda.chen" Date: Wed, 15 Jun 2022 14:06:53 +0800 Subject: [PATCH 05/16] [to #42322933]formalize image matting Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9019685 --- modelscope/models/base.py | 8 ++++---- modelscope/pipelines/builder.py | 8 ++------ modelscope/pipelines/cv/image_matting_pipeline.py | 6 +++--- modelscope/pipelines/util.py | 9 +++------ modelscope/utils/constant.py | 15 +++++++++++++-- modelscope/utils/registry.py | 1 - tests/pipelines/test_image_matting.py | 9 +++++---- tests/utils/test_config.py | 3 --- 8 files changed, 30 insertions(+), 29 deletions(-) diff --git a/modelscope/models/base.py b/modelscope/models/base.py index e641236d..3e361f91 100644 --- a/modelscope/models/base.py +++ b/modelscope/models/base.py @@ -2,14 +2,13 @@ import os.path as osp from abc import ABC, abstractmethod -from typing import Dict, List, Tuple, Union +from typing import Dict, Union -from maas_hub.file_download import model_file_download from maas_hub.snapshot_download import snapshot_download from modelscope.models.builder import build_model from modelscope.utils.config import Config -from modelscope.utils.constant import CONFIGFILE +from modelscope.utils.constant import ModelFile from modelscope.utils.hub import get_model_cache_dir Tensor = Union['torch.Tensor', 'tf.Tensor'] @@ -47,7 +46,8 @@ class Model(ABC): # raise ValueError( # 'Remote model repo {model_name_or_path} does not exists') - cfg = Config.from_file(osp.join(local_model_dir, CONFIGFILE)) + cfg = Config.from_file( + osp.join(local_model_dir, ModelFile.CONFIGURATION)) task_name = cfg.task model_cfg = cfg.model # TODO @wenmeng.zwm may should manually initialize model after model building diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py index 6495a5db..ad3511cb 100644 --- a/modelscope/pipelines/builder.py +++ b/modelscope/pipelines/builder.py @@ -3,21 +3,17 @@ import os.path as osp from typing import List, Union -import json -from maas_hub.file_download import model_file_download - from modelscope.models.base import Model from modelscope.utils.config import Config, ConfigDict -from modelscope.utils.constant import CONFIGFILE, Tasks +from modelscope.utils.constant import Tasks from modelscope.utils.registry import Registry, build_from_cfg from .base import Pipeline -from .util import is_model_name PIPELINES = Registry('pipelines') DEFAULT_MODEL_FOR_PIPELINE = { # TaskName: (pipeline_module_name, model_repo) - Tasks.image_matting: ('image-matting', 'damo/image-matting-person'), + Tasks.image_matting: ('image-matting', 'damo/cv_unet_image-matting_damo'), Tasks.text_classification: ('bert-sentiment-analysis', 'damo/bert-base-sst2'), Tasks.text_generation: ('palm', 'damo/nlp_palm_text-generation_chinese'), diff --git a/modelscope/pipelines/cv/image_matting_pipeline.py b/modelscope/pipelines/cv/image_matting_pipeline.py index 6f3ff5f5..0c60dfa7 100644 --- a/modelscope/pipelines/cv/image_matting_pipeline.py +++ 
b/modelscope/pipelines/cv/image_matting_pipeline.py @@ -1,5 +1,5 @@ import os.path as osp -from typing import Any, Dict, List, Tuple, Union +from typing import Any, Dict import cv2 import numpy as np @@ -7,7 +7,7 @@ import PIL from modelscope.pipelines.base import Input from modelscope.preprocessors import load_image -from modelscope.utils.constant import Tasks +from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.logger import get_logger from ..base import Pipeline from ..builder import PIPELINES @@ -24,7 +24,7 @@ class ImageMattingPipeline(Pipeline): import tensorflow as tf if tf.__version__ >= '2.0': tf = tf.compat.v1 - model_path = osp.join(self.model, 'matting_person.pb') + model_path = osp.join(self.model, ModelFile.TF_GRAPH_FILE) config = tf.ConfigProto(allow_soft_placement=True) config.gpu_options.allow_growth = True diff --git a/modelscope/pipelines/util.py b/modelscope/pipelines/util.py index 43a7ac5a..37c9c929 100644 --- a/modelscope/pipelines/util.py +++ b/modelscope/pipelines/util.py @@ -1,14 +1,11 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -import os import os.path as osp from typing import List, Union -import json from maas_hub.file_download import model_file_download -from matplotlib.pyplot import get from modelscope.utils.config import Config -from modelscope.utils.constant import CONFIGFILE +from modelscope.utils.constant import ModelFile from modelscope.utils.logger import get_logger logger = get_logger() @@ -29,14 +26,14 @@ def is_model_name(model: Union[str, List]): def is_model_name_impl(model): if osp.exists(model): - cfg_file = osp.join(model, CONFIGFILE) + cfg_file = osp.join(model, ModelFile.CONFIGURATION) if osp.exists(cfg_file): return is_config_has_model(cfg_file) else: return False else: try: - cfg_file = model_file_download(model, CONFIGFILE) + cfg_file = model_file_download(model, ModelFile.CONFIGURATION) return is_config_has_model(cfg_file) except Exception: return False diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index fa30dd2a..c6eb6385 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -71,5 +71,16 @@ class Hubs(object): huggingface = 'huggingface' -# configuration filename -CONFIGFILE = 'configuration.json' +class ModelFile(object): + CONFIGURATION = 'configuration.json' + README = 'README.md' + TF_SAVED_MODEL_FILE = 'saved_model.pb' + TF_GRAPH_FILE = 'tf_graph.pb' + TF_CHECKPOINT_FOLDER = 'tf_ckpts' + TF_CKPT_PREFIX = 'ckpt-' + TORCH_MODEL_FILE = 'pytorch_model.pt' + TORCH_MODEL_BIN_FILE = 'pytorch_model.bin' + + +TENSORFLOW = 'tensorflow' +PYTORCH = 'pytorch' diff --git a/modelscope/utils/registry.py b/modelscope/utils/registry.py index 73a938ea..888564c7 100644 --- a/modelscope/utils/registry.py +++ b/modelscope/utils/registry.py @@ -1,7 +1,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import inspect -from email.policy import default from modelscope.utils.logger import get_logger diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py index 53006317..f1a627a0 100644 --- a/tests/pipelines/test_image_matting.py +++ b/tests/pipelines/test_image_matting.py @@ -9,25 +9,26 @@ import cv2 from modelscope.fileio import File from modelscope.pipelines import pipeline from modelscope.pydatasets import PyDataset -from modelscope.utils.constant import Tasks +from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.hub import get_model_cache_dir class ImageMattingTest(unittest.TestCase): def setUp(self) -> None: - self.model_id = 'damo/image-matting-person' + self.model_id = 'damo/cv_unet_image-matting_damo' # switch to False if downloading everytime is not desired purge_cache = True if purge_cache: shutil.rmtree( get_model_cache_dir(self.model_id), ignore_errors=True) - def test_run(self): + @unittest.skip('deprecated, download model from model hub instead') + def test_run_with_direct_file_download(self): model_path = 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs' \ '.com/data/test/maas/image_matting/matting_person.pb' with tempfile.TemporaryDirectory() as tmp_dir: - model_file = osp.join(tmp_dir, 'matting_person.pb') + model_file = osp.join(tmp_dir, ModelFile.TF_GRAPH_FILE) with open(model_file, 'wb') as ofile: ofile.write(File.read(model_path)) img_matting = pipeline(Tasks.image_matting, model=tmp_dir) diff --git a/tests/utils/test_config.py b/tests/utils/test_config.py index fb7044e8..a3770f0d 100644 --- a/tests/utils/test_config.py +++ b/tests/utils/test_config.py @@ -1,11 +1,8 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import argparse -import os.path as osp import tempfile import unittest -from pathlib import Path -from modelscope.fileio import dump, load from modelscope.utils.config import Config obj = {'a': 1, 'b': {'c': [1, 2, 3], 'd': 'dd'}} From d7112be05635d4beee50da7e7795e465446c6cb0 Mon Sep 17 00:00:00 2001 From: "yingda.chen" Date: Wed, 15 Jun 2022 14:07:57 +0800 Subject: [PATCH 06/16] [to #42510875]use sphinx book theme Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9042850 --- docs/source/conf.py | 2 +- requirements/docs.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 2c2a0017..50ac2fa0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -76,7 +76,7 @@ exclude_patterns = ['build', 'Thumbs.db', '.DS_Store'] # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. 
# -html_theme = 'sphinx_rtd_theme' +html_theme = 'sphinx_book_theme' html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] html_theme_options = {} diff --git a/requirements/docs.txt b/requirements/docs.txt index 25373976..2436f5af 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -1,6 +1,7 @@ docutils==0.16.0 recommonmark sphinx==4.0.2 +sphinx-book-theme sphinx-copybutton sphinx_markdown_tables sphinx_rtd_theme==0.5.2 From c59833c7eeb0139d0b74150da3ca6cf4ec32f9a1 Mon Sep 17 00:00:00 2001 From: "wenmeng.zwm" Date: Wed, 15 Jun 2022 14:53:49 +0800 Subject: [PATCH 07/16] [to #42461396] feat: test_level support * add test level support * update develop doc Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9021354 --- docs/source/develop.md | 55 ++++++++++++++++++-- modelscope/utils/test_utils.py | 20 +++++++ tests/pipelines/test_image_captioning.py | 1 + tests/pipelines/test_image_matting.py | 4 ++ tests/pipelines/test_person_image_cartoon.py | 3 ++ tests/pipelines/test_text_classification.py | 6 +++ tests/pipelines/test_text_generation.py | 6 ++- tests/run.py | 9 ++++ 8 files changed, 100 insertions(+), 4 deletions(-) create mode 100644 modelscope/utils/test_utils.py diff --git a/docs/source/develop.md b/docs/source/develop.md index f0c8b8b0..f96590b0 100644 --- a/docs/source/develop.md +++ b/docs/source/develop.md @@ -34,13 +34,62 @@ make linter ``` ## 2. Test -### 2.1 Unit test + +### 2.1 Test level + +There are mainly three test levels: + +* level 0: tests for basic interfaces and functions of the framework, such as `tests/trainers/test_trainer_base.py` +* level 1: important functional tests which test the end-to-end workflow, such as `tests/pipelines/test_image_matting.py` +* level 2: scenario tests for all the implemented modules, such as models and pipelines, in different algorithm fields. + +The default test level is 0, which only runs cases of level 0; you can set the test level +via the environment variable `TEST_LEVEL`. For more details, you can refer to [test-doc](https://alidocs.dingtalk.com/i/nodes/mdvQnONayjBJKLXy1Bp38PY2MeXzp5o0?dontjump=true&nav=spaces&navQuery=spaceId%3Dnb9XJNlZxbgrOXyA) + +```bash +# run all tests +TEST_LEVEL=2 make test + +# run important functional tests +TEST_LEVEL=1 make test + +# run core UT and basic functional tests make test ``` +When writing test cases, you should assign a test level to your test case using the +following code. If left at the default level 0, the case will run in every +test stage. + +File test_module.py +```python +from modelscope.utils.test_utils import test_level + +class ImageCartoonTest(unittest.TestCase): + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_run_by_direct_model_download(self): + pass +``` + +### 2.2 Run tests + +1. Run your own single test case to test your self-implemented function. You can run your +test file directly; if it fails to run, please check whether the variable `TEST_LEVEL` +exists in the environment and unset it. +```bash +python tests/path/to/your_test.py +``` + +2. Remember to run core tests in your local environment before starting a code review; by default this will +only run test cases with level 0. +```bash +make test +``` + +3. After you start a code review, CI tests will be triggered, which run test cases with level 1. + +4. Daily regression tests will run all cases at 0:00 each day using the master branch. 
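You can also invoke the test runner directly to combine a level with a file pattern. This is a usage sketch based on the `--level`, `--pattern` and `--test_dir` arguments this patch adds to `tests/run.py`; the pattern value is only an example:
```bash
# run the image matting tests, including level-1 cases
python tests/run.py --level 1 --pattern "test_image_matting.py" --test_dir tests
```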
## Code Review diff --git a/modelscope/utils/test_utils.py b/modelscope/utils/test_utils.py new file mode 100644 index 00000000..c8ea0442 --- /dev/null +++ b/modelscope/utils/test_utils.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os + +TEST_LEVEL = 2 +TEST_LEVEL_STR = 'TEST_LEVEL' + + +def test_level(): + global TEST_LEVEL + if TEST_LEVEL_STR in os.environ: + TEST_LEVEL = int(os.environ[TEST_LEVEL_STR]) + + return TEST_LEVEL + + +def set_test_level(level: int): + global TEST_LEVEL + TEST_LEVEL = level diff --git a/tests/pipelines/test_image_captioning.py b/tests/pipelines/test_image_captioning.py index 76ffc79d..4fac4658 100644 --- a/tests/pipelines/test_image_captioning.py +++ b/tests/pipelines/test_image_captioning.py @@ -7,6 +7,7 @@ import unittest from modelscope.fileio import File from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks +from modelscope.utils.test_utils import test_level class ImageCaptionTest(unittest.TestCase): diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py index f1a627a0..ba5d05ad 100644 --- a/tests/pipelines/test_image_matting.py +++ b/tests/pipelines/test_image_matting.py @@ -11,6 +11,7 @@ from modelscope.pipelines import pipeline from modelscope.pydatasets import PyDataset from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.hub import get_model_cache_dir +from modelscope.utils.test_utils import test_level class ImageMattingTest(unittest.TestCase): @@ -38,6 +39,7 @@ class ImageMattingTest(unittest.TestCase): ) cv2.imwrite('result.png', result['output_png']) + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_dataset(self): input_location = [ 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png' @@ -52,6 +54,7 @@ class ImageMattingTest(unittest.TestCase): cv2.imwrite('result.png', next(result)['output_png']) print(f'Output written to {osp.abspath("result.png")}') + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_modelhub(self): img_matting = pipeline(Tasks.image_matting, model=self.model_id) @@ -61,6 +64,7 @@ class ImageMattingTest(unittest.TestCase): cv2.imwrite('result.png', result['output_png']) print(f'Output written to {osp.abspath("result.png")}') + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_modelhub_default_model(self): img_matting = pipeline(Tasks.image_matting) diff --git a/tests/pipelines/test_person_image_cartoon.py b/tests/pipelines/test_person_image_cartoon.py index 6f352e42..ed912b1c 100644 --- a/tests/pipelines/test_person_image_cartoon.py +++ b/tests/pipelines/test_person_image_cartoon.py @@ -8,6 +8,7 @@ import cv2 from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks +from modelscope.utils.test_utils import test_level class ImageCartoonTest(unittest.TestCase): @@ -36,10 +37,12 @@ class ImageCartoonTest(unittest.TestCase): img_cartoon = pipeline(Tasks.image_generation, model=model_dir) self.pipeline_inference(img_cartoon, self.test_image) + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_modelhub(self): img_cartoon = pipeline(Tasks.image_generation, model=self.model_id) self.pipeline_inference(img_cartoon, self.test_image) + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def 
test_run_modelhub_default_model(self): img_cartoon = pipeline(Tasks.image_generation) self.pipeline_inference(img_cartoon, self.test_image) diff --git a/tests/pipelines/test_text_classification.py b/tests/pipelines/test_text_classification.py index 7f6dc77c..01fdd29b 100644 --- a/tests/pipelines/test_text_classification.py +++ b/tests/pipelines/test_text_classification.py @@ -12,6 +12,7 @@ from modelscope.preprocessors import SequenceClassificationPreprocessor from modelscope.pydatasets import PyDataset from modelscope.utils.constant import Hubs, Tasks from modelscope.utils.hub import get_model_cache_dir +from modelscope.utils.test_utils import test_level class SequenceClassificationTest(unittest.TestCase): @@ -43,6 +44,7 @@ class SequenceClassificationTest(unittest.TestCase): break print(r) + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run(self): model_url = 'https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com' \ '/release/easynlp_modelzoo/alibaba-pai/bert-base-sst2.zip' @@ -67,6 +69,7 @@ class SequenceClassificationTest(unittest.TestCase): Tasks.text_classification, model=model, preprocessor=preprocessor) print(pipeline2('Hello world!')) + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_with_model_from_modelhub(self): model = Model.from_pretrained(self.model_id) preprocessor = SequenceClassificationPreprocessor( @@ -77,6 +80,7 @@ class SequenceClassificationTest(unittest.TestCase): preprocessor=preprocessor) self.predict(pipeline_ins) + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_with_model_name(self): text_classification = pipeline( task=Tasks.text_classification, model=self.model_id) @@ -85,6 +89,7 @@ class SequenceClassificationTest(unittest.TestCase): 'glue', name='sst2', target='sentence', hub=Hubs.huggingface)) self.printDataset(result) + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_default_model(self): text_classification = pipeline(task=Tasks.text_classification) result = text_classification( @@ -92,6 +97,7 @@ class SequenceClassificationTest(unittest.TestCase): 'glue', name='sst2', target='sentence', hub=Hubs.huggingface)) self.printDataset(result) + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_dataset(self): model = Model.from_pretrained(self.model_id) preprocessor = SequenceClassificationPreprocessor( diff --git a/tests/pipelines/test_text_generation.py b/tests/pipelines/test_text_generation.py index d8f1b495..f98e135d 100644 --- a/tests/pipelines/test_text_generation.py +++ b/tests/pipelines/test_text_generation.py @@ -8,6 +8,7 @@ from modelscope.models.nlp import PalmForTextGenerationModel from modelscope.pipelines import TextGenerationPipeline, pipeline from modelscope.preprocessors import TextGenerationPreprocessor from modelscope.utils.constant import Tasks +from modelscope.utils.test_utils import test_level class TextGenerationTest(unittest.TestCase): @@ -15,7 +16,7 @@ class TextGenerationTest(unittest.TestCase): input1 = "今日天气类型='晴'&温度变化趋势='大幅上升'&最低气温='28℃'&最高气温='31℃'&体感='湿热'" input2 = "今日天气类型='多云'&体感='舒适'&最低气温='26℃'&最高气温='30℃'" - @unittest.skip('skip temporarily to save test time') + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run(self): cache_path = snapshot_download(self.model_id) preprocessor = TextGenerationPreprocessor( @@ -29,6 +30,7 @@ class TextGenerationTest(unittest.TestCase): print() print(f'input: 
{self.input2}\npipeline2: {pipeline2(self.input2)}') + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_model_from_modelhub(self): model = Model.from_pretrained(self.model_id) preprocessor = TextGenerationPreprocessor( @@ -37,11 +39,13 @@ class TextGenerationTest(unittest.TestCase): task=Tasks.text_generation, model=model, preprocessor=preprocessor) print(pipeline_ins(self.input1)) + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_model_name(self): pipeline_ins = pipeline( task=Tasks.text_generation, model=self.model_id) print(pipeline_ins(self.input2)) + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_default_model(self): pipeline_ins = pipeline(task=Tasks.text_generation) print(pipeline_ins(self.input2)) diff --git a/tests/run.py b/tests/run.py index 25404d7a..9f5d62a7 100644 --- a/tests/run.py +++ b/tests/run.py @@ -7,6 +7,11 @@ import sys import unittest from fnmatch import fnmatch +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import set_test_level, test_level + +logger = get_logger() + def gather_test_cases(test_dir, pattern, list_tests): case_list = [] @@ -49,5 +54,9 @@ if __name__ == '__main__': '--pattern', default='test_*.py', help='test file pattern') parser.add_argument( '--test_dir', default='tests', help='directory to be tested') + parser.add_argument( + '--level', default=0, type=int, help='2 -- all, 1 -- p1, 0 -- p0') args = parser.parse_args() + set_test_level(args.level) + logger.info(f'TEST LEVEL: {test_level()}') main(args) From d983bdfc8e315e27f895fb24c6c20a8d128f17b7 Mon Sep 17 00:00:00 2001 From: "lingcai.wl" Date: Wed, 15 Jun 2022 18:37:40 +0800 Subject: [PATCH 08/16] [to #42463204] support PIL.Image for image_captioning Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9049211 --- modelscope/pipelines/multi_modal/image_captioning.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/modelscope/pipelines/multi_modal/image_captioning.py b/modelscope/pipelines/multi_modal/image_captioning.py index 91180e23..3e5f49d0 100644 --- a/modelscope/pipelines/multi_modal/image_captioning.py +++ b/modelscope/pipelines/multi_modal/image_captioning.py @@ -84,8 +84,11 @@ class ImageCaptionPipeline(Pipeline): s = torch.cat([s, self.eos_item]) return s - patch_image = self.patch_resize_transform( - load_image(input)).unsqueeze(0) + if isinstance(input, Image.Image): + patch_image = self.patch_resize_transform(input).unsqueeze(0) + else: + patch_image = self.patch_resize_transform( + load_image(input)).unsqueeze(0) patch_mask = torch.tensor([True]) text = 'what does the image describe?' 
src_text = encode_text( From ba471d449249f2cdc65991de9a2e0be91d289aed Mon Sep 17 00:00:00 2001 From: "zhangzhicheng.zzc" Date: Wed, 15 Jun 2022 23:35:12 +0800 Subject: [PATCH 09/16] [to #42322933]sentence-similarity Adding the new task of sentence_similarity, in which the model is the sofa version of structbert Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9016402 * sbert-sentence-similarity * [to #42322933] pip8 * merge with master for file dirs update * add test cases * pre-commit lint check * remove useless file * download models again~ * skip time consuming test case * update for pr reviews * merge with master * add test level * reset test level to env level * [to #42322933] init * [to #42322933] init * adding purge logic in test * merge with head * change test level * using sequence classification processor for similarity --- modelscope/models/__init__.py | 2 +- modelscope/models/nlp/__init__.py | 1 + .../models/nlp/sentence_similarity_model.py | 88 +++++++++++++++++++ modelscope/pipelines/base.py | 2 +- modelscope/pipelines/builder.py | 3 + modelscope/pipelines/nlp/__init__.py | 1 + .../nlp/sentence_similarity_pipeline.py | 65 ++++++++++++++ modelscope/preprocessors/__init__.py | 1 - modelscope/preprocessors/nlp.py | 40 +++++++-- modelscope/utils/constant.py | 1 + tests/pipelines/test_sentence_similarity.py | 67 ++++++++++++++ 11 files changed, 260 insertions(+), 11 deletions(-) create mode 100644 modelscope/models/nlp/sentence_similarity_model.py create mode 100644 modelscope/pipelines/nlp/sentence_similarity_pipeline.py create mode 100644 tests/pipelines/test_sentence_similarity.py diff --git a/modelscope/models/__init__.py b/modelscope/models/__init__.py index 170e525e..d9a89d35 100644 --- a/modelscope/models/__init__.py +++ b/modelscope/models/__init__.py @@ -2,4 +2,4 @@ from .base import Model from .builder import MODELS, build_model -from .nlp import BertForSequenceClassification +from .nlp import BertForSequenceClassification, SbertForSentenceSimilarity diff --git a/modelscope/models/nlp/__init__.py b/modelscope/models/nlp/__init__.py index b2a1d43b..be675c1b 100644 --- a/modelscope/models/nlp/__init__.py +++ b/modelscope/models/nlp/__init__.py @@ -1,2 +1,3 @@ +from .sentence_similarity_model import * # noqa F403 from .sequence_classification_model import * # noqa F403 from .text_generation_model import * # noqa F403 diff --git a/modelscope/models/nlp/sentence_similarity_model.py b/modelscope/models/nlp/sentence_similarity_model.py new file mode 100644 index 00000000..98daac92 --- /dev/null +++ b/modelscope/models/nlp/sentence_similarity_model.py @@ -0,0 +1,88 @@ +import os +from typing import Any, Dict + +import json +import numpy as np +import torch +from sofa import SbertModel +from sofa.models.sbert.modeling_sbert import SbertPreTrainedModel +from torch import nn + +from modelscope.utils.constant import Tasks +from ..base import Model, Tensor +from ..builder import MODELS + +__all__ = ['SbertForSentenceSimilarity'] + + +class SbertTextClassifier(SbertPreTrainedModel): + + def __init__(self, config): + super().__init__(config) + self.num_labels = config.num_labels + self.config = config + self.encoder = SbertModel(config, add_pooling_layer=True) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + self.classifier = nn.Linear(config.hidden_size, config.num_labels) + + def forward(self, input_ids=None, token_type_ids=None): + outputs = self.encoder( + input_ids, + token_type_ids=token_type_ids, + return_dict=None, + ) + pooled_output = outputs[1] + 
pooled_output = self.dropout(pooled_output) + logits = self.classifier(pooled_output) + return logits + + +@MODELS.register_module( + Tasks.sentence_similarity, + module_name=r'sbert-base-chinese-sentence-similarity') +class SbertForSentenceSimilarity(Model): + + def __init__(self, model_dir: str, *args, **kwargs): + """initialize the sentence similarity model from the `model_dir` path. + + Args: + model_dir (str): the model path. + model_cls (Optional[Any], optional): model loader, if None, use the + default loader to load model weights, by default None. + """ + super().__init__(model_dir, *args, **kwargs) + self.model_dir = model_dir + + self.model = SbertTextClassifier.from_pretrained( + model_dir, num_labels=2) + self.model.eval() + self.label_path = os.path.join(self.model_dir, 'label_mapping.json') + with open(self.label_path) as f: + self.label_mapping = json.load(f) + self.id2label = {idx: name for name, idx in self.label_mapping.items()} + + def forward(self, input: Dict[str, Any]) -> Dict[str, np.ndarray]: + """return the result by the model + + Args: + input (Dict[str, Any]): the preprocessed data + + Returns: + Dict[str, np.ndarray]: results + Example: + { + 'predictions': array([1]), # label 0-negative 1-positive + 'probabilities': array([[0.11491239, 0.8850876 ]], dtype=float32), + 'logits': array([[-0.53860897, 1.5029076 ]], dtype=float32) # true value + } + """ + input_ids = torch.tensor(input['input_ids'], dtype=torch.long) + token_type_ids = torch.tensor( + input['token_type_ids'], dtype=torch.long) + with torch.no_grad(): + logits = self.model(input_ids, token_type_ids) + probs = logits.softmax(-1).numpy() + pred = logits.argmax(-1).numpy() + logits = logits.numpy() + res = {'predictions': pred, 'probabilities': probs, 'logits': logits} + return res diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py index f4d4d1b7..c69afdca 100644 --- a/modelscope/pipelines/base.py +++ b/modelscope/pipelines/base.py @@ -15,7 +15,7 @@ from modelscope.utils.logger import get_logger from .util import is_model_name Tensor = Union['torch.Tensor', 'tf.Tensor'] -Input = Union[str, PyDataset, 'PIL.Image.Image', 'numpy.ndarray'] +Input = Union[str, tuple, PyDataset, 'PIL.Image.Image', 'numpy.ndarray'] InputModel = Union[str, Model] output_keys = [ diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py index ad3511cb..d4ad0c3f 100644 --- a/modelscope/pipelines/builder.py +++ b/modelscope/pipelines/builder.py @@ -13,6 +13,9 @@ PIPELINES = Registry('pipelines') DEFAULT_MODEL_FOR_PIPELINE = { # TaskName: (pipeline_module_name, model_repo) + Tasks.sentence_similarity: + ('sbert-base-chinese-sentence-similarity', + 'damo/nlp_structbert_sentence-similarity_chinese-base'), Tasks.image_matting: ('image-matting', 'damo/cv_unet_image-matting_damo'), Tasks.text_classification: ('bert-sentiment-analysis', 'damo/bert-base-sst2'), diff --git a/modelscope/pipelines/nlp/__init__.py b/modelscope/pipelines/nlp/__init__.py index 3dbbc1bb..1f15a7b8 100644 --- a/modelscope/pipelines/nlp/__init__.py +++ b/modelscope/pipelines/nlp/__init__.py @@ -1,2 +1,3 @@ +from .sentence_similarity_pipeline import * # noqa F403 from .sequence_classification_pipeline import * # noqa F403 from .text_generation_pipeline import * # noqa F403 diff --git a/modelscope/pipelines/nlp/sentence_similarity_pipeline.py b/modelscope/pipelines/nlp/sentence_similarity_pipeline.py new file mode 100644 index 00000000..44d91756 --- /dev/null +++ b/modelscope/pipelines/nlp/sentence_similarity_pipeline.py @@ 
-0,0 +1,65 @@ +import os +import uuid +from typing import Any, Dict, Union + +import json +import numpy as np + +from modelscope.models.nlp import SbertForSentenceSimilarity +from modelscope.preprocessors import SequenceClassificationPreprocessor +from modelscope.utils.constant import Tasks +from ...models import Model +from ..base import Input, Pipeline +from ..builder import PIPELINES + +__all__ = ['SentenceSimilarityPipeline'] + + +@PIPELINES.register_module( + Tasks.sentence_similarity, + module_name=r'sbert-base-chinese-sentence-similarity') +class SentenceSimilarityPipeline(Pipeline): + + def __init__(self, + model: Union[SbertForSentenceSimilarity, str], + preprocessor: SequenceClassificationPreprocessor = None, + **kwargs): + """use `model` and `preprocessor` to create an nlp sentence similarity pipeline for prediction + + Args: + model (SbertForSentenceSimilarity): a model instance + preprocessor (SequenceClassificationPreprocessor): a preprocessor instance + """ + assert isinstance(model, str) or isinstance(model, SbertForSentenceSimilarity), \ + 'model must be a single str or SbertForSentenceSimilarity' + sc_model = model if isinstance( + model, + SbertForSentenceSimilarity) else Model.from_pretrained(model) + if preprocessor is None: + preprocessor = SequenceClassificationPreprocessor( + sc_model.model_dir, + first_sequence='first_sequence', + second_sequence='second_sequence') + super().__init__(model=sc_model, preprocessor=preprocessor, **kwargs) + + assert hasattr(self.model, 'id2label'), \ + 'id2label map should be initialized in init function.' + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]: + """process the prediction results + + Args: + inputs (Dict[str, Any]): the prediction results output by the model + + Returns: + Dict[str, str]: the prediction results + """ + + probs = inputs['probabilities'][0] + num_classes = probs.shape[0] + top_indices = np.argpartition(probs, -num_classes)[-num_classes:] + cls_ids = top_indices[np.argsort(-probs[top_indices], axis=-1)] + probs = probs[cls_ids].tolist() + cls_names = [self.model.id2label[cid] for cid in cls_ids] + b = 0 + return {'scores': probs[b], 'labels': cls_names[b]} diff --git a/modelscope/preprocessors/__init__.py b/modelscope/preprocessors/__init__.py index 518ea977..81ca1007 100644 --- a/modelscope/preprocessors/__init__.py +++ b/modelscope/preprocessors/__init__.py @@ -5,4 +5,3 @@ from .builder import PREPROCESSORS, build_preprocessor from .common import Compose from .image import LoadImage, load_image from .nlp import * # noqa F403 -from .nlp import TextGenerationPreprocessor diff --git a/modelscope/preprocessors/nlp.py b/modelscope/preprocessors/nlp.py index 0de41bfc..6773eadf 100644 --- a/modelscope/preprocessors/nlp.py +++ b/modelscope/preprocessors/nlp.py @@ -10,7 +10,10 @@ from modelscope.utils.type_assert import type_assert from .base import Preprocessor from .builder import PREPROCESSORS -__all__ = ['Tokenize', 'SequenceClassificationPreprocessor'] +__all__ = [ + 'Tokenize', 'SequenceClassificationPreprocessor', + 'TextGenerationPreprocessor' +] @PREPROCESSORS.register_module(Fields.nlp) @@ -28,7 +31,7 @@ class Tokenize(Preprocessor): @PREPROCESSORS.register_module( - Fields.nlp, module_name=r'bert-sentiment-analysis') + Fields.nlp, module_name=r'bert-sequence-classification') class SequenceClassificationPreprocessor(Preprocessor): def __init__(self, model_dir: str, *args, **kwargs): @@ -48,21 +51,42 @@ class SequenceClassificationPreprocessor(Preprocessor): self.sequence_length = kwargs.pop('sequence_length', 128) 
self.tokenizer = AutoTokenizer.from_pretrained(self.model_dir) + print(f'this is the tokenizer {self.tokenizer}') - @type_assert(object, str) - def __call__(self, data: str) -> Dict[str, Any]: + @type_assert(object, (str, tuple)) + def __call__(self, data: Union[str, tuple]) -> Dict[str, Any]: """process the raw input data Args: - data (str): a sentence - Example: - 'you are so handsome.' + data (str or tuple): + sentence1 (str): a sentence + Example: + 'you are so handsome.' + or + (sentence1, sentence2) + sentence1 (str): a sentence + Example: + 'you are so handsome.' + sentence2 (str): a sentence + Example: + 'you are so beautiful.' Returns: Dict[str, Any]: the preprocessed data """ - new_data = {self.first_sequence: data} + if not isinstance(data, tuple): + data = ( + data, + None, + ) + + sentence1, sentence2 = data + new_data = { + self.first_sequence: sentence1, + self.second_sequence: sentence2 + } + # preprocess the data for the model input rst = { diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index c6eb6385..2fcfee95 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -31,6 +31,7 @@ class Tasks(object): # nlp tasks sentiment_analysis = 'sentiment-analysis' + sentence_similarity = 'sentence-similarity' text_classification = 'text-classification' relation_extraction = 'relation-extraction' zero_shot = 'zero-shot' diff --git a/tests/pipelines/test_sentence_similarity.py b/tests/pipelines/test_sentence_similarity.py new file mode 100644 index 00000000..ac2ff4fb --- /dev/null +++ b/tests/pipelines/test_sentence_similarity.py @@ -0,0 +1,67 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import shutil +import unittest + +from maas_hub.snapshot_download import snapshot_download + +from modelscope.models import Model +from modelscope.models.nlp import SbertForSentenceSimilarity +from modelscope.pipelines import SentenceSimilarityPipeline, pipeline +from modelscope.preprocessors import SequenceClassificationPreprocessor +from modelscope.utils.constant import Tasks +from modelscope.utils.hub import get_model_cache_dir +from modelscope.utils.test_utils import test_level + + +class SentenceSimilarityTest(unittest.TestCase): + model_id = 'damo/nlp_structbert_sentence-similarity_chinese-base' + sentence1 = '今天气温比昨天高么?' + sentence2 = '今天湿度比昨天高么?' 
+ + def setUp(self) -> None: + # switch to False if downloading every time is not desired + purge_cache = True + if purge_cache: + shutil.rmtree( + get_model_cache_dir(self.model_id), ignore_errors=True) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run(self): + cache_path = snapshot_download(self.model_id) + tokenizer = SequenceClassificationPreprocessor(cache_path) + model = SbertForSentenceSimilarity(cache_path, tokenizer=tokenizer) + pipeline1 = SentenceSimilarityPipeline(model, preprocessor=tokenizer) + pipeline2 = pipeline( + Tasks.sentence_similarity, model=model, preprocessor=tokenizer) + print('test1') + print(f'sentence1: {self.sentence1}\nsentence2: {self.sentence2}\n' + f'pipeline1: {pipeline1(input=(self.sentence1, self.sentence2))}') + print() + print( + f'sentence1: {self.sentence1}\nsentence2: {self.sentence2}\n' + f'pipeline2: {pipeline2(input=(self.sentence1, self.sentence2))}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub(self): + model = Model.from_pretrained(self.model_id) + tokenizer = SequenceClassificationPreprocessor(model.model_dir) + pipeline_ins = pipeline( + task=Tasks.sentence_similarity, + model=model, + preprocessor=tokenizer) + print(pipeline_ins(input=(self.sentence1, self.sentence2))) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_name(self): + pipeline_ins = pipeline( + task=Tasks.sentence_similarity, model=self.model_id) + print(pipeline_ins(input=(self.sentence1, self.sentence2))) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_default_model(self): + pipeline_ins = pipeline(task=Tasks.sentence_similarity) + print(pipeline_ins(input=(self.sentence1, self.sentence2))) + + +if __name__ == '__main__': + unittest.main() From 4f7928bb6e3e609bf1c49fdcee83dd824fddba28 Mon Sep 17 00:00:00 2001 From: "wenmeng.zwm" Date: Thu, 16 Jun 2022 11:15:09 +0800 Subject: [PATCH 10/16] [to #42362853] formalize the output of pipeline and make pipeline reusable * format pipeline output and check it * fix UT * add docstr to clarify the difference between model.postprocess and pipeline.postprocess Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9051405 --- Makefile.docker | 3 +- docker/pytorch.dockerfile | 22 ++++- modelscope/models/base.py | 18 +++- .../nlp/sequence_classification_model.py | 17 ++++ modelscope/pipelines/base.py | 28 ++++++ .../nlp/sequence_classification_pipeline.py | 53 +++------- .../pipelines/nlp/text_generation_pipeline.py | 2 +- modelscope/pipelines/outputs.py | 98 +++++++++++++++++++ modelscope/utils/constant.py | 2 +- modelscope/utils/registry.py | 1 + tests/pipelines/test_base.py | 16 ++- 11 files changed, 203 insertions(+), 57 deletions(-) create mode 100644 modelscope/pipelines/outputs.py diff --git a/Makefile.docker b/Makefile.docker index bbac840e..97400318 100644 --- a/Makefile.docker +++ b/Makefile.docker @@ -6,7 +6,8 @@ DOCKER_FULL_NAME = $(DOCKER_REGISTRY)/$(DOCKER_ORG)/$(DOCKER_IMAGE) # CUDA_VERSION = 11.3 # CUDNN_VERSION = 8 BASE_RUNTIME = reg.docker.alibaba-inc.com/pai-dlc/pytorch-training:1.10PAI-gpu-py36-cu113-ubuntu18.04 -BASE_DEVEL = reg.docker.alibaba-inc.com/pai-dlc/pytorch-training:1.10PAI-gpu-py36-cu113-ubuntu18.04 +# BASE_DEVEL = reg.docker.alibaba-inc.com/pai-dlc/pytorch-training:1.10PAI-gpu-py36-cu113-ubuntu18.04 +BASE_DEVEL = pytorch/pytorch:1.10.0-cuda11.3-cudnn8-devel MODELSCOPE_VERSION = 
$(shell git describe --tags --always) diff --git a/docker/pytorch.dockerfile b/docker/pytorch.dockerfile index 73c35af1..4862cab6 100644 --- a/docker/pytorch.dockerfile +++ b/docker/pytorch.dockerfile @@ -8,13 +8,29 @@ # For reference: # https://docs.docker.com/develop/develop-images/build_enhancements/ -#ARG BASE_IMAGE=reg.docker.alibaba-inc.com/pai-dlc/pytorch-training:1.10PAI-gpu-py36-cu113-ubuntu18.04 -#FROM ${BASE_IMAGE} as dev-base +# ARG BASE_IMAGE=reg.docker.alibaba-inc.com/pai-dlc/pytorch-training:1.10PAI-gpu-py36-cu113-ubuntu18.04 +# FROM ${BASE_IMAGE} as dev-base -FROM reg.docker.alibaba-inc.com/pai-dlc/pytorch-training:1.10PAI-gpu-py36-cu113-ubuntu18.04 as dev-base +# FROM reg.docker.alibaba-inc.com/pai-dlc/pytorch-training:1.10PAI-gpu-py36-cu113-ubuntu18.04 as dev-base +FROM pytorch/pytorch:1.10.0-cuda11.3-cudnn8-devel +# FROM pytorch/pytorch:1.10.0-cuda11.3-cudnn8-runtime # config pip source RUN mkdir /root/.pip COPY docker/rcfiles/pip.conf.tsinghua /root/.pip/pip.conf +COPY docker/rcfiles/sources.list.aliyun /etc/apt/sources.list + +# Install essential Ubuntu packages +RUN apt-get update &&\ + apt-get install -y software-properties-common \ + build-essential \ + git \ + wget \ + vim \ + curl \ + zip \ + zlib1g-dev \ + unzip \ + pkg-config # install modelscope and its python env WORKDIR /opt/modelscope diff --git a/modelscope/models/base.py b/modelscope/models/base.py index 3e361f91..88b1e3b0 100644 --- a/modelscope/models/base.py +++ b/modelscope/models/base.py @@ -20,16 +20,24 @@ class Model(ABC): self.model_dir = model_dir def __call__(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]: - return self.post_process(self.forward(input)) + return self.postprocess(self.forward(input)) @abstractmethod def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]: pass - def post_process(self, input: Dict[str, Tensor], - **kwargs) -> Dict[str, Tensor]: - # model specific postprocess, implementation is optional - # will be called in Pipeline and evaluation loop(in the future) + def postprocess(self, input: Dict[str, Tensor], + **kwargs) -> Dict[str, Tensor]: + """ Model specific postprocess and convert model output to + standard model outputs. + + Args: + inputs: input data + + Return: + dict of results: a dict containing outputs of model, each + output should have the standard output name. 
+        """
         return input
 
     @classmethod
diff --git a/modelscope/models/nlp/sequence_classification_model.py b/modelscope/models/nlp/sequence_classification_model.py
index 6ced7a4e..a3cc4b68 100644
--- a/modelscope/models/nlp/sequence_classification_model.py
+++ b/modelscope/models/nlp/sequence_classification_model.py
@@ -1,5 +1,7 @@
+import os
 from typing import Any, Dict
 
+import json
 import numpy as np
 
 from modelscope.utils.constant import Tasks
@@ -34,6 +36,11 @@ class BertForSequenceClassification(Model):
                 ('token_type_ids', torch.LongTensor)],
             output_keys=['predictions', 'probabilities', 'logits'])
 
+        self.label_path = os.path.join(self.model_dir, 'label_mapping.json')
+        with open(self.label_path) as f:
+            self.label_mapping = json.load(f)
+        self.id2label = {idx: name for name, idx in self.label_mapping.items()}
+
     def forward(self, input: Dict[str, Any]) -> Dict[str, np.ndarray]:
         """return the result by the model
 
@@ -50,3 +57,13 @@
             }
         """
         return self.model.predict(input)
+
+    def postprocess(self, inputs: Dict[str, np.ndarray],
+                    **kwargs) -> Dict[str, np.ndarray]:
+        # N x num_classes
+        probs = inputs['probabilities']
+        result = {
+            'probs': probs,
+        }
+
+        return result
diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py
index c69afdca..1da65213 100644
--- a/modelscope/pipelines/base.py
+++ b/modelscope/pipelines/base.py
@@ -12,6 +12,7 @@ from modelscope.pydatasets import PyDataset
 from modelscope.utils.config import Config
 from modelscope.utils.hub import get_model_cache_dir
 from modelscope.utils.logger import get_logger
+from .outputs import TASK_OUTPUTS
 from .util import is_model_name
 
 Tensor = Union['torch.Tensor', 'tf.Tensor']
@@ -106,8 +107,25 @@ class Pipeline(ABC):
         out = self.preprocess(input)
         out = self.forward(out)
         out = self.postprocess(out, **post_kwargs)
+        self._check_output(out)
         return out
 
+    def _check_output(self, input):
+        # this attribute is dynamically attached by registry
+        # when cls is registered in registry using task name
+        task_name = self.group_key
+        if task_name not in TASK_OUTPUTS:
+            logger.warning(f'task {task_name} has no standard output keys '
+                           'registered, skip the output check')
+            return
+        output_keys = TASK_OUTPUTS[task_name]
+        missing_keys = []
+        for k in output_keys:
+            if k not in input:
+                missing_keys.append(k)
+        if len(missing_keys) > 0:
+            raise ValueError(f'expected output keys are {output_keys}, '
+                             f'those {missing_keys} are missing')
+
     def preprocess(self, inputs: Input) -> Dict[str, Any]:
         """ Provide default implementation based on preprocess_cfg and user can reimplement it
         """
@@ -125,4 +143,14 @@
 
     @abstractmethod
     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+        """ If the current pipeline supports model reuse, common postprocess
+        code should be written here.
+
+        Args:
+            inputs: input data
+
+        Returns:
+            dict of results: a dict containing outputs of model, each
+            output should have the standard output name.
+        """
         raise NotImplementedError('postprocess')
diff --git a/modelscope/pipelines/nlp/sequence_classification_pipeline.py b/modelscope/pipelines/nlp/sequence_classification_pipeline.py
index 5a14f136..9d2e4273 100644
--- a/modelscope/pipelines/nlp/sequence_classification_pipeline.py
+++ b/modelscope/pipelines/nlp/sequence_classification_pipeline.py
@@ -41,50 +41,29 @@ class SequenceClassificationPipeline(Pipeline):
                 second_sequence=None)
         super().__init__(model=sc_model, preprocessor=preprocessor, **kwargs)
 
-        from easynlp.utils import io
-        self.label_path = os.path.join(sc_model.model_dir,
-                                       'label_mapping.json')
-        with io.open(self.label_path) as f:
-            self.label_mapping = json.load(f)
-        self.label_id_to_name = {
-            idx: name
-            for name, idx in self.label_mapping.items()
-        }
+        assert hasattr(self.model, 'id2label'), \
+            'id2label map should be initialized in the init function.'
 
-    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
+    def postprocess(self,
+                    inputs: Dict[str, Any],
+                    topk: int = 5) -> Dict[str, str]:
         """process the prediction results
 
         Args:
-            inputs (Dict[str, Any]): _description_
+            inputs (Dict[str, Any]): input data dict
+            topk (int): the number of top classification results to return.
 
         Returns:
             Dict[str, str]: the prediction results
         """
+        # NxC np.ndarray
+        probs = inputs['probs'][0]
+        num_classes = probs.shape[0]
+        topk = min(topk, num_classes)
+        top_indices = np.argpartition(probs, -topk)[-topk:]
+        cls_ids = top_indices[np.argsort(probs[top_indices])]
+        probs = probs[cls_ids].tolist()
 
-        probs = inputs['probabilities']
-        logits = inputs['logits']
-        predictions = np.argsort(-probs, axis=-1)
-        preds = predictions[0]
-        b = 0
-        new_result = list()
-        for pred in preds:
-            new_result.append({
-                'pred': self.label_id_to_name[pred],
-                'prob': float(probs[b][pred]),
-                'logit': float(logits[b][pred])
-            })
-        new_results = list()
-        new_results.append({
-            'id':
-            inputs['id'][b] if 'id' in inputs else str(uuid.uuid4()),
-            'output':
-            new_result,
-            'predictions':
-            new_result[0]['pred'],
-            'probabilities':
-            ','.join([str(t) for t in inputs['probabilities'][b]]),
-            'logits':
-            ','.join([str(t) for t in inputs['logits'][b]])
-        })
+        cls_names = [self.model.id2label[cid] for cid in cls_ids]
 
-        return new_results[0]
+        return {'scores': probs, 'labels': cls_names}
diff --git a/modelscope/pipelines/nlp/text_generation_pipeline.py b/modelscope/pipelines/nlp/text_generation_pipeline.py
index 7ad2b67f..ea30a115 100644
--- a/modelscope/pipelines/nlp/text_generation_pipeline.py
+++ b/modelscope/pipelines/nlp/text_generation_pipeline.py
@@ -56,4 +56,4 @@ class TextGenerationPipeline(Pipeline):
                                 '').split('[SEP]')[0].replace('[CLS]',
                                                               '').replace('[SEP]',
                                                                           '').replace('[UNK]', '')
-        return {'pred_string': pred_string}
+        return {'text': pred_string}
diff --git a/modelscope/pipelines/outputs.py b/modelscope/pipelines/outputs.py
new file mode 100644
index 00000000..1389abd3
--- /dev/null
+++ b/modelscope/pipelines/outputs.py
@@ -0,0 +1,98 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
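+
+# TASK_OUTPUTS maps each task name to the output keys that every pipeline
+# registered under that task is expected to emit. It backs the
+# Pipeline._check_output hook added in base.py above, which raises a
+# ValueError when any expected key is missing from a pipeline's output.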
+
+from modelscope.utils.constant import Tasks
+
+TASK_OUTPUTS = {
+
+    # ============ vision tasks ===================
+
+    # image classification result for single sample
+    # {
+    #   "labels": ["dog", "horse", "cow", "cat"],
+    #   "scores": [0.9, 0.1, 0.05, 0.05]
+    # }
+    Tasks.image_classification: ['scores', 'labels'],
+    Tasks.image_tagging: ['scores', 'labels'],
+
+    # object detection result for single sample
+    # {
+    #   "boxes": [
+    #       [x1, y1, x2, y2],
+    #       [x1, y1, x2, y2],
+    #       [x1, y1, x2, y2],
+    #   ],
+    #   "labels": ["dog", "horse", "cow", "cat"],
+    #   "scores": [0.9, 0.1, 0.05, 0.05]
+    # }
+    Tasks.object_detection: ['scores', 'labels', 'boxes'],
+
+    # instance segmentation result for single sample
+    # {
+    #   "masks": [
+    #       np.array in bgr channel order
+    #   ],
+    #   "labels": ["dog", "horse", "cow", "cat"],
+    #   "scores": [0.9, 0.1, 0.05, 0.05]
+    # }
+    Tasks.image_segmentation: ['scores', 'labels', 'masks'],
+
+    # image generation/editing/matting result for single sample
+    # {
+    #   "output_png": np.array with shape (h, w, 4)
+    #                 for matting or (h, w, 3) for general purpose
+    # }
+    Tasks.image_editing: ['output_png'],
+    Tasks.image_matting: ['output_png'],
+    Tasks.image_generation: ['output_png'],
+
+    # pose estimation result for single sample
+    # {
+    #   "poses": np.array with shape [num_pose, num_keypoint, 3],
+    #       each keypoint is an array [x, y, score]
+    #   "boxes": np.array with shape [num_pose, 4], each box is
+    #       [x1, y1, x2, y2]
+    # }
+    Tasks.pose_estimation: ['poses', 'boxes'],
+
+    # ============ nlp tasks ===================
+
+    # text classification result for single sample
+    # {
+    #   "labels": ["happy", "sad", "calm", "angry"],
+    #   "scores": [0.9, 0.1, 0.05, 0.05]
+    # }
+    Tasks.text_classification: ['scores', 'labels'],
+
+    # text generation result for single sample
+    # {
+    #   "text": "this is text generated by a model."
+    # }
+    Tasks.text_generation: ['text'],
+
+    # ============ audio tasks ===================
+
+    # ============ multi-modal tasks ===================
+
+    # image caption result for single sample
+    # {
+    #   "caption": "this is an image caption text."
+ # } + Tasks.image_captioning: ['caption'], + + # visual grounding result for single sample + # { + # "boxes": [ + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # ], + # "scores": [0.9, 0.1, 0.05, 0.05] + # } + Tasks.visual_grounding: ['boxes', 'scores'], + + # text_to_image result for a single sample + # { + # "image": np.ndarray with shape [height, width, 3] + # } + Tasks.text_to_image_synthesis: ['image'] +} diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index 2fcfee95..6ce835c5 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -51,7 +51,7 @@ class Tasks(object): text_to_speech = 'text-to-speech' speech_signal_process = 'speech-signal-process' - # multi-media + # multi-modal tasks image_captioning = 'image-captioning' visual_grounding = 'visual-grounding' text_to_image_synthesis = 'text-to-image-synthesis' diff --git a/modelscope/utils/registry.py b/modelscope/utils/registry.py index 888564c7..319e54cb 100644 --- a/modelscope/utils/registry.py +++ b/modelscope/utils/registry.py @@ -69,6 +69,7 @@ class Registry(object): f'{self._name}[{group_key}]') self._modules[group_key][module_name] = module_cls + module_cls.group_key = group_key if module_name in self._modules[default_group]: if id(self._modules[default_group][module_name]) == id(module_cls): diff --git a/tests/pipelines/test_base.py b/tests/pipelines/test_base.py index 14f646a9..73aebfdf 100644 --- a/tests/pipelines/test_base.py +++ b/tests/pipelines/test_base.py @@ -35,9 +35,10 @@ class CustomPipelineTest(unittest.TestCase): CustomPipeline1() def test_custom(self): + dummy_task = 'dummy-task' @PIPELINES.register_module( - group_key=Tasks.image_tagging, module_name='custom-image') + group_key=dummy_task, module_name='custom-image') class CustomImagePipeline(Pipeline): def __init__(self, @@ -67,32 +68,29 @@ class CustomPipelineTest(unittest.TestCase): outputs['filename'] = inputs['url'] img = inputs['img'] new_image = img.resize((img.width // 2, img.height // 2)) - outputs['resize_image'] = np.array(new_image) - outputs['dummy_result'] = 'dummy_result' + outputs['output_png'] = np.array(new_image) return outputs def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: return inputs self.assertTrue('custom-image' in PIPELINES.modules[default_group]) - add_default_pipeline_info(Tasks.image_tagging, 'custom-image') + add_default_pipeline_info(dummy_task, 'custom-image', overwrite=True) pipe = pipeline(pipeline_name='custom-image') - pipe2 = pipeline(Tasks.image_tagging) + pipe2 = pipeline(dummy_task) self.assertTrue(type(pipe) is type(pipe2)) img_url = 'http://pai-vision-data-hz.oss-cn-zhangjiakou.' 
\ 'aliyuncs.com/data/test/images/image1.jpg' output = pipe(img_url) self.assertEqual(output['filename'], img_url) - self.assertEqual(output['resize_image'].shape, (318, 512, 3)) - self.assertEqual(output['dummy_result'], 'dummy_result') + self.assertEqual(output['output_png'].shape, (318, 512, 3)) outputs = pipe([img_url for i in range(4)]) self.assertEqual(len(outputs), 4) for out in outputs: self.assertEqual(out['filename'], img_url) - self.assertEqual(out['resize_image'].shape, (318, 512, 3)) - self.assertEqual(out['dummy_result'], 'dummy_result') + self.assertEqual(out['output_png'].shape, (318, 512, 3)) if __name__ == '__main__': From ad8e080e37ef29dae1bc54255bb267a0b3993bb2 Mon Sep 17 00:00:00 2001 From: "yingda.chen" Date: Fri, 17 Jun 2022 10:25:54 +0800 Subject: [PATCH 11/16] [to #42322933] refactor model name Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9068994 --- modelscope/models/nlp/text_generation_model.py | 4 ++-- modelscope/pipelines/builder.py | 2 +- modelscope/pipelines/nlp/text_generation_pipeline.py | 7 +++---- tests/pipelines/test_image_matting.py | 2 +- tests/pipelines/test_text_generation.py | 4 ++-- 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/modelscope/models/nlp/text_generation_model.py b/modelscope/models/nlp/text_generation_model.py index ebefc8d1..8feac691 100644 --- a/modelscope/models/nlp/text_generation_model.py +++ b/modelscope/models/nlp/text_generation_model.py @@ -4,11 +4,11 @@ from modelscope.utils.constant import Tasks from ..base import Model, Tensor from ..builder import MODELS -__all__ = ['PalmForTextGenerationModel'] +__all__ = ['PalmForTextGeneration'] @MODELS.register_module(Tasks.text_generation, module_name=r'palm') -class PalmForTextGenerationModel(Model): +class PalmForTextGeneration(Model): def __init__(self, model_dir: str, *args, **kwargs): """initialize the text generation model from the `model_dir` path. 
diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py index d4ad0c3f..83d1641e 100644 --- a/modelscope/pipelines/builder.py +++ b/modelscope/pipelines/builder.py @@ -16,7 +16,7 @@ DEFAULT_MODEL_FOR_PIPELINE = { Tasks.sentence_similarity: ('sbert-base-chinese-sentence-similarity', 'damo/nlp_structbert_sentence-similarity_chinese-base'), - Tasks.image_matting: ('image-matting', 'damo/cv_unet_image-matting_damo'), + Tasks.image_matting: ('image-matting', 'damo/cv_unet_image-matting'), Tasks.text_classification: ('bert-sentiment-analysis', 'damo/bert-base-sst2'), Tasks.text_generation: ('palm', 'damo/nlp_palm_text-generation_chinese'), diff --git a/modelscope/pipelines/nlp/text_generation_pipeline.py b/modelscope/pipelines/nlp/text_generation_pipeline.py index ea30a115..8b6bf8a9 100644 --- a/modelscope/pipelines/nlp/text_generation_pipeline.py +++ b/modelscope/pipelines/nlp/text_generation_pipeline.py @@ -1,7 +1,7 @@ from typing import Dict, Optional, Union from modelscope.models import Model -from modelscope.models.nlp import PalmForTextGenerationModel +from modelscope.models.nlp import PalmForTextGeneration from modelscope.preprocessors import TextGenerationPreprocessor from modelscope.utils.constant import Tasks from ..base import Pipeline, Tensor @@ -14,7 +14,7 @@ __all__ = ['TextGenerationPipeline'] class TextGenerationPipeline(Pipeline): def __init__(self, - model: Union[PalmForTextGenerationModel, str], + model: Union[PalmForTextGeneration, str], preprocessor: Optional[TextGenerationPreprocessor] = None, **kwargs): """use `model` and `preprocessor` to create a nlp text classification pipeline for prediction @@ -24,8 +24,7 @@ class TextGenerationPipeline(Pipeline): preprocessor (SequenceClassificationPreprocessor): a preprocessor instance """ sc_model = model if isinstance( - model, - PalmForTextGenerationModel) else Model.from_pretrained(model) + model, PalmForTextGeneration) else Model.from_pretrained(model) if preprocessor is None: preprocessor = TextGenerationPreprocessor( sc_model.model_dir, diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py index ba5d05ad..676153bf 100644 --- a/tests/pipelines/test_image_matting.py +++ b/tests/pipelines/test_image_matting.py @@ -17,7 +17,7 @@ from modelscope.utils.test_utils import test_level class ImageMattingTest(unittest.TestCase): def setUp(self) -> None: - self.model_id = 'damo/cv_unet_image-matting_damo' + self.model_id = 'damo/cv_unet_image-matting' # switch to False if downloading everytime is not desired purge_cache = True if purge_cache: diff --git a/tests/pipelines/test_text_generation.py b/tests/pipelines/test_text_generation.py index f98e135d..39d57ff7 100644 --- a/tests/pipelines/test_text_generation.py +++ b/tests/pipelines/test_text_generation.py @@ -4,7 +4,7 @@ import unittest from maas_hub.snapshot_download import snapshot_download from modelscope.models import Model -from modelscope.models.nlp import PalmForTextGenerationModel +from modelscope.models.nlp import PalmForTextGeneration from modelscope.pipelines import TextGenerationPipeline, pipeline from modelscope.preprocessors import TextGenerationPreprocessor from modelscope.utils.constant import Tasks @@ -21,7 +21,7 @@ class TextGenerationTest(unittest.TestCase): cache_path = snapshot_download(self.model_id) preprocessor = TextGenerationPreprocessor( cache_path, first_sequence='sentence', second_sequence=None) - model = PalmForTextGenerationModel( + model = PalmForTextGeneration( cache_path, 
tokenizer=preprocessor.tokenizer) pipeline1 = TextGenerationPipeline(model, preprocessor) pipeline2 = pipeline( From eb3209a79a9dcb0fd6da6bb56b5e29c2db010e14 Mon Sep 17 00:00:00 2001 From: "zhangzhicheng.zzc" Date: Fri, 17 Jun 2022 14:00:31 +0800 Subject: [PATCH 12/16] =?UTF-8?q?[to=20#42322933]=E4=B8=AD=E6=96=87?= =?UTF-8?q?=E5=88=86=E8=AF=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit chinese word segmentation Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9051491 * add word segmentation * Merge branch 'master' of http://gitlab.alibaba-inc.com/Ali-MaaS/MaaS-lib * test with model hub * merge with master * update some description and test levels * adding purge logic in test * merge with master * update variables definition * generic word segmentation model as token classification model * add output check --- modelscope/models/nlp/__init__.py | 1 + .../models/nlp/token_classification_model.py | 57 +++++++++++++++ modelscope/pipelines/builder.py | 3 + modelscope/pipelines/nlp/__init__.py | 1 + .../nlp/word_segmentation_pipeline.py | 71 +++++++++++++++++++ modelscope/pipelines/outputs.py | 13 ++++ modelscope/preprocessors/nlp.py | 50 ++++++++++++- modelscope/utils/constant.py | 1 + tests/pipelines/test_word_segmentation.py | 62 ++++++++++++++++ 9 files changed, 258 insertions(+), 1 deletion(-) create mode 100644 modelscope/models/nlp/token_classification_model.py create mode 100644 modelscope/pipelines/nlp/word_segmentation_pipeline.py create mode 100644 tests/pipelines/test_word_segmentation.py diff --git a/modelscope/models/nlp/__init__.py b/modelscope/models/nlp/__init__.py index be675c1b..aefcef4a 100644 --- a/modelscope/models/nlp/__init__.py +++ b/modelscope/models/nlp/__init__.py @@ -1,3 +1,4 @@ from .sentence_similarity_model import * # noqa F403 from .sequence_classification_model import * # noqa F403 from .text_generation_model import * # noqa F403 +from .token_classification_model import * # noqa F403 diff --git a/modelscope/models/nlp/token_classification_model.py b/modelscope/models/nlp/token_classification_model.py new file mode 100644 index 00000000..43d4aafb --- /dev/null +++ b/modelscope/models/nlp/token_classification_model.py @@ -0,0 +1,57 @@ +import os +from typing import Any, Dict, Union + +import numpy as np +import torch +from sofa import SbertConfig, SbertForTokenClassification + +from modelscope.utils.constant import Tasks +from ..base import Model, Tensor +from ..builder import MODELS + +__all__ = ['StructBertForTokenClassification'] + + +@MODELS.register_module( + Tasks.word_segmentation, + module_name=r'structbert-chinese-word-segmentation') +class StructBertForTokenClassification(Model): + + def __init__(self, model_dir: str, *args, **kwargs): + """initialize the word segmentation model from the `model_dir` path. + + Args: + model_dir (str): the model path. + model_cls (Optional[Any], optional): model loader, if None, use the + default loader to load model weights, by default None. 
+        """
+        super().__init__(model_dir, *args, **kwargs)
+        self.model_dir = model_dir
+        self.model = SbertForTokenClassification.from_pretrained(
+            self.model_dir)
+        self.config = SbertConfig.from_pretrained(self.model_dir)
+
+    def forward(self, input: Dict[str,
+                                  Any]) -> Dict[str, Union[str, np.ndarray]]:
+        """return the result by the model
+
+        Args:
+            input (Dict[str, Any]): the preprocessed data
+
+        Returns:
+            Dict[str, Union[str, np.ndarray]]: results
+                Example:
+                    {
+                        'predictions': array([1, 4]),  # per-token label ids
+                        'logits': array([[-0.53860897, 1.5029076]], dtype=float32),  # raw scores
+                        'text': '今天',
+                    }
+        """
+        input_ids = torch.tensor(input['input_ids']).unsqueeze(0)
+        output = self.model(input_ids)
+        logits = output.logits
+        pred = torch.argmax(logits[0], dim=-1)
+        pred = pred.numpy()
+
+        rst = {'predictions': pred, 'logits': logits, 'text': input['text']}
+        return rst
diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py
index 83d1641e..c24a7c3e 100644
--- a/modelscope/pipelines/builder.py
+++ b/modelscope/pipelines/builder.py
@@ -13,6 +13,9 @@ PIPELINES = Registry('pipelines')
 
 DEFAULT_MODEL_FOR_PIPELINE = {
     # TaskName: (pipeline_module_name, model_repo)
+    Tasks.word_segmentation:
+    ('structbert-chinese-word-segmentation',
+     'damo/nlp_structbert_word-segmentation_chinese-base'),
     Tasks.sentence_similarity:
     ('sbert-base-chinese-sentence-similarity',
      'damo/nlp_structbert_sentence-similarity_chinese-base'),
diff --git a/modelscope/pipelines/nlp/__init__.py b/modelscope/pipelines/nlp/__init__.py
index 1f15a7b8..f1dad0d6 100644
--- a/modelscope/pipelines/nlp/__init__.py
+++ b/modelscope/pipelines/nlp/__init__.py
@@ -1,3 +1,4 @@
 from .sentence_similarity_pipeline import *  # noqa F403
 from .sequence_classification_pipeline import *  # noqa F403
 from .text_generation_pipeline import *  # noqa F403
+from .word_segmentation_pipeline import *  # noqa F403
diff --git a/modelscope/pipelines/nlp/word_segmentation_pipeline.py b/modelscope/pipelines/nlp/word_segmentation_pipeline.py
new file mode 100644
index 00000000..49aa112a
--- /dev/null
+++ b/modelscope/pipelines/nlp/word_segmentation_pipeline.py
@@ -0,0 +1,71 @@
+from typing import Any, Dict, Optional, Union
+
+import numpy as np
+
+from modelscope.models import Model
+from modelscope.models.nlp import StructBertForTokenClassification
+from modelscope.preprocessors import TokenClassifcationPreprocessor
+from modelscope.utils.constant import Tasks
+from ..base import Pipeline, Tensor
+from ..builder import PIPELINES
+
+__all__ = ['WordSegmentationPipeline']
+
+
+@PIPELINES.register_module(
+    Tasks.word_segmentation,
+    module_name=r'structbert-chinese-word-segmentation')
+class WordSegmentationPipeline(Pipeline):
+
+    def __init__(self,
+                 model: Union[StructBertForTokenClassification, str],
+                 preprocessor: Optional[TokenClassifcationPreprocessor] = None,
+                 **kwargs):
+        """use `model` and `preprocessor` to create an nlp word segmentation pipeline for prediction
+
+        Args:
+            model (StructBertForTokenClassification): a model instance
+            preprocessor (TokenClassifcationPreprocessor): a preprocessor instance
+        """
+        model = model if isinstance(
+            model,
+            StructBertForTokenClassification) else Model.from_pretrained(model)
+        if preprocessor is None:
+            preprocessor = TokenClassifcationPreprocessor(model.model_dir)
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.tokenizer = preprocessor.tokenizer
+        self.config = model.config
+        self.id2label = self.config.id2label
+
+    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
+        """process the prediction results
+
+        Args:
+            inputs (Dict[str, Any]): the model outputs, including
+                'predictions' and 'text'
+
+        Returns:
+            Dict[str, str]: the prediction results
+        """
+
+        pred_list = inputs['predictions']
+        labels = []
+        for pre in pred_list:
+            labels.append(self.id2label[pre])
+        labels = labels[1:-1]
+        chunks = []
+        chunk = ''
+        assert len(inputs['text']) == len(labels)
+        for token, label in zip(inputs['text'], labels):
+            if label[0] == 'B' or label[0] == 'I':
+                chunk += token
+            else:
+                chunk += token
+                chunks.append(chunk)
+                chunk = ''
+        if chunk:
+            chunks.append(chunk)
+        seg_result = ' '.join(chunks)
+        rst = {
+            'output': seg_result,
+        }
+        return rst
diff --git a/modelscope/pipelines/outputs.py b/modelscope/pipelines/outputs.py
index 1389abd3..c88e358c 100644
--- a/modelscope/pipelines/outputs.py
+++ b/modelscope/pipelines/outputs.py
@@ -69,6 +69,19 @@ TASK_OUTPUTS = {
     # }
     Tasks.text_generation: ['text'],
 
+    # word segmentation result for single sample
+    # {
+    #   "output": "今天 天气 不错 , 适合 出去 游玩"
+    # }
+    Tasks.word_segmentation: ['output'],
+
+    # sentence similarity result for single sample
+    # {
+    #   "labels": "1",
+    #   "scores": 0.9
+    # }
+    Tasks.sentence_similarity: ['scores', 'labels'],
+
     # ============ audio tasks ===================
 
     # ============ multi-modal tasks ===================
diff --git a/modelscope/preprocessors/nlp.py b/modelscope/preprocessors/nlp.py
index 6773eadf..6a4a25fc 100644
--- a/modelscope/preprocessors/nlp.py
+++ b/modelscope/preprocessors/nlp.py
@@ -12,7 +12,7 @@ from .builder import PREPROCESSORS
 
 __all__ = [
     'Tokenize', 'SequenceClassificationPreprocessor',
-    'TextGenerationPreprocessor'
+    'TextGenerationPreprocessor', 'TokenClassifcationPreprocessor'
 ]
 
 
@@ -171,3 +171,51 @@ class TextGenerationPreprocessor(Preprocessor):
             rst['token_type_ids'].append(feature['token_type_ids'])
 
         return {k: torch.tensor(v) for k, v in rst.items()}
+
+
+@PREPROCESSORS.register_module(
+    Fields.nlp, module_name=r'bert-token-classification')
+class TokenClassifcationPreprocessor(Preprocessor):
+
+    def __init__(self, model_dir: str, *args, **kwargs):
+        """preprocess the data via the vocab.txt from the `model_dir` path
+
+        Args:
+            model_dir (str): model path
+        """
+
+        super().__init__(*args, **kwargs)
+
+        from sofa import SbertTokenizer
+        self.model_dir: str = model_dir
+        self.tokenizer = SbertTokenizer.from_pretrained(self.model_dir)
+
+    @type_assert(object, str)
+    def __call__(self, data: str) -> Dict[str, Any]:
+        """process the raw input data
+
+        Args:
+            data (str): a sentence
+                Example:
+                    'you are so handsome.'
+ + Returns: + Dict[str, Any]: the preprocessed data + """ + # preprocess the data for the model input + + text = data.replace(' ', '').strip() + tokens = [] + for token in text: + token = self.tokenizer.tokenize(token) + tokens.extend(token) + input_ids = self.tokenizer.convert_tokens_to_ids(tokens) + input_ids = self.tokenizer.build_inputs_with_special_tokens(input_ids) + attention_mask = [1] * len(input_ids) + token_type_ids = [0] * len(input_ids) + return { + 'text': text, + 'input_ids': input_ids, + 'attention_mask': attention_mask, + 'token_type_ids': token_type_ids + } diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index 6ce835c5..61049734 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -30,6 +30,7 @@ class Tasks(object): image_matting = 'image-matting' # nlp tasks + word_segmentation = 'word-segmentation' sentiment_analysis = 'sentiment-analysis' sentence_similarity = 'sentence-similarity' text_classification = 'text-classification' diff --git a/tests/pipelines/test_word_segmentation.py b/tests/pipelines/test_word_segmentation.py new file mode 100644 index 00000000..4ec2bf29 --- /dev/null +++ b/tests/pipelines/test_word_segmentation.py @@ -0,0 +1,62 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import shutil +import unittest + +from maas_hub.snapshot_download import snapshot_download + +from modelscope.models import Model +from modelscope.models.nlp import StructBertForTokenClassification +from modelscope.pipelines import WordSegmentationPipeline, pipeline +from modelscope.preprocessors import TokenClassifcationPreprocessor +from modelscope.utils.constant import Tasks +from modelscope.utils.hub import get_model_cache_dir +from modelscope.utils.test_utils import test_level + + +class WordSegmentationTest(unittest.TestCase): + model_id = 'damo/nlp_structbert_word-segmentation_chinese-base' + sentence = '今天天气不错,适合出去游玩' + + def setUp(self) -> None: + # switch to False if downloading everytime is not desired + purge_cache = True + if purge_cache: + shutil.rmtree( + get_model_cache_dir(self.model_id), ignore_errors=True) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_run_by_direct_model_download(self): + cache_path = snapshot_download(self.model_id) + tokenizer = TokenClassifcationPreprocessor(cache_path) + model = StructBertForTokenClassification( + cache_path, tokenizer=tokenizer) + pipeline1 = WordSegmentationPipeline(model, preprocessor=tokenizer) + pipeline2 = pipeline( + Tasks.word_segmentation, model=model, preprocessor=tokenizer) + print(f'sentence: {self.sentence}\n' + f'pipeline1:{pipeline1(input=self.sentence)}') + print() + print(f'pipeline2: {pipeline2(input=self.sentence)}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub(self): + model = Model.from_pretrained(self.model_id) + tokenizer = TokenClassifcationPreprocessor(model.model_dir) + pipeline_ins = pipeline( + task=Tasks.word_segmentation, model=model, preprocessor=tokenizer) + print(pipeline_ins(input=self.sentence)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_name(self): + pipeline_ins = pipeline( + task=Tasks.word_segmentation, model=self.model_id) + print(pipeline_ins(input=self.sentence)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_default_model(self): + pipeline_ins = pipeline(task=Tasks.word_segmentation) + 
print(pipeline_ins(input=self.sentence))
+
+
+if __name__ == '__main__':
+    unittest.main()

From 201922d33d97455458752c4baedcfa3af631754b Mon Sep 17 00:00:00 2001
From: "wenmeng.zwm" 
Date: Fri, 17 Jun 2022 19:33:15 +0800
Subject: [PATCH 13/16] [to #42461396] add git-lfs support and mv test data to
 git-lfs

---
 .gitattributes                           |  3 ++
 .gitignore                               |  1 -
 data/test/images/image1.jpg              |  3 ++
 data/test/images/image_matting.png       |  3 ++
 docs/source/develop.md                   | 49 ++++++++++++++++++++++++
 tests/pipelines/test_base.py             |  3 +-
 tests/pipelines/test_image_captioning.py |  4 +-
 tests/pipelines/test_image_matting.py    | 16 ++------
 tests/preprocessors/test_image.py        |  4 +-
 9 files changed, 65 insertions(+), 21 deletions(-)
 create mode 100644 .gitattributes
 create mode 100644 data/test/images/image1.jpg
 create mode 100644 data/test/images/image_matting.png

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..9c607acc
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,3 @@
+*.png filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.mp4 filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
index 3e6a3f4a..c8a1c717 100644
--- a/.gitignore
+++ b/.gitignore
@@ -104,7 +104,6 @@ venv.bak/
 
 # mypy
 .mypy_cache/
-data
 
 .vscode
 .idea
diff --git a/data/test/images/image1.jpg b/data/test/images/image1.jpg
new file mode 100644
index 00000000..450a969d
--- /dev/null
+++ b/data/test/images/image1.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78094cc48fbcfd9b6d321fe13619ecc72b65e006fc1b4c4458409ade9979486d
+size 129862
diff --git a/data/test/images/image_matting.png b/data/test/images/image_matting.png
new file mode 100644
index 00000000..de3f1918
--- /dev/null
+++ b/data/test/images/image_matting.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af83a94899a6d23339c3ecc5c4c58c57c835af57b531a2f4c50461184f820141
+size 603621
diff --git a/docs/source/develop.md b/docs/source/develop.md
index f96590b0..96120088 100644
--- a/docs/source/develop.md
+++ b/docs/source/develop.md
@@ -91,6 +91,55 @@ make tests
 
 4. Daily regression tests will run all cases at 0 am each day using master branch.
 
+### 2.3 Test data storage
+
+As we need a lot of data for testing, including images, videos, and models,
+we use git lfs to store those large files.
+
+1. Install git-lfs.
+For mac
+```bash
+brew install git-lfs
+git lfs install
+```
+
+For centos, please download the rpm from the git-lfs github release [website](https://github.com/git-lfs/git-lfs/releases/tag/v3.2.0)
+```bash
+wget http://101374-public.oss-cn-hangzhou-zmf.aliyuncs.com/git-lfs-3.2.0-1.el7.x86_64.rpm
+sudo rpm -ivh git-lfs-3.2.0-1.el7.x86_64.rpm
+git lfs install
+```
+
+For ubuntu
+```bash
+curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
+sudo apt-get install git-lfs
+git lfs install
+```
+
+2. Track the file types you need with git lfs, for example, to track png files:
+```bash
+git lfs track "*.png"
+```
+
+3. Add your test files to the `data/test/` folder; you can create directories if you need to.
+```bash
+git add data/test/test.png
+```
+
+4. Commit your test data:
+```bash
+git commit -m "xxx"
+```
+
+To pull data from the remote repo, pull it the same way you pull regular git files.
+```bash
+git pull origin branch_name
+```
+
+
+
+
 ## Code Review
 
 1. Run following command to create an aone CR, replace `TARGET_BRANCH` and `CR_NAME` with the one you want.
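A practical caveat on the data-pulling step documented above: after a plain `git pull`, lfs-tracked files can show up as small text pointer files until the lfs objects themselves are fetched. A minimal sketch of the explicit fetch, assuming the standard git-lfs CLI, with `origin`/`master` as illustrative remote and branch names:

```bash
# fetch lfs objects referenced by the current checkout and replace pointers
git lfs pull

# or fetch the objects for a specific remote/branch first, then materialize them
git lfs fetch origin master
git lfs checkout
```
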
diff --git a/tests/pipelines/test_base.py b/tests/pipelines/test_base.py index 73aebfdf..c642ed4b 100644 --- a/tests/pipelines/test_base.py +++ b/tests/pipelines/test_base.py @@ -80,8 +80,7 @@ class CustomPipelineTest(unittest.TestCase): pipe2 = pipeline(dummy_task) self.assertTrue(type(pipe) is type(pipe2)) - img_url = 'http://pai-vision-data-hz.oss-cn-zhangjiakou.' \ - 'aliyuncs.com/data/test/images/image1.jpg' + img_url = 'data/test/images/image1.jpg' output = pipe(img_url) self.assertEqual(output['filename'], img_url) self.assertEqual(output['output_png'].shape, (318, 512, 3)) diff --git a/tests/pipelines/test_image_captioning.py b/tests/pipelines/test_image_captioning.py index 4fac4658..74a65806 100644 --- a/tests/pipelines/test_image_captioning.py +++ b/tests/pipelines/test_image_captioning.py @@ -27,9 +27,7 @@ class ImageCaptionTest(unittest.TestCase): img_captioning = pipeline( Tasks.image_captioning, model=ofile.name, bpe_dir=bpe_dir) - result = img_captioning( - 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png' - ) + result = img_captioning('data/test/images/image_matting.png') print(result['caption']) diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py index 676153bf..6e102d00 100644 --- a/tests/pipelines/test_image_matting.py +++ b/tests/pipelines/test_image_matting.py @@ -34,16 +34,12 @@ class ImageMattingTest(unittest.TestCase): ofile.write(File.read(model_path)) img_matting = pipeline(Tasks.image_matting, model=tmp_dir) - result = img_matting( - 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png' - ) + result = img_matting('data/test/images/image_matting.png') cv2.imwrite('result.png', result['output_png']) @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_dataset(self): - input_location = [ - 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png' - ] + input_location = ['data/test/images/image_matting.png'] # alternatively: # input_location = '/dir/to/images' @@ -58,9 +54,7 @@ class ImageMattingTest(unittest.TestCase): def test_run_modelhub(self): img_matting = pipeline(Tasks.image_matting, model=self.model_id) - result = img_matting( - 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png' - ) + result = img_matting('data/test/images/image_matting.png') cv2.imwrite('result.png', result['output_png']) print(f'Output written to {osp.abspath("result.png")}') @@ -68,9 +62,7 @@ class ImageMattingTest(unittest.TestCase): def test_run_modelhub_default_model(self): img_matting = pipeline(Tasks.image_matting) - result = img_matting( - 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png' - ) + result = img_matting('data/test/images/image_matting.png') cv2.imwrite('result.png', result['output_png']) print(f'Output written to {osp.abspath("result.png")}') diff --git a/tests/preprocessors/test_image.py b/tests/preprocessors/test_image.py index cfa7b11d..21ae780e 100644 --- a/tests/preprocessors/test_image.py +++ b/tests/preprocessors/test_image.py @@ -11,9 +11,7 @@ from modelscope.utils.logger import get_logger class ImagePreprocessorTest(unittest.TestCase): def test_load(self): - img = load_image( - 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png' - ) + img = load_image('data/test/images/image_matting.png') 
self.assertTrue(isinstance(img, PIL.Image.Image))
         self.assertEqual(img.size, (948, 533))
 

From 31498c1d6a81611f5a58db5e3d6e983e8f386ed1 Mon Sep 17 00:00:00 2001
From: "bin.xue" 
Date: Fri, 17 Jun 2022 19:56:11 +0800
Subject: [PATCH 14/16] [to #41669377] add speech AEC pipeline

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/8973072

* [to #41669377] docs and tools refinement and release

1. add build_doc linter script
2. add sphinx-docs support
3. add development doc and api doc
4. change version to 0.1.0 for the first internal release version

Link: https://code.aone.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/8775307

* [to #41669377] add pipeline tutorial and fix bugs

1. add pipeline tutorial
2. fix bugs when using pipeline with certain model and preprocessor

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/8814301

* refine doc
* feat: add audio aec pipeline and preprocessor
* feat: add audio aec model classes
* feat: add audio aec loss functions
* refactor: delete no longer used loss function
* [to #42281043] support kwargs in pipeline

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/8949062

* support kwargs in pipeline
* update develop doc with CR instruction
* Merge branch 'release/0.1' into dev/aec
* style: reformat code by pre-commit tools
* feat: support maas_lib pipeline auto downloading model
* test: add aec test case as sample code
* feat: aec pipeline use config from maashub
* feat: aec pipeline use feature parameters from maashub
* update setup.cfg to disable PEP8 rule W503 in flake8 and yapf
* format: fix double quoted strings, indent issues and optimize import
* refactor: extract some constant in aec pipeline
* refactor: delete no longer used __main__ statement
* chore: change all Chinese comments to English
* fix: change file name style to lower case
* refactor: rename model name
* feat: load C++ .so from LD_LIBRARY_PATH
* feat: register PREPROCESSOR for LinearAECAndFbank
* refactor: move aec process from postprocess() to forward() and update comments
* refactor: add more readable error message when audio sample rate is not 16000
* fix: package maas_lib renamed to modelscope in import statement
* feat: optimize the error message of audio layer classes
* format: delete empty lines
* refactor: rename audio preprocessor and optimize error message
* refactor: change aec model id to damo/speech_dfsmn_aec_psm_16k
* refactor: change sample audio file url to public oss
* Merge branch 'master' into dev/aec
* feat: add output info for aec pipeline
* fix: normalize output audio data to [-1.0, 1.0]
* refactor: use constant from ModelFile
* feat: AEC pipeline can use c++ lib in current working directory and the test will download it
* fix: c++ downloading should work wherever test is triggered
---
 modelscope/models/audio/__init__.py           |   0
 modelscope/models/audio/layers/__init__.py    |   0
 modelscope/models/audio/layers/activations.py |  60 +++
 .../models/audio/layers/affine_transform.py   |  78 +++
 modelscope/models/audio/layers/deep_fsmn.py   | 178 +++++++
 modelscope/models/audio/layers/layer_base.py  |  50 ++
 .../models/audio/layers/uni_deep_fsmn.py      | 482 +++++++++++++++++
 modelscope/models/audio/network/__init__.py   |   0
 modelscope/models/audio/network/loss.py       | 394 ++++++++++++++
 .../models/audio/network/modulation_loss.py   | 248 +++++++++
 modelscope/models/audio/network/se_net.py     | 483 ++++++++++++++++++
 modelscope/pipelines/__init__.py              |   2 +-
 modelscope/pipelines/audio/__init__.py        |   1 +
 .../pipelines/audio/linear_aec_pipeline.py    | 160 ++++++
modelscope/pipelines/outputs.py | 6 + modelscope/preprocessors/__init__.py | 1 + modelscope/preprocessors/audio.py | 230 +++++++++ requirements/runtime.txt | 1 + setup.cfg | 3 +- tests/pipelines/test_speech_signal_process.py | 56 ++ 20 files changed, 2431 insertions(+), 2 deletions(-) create mode 100644 modelscope/models/audio/__init__.py create mode 100644 modelscope/models/audio/layers/__init__.py create mode 100644 modelscope/models/audio/layers/activations.py create mode 100644 modelscope/models/audio/layers/affine_transform.py create mode 100644 modelscope/models/audio/layers/deep_fsmn.py create mode 100644 modelscope/models/audio/layers/layer_base.py create mode 100644 modelscope/models/audio/layers/uni_deep_fsmn.py create mode 100644 modelscope/models/audio/network/__init__.py create mode 100644 modelscope/models/audio/network/loss.py create mode 100644 modelscope/models/audio/network/modulation_loss.py create mode 100644 modelscope/models/audio/network/se_net.py create mode 100644 modelscope/pipelines/audio/linear_aec_pipeline.py create mode 100644 modelscope/preprocessors/audio.py create mode 100644 tests/pipelines/test_speech_signal_process.py diff --git a/modelscope/models/audio/__init__.py b/modelscope/models/audio/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modelscope/models/audio/layers/__init__.py b/modelscope/models/audio/layers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modelscope/models/audio/layers/activations.py b/modelscope/models/audio/layers/activations.py new file mode 100644 index 00000000..b0215bcc --- /dev/null +++ b/modelscope/models/audio/layers/activations.py @@ -0,0 +1,60 @@ +import torch.nn as nn + +from .layer_base import LayerBase + + +class RectifiedLinear(LayerBase): + + def __init__(self, input_dim, output_dim): + super(RectifiedLinear, self).__init__() + self.dim = input_dim + self.relu = nn.ReLU() + + def forward(self, input): + return self.relu(input) + + def to_kaldi_nnet(self): + re_str = '' + re_str += ' %d %d\n' % (self.dim, self.dim) + return re_str + + def load_kaldi_nnet(self, instr): + return instr + + +class LogSoftmax(LayerBase): + + def __init__(self, input_dim, output_dim): + super(LogSoftmax, self).__init__() + self.dim = input_dim + self.ls = nn.LogSoftmax() + + def forward(self, input): + return self.ls(input) + + def to_kaldi_nnet(self): + re_str = '' + re_str += ' %d %d\n' % (self.dim, self.dim) + return re_str + + def load_kaldi_nnet(self, instr): + return instr + + +class Sigmoid(LayerBase): + + def __init__(self, input_dim, output_dim): + super(Sigmoid, self).__init__() + self.dim = input_dim + self.sig = nn.Sigmoid() + + def forward(self, input): + return self.sig(input) + + def to_kaldi_nnet(self): + re_str = '' + re_str += ' %d %d\n' % (self.dim, self.dim) + return re_str + + def load_kaldi_nnet(self, instr): + return instr diff --git a/modelscope/models/audio/layers/affine_transform.py b/modelscope/models/audio/layers/affine_transform.py new file mode 100644 index 00000000..33479505 --- /dev/null +++ b/modelscope/models/audio/layers/affine_transform.py @@ -0,0 +1,78 @@ +import numpy as np +import torch as th +import torch.nn as nn + +from .layer_base import (LayerBase, expect_kaldi_matrix, expect_token_number, + to_kaldi_matrix) + + +class AffineTransform(LayerBase): + + def __init__(self, input_dim, output_dim): + super(AffineTransform, self).__init__() + self.input_dim = input_dim + self.output_dim = output_dim + self.linear = nn.Linear(input_dim, output_dim) + + def 
forward(self, input):
+        return self.linear(input)
+
+    def to_kaldi_nnet(self):
+        re_str = ''
+        re_str += '<AffineTransform> %d %d\n' % (self.output_dim,
+                                                 self.input_dim)
+        re_str += '<LearnRateCoef> 1 <BiasLearnRateCoef> 1 <MaxNorm> 0\n'
+        linear_weights = self.state_dict()['linear.weight']
+        x = linear_weights.squeeze().numpy()
+        re_str += to_kaldi_matrix(x)
+        linear_bias = self.state_dict()['linear.bias']
+        x = linear_bias.squeeze().numpy()
+        re_str += to_kaldi_matrix(x)
+        return re_str
+
+    def to_raw_nnet(self, fid):
+        linear_weights = self.state_dict()['linear.weight']
+        x = linear_weights.squeeze().numpy()
+        x.tofile(fid)
+
+        linear_bias = self.state_dict()['linear.bias']
+        x = linear_bias.squeeze().numpy()
+        x.tofile(fid)
+
+    def load_kaldi_nnet(self, instr):
+        output = expect_token_number(
+            instr,
+            '<LearnRateCoef>',
+        )
+        if output is None:
+            raise Exception('AffineTransform format error for <LearnRateCoef>')
+        instr, lr = output
+
+        output = expect_token_number(instr, '<BiasLearnRateCoef>')
+        if output is None:
+            raise Exception(
+                'AffineTransform format error for <BiasLearnRateCoef>')
+        instr, lr = output
+
+        output = expect_token_number(instr, '<MaxNorm>')
+        if output is None:
+            raise Exception('AffineTransform format error for <MaxNorm>')
+        instr, lr = output
+
+        output = expect_kaldi_matrix(instr)
+        if output is None:
+            raise Exception('AffineTransform format error for parsing matrix')
+        instr, mat = output
+
+        print(mat.shape)
+        self.linear.weight = th.nn.Parameter(
+            th.from_numpy(mat).type(th.FloatTensor))
+
+        output = expect_kaldi_matrix(instr)
+        if output is None:
+            raise Exception('AffineTransform format error for parsing matrix')
+        instr, mat = output
+        mat = np.squeeze(mat)
+        self.linear.bias = th.nn.Parameter(
+            th.from_numpy(mat).type(th.FloatTensor))
+        return instr
diff --git a/modelscope/models/audio/layers/deep_fsmn.py b/modelscope/models/audio/layers/deep_fsmn.py
new file mode 100644
index 00000000..72ba07dc
--- /dev/null
+++ b/modelscope/models/audio/layers/deep_fsmn.py
@@ -0,0 +1,178 @@
+import numpy as np
+import torch as th
+import torch.nn as nn
+import torch.nn.functional as F
+
+from .layer_base import (LayerBase, expect_kaldi_matrix, expect_token_number,
+                         to_kaldi_matrix)
+
+
+class DeepFsmn(LayerBase):
+
+    def __init__(self,
+                 input_dim,
+                 output_dim,
+                 lorder=None,
+                 rorder=None,
+                 hidden_size=None,
+                 layer_norm=False,
+                 dropout=0):
+        super(DeepFsmn, self).__init__()
+
+        self.input_dim = input_dim
+        self.output_dim = output_dim
+
+        if lorder is None:
+            return
+
+        self.lorder = lorder
+        self.rorder = rorder
+        self.hidden_size = hidden_size
+        self.layer_norm = layer_norm
+
+        self.linear = nn.Linear(input_dim, hidden_size)
+        self.norm = nn.LayerNorm(hidden_size)
+        self.drop1 = nn.Dropout(p=dropout)
+        self.drop2 = nn.Dropout(p=dropout)
+        self.project = nn.Linear(hidden_size, output_dim, bias=False)
+
+        self.conv1 = nn.Conv2d(
+            output_dim,
+            output_dim, [lorder, 1], [1, 1],
+            groups=output_dim,
+            bias=False)
+        self.conv2 = nn.Conv2d(
+            output_dim,
+            output_dim, [rorder, 1], [1, 1],
+            groups=output_dim,
+            bias=False)
+
+    def forward(self, input):
+
+        f1 = F.relu(self.linear(input))
+
+        f1 = self.drop1(f1)
+        if self.layer_norm:
+            f1 = self.norm(f1)
+
+        p1 = self.project(f1)
+
+        x = th.unsqueeze(p1, 1)
+
+        x_per = x.permute(0, 3, 2, 1)
+
+        y = F.pad(x_per, [0, 0, self.lorder - 1, 0])
+        yr = F.pad(x_per, [0, 0, 0, self.rorder])
+        yr = yr[:, :, 1:, :]
+
+        out = x_per + self.conv1(y) + self.conv2(yr)
+        out = self.drop2(out)
+
+        out1 = out.permute(0, 3, 2, 1)
+
+        return input + out1.squeeze()
+
+    def to_kaldi_nnet(self):
+        re_str = ''
+        re_str += '<DeepFsmn> %d %d\n'\
+                  % (self.output_dim, self.input_dim)
+        re_str += '<LearnRateCoef> %d <HidSize> %d <LOrder> %d <LStride> %d <MaxNorm> 0\n'\
+                  % (1, self.hidden_size, self.lorder, 1)
+        lfiters = self.state_dict()['conv1.weight']
+        x = np.flipud(lfiters.squeeze().numpy().T)
+        re_str += to_kaldi_matrix(x)
+        proj_weights = self.state_dict()['project.weight']
+        x = proj_weights.squeeze().numpy()
+        re_str += to_kaldi_matrix(x)
+        linear_weights = self.state_dict()['linear.weight']
+        x = linear_weights.squeeze().numpy()
+        re_str += to_kaldi_matrix(x)
+        linear_bias = self.state_dict()['linear.bias']
+        x = linear_bias.squeeze().numpy()
+        re_str += to_kaldi_matrix(x)
+        return re_str
+
+    def load_kaldi_nnet(self, instr):
+        output = expect_token_number(
+            instr,
+            '<LearnRateCoef>',
+        )
+        if output is None:
+            raise Exception('DeepFsmn format error for <LearnRateCoef>')
+        instr, lr = output
+
+        output = expect_token_number(
+            instr,
+            '<HidSize>',
+        )
+        if output is None:
+            raise Exception('DeepFsmn format error for <HidSize>')
+        instr, hiddensize = output
+        self.hidden_size = int(hiddensize)
+
+        output = expect_token_number(
+            instr,
+            '<LOrder>',
+        )
+        if output is None:
+            raise Exception('DeepFsmn format error for <LOrder>')
+        instr, lorder = output
+        self.lorder = int(lorder)
+
+        output = expect_token_number(
+            instr,
+            '<LStride>',
+        )
+        if output is None:
+            raise Exception('DeepFsmn format error for <LStride>')
+        instr, lstride = output
+        self.lstride = lstride
+
+        output = expect_token_number(
+            instr,
+            '<MaxNorm>',
+        )
+        if output is None:
+            raise Exception('DeepFsmn format error for <MaxNorm>')
+
+        output = expect_kaldi_matrix(instr)
+        if output is None:
+            raise Exception('DeepFsmn format error for parsing matrix')
+        instr, mat = output
+        mat1 = np.fliplr(mat.T).copy()
+        self.conv1 = nn.Conv2d(
+            self.output_dim,
+            self.output_dim, [self.lorder, 1], [1, 1],
+            groups=self.output_dim,
+            bias=False)
+        mat_th = th.from_numpy(mat1).type(th.FloatTensor)
+        mat_th = mat_th.unsqueeze(1)
+        mat_th = mat_th.unsqueeze(3)
+        self.conv1.weight = th.nn.Parameter(mat_th)
+
+        output = expect_kaldi_matrix(instr)
+        if output is None:
+            raise Exception('DeepFsmn format error for parsing matrix')
+        instr, mat = output
+
+        self.project = nn.Linear(self.hidden_size, self.output_dim, bias=False)
+        self.linear = nn.Linear(self.input_dim, self.hidden_size)
+
+        self.project.weight = th.nn.Parameter(
+            th.from_numpy(mat).type(th.FloatTensor))
+
+        output = expect_kaldi_matrix(instr)
+        if output is None:
+            raise Exception('DeepFsmn format error for parsing matrix')
+        instr, mat = output
+        self.linear.weight = th.nn.Parameter(
+            th.from_numpy(mat).type(th.FloatTensor))
+
+        output = expect_kaldi_matrix(instr)
+        if output is None:
+            raise Exception('DeepFsmn format error for parsing matrix')
+        instr, mat = output
+        self.linear.bias = th.nn.Parameter(
+            th.from_numpy(mat).type(th.FloatTensor))
+
+        return instr
diff --git a/modelscope/models/audio/layers/layer_base.py b/modelscope/models/audio/layers/layer_base.py
new file mode 100644
index 00000000..e56c4bc0
--- /dev/null
+++ b/modelscope/models/audio/layers/layer_base.py
@@ -0,0 +1,50 @@
+import abc
+import re
+
+import numpy as np
+import torch.nn as nn
+
+
+def expect_token_number(instr, token):
+    first_token = re.match(r'^\s*' + token, instr)
+    if first_token is None:
+        return None
+    instr = instr[first_token.end():]
+    lr = re.match(r'^\s*(-?\d+\.?\d*e?-?\d*?)', instr)
+    if lr is None:
+        return None
+    return instr[lr.end():], lr.groups()[0]
+
+
+def expect_kaldi_matrix(instr):
+    pos2 = instr.find('[', 0)
+    pos3 = instr.find(']', pos2)
+    mat = []
+    for stt in instr[pos2 + 1:pos3].split('\n'):
+        tmp_mat = np.fromstring(stt, dtype=np.float32, sep=' ')
+        if 
tmp_mat.size > 0: + mat.append(tmp_mat) + return instr[pos3 + 1:], np.array(mat) + + +def to_kaldi_matrix(np_mat): + """ + function that transform as str numpy mat to standard kaldi str matrix + :param np_mat: numpy mat + :return: str + """ + np.set_printoptions(threshold=np.inf, linewidth=np.nan, suppress=True) + out_str = str(np_mat) + out_str = out_str.replace('[', '') + out_str = out_str.replace(']', '') + return '[ %s ]\n' % out_str + + +class LayerBase(nn.Module, metaclass=abc.ABCMeta): + + def __init__(self): + super(LayerBase, self).__init__() + + @abc.abstractmethod + def to_kaldi_nnet(self): + pass diff --git a/modelscope/models/audio/layers/uni_deep_fsmn.py b/modelscope/models/audio/layers/uni_deep_fsmn.py new file mode 100644 index 00000000..c22460c4 --- /dev/null +++ b/modelscope/models/audio/layers/uni_deep_fsmn.py @@ -0,0 +1,482 @@ +import numpy as np +import torch as th +import torch.nn as nn +import torch.nn.functional as F + +from .layer_base import (LayerBase, expect_kaldi_matrix, expect_token_number, + to_kaldi_matrix) + + +class SepConv(nn.Module): + + def __init__(self, + in_channels, + filters, + out_channels, + kernel_size=(5, 2), + dilation=(1, 1)): + """ :param kernel_size (time, frequency) + + """ + super(SepConv, self).__init__() + # depthwise + pointwise + self.dconv = nn.Conv2d( + in_channels, + in_channels * filters, + kernel_size, + dilation=dilation, + groups=in_channels) + self.pconv = nn.Conv2d( + in_channels * filters, out_channels, kernel_size=1) + self.padding = dilation[0] * (kernel_size[0] - 1) + + def forward(self, input): + ''' input: [B, C, T, F] + ''' + x = F.pad(input, [0, 0, self.padding, 0]) + x = self.dconv(x) + x = self.pconv(x) + return x + + +class Conv2d(nn.Module): + + def __init__(self, + input_dim, + output_dim, + lorder=20, + rorder=0, + groups=1, + bias=False, + skip_connect=True): + super(Conv2d, self).__init__() + self.lorder = lorder + self.conv = nn.Conv2d( + input_dim, output_dim, [lorder, 1], groups=groups, bias=bias) + self.rorder = rorder + if self.rorder: + self.conv2 = nn.Conv2d( + input_dim, output_dim, [rorder, 1], groups=groups, bias=bias) + self.skip_connect = skip_connect + + def forward(self, input): + # [B, 1, T, F] + x = th.unsqueeze(input, 1) + # [B, F, T, 1] + x_per = x.permute(0, 3, 2, 1) + y = F.pad(x_per, [0, 0, self.lorder - 1, 0]) + out = self.conv(y) + if self.rorder: + yr = F.pad(x_per, [0, 0, 0, self.rorder]) + yr = yr[:, :, 1:, :] + out += self.conv2(yr) + out = out.permute(0, 3, 2, 1).squeeze(1) + if self.skip_connect: + out = out + input + return out + + +class SelfAttLayer(nn.Module): + + def __init__(self, input_dim, output_dim, lorder=None, hidden_size=None): + super(SelfAttLayer, self).__init__() + + self.input_dim = input_dim + self.output_dim = output_dim + + if lorder is None: + return + + self.lorder = lorder + self.hidden_size = hidden_size + + self.linear = nn.Linear(input_dim, hidden_size) + + self.project = nn.Linear(hidden_size, output_dim, bias=False) + + self.att = nn.Linear(input_dim, lorder, bias=False) + + def forward(self, input): + + f1 = F.relu(self.linear(input)) + + p1 = self.project(f1) + + x = th.unsqueeze(p1, 1) + + x_per = x.permute(0, 3, 2, 1) + + y = F.pad(x_per, [0, 0, self.lorder - 1, 0]) + + # z [B, F, T, lorder] + z = x_per + for i in range(1, self.lorder): + z = th.cat([z, y[:, :, self.lorder - 1 - i:-i, :]], axis=-1) + + # [B, T, lorder] + att = F.softmax(self.att(input), dim=-1) + att = th.unsqueeze(att, 1) + z = th.sum(z * att, axis=-1) + + out1 = z.permute(0, 2, 1) 
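+        # z stacks the current frame with its lorder-1 predecessors along the
+        # last axis, giving [B, F, T, lorder]; att maps each input frame to a
+        # softmax weighting over that history, so the weighted sum above is
+        # an attention-pooled FSMN memory rather than a fixed convolution.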
+ + return input + out1 + + +class TFFsmn(nn.Module): + + def __init__(self, + input_dim, + output_dim, + lorder=None, + hidden_size=None, + dilation=1, + layer_norm=False, + dropout=0, + skip_connect=True): + super(TFFsmn, self).__init__() + + self.skip_connect = skip_connect + + self.linear = nn.Linear(input_dim, hidden_size) + self.norm = nn.Identity() + if layer_norm: + self.norm = nn.LayerNorm(input_dim) + self.act = nn.ReLU() + self.project = nn.Linear(hidden_size, output_dim, bias=False) + + self.conv1 = nn.Conv2d( + output_dim, + output_dim, [lorder, 1], + dilation=[dilation, 1], + groups=output_dim, + bias=False) + self.padding_left = dilation * (lorder - 1) + dorder = 5 + self.conv2 = nn.Conv2d(1, 1, [dorder, 1], bias=False) + self.padding_freq = dorder - 1 + + def forward(self, input): + return self.compute1(input) + + def compute1(self, input): + ''' linear-dconv-relu(norm)-linear-dconv + ''' + x = self.linear(input) + # [B, 1, F, T] + x = th.unsqueeze(x, 1).permute(0, 1, 3, 2) + z = F.pad(x, [0, 0, self.padding_freq, 0]) + z = self.conv2(z) + x + x = z.permute(0, 3, 2, 1).squeeze(-1) + x = self.act(x) + x = self.norm(x) + x = self.project(x) + x = th.unsqueeze(x, 1).permute(0, 3, 2, 1) + # [B, F, T+lorder-1, 1] + y = F.pad(x, [0, 0, self.padding_left, 0]) + out = self.conv1(y) + if self.skip_connect: + out = out + x + out = out.permute(0, 3, 2, 1).squeeze() + + return input + out + + +class CNNFsmn(nn.Module): + ''' use cnn to reduce parameters + ''' + + def __init__(self, + input_dim, + output_dim, + lorder=None, + hidden_size=None, + dilation=1, + layer_norm=False, + dropout=0, + skip_connect=True): + super(CNNFsmn, self).__init__() + + self.input_dim = input_dim + self.output_dim = output_dim + self.skip_connect = skip_connect + + if lorder is None: + return + + self.lorder = lorder + self.hidden_size = hidden_size + + self.linear = nn.Linear(input_dim, hidden_size) + self.act = nn.ReLU() + kernel_size = (3, 8) + stride = (1, 4) + self.conv = nn.Sequential( + nn.ConstantPad2d((stride[1], 0, kernel_size[0] - 1, 0), 0), + nn.Conv2d(1, stride[1], kernel_size=kernel_size, stride=stride)) + + self.dconv = nn.Conv2d( + output_dim, + output_dim, [lorder, 1], + dilation=[dilation, 1], + groups=output_dim, + bias=False) + self.padding_left = dilation * (lorder - 1) + + def forward(self, input): + return self.compute2(input) + + def compute1(self, input): + ''' linear-relu(norm)-conv2d-relu?-dconv + ''' + # [B, T, F] + x = self.linear(input) + x = self.act(x) + x = th.unsqueeze(x, 1) + x = self.conv(x) + # [B, C, T, F] -> [B, 1, T, F] + b, c, t, f = x.shape + x = x.view([b, 1, t, -1]) + x = x.permute(0, 3, 2, 1) + # [B, F, T+lorder-1, 1] + y = F.pad(x, [0, 0, self.padding_left, 0]) + out = self.dconv(y) + if self.skip_connect: + out = out + x + out = out.permute(0, 3, 2, 1).squeeze() + return input + out + + def compute2(self, input): + ''' conv2d-relu-linear-relu?-dconv + ''' + x = th.unsqueeze(input, 1) + x = self.conv(x) + x = self.act(x) + # [B, C, T, F] -> [B, T, F] + b, c, t, f = x.shape + x = x.view([b, t, -1]) + x = self.linear(x) + x = th.unsqueeze(x, 1).permute(0, 3, 2, 1) + y = F.pad(x, [0, 0, self.padding_left, 0]) + out = self.dconv(y) + if self.skip_connect: + out = out + x + out = out.permute(0, 3, 2, 1).squeeze() + return input + out + + +class UniDeepFsmn(LayerBase): + + def __init__(self, + input_dim, + output_dim, + lorder=None, + hidden_size=None, + dilation=1, + layer_norm=False, + dropout=0, + skip_connect=True): + super(UniDeepFsmn, self).__init__() + + 
self.input_dim = input_dim
+        self.output_dim = output_dim
+        self.skip_connect = skip_connect
+
+        if lorder is None:
+            return
+
+        self.lorder = lorder
+        self.hidden_size = hidden_size
+
+        self.linear = nn.Linear(input_dim, hidden_size)
+        self.norm = nn.Identity()
+        if layer_norm:
+            self.norm = nn.LayerNorm(input_dim)
+        self.act = nn.ReLU()
+        self.project = nn.Linear(hidden_size, output_dim, bias=False)
+
+        self.conv1 = nn.Conv2d(
+            output_dim,
+            output_dim, [lorder, 1],
+            dilation=[dilation, 1],
+            groups=output_dim,
+            bias=False)
+        self.padding_left = dilation * (lorder - 1)
+
+    def forward(self, input):
+        return self.compute1(input)
+
+    def compute1(self, input):
+        ''' linear-relu(norm)-linear-dconv
+        '''
+        # [B, T, F]
+        x = self.linear(input)
+        x = self.act(x)
+        x = self.norm(x)
+        x = self.project(x)
+        x = th.unsqueeze(x, 1).permute(0, 3, 2, 1)
+        # [B, F, T+lorder-1, 1]
+        y = F.pad(x, [0, 0, self.padding_left, 0])
+        out = self.conv1(y)
+        if self.skip_connect:
+            out = out + x
+        out = out.permute(0, 3, 2, 1).squeeze()
+
+        return input + out
+
+    def compute2(self, input):
+        ''' linear-dconv-linear-relu(norm)
+        '''
+        x = self.project(input)
+        x = th.unsqueeze(x, 1).permute(0, 3, 2, 1)
+        y = F.pad(x, [0, 0, self.padding_left, 0])
+        out = self.conv1(y)
+        if self.skip_connect:
+            out = out + x
+        out = out.permute(0, 3, 2, 1).squeeze()
+        x = self.linear(out)
+        x = self.act(x)
+        x = self.norm(x)
+
+        return input + x
+
+    def compute3(self, input):
+        ''' dconv-linear-relu(norm)-linear
+        '''
+        x = th.unsqueeze(input, 1).permute(0, 3, 2, 1)
+        y = F.pad(x, [0, 0, self.padding_left, 0])
+        out = self.conv1(y)
+        if self.skip_connect:
+            out = out + x
+        out = out.permute(0, 3, 2, 1).squeeze()
+        x = self.linear(out)
+        x = self.act(x)
+        x = self.norm(x)
+        x = self.project(x)
+
+        return input + x
+
+    def to_kaldi_nnet(self):
+        re_str = ''
+        re_str += '<UniDeepFsmn> %d %d\n' \
+            % (self.output_dim, self.input_dim)
+        re_str += '<LearnRateCoef> %d <HidSize> %d <LOrder> %d ' \
+            '<LStride> %d <MaxNorm> 0\n' \
+            % (1, self.hidden_size, self.lorder, 1)
+        lfiters = self.state_dict()['conv1.weight']
+        x = np.flipud(lfiters.squeeze().numpy().T)
+        re_str += to_kaldi_matrix(x)
+        proj_weights = self.state_dict()['project.weight']
+        x = proj_weights.squeeze().numpy()
+        re_str += to_kaldi_matrix(x)
+        linear_weights = self.state_dict()['linear.weight']
+        x = linear_weights.squeeze().numpy()
+        re_str += to_kaldi_matrix(x)
+        linear_bias = self.state_dict()['linear.bias']
+        x = linear_bias.squeeze().numpy()
+        re_str += to_kaldi_matrix(x)
+        return re_str
+
+    def to_raw_nnet(self, fid):
+        lfiters = self.state_dict()['conv1.weight']
+        x = np.flipud(lfiters.squeeze().numpy().T)
+        x.tofile(fid)
+
+        proj_weights = self.state_dict()['project.weight']
+        x = proj_weights.squeeze().numpy()
+        x.tofile(fid)
+
+        linear_weights = self.state_dict()['linear.weight']
+        x = linear_weights.squeeze().numpy()
+        x.tofile(fid)
+
+        linear_bias = self.state_dict()['linear.bias']
+        x = linear_bias.squeeze().numpy()
+        x.tofile(fid)
+
+    def load_kaldi_nnet(self, instr):
+        output = expect_token_number(
+            instr,
+            '<LearnRateCoef>',
+        )
+        if output is None:
+            raise Exception('UniDeepFsmn format error for <LearnRateCoef>')
+        instr, lr = output
+
+        output = expect_token_number(
+            instr,
+            '<HidSize>',
+        )
+        if output is None:
+            raise Exception('UniDeepFsmn format error for <HidSize>')
+        instr, hiddensize = output
+        self.hidden_size = int(hiddensize)
+
+        output = expect_token_number(
+            instr,
+            '<LOrder>',
+        )
+        if output is None:
+            raise Exception('UniDeepFsmn format error for <LOrder>')
+        instr, lorder = output
+        self.lorder = int(lorder)
+
+        output = expect_token_number(
+            instr,
+            '<LStride>',
+        )
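+        # Kaldi nnet1 text format parsed here (for reference): a
+        # '<UniDeepFsmn>' header, '<Token> value' pairs, then the memory,
+        # projection and linear weights plus the linear bias, each
+        # serialized as a '[ ... ]' matrix (see expect_kaldi_matrix).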
+        if output is None:
+            raise Exception('UniDeepFsmn format error for <LStride>')
+        instr, lstride = output
+        self.lstride = lstride
+
+        output = expect_token_number(
+            instr,
+            '<MaxNorm>',
+        )
+        if output is None:
+            raise Exception('UniDeepFsmn format error for <MaxNorm>')
+
+        output = expect_kaldi_matrix(instr)
+        if output is None:
+            raise Exception('UniDeepFsmn format error for parsing matrix')
+        instr, mat = output
+        mat1 = np.fliplr(mat.T).copy()
+
+        self.conv1 = nn.Conv2d(
+            self.output_dim,
+            self.output_dim, [self.lorder, 1], [1, 1],
+            groups=self.output_dim,
+            bias=False)
+
+        mat_th = th.from_numpy(mat1).type(th.FloatTensor)
+        mat_th = mat_th.unsqueeze(1)
+        mat_th = mat_th.unsqueeze(3)
+        self.conv1.weight = th.nn.Parameter(mat_th)
+
+        output = expect_kaldi_matrix(instr)
+        if output is None:
+            raise Exception('UniDeepFsmn format error for parsing matrix')
+        instr, mat = output
+
+        self.project = nn.Linear(self.hidden_size, self.output_dim, bias=False)
+        self.linear = nn.Linear(self.input_dim, self.hidden_size)
+
+        self.project.weight = th.nn.Parameter(
+            th.from_numpy(mat).type(th.FloatTensor))
+
+        output = expect_kaldi_matrix(instr)
+        if output is None:
+            raise Exception('UniDeepFsmn format error for parsing matrix')
+        instr, mat = output
+        self.linear.weight = th.nn.Parameter(
+            th.from_numpy(mat).type(th.FloatTensor))
+
+        output = expect_kaldi_matrix(instr)
+        if output is None:
+            raise Exception('UniDeepFsmn format error for parsing matrix')
+        instr, mat = output
+        mat = np.squeeze(mat)
+        self.linear.bias = th.nn.Parameter(
+            th.from_numpy(mat).type(th.FloatTensor))
+
+        return instr
diff --git a/modelscope/models/audio/network/__init__.py b/modelscope/models/audio/network/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/modelscope/models/audio/network/loss.py b/modelscope/models/audio/network/loss.py
new file mode 100644
index 00000000..743661b3
--- /dev/null
+++ b/modelscope/models/audio/network/loss.py
@@ -0,0 +1,394 @@
+import torch
+import torch.nn.functional as F
+
+from .modulation_loss import (GaborSTRFConv, MelScale,
+                              ModulationDomainLossModule)
+
+EPS = 1e-8
+
+
+def compute_mask(mixed_spec, clean_spec, mask_type='psmiam', clip=1):
+    '''
+    stft: (batch, ..., 2) or complex(batch, ...)
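+    mask_type selects the target (formulas as implemented below):
+      'iam'    -> |X| / |Y|, clipped to [0, 1]
+      'psm'    -> Re(X * conj(Y)) / |Y|^2, clipped to [0, 1]
+      'psmiam' -> psm * iam, clipped to [0, 1]
+      'crm'    -> complex mask (X / Y as real, imag), clipped to [-clip, clip]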
+    y = x + n, i.e. the mixture is clean speech plus noise
+    '''
+    if torch.is_complex(mixed_spec):
+        yr, yi = mixed_spec.real, mixed_spec.imag
+    else:
+        yr, yi = mixed_spec[..., 0], mixed_spec[..., 1]
+    if torch.is_complex(clean_spec):
+        xr, xi = clean_spec.real, clean_spec.imag
+    else:
+        xr, xi = clean_spec[..., 0], clean_spec[..., 1]
+
+    if mask_type == 'iam':
+        ymag = torch.sqrt(yr**2 + yi**2)
+        xmag = torch.sqrt(xr**2 + xi**2)
+        iam = xmag / (ymag + EPS)
+        return torch.clamp(iam, 0, 1)
+
+    elif mask_type == 'psm':
+        ypow = yr**2 + yi**2
+        psm = (xr * yr + xi * yi) / (ypow + EPS)
+        return torch.clamp(psm, 0, 1)
+
+    elif mask_type == 'psmiam':
+        ypow = yr**2 + yi**2
+        psm = (xr * yr + xi * yi) / (ypow + EPS)
+        ymag = torch.sqrt(yr**2 + yi**2)
+        xmag = torch.sqrt(xr**2 + xi**2)
+        iam = xmag / (ymag + EPS)
+        psmiam = psm * iam
+        return torch.clamp(psmiam, 0, 1)
+
+    elif mask_type == 'crm':
+        ypow = yr**2 + yi**2
+        mr = (xr * yr + xi * yi) / (ypow + EPS)
+        mi = (xi * yr - xr * yi) / (ypow + EPS)
+        mr = torch.clamp(mr, -clip, clip)
+        mi = torch.clamp(mi, -clip, clip)
+        return mr, mi
+
+
+def energy_vad(spec,
+               thdhigh=320 * 600 * 600 * 2,
+               thdlow=320 * 300 * 300 * 2,
+               int16=True):
+    '''
+    energy based vad should be accurate enough
+    spec: (batch, bins, frames, 2)
+    returns (batch, frames)
+    '''
+    energy = torch.sum(spec[..., 0]**2 + spec[..., 1]**2, dim=1)
+    # use a float tensor so uncertain frames can be marked 0.5; indexing
+    # a bool tensor would silently cast 0.5 to True
+    vad = (energy > thdhigh).float()
+    idx = torch.logical_and(vad == 0, energy > thdlow)
+    vad[idx] = 0.5
+    return vad
+
+
+def modulation_loss_init(n_fft):
+    gabor_strf_parameters = torch.load(
+        './network/gabor_strf_parameters.pt')['state_dict']
+    gabor_modulation_kernels = GaborSTRFConv(supn=30, supk=30, nkern=60)
+    gabor_modulation_kernels.load_state_dict(gabor_strf_parameters)
+
+    modulation_loss_module = ModulationDomainLossModule(
+        gabor_modulation_kernels.eval())
+    for param in modulation_loss_module.parameters():
+        param.requires_grad = False
+
+    stft2mel = MelScale(
+        n_mels=80, sample_rate=16000, n_stft=n_fft // 2 + 1).cuda()
+
+    return modulation_loss_module, stft2mel
+
+
+def mask_loss_function(
+        loss_func='psm_loss',
+        loss_type='mse',  # ['mse', 'mae', 'comb']
+        mask_type='psmiam',
+        use_mod_loss=False,
+        use_wav2vec_loss=False,
+        n_fft=640,
+        hop_length=320,
+        EPS=1e-8,
+        weight=None):
+    if weight is not None:
+        print(f'Use loss weight: {weight}')
+    winlen = n_fft
+    window = torch.hamming_window(winlen, periodic=False)
+
+    def stft(x, return_complex=False):
+        # returns [batch, bins, frames, 2]
+        return torch.stft(
+            x,
+            n_fft,
+            hop_length,
+            winlen,
+            window=window.to(x.device),
+            center=False,
+            return_complex=return_complex)
+
+    def istft(x, slen):
+        return torch.istft(
+            x,
+            n_fft,
+            hop_length,
+            winlen,
+            window=window.to(x.device),
+            center=False,
+            length=slen)
+
+    def mask_loss(targets, masks, nframes):
+        ''' [Batch, Time, Frequency]
+        '''
+        with torch.no_grad():
+            mask_for_loss = torch.ones_like(targets)
+            for idx, num in enumerate(nframes):
+                mask_for_loss[idx, num:, :] = 0
+        masks = masks * mask_for_loss
+        targets = targets * mask_for_loss
+
+        if weight is None:
+            alpha = 1
+        else:  # for aec ST
+            alpha = weight - targets
+
+        if loss_type == 'mse':
+            loss = 0.5 * torch.sum(alpha * torch.pow(targets - masks, 2))
+        elif loss_type == 'mae':
+            loss = torch.sum(alpha * torch.abs(targets - masks))
+        else:  # mse(mask), mae(mask) approx 1:2
+            loss = 0.5 * torch.sum(alpha * torch.pow(targets - masks, 2)
+                                   + 0.1 * alpha * torch.abs(targets - masks))
+        loss /= torch.sum(nframes)
+        return loss
+
+    def spectrum_loss(targets, spec, nframes):
+        ''' [Batch, Time, Frequency, 2]
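+        Sums a real/imaginary MSE term and a magnitude MSE term over the
+        valid frames, normalized by the total frame count.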
''' + with torch.no_grad(): + mask_for_loss = torch.ones_like(targets[..., 0]) + for idx, num in enumerate(nframes): + mask_for_loss[idx, num:, :] = 0 + xr = spec[..., 0] * mask_for_loss + xi = spec[..., 1] * mask_for_loss + yr = targets[..., 0] * mask_for_loss + yi = targets[..., 1] * mask_for_loss + xmag = torch.sqrt(spec[..., 0]**2 + spec[..., 1]**2) * mask_for_loss + ymag = torch.sqrt(targets[..., 0]**2 + + targets[..., 1]**2) * mask_for_loss + + loss1 = torch.sum(torch.pow(xr - yr, 2) + torch.pow(xi - yi, 2)) + loss2 = torch.sum(torch.pow(xmag - ymag, 2)) + + loss = (loss1 + loss2) / torch.sum(nframes) + return loss + + def sa_loss_dlen(mixed, clean, masks, nframes): + yspec = stft(mixed).permute([0, 2, 1, 3]) / 32768 + xspec = stft(clean).permute([0, 2, 1, 3]) / 32768 + with torch.no_grad(): + mask_for_loss = torch.ones_like(xspec[..., 0]) + for idx, num in enumerate(nframes): + mask_for_loss[idx, num:, :] = 0 + emag = ((yspec[..., 0]**2 + yspec[..., 1]**2)**0.15) * (masks**0.3) + xmag = (xspec[..., 0]**2 + xspec[..., 1]**2)**0.15 + emag = emag * mask_for_loss + xmag = xmag * mask_for_loss + + loss = torch.sum(torch.pow(emag - xmag, 2)) / torch.sum(nframes) + return loss + + def psm_vad_loss_dlen(mixed, clean, masks, nframes, subtask=None): + mixed_spec = stft(mixed) + clean_spec = stft(clean) + targets = compute_mask(mixed_spec, clean_spec, mask_type) + # [B, T, F] + targets = targets.permute(0, 2, 1) + + loss = mask_loss(targets, masks, nframes) + + if subtask is not None: + vadtargets = energy_vad(clean_spec) + with torch.no_grad(): + mask_for_loss = torch.ones_like(targets[:, :, 0]) + for idx, num in enumerate(nframes): + mask_for_loss[idx, num:] = 0 + subtask = subtask[:, :, 0] * mask_for_loss + vadtargets = vadtargets * mask_for_loss + + loss_vad = F.binary_cross_entropy(subtask, vadtargets) + return loss + loss_vad + return loss + + def modulation_loss(mixed, clean, masks, nframes, subtask=None): + mixed_spec = stft(mixed, True) + clean_spec = stft(clean, True) + enhanced_mag = torch.abs(mixed_spec) + clean_mag = torch.abs(clean_spec) + with torch.no_grad(): + mask_for_loss = torch.ones_like(clean_mag) + for idx, num in enumerate(nframes): + mask_for_loss[idx, :, num:] = 0 + clean_mag = clean_mag * mask_for_loss + enhanced_mag = enhanced_mag * mask_for_loss * masks.permute([0, 2, 1]) + + # Covert to log-mel representation + # (B,T,#mel_channels) + clean_log_mel = torch.log( + torch.transpose(stft2mel(clean_mag**2), 2, 1) + 1e-8) + enhanced_log_mel = torch.log( + torch.transpose(stft2mel(enhanced_mag**2), 2, 1) + 1e-8) + + alpha = compute_mask(mixed_spec, clean_spec, mask_type) + alpha = alpha.permute(0, 2, 1) + loss = 0.05 * modulation_loss_module(enhanced_log_mel, clean_log_mel, + alpha) + loss2 = psm_vad_loss_dlen(mixed, clean, masks, nframes, subtask) + # print(loss.item(), loss2.item()) #approx 1:4 + loss = loss + loss2 + return loss + + def wav2vec_loss(mixed, clean, masks, nframes, subtask=None): + mixed /= 32768 + clean /= 32768 + mixed_spec = stft(mixed) + with torch.no_grad(): + mask_for_loss = torch.ones_like(masks) + for idx, num in enumerate(nframes): + mask_for_loss[idx, num:, :] = 0 + masks_est = masks * mask_for_loss + + estimate = mixed_spec * masks_est.permute([0, 2, 1]).unsqueeze(3) + est_clean = istft(estimate, clean.shape[1]) + loss = wav2vec_loss_module(est_clean, clean) + return loss + + def sisdr_loss_dlen(mixed, + clean, + masks, + nframes, + subtask=None, + zero_mean=True): + mixed_spec = stft(mixed) + with torch.no_grad(): + mask_for_loss = 
torch.ones_like(masks) + for idx, num in enumerate(nframes): + mask_for_loss[idx, num:, :] = 0 + masks_est = masks * mask_for_loss + + estimate = mixed_spec * masks_est.permute([0, 2, 1]).unsqueeze(3) + est_clean = istft(estimate, clean.shape[1]) + flen = min(clean.shape[1], est_clean.shape[1]) + clean = clean[:, :flen] + est_clean = est_clean[:, :flen] + + # follow asteroid/losses/sdr.py + if zero_mean: + clean = clean - torch.mean(clean, dim=1, keepdim=True) + est_clean = est_clean - torch.mean(est_clean, dim=1, keepdim=True) + + dot = torch.sum(est_clean * clean, dim=1, keepdim=True) + s_clean_energy = torch.sum(clean**2, dim=1, keepdim=True) + EPS + scaled_clean = dot * clean / s_clean_energy + e_noise = est_clean - scaled_clean + + # [batch] + sisdr = torch.sum( + scaled_clean**2, dim=1) / ( + torch.sum(e_noise**2, dim=1) + EPS) + sisdr = -10 * torch.log10(sisdr + EPS) + loss = sisdr.mean() + return loss + + def sisdr_freq_loss_dlen(mixed, clean, masks, nframes, subtask=None): + mixed_spec = stft(mixed) + clean_spec = stft(clean) + with torch.no_grad(): + mask_for_loss = torch.ones_like(masks) + for idx, num in enumerate(nframes): + mask_for_loss[idx, num:, :] = 0 + masks_est = masks * mask_for_loss + + estimate = mixed_spec * masks_est.permute([0, 2, 1]).unsqueeze(3) + + dot_real = estimate[..., 0] * clean_spec[..., 0] + \ + estimate[..., 1] * clean_spec[..., 1] + dot_imag = estimate[..., 0] * clean_spec[..., 1] - \ + estimate[..., 1] * clean_spec[..., 0] + dot = torch.cat([dot_real.unsqueeze(3), dot_imag.unsqueeze(3)], dim=-1) + s_clean_energy = clean_spec[..., 0] ** 2 + \ + clean_spec[..., 1] ** 2 + EPS + scaled_clean = dot * clean_spec / s_clean_energy.unsqueeze(3) + e_noise = estimate - scaled_clean + + # [batch] + scaled_clean_energy = torch.sum( + scaled_clean[..., 0]**2 + scaled_clean[..., 1]**2, dim=1) + e_noise_energy = torch.sum( + e_noise[..., 0]**2 + e_noise[..., 1]**2, dim=1) + sisdr = torch.sum( + scaled_clean_energy, dim=1) / ( + torch.sum(e_noise_energy, dim=1) + EPS) + sisdr = -10 * torch.log10(sisdr + EPS) + loss = sisdr.mean() + return loss + + def crm_loss_dlen(mixed, clean, masks, nframes, subtask=None): + mixed_spec = stft(mixed).permute([0, 2, 1, 3]) + clean_spec = stft(clean).permute([0, 2, 1, 3]) + mixed_spec = mixed_spec / 32768 + clean_spec = clean_spec / 32768 + tgt_mr, tgt_mi = compute_mask(mixed_spec, clean_spec, mask_type='crm') + + D = int(masks.shape[2] / 2) + with torch.no_grad(): + mask_for_loss = torch.ones_like(clean_spec[..., 0]) + for idx, num in enumerate(nframes): + mask_for_loss[idx, num:, :] = 0 + mr = masks[..., :D] * mask_for_loss + mi = masks[..., D:] * mask_for_loss + tgt_mr = tgt_mr * mask_for_loss + tgt_mi = tgt_mi * mask_for_loss + + if weight is None: + alpha = 1 + else: + alpha = weight - tgt_mr + # signal approximation + yr = mixed_spec[..., 0] + yi = mixed_spec[..., 1] + loss1 = torch.sum(alpha * torch.pow((mr * yr - mi * yi) - clean_spec[..., 0], 2)) \ + + torch.sum(alpha * torch.pow((mr * yi + mi * yr) - clean_spec[..., 1], 2)) + # mask approximation + loss2 = torch.sum(alpha * torch.pow(mr - tgt_mr, 2)) \ + + torch.sum(alpha * torch.pow(mi - tgt_mi, 2)) + loss = 0.5 * (loss1 + loss2) / torch.sum(nframes) + return loss + + def crm_miso_loss_dlen(mixed, clean, masks, nframes): + return crm_loss_dlen(mixed[..., 0], clean[..., 0], masks, nframes) + + def mimo_loss_dlen(mixed, clean, masks, nframes): + chs = mixed.shape[-1] + D = masks.shape[2] // chs + loss = psm_vad_loss_dlen(mixed[..., 0], clean[..., 0], masks[..., :D], + 
nframes)
+        for ch in range(1, chs):
+            loss1 = psm_vad_loss_dlen(mixed[..., ch], clean[..., ch],
+                                      masks[..., ch * D:ch * D + D], nframes)
+            loss = loss + loss1
+        return loss / chs
+
+    def spec_loss_dlen(mixed, clean, spec, nframes):
+        clean_spec = stft(clean).permute([0, 2, 1, 3])
+        clean_spec = clean_spec / 32768
+
+        D = spec.shape[2] // 2
+        spec_est = torch.cat([spec[..., :D, None], spec[..., D:, None]],
+                             dim=-1)
+        loss = spectrum_loss(clean_spec, spec_est, nframes)
+        return loss
+
+    if loss_func == 'psm_vad_loss_dlen':
+        return psm_vad_loss_dlen
+    elif loss_func == 'sisdr_loss_dlen':
+        return sisdr_loss_dlen
+    elif loss_func == 'sisdr_freq_loss_dlen':
+        return sisdr_freq_loss_dlen
+    elif loss_func == 'crm_loss_dlen':
+        return crm_loss_dlen
+    elif loss_func == 'modulation_loss':
+        return modulation_loss
+    elif loss_func == 'wav2vec_loss':
+        return wav2vec_loss
+    elif loss_func == 'mimo_loss_dlen':
+        return mimo_loss_dlen
+    elif loss_func == 'spec_loss_dlen':
+        return spec_loss_dlen
+    elif loss_func == 'sa_loss_dlen':
+        return sa_loss_dlen
+    else:
+        print(f'unknown loss_func: {loss_func}')
+        return None
diff --git a/modelscope/models/audio/network/modulation_loss.py b/modelscope/models/audio/network/modulation_loss.py
new file mode 100644
index 00000000..a45ddead
--- /dev/null
+++ b/modelscope/models/audio/network/modulation_loss.py
@@ -0,0 +1,248 @@
+import math
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torchaudio.transforms import MelScale
+
+
+class ModulationDomainLossModule(torch.nn.Module):
+    """Modulation-domain loss function developed in [1] for supervised speech enhancement
+
+    In our paper, we used the gabor-based STRF kernels as the modulation kernels and used the log-mel spectrogram
+    as the input spectrogram representation.
+    Specific parameter details are in the paper and in the example below
+
+    Parameters
+    ----------
+    modulation_kernels: nn.Module
+        Differentiable module that transforms a spectrogram representation to the modulation domain
+
+        modulation_domain = modulation_kernels(input_tf_representation)
+        Input Spectrogram representation (B, T, F) ---> |(M) modulation_kernels| ---> Modulation Domain (B, M, T', F')
+
+    norm: boolean
+        Normalizes the modulation domain representation to be 0 mean across time
+
+    [1] T. Vuong, Y. Xia, and R. M. Stern, "A modulation-domain loss for neural-network-based real-time
+        speech enhancement"
+        Accepted ICASSP 2021, https://arxiv.org/abs/2102.07330
+    """
+
+    def __init__(self, modulation_kernels, norm=True):
+        super(ModulationDomainLossModule, self).__init__()
+
+        self.modulation_kernels = modulation_kernels
+        self.mse = nn.MSELoss(reduction='none')
+        self.norm = norm
+
+    def forward(self, enhanced_spect, clean_spect, weight=None):
+        """Calculate modulation-domain loss
+        Args:
+            enhanced_spect (Tensor): spectrogram representation of enhanced signal (B, #frames, #freq_channels).
+            clean_spect (Tensor): spectrogram representation of clean ground-truth signal (B, #frames, #freq_channels).
+        Returns:
+            Tensor: Modulation-domain loss value.
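+        Note:
+            weight (Tensor, optional) is a TF mask (B, #frames, #freq_channels)
+            folded into the per-sample weighting alpha below to emphasize
+            speech-dominant regions.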
+ """ + + clean_mod = self.modulation_kernels(clean_spect) + enhanced_mod = self.modulation_kernels(enhanced_spect) + + if self.norm: + mean_clean_mod = torch.mean(clean_mod, dim=2) + mean_enhanced_mod = torch.mean(enhanced_mod, dim=2) + + clean_mod = clean_mod - mean_clean_mod.unsqueeze(2) + enhanced_mod = enhanced_mod - mean_enhanced_mod.unsqueeze(2) + + if weight is None: + alpha = 1 + else: # TF-mask weight + alpha = 1 + torch.sum(weight, dim=-1, keepdim=True).unsqueeze(1) + mod_mse_loss = self.mse(enhanced_mod, clean_mod) * alpha + mod_mse_loss = torch.mean( + torch.sum(mod_mse_loss, dim=(1, 2, 3)) + / torch.sum(clean_mod**2, dim=(1, 2, 3))) + + return mod_mse_loss + + +class ModulationDomainNCCLossModule(torch.nn.Module): + """Modulation-domain loss function developed in [1] for supervised speech enhancement + + # Speech Intelligibility Prediction Using Spectro-Temporal Modulation Analysis - based off of this + + In our paper, we used the gabor-based STRF kernels as the modulation kernels and used the log-mel spectrogram + as the input spectrogram representation. + Specific parameter details are in the paper and in the example below + + Parameters + ---------- + modulation_kernels: nn.Module + Differentiable module that transforms a spectrogram representation to the modulation domain + + modulation_domain = modulation_kernels(input_tf_representation) + Input Spectrogram representation(B, T, F) --- (M) modulation_kernels---> Modulation Domain(B, M, T', F') + + [1] + + """ + + def __init__(self, modulation_kernels): + super(ModulationDomainNCCLossModule, self).__init__() + + self.modulation_kernels = modulation_kernels + self.mse = nn.MSELoss(reduce=False) + + def forward(self, enhanced_spect, clean_spect): + """Calculate modulation-domain loss + Args: + enhanced_spect (Tensor): spectrogram representation of enhanced signal (B, #frames, #freq_channels). + clean_spect (Tensor): spectrogram representation of clean ground-truth signal (B, #frames, #freq_channels). + Returns: + Tensor: Modulation-domain loss value. + """ + + clean_mod = self.modulation_kernels(clean_spect) + enhanced_mod = self.modulation_kernels(enhanced_spect) + mean_clean_mod = torch.mean(clean_mod, dim=2) + mean_enhanced_mod = torch.mean(enhanced_mod, dim=2) + + normalized_clean = clean_mod - mean_clean_mod.unsqueeze(2) + normalized_enhanced = enhanced_mod - mean_enhanced_mod.unsqueeze(2) + + inner_product = torch.sum( + normalized_clean * normalized_enhanced, dim=2) + normalized_denom = (torch.sum( + normalized_clean * normalized_clean, dim=2))**.5 * (torch.sum( + normalized_enhanced * normalized_enhanced, dim=2))**.5 + + ncc = inner_product / normalized_denom + mod_mse_loss = torch.mean((ncc - 1.0)**2) + + return mod_mse_loss + + +class GaborSTRFConv(nn.Module): + """Gabor-STRF-based cross-correlation kernel.""" + + def __init__(self, + supn, + supk, + nkern, + rates=None, + scales=None, + norm_strf=True, + real_only=False): + """Instantiate a Gabor-based STRF convolution layer. + Parameters + ---------- + supn: int + Time support in number of frames. Also the window length. + supk: int + Frequency support in number of channels. Also the window length. + nkern: int + Number of kernels, each with a learnable rate and scale. + rates: list of float, None + Initial values for temporal modulation. + scales: list of float, None + Initial values for spectral modulation. 
+ norm_strf: Boolean + Normalize STRF kernels to be unit length + real_only: Boolean + If True, nkern REAL gabor-STRF kernels + If False, nkern//2 REAL and nkern//2 IMAGINARY gabor-STRF kernels + """ + super(GaborSTRFConv, self).__init__() + self.numN = supn + self.numK = supk + self.numKern = nkern + self.real_only = real_only + self.norm_strf = norm_strf + + if not real_only: + nkern = nkern // 2 + + if supk % 2 == 0: # force odd number + supk += 1 + self.supk = torch.arange(supk, dtype=torch.float32) + if supn % 2 == 0: # force odd number + supn += 1 + self.supn = torch.arange(supn, dtype=self.supk.dtype) + self.padding = (supn // 2, supk // 2) + # Set up learnable parameters + # for param in (rates, scales): + # assert (not param) or len(param) == nkern + if not rates: + + rates = torch.rand(nkern) * math.pi / 2.0 + + if not scales: + + scales = (torch.rand(nkern) * 2.0 - 1.0) * math.pi / 2.0 + + self.rates_ = nn.Parameter(torch.Tensor(rates)) + self.scales_ = nn.Parameter(torch.Tensor(scales)) + + def strfs(self): + """Make STRFs using the current parameters.""" + + if self.supn.device != self.rates_.device: # for first run + self.supn = self.supn.to(self.rates_.device) + self.supk = self.supk.to(self.rates_.device) + n0, k0 = self.padding + + nwind = .5 - .5 * \ + torch.cos(2 * math.pi * (self.supn + 1) / (len(self.supn) + 1)) + kwind = .5 - .5 * \ + torch.cos(2 * math.pi * (self.supk + 1) / (len(self.supk) + 1)) + + new_wind = torch.matmul((nwind).unsqueeze(-1), (kwind).unsqueeze(0)) + + n_n_0 = self.supn - n0 + k_k_0 = self.supk - k0 + n_mult = torch.matmul( + n_n_0.unsqueeze(1), + torch.ones((1, len(self.supk))).type(torch.FloatTensor).to( + self.rates_.device)) + k_mult = torch.matmul( + torch.ones((len(self.supn), + 1)).type(torch.FloatTensor).to(self.rates_.device), + k_k_0.unsqueeze(0)) + + inside = self.rates_.unsqueeze(1).unsqueeze( + 1) * n_mult + self.scales_.unsqueeze(1).unsqueeze(1) * k_mult + real_strf = torch.cos(inside) * new_wind.unsqueeze(0) + + if self.real_only: + final_strf = real_strf + + else: + imag_strf = torch.sin(inside) * new_wind.unsqueeze(0) + final_strf = torch.cat([real_strf, imag_strf], dim=0) + + if self.norm_strf: + final_strf = final_strf / (torch.sum( + final_strf**2, dim=(1, 2)).unsqueeze(1).unsqueeze(2))**.5 + + return final_strf + + def forward(self, sigspec): + """Forward pass a batch of (real) spectra [Batch x Time x Frequency].""" + if len(sigspec.shape) == 2: # expand batch dimension if single eg + sigspec = sigspec.unsqueeze(0) + strfs = self.strfs().unsqueeze(1).type_as(sigspec) + out = F.conv2d(sigspec.unsqueeze(1), strfs, padding=self.padding) + return out + + def __repr__(self): + """Gabor filter""" + report = """ + +++++ Gabor Filter Kernels [{}], supn[{}], supk[{}] real only [{}] norm strf [{}] +++++ + + """.format(self.numKern, self.numN, self.numK, self.real_only, + self.norm_strf) + + return report diff --git a/modelscope/models/audio/network/se_net.py b/modelscope/models/audio/network/se_net.py new file mode 100644 index 00000000..54808043 --- /dev/null +++ b/modelscope/models/audio/network/se_net.py @@ -0,0 +1,483 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..layers.activations import RectifiedLinear, Sigmoid +from ..layers.affine_transform import AffineTransform +from ..layers.deep_fsmn import DeepFsmn +from ..layers.uni_deep_fsmn import Conv2d, UniDeepFsmn + + +class MaskNet(nn.Module): + + def __init__(self, + indim, + outdim, + layers=9, + hidden_dim=128, + hidden_dim2=None, + lorder=20, 
+                 rorder=0,
+                 dilation=1,
+                 layer_norm=False,
+                 dropout=0,
+                 crm=False,
+                 vad=False,
+                 linearout=False):
+        super(MaskNet, self).__init__()
+
+        self.linear1 = AffineTransform(indim, hidden_dim)
+        self.relu = RectifiedLinear(hidden_dim, hidden_dim)
+        if hidden_dim2 is None:
+            hidden_dim2 = hidden_dim
+
+        if rorder == 0:
+            repeats = [
+                UniDeepFsmn(
+                    hidden_dim,
+                    hidden_dim,
+                    lorder,
+                    hidden_dim2,
+                    dilation=dilation,
+                    layer_norm=layer_norm,
+                    dropout=dropout) for i in range(layers)
+            ]
+        else:
+            repeats = [
+                DeepFsmn(
+                    hidden_dim,
+                    hidden_dim,
+                    lorder,
+                    rorder,
+                    hidden_dim2,
+                    layer_norm=layer_norm,
+                    dropout=dropout) for i in range(layers)
+            ]
+        self.deepfsmn = nn.Sequential(*repeats)
+
+        self.linear2 = AffineTransform(hidden_dim, outdim)
+
+        self.crm = crm
+        if self.crm:
+            self.sig = nn.Tanh()
+        else:
+            self.sig = Sigmoid(outdim, outdim)
+
+        self.vad = vad
+        if self.vad:
+            self.linear3 = AffineTransform(hidden_dim, 1)
+
+        self.layers = layers
+        self.linearout = linearout
+        if self.linearout and self.vad:
+            print('Warning: linearout together with vad is not supported')
+
+    def forward(self, feat, ctl=None):
+        x1 = self.linear1(feat)
+        x2 = self.relu(x1)
+        if ctl is not None:
+            ctl = min(ctl, self.layers - 1)
+            for i in range(ctl):
+                x2 = self.deepfsmn[i](x2)
+            mask = self.sig(self.linear2(x2))
+            if self.vad:
+                vad = torch.sigmoid(self.linear3(x2))
+                return mask, vad
+            else:
+                return mask
+        x3 = self.deepfsmn(x2)
+        if self.linearout:
+            return self.linear2(x3)
+        mask = self.sig(self.linear2(x3))
+        if self.vad:
+            vad = torch.sigmoid(self.linear3(x3))
+            return mask, vad
+        else:
+            return mask
+
+    def to_kaldi_nnet(self):
+        re_str = ''
+        re_str += '<Nnet>\n'
+        re_str += self.linear1.to_kaldi_nnet()
+        re_str += self.relu.to_kaldi_nnet()
+        for dfsmn in self.deepfsmn:
+            re_str += dfsmn.to_kaldi_nnet()
+        re_str += self.linear2.to_kaldi_nnet()
+        re_str += self.sig.to_kaldi_nnet()
+        re_str += '</Nnet>\n'
+
+        return re_str
+
+    def to_raw_nnet(self, fid):
+        self.linear1.to_raw_nnet(fid)
+        for dfsmn in self.deepfsmn:
+            dfsmn.to_raw_nnet(fid)
+        self.linear2.to_raw_nnet(fid)
+
+
+class StageNet(nn.Module):
+
+    def __init__(self,
+                 indim,
+                 outdim,
+                 layers=9,
+                 layers2=6,
+                 hidden_dim=128,
+                 lorder=20,
+                 rorder=0,
+                 layer_norm=False,
+                 dropout=0,
+                 crm=False,
+                 vad=False,
+                 linearout=False):
+        super(StageNet, self).__init__()
+
+        self.stage1 = nn.ModuleList()
+        self.stage2 = nn.ModuleList()
+        layer = nn.Sequential(nn.Linear(indim, hidden_dim), nn.ReLU())
+        self.stage1.append(layer)
+        for i in range(layers):
+            layer = UniDeepFsmn(
+                hidden_dim,
+                hidden_dim,
+                lorder,
+                hidden_dim,
+                layer_norm=layer_norm,
+                dropout=dropout)
+            self.stage1.append(layer)
+        layer = nn.Sequential(nn.Linear(hidden_dim, 321), nn.Sigmoid())
+        self.stage1.append(layer)
+        # stage2
+        layer = nn.Sequential(nn.Linear(321 + indim, hidden_dim), nn.ReLU())
+        self.stage2.append(layer)
+        for i in range(layers2):
+            layer = UniDeepFsmn(
+                hidden_dim,
+                hidden_dim,
+                lorder,
+                hidden_dim,
+                layer_norm=layer_norm,
+                dropout=dropout)
+            self.stage2.append(layer)
+        layer = nn.Sequential(
+            nn.Linear(hidden_dim, outdim),
+            nn.Sigmoid() if not crm else nn.Tanh())
+        self.stage2.append(layer)
+        self.crm = crm
+        self.vad = vad
+        self.linearout = linearout
+        self.window = torch.hamming_window(640, periodic=False).cuda()
+        self.freezed = False
+
+    def freeze(self):
+        if not self.freezed:
+            for param in self.stage1.parameters():
+                param.requires_grad = False
+            self.freezed = True
+            print('stage1 frozen')
+
+    def forward(self, feat, mixture, ctl=None):
+        if ctl == 'off':
+            x = feat
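+            # ctl == 'off' runs stage1 only: feature -> FSMN stack ->
+            # 321-dim magnitude mask. Otherwise stage1 is frozen and its
+            # masked spectrum magnitude is concatenated with the input
+            # feature to drive the stage2 refinement network below.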
for i in range(len(self.stage1)): + x = self.stage1[i](x) + return x + else: + self.freeze() + x = feat + for i in range(len(self.stage1)): + x = self.stage1[i](x) + + spec = torch.stft( + mixture / 32768, + 640, + 320, + 640, + self.window, + center=False, + return_complex=True) + spec = torch.view_as_real(spec).permute([0, 2, 1, 3]) + specmag = torch.sqrt(spec[..., 0]**2 + spec[..., 1]**2) + est = x * specmag + y = torch.cat([est, feat], dim=-1) + for i in range(len(self.stage2)): + y = self.stage2[i](y) + return y + + +class Unet(nn.Module): + + def __init__(self, + indim, + outdim, + layers=9, + dims=[256] * 4, + lorder=20, + rorder=0, + dilation=1, + layer_norm=False, + dropout=0, + crm=False, + vad=False, + linearout=False): + super(Unet, self).__init__() + + self.linear1 = AffineTransform(indim, dims[0]) + self.relu = RectifiedLinear(dims[0], dims[0]) + + self.encoder = nn.ModuleList() + self.decoder = nn.ModuleList() + for i in range(len(dims) - 1): + layer = nn.Sequential( + nn.Linear(dims[i], dims[i + 1]), nn.ReLU(), + nn.Linear(dims[i + 1], dims[i + 1], bias=False), + Conv2d( + dims[i + 1], + dims[i + 1], + lorder, + groups=dims[i + 1], + skip_connect=True)) + self.encoder.append(layer) + for i in range(len(dims) - 1, 0, -1): + layer = nn.Sequential( + nn.Linear(dims[i] * 2, dims[i - 1]), nn.ReLU(), + nn.Linear(dims[i - 1], dims[i - 1], bias=False), + Conv2d( + dims[i - 1], + dims[i - 1], + lorder, + groups=dims[i - 1], + skip_connect=True)) + self.decoder.append(layer) + self.tf = nn.ModuleList() + for i in range(layers - 2 * (len(dims) - 1)): + layer = nn.Sequential( + nn.Linear(dims[-1], dims[-1]), nn.ReLU(), + nn.Linear(dims[-1], dims[-1], bias=False), + Conv2d( + dims[-1], + dims[-1], + lorder, + groups=dims[-1], + skip_connect=True)) + self.tf.append(layer) + + self.linear2 = AffineTransform(dims[0], outdim) + self.crm = crm + self.act = nn.Tanh() if self.crm else nn.Sigmoid() + self.vad = False + self.layers = layers + self.linearout = linearout + + def forward(self, x, ctl=None): + x = self.linear1(x) + x = self.relu(x) + + encoder_out = [] + for i in range(len(self.encoder)): + x = self.encoder[i](x) + encoder_out.append(x) + for i in range(len(self.tf)): + x = self.tf[i](x) + for i in range(len(self.decoder)): + x = torch.cat([x, encoder_out[-1 - i]], dim=-1) + x = self.decoder[i](x) + + x = self.linear2(x) + if self.linearout: + return x + return self.act(x) + + +class BranchNet(nn.Module): + + def __init__(self, + indim, + outdim, + layers=9, + hidden_dim=256, + lorder=20, + rorder=0, + dilation=1, + layer_norm=False, + dropout=0, + crm=False, + vad=False, + linearout=False): + super(BranchNet, self).__init__() + + self.linear1 = AffineTransform(indim, hidden_dim) + self.relu = RectifiedLinear(hidden_dim, hidden_dim) + + self.convs = nn.ModuleList() + self.deepfsmn = nn.ModuleList() + self.FREQ = nn.ModuleList() + self.TIME = nn.ModuleList() + self.br1 = nn.ModuleList() + self.br2 = nn.ModuleList() + for i in range(layers): + ''' + layer = nn.Sequential( + nn.Linear(hidden_dim, hidden_dim), + nn.ReLU(), + nn.Linear(hidden_dim, hidden_dim, bias=False), + Conv2d(hidden_dim, hidden_dim, lorder, + groups=hidden_dim, skip_connect=True) + ) + self.deepfsmn.append(layer) + ''' + layer = nn.Sequential(nn.Linear(hidden_dim, hidden_dim), nn.ReLU()) + self.FREQ.append(layer) + ''' + layer = nn.GRU(hidden_dim, hidden_dim, + batch_first=True, + bidirectional=False) + self.TIME.append(layer) + + layer = nn.Sequential( + nn.Linear(hidden_dim, hidden_dim//2, bias=False), + 
Conv2d(hidden_dim//2, hidden_dim//2, lorder, + groups=hidden_dim//2, skip_connect=True) + ) + self.br1.append(layer) + layer = nn.GRU(hidden_dim, hidden_dim//2, + batch_first=True, + bidirectional=False) + self.br2.append(layer) + ''' + + self.linear2 = AffineTransform(hidden_dim, outdim) + self.crm = crm + self.act = nn.Tanh() if self.crm else nn.Sigmoid() + self.vad = False + self.layers = layers + self.linearout = linearout + + def forward(self, x, ctl=None): + return self.forward_branch(x) + + def forward_sepconv(self, x): + x = torch.unsqueeze(x, 1) + for i in range(len(self.convs)): + x = self.convs[i](x) + x = F.relu(x) + B, C, H, W = x.shape + x = x.permute(0, 2, 1, 3) + x = torch.reshape(x, [B, H, C * W]) + x = self.linear1(x) + x = self.relu(x) + for i in range(self.layers): + x = self.deepfsmn[i](x) + x + x = self.linear2(x) + return self.act(x) + + def forward_branch(self, x): + x = self.linear1(x) + x = self.relu(x) + for i in range(self.layers): + z = self.FREQ[i](x) + x = z + x + x = self.linear2(x) + if self.linearout: + return x + return self.act(x) + + +class TACNet(nn.Module): + ''' transform average concatenate for ad hoc dr + ''' + + def __init__(self, + indim, + outdim, + layers=9, + hidden_dim=128, + lorder=20, + rorder=0, + crm=False, + vad=False, + linearout=False): + super(TACNet, self).__init__() + + self.linear1 = AffineTransform(indim, hidden_dim) + self.relu = RectifiedLinear(hidden_dim, hidden_dim) + + if rorder == 0: + repeats = [ + UniDeepFsmn(hidden_dim, hidden_dim, lorder, hidden_dim) + for i in range(layers) + ] + else: + repeats = [ + DeepFsmn(hidden_dim, hidden_dim, lorder, rorder, hidden_dim) + for i in range(layers) + ] + self.deepfsmn = nn.Sequential(*repeats) + + self.ch_transform = nn.ModuleList([]) + self.ch_average = nn.ModuleList([]) + self.ch_concat = nn.ModuleList([]) + for i in range(layers): + self.ch_transform.append( + nn.Sequential(nn.Linear(hidden_dim, hidden_dim), nn.PReLU())) + self.ch_average.append( + nn.Sequential(nn.Linear(hidden_dim, hidden_dim), nn.PReLU())) + self.ch_concat.append( + nn.Sequential( + nn.Linear(hidden_dim * 2, hidden_dim), nn.PReLU())) + + self.linear2 = AffineTransform(hidden_dim, outdim) + + self.crm = crm + if self.crm: + self.sig = nn.Tanh() + else: + self.sig = Sigmoid(outdim, outdim) + + self.vad = vad + if self.vad: + self.linear3 = AffineTransform(hidden_dim, 1) + + self.layers = layers + self.linearout = linearout + if self.linearout and self.vad: + print('Warning: not supported nnet') + + def forward(self, feat, ctl=None): + B, T, F = feat.shape + # assume 4ch + ch = 4 + zlist = [] + for c in range(ch): + z = self.linear1(feat[..., c * (F // 4):(c + 1) * (F // 4)]) + z = self.relu(z) + zlist.append(z) + for i in range(self.layers): + # forward + for c in range(ch): + zlist[c] = self.deepfsmn[i](zlist[c]) + + # transform + olist = [] + for c in range(ch): + z = self.ch_transform[i](zlist[c]) + olist.append(z) + # average + avg = 0 + for c in range(ch): + avg = avg + olist[c] + avg = avg / ch + avg = self.ch_average[i](avg) + # concate + for c in range(ch): + tac = torch.cat([olist[c], avg], dim=-1) + tac = self.ch_concat[i](tac) + zlist[c] = zlist[c] + tac + + for c in range(ch): + zlist[c] = self.sig(self.linear2(zlist[c])) + mask = torch.cat(zlist, dim=-1) + return mask + + def to_kaldi_nnet(self): + pass diff --git a/modelscope/pipelines/__init__.py b/modelscope/pipelines/__init__.py index d47ce8cf..14865872 100644 --- a/modelscope/pipelines/__init__.py +++ b/modelscope/pipelines/__init__.py @@ -1,4 
+1,4 @@
-from .audio import *  # noqa F403
+from .audio import LinearAECPipeline
 from .base import Pipeline
 from .builder import pipeline
 from .cv import *  # noqa F403
diff --git a/modelscope/pipelines/audio/__init__.py b/modelscope/pipelines/audio/__init__.py
index e69de29b..eaa31c7c 100644
--- a/modelscope/pipelines/audio/__init__.py
+++ b/modelscope/pipelines/audio/__init__.py
@@ -0,0 +1 @@
+from .linear_aec_pipeline import LinearAECPipeline
diff --git a/modelscope/pipelines/audio/linear_aec_pipeline.py b/modelscope/pipelines/audio/linear_aec_pipeline.py
new file mode 100644
index 00000000..528d8d47
--- /dev/null
+++ b/modelscope/pipelines/audio/linear_aec_pipeline.py
@@ -0,0 +1,160 @@
+import importlib
+import os
+from typing import Any, Dict
+
+import numpy as np
+import scipy.io.wavfile as wav
+import torch
+import yaml
+
+from modelscope.preprocessors.audio import LinearAECAndFbank
+from modelscope.utils.constant import ModelFile, Tasks
+from ..base import Pipeline
+from ..builder import PIPELINES
+
+FEATURE_MVN = 'feature.DEY.mvn.txt'
+
+CONFIG_YAML = 'dey_mini.yaml'
+
+
+def initialize_config(module_cfg):
+    r"""Dynamically load and invoke a module according to the config items.
+    1. Load the module referenced by the "module" param.
+    2. Call the function (or instantiate the class) named by the "main" param.
+    3. Pass the params in "args" to the function (or class) when calling
+       (or instantiating) it.
+
+    Args:
+        module_cfg (dict): config items, e.g.:
+            {
+                "module": "models.model",
+                "main": "Model",
+                "args": {...}
+            }
+
+    Returns:
+        the module loaded.
+    """
+    module = importlib.import_module(module_cfg['module'])
+    return getattr(module, module_cfg['main'])(**module_cfg['args'])
+
+
+@PIPELINES.register_module(
+    Tasks.speech_signal_process, module_name=r'speech_dfsmn_aec_psm_16k')
+class LinearAECPipeline(Pipeline):
+    r"""AEC inference pipeline; only a 16000 Hz sample rate is supported.
+
+    When invoking the pipeline via __call__(), provide two params:
+        Dict[str, Any]
+            the paths of the wav files, e.g.: {
+                "nearend_mic": "/your/data/near_end_mic_audio.wav",
+                "farend_speech": "/your/data/far_end_speech_audio.wav"}
+        output_path (str, optional): "/your/output/audio_after_aec.wav"
+            the file path to write the generated audio.
+    """
+
+    def __init__(self, model):
+        r"""
+        Args:
+            model: model id on modelscope hub.
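+
+        A minimal usage sketch, mirroring
+        tests/pipelines/test_speech_signal_process.py (the wav paths are
+        illustrative placeholders):
+
+        >>> from modelscope.pipelines import pipeline
+        >>> from modelscope.utils.constant import Tasks
+        >>> aec = pipeline(
+        ...     Tasks.speech_signal_process,
+        ...     model='damo/speech_dfsmn_aec_psm_16k',
+        ...     pipeline_name=r'speech_dfsmn_aec_psm_16k')
+        >>> aec({'nearend_mic': 'nearend_mic.wav',
+        ...      'farend_speech': 'farend_speech.wav'},
+        ...     output_path='output.wav')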
+ """ + super().__init__(model=model) + self.use_cuda = torch.cuda.is_available() + with open( + os.path.join(self.model, CONFIG_YAML), encoding='utf-8') as f: + self.config = yaml.full_load(f.read()) + self.config['io']['mvn'] = os.path.join(self.model, FEATURE_MVN) + self._init_model() + self.preprocessor = LinearAECAndFbank(self.config['io']) + + n_fft = self.config['loss']['args']['n_fft'] + hop_length = self.config['loss']['args']['hop_length'] + winlen = n_fft + window = torch.hamming_window(winlen, periodic=False) + + def stft(x): + return torch.stft( + x, + n_fft, + hop_length, + winlen, + center=False, + window=window.to(x.device), + return_complex=False) + + def istft(x, slen): + return torch.istft( + x, + n_fft, + hop_length, + winlen, + window=window.to(x.device), + center=False, + length=slen) + + self.stft = stft + self.istft = istft + + def _init_model(self): + checkpoint = torch.load( + os.path.join(self.model, ModelFile.TORCH_MODEL_BIN_FILE), + map_location='cpu') + self.model = initialize_config(self.config['nnet']) + if self.use_cuda: + self.model = self.model.cuda() + self.model.load_state_dict(checkpoint) + + def forward(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + r"""The AEC process. + + Args: + inputs: dict={'feature': Tensor, 'base': Tensor} + 'feature' feature of input audio. + 'base' the base audio to mask. + + Returns: + dict: + { + 'output_pcm': generated audio array + } + """ + output_data = self._process(inputs['feature'], inputs['base']) + return {'output_pcm': output_data} + + def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: + r"""The post process. Will save audio to file, if the output_path is given. + + Args: + inputs: dict: + { + 'output_pcm': generated audio array + } + kwargs: accept 'output_path' which is the path to write generated audio + + Returns: + dict: + { + 'output_pcm': generated audio array + } + """ + if 'output_path' in kwargs.keys(): + wav.write(kwargs['output_path'], self.preprocessor.SAMPLE_RATE, + inputs['output_pcm'].astype(np.int16)) + inputs['output_pcm'] = inputs['output_pcm'] / 32768.0 + return inputs + + def _process(self, fbanks, mixture): + if self.use_cuda: + fbanks = fbanks.cuda() + mixture = mixture.cuda() + if self.model.vad: + with torch.no_grad(): + masks, vad = self.model(fbanks.unsqueeze(0)) + masks = masks.permute([2, 1, 0]) + else: + with torch.no_grad(): + masks = self.model(fbanks.unsqueeze(0)) + masks = masks.permute([2, 1, 0]) + spectrum = self.stft(mixture) + masked_spec = spectrum * masks + masked_sig = self.istft(masked_spec, len(mixture)).cpu().numpy() + return masked_sig diff --git a/modelscope/pipelines/outputs.py b/modelscope/pipelines/outputs.py index c88e358c..15d8a995 100644 --- a/modelscope/pipelines/outputs.py +++ b/modelscope/pipelines/outputs.py @@ -84,6 +84,12 @@ TASK_OUTPUTS = { # ============ audio tasks =================== + # audio processed for single file in PCM format + # { + # "output_pcm": np.array with shape(samples,) and dtype float32 + # } + Tasks.speech_signal_process: ['output_pcm'], + # ============ multi-modal tasks =================== # image caption result for single sample diff --git a/modelscope/preprocessors/__init__.py b/modelscope/preprocessors/__init__.py index 81ca1007..5db5b407 100644 --- a/modelscope/preprocessors/__init__.py +++ b/modelscope/preprocessors/__init__.py @@ -1,5 +1,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
+from .audio import LinearAECAndFbank from .base import Preprocessor from .builder import PREPROCESSORS, build_preprocessor from .common import Compose diff --git a/modelscope/preprocessors/audio.py b/modelscope/preprocessors/audio.py new file mode 100644 index 00000000..a2c15714 --- /dev/null +++ b/modelscope/preprocessors/audio.py @@ -0,0 +1,230 @@ +import ctypes +import os +from typing import Any, Dict + +import numpy as np +import scipy.io.wavfile as wav +import torch +import torchaudio.compliance.kaldi as kaldi +from numpy.ctypeslib import ndpointer + +from modelscope.utils.constant import Fields +from .builder import PREPROCESSORS + + +def load_wav(path): + samp_rate, data = wav.read(path) + return np.float32(data), samp_rate + + +def load_library(libaec): + libaec_in_cwd = os.path.join('.', libaec) + if os.path.exists(libaec_in_cwd): + libaec = libaec_in_cwd + mitaec = ctypes.cdll.LoadLibrary(libaec) + fe_process = mitaec.fe_process_inst + fe_process.argtypes = [ + ndpointer(ctypes.c_float, flags='C_CONTIGUOUS'), + ndpointer(ctypes.c_float, flags='C_CONTIGUOUS'), ctypes.c_int, + ndpointer(ctypes.c_float, flags='C_CONTIGUOUS'), + ndpointer(ctypes.c_float, flags='C_CONTIGUOUS'), + ndpointer(ctypes.c_float, flags='C_CONTIGUOUS') + ] + return fe_process + + +def do_linear_aec(fe_process, mic, ref, int16range=True): + mic = np.float32(mic) + ref = np.float32(ref) + if len(mic) > len(ref): + mic = mic[:len(ref)] + out_mic = np.zeros_like(mic) + out_linear = np.zeros_like(mic) + out_echo = np.zeros_like(mic) + out_ref = np.zeros_like(mic) + if int16range: + mic /= 32768 + ref /= 32768 + fe_process(mic, ref, len(mic), out_mic, out_linear, out_echo) + # out_ref not in use here + if int16range: + out_mic *= 32768 + out_linear *= 32768 + out_echo *= 32768 + return out_mic, out_ref, out_linear, out_echo + + +def load_kaldi_feature_transform(filename): + fp = open(filename, 'r') + all_str = fp.read() + pos1 = all_str.find('AddShift') + pos2 = all_str.find('[', pos1) + pos3 = all_str.find(']', pos2) + mean = np.fromstring(all_str[pos2 + 1:pos3], dtype=np.float32, sep=' ') + pos1 = all_str.find('Rescale') + pos2 = all_str.find('[', pos1) + pos3 = all_str.find(']', pos2) + scale = np.fromstring(all_str[pos2 + 1:pos3], dtype=np.float32, sep=' ') + fp.close() + return mean, scale + + +class Feature: + r"""Extract feat from one utterance. 
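+
+    A minimal sketch of the intended use (values are illustrative and
+    wav_data is a placeholder; the fbank_config keys follow
+    torchaudio.compliance.kaldi.fbank):
+
+    >>> f = Feature({'frame_length': 25, 'frame_shift': 10,
+    ...              'sample_frequency': 16000}, feat_type='fbank')
+    >>> feat = f.compute(torch.from_numpy(np.float32(wav_data)))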
+ """ + + def __init__(self, + fbank_config, + feat_type='spec', + mvn_file=None, + cuda=False): + r""" + + Args: + fbank_config (dict): + feat_type (str): + raw: do nothing + fbank: use kaldi.fbank + spec: Real/Imag + logpow: log(1+|x|^2) + mvn_file (str): the path of data file for mean variance normalization + cuda: + """ + self.fbank_config = fbank_config + self.feat_type = feat_type + self.n_fft = fbank_config['frame_length'] * fbank_config[ + 'sample_frequency'] // 1000 + self.hop_length = fbank_config['frame_shift'] * fbank_config[ + 'sample_frequency'] // 1000 + self.window = torch.hamming_window(self.n_fft, periodic=False) + + self.mvn = False + if mvn_file is not None and os.path.exists(mvn_file): + print(f'loading mvn file: {mvn_file}') + shift, scale = load_kaldi_feature_transform(mvn_file) + self.shift = torch.from_numpy(shift) + self.scale = torch.from_numpy(scale) + self.mvn = True + if cuda: + self.window = self.window.cuda() + if self.mvn: + self.shift = self.shift.cuda() + self.scale = self.scale.cuda() + + def compute(self, utt): + r""" + + Args: + utt: in [-32768, 32767] range + + Returns: + [..., T, F] + """ + if self.feat_type == 'raw': + return utt + elif self.feat_type == 'fbank': + if len(utt.shape) == 1: + utt = utt.unsqueeze(0) + feat = kaldi.fbank(utt, **self.fbank_config) + elif self.feat_type == 'spec': + spec = torch.stft( + utt / 32768, + self.n_fft, + self.hop_length, + self.n_fft, + self.window, + center=False, + return_complex=True) + feat = torch.cat([spec.real, spec.imag], dim=-2).permute(-1, -2) + elif self.feat_type == 'logpow': + spec = torch.stft( + utt, + self.n_fft, + self.hop_length, + self.n_fft, + self.window, + center=False, + return_complex=True) + abspow = torch.abs(spec)**2 + feat = torch.log(1 + abspow).permute(-1, -2) + return feat + + def normalize(self, feat): + if self.mvn: + feat = feat + self.shift + feat = feat * self.scale + return feat + + +@PREPROCESSORS.register_module(Fields.audio) +class LinearAECAndFbank: + SAMPLE_RATE = 16000 + + def __init__(self, io_config): + self.trunc_length = 7200 * self.SAMPLE_RATE + self.linear_aec_delay = io_config['linear_aec_delay'] + self.feature = Feature(io_config['fbank_config'], + io_config['feat_type'], io_config['mvn']) + self.mitaec = load_library(io_config['mitaec_library']) + self.mask_on_mic = io_config['mask_on'] == 'nearend_mic' + + def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]: + """ linear filtering the near end mic and far end audio, then extract the feature + :param data: dict with two keys and correspond audios: "nearend_mic" and "farend_speech" + :return: dict with two keys and Tensor values: "base" linear filtered audio,and "feature" + """ + # read files + nearend_mic, fs = load_wav(data['nearend_mic']) + assert fs == self.SAMPLE_RATE, f'The sample rate should be {self.SAMPLE_RATE}' + farend_speech, fs = load_wav(data['farend_speech']) + assert fs == self.SAMPLE_RATE, f'The sample rate should be {self.SAMPLE_RATE}' + if 'nearend_speech' in data: + nearend_speech, fs = load_wav(data['nearend_speech']) + assert fs == self.SAMPLE_RATE, f'The sample rate should be {self.SAMPLE_RATE}' + else: + nearend_speech = np.zeros_like(nearend_mic) + + out_mic, out_ref, out_linear, out_echo = do_linear_aec( + self.mitaec, nearend_mic, farend_speech) + # fix 20ms linear aec delay by delaying the target speech + extra_zeros = np.zeros([int(self.linear_aec_delay * fs)]) + nearend_speech = np.concatenate([extra_zeros, nearend_speech]) + # truncate files to the same length + flen = min( 
+ len(out_mic), len(out_ref), len(out_linear), len(out_echo), + len(nearend_speech)) + fstart = 0 + flen = min(flen, self.trunc_length) + nearend_mic, out_ref, out_linear, out_echo, nearend_speech = ( + out_mic[fstart:flen], out_ref[fstart:flen], + out_linear[fstart:flen], out_echo[fstart:flen], + nearend_speech[fstart:flen]) + + # extract features (frames, [mic, linear, ref, aes?]) + feat = torch.FloatTensor() + + nearend_mic = torch.from_numpy(np.float32(nearend_mic)) + fbank_nearend_mic = self.feature.compute(nearend_mic) + feat = torch.cat([feat, fbank_nearend_mic], dim=1) + + out_linear = torch.from_numpy(np.float32(out_linear)) + fbank_out_linear = self.feature.compute(out_linear) + feat = torch.cat([feat, fbank_out_linear], dim=1) + + out_echo = torch.from_numpy(np.float32(out_echo)) + fbank_out_echo = self.feature.compute(out_echo) + feat = torch.cat([feat, fbank_out_echo], dim=1) + + # feature transform + feat = self.feature.normalize(feat) + + # prepare target + if nearend_speech is not None: + nearend_speech = torch.from_numpy(np.float32(nearend_speech)) + + if self.mask_on_mic: + base = nearend_mic + else: + base = out_linear + out_data = {'base': base, 'target': nearend_speech, 'feature': feat} + return out_data diff --git a/requirements/runtime.txt b/requirements/runtime.txt index 43684a06..dd5616a2 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -7,6 +7,7 @@ opencv-python-headless Pillow>=6.2.0 pyyaml requests +scipy tokenizers<=0.10.3 transformers<=4.16.2 yapf diff --git a/setup.cfg b/setup.cfg index 0b929b04..16c10cae 100644 --- a/setup.cfg +++ b/setup.cfg @@ -11,6 +11,7 @@ default_section = THIRDPARTY BASED_ON_STYLE = pep8 BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true +SPLIT_BEFORE_ARITHMETIC_OPERATOR = true [codespell] skip = *.ipynb @@ -20,5 +21,5 @@ ignore-words-list = patten,nd,ty,mot,hist,formating,winn,gool,datas,wan,confids [flake8] select = B,C,E,F,P,T4,W,B9 max-line-length = 120 -ignore = F401,F821 +ignore = F401,F821,W503 exclude = docs/src,*.pyi,.git diff --git a/tests/pipelines/test_speech_signal_process.py b/tests/pipelines/test_speech_signal_process.py new file mode 100644 index 00000000..8b5c9468 --- /dev/null +++ b/tests/pipelines/test_speech_signal_process.py @@ -0,0 +1,56 @@ +import os.path +import shutil +import unittest + +from modelscope.fileio import File +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.hub import get_model_cache_dir + +NEAREND_MIC_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/AEC/sample_audio/nearend_mic.wav' +FAREND_SPEECH_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/AEC/sample_audio/farend_speech.wav' +NEAREND_MIC_FILE = 'nearend_mic.wav' +FAREND_SPEECH_FILE = 'farend_speech.wav' + +AEC_LIB_URL = 'http://isv-data.oss-cn-hangzhou.aliyuncs.com/ics%2FMaaS%2FAEC%2Flib%2Flibmitaec_pyio.so' \ + '?Expires=1664085465&OSSAccessKeyId=LTAIxjQyZNde90zh&Signature=Y7gelmGEsQAJRK4yyHSYMrdWizk%3D' +AEC_LIB_FILE = 'libmitaec_pyio.so' + + +def download(remote_path, local_path): + local_dir = os.path.dirname(local_path) + if len(local_dir) > 0: + if not os.path.exists(local_dir): + os.makedirs(local_dir) + with open(local_path, 'wb') as ofile: + ofile.write(File.read(remote_path)) + + +class SpeechSignalProcessTest(unittest.TestCase): + + def setUp(self) -> None: + self.model_id = 'damo/speech_dfsmn_aec_psm_16k' + # switch to False if downloading everytime is not desired + 
purge_cache = True + if purge_cache: + shutil.rmtree( + get_model_cache_dir(self.model_id), ignore_errors=True) + # A temporary hack to provide c++ lib. Download it first. + download(AEC_LIB_URL, AEC_LIB_FILE) + + def test_run(self): + download(NEAREND_MIC_URL, NEAREND_MIC_FILE) + download(FAREND_SPEECH_URL, FAREND_SPEECH_FILE) + input = { + 'nearend_mic': NEAREND_MIC_FILE, + 'farend_speech': FAREND_SPEECH_FILE + } + aec = pipeline( + Tasks.speech_signal_process, + model=self.model_id, + pipeline_name=r'speech_dfsmn_aec_psm_16k') + aec(input, output_path='output.wav') + + +if __name__ == '__main__': + unittest.main() From c4b6a23bc96d12152774f616de1c4177f7a84116 Mon Sep 17 00:00:00 2001 From: "yingda.chen" Date: Mon, 20 Jun 2022 10:54:00 +0800 Subject: [PATCH 15/16] [to #42322933] unify naming for model and pipeline files Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9083378 --- modelscope/models/nlp/__init__.py | 8 ++++---- ...ation_model.py => bert_for_sequence_classification.py} | 0 ...xt_generation_model.py => palm_for_text_generation.py} | 2 +- ...milarity_model.py => sbert_for_sentence_similarity.py} | 0 ...ication_model.py => sbert_for_token_classification.py} | 1 - modelscope/pipelines/multi_modal/__init__.py | 2 +- .../{image_captioning.py => image_caption_pipeline.py} | 0 modelscope/pipelines/nlp/sentence_similarity_pipeline.py | 3 --- .../pipelines/nlp/sequence_classification_pipeline.py | 3 --- modelscope/pipelines/nlp/word_segmentation_pipeline.py | 2 -- 10 files changed, 6 insertions(+), 15 deletions(-) rename modelscope/models/nlp/{sequence_classification_model.py => bert_for_sequence_classification.py} (100%) rename modelscope/models/nlp/{text_generation_model.py => palm_for_text_generation.py} (98%) rename modelscope/models/nlp/{sentence_similarity_model.py => sbert_for_sentence_similarity.py} (100%) rename modelscope/models/nlp/{token_classification_model.py => sbert_for_token_classification.py} (99%) rename modelscope/pipelines/multi_modal/{image_captioning.py => image_caption_pipeline.py} (100%) diff --git a/modelscope/models/nlp/__init__.py b/modelscope/models/nlp/__init__.py index aefcef4a..7129fcb8 100644 --- a/modelscope/models/nlp/__init__.py +++ b/modelscope/models/nlp/__init__.py @@ -1,4 +1,4 @@ -from .sentence_similarity_model import * # noqa F403 -from .sequence_classification_model import * # noqa F403 -from .text_generation_model import * # noqa F403 -from .token_classification_model import * # noqa F403 +from .bert_for_sequence_classification import * # noqa F403 +from .palm_for_text_generation import * # noqa F403 +from .sbert_for_sentence_similarity import * # noqa F403 +from .sbert_for_token_classification import * # noqa F403 diff --git a/modelscope/models/nlp/sequence_classification_model.py b/modelscope/models/nlp/bert_for_sequence_classification.py similarity index 100% rename from modelscope/models/nlp/sequence_classification_model.py rename to modelscope/models/nlp/bert_for_sequence_classification.py diff --git a/modelscope/models/nlp/text_generation_model.py b/modelscope/models/nlp/palm_for_text_generation.py similarity index 98% rename from modelscope/models/nlp/text_generation_model.py rename to modelscope/models/nlp/palm_for_text_generation.py index 8feac691..ffba7265 100644 --- a/modelscope/models/nlp/text_generation_model.py +++ b/modelscope/models/nlp/palm_for_text_generation.py @@ -1,4 +1,4 @@ -from typing import Any, Dict +from typing import Dict from modelscope.utils.constant import Tasks from ..base import Model, Tensor 
diff --git a/modelscope/models/nlp/sentence_similarity_model.py b/modelscope/models/nlp/sbert_for_sentence_similarity.py similarity index 100% rename from modelscope/models/nlp/sentence_similarity_model.py rename to modelscope/models/nlp/sbert_for_sentence_similarity.py diff --git a/modelscope/models/nlp/token_classification_model.py b/modelscope/models/nlp/sbert_for_token_classification.py similarity index 99% rename from modelscope/models/nlp/token_classification_model.py rename to modelscope/models/nlp/sbert_for_token_classification.py index 43d4aafb..b918dc37 100644 --- a/modelscope/models/nlp/token_classification_model.py +++ b/modelscope/models/nlp/sbert_for_token_classification.py @@ -1,4 +1,3 @@ -import os from typing import Any, Dict, Union import numpy as np diff --git a/modelscope/pipelines/multi_modal/__init__.py b/modelscope/pipelines/multi_modal/__init__.py index 7d9a2c59..b1ee121c 100644 --- a/modelscope/pipelines/multi_modal/__init__.py +++ b/modelscope/pipelines/multi_modal/__init__.py @@ -1 +1 @@ -from .image_captioning import ImageCaptionPipeline +from .image_caption_pipeline import ImageCaptionPipeline diff --git a/modelscope/pipelines/multi_modal/image_captioning.py b/modelscope/pipelines/multi_modal/image_caption_pipeline.py similarity index 100% rename from modelscope/pipelines/multi_modal/image_captioning.py rename to modelscope/pipelines/multi_modal/image_caption_pipeline.py diff --git a/modelscope/pipelines/nlp/sentence_similarity_pipeline.py b/modelscope/pipelines/nlp/sentence_similarity_pipeline.py index 44d91756..1b630c10 100644 --- a/modelscope/pipelines/nlp/sentence_similarity_pipeline.py +++ b/modelscope/pipelines/nlp/sentence_similarity_pipeline.py @@ -1,8 +1,5 @@ -import os -import uuid from typing import Any, Dict, Union -import json import numpy as np from modelscope.models.nlp import SbertForSentenceSimilarity diff --git a/modelscope/pipelines/nlp/sequence_classification_pipeline.py b/modelscope/pipelines/nlp/sequence_classification_pipeline.py index 9d2e4273..1dbe2efd 100644 --- a/modelscope/pipelines/nlp/sequence_classification_pipeline.py +++ b/modelscope/pipelines/nlp/sequence_classification_pipeline.py @@ -1,8 +1,5 @@ -import os -import uuid from typing import Any, Dict, Union -import json import numpy as np from modelscope.models.nlp import BertForSequenceClassification diff --git a/modelscope/pipelines/nlp/word_segmentation_pipeline.py b/modelscope/pipelines/nlp/word_segmentation_pipeline.py index 49aa112a..1cc08a38 100644 --- a/modelscope/pipelines/nlp/word_segmentation_pipeline.py +++ b/modelscope/pipelines/nlp/word_segmentation_pipeline.py @@ -1,7 +1,5 @@ from typing import Any, Dict, Optional, Union -import numpy as np - from modelscope.models import Model from modelscope.models.nlp import StructBertForTokenClassification from modelscope.preprocessors import TokenClassifcationPreprocessor From 99fb50369544c244f1045bc880b6a04f300506bd Mon Sep 17 00:00:00 2001 From: "hemu.zp" Date: Mon, 20 Jun 2022 16:00:31 +0800 Subject: [PATCH 16/16] [to #42322933] Add Palm2.0 model. 
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Integrate the Palm2.0 model with Chinese and English support, reusing the
text-generation pipeline (a usage sketch follows at the end of this patch).

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9066550
---
 .../models/nlp/palm_for_text_generation.py    | 25 +++----
 modelscope/pipelines/builder.py               |  3 +-
 .../pipelines/nlp/text_generation_pipeline.py | 34 +++++----
 modelscope/preprocessors/nlp.py               | 11 ++-
 requirements/nlp.txt                          |  2 +-
 requirements/runtime.txt                      |  2 +-
 tests/pipelines/test_text_generation.py       | 72 ++++++++++++-------
 7 files changed, 83 insertions(+), 66 deletions(-)

diff --git a/modelscope/models/nlp/palm_for_text_generation.py b/modelscope/models/nlp/palm_for_text_generation.py
index ffba7265..e5799feb 100644
--- a/modelscope/models/nlp/palm_for_text_generation.py
+++ b/modelscope/models/nlp/palm_for_text_generation.py
@@ -7,7 +7,7 @@ from ..builder import MODELS
 __all__ = ['PalmForTextGeneration']
 
 
-@MODELS.register_module(Tasks.text_generation, module_name=r'palm')
+@MODELS.register_module(Tasks.text_generation, module_name=r'palm2.0')
 class PalmForTextGeneration(Model):
 
     def __init__(self, model_dir: str, *args, **kwargs):
@@ -18,35 +18,26 @@ class PalmForTextGeneration(Model):
             model_cls (Optional[Any], optional): model loader, if None, use the
                 default loader to load model weights, by default None.
         """
-        from sofa import PalmTokenizer
-
         super().__init__(model_dir, *args, **kwargs)
         self.model_dir = model_dir
 
-        from sofa.models.palm import PalmForConditionalGeneration, TextGenerator
-        tokenizer = kwargs.pop('tokenizer',
-                               PalmTokenizer.from_pretrained(model_dir))
+        from sofa.models.palm_v2 import PalmForConditionalGeneration, Translator
         model = PalmForConditionalGeneration.from_pretrained(model_dir)
-        self.generator = TextGenerator(model, tokenizer)
+        self.tokenizer = model.tokenizer
+        self.generator = Translator(model)
 
     def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]:
         """return the result by the model
 
         Args:
-            input (Dict[str, Any]): the preprocessed data
+            input (Dict[str, Tensor]): the preprocessed data
 
         Returns:
-            Dict[str, np.ndarray]: results
+            Dict[str, Tensor]: results
         Example:
             {
-                'predictions': array([1]), # lable 0-negative 1-positive
-                'probabilities': array([[0.11491239, 0.8850876 ]], dtype=float32),
-                'logits': array([[-0.53860897, 1.5029076 ]], dtype=float32) # true value
+                'predictions': Tensor([[1377, 4959, 2785, 6392...]]), # tokens need to be decoded by the tokenizer
             }
         """
-        encoder_inputs = [
-            input['input_ids'], input['token_type_ids'],
-            input['attention_mask']
-        ]
-        return self.generator(encoder_inputs)
+        return self.generator(**input)
diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py
index c24a7c3e..6e2c791d 100644
--- a/modelscope/pipelines/builder.py
+++ b/modelscope/pipelines/builder.py
@@ -22,7 +22,8 @@ DEFAULT_MODEL_FOR_PIPELINE = {
     Tasks.image_matting: ('image-matting', 'damo/cv_unet_image-matting'),
     Tasks.text_classification: ('bert-sentiment-analysis',
                                 'damo/bert-base-sst2'),
-    Tasks.text_generation: ('palm', 'damo/nlp_palm_text-generation_chinese'),
+    Tasks.text_generation: ('palm2.0',
+                            'damo/nlp_palm2.0_text-generation_chinese-base'),
     Tasks.image_captioning: ('ofa', None),
     Tasks.image_generation:
     ('person-image-cartoon',
diff --git a/modelscope/pipelines/nlp/text_generation_pipeline.py b/modelscope/pipelines/nlp/text_generation_pipeline.py
index 8b6bf8a9..881e7ea6 100644
--- a/modelscope/pipelines/nlp/text_generation_pipeline.py
+++ b/modelscope/pipelines/nlp/text_generation_pipeline.py
@@ -10,7 +10,7 @@ from ..builder import PIPELINES
 
 __all__ = ['TextGenerationPipeline']
 
-@PIPELINES.register_module(Tasks.text_generation, module_name=r'palm')
+@PIPELINES.register_module(Tasks.text_generation, module_name=r'palm2.0')
 class TextGenerationPipeline(Pipeline):
 
     def __init__(self,
@@ -23,15 +23,16 @@ class TextGenerationPipeline(Pipeline):
             model (SequenceClassificationModel): a model instance
             preprocessor (SequenceClassificationPreprocessor): a preprocessor instance
         """
-        sc_model = model if isinstance(
+        model = model if isinstance(
             model, PalmForTextGeneration) else Model.from_pretrained(model)
         if preprocessor is None:
             preprocessor = TextGenerationPreprocessor(
-                sc_model.model_dir,
+                model.model_dir,
+                model.tokenizer,
                 first_sequence='sentence',
                 second_sequence=None)
-        super().__init__(model=sc_model, preprocessor=preprocessor, **kwargs)
-        self.tokenizer = preprocessor.tokenizer
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.tokenizer = model.tokenizer
 
     def postprocess(self, inputs: Dict[str, Tensor]) -> Dict[str, str]:
         """process the prediction results
@@ -42,17 +43,20 @@
         Returns:
             Dict[str, str]: the prediction results
         """
+        replace_tokens_bert = (('[unused0]', ''), ('[PAD]', ''),
+                               ('[unused1]', ''), (r' +', ' '), ('[SEP]', ''),
+                               ('[unused2]', ''), ('[CLS]', ''), ('[UNK]', ''))
+        replace_tokens_roberta = ((r' +', ' '), ('<mask>', ''),
+                                  ('<pad>', ''), ('<s>', ''),
+                                  ('</s>', ''), ('<unk>', ' '))
 
-        vocab_size = len(self.tokenizer.vocab)
         pred_list = inputs['predictions']
         pred_ids = pred_list[0][0].cpu().numpy().tolist()
-        for j in range(len(pred_ids)):
-            if pred_ids[j] >= vocab_size:
-                pred_ids[j] = 100
-        pred = self.tokenizer.convert_ids_to_tokens(pred_ids)
-        pred_string = ''.join(pred).replace(
-            '##',
-            '').split('[SEP]')[0].replace('[CLS]',
-                                          '').replace('[SEP]',
-                                                      '').replace('[UNK]', '')
+        pred_string = self.tokenizer.decode(pred_ids)
+        for _old, _new in replace_tokens_bert:
+            pred_string = pred_string.replace(_old, _new)
+        pred_string = pred_string.strip()
+        for _old, _new in replace_tokens_roberta:
+            pred_string = pred_string.replace(_old, _new)
+        pred_string = pred_string.strip()
         return {'text': pred_string}
diff --git a/modelscope/preprocessors/nlp.py b/modelscope/preprocessors/nlp.py
index 6a4a25fc..9bcaa87c 100644
--- a/modelscope/preprocessors/nlp.py
+++ b/modelscope/preprocessors/nlp.py
@@ -115,17 +115,15 @@ class SequenceClassificationPreprocessor(Preprocessor):
         return rst
 
 
-@PREPROCESSORS.register_module(Fields.nlp, module_name=r'palm')
+@PREPROCESSORS.register_module(Fields.nlp, module_name=r'palm2.0')
 class TextGenerationPreprocessor(Preprocessor):
 
-    def __init__(self, model_dir: str, *args, **kwargs):
+    def __init__(self, model_dir: str, tokenizer, *args, **kwargs):
         """preprocess the data using the vocab.txt from the `model_dir` path
 
         Args:
            model_dir (str): model path
        """
-        from sofa import PalmTokenizer
-
        super().__init__(*args, **kwargs)
        self.model_dir: str = model_dir
 
@@ -134,7 +132,7 @@ class TextGenerationPreprocessor(Preprocessor):
         self.second_sequence: str = kwargs.pop('second_sequence',
                                                'second_sequence')
         self.sequence_length: int = kwargs.pop('sequence_length', 128)
-        self.tokenizer = PalmTokenizer.from_pretrained(model_dir)
+        self.tokenizer = tokenizer
 
     @type_assert(object, str)
     def __call__(self, data: str) -> Dict[str, Any]:
@@ -153,7 +151,7 @@ class TextGenerationPreprocessor(Preprocessor):
             new_data = {self.first_sequence: data}
 
         # preprocess the data for the model input
-        rst = {'input_ids': [], 'attention_mask': [], 'token_type_ids': []}
+        rst = {'input_ids': [], 'attention_mask': []}
 
         max_seq_length = self.sequence_length
 
@@ -168,7 +166,6 @@ class TextGenerationPreprocessor(Preprocessor):
 
             rst['input_ids'].append(feature['input_ids'])
             rst['attention_mask'].append(feature['attention_mask'])
-            rst['token_type_ids'].append(feature['token_type_ids'])
 
         return {k: torch.tensor(v) for k, v in rst.items()}
 
diff --git a/requirements/nlp.txt b/requirements/nlp.txt
index 8de83798..4e146a81 100644
--- a/requirements/nlp.txt
+++ b/requirements/nlp.txt
@@ -1 +1 @@
-https://alinlp.alibaba-inc.com/pypi/sofa-1.0.1.3-py3-none-any.whl
+https://alinlp.alibaba-inc.com/pypi/sofa-1.0.2-py3-none-any.whl
diff --git a/requirements/runtime.txt b/requirements/runtime.txt
index dd5616a2..e97352aa 100644
--- a/requirements/runtime.txt
+++ b/requirements/runtime.txt
@@ -1,7 +1,7 @@
 addict
 datasets
 easydict
-https://mindscope.oss-cn-hangzhou.aliyuncs.com/sdklib/maas_hub-0.2.2.dev0-py3-none-any.whl
+https://mindscope.oss-cn-hangzhou.aliyuncs.com/sdklib/maas_hub-0.2.4.dev0-py3-none-any.whl
 numpy
 opencv-python-headless
 Pillow>=6.2.0
diff --git a/tests/pipelines/test_text_generation.py b/tests/pipelines/test_text_generation.py
index 39d57ff7..fbdd165f 100644
--- a/tests/pipelines/test_text_generation.py
+++ b/tests/pipelines/test_text_generation.py
@@ -12,43 +12,67 @@ from modelscope.utils.test_utils import test_level
 class TextGenerationTest(unittest.TestCase):
 
-    model_id = 'damo/nlp_palm_text-generation_chinese'
-    input1 = "今日天气类型='晴'&温度变化趋势='大幅上升'&最低气温='28℃'&最高气温='31℃'&体感='湿热'"
-    input2 = "今日天气类型='多云'&体感='舒适'&最低气温='26℃'&最高气温='30℃'"
+    model_id_zh = 'damo/nlp_palm2.0_text-generation_chinese-base'
+    model_id_en = 'damo/nlp_palm2.0_text-generation_english-base'
+    input_zh = """
+    本文总结了十个可穿戴产品的设计原则,而这些原则,同样也是笔者认为是这个行业最吸引人的地方:
+    1.为人们解决重复性问题;2.从人开始,而不是从机器开始;3.要引起注意,但不要刻意;4.提升用户能力,而不是取代
+    """
+    input_en = """
+    The Director of Public Prosecutions who let off Lord Janner over alleged child sex abuse started
+    her career at a legal chambers when the disgraced Labour peer was a top QC there . Alison Saunders ,
+    54 , sparked outrage last week when she decided the 86-year-old should not face a string of charges
+    of paedophilia against nine children because he has dementia . Today , newly-released documents
+    revealed damning evidence that abuse was covered up by police and social workers for more than 20 years .
+    And now it has emerged Mrs Saunders ' law career got off to a flying start when she secured her
+    pupillage -- a barrister 's training contract at 1 Garden Court Chambers in London in 1983 .
+ """ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run(self): - cache_path = snapshot_download(self.model_id) - preprocessor = TextGenerationPreprocessor( - cache_path, first_sequence='sentence', second_sequence=None) - model = PalmForTextGeneration( - cache_path, tokenizer=preprocessor.tokenizer) - pipeline1 = TextGenerationPipeline(model, preprocessor) - pipeline2 = pipeline( - Tasks.text_generation, model=model, preprocessor=preprocessor) - print(f'input: {self.input1}\npipeline1: {pipeline1(self.input1)}') - print() - print(f'input: {self.input2}\npipeline2: {pipeline2(self.input2)}') + for model_id, input in ((self.model_id_zh, self.input_zh), + (self.model_id_en, self.input_en)): + cache_path = snapshot_download(model_id) + model = PalmForTextGeneration(cache_path) + preprocessor = TextGenerationPreprocessor( + cache_path, + model.tokenizer, + first_sequence='sentence', + second_sequence=None) + pipeline1 = TextGenerationPipeline(model, preprocessor) + pipeline2 = pipeline( + Tasks.text_generation, model=model, preprocessor=preprocessor) + print( + f'pipeline1: {pipeline1(input)}\npipeline2: {pipeline2(input)}' + ) @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_model_from_modelhub(self): - model = Model.from_pretrained(self.model_id) - preprocessor = TextGenerationPreprocessor( - model.model_dir, first_sequence='sentence', second_sequence=None) - pipeline_ins = pipeline( - task=Tasks.text_generation, model=model, preprocessor=preprocessor) - print(pipeline_ins(self.input1)) + for model_id, input in ((self.model_id_zh, self.input_zh), + (self.model_id_en, self.input_en)): + model = Model.from_pretrained(model_id) + preprocessor = TextGenerationPreprocessor( + model.model_dir, + model.tokenizer, + first_sequence='sentence', + second_sequence=None) + pipeline_ins = pipeline( + task=Tasks.text_generation, + model=model, + preprocessor=preprocessor) + print(pipeline_ins(input)) @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_model_name(self): - pipeline_ins = pipeline( - task=Tasks.text_generation, model=self.model_id) - print(pipeline_ins(self.input2)) + for model_id, input in ((self.model_id_zh, self.input_zh), + (self.model_id_en, self.input_en)): + pipeline_ins = pipeline(task=Tasks.text_generation, model=model_id) + print(pipeline_ins(input)) @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_default_model(self): pipeline_ins = pipeline(task=Tasks.text_generation) - print(pipeline_ins(self.input2)) + print(pipeline_ins(self.input_zh)) if __name__ == '__main__':