Merge branch 'master-github' into master-merge-github20230310

yzhao
2023-03-10 13:52:31 +08:00
11 changed files with 97 additions and 48 deletions

View File

@@ -45,7 +45,7 @@ for i in "$@"; do
;;
--modelscope=*)
modelscope_version="${i#*=}"
-shift # cudatoolkit for pytorch
+shift # modelscope version
;;
--test)
run_ci_test=True
@@ -65,7 +65,7 @@ for i in "$@"; do
;;
--push)
is_push=True
-shift # is dsw, will set dsw cache location
+shift # option for push image to remote repo
;;
--help)
usage
@@ -126,7 +126,7 @@ echo "$is_dsw"
if [ "$is_dsw" == "False" ]; then
echo "Not DSW image"
else
echo "Building dsw image well need set ModelScope lib cache location."
echo "Building dsw image will need set ModelScope lib cache location."
docker_file_content="${docker_file_content} \nENV MODELSCOPE_CACHE=/mnt/workspace/.cache/modelscope"
fi
if [ "$is_ci_test" == "True" ]; then

View File

@@ -39,7 +39,8 @@ Apart from harboring implementations of a wide range of different models, ModelS
# Models and Online Accessibility
-Hundreds of models are made publicly available on [ModelScope]( https://www.modelscope.cn) (600+ and counting), covering the latest development in areas such as NLP, CV, Audio, Multi-modality, and AI for Science, etc. Many of these models represent the SOTA in their specific fields, and made their open-sourced debut on ModelScope. Users can visit ModelScope([modelscope.cn](http://www.modelscope.cn)) and experience first-hand how these models perform via online experience, with just a few clicks. Immediate developer-experience is also possible through the ModelScope Notebook, which is backed by ready-to-use CPU/GPU development environment in the cloud - only one click away on [ModelScope](https://www.modelscope.cn).
+Hundreds of models are made publicly available on [ModelScope](https://www.modelscope.cn) (700+ and counting), covering the latest developments in areas such as NLP, CV, Audio, Multi-modality, and AI for Science. Many of these models represent the SOTA in their specific fields and made their open-source debut on ModelScope. Users can visit ModelScope ([modelscope.cn](http://www.modelscope.cn)) and experience first-hand how these models perform, with just a few clicks. Immediate developer experience is also possible through the ModelScope Notebook, which is backed by a ready-to-use CPU/GPU development environment in the cloud - only one click away on [ModelScope](https://www.modelscope.cn).
<p align="center">
<br>
@@ -67,23 +68,20 @@ NLP:
* [nlp_convai_text2sql_pretrain_cn](https://modelscope.cn/models/damo/nlp_convai_text2sql_pretrain_cn)
-Audio:
+Multi-Modal:
-* [speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch)
+* [multi-modal_clip-vit-base-patch16_zh](https://modelscope.cn/models/damo/multi-modal_clip-vit-base-patch16_zh)
-* [speech_sambert-hifigan_tts_zh-cn_16k](https://modelscope.cn/models/damo/speech_sambert-hifigan_tts_zh-cn_16k)
+* [ofa_pretrain_base_zh](https://modelscope.cn/models/damo/ofa_pretrain_base_zh)
-* [speech_charctc_kws_phone-xiaoyun](https://modelscope.cn/models/damo/speech_charctc_kws_phone-xiaoyun)
-* [u2pp_conformer-asr-cn-16k-online](https://modelscope.cn/models/wenet/u2pp_conformer-asr-cn-16k-online)
-* [speech_frcrn_ans_cirm_16k](https://modelscope.cn/models/damo/speech_frcrn_ans_cirm_16k)
-* [speech_dfsmn_aec_psm_16k](https://modelscope.cn/models/damo/speech_dfsmn_aec_psm_16k)
+* [Taiyi-Stable-Diffusion-1B-Chinese-v0.1](https://modelscope.cn/models/fengshenbang/Taiyi-Stable-Diffusion-1B-Chinese-v0.1)
+* [mplug_visual-question-answering_coco_large_en](https://modelscope.cn/models/damo/mplug_visual-question-answering_coco_large_en)
CV:
+* [cv_controlnet_controllable-image-generation_nine-annotators](https://modelscope.cn/models/dienstag/cv_controlnet_controllable-image-generation_nine-annotators/summary)
* [cv_tinynas_object-detection_damoyolo](https://modelscope.cn/models/damo/cv_tinynas_object-detection_damoyolo)
* [cv_unet_person-image-cartoon_compound-models](https://modelscope.cn/models/damo/cv_unet_person-image-cartoon_compound-models)
@@ -101,15 +99,21 @@ CV:
* [cv_resnest101_general_recognition](https://modelscope.cn/models/damo/cv_resnest101_general_recognition)
-Multi-Modal:
+Audio:
-* [multi-modal_clip-vit-base-patch16_zh](https://modelscope.cn/models/damo/multi-modal_clip-vit-base-patch16_zh)
+* [speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch)
-* [ofa_pretrain_base_zh](https://modelscope.cn/models/damo/ofa_pretrain_base_zh)
+* [speech_sambert-hifigan_tts_zh-cn_16k](https://modelscope.cn/models/damo/speech_sambert-hifigan_tts_zh-cn_16k)
+* [speech_charctc_kws_phone-xiaoyun](https://modelscope.cn/models/damo/speech_charctc_kws_phone-xiaoyun)
+* [u2pp_conformer-asr-cn-16k-online](https://modelscope.cn/models/wenet/u2pp_conformer-asr-cn-16k-online)
+* [speech_frcrn_ans_cirm_16k](https://modelscope.cn/models/damo/speech_frcrn_ans_cirm_16k)
+* [speech_dfsmn_aec_psm_16k](https://modelscope.cn/models/damo/speech_dfsmn_aec_psm_16k)
-* [Taiyi-Stable-Diffusion-1B-Chinese-v0.1](https://modelscope.cn/models/fengshenbang/Taiyi-Stable-Diffusion-1B-Chinese-v0.1)
-* [mplug_visual-question-answering_coco_large_en](https://modelscope.cn/models/damo/mplug_visual-question-answering_coco_large_en)
AI for Science:
@@ -117,6 +121,8 @@ AI for Science:
* [uni-fold-multimer](https://modelscope.cn/models/DPTech/uni-fold-multimer/summary)
**Note:** Most models on ModelScope are public and can be downloaded without account registration on the modelscope website ([www.modelscope.cn](www.modelscope.cn)). Please refer to the instructions for [model download](https://modelscope.cn/docs/%E6%A8%A1%E5%9E%8B%E7%9A%84%E4%B8%8B%E8%BD%BD) for downloading models with the API provided by the modelscope library or with git, as sketched below.
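A minimal illustration of the two download routes the note mentions, the modelscope library API and plain git (the model id is one of the public models and is used purely as an example):

```python
from modelscope.hub.snapshot_download import snapshot_download

# Downloads (and caches) all files of a public model repo; no login required.
model_dir = snapshot_download('damo/nlp_structbert_word-segmentation_chinese-base')
print(model_dir)  # local directory usable by pipelines and trainers

# Model repos are also plain git repositories, so this is equivalent:
#   git clone https://www.modelscope.cn/damo/nlp_structbert_word-segmentation_chinese-base.git
```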
# QuickTour
We provide a unified interface for inference using `pipeline`, and for fine-tuning and evaluation using `Trainer`, across different tasks; a minimal inference example follows.
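For instance, a minimal `pipeline` call (the Chinese word-segmentation model below is a public model, used here for illustration):

```python
from modelscope.pipelines import pipeline

# Build an inference pipeline by task name; the model id selects one of the
# public models hosted on ModelScope.
word_segmentation = pipeline(
    'word-segmentation',
    model='damo/nlp_structbert_word-segmentation_chinese-base')
print(word_segmentation('今天天气不错，适合出去游玩'))
```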

View File

@@ -37,7 +37,7 @@ The ModelScope Library provides model contributors with the necessary layered APIs so that models from
Apart from containing implementations of various models, the ModelScope Library also supports the necessary interactions with the ModelScope backend services, in particular with the Model-Hub and Dataset-Hub. Such interactions facilitate the management of models and datasets and are performed seamlessly in the background, including model and dataset queries, version control, cache management, and so on.
# Selected Models and Online Experience
-ModelScope has open-sourced hundreds of models (currently 600+), covering natural language processing, computer vision, speech, multi-modality, scientific computing, and more, among them hundreds of SOTA models. Users can go to the model hub on the ModelScope website ([modelscope.cn](http://www.modelscope.cn)) to experience the models online with zero setup, or try them in a Notebook.
+ModelScope has open-sourced hundreds of models (currently 700+), covering natural language processing, computer vision, speech, multi-modality, scientific computing, and more, among them hundreds of SOTA models. Users can go to the model hub on the ModelScope website ([modelscope.cn](http://www.modelscope.cn)) to experience the models online with zero setup, or try them in a Notebook.
<p align="center">
<br>
@@ -65,23 +65,20 @@ ModelScope has open-sourced hundreds of models (currently 600+), covering natural language processing, com
* [SPACE-T table question answering pre-trained model - Chinese - general domain - base](https://modelscope.cn/models/damo/nlp_convai_text2sql_pretrain_cn)
-Audio:
+Multi-Modal:
-* [Paraformer speech recognition - Chinese - general - 16k - offline - large - pytorch](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch)
+* [CLIP model - Chinese - general domain - base](https://modelscope.cn/models/damo/multi-modal_clip-vit-base-patch16_zh)
-* [Speech synthesis (TTS) - Chinese - multiple emotions - 16k - multiple speakers](https://modelscope.cn/models/damo/speech_sambert-hifigan_tts_zh-cn_16k)
+* [OFA pre-trained model - Chinese - general domain - base](https://modelscope.cn/models/damo/ofa_pretrain_base_zh)
-* [CTC keyword spotting - mobile - single mic - 16k - "Xiaoyun Xiaoyun"](https://modelscope.cn/models/damo/speech_charctc_kws_phone-xiaoyun)
-* [WeNet-U2pp_Conformer speech recognition - Chinese - 16k - streaming](https://modelscope.cn/models/wenet/u2pp_conformer-asr-cn-16k-online)
-* [FRCRN speech denoising - single mic - 16k](https://modelscope.cn/models/damo/speech_frcrn_ans_cirm_16k)
-* [DFSMN echo cancellation - single mic, single reference - 16k](https://modelscope.cn/models/damo/speech_dfsmn_aec_psm_16k)
+* [Taiyi-Stable-Diffusion-1B-Chinese-v0.1](https://modelscope.cn/models/fengshenbang/Taiyi-Stable-Diffusion-1B-Chinese-v0.1)
+* [mPLUG visual question answering model - English - large](https://modelscope.cn/models/damo/mplug_visual-question-answering_coco_large_en)
CV:
+* [ControlNet controllable image generation](https://modelscope.cn/models/dienstag/cv_controlnet_controllable-image-generation_nine-annotators/summary)
* [DAMO-YOLO high-performance general detection model - S](https://modelscope.cn/models/damo/cv_tinynas_object-detection_damoyolo)
* [DCT-Net portrait cartoonization](https://modelscope.cn/models/damo/cv_unet_person-image-cartoon_compound-models)
@@ -99,15 +96,22 @@ ModelScope has open-sourced hundreds of models (currently 600+), covering natural language processing, com
* [General recognition - Chinese - general domain](https://modelscope.cn/models/damo/cv_resnest101_general_recognition)
-Multi-Modal:
+Audio:
-* [CLIP model - Chinese - general domain - base](https://modelscope.cn/models/damo/multi-modal_clip-vit-base-patch16_zh)
+* [Paraformer speech recognition - Chinese - general - 16k - offline - large - pytorch](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch)
+* [Speech synthesis (TTS) - Chinese - multiple emotions - 16k - multiple speakers](https://modelscope.cn/models/damo/speech_sambert-hifigan_tts_zh-cn_16k)
+* [CTC keyword spotting - mobile - single mic - 16k - "Xiaoyun Xiaoyun"](https://modelscope.cn/models/damo/speech_charctc_kws_phone-xiaoyun)
+* [WeNet-U2pp_Conformer speech recognition - Chinese - 16k - streaming](https://modelscope.cn/models/wenet/u2pp_conformer-asr-cn-16k-online)
+* [FRCRN speech denoising - single mic - 16k](https://modelscope.cn/models/damo/speech_frcrn_ans_cirm_16k)
+* [DFSMN echo cancellation - single mic, single reference - 16k](https://modelscope.cn/models/damo/speech_dfsmn_aec_psm_16k)
-* [OFA pre-trained model - Chinese - general domain - base](https://modelscope.cn/models/damo/ofa_pretrain_base_zh)
-* [Taiyi-Stable-Diffusion-1B-Chinese-v0.1](https://modelscope.cn/models/fengshenbang/Taiyi-Stable-Diffusion-1B-Chinese-v0.1)
-* [mPLUG visual question answering model - English - large](https://modelscope.cn/models/damo/mplug_visual-question-answering_coco_large_en)
AI for Science:

View File

@@ -10,7 +10,7 @@ We use the following tools for linting and formatting:
Style configurations of yapf and isort can be found in [setup.cfg](../../setup.cfg).
We use [pre-commit hook](https://pre-commit.com/) that checks and formats for `flake8`, `yapf`, `seed-isort-config`, `isort`, `trailing whitespaces`,
-fixes `end-of-files`, sorts `requirments.txt` automatically on every commit.
+fixes `end-of-files`, sorts `requirements.txt` automatically on every commit.
The config for a pre-commit hook is stored in [.pre-commit-config](../../.pre-commit-config.yaml).
After you clone the repository, you will need to install and initialize the pre-commit hook.
```bash
@@ -81,7 +81,7 @@ exists in the environment and unset it.
python tests/path/to/your_test.py
```
-2. Remember to run core tests in local environment before start a codereview, by default it will
+2. Remember to run core tests in your local environment before starting a code review; by default it will
only run test cases with level 0.
```bash
make tests
@@ -153,7 +153,7 @@ git pull origin branch_name
git add .
git commit -m "[to #42322933] my commit message"
```
-note: you may replace [to #42322933] with your own aone issue id (if any).
+note: you may replace [to #42322933] with your own alone issue id (if any).
4. Push your change:
```shell
git push --set-upstream origin dev/my-dev-branch

View File

@@ -69,7 +69,7 @@ class CsanmtForTranslationExporter(TfModelExporter):
dummy_inputs = self.generate_dummy_inputs()
with tf.Session(graph=tf.Graph()) as sess:
-# Restore model from the saved_modle file, that is exported by TensorFlow estimator.
+# Restore the model from the saved_model file that is exported by the TensorFlow estimator.
MetaGraphDef = tf.saved_model.loader.load(sess, ['serve'],
output_dir)
@@ -182,5 +182,5 @@ class CsanmtForTranslationExporter(TfModelExporter):
def export_onnx(self, output_dir: str, opset=13, **kwargs):
raise NotImplementedError(
-'csanmt model does not support onnx format, consider using savedmodel instead.'
+'csanmt model does not support onnx format, consider using saved model instead.'
)
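For orientation, a sketch of how this exporter is typically driven, assuming the generic `Exporter.from_model` entry point and an `export_saved_model` method matching the class above (the model id is illustrative):

```python
from modelscope.exporters import Exporter
from modelscope.models import Model

# Load the CSANMT translation model, then export it as a TF SavedModel.
# export_saved_model is assumed from the class above; export_onnx would
# raise NotImplementedError for this model, as the snippet shows.
model = Model.from_pretrained('damo/nlp_csanmt_translation_zh2en')
Exporter.from_model(model).export_saved_model(output_dir='/tmp/csanmt_export')
```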

View File

@@ -125,7 +125,7 @@ class HubApi:
visibility: Optional[int] = ModelVisibility.PUBLIC,
license: Optional[str] = Licenses.APACHE_V2,
chinese_name: Optional[str] = None) -> str:
"""Create model repo at ModelScopeHub.
"""Create model repo at ModelScope Hub.
Args:
model_id (str): The model id
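A minimal sketch of calling this API, assuming a valid SDK token and a hypothetical model id:

```python
from modelscope.hub.api import HubApi
from modelscope.hub.constants import Licenses, ModelVisibility

api = HubApi()
api.login('YOUR_SDK_TOKEN')  # placeholder; obtain a token from modelscope.cn

# Creates an empty model repo with the defaults shown in the signature above.
api.create_model(
    model_id='my_namespace/my_model',  # hypothetical id
    visibility=ModelVisibility.PUBLIC,
    license=Licenses.APACHE_V2,
    chinese_name='示例模型')  # optional display name
```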

View File

@@ -7,6 +7,7 @@ import cv2
import numpy as np
import PIL
import torch
+import math
from modelscope.metainfo import Preprocessors
from modelscope.preprocessors import Preprocessor, load_image

View File

@@ -40,6 +40,7 @@ def discretized_gaussian_log_likelihood(x0, mean, log_scale):
def _i(tensor, t, x):
+tensor = tensor.to(x.device)
shape = (x.size(0), ) + (1, ) * (x.ndim - 1)
return tensor[t].view(shape).to(x)
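A small usage sketch of `_i` (restated so it runs standalone): it gathers one per-timestep scalar per sample and reshapes the result so it broadcasts against a batched tensor:

```python
import torch

def _i(tensor, t, x):
    # identical to the helper above: gather per-timestep values and reshape
    # them to [B, 1, 1, ...] so they broadcast against x
    tensor = tensor.to(x.device)
    shape = (x.size(0), ) + (1, ) * (x.ndim - 1)
    return tensor[t].view(shape).to(x)

betas = torch.linspace(1e-4, 0.02, 1000)  # one scalar per diffusion step, [T]
x = torch.randn(8, 3, 64, 64)             # a batch of images, [B, C, H, W]
t = torch.randint(0, 1000, (8, ))         # one timestep per sample, [B]

out = _i(betas, t, x)                     # dtype/device follow x
assert out.shape == (8, 1, 1, 1)          # valid in expressions like out * x
```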

View File

@@ -80,7 +80,7 @@ else:
'TextGenerationSentencePiecePreprocessor',
'TextClassificationTransformersPreprocessor',
'TokenClassificationTransformersPreprocessor',
-'TextErrorCorrectionPreprocessor',
+'TextErrorCorrectionPreprocessor', 'WordAlignmentPreprocessor',
'TextGenerationTransformersPreprocessor', 'Tokenize',
'TextGenerationT5Preprocessor',
'WordSegmentationBlankSetToLabelPreprocessor',

View File

@@ -8,7 +8,7 @@ from typing import Any
from modelscope.utils.logger import get_logger
logger = get_logger()
-SYSTEM_TRAINER_BUILDER_FINCTION_NAME = 'build_trainer'
+SYSTEM_TRAINER_BUILDER_FUNCTION_NAME = 'build_trainer'
SYSTEM_TRAINER_BUILDER_PARAMETER_NAME = 'name'
SYSTEM_PIPELINE_BUILDER_FUNCTION_NAME = 'pipeline'
SYSTEM_PIPELINE_BUILDER_PARAMETER_NAME = 'task'
@@ -263,12 +263,12 @@ def analysis_trainer_test_suite(test_file, modified_register_modules):
# get test file global function and test class
test_suite_root = ast.parse(src, test_file)
test_suite_analyzer = AnalysisTestFile(
-test_file, SYSTEM_TRAINER_BUILDER_FINCTION_NAME)
+test_file, SYSTEM_TRAINER_BUILDER_FUNCTION_NAME)
test_suite_analyzer.visit(test_suite_root)
for test_class in test_suite_analyzer.test_classes:
test_class_analyzer = AnalysisTestClass(
-test_class, SYSTEM_TRAINER_BUILDER_FINCTION_NAME)
+test_class, SYSTEM_TRAINER_BUILDER_FUNCTION_NAME)
test_class_analyzer.visit(test_class)
for test_method in test_class_analyzer.test_methods:
for idx, custom_global_builder in enumerate(
@@ -278,7 +278,7 @@ def analysis_trainer_test_suite(test_file, modified_register_modules):
test_method, test_class_analyzer.setup_variables,
custom_global_builder,
test_suite_analyzer.custom_global_builder_calls[idx],
-SYSTEM_TRAINER_BUILDER_FINCTION_NAME,
+SYSTEM_TRAINER_BUILDER_FUNCTION_NAME,
SYSTEM_TRAINER_BUILDER_PARAMETER_NAME)
if trainer_name is not None:
tested_trainers.append(trainer_name)
@@ -289,14 +289,14 @@ def analysis_trainer_test_suite(test_file, modified_register_modules):
test_method, test_class_analyzer.setup_variables,
custom_class_method_builder,
test_class_analyzer.custom_class_method_builder_calls[idx],
-SYSTEM_TRAINER_BUILDER_FINCTION_NAME,
+SYSTEM_TRAINER_BUILDER_FUNCTION_NAME,
SYSTEM_TRAINER_BUILDER_PARAMETER_NAME)
if trainer_name is not None:
tested_trainers.append(trainer_name)
trainer_name = get_builder_parameter_value(
test_method, test_class_analyzer.setup_variables, None, None,
-SYSTEM_TRAINER_BUILDER_FINCTION_NAME,
+SYSTEM_TRAINER_BUILDER_FUNCTION_NAME,
SYSTEM_TRAINER_BUILDER_PARAMETER_NAME
) # direct call the build_trainer
if trainer_name is not None:
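A standalone sketch of the underlying idea, using only the stdlib `ast` module: locate `build_trainer(...)` calls in test source and read off the `name` argument, which is what the analyzers above do, with extra handling for setup variables and indirect builders (the test source here is illustrative):

```python
import ast

TEST_SRC = '''
from modelscope.trainers import build_trainer
def test_finetune():
    trainer = build_trainer(name='nlp-base-trainer', default_args={})
'''

# Walk the tree and collect the `name` keyword of every build_trainer(...) call.
tree = ast.parse(TEST_SRC)
for node in ast.walk(tree):
    if isinstance(node, ast.Call) and getattr(node.func, 'id', None) == 'build_trainer':
        for kw in node.keywords:
            if kw.arg == 'name' and isinstance(kw.value, ast.Constant):
                print(kw.value.value)  # -> nlp-base-trainer
```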

tools/convert_ckpt.py Normal file
View File

@@ -0,0 +1,37 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import argparse
import os
import shutil
import torch
def convert_single_pth(fullname):
filename, ext = os.path.splitext(fullname)
checkpoint = torch.load(fullname, map_location='cpu')
only_module = 'state_dict' not in checkpoint
state_dict = checkpoint if only_module else checkpoint['state_dict']
torch.save(state_dict, fullname)
if not only_module:
checkpoint.pop('state_dict')
fullname_trainer = filename + '_trainer_state' + ext
torch.save(checkpoint, fullname_trainer)
# This script is used to split pth files generated before version 1.3.1 into two files.
# There is only one argument: --dir, which should point to the dir that contains the pth files.
# NOTE: If you are using this script to convert the checkpoints of GPT3 or other sharding models,
# please rename the checkpoint filenames after the conversion manually.
parser = argparse.ArgumentParser()
parser.add_argument('--dir', help='The dir that contains the *.pth files.')
args = parser.parse_args()
folder = args.dir
assert folder
all_files = os.listdir(folder)
all_files = [file for file in all_files if file.endswith('.pth')]
for file in all_files:
shutil.copy(
os.path.join(folder, file), os.path.join(folder, file + '.legacy'))
convert_single_pth(os.path.join(folder, file))
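A hedged sketch of what the conversion leaves behind, assuming a legacy checkpoint named `epoch_10.pth` (the name is illustrative):

```python
import torch

# After running: python tools/convert_ckpt.py --dir ./work_dir
# a legacy ./work_dir/epoch_10.pth is rewritten in place and split:
state_dict = torch.load('work_dir/epoch_10.pth', map_location='cpu')  # weights only
trainer_state = torch.load('work_dir/epoch_10_trainer_state.pth', map_location='cpu')  # optimizer/meta state
# The untouched original is preserved as work_dir/epoch_10.pth.legacy.
```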