From 6438c41144eb779be9b649ccac860991658e8dc1 Mon Sep 17 00:00:00 2001
From: "yongfei.zyf" <yongfei.zyf@alibaba-inc.com>
Date: Tue, 18 Oct 2022 16:50:47 +0800
Subject: [PATCH 01/15] [to #42322933]Support video url input processing       
  Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10437340

---
 modelscope/preprocessors/video.py | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/modelscope/preprocessors/video.py b/modelscope/preprocessors/video.py
index f693cd9e..794033b5 100644
--- a/modelscope/preprocessors/video.py
+++ b/modelscope/preprocessors/video.py
@@ -1,5 +1,10 @@
 import math
+import os
 import random
+import uuid
+from os.path import exists
+from tempfile import TemporaryDirectory
+from urllib.parse import urlparse
 
 import numpy as np
 import torch
@@ -9,6 +14,7 @@ import torchvision.transforms._transforms_video as transforms
 from decord import VideoReader
 from torchvision.transforms import Compose
 
+from modelscope.hub.file_download import http_get_file
 from modelscope.metainfo import Preprocessors
 from modelscope.utils.constant import Fields, ModeKeys
 from modelscope.utils.type_assert import type_assert
@@ -30,7 +36,22 @@ def ReadVideoData(cfg,
     Returns:
         data (Tensor): the normalized video clips for model inputs
     """
-    data = _decode_video(cfg, video_path, num_temporal_views_override)
+    url_parsed = urlparse(video_path)
+    if url_parsed.scheme in ('file', '') and exists(
+            url_parsed.path):  # Possibly a local file
+        data = _decode_video(cfg, video_path, num_temporal_views_override)
+    else:
+        with TemporaryDirectory() as temporary_cache_dir:
+            random_str = uuid.uuid4().hex
+            http_get_file(
+                url=video_path,
+                local_dir=temporary_cache_dir,
+                file_name=random_str,
+                cookies=None)
+            temp_file_path = os.path.join(temporary_cache_dir, random_str)
+            data = _decode_video(cfg, temp_file_path,
+                                 num_temporal_views_override)
+
     if num_spatial_crops_override is not None:
         num_spatial_crops = num_spatial_crops_override
         transform = kinetics400_tranform(cfg, num_spatial_crops_override)

From 865397763ef51773f2285efde5df001f624f7bf3 Mon Sep 17 00:00:00 2001
From: "xianzhe.xxz" <xianzhe.xxz@alibaba-inc.com>
Date: Tue, 18 Oct 2022 16:53:29 +0800
Subject: [PATCH 02/15] [to #42322933]add damoyolo model in
 tinynas-object-detection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

接入damyolo系列检测模型
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10377688
---
 modelscope/metainfo.py                        |  2 +
 .../models/cv/tinynas_detection/__init__.py   |  2 +
 .../cv/tinynas_detection/backbone/tinynas.py  | 29 +++++++----
 .../models/cv/tinynas_detection/detector.py   |  5 +-
 .../tinynas_detection/head/gfocal_v2_tiny.py  | 51 ++++++++++++-------
 .../tinynas_detection/neck/giraffe_fpn_v2.py  |  5 +-
 .../cv/tinynas_detection/tinynas_damoyolo.py  | 15 ++++++
 .../cv/tinynas_detection/tinynas_detector.py  |  2 +-
 .../cv/tinynas_detection_pipeline.py          | 22 +++++---
 modelscope/utils/file_utils.py                |  8 +++
 tests/pipelines/test_tinynas_detection.py     | 29 +++++++++--
 11 files changed, 126 insertions(+), 44 deletions(-)
 create mode 100644 modelscope/models/cv/tinynas_detection/tinynas_damoyolo.py

diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py
index fc18ead9..6c8d91fa 100644
--- a/modelscope/metainfo.py
+++ b/modelscope/metainfo.py
@@ -9,7 +9,9 @@ class Models(object):
 
         Model name should only contain model info but not task info.
     """
+    # tinynas models
     tinynas_detection = 'tinynas-detection'
+    tinynas_damoyolo = 'tinynas-damoyolo'
 
     # vision models
     detection = 'detection'
diff --git a/modelscope/models/cv/tinynas_detection/__init__.py b/modelscope/models/cv/tinynas_detection/__init__.py
index 13532d10..6d696ac4 100644
--- a/modelscope/models/cv/tinynas_detection/__init__.py
+++ b/modelscope/models/cv/tinynas_detection/__init__.py
@@ -7,10 +7,12 @@ from modelscope.utils.import_utils import LazyImportModule
 
 if TYPE_CHECKING:
     from .tinynas_detector import Tinynas_detector
+    from .tinynas_damoyolo import DamoYolo
 
 else:
     _import_structure = {
         'tinynas_detector': ['TinynasDetector'],
+        'tinynas_damoyolo': ['DamoYolo'],
     }
 
     import sys
diff --git a/modelscope/models/cv/tinynas_detection/backbone/tinynas.py b/modelscope/models/cv/tinynas_detection/backbone/tinynas.py
index 814ee550..87a28a2f 100755
--- a/modelscope/models/cv/tinynas_detection/backbone/tinynas.py
+++ b/modelscope/models/cv/tinynas_detection/backbone/tinynas.py
@@ -4,6 +4,7 @@
 import torch
 import torch.nn as nn
 
+from modelscope.utils.file_utils import read_file
 from ..core.base_ops import Focus, SPPBottleneck, get_activation
 from ..core.repvgg_block import RepVggBlock
 
@@ -49,12 +50,16 @@ class ResConvK1KX(nn.Module):
                  kernel_size,
                  stride,
                  force_resproj=False,
-                 act='silu'):
+                 act='silu',
+                 reparam=False):
         super(ResConvK1KX, self).__init__()
         self.stride = stride
         self.conv1 = ConvKXBN(in_c, btn_c, 1, 1)
-        self.conv2 = RepVggBlock(
-            btn_c, out_c, kernel_size, stride, act='identity')
+        if not reparam:
+            self.conv2 = ConvKXBN(btn_c, out_c, 3, stride)
+        else:
+            self.conv2 = RepVggBlock(
+                btn_c, out_c, kernel_size, stride, act='identity')
 
         if act is None:
             self.activation_function = torch.relu
@@ -97,7 +102,8 @@ class SuperResConvK1KX(nn.Module):
                  stride,
                  num_blocks,
                  with_spp=False,
-                 act='silu'):
+                 act='silu',
+                 reparam=False):
         super(SuperResConvK1KX, self).__init__()
         if act is None:
             self.act = torch.relu
@@ -124,7 +130,8 @@ class SuperResConvK1KX(nn.Module):
                 this_kernel_size,
                 this_stride,
                 force_resproj,
-                act=act)
+                act=act,
+                reparam=reparam)
             self.block_list.append(the_block)
             if block_id == 0 and with_spp:
                 self.block_list.append(
@@ -248,7 +255,8 @@ class TinyNAS(nn.Module):
                  with_spp=False,
                  use_focus=False,
                  need_conv1=True,
-                 act='silu'):
+                 act='silu',
+                 reparam=False):
         super(TinyNAS, self).__init__()
         assert len(out_indices) == len(out_channels)
         self.out_indices = out_indices
@@ -281,7 +289,8 @@ class TinyNAS(nn.Module):
                     block_info['s'],
                     block_info['L'],
                     spp,
-                    act=act)
+                    act=act,
+                    reparam=reparam)
                 self.block_list.append(the_block)
             elif the_block_class == 'SuperResConvKXKX':
                 spp = with_spp if idx == len(structure_info) - 1 else False
@@ -325,8 +334,8 @@ class TinyNAS(nn.Module):
 def load_tinynas_net(backbone_cfg):
     # load masternet model to path
     import ast
-
-    struct_str = ''.join([x.strip() for x in backbone_cfg.net_structure_str])
+    net_structure_str = read_file(backbone_cfg.structure_file)
+    struct_str = ''.join([x.strip() for x in net_structure_str])
     struct_info = ast.literal_eval(struct_str)
     for layer in struct_info:
         if 'nbitsA' in layer:
@@ -342,6 +351,6 @@ def load_tinynas_net(backbone_cfg):
         use_focus=backbone_cfg.use_focus,
         act=backbone_cfg.act,
         need_conv1=backbone_cfg.need_conv1,
-    )
+        reparam=backbone_cfg.reparam)
 
     return model
diff --git a/modelscope/models/cv/tinynas_detection/detector.py b/modelscope/models/cv/tinynas_detection/detector.py
index 615b13a8..42a71381 100644
--- a/modelscope/models/cv/tinynas_detection/detector.py
+++ b/modelscope/models/cv/tinynas_detection/detector.py
@@ -30,7 +30,7 @@ class SingleStageDetector(TorchModel):
         """
         super().__init__(model_dir, *args, **kwargs)
 
-        config_path = osp.join(model_dir, 'airdet_s.py')
+        config_path = osp.join(model_dir, self.config_name)
         config = parse_config(config_path)
         self.cfg = config
         model_path = osp.join(model_dir, config.model.name)
@@ -41,6 +41,9 @@ class SingleStageDetector(TorchModel):
         self.conf_thre = config.model.head.nms_conf_thre
         self.nms_thre = config.model.head.nms_iou_thre
 
+        if self.cfg.model.backbone.name == 'TinyNAS':
+            self.cfg.model.backbone.structure_file = osp.join(
+                model_dir, self.cfg.model.backbone.structure_file)
         self.backbone = build_backbone(self.cfg.model.backbone)
         self.neck = build_neck(self.cfg.model.neck)
         self.head = build_head(self.cfg.model.head)
diff --git a/modelscope/models/cv/tinynas_detection/head/gfocal_v2_tiny.py b/modelscope/models/cv/tinynas_detection/head/gfocal_v2_tiny.py
index 41f35968..66904ed1 100644
--- a/modelscope/models/cv/tinynas_detection/head/gfocal_v2_tiny.py
+++ b/modelscope/models/cv/tinynas_detection/head/gfocal_v2_tiny.py
@@ -124,11 +124,13 @@ class GFocalHead_Tiny(nn.Module):
             simOTA_iou_weight=3.0,
             octbase=8,
             simlqe=False,
+            use_lqe=True,
             **kwargs):
         self.simlqe = simlqe
         self.num_classes = num_classes
         self.in_channels = in_channels
         self.strides = strides
+        self.use_lqe = use_lqe
         self.feat_channels = feat_channels if isinstance(feat_channels, list) \
             else [feat_channels] * len(self.strides)
 
@@ -181,15 +183,20 @@ class GFocalHead_Tiny(nn.Module):
                     groups=self.conv_groups,
                     norm=self.norm,
                     act=self.act))
-        if not self.simlqe:
-            conf_vector = [nn.Conv2d(4 * self.total_dim, self.reg_channels, 1)]
+        if self.use_lqe:
+            if not self.simlqe:
+                conf_vector = [
+                    nn.Conv2d(4 * self.total_dim, self.reg_channels, 1)
+                ]
+            else:
+                conf_vector = [
+                    nn.Conv2d(4 * (self.reg_max + 1), self.reg_channels, 1)
+                ]
+            conf_vector += [self.relu]
+            conf_vector += [nn.Conv2d(self.reg_channels, 1, 1), nn.Sigmoid()]
+            reg_conf = nn.Sequential(*conf_vector)
         else:
-            conf_vector = [
-                nn.Conv2d(4 * (self.reg_max + 1), self.reg_channels, 1)
-            ]
-        conf_vector += [self.relu]
-        conf_vector += [nn.Conv2d(self.reg_channels, 1, 1), nn.Sigmoid()]
-        reg_conf = nn.Sequential(*conf_vector)
+            reg_conf = None
 
         return cls_convs, reg_convs, reg_conf
 
@@ -290,21 +297,27 @@ class GFocalHead_Tiny(nn.Module):
         N, C, H, W = bbox_pred.size()
         prob = F.softmax(
             bbox_pred.reshape(N, 4, self.reg_max + 1, H, W), dim=2)
-        if not self.simlqe:
-            prob_topk, _ = prob.topk(self.reg_topk, dim=2)
+        if self.use_lqe:
+            if not self.simlqe:
+                prob_topk, _ = prob.topk(self.reg_topk, dim=2)
 
-            if self.add_mean:
-                stat = torch.cat(
-                    [prob_topk, prob_topk.mean(dim=2, keepdim=True)], dim=2)
+                if self.add_mean:
+                    stat = torch.cat(
+                        [prob_topk,
+                         prob_topk.mean(dim=2, keepdim=True)],
+                        dim=2)
+                else:
+                    stat = prob_topk
+
+                quality_score = reg_conf(
+                    stat.reshape(N, 4 * self.total_dim, H, W))
             else:
-                stat = prob_topk
+                quality_score = reg_conf(
+                    bbox_pred.reshape(N, 4 * (self.reg_max + 1), H, W))
 
-            quality_score = reg_conf(stat.reshape(N, 4 * self.total_dim, H, W))
+            cls_score = gfl_cls(cls_feat).sigmoid() * quality_score
         else:
-            quality_score = reg_conf(
-                bbox_pred.reshape(N, 4 * (self.reg_max + 1), H, W))
-
-        cls_score = gfl_cls(cls_feat).sigmoid() * quality_score
+            cls_score = gfl_cls(cls_feat).sigmoid()
 
         flatten_cls_score = cls_score.flatten(start_dim=2).transpose(1, 2)
         flatten_bbox_pred = bbox_pred.flatten(start_dim=2).transpose(1, 2)
diff --git a/modelscope/models/cv/tinynas_detection/neck/giraffe_fpn_v2.py b/modelscope/models/cv/tinynas_detection/neck/giraffe_fpn_v2.py
index b710572f..b88c39f2 100644
--- a/modelscope/models/cv/tinynas_detection/neck/giraffe_fpn_v2.py
+++ b/modelscope/models/cv/tinynas_detection/neck/giraffe_fpn_v2.py
@@ -14,7 +14,6 @@ class GiraffeNeckV2(nn.Module):
         self,
         depth=1.0,
         width=1.0,
-        in_features=[2, 3, 4],
         in_channels=[256, 512, 1024],
         out_channels=[256, 512, 1024],
         depthwise=False,
@@ -24,7 +23,6 @@ class GiraffeNeckV2(nn.Module):
         block_name='BasicBlock',
     ):
         super().__init__()
-        self.in_features = in_features
         self.in_channels = in_channels
         Conv = DWConv if depthwise else BaseConv
 
@@ -169,8 +167,7 @@ class GiraffeNeckV2(nn.Module):
         """
 
         #  backbone
-        features = [out_features[f] for f in self.in_features]
-        [x2, x1, x0] = features
+        [x2, x1, x0] = out_features
 
         # node x3
         x13 = self.bu_conv13(x1)
diff --git a/modelscope/models/cv/tinynas_detection/tinynas_damoyolo.py b/modelscope/models/cv/tinynas_detection/tinynas_damoyolo.py
new file mode 100644
index 00000000..9effad3a
--- /dev/null
+++ b/modelscope/models/cv/tinynas_detection/tinynas_damoyolo.py
@@ -0,0 +1,15 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+from modelscope.metainfo import Models
+from modelscope.models.builder import MODELS
+from modelscope.utils.constant import Tasks
+from .detector import SingleStageDetector
+
+
+@MODELS.register_module(
+    Tasks.image_object_detection, module_name=Models.tinynas_damoyolo)
+class DamoYolo(SingleStageDetector):
+
+    def __init__(self, model_dir, *args, **kwargs):
+        self.config_name = 'damoyolo_s.py'
+        super(DamoYolo, self).__init__(model_dir, *args, **kwargs)
diff --git a/modelscope/models/cv/tinynas_detection/tinynas_detector.py b/modelscope/models/cv/tinynas_detection/tinynas_detector.py
index e6f144df..92acf3fa 100644
--- a/modelscope/models/cv/tinynas_detection/tinynas_detector.py
+++ b/modelscope/models/cv/tinynas_detection/tinynas_detector.py
@@ -12,5 +12,5 @@ from .detector import SingleStageDetector
 class TinynasDetector(SingleStageDetector):
 
     def __init__(self, model_dir, *args, **kwargs):
-
+        self.config_name = 'airdet_s.py'
         super(TinynasDetector, self).__init__(model_dir, *args, **kwargs)
diff --git a/modelscope/pipelines/cv/tinynas_detection_pipeline.py b/modelscope/pipelines/cv/tinynas_detection_pipeline.py
index b2063629..d35d4d36 100644
--- a/modelscope/pipelines/cv/tinynas_detection_pipeline.py
+++ b/modelscope/pipelines/cv/tinynas_detection_pipeline.py
@@ -12,6 +12,8 @@ from modelscope.pipelines.base import Input, Pipeline
 from modelscope.pipelines.builder import PIPELINES
 from modelscope.preprocessors import LoadImage
 from modelscope.utils.constant import Tasks
+from modelscope.utils.cv.image_utils import \
+    show_image_object_detection_auto_result
 from modelscope.utils.logger import get_logger
 
 logger = get_logger()
@@ -52,10 +54,18 @@ class TinynasDetectionPipeline(Pipeline):
 
         bboxes, scores, labels = self.model.postprocess(inputs['data'])
         if bboxes is None:
-            return None
-        outputs = {
-            OutputKeys.SCORES: scores,
-            OutputKeys.LABELS: labels,
-            OutputKeys.BOXES: bboxes
-        }
+            outputs = {
+                OutputKeys.SCORES: [],
+                OutputKeys.LABELS: [],
+                OutputKeys.BOXES: []
+            }
+        else:
+            outputs = {
+                OutputKeys.SCORES: scores,
+                OutputKeys.LABELS: labels,
+                OutputKeys.BOXES: bboxes
+            }
         return outputs
+
+    def show_result(self, img_path, result, save_path=None):
+        show_image_object_detection_auto_result(img_path, result, save_path)
diff --git a/modelscope/utils/file_utils.py b/modelscope/utils/file_utils.py
index 9b82f8d2..cf59dc57 100644
--- a/modelscope/utils/file_utils.py
+++ b/modelscope/utils/file_utils.py
@@ -1,6 +1,7 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 
 import inspect
+import os
 from pathlib import Path
 
 
@@ -35,3 +36,10 @@ def get_default_cache_dir():
     """
     default_cache_dir = Path.home().joinpath('.cache', 'modelscope')
     return default_cache_dir
+
+
+def read_file(path):
+
+    with open(path, 'r') as f:
+        text = f.read()
+    return text
diff --git a/tests/pipelines/test_tinynas_detection.py b/tests/pipelines/test_tinynas_detection.py
index 63db9145..43e1842d 100644
--- a/tests/pipelines/test_tinynas_detection.py
+++ b/tests/pipelines/test_tinynas_detection.py
@@ -4,22 +4,45 @@ import unittest
 
 from modelscope.pipelines import pipeline
 from modelscope.utils.constant import Tasks
+from modelscope.utils.demo_utils import DemoCompatibilityCheck
 from modelscope.utils.test_utils import test_level
 
 
-class TinynasObjectDetectionTest(unittest.TestCase):
+class TinynasObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
+
+    def setUp(self) -> None:
+        self.task = Tasks.image_object_detection
+        self.model_id = 'damo/cv_tinynas_object-detection_damoyolo'
 
     @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
-    def test_run(self):
+    def test_run_airdet(self):
         tinynas_object_detection = pipeline(
             Tasks.image_object_detection, model='damo/cv_tinynas_detection')
         result = tinynas_object_detection(
             'data/test/images/image_detection.jpg')
         print(result)
 
+    @unittest.skip('will be enabled after damoyolo officially released')
+    def test_run_damoyolo(self):
+        tinynas_object_detection = pipeline(
+            Tasks.image_object_detection,
+            model='damo/cv_tinynas_object-detection_damoyolo')
+        result = tinynas_object_detection(
+            'data/test/images/image_detection.jpg')
+        print(result)
+
     @unittest.skip('demo compatibility test is only enabled on a needed-basis')
     def test_demo_compatibility(self):
-        self.test_demo()
+        self.compatibility_check()
+
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    def test_image_object_detection_auto_pipeline(self):
+        test_image = 'data/test/images/image_detection.jpg'
+        tinynas_object_detection = pipeline(
+            Tasks.image_object_detection, model='damo/cv_tinynas_detection')
+        result = tinynas_object_detection(test_image)
+        tinynas_object_detection.show_result(test_image, result,
+                                             'demo_ret.jpg')
 
 
 if __name__ == '__main__':

From 5f937b20cf5a7eeed034af5fccd11110b780c08e Mon Sep 17 00:00:00 2001
From: "mulin.lyh" <mulin.lyh@taobao.com>
Date: Tue, 18 Oct 2022 17:47:23 +0800
Subject: [PATCH 03/15] [to #45549080]fix: fix pre_commit dependency
 importlib-metadata 5.0.0 compability issue         Link:
 https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10443005

---
 .pre-commit-config.yaml       | 2 +-
 .pre-commit-config_local.yaml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c6290ff4..48fe7547 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://gitlab.com/pycqa/flake8.git
-    rev: 3.8.3
+    rev: 4.0.0
     hooks:
       - id: flake8
         exclude: thirdparty/|examples/
diff --git a/.pre-commit-config_local.yaml b/.pre-commit-config_local.yaml
index 138561e3..0b2e2f39 100644
--- a/.pre-commit-config_local.yaml
+++ b/.pre-commit-config_local.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: /home/admin/pre-commit/flake8
-    rev: 3.8.3
+    rev: 4.0.0
     hooks:
       - id: flake8
         exclude: thirdparty/|examples/

From 9809d960b0d60af546ba16e9f9c9f64e8232c1e7 Mon Sep 17 00:00:00 2001
From: "xingjun.wxj" <xingjun.wxj@alibaba-inc.com>
Date: Wed, 19 Oct 2022 10:09:06 +0800
Subject: [PATCH 04/15] [to #42322933] Fix issue -- it's not necessary to login
 for loading public datasets.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

修复”公开数据集需要登录才能load“的问题
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10448936
---
 modelscope/hub/api.py                        | 8 +++++---
 modelscope/msdatasets/utils/dataset_utils.py | 8 +++-----
 modelscope/utils/constant.py                 | 7 -------
 3 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py
index dc4d0ab2..2aab142e 100644
--- a/modelscope/hub/api.py
+++ b/modelscope/hub/api.py
@@ -390,11 +390,13 @@ class HubApi:
         return resp['Data']
 
     def list_oss_dataset_objects(self, dataset_name, namespace, max_limit,
-                                 is_recursive, is_filter_dir, revision,
-                                 cookies):
+                                 is_recursive, is_filter_dir, revision):
         url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/oss/tree/?' \
             f'MaxLimit={max_limit}&Revision={revision}&Recursive={is_recursive}&FilterDir={is_filter_dir}'
-        cookies = requests.utils.dict_from_cookiejar(cookies)
+
+        cookies = ModelScopeConfig.get_cookies()
+        if cookies:
+            cookies = requests.utils.dict_from_cookiejar(cookies)
 
         resp = requests.get(url=url, cookies=cookies)
         resp = resp.json()
diff --git a/modelscope/msdatasets/utils/dataset_utils.py b/modelscope/msdatasets/utils/dataset_utils.py
index db9d1fee..c7aa7682 100644
--- a/modelscope/msdatasets/utils/dataset_utils.py
+++ b/modelscope/msdatasets/utils/dataset_utils.py
@@ -7,7 +7,7 @@ from typing import Any, Mapping, Optional, Sequence, Union
 from datasets.builder import DatasetBuilder
 
 from modelscope.hub.api import HubApi
-from modelscope.utils.constant import DEFAULT_DATASET_REVISION, DownloadParams
+from modelscope.utils.constant import DEFAULT_DATASET_REVISION
 from modelscope.utils.logger import get_logger
 from .dataset_builder import MsCsvDatasetBuilder, TaskSpecificDatasetBuilder
 
@@ -95,15 +95,13 @@ def list_dataset_objects(hub_api: HubApi, max_limit: int, is_recursive: bool,
         res (list): List of objects, i.e., ['train/images/001.png', 'train/images/002.png', 'val/images/001.png', ...]
     """
     res = []
-    cookies = hub_api.check_cookies_upload_data(use_cookies=True)
     objects = hub_api.list_oss_dataset_objects(
         dataset_name=dataset_name,
         namespace=namespace,
         max_limit=max_limit,
         is_recursive=is_recursive,
         is_filter_dir=True,
-        revision=version,
-        cookies=cookies)
+        revision=version)
 
     for item in objects:
         object_key = item.get('Key')
@@ -174,7 +172,7 @@ def get_dataset_files(subset_split_into: dict,
     modelscope_api = HubApi()
     objects = list_dataset_objects(
         hub_api=modelscope_api,
-        max_limit=DownloadParams.MAX_LIST_OBJECTS_NUM.value,
+        max_limit=-1,
         is_recursive=True,
         dataset_name=dataset_name,
         namespace=namespace,
diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py
index 6ba58c19..6c0f3e98 100644
--- a/modelscope/utils/constant.py
+++ b/modelscope/utils/constant.py
@@ -231,13 +231,6 @@ class DownloadMode(enum.Enum):
     FORCE_REDOWNLOAD = 'force_redownload'
 
 
-class DownloadParams(enum.Enum):
-    """
-        Parameters for downloading dataset.
-    """
-    MAX_LIST_OBJECTS_NUM = 50000
-
-
 class DatasetFormations(enum.Enum):
     """ How a dataset is organized and interpreted
     """

From 63ac21147d93fb8a4c968c22a80f1b51e271dc2c Mon Sep 17 00:00:00 2001
From: Yingda Chen <yingda.chen@alibaba-inc.com>
Date: Wed, 19 Oct 2022 13:24:23 +0800
Subject: [PATCH 05/15] [to #42322933] fix some logs

---
 modelscope/utils/device.py   | 11 +++++------
 modelscope/utils/registry.py |  9 ++++++---
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/modelscope/utils/device.py b/modelscope/utils/device.py
index 33c0910d..6fc59e37 100644
--- a/modelscope/utils/device.py
+++ b/modelscope/utils/device.py
@@ -61,8 +61,8 @@ def device_placement(framework, device_name='gpu:0'):
     if framework == Frameworks.tf:
         import tensorflow as tf
         if device_type == Devices.gpu and not tf.test.is_gpu_available():
-            logger.warning(
-                'tensorflow cuda is not available, using cpu instead.')
+            logger.debug(
+                'tensorflow: cuda is not available, using cpu instead.')
         device_type = Devices.cpu
         if device_type == Devices.cpu:
             with tf.device('/CPU:0'):
@@ -78,7 +78,8 @@ def device_placement(framework, device_name='gpu:0'):
             if torch.cuda.is_available():
                 torch.cuda.set_device(f'cuda:{device_id}')
             else:
-                logger.warning('cuda is not available, using cpu instead.')
+                logger.debug(
+                    'pytorch: cuda is not available, using cpu instead.')
         yield
     else:
         yield
@@ -96,9 +97,7 @@ def create_device(device_name):
     if device_type == Devices.gpu:
         use_cuda = True
         if not torch.cuda.is_available():
-            logger.warning(
-                'cuda is not available, create gpu device failed, using cpu instead.'
-            )
+            logger.info('cuda is not available, using cpu instead.')
             use_cuda = False
 
     if use_cuda:
diff --git a/modelscope/utils/registry.py b/modelscope/utils/registry.py
index 7a9c79e2..73e94b3c 100644
--- a/modelscope/utils/registry.py
+++ b/modelscope/utils/registry.py
@@ -176,7 +176,7 @@ def build_from_cfg(cfg,
         raise TypeError('default_args must be a dict or None, '
                         f'but got {type(default_args)}')
 
-    # dynamic load installation reqruiements for this module
+    # dynamic load installation requirements for this module
     from modelscope.utils.import_utils import LazyImportModule
     sig = (registry.name.upper(), group_key, cfg['type'])
     LazyImportModule.import_module(sig)
@@ -193,8 +193,11 @@ def build_from_cfg(cfg,
     if isinstance(obj_type, str):
         obj_cls = registry.get(obj_type, group_key=group_key)
         if obj_cls is None:
-            raise KeyError(f'{obj_type} is not in the {registry.name}'
-                           f' registry group {group_key}')
+            raise KeyError(
+                f'{obj_type} is not in the {registry.name}'
+                f' registry group {group_key}. Please make'
+                f' sure the correct version of 1qqQModelScope library is used.'
+            )
         obj_cls.group_key = group_key
     elif inspect.isclass(obj_type) or inspect.isfunction(obj_type):
         obj_cls = obj_type

From 87b4a52b3c22446a2265cdf90500b6655497bfd7 Mon Sep 17 00:00:00 2001
From: "jiaqi.sjq" <jiaqi.sjq@alibaba-inc.com>
Date: Wed, 19 Oct 2022 23:29:58 +0800
Subject: [PATCH 06/15] Link:
 https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10462681

---
 modelscope/hub/git.py        |  7 +++++--
 tests/hub/test_hub_upload.py | 39 +++++++++---------------------------
 2 files changed, 14 insertions(+), 32 deletions(-)

diff --git a/modelscope/hub/git.py b/modelscope/hub/git.py
index db76506e..1369fddf 100644
--- a/modelscope/hub/git.py
+++ b/modelscope/hub/git.py
@@ -184,8 +184,11 @@ class GitCommandWrapper(metaclass=Singleton):
         info = [
             line.strip()
             for line in rsp.stdout.decode('utf8').strip().split(os.linesep)
-        ][1:]
-        return ['/'.join(line.split('/')[1:]) for line in info]
+        ]
+        if len(info) == 1:
+            return ['/'.join(info[0].split('/')[1:])]
+        else:
+            return ['/'.join(line.split('/')[1:]) for line in info[1:]]
 
     def pull(self, repo_dir: str):
         cmds = ['-C', repo_dir, 'pull']
diff --git a/tests/hub/test_hub_upload.py b/tests/hub/test_hub_upload.py
index 2250164b..a10f7c81 100644
--- a/tests/hub/test_hub_upload.py
+++ b/tests/hub/test_hub_upload.py
@@ -7,12 +7,13 @@ import uuid
 
 from modelscope.hub.api import HubApi
 from modelscope.hub.constants import Licenses, ModelVisibility
+from modelscope.hub.errors import HTTPError, NotLoginException
 from modelscope.hub.repository import Repository
 from modelscope.hub.upload import upload_folder
 from modelscope.utils.constant import ModelFile
 from modelscope.utils.logger import get_logger
 from modelscope.utils.test_utils import test_level
-from .test_utils import TEST_ACCESS_TOKEN1, delete_credential
+from .test_utils import TEST_ACCESS_TOKEN1, TEST_MODEL_ORG, delete_credential
 
 logger = get_logger()
 
@@ -22,7 +23,7 @@ class HubUploadTest(unittest.TestCase):
     def setUp(self):
         logger.info('SetUp')
         self.api = HubApi()
-        self.user = os.environ.get('TEST_MODEL_ORG', 'citest')
+        self.user = TEST_MODEL_ORG
         logger.info(self.user)
         self.create_model_name = '%s/%s_%s' % (self.user, 'test_model_upload',
                                                uuid.uuid4().hex)
@@ -39,7 +40,10 @@ class HubUploadTest(unittest.TestCase):
     def tearDown(self):
         logger.info('TearDown')
         shutil.rmtree(self.model_dir, ignore_errors=True)
-        self.api.delete_model(model_id=self.create_model_name)
+        try:
+            self.api.delete_model(model_id=self.create_model_name)
+        except Exception:
+            pass
 
     def test_upload_exits_repo_master(self):
         logger.info('basic test for upload!')
@@ -119,48 +123,23 @@ class HubUploadTest(unittest.TestCase):
         logger.info('test upload without login!')
         self.api.login(TEST_ACCESS_TOKEN1)
         delete_credential()
-        try:
+        with self.assertRaises(NotLoginException):
             upload_folder(
                 model_id=self.create_model_name,
                 model_dir=self.finetune_path,
                 visibility=ModelVisibility.PUBLIC,
                 license=Licenses.APACHE_V2)
-        except Exception as e:
-            logger.info(e)
-            self.api.login(TEST_ACCESS_TOKEN1)
-            upload_folder(
-                model_id=self.create_model_name,
-                model_dir=self.finetune_path,
-                visibility=ModelVisibility.PUBLIC,
-                license=Licenses.APACHE_V2)
-            Repository(
-                model_dir=self.repo_path, clone_from=self.create_model_name)
-            assert os.path.exists(
-                os.path.join(self.repo_path, 'configuration.json'))
-            shutil.rmtree(self.repo_path, ignore_errors=True)
 
     @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_upload_invalid_repo(self):
         logger.info('test upload to invalid repo!')
         self.api.login(TEST_ACCESS_TOKEN1)
-        try:
+        with self.assertRaises(HTTPError):
             upload_folder(
                 model_id='%s/%s' % ('speech_tts', 'invalid_model_test'),
                 model_dir=self.finetune_path,
                 visibility=ModelVisibility.PUBLIC,
                 license=Licenses.APACHE_V2)
-        except Exception as e:
-            logger.info(e)
-            upload_folder(
-                model_id=self.create_model_name,
-                model_dir=self.finetune_path,
-                visibility=ModelVisibility.PUBLIC,
-                license=Licenses.APACHE_V2)
-            Repository(
-                model_dir=self.repo_path, clone_from=self.create_model_name)
-            assert os.path.exists(
-                os.path.join(self.repo_path, 'configuration.json'))
-            shutil.rmtree(self.repo_path, ignore_errors=True)
 
 
 if __name__ == '__main__':

From 089cadab4b0f0e6d17a76ef0ef88ae0c1c9d4fe5 Mon Sep 17 00:00:00 2001
From: Yingda Chen <yingda.chen@alibaba-inc.com>
Date: Thu, 20 Oct 2022 08:51:25 +0800
Subject: [PATCH 07/15] [to #42322933] disable unstable trainer test

---
 tests/trainers/test_image_denoise_trainer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/trainers/test_image_denoise_trainer.py b/tests/trainers/test_image_denoise_trainer.py
index 68ddf616..c4abca6a 100644
--- a/tests/trainers/test_image_denoise_trainer.py
+++ b/tests/trainers/test_image_denoise_trainer.py
@@ -51,7 +51,7 @@ class ImageDenoiseTrainerTest(unittest.TestCase):
         shutil.rmtree(self.tmp_dir, ignore_errors=True)
         super().tearDown()
 
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_trainer(self):
         kwargs = dict(
             model=self.model_id,
@@ -65,7 +65,7 @@ class ImageDenoiseTrainerTest(unittest.TestCase):
         for i in range(2):
             self.assertIn(f'epoch_{i+1}.pth', results_files)
 
-    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_trainer_with_model_and_args(self):
         model = NAFNetForImageDenoise.from_pretrained(self.cache_path)
         kwargs = dict(

From 3cd5f73da076cc550b747473199585a80bc90917 Mon Sep 17 00:00:00 2001
From: "yingda.chen" <yingda.chen@alibaba-inc.com>
Date: Thu, 20 Oct 2022 10:28:15 +0800
Subject: [PATCH 08/15] [to #42322933] refactor push_model         Link:
 https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10463992

---
 modelscope/hub/api.py               | 128 +++++++++++++++++++++++++---
 modelscope/hub/file_download.py     |   3 +-
 modelscope/hub/git.py               |   4 +-
 modelscope/hub/repository.py        |   4 +-
 modelscope/hub/snapshot_download.py |   2 +-
 modelscope/hub/upload.py            | 117 -------------------------
 tests/hub/test_hub_operation.py     |   2 +-
 tests/hub/test_hub_upload.py        |  15 ++--
 8 files changed, 130 insertions(+), 145 deletions(-)
 delete mode 100644 modelscope/hub/upload.py

diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py
index 2aab142e..f8ca683a 100644
--- a/modelscope/hub/api.py
+++ b/modelscope/hub/api.py
@@ -1,8 +1,11 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 
+# yapf: disable
+import datetime
 import os
 import pickle
 import shutil
+import tempfile
 from collections import defaultdict
 from http import HTTPStatus
 from http.cookiejar import CookieJar
@@ -16,17 +19,25 @@ from modelscope.hub.constants import (API_RESPONSE_FIELD_DATA,
                                       API_RESPONSE_FIELD_GIT_ACCESS_TOKEN,
                                       API_RESPONSE_FIELD_MESSAGE,
                                       API_RESPONSE_FIELD_USERNAME,
-                                      DEFAULT_CREDENTIALS_PATH)
+                                      DEFAULT_CREDENTIALS_PATH, Licenses,
+                                      ModelVisibility)
+from modelscope.hub.errors import (InvalidParameter, NotExistError,
+                                   NotLoginException, RequestError,
+                                   datahub_raise_on_error,
+                                   handle_http_post_error,
+                                   handle_http_response, is_ok, raise_on_error)
+from modelscope.hub.git import GitCommandWrapper
+from modelscope.hub.repository import Repository
+from modelscope.hub.utils.utils import (get_endpoint,
+                                        model_id_to_group_owner_name)
 from modelscope.utils.config_ds import DOWNLOADED_DATASETS_PATH
 from modelscope.utils.constant import (DEFAULT_DATASET_REVISION,
                                        DEFAULT_MODEL_REVISION,
                                        DatasetFormations, DatasetMetaFormats,
-                                       DownloadMode)
+                                       DownloadMode, ModelFile)
 from modelscope.utils.logger import get_logger
-from .errors import (InvalidParameter, NotExistError, RequestError,
-                     datahub_raise_on_error, handle_http_post_error,
-                     handle_http_response, is_ok, raise_on_error)
-from .utils.utils import get_endpoint, model_id_to_group_owner_name
+
+# yapf: enable
 
 logger = get_logger()
 
@@ -169,11 +180,106 @@ class HubApi:
         else:
             r.raise_for_status()
 
-    def list_model(self,
-                   owner_or_group: str,
-                   page_number=1,
-                   page_size=10) -> dict:
-        """List model in owner or group.
+    def push_model(self,
+                   model_id: str,
+                   model_dir: str,
+                   visibility: int = ModelVisibility.PUBLIC,
+                   license: str = Licenses.APACHE_V2,
+                   chinese_name: Optional[str] = None,
+                   commit_message: Optional[str] = 'upload model',
+                   revision: Optional[str] = DEFAULT_MODEL_REVISION):
+        """
+        Upload model from a given directory to given repository. A valid model directory
+        must contain a configuration.json file.
+
+        This function upload the files in given directory to given repository. If the
+        given repository is not exists in remote, it will automatically create it with
+        given visibility, license and chinese_name parameters. If the revision is also
+        not exists in remote repository, it will create a new branch for it.
+
+        This function must be called before calling HubApi's login with a valid token
+        which can be obtained from ModelScope's website.
+
+        Args:
+            model_id (`str`):
+                The model id to be uploaded, caller must have write permission for it.
+            model_dir(`str`):
+                The Absolute Path of the finetune result.
+            visibility(`int`, defaults to `0`):
+                Visibility of the new created model(1-private, 5-public). If the model is
+                not exists in ModelScope, this function will create a new model with this
+                visibility and this parameter is required. You can ignore this parameter
+                if you make sure the model's existence.
+            license(`str`, defaults to `None`):
+                License of the new created model(see License). If the model is not exists
+                in ModelScope, this function will create a new model with this license
+                and this parameter is required. You can ignore this parameter if you
+                make sure the model's existence.
+            chinese_name(`str`, *optional*, defaults to `None`):
+                chinese name of the new created model.
+            commit_message(`str`, *optional*, defaults to `None`):
+                commit message of the push request.
+            revision (`str`, *optional*, default to DEFAULT_MODEL_REVISION):
+                which branch to push. If the branch is not exists, It will create a new
+                branch and push to it.
+        """
+        if model_id is None:
+            raise InvalidParameter('model_id cannot be empty!')
+        if model_dir is None:
+            raise InvalidParameter('model_dir cannot be empty!')
+        if not os.path.exists(model_dir) or os.path.isfile(model_dir):
+            raise InvalidParameter('model_dir must be a valid directory.')
+        cfg_file = os.path.join(model_dir, ModelFile.CONFIGURATION)
+        if not os.path.exists(cfg_file):
+            raise ValueError(f'{model_dir} must contain a configuration.json.')
+        cookies = ModelScopeConfig.get_cookies()
+        if cookies is None:
+            raise NotLoginException('Must login before upload!')
+        files_to_save = os.listdir(model_dir)
+        try:
+            self.get_model(model_id=model_id)
+        except Exception:
+            if visibility is None or license is None:
+                raise InvalidParameter(
+                    'visibility and license cannot be empty if want to create new repo'
+                )
+            logger.info('Create new model %s' % model_id)
+            self.create_model(
+                model_id=model_id,
+                visibility=visibility,
+                license=license,
+                chinese_name=chinese_name)
+        tmp_dir = tempfile.mkdtemp()
+        git_wrapper = GitCommandWrapper()
+        try:
+            repo = Repository(model_dir=tmp_dir, clone_from=model_id)
+            branches = git_wrapper.get_remote_branches(tmp_dir)
+            if revision not in branches:
+                logger.info('Create new branch %s' % revision)
+                git_wrapper.new_branch(tmp_dir, revision)
+            git_wrapper.checkout(tmp_dir, revision)
+            for f in files_to_save:
+                if f[0] != '.':
+                    src = os.path.join(model_dir, f)
+                    if os.path.isdir(src):
+                        shutil.copytree(src, os.path.join(tmp_dir, f))
+                    else:
+                        shutil.copy(src, tmp_dir)
+            if not commit_message:
+                date = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
+                commit_message = '[automsg] push model %s to hub at %s' % (
+                    model_id, date)
+            repo.push(commit_message=commit_message, branch=revision)
+        except Exception:
+            raise
+        finally:
+            shutil.rmtree(tmp_dir, ignore_errors=True)
+
+    def list_models(self,
+                    owner_or_group: str,
+                    page_number=1,
+                    page_size=10) -> dict:
+        """List models in owner or group.
 
         Args:
             owner_or_group(`str`): owner or group.
diff --git a/modelscope/hub/file_download.py b/modelscope/hub/file_download.py
index 1cc5645b..8ffc60bc 100644
--- a/modelscope/hub/file_download.py
+++ b/modelscope/hub/file_download.py
@@ -11,13 +11,12 @@ from typing import Dict, Optional, Union
 from uuid import uuid4
 
 import requests
-from filelock import FileLock
 from tqdm import tqdm
 
 from modelscope import __version__
+from modelscope.hub.api import HubApi, ModelScopeConfig
 from modelscope.utils.constant import DEFAULT_MODEL_REVISION
 from modelscope.utils.logger import get_logger
-from .api import HubApi, ModelScopeConfig
 from .constants import FILE_HASH
 from .errors import FileDownloadError, NotExistError
 from .utils.caching import ModelFileSystemCache
diff --git a/modelscope/hub/git.py b/modelscope/hub/git.py
index 1369fddf..fe1d1554 100644
--- a/modelscope/hub/git.py
+++ b/modelscope/hub/git.py
@@ -1,13 +1,10 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 
 import os
-import re
 import subprocess
 from typing import List
-from xmlrpc.client import Boolean
 
 from modelscope.utils.logger import get_logger
-from .api import ModelScopeConfig
 from .errors import GitError
 
 logger = get_logger()
@@ -132,6 +129,7 @@ class GitCommandWrapper(metaclass=Singleton):
         return response
 
     def add_user_info(self, repo_base_dir, repo_name):
+        from modelscope.hub.api import ModelScopeConfig
         user_name, user_email = ModelScopeConfig.get_user_info()
         if user_name and user_email:
             # config user.name and user.email if exist
diff --git a/modelscope/hub/repository.py b/modelscope/hub/repository.py
index d92089ed..35c831a9 100644
--- a/modelscope/hub/repository.py
+++ b/modelscope/hub/repository.py
@@ -7,7 +7,6 @@ from modelscope.hub.errors import GitError, InvalidParameter, NotLoginException
 from modelscope.utils.constant import (DEFAULT_DATASET_REVISION,
                                        DEFAULT_MODEL_REVISION)
 from modelscope.utils.logger import get_logger
-from .api import ModelScopeConfig
 from .git import GitCommandWrapper
 from .utils.utils import get_endpoint
 
@@ -47,6 +46,7 @@ class Repository:
             err_msg = 'a non-default value of revision cannot be empty.'
             raise InvalidParameter(err_msg)
 
+        from modelscope.hub.api import ModelScopeConfig
         if auth_token:
             self.auth_token = auth_token
         else:
@@ -166,7 +166,7 @@ class DatasetRepository:
             err_msg = 'a non-default value of revision cannot be empty.'
             raise InvalidParameter(err_msg)
         self.revision = revision
-
+        from modelscope.hub.api import ModelScopeConfig
         if auth_token:
             self.auth_token = auth_token
         else:
diff --git a/modelscope/hub/snapshot_download.py b/modelscope/hub/snapshot_download.py
index cde6ad34..ac57d1b1 100644
--- a/modelscope/hub/snapshot_download.py
+++ b/modelscope/hub/snapshot_download.py
@@ -5,9 +5,9 @@ import tempfile
 from pathlib import Path
 from typing import Dict, Optional, Union
 
+from modelscope.hub.api import HubApi, ModelScopeConfig
 from modelscope.utils.constant import DEFAULT_MODEL_REVISION
 from modelscope.utils.logger import get_logger
-from .api import HubApi, ModelScopeConfig
 from .constants import FILE_HASH
 from .errors import NotExistError
 from .file_download import (get_file_download_url, http_get_file,
diff --git a/modelscope/hub/upload.py b/modelscope/hub/upload.py
deleted file mode 100644
index 9dffc60e..00000000
--- a/modelscope/hub/upload.py
+++ /dev/null
@@ -1,117 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-
-import datetime
-import os
-import shutil
-import tempfile
-import uuid
-from typing import Dict, Optional
-from uuid import uuid4
-
-from filelock import FileLock
-
-from modelscope import __version__
-from modelscope.hub.api import HubApi, ModelScopeConfig
-from modelscope.hub.errors import InvalidParameter, NotLoginException
-from modelscope.hub.git import GitCommandWrapper
-from modelscope.hub.repository import Repository
-from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile
-from modelscope.utils.logger import get_logger
-
-logger = get_logger()
-
-
-def upload_folder(model_id: str,
-                  model_dir: str,
-                  visibility: int = 0,
-                  license: str = None,
-                  chinese_name: Optional[str] = None,
-                  commit_message: Optional[str] = None,
-                  revision: Optional[str] = DEFAULT_MODEL_REVISION):
-    """
-    Upload model from a given directory to given repository. A valid model directory
-    must contain a configuration.json file.
-
-    This function upload the files in given directory to given repository. If the
-    given repository is not exists in remote, it will automatically create it with
-    given visibility, license and chinese_name parameters. If the revision is also
-    not exists in remote repository, it will create a new branch for it.
-
-    This function must be called before calling HubApi's login with a valid token
-    which can be obtained from ModelScope's website.
-
-    Args:
-        model_id (`str`):
-            The model id to be uploaded, caller must have write permission for it.
-        model_dir(`str`):
-            The Absolute Path of the finetune result.
-        visibility(`int`, defaults to `0`):
-            Visibility of the new created model(1-private, 5-public). If the model is
-            not exists in ModelScope, this function will create a new model with this
-            visibility and this parameter is required. You can ignore this parameter
-            if you make sure the model's existence.
-        license(`str`, defaults to `None`):
-            License of the new created model(see License). If the model is not exists
-            in ModelScope, this function will create a new model with this license
-            and this parameter is required. You can ignore this parameter if you
-            make sure the model's existence.
-        chinese_name(`str`, *optional*, defaults to `None`):
-            chinese name of the new created model.
-        commit_message(`str`, *optional*, defaults to `None`):
-            commit message of the push request.
-        revision (`str`, *optional*, default to DEFAULT_MODEL_REVISION):
-            which branch to push. If the branch is not exists, It will create a new
-            branch and push to it.
-    """
-    if model_id is None:
-        raise InvalidParameter('model_id cannot be empty!')
-    if model_dir is None:
-        raise InvalidParameter('model_dir cannot be empty!')
-    if not os.path.exists(model_dir) or os.path.isfile(model_dir):
-        raise InvalidParameter('model_dir must be a valid directory.')
-    cfg_file = os.path.join(model_dir, ModelFile.CONFIGURATION)
-    if not os.path.exists(cfg_file):
-        raise ValueError(f'{model_dir} must contain a configuration.json.')
-    cookies = ModelScopeConfig.get_cookies()
-    if cookies is None:
-        raise NotLoginException('Must login before upload!')
-    files_to_save = os.listdir(model_dir)
-    api = HubApi()
-    try:
-        api.get_model(model_id=model_id)
-    except Exception:
-        if visibility is None or license is None:
-            raise InvalidParameter(
-                'visibility and license cannot be empty if want to create new repo'
-            )
-        logger.info('Create new model %s' % model_id)
-        api.create_model(
-            model_id=model_id,
-            visibility=visibility,
-            license=license,
-            chinese_name=chinese_name)
-    tmp_dir = tempfile.mkdtemp()
-    git_wrapper = GitCommandWrapper()
-    try:
-        repo = Repository(model_dir=tmp_dir, clone_from=model_id)
-        branches = git_wrapper.get_remote_branches(tmp_dir)
-        if revision not in branches:
-            logger.info('Create new branch %s' % revision)
-            git_wrapper.new_branch(tmp_dir, revision)
-        git_wrapper.checkout(tmp_dir, revision)
-        for f in files_to_save:
-            if f[0] != '.':
-                src = os.path.join(model_dir, f)
-                if os.path.isdir(src):
-                    shutil.copytree(src, os.path.join(tmp_dir, f))
-                else:
-                    shutil.copy(src, tmp_dir)
-        if not commit_message:
-            date = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
-            commit_message = '[automsg] push model %s to hub at %s' % (
-                model_id, date)
-        repo.push(commit_message=commit_message, branch=revision)
-    except Exception:
-        raise
-    finally:
-        shutil.rmtree(tmp_dir, ignore_errors=True)
diff --git a/tests/hub/test_hub_operation.py b/tests/hub/test_hub_operation.py
index c96db986..f2bdb2d3 100644
--- a/tests/hub/test_hub_operation.py
+++ b/tests/hub/test_hub_operation.py
@@ -127,7 +127,7 @@ class HubOperationTest(unittest.TestCase):
         return None
 
     def test_list_model(self):
-        data = self.api.list_model(TEST_MODEL_ORG)
+        data = self.api.list_models(TEST_MODEL_ORG)
         assert len(data['Models']) >= 1
 
 
diff --git a/tests/hub/test_hub_upload.py b/tests/hub/test_hub_upload.py
index a10f7c81..e1f61467 100644
--- a/tests/hub/test_hub_upload.py
+++ b/tests/hub/test_hub_upload.py
@@ -9,7 +9,6 @@ from modelscope.hub.api import HubApi
 from modelscope.hub.constants import Licenses, ModelVisibility
 from modelscope.hub.errors import HTTPError, NotLoginException
 from modelscope.hub.repository import Repository
-from modelscope.hub.upload import upload_folder
 from modelscope.utils.constant import ModelFile
 from modelscope.utils.logger import get_logger
 from modelscope.utils.test_utils import test_level
@@ -54,14 +53,14 @@ class HubUploadTest(unittest.TestCase):
             license=Licenses.APACHE_V2)
         os.system("echo '111'>%s"
                   % os.path.join(self.finetune_path, 'add1.py'))
-        upload_folder(
+        self.api.push_model(
             model_id=self.create_model_name, model_dir=self.finetune_path)
         Repository(model_dir=self.repo_path, clone_from=self.create_model_name)
         assert os.path.exists(os.path.join(self.repo_path, 'add1.py'))
         shutil.rmtree(self.repo_path, ignore_errors=True)
         os.system("echo '222'>%s"
                   % os.path.join(self.finetune_path, 'add2.py'))
-        upload_folder(
+        self.api.push_model(
             model_id=self.create_model_name,
             model_dir=self.finetune_path,
             revision='new_revision/version1')
@@ -73,7 +72,7 @@ class HubUploadTest(unittest.TestCase):
         shutil.rmtree(self.repo_path, ignore_errors=True)
         os.system("echo '333'>%s"
                   % os.path.join(self.finetune_path, 'add3.py'))
-        upload_folder(
+        self.api.push_model(
             model_id=self.create_model_name,
             model_dir=self.finetune_path,
             revision='new_revision/version2',
@@ -88,7 +87,7 @@ class HubUploadTest(unittest.TestCase):
         add4_path = os.path.join(self.finetune_path, 'temp')
         os.mkdir(add4_path)
         os.system("echo '444'>%s" % os.path.join(add4_path, 'add4.py'))
-        upload_folder(
+        self.api.push_model(
             model_id=self.create_model_name,
             model_dir=self.finetune_path,
             revision='new_revision/version1')
@@ -105,7 +104,7 @@ class HubUploadTest(unittest.TestCase):
         self.api.login(TEST_ACCESS_TOKEN1)
         os.system("echo '111'>%s"
                   % os.path.join(self.finetune_path, 'add1.py'))
-        upload_folder(
+        self.api.push_model(
             model_id=self.create_model_name,
             model_dir=self.finetune_path,
             revision='new_model_new_revision',
@@ -124,7 +123,7 @@ class HubUploadTest(unittest.TestCase):
         self.api.login(TEST_ACCESS_TOKEN1)
         delete_credential()
         with self.assertRaises(NotLoginException):
-            upload_folder(
+            self.api.push_model(
                 model_id=self.create_model_name,
                 model_dir=self.finetune_path,
                 visibility=ModelVisibility.PUBLIC,
@@ -135,7 +134,7 @@ class HubUploadTest(unittest.TestCase):
         logger.info('test upload to invalid repo!')
         self.api.login(TEST_ACCESS_TOKEN1)
         with self.assertRaises(HTTPError):
-            upload_folder(
+            self.api.push_model(
                 model_id='%s/%s' % ('speech_tts', 'invalid_model_test'),
                 model_dir=self.finetune_path,
                 visibility=ModelVisibility.PUBLIC,

From eb7bd5b8256d474e4382ee73f19860bed3d21689 Mon Sep 17 00:00:00 2001
From: "jiaqi.sjq" <jiaqi.sjq@alibaba-inc.com>
Date: Thu, 20 Oct 2022 11:46:50 +0800
Subject: [PATCH 09/15] [to #42322933]Add lock to tts's AM and model loaded
 since its AR part not lockfree

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10464620
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10464620
---
 modelscope/models/audio/tts/voice.py | 78 +++++++++++++++-------------
 1 file changed, 41 insertions(+), 37 deletions(-)

diff --git a/modelscope/models/audio/tts/voice.py b/modelscope/models/audio/tts/voice.py
index dc830db5..b7240088 100644
--- a/modelscope/models/audio/tts/voice.py
+++ b/modelscope/models/audio/tts/voice.py
@@ -2,6 +2,7 @@
 
 import os
 import pickle as pkl
+from threading import Lock
 
 import json
 import numpy as np
@@ -27,6 +28,7 @@ class Voice:
         self.__am_config = AttrDict(**am_config)
         self.__voc_config = AttrDict(**voc_config)
         self.__model_loaded = False
+        self.__lock = Lock()
         if 'am' not in self.__am_config:
             raise TtsModelConfigurationException(
                 'modelscope error: am configuration invalid')
@@ -71,34 +73,35 @@ class Voice:
         self.__generator.remove_weight_norm()
 
     def __am_forward(self, symbol_seq):
-        with torch.no_grad():
-            inputs_feat_lst = self.__ling_unit.encode_symbol_sequence(
-                symbol_seq)
-            inputs_sy = torch.from_numpy(inputs_feat_lst[0]).long().to(
-                self.__device)
-            inputs_tone = torch.from_numpy(inputs_feat_lst[1]).long().to(
-                self.__device)
-            inputs_syllable = torch.from_numpy(inputs_feat_lst[2]).long().to(
-                self.__device)
-            inputs_ws = torch.from_numpy(inputs_feat_lst[3]).long().to(
-                self.__device)
-            inputs_ling = torch.stack(
-                [inputs_sy, inputs_tone, inputs_syllable, inputs_ws],
-                dim=-1).unsqueeze(0)
-            inputs_emo = torch.from_numpy(inputs_feat_lst[4]).long().to(
-                self.__device).unsqueeze(0)
-            inputs_spk = torch.from_numpy(inputs_feat_lst[5]).long().to(
-                self.__device).unsqueeze(0)
-            inputs_len = torch.zeros(1).to(self.__device).long(
-            ) + inputs_emo.size(1) - 1  # minus 1 for "~"
-            res = self.__am_net(inputs_ling[:, :-1, :], inputs_emo[:, :-1],
-                                inputs_spk[:, :-1], inputs_len)
-            postnet_outputs = res['postnet_outputs']
-            LR_length_rounded = res['LR_length_rounded']
-            valid_length = int(LR_length_rounded[0].item())
-            postnet_outputs = postnet_outputs[
-                0, :valid_length, :].cpu().numpy()
-            return postnet_outputs
+        with self.__lock:
+            with torch.no_grad():
+                inputs_feat_lst = self.__ling_unit.encode_symbol_sequence(
+                    symbol_seq)
+                inputs_sy = torch.from_numpy(inputs_feat_lst[0]).long().to(
+                    self.__device)
+                inputs_tone = torch.from_numpy(inputs_feat_lst[1]).long().to(
+                    self.__device)
+                inputs_syllable = torch.from_numpy(
+                    inputs_feat_lst[2]).long().to(self.__device)
+                inputs_ws = torch.from_numpy(inputs_feat_lst[3]).long().to(
+                    self.__device)
+                inputs_ling = torch.stack(
+                    [inputs_sy, inputs_tone, inputs_syllable, inputs_ws],
+                    dim=-1).unsqueeze(0)
+                inputs_emo = torch.from_numpy(inputs_feat_lst[4]).long().to(
+                    self.__device).unsqueeze(0)
+                inputs_spk = torch.from_numpy(inputs_feat_lst[5]).long().to(
+                    self.__device).unsqueeze(0)
+                inputs_len = torch.zeros(1).to(self.__device).long(
+                ) + inputs_emo.size(1) - 1  # minus 1 for "~"
+                res = self.__am_net(inputs_ling[:, :-1, :], inputs_emo[:, :-1],
+                                    inputs_spk[:, :-1], inputs_len)
+                postnet_outputs = res['postnet_outputs']
+                LR_length_rounded = res['LR_length_rounded']
+                valid_length = int(LR_length_rounded[0].item())
+                postnet_outputs = postnet_outputs[
+                    0, :valid_length, :].cpu().numpy()
+                return postnet_outputs
 
     def __vocoder_forward(self, melspec):
         dim0 = list(melspec.shape)[-1]
@@ -118,14 +121,15 @@ class Voice:
             return audio
 
     def forward(self, symbol_seq):
-        if not self.__model_loaded:
-            torch.manual_seed(self.__am_config.seed)
-            if torch.cuda.is_available():
+        with self.__lock:
+            if not self.__model_loaded:
                 torch.manual_seed(self.__am_config.seed)
-                self.__device = torch.device('cuda')
-            else:
-                self.__device = torch.device('cpu')
-            self.__load_am()
-            self.__load_vocoder()
-            self.__model_loaded = True
+                if torch.cuda.is_available():
+                    torch.manual_seed(self.__am_config.seed)
+                    self.__device = torch.device('cuda')
+                else:
+                    self.__device = torch.device('cpu')
+                self.__load_am()
+                self.__load_vocoder()
+                self.__model_loaded = True
         return self.__vocoder_forward(self.__am_forward(symbol_seq))

From 01d521dd7851ee273de3793d7ad123ad99c0de9c Mon Sep 17 00:00:00 2001
From: "shouzhou.bx" <shouzhou.bx@alibaba-inc.com>
Date: Thu, 20 Oct 2022 11:51:52 +0800
Subject: [PATCH 10/15] [to #42322933]add face 2d keypoints finetune test case 
        Link:
 https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10421808

    * add face 2d keypoints & human wholebody keypoint finrtune test case
---
 modelscope/metainfo.py                        |  4 +-
 modelscope/msdatasets/cv/easycv_base.py       | 13 ++--
 requirements/cv.txt                           |  2 +-
 .../test_easycv_trainer_face_2d_keypoints.py  | 71 +++++++++++++++++++
 4 files changed, 83 insertions(+), 7 deletions(-)
 create mode 100644 tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py

diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py
index 6c8d91fa..31bef3b8 100644
--- a/modelscope/metainfo.py
+++ b/modelscope/metainfo.py
@@ -452,9 +452,9 @@ class Datasets(object):
     """ Names for different datasets.
     """
     ClsDataset = 'ClsDataset'
-    Face2dKeypointsDataset = 'Face2dKeypointsDataset'
+    Face2dKeypointsDataset = 'FaceKeypointDataset'
     HandCocoWholeBodyDataset = 'HandCocoWholeBodyDataset'
-    HumanWholeBodyKeypointDataset = 'HumanWholeBodyKeypointDataset'
+    HumanWholeBodyKeypointDataset = 'WholeBodyCocoTopDownDataset'
     SegDataset = 'SegDataset'
     DetDataset = 'DetDataset'
     DetImagesMixDataset = 'DetImagesMixDataset'
diff --git a/modelscope/msdatasets/cv/easycv_base.py b/modelscope/msdatasets/cv/easycv_base.py
index a45827a3..7b6df6e0 100644
--- a/modelscope/msdatasets/cv/easycv_base.py
+++ b/modelscope/msdatasets/cv/easycv_base.py
@@ -26,11 +26,16 @@ class EasyCVBaseDataset(object):
         if self.split_config is not None:
             self._update_data_source(kwargs['data_source'])
 
+    def _update_data_root(self, input_dict, data_root):
+        for k, v in input_dict.items():
+            if isinstance(v, str) and self.DATA_ROOT_PATTERN in v:
+                input_dict.update(
+                    {k: v.replace(self.DATA_ROOT_PATTERN, data_root)})
+            elif isinstance(v, dict):
+                self._update_data_root(v, data_root)
+
     def _update_data_source(self, data_source):
         data_root = next(iter(self.split_config.values()))
         data_root = data_root.rstrip(osp.sep)
 
-        for k, v in data_source.items():
-            if isinstance(v, str) and self.DATA_ROOT_PATTERN in v:
-                data_source.update(
-                    {k: v.replace(self.DATA_ROOT_PATTERN, data_root)})
+        self._update_data_root(data_source, data_root)
diff --git a/requirements/cv.txt b/requirements/cv.txt
index d23fab3a..f29b296b 100644
--- a/requirements/cv.txt
+++ b/requirements/cv.txt
@@ -19,7 +19,7 @@ moviepy>=1.0.3
 networkx>=2.5
 numba
 onnxruntime>=1.10
-pai-easycv>=0.6.3.7
+pai-easycv>=0.6.3.9
 pandas
 psutil
 regex
diff --git a/tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py b/tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py
new file mode 100644
index 00000000..4dffa998
--- /dev/null
+++ b/tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py
@@ -0,0 +1,71 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import glob
+import os
+import shutil
+import tempfile
+import unittest
+
+import torch
+
+from modelscope.metainfo import Trainers
+from modelscope.msdatasets import MsDataset
+from modelscope.trainers import build_trainer
+from modelscope.utils.constant import DownloadMode, LogKeys, Tasks
+from modelscope.utils.logger import get_logger
+from modelscope.utils.test_utils import test_level
+
+
+@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest')
+class EasyCVTrainerTestFace2DKeypoints(unittest.TestCase):
+    model_id = 'damo/cv_mobilenet_face-2d-keypoints_alignment'
+
+    def setUp(self):
+        self.logger = get_logger()
+        self.logger.info(('Testing %s.%s' %
+                          (type(self).__name__, self._testMethodName)))
+
+    def _train(self, tmp_dir):
+        cfg_options = {'train.max_epochs': 2}
+
+        trainer_name = Trainers.easycv
+
+        train_dataset = MsDataset.load(
+            dataset_name='face_2d_keypoints_dataset',
+            namespace='modelscope',
+            split='train',
+            download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS)
+        eval_dataset = MsDataset.load(
+            dataset_name='face_2d_keypoints_dataset',
+            namespace='modelscope',
+            split='train',
+            download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS)
+
+        kwargs = dict(
+            model=self.model_id,
+            train_dataset=train_dataset,
+            eval_dataset=eval_dataset,
+            work_dir=tmp_dir,
+            cfg_options=cfg_options)
+
+        trainer = build_trainer(trainer_name, kwargs)
+        trainer.train()
+
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    def test_trainer_single_gpu(self):
+        temp_file_dir = tempfile.TemporaryDirectory()
+        tmp_dir = temp_file_dir.name
+        if not os.path.exists(tmp_dir):
+            os.makedirs(tmp_dir)
+
+        self._train(tmp_dir)
+
+        results_files = os.listdir(tmp_dir)
+        json_files = glob.glob(os.path.join(tmp_dir, '*.log.json'))
+        self.assertEqual(len(json_files), 1)
+        self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files)
+
+        temp_file_dir.cleanup()
+
+
+if __name__ == '__main__':
+    unittest.main()

From 535acaef5bc01f8d6059e6acf364eaebb1147f7b Mon Sep 17 00:00:00 2001
From: "wenmeng.zwm" <wenmeng.zwm@alibaba-inc.com>
Date: Thu, 20 Oct 2022 12:13:19 +0800
Subject: [PATCH 11/15] [to #42322933]add test case to check xtcocotools
 availbility         Link:
 https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10462622

    * add test case to check xtcocotools availbility
---
 tests/utils/test_compatibility.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 tests/utils/test_compatibility.py

diff --git a/tests/utils/test_compatibility.py b/tests/utils/test_compatibility.py
new file mode 100644
index 00000000..f5222261
--- /dev/null
+++ b/tests/utils/test_compatibility.py
@@ -0,0 +1,19 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import unittest
+
+
+class CompatibilityTest(unittest.TestCase):
+
+    def setUp(self):
+        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
+
+    def tearDown(self):
+        super().tearDown()
+
+    def test_xtcocotools(self):
+        from xtcocotools.coco import COCO
+
+
+if __name__ == '__main__':
+    unittest.main()

From e7c7be6aae33537e25cf4187336a0c46479a75f3 Mon Sep 17 00:00:00 2001
From: "xingguang.zxg" <xingguang.zxg@alibaba-inc.com>
Date: Thu, 20 Oct 2022 12:51:48 +0800
Subject: [PATCH 12/15] fix cpu error         Link:
 https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10432300

    * fix cpu error
---
 modelscope/models/cv/text_driven_segmentation/lseg_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modelscope/models/cv/text_driven_segmentation/lseg_model.py b/modelscope/models/cv/text_driven_segmentation/lseg_model.py
index 9a5754c6..ec381356 100644
--- a/modelscope/models/cv/text_driven_segmentation/lseg_model.py
+++ b/modelscope/models/cv/text_driven_segmentation/lseg_model.py
@@ -93,7 +93,7 @@ class TextDrivenSeg(TorchModel):
         """
         with torch.no_grad():
             if self.device_id == -1:
-                output = self.model(image)
+                output = self.model(image, [text])
             else:
                 device = torch.device('cuda', self.device_id)
                 output = self.model(image.to(device), [text])

From 1483c64638a002279443752adc195df6ef2e7494 Mon Sep 17 00:00:00 2001
From: "shichen.fsc" <shichen.fsc@alibaba-inc.com>
Date: Thu, 20 Oct 2022 12:54:37 +0800
Subject: [PATCH 13/15] [to #42322933] Fix ASR error when resample failed, and
 add all asr models UT, add apply-cmvn for pytorch models         Link:
 https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10465241

---
 data/test/audios/asr_example_8K.wav           |   3 +
 data/test/audios/asr_example_cn_dialect.wav   |   3 +
 data/test/audios/asr_example_cn_en.wav        |   3 +
 data/test/audios/asr_example_en.wav           |   3 +
 data/test/audios/asr_example_es.wav           |   3 +
 data/test/audios/asr_example_id.wav           |   3 +
 data/test/audios/asr_example_ja.wav           |   3 +
 data/test/audios/asr_example_ko.wav           |   3 +
 data/test/audios/asr_example_ru.wav           |   3 +
 .../pipelines/audio/asr_inference_pipeline.py |  19 +-
 .../pipelines/audio/kws_kwsbp_pipeline.py     |   4 +-
 modelscope/preprocessors/asr.py               |   6 +
 modelscope/utils/audio/audio_utils.py         |  13 +-
 .../test_automatic_speech_recognition.py      | 297 +++++++++++++-----
 14 files changed, 285 insertions(+), 81 deletions(-)
 create mode 100644 data/test/audios/asr_example_8K.wav
 create mode 100644 data/test/audios/asr_example_cn_dialect.wav
 create mode 100644 data/test/audios/asr_example_cn_en.wav
 create mode 100644 data/test/audios/asr_example_en.wav
 create mode 100644 data/test/audios/asr_example_es.wav
 create mode 100644 data/test/audios/asr_example_id.wav
 create mode 100644 data/test/audios/asr_example_ja.wav
 create mode 100644 data/test/audios/asr_example_ko.wav
 create mode 100644 data/test/audios/asr_example_ru.wav

diff --git a/data/test/audios/asr_example_8K.wav b/data/test/audios/asr_example_8K.wav
new file mode 100644
index 00000000..956aad27
--- /dev/null
+++ b/data/test/audios/asr_example_8K.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e999c247bfebb03d556a31722f0ce7145cac20a67fac9da813ad336e1f549f9f
+size 38954
diff --git a/data/test/audios/asr_example_cn_dialect.wav b/data/test/audios/asr_example_cn_dialect.wav
new file mode 100644
index 00000000..e18fb05d
--- /dev/null
+++ b/data/test/audios/asr_example_cn_dialect.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32eb8d4d537941bf0edea69cd6723e8ba489fa3df64e13e29f96e4fae0b856f4
+size 93676
diff --git a/data/test/audios/asr_example_cn_en.wav b/data/test/audios/asr_example_cn_en.wav
new file mode 100644
index 00000000..8baf3193
--- /dev/null
+++ b/data/test/audios/asr_example_cn_en.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f57aee13ade70be6b2c6e4f5e5c7404bdb03057b63828baefbaadcf23855a4cb
+size 472012
diff --git a/data/test/audios/asr_example_en.wav b/data/test/audios/asr_example_en.wav
new file mode 100644
index 00000000..fa996eec
--- /dev/null
+++ b/data/test/audios/asr_example_en.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fee8e0460ca707f108782be0d93c555bf34fb6b1cb297e5fceed70192cc65f9b
+size 71244
diff --git a/data/test/audios/asr_example_es.wav b/data/test/audios/asr_example_es.wav
new file mode 100644
index 00000000..95b22dc3
--- /dev/null
+++ b/data/test/audios/asr_example_es.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:450e31f9df8c5b48c617900625f01cb64c484f079a9843179fe9feaa7d163e61
+size 181964
diff --git a/data/test/audios/asr_example_id.wav b/data/test/audios/asr_example_id.wav
new file mode 100644
index 00000000..54c30614
--- /dev/null
+++ b/data/test/audios/asr_example_id.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:255494c41bc1dfb0c954d827ec6ce775900e4f7a55fb0a7881bdf9d66a03b425
+size 112078
diff --git a/data/test/audios/asr_example_ja.wav b/data/test/audios/asr_example_ja.wav
new file mode 100644
index 00000000..e953fee2
--- /dev/null
+++ b/data/test/audios/asr_example_ja.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:22a55277908bbc3ef60a0cf56b230eb507b9e837574e8f493e93644b1d21c281
+size 200556
diff --git a/data/test/audios/asr_example_ko.wav b/data/test/audios/asr_example_ko.wav
new file mode 100644
index 00000000..0dad1be3
--- /dev/null
+++ b/data/test/audios/asr_example_ko.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee92191836c76412463d8b282a7ab4e1aa57386ba699ec011a3e2c4d64f32f4b
+size 162636
diff --git a/data/test/audios/asr_example_ru.wav b/data/test/audios/asr_example_ru.wav
new file mode 100644
index 00000000..b0cb8f2f
--- /dev/null
+++ b/data/test/audios/asr_example_ru.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77d1537fc584c1505d8aa10ec8c86af57ab661199e4f28fd7ffee3c22d1e4e61
+size 160204
diff --git a/modelscope/pipelines/audio/asr_inference_pipeline.py b/modelscope/pipelines/audio/asr_inference_pipeline.py
index 4e8b658d..6a4864bf 100644
--- a/modelscope/pipelines/audio/asr_inference_pipeline.py
+++ b/modelscope/pipelines/audio/asr_inference_pipeline.py
@@ -47,22 +47,28 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
 
         if isinstance(audio_in, str):
             # load pcm data from url if audio_in is url str
-            self.audio_in = load_bytes_from_url(audio_in)
+            self.audio_in, checking_audio_fs = load_bytes_from_url(audio_in)
         elif isinstance(audio_in, bytes):
             # load pcm data from wav data if audio_in is wave format
-            self.audio_in = extract_pcm_from_wav(audio_in)
+            self.audio_in, checking_audio_fs = extract_pcm_from_wav(audio_in)
         else:
             self.audio_in = audio_in
 
+        # set the sample_rate of audio_in if checking_audio_fs is valid
+        if checking_audio_fs is not None:
+            self.audio_fs = checking_audio_fs
+
         if recog_type is None or audio_format is None:
             self.recog_type, self.audio_format, self.audio_in = asr_utils.type_checking(
                 audio_in=self.audio_in,
                 recog_type=recog_type,
                 audio_format=audio_format)
 
-        if hasattr(asr_utils, 'sample_rate_checking') and audio_fs is None:
-            self.audio_fs = asr_utils.sample_rate_checking(
+        if hasattr(asr_utils, 'sample_rate_checking'):
+            checking_audio_fs = asr_utils.sample_rate_checking(
                 self.audio_in, self.audio_format)
+            if checking_audio_fs is not None:
+                self.audio_fs = checking_audio_fs
 
         if self.preprocessor is None:
             self.preprocessor = WavToScp()
@@ -80,7 +86,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
 
         logger.info(f"Decoding with {inputs['audio_format']} files ...")
 
-        data_cmd: Sequence[Tuple[str, str]]
+        data_cmd: Sequence[Tuple[str, str, str]]
         if inputs['audio_format'] == 'wav' or inputs['audio_format'] == 'pcm':
             data_cmd = ['speech', 'sound']
         elif inputs['audio_format'] == 'kaldi_ark':
@@ -88,6 +94,9 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
         elif inputs['audio_format'] == 'tfrecord':
             data_cmd = ['speech', 'tfrecord']
 
+        if inputs.__contains__('mvn_file'):
+            data_cmd.append(inputs['mvn_file'])
+
         # generate asr inference command
         cmd = {
             'model_type': inputs['model_type'],
diff --git a/modelscope/pipelines/audio/kws_kwsbp_pipeline.py b/modelscope/pipelines/audio/kws_kwsbp_pipeline.py
index 5555c9e6..db6fc65d 100644
--- a/modelscope/pipelines/audio/kws_kwsbp_pipeline.py
+++ b/modelscope/pipelines/audio/kws_kwsbp_pipeline.py
@@ -51,10 +51,10 @@ class KeyWordSpottingKwsbpPipeline(Pipeline):
 
         if isinstance(audio_in, str):
             # load pcm data from url if audio_in is url str
-            audio_in = load_bytes_from_url(audio_in)
+            audio_in, audio_fs = load_bytes_from_url(audio_in)
         elif isinstance(audio_in, bytes):
             # load pcm data from wav data if audio_in is wave format
-            audio_in = extract_pcm_from_wav(audio_in)
+            audio_in, audio_fs = extract_pcm_from_wav(audio_in)
 
         output = self.preprocessor.forward(self.model.forward(), audio_in)
         output = self.forward(output)
diff --git a/modelscope/preprocessors/asr.py b/modelscope/preprocessors/asr.py
index facaa132..91bf5860 100644
--- a/modelscope/preprocessors/asr.py
+++ b/modelscope/preprocessors/asr.py
@@ -133,6 +133,12 @@ class WavToScp(Preprocessor):
             else:
                 inputs['asr_model_config'] = asr_model_config
 
+            if inputs['model_config'].__contains__('mvn_file'):
+                mvn_file = os.path.join(inputs['model_workspace'],
+                                        inputs['model_config']['mvn_file'])
+                assert os.path.exists(mvn_file), 'mvn_file does not exist'
+                inputs['mvn_file'] = mvn_file
+
         elif inputs['model_type'] == Frameworks.tf:
             assert inputs['model_config'].__contains__(
                 'vocab_file'), 'vocab_file does not exist'
diff --git a/modelscope/utils/audio/audio_utils.py b/modelscope/utils/audio/audio_utils.py
index 647d9521..32e2fa54 100644
--- a/modelscope/utils/audio/audio_utils.py
+++ b/modelscope/utils/audio/audio_utils.py
@@ -57,6 +57,7 @@ def update_conf(origin_config_file, new_config_file, conf_item: [str, str]):
 
 def extract_pcm_from_wav(wav: bytes) -> bytes:
     data = wav
+    sample_rate = None
     if len(data) > 44:
         frame_len = 44
         file_len = len(data)
@@ -70,29 +71,33 @@ def extract_pcm_from_wav(wav: bytes) -> bytes:
                         'Subchunk1ID'] == 'fmt ':
                 header_fields['SubChunk1Size'] = struct.unpack(
                     '<I', data[16:20])[0]
+                header_fields['SampleRate'] = struct.unpack('<I',
+                                                            data[24:28])[0]
+                sample_rate = header_fields['SampleRate']
 
                 if header_fields['SubChunk1Size'] == 16:
                     frame_len = 44
                 elif header_fields['SubChunk1Size'] == 18:
                     frame_len = 46
                 else:
-                    return data
+                    return data, sample_rate
 
                 data = wav[frame_len:file_len]
         except Exception:
             # no treatment
             pass
 
-    return data
+    return data, sample_rate
 
 
 def load_bytes_from_url(url: str) -> Union[bytes, str]:
+    sample_rate = None
     result = urlparse(url)
     if result.scheme is not None and len(result.scheme) > 0:
         storage = HTTPStorage()
         data = storage.read(url)
-        data = extract_pcm_from_wav(data)
+        data, sample_rate = extract_pcm_from_wav(data)
     else:
         data = url
 
-    return data
+    return data, sample_rate
diff --git a/tests/pipelines/test_automatic_speech_recognition.py b/tests/pipelines/test_automatic_speech_recognition.py
index 303fb6b9..c37a6a3f 100644
--- a/tests/pipelines/test_automatic_speech_recognition.py
+++ b/tests/pipelines/test_automatic_speech_recognition.py
@@ -45,6 +45,10 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase,
             'checking_item': OutputKeys.TEXT,
             'example': 'wav_example'
         },
+        'test_run_with_url_pytorch': {
+            'checking_item': OutputKeys.TEXT,
+            'example': 'wav_example'
+        },
         'test_run_with_url_tf': {
             'checking_item': OutputKeys.TEXT,
             'example': 'wav_example'
@@ -74,6 +78,170 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase,
         }
     }
 
+    all_models_info = [
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_paraformer_asr_nat-zh-cn-16k-common-vocab8358-tensorflow1',
+            'wav_path': 'data/test/audios/asr_example.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id': 'speech_paraformer_asr_nat-aishell1-pytorch',
+            'wav_path': 'data/test/audios/asr_example.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8358-tensorflow1',
+            'wav_path': 'data/test/audios/asr_example.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1',
+            'wav_path': 'data/test/audios/asr_example_8K.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_UniASR_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-online',
+            'wav_path': 'data/test/audios/asr_example.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_UniASR_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-offline',
+            'wav_path': 'data/test/audios/asr_example.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_UniASR_asr_2pass-zh-cn-8k-common-vocab8358-tensorflow1-online',
+            'wav_path': 'data/test/audios/asr_example_8K.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_UniASR_asr_2pass-zh-cn-8k-common-vocab8358-tensorflow1-offline',
+            'wav_path': 'data/test/audios/asr_example_8K.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_UniASR-large_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-offline',
+            'wav_path': 'data/test/audios/asr_example.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_UniASR_asr_2pass-cn-en-moe-16k-vocab8358-tensorflow1-online',
+            'wav_path': 'data/test/audios/asr_example_cn_en.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_UniASR_asr_2pass-cn-en-moe-16k-vocab8358-tensorflow1-offline',
+            'wav_path': 'data/test/audios/asr_example_cn_en.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_UniASR_asr_2pass-cn-dialect-16k-vocab8358-tensorflow1-online',
+            'wav_path': 'data/test/audios/asr_example_cn_dialect.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_UniASR_asr_2pass-cn-dialect-16k-vocab8358-tensorflow1-offline',
+            'wav_path': 'data/test/audios/asr_example_cn_dialect.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_paraformer_asr_nat-zh-cn-16k-common-vocab3444-tensorflow1-online',
+            'wav_path': 'data/test/audios/asr_example.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_paraformer_asr_nat-zh-cn-8k-common-vocab3444-tensorflow1-online',
+            'wav_path': 'data/test/audios/asr_example_8K.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_UniASR_asr_2pass-en-16k-common-vocab1080-tensorflow1-offline',
+            'wav_path': 'data/test/audios/asr_example_en.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_UniASR_asr_2pass-en-16k-common-vocab1080-tensorflow1-online',
+            'wav_path': 'data/test/audios/asr_example_en.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_UniASR_asr_2pass-ru-16k-common-vocab1664-tensorflow1-offline',
+            'wav_path': 'data/test/audios/asr_example_ru.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_UniASR_asr_2pass-ru-16k-common-vocab1664-tensorflow1-online',
+            'wav_path': 'data/test/audios/asr_example_ru.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_UniASR_asr_2pass-es-16k-common-vocab3445-tensorflow1-offline',
+            'wav_path': 'data/test/audios/asr_example_es.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_UniASR_asr_2pass-es-16k-common-vocab3445-tensorflow1-online',
+            'wav_path': 'data/test/audios/asr_example_es.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_UniASR_asr_2pass-ko-16k-common-vocab6400-tensorflow1-offline',
+            'wav_path': 'data/test/audios/asr_example_ko.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_UniASR_asr_2pass-ko-16k-common-vocab6400-tensorflow1-online',
+            'wav_path': 'data/test/audios/asr_example_ko.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_UniASR_asr_2pass-ja-16k-common-vocab93-tensorflow1-online',
+            'wav_path': 'data/test/audios/asr_example_ja.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_UniASR_asr_2pass-ja-16k-common-vocab93-tensorflow1-offline',
+            'wav_path': 'data/test/audios/asr_example_ja.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_UniASR_asr_2pass-id-16k-common-vocab1067-tensorflow1-online',
+            'wav_path': 'data/test/audios/asr_example_id.wav'
+        },
+        {
+            'model_group': 'damo',
+            'model_id':
+            'speech_UniASR_asr_2pass-id-16k-common-vocab1067-tensorflow1-offline',
+            'wav_path': 'data/test/audios/asr_example_id.wav'
+        },
+    ]
+
     def setUp(self) -> None:
         self.am_pytorch_model_id = 'damo/speech_paraformer_asr_nat-aishell1-pytorch'
         self.am_tf_model_id = 'damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8358-tensorflow1'
@@ -90,7 +258,7 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase,
     def run_pipeline(self,
                      model_id: str,
                      audio_in: Union[str, bytes],
-                     sr: int = 16000) -> Dict[str, Any]:
+                     sr: int = None) -> Dict[str, Any]:
         inference_16k_pipline = pipeline(
             task=Tasks.auto_speech_recognition, model=model_id)
 
@@ -136,46 +304,7 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase,
         return audio, fs
 
     @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
-    def test_run_with_wav_pytorch(self):
-        """run with single waveform file
-        """
-
-        logger.info('Run ASR test with waveform file (pytorch)...')
-
-        wav_file_path = os.path.join(os.getcwd(), WAV_FILE)
-
-        rec_result = self.run_pipeline(
-            model_id=self.am_pytorch_model_id, audio_in=wav_file_path)
-        self.check_result('test_run_with_wav_pytorch', rec_result)
-
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
-    def test_run_with_pcm_pytorch(self):
-        """run with wav data
-        """
-
-        logger.info('Run ASR test with wav data (pytorch)...')
-
-        audio, sr = self.wav2bytes(os.path.join(os.getcwd(), WAV_FILE))
-
-        rec_result = self.run_pipeline(
-            model_id=self.am_pytorch_model_id, audio_in=audio, sr=sr)
-        self.check_result('test_run_with_pcm_pytorch', rec_result)
-
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
-    def test_run_with_wav_tf(self):
-        """run with single waveform file
-        """
-
-        logger.info('Run ASR test with waveform file (tensorflow)...')
-
-        wav_file_path = os.path.join(os.getcwd(), WAV_FILE)
-
-        rec_result = self.run_pipeline(
-            model_id=self.am_tf_model_id, audio_in=wav_file_path)
-        self.check_result('test_run_with_wav_tf', rec_result)
-
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
-    def test_run_with_pcm_tf(self):
+    def test_run_with_pcm(self):
         """run with wav data
         """
 
@@ -187,8 +316,33 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase,
             model_id=self.am_tf_model_id, audio_in=audio, sr=sr)
         self.check_result('test_run_with_pcm_tf', rec_result)
 
+        logger.info('Run ASR test with wav data (pytorch)...')
+
+        rec_result = self.run_pipeline(
+            model_id=self.am_pytorch_model_id, audio_in=audio, sr=sr)
+        self.check_result('test_run_with_pcm_pytorch', rec_result)
+
     @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
-    def test_run_with_url_tf(self):
+    def test_run_with_wav(self):
+        """run with single waveform file
+        """
+
+        logger.info('Run ASR test with waveform file (tensorflow)...')
+
+        wav_file_path = os.path.join(os.getcwd(), WAV_FILE)
+
+        rec_result = self.run_pipeline(
+            model_id=self.am_tf_model_id, audio_in=wav_file_path)
+        self.check_result('test_run_with_wav_tf', rec_result)
+
+        logger.info('Run ASR test with waveform file (pytorch)...')
+
+        rec_result = self.run_pipeline(
+            model_id=self.am_pytorch_model_id, audio_in=wav_file_path)
+        self.check_result('test_run_with_wav_pytorch', rec_result)
+
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    def test_run_with_url(self):
         """run with single url file
         """
 
@@ -198,6 +352,12 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase,
             model_id=self.am_tf_model_id, audio_in=URL_FILE)
         self.check_result('test_run_with_url_tf', rec_result)
 
+        logger.info('Run ASR test with url file (pytorch)...')
+
+        rec_result = self.run_pipeline(
+            model_id=self.am_pytorch_model_id, audio_in=URL_FILE)
+        self.check_result('test_run_with_url_pytorch', rec_result)
+
     @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_run_with_wav_dataset_pytorch(self):
         """run with datasets, and audio format is waveform
@@ -217,7 +377,6 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase,
                  data.text  # hypothesis text
         """
 
-        logger.info('Run ASR test with waveform dataset (pytorch)...')
         logger.info('Downloading waveform testsets file ...')
 
         dataset_path = download_and_untar(
@@ -225,40 +384,38 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase,
             LITTLE_TESTSETS_URL, self.workspace)
         dataset_path = os.path.join(dataset_path, 'wav', 'test')
 
+        logger.info('Run ASR test with waveform dataset (tensorflow)...')
+
+        rec_result = self.run_pipeline(
+            model_id=self.am_tf_model_id, audio_in=dataset_path)
+        self.check_result('test_run_with_wav_dataset_tf', rec_result)
+
+        logger.info('Run ASR test with waveform dataset (pytorch)...')
+
         rec_result = self.run_pipeline(
             model_id=self.am_pytorch_model_id, audio_in=dataset_path)
         self.check_result('test_run_with_wav_dataset_pytorch', rec_result)
 
-    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
-    def test_run_with_wav_dataset_tf(self):
-        """run with datasets, and audio format is waveform
-           datasets directory:
-             <dataset_path>
-               wav
-                 test   # testsets
-                   xx.wav
-                   ...
-                 dev    # devsets
-                   yy.wav
-                   ...
-                 train  # trainsets
-                   zz.wav
-                   ...
-               transcript
-                 data.text  # hypothesis text
+    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+    def test_run_with_all_models(self):
+        """run with all models
         """
 
-        logger.info('Run ASR test with waveform dataset (tensorflow)...')
-        logger.info('Downloading waveform testsets file ...')
+        logger.info('Run ASR test with all models')
 
-        dataset_path = download_and_untar(
-            os.path.join(self.workspace, LITTLE_TESTSETS_FILE),
-            LITTLE_TESTSETS_URL, self.workspace)
-        dataset_path = os.path.join(dataset_path, 'wav', 'test')
-
-        rec_result = self.run_pipeline(
-            model_id=self.am_tf_model_id, audio_in=dataset_path)
-        self.check_result('test_run_with_wav_dataset_tf', rec_result)
+        for item in self.all_models_info:
+            model_id = item['model_group'] + '/' + item['model_id']
+            wav_path = item['wav_path']
+            rec_result = self.run_pipeline(
+                model_id=model_id, audio_in=wav_path)
+            if rec_result.__contains__(OutputKeys.TEXT):
+                logger.info(ColorCodes.MAGENTA + str(item['model_id']) + ' '
+                            + ColorCodes.YELLOW
+                            + str(rec_result[OutputKeys.TEXT])
+                            + ColorCodes.END)
+            else:
+                logger.info(ColorCodes.MAGENTA + str(rec_result)
+                            + ColorCodes.END)
 
     @unittest.skip('demo compatibility test is only enabled on a needed-basis')
     def test_demo_compatibility(self):

From acba1786b0ac263140e2743b51fc7fc7c36d0980 Mon Sep 17 00:00:00 2001
From: "yuze.zyz" <yuze.zyz@alibaba-inc.com>
Date: Thu, 20 Oct 2022 15:29:34 +0800
Subject: [PATCH 14/15] [to #42322933] Fix bug in UT daily

1. Fix bugs in daily test
2. Fix a bug that the updating of lr is before the first time of updating of optimizer
    TODO this will still cause warnings when GA is above 1
3. Remove the judgement of mode in text-classification's preprocessor to fit the base trainer(Bug)
     Update some regression bins to fit the preprocessor
4. Update the regression tool to let outer code modify atol and rtol
5. Add the default metric for text-classification task
6. Remove the useless ckpt conversion method in bert to avoid the requirement of tf when loading modeling_bert
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10430764
---
 data/test/regression/sbert-base-tnews.bin     |  3 +
 data/test/regression/sbert_nli.bin            |  4 +-
 data/test/regression/sbert_sen_sim.bin        |  4 +-
 ...rt_for_sequence_classification_exporter.py |  4 +-
 modelscope/metrics/builder.py                 |  1 +
 modelscope/models/nlp/bert/modeling_bert.py   | 78 ---------------
 modelscope/preprocessors/nlp/nlp_base.py      |  7 +-
 .../trainers/hooks/lr_scheduler_hook.py       |  2 +-
 modelscope/trainers/trainer.py                |  2 +-
 modelscope/utils/regress_test_utils.py        | 94 ++++++++++++-------
 tests/msdatasets/test_ms_dataset.py           |  3 +-
 .../test_finetune_sequence_classification.py  | 33 ++++++-
 tests/trainers/test_trainer_with_nlp.py       | 24 +++--
 13 files changed, 124 insertions(+), 135 deletions(-)
 create mode 100644 data/test/regression/sbert-base-tnews.bin

diff --git a/data/test/regression/sbert-base-tnews.bin b/data/test/regression/sbert-base-tnews.bin
new file mode 100644
index 00000000..1546860f
--- /dev/null
+++ b/data/test/regression/sbert-base-tnews.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2bce1341f4b55d536771dad6e2b280458579f46c3216474ceb8a926022ab53d0
+size 151572
diff --git a/data/test/regression/sbert_nli.bin b/data/test/regression/sbert_nli.bin
index a5f680bb..68efb778 100644
--- a/data/test/regression/sbert_nli.bin
+++ b/data/test/regression/sbert_nli.bin
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:44e3925c15d86d8596baeb6bd1d153d86f57b7489798b2cf988a1248e110fd62
-size 62231
+oid sha256:6af5024a26337a440c7ea2935fce84af558dd982ee97a2f027bb922cc874292b
+size 61741
diff --git a/data/test/regression/sbert_sen_sim.bin b/data/test/regression/sbert_sen_sim.bin
index a59cbe0b..362f762c 100644
--- a/data/test/regression/sbert_sen_sim.bin
+++ b/data/test/regression/sbert_sen_sim.bin
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1ff17a0272752de4c88d4254b2e881f97f8ef022f03609d03ee1de0ae964368a
-size 62235
+oid sha256:bbce084781342ca7274c2e4d02ed5c5de43ba213a3b76328d5994404d6544c41
+size 61745
diff --git a/modelscope/exporters/nlp/sbert_for_sequence_classification_exporter.py b/modelscope/exporters/nlp/sbert_for_sequence_classification_exporter.py
index dc1e2b92..52dab4bc 100644
--- a/modelscope/exporters/nlp/sbert_for_sequence_classification_exporter.py
+++ b/modelscope/exporters/nlp/sbert_for_sequence_classification_exporter.py
@@ -23,12 +23,14 @@ class SbertForSequenceClassificationExporter(TorchModelExporter):
 
     def generate_dummy_inputs(self,
                               shape: Tuple = None,
+                              pair: bool = False,
                               **kwargs) -> Dict[str, Any]:
         """Generate dummy inputs for model exportation to onnx or other formats by tracing.
 
         @param shape: A tuple of input shape which should have at most two dimensions.
             shape = (1, ) batch_size=1, sequence_length will be taken from the preprocessor.
             shape = (8, 128) batch_size=1, sequence_length=128, which will cover the config of the preprocessor.
+        @param pair: Generate sentence pairs or single sentences for dummy inputs.
         @return: Dummy inputs.
         """
 
@@ -55,7 +57,7 @@ class SbertForSequenceClassificationExporter(TorchModelExporter):
             **sequence_length
         })
         preprocessor: Preprocessor = build_preprocessor(cfg, field_name)
-        if preprocessor.pair:
+        if pair:
             first_sequence = preprocessor.tokenizer.unk_token
             second_sequence = preprocessor.tokenizer.unk_token
         else:
diff --git a/modelscope/metrics/builder.py b/modelscope/metrics/builder.py
index ee4d2840..1c8e16d7 100644
--- a/modelscope/metrics/builder.py
+++ b/modelscope/metrics/builder.py
@@ -32,6 +32,7 @@ task_default_metrics = {
     Tasks.sentiment_classification: [Metrics.seq_cls_metric],
     Tasks.token_classification: [Metrics.token_cls_metric],
     Tasks.text_generation: [Metrics.text_gen_metric],
+    Tasks.text_classification: [Metrics.seq_cls_metric],
     Tasks.image_denoising: [Metrics.image_denoise_metric],
     Tasks.image_color_enhancement: [Metrics.image_color_enhance_metric],
     Tasks.image_portrait_enhancement:
diff --git a/modelscope/models/nlp/bert/modeling_bert.py b/modelscope/models/nlp/bert/modeling_bert.py
index e91a6433..7c1dfcf5 100755
--- a/modelscope/models/nlp/bert/modeling_bert.py
+++ b/modelscope/models/nlp/bert/modeling_bert.py
@@ -15,7 +15,6 @@
 """PyTorch BERT model. """
 
 import math
-import os
 import warnings
 from dataclasses import dataclass
 from typing import Optional, Tuple
@@ -41,7 +40,6 @@ from transformers.modeling_utils import (PreTrainedModel,
                                          find_pruneable_heads_and_indices,
                                          prune_linear_layer)
 
-from modelscope.models.base import TorchModel
 from modelscope.utils.logger import get_logger
 from .configuration_bert import BertConfig
 
@@ -50,81 +48,6 @@ logger = get_logger(__name__)
 _CONFIG_FOR_DOC = 'BertConfig'
 
 
-def load_tf_weights_in_bert(model, config, tf_checkpoint_path):
-    """Load tf checkpoints in a pytorch model."""
-    try:
-        import re
-
-        import numpy as np
-        import tensorflow as tf
-    except ImportError:
-        logger.error(
-            'Loading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see '
-            'https://www.tensorflow.org/install/ for installation instructions.'
-        )
-        raise
-    tf_path = os.path.abspath(tf_checkpoint_path)
-    logger.info(f'Converting TensorFlow checkpoint from {tf_path}')
-    # Load weights from TF model
-    init_vars = tf.train.list_variables(tf_path)
-    names = []
-    arrays = []
-    for name, shape in init_vars:
-        logger.info(f'Loading TF weight {name} with shape {shape}')
-        array = tf.train.load_variable(tf_path, name)
-        names.append(name)
-        arrays.append(array)
-
-    for name, array in zip(names, arrays):
-        name = name.split('/')
-        # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculated m and v
-        # which are not required for using pretrained model
-        if any(n in [
-                'adam_v', 'adam_m', 'AdamWeightDecayOptimizer',
-                'AdamWeightDecayOptimizer_1', 'global_step'
-        ] for n in name):
-            logger.info(f"Skipping {'/'.join(name)}")
-            continue
-        pointer = model
-        for m_name in name:
-            if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
-                scope_names = re.split(r'_(\d+)', m_name)
-            else:
-                scope_names = [m_name]
-            if scope_names[0] == 'kernel' or scope_names[0] == 'gamma':
-                pointer = getattr(pointer, 'weight')
-            elif scope_names[0] == 'output_bias' or scope_names[0] == 'beta':
-                pointer = getattr(pointer, 'bias')
-            elif scope_names[0] == 'output_weights':
-                pointer = getattr(pointer, 'weight')
-            elif scope_names[0] == 'squad':
-                pointer = getattr(pointer, 'classifier')
-            else:
-                try:
-                    pointer = getattr(pointer, scope_names[0])
-                except AttributeError:
-                    logger.info(f"Skipping {'/'.join(name)}")
-                    continue
-            if len(scope_names) >= 2:
-                num = int(scope_names[1])
-                pointer = pointer[num]
-        if m_name[-11:] == '_embeddings':
-            pointer = getattr(pointer, 'weight')
-        elif m_name == 'kernel':
-            array = np.transpose(array)
-        try:
-            if pointer.shape != array.shape:
-                raise ValueError(
-                    f'Pointer shape {pointer.shape} and array shape {array.shape} mismatched'
-                )
-        except AssertionError as e:
-            e.args += (pointer.shape, array.shape)
-            raise
-        logger.info(f'Initialize PyTorch weight {name}')
-        pointer.data = torch.from_numpy(array)
-    return model
-
-
 class BertEmbeddings(nn.Module):
     """Construct the embeddings from word, position and token_type embeddings."""
 
@@ -750,7 +673,6 @@ class BertPreTrainedModel(PreTrainedModel):
     """
 
     config_class = BertConfig
-    load_tf_weights = load_tf_weights_in_bert
     base_model_prefix = 'bert'
     supports_gradient_checkpointing = True
     _keys_to_ignore_on_load_missing = [r'position_ids']
diff --git a/modelscope/preprocessors/nlp/nlp_base.py b/modelscope/preprocessors/nlp/nlp_base.py
index 267dbb8c..bc96f569 100644
--- a/modelscope/preprocessors/nlp/nlp_base.py
+++ b/modelscope/preprocessors/nlp/nlp_base.py
@@ -2,7 +2,7 @@
 
 import os.path as osp
 import re
-from typing import Any, Dict, Iterable, Optional, Tuple, Union
+from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
 
 import numpy as np
 import sentencepiece as spm
@@ -217,7 +217,7 @@ class NLPTokenizerPreprocessorBase(Preprocessor):
             return isinstance(label, str) or isinstance(label, int)
 
         if labels is not None:
-            if isinstance(labels, Iterable) and all([label_can_be_mapped(label) for label in labels]) \
+            if isinstance(labels, (tuple, list)) and all([label_can_be_mapped(label) for label in labels]) \
                     and self.label2id is not None:
                 output[OutputKeys.LABELS] = [
                     self.label2id[str(label)] for label in labels
@@ -314,8 +314,7 @@ class SequenceClassificationPreprocessor(NLPTokenizerPreprocessorBase):
 
     def __init__(self, model_dir: str, mode=ModeKeys.INFERENCE, **kwargs):
         kwargs['truncation'] = kwargs.get('truncation', True)
-        kwargs['padding'] = kwargs.get(
-            'padding', False if mode == ModeKeys.INFERENCE else 'max_length')
+        kwargs['padding'] = kwargs.get('padding', 'max_length')
         kwargs['max_length'] = kwargs.pop('sequence_length', 128)
         super().__init__(model_dir, mode=mode, **kwargs)
 
diff --git a/modelscope/trainers/hooks/lr_scheduler_hook.py b/modelscope/trainers/hooks/lr_scheduler_hook.py
index ca0ec01b..32fb0250 100644
--- a/modelscope/trainers/hooks/lr_scheduler_hook.py
+++ b/modelscope/trainers/hooks/lr_scheduler_hook.py
@@ -47,7 +47,7 @@ class LrSchedulerHook(Hook):
         return lr
 
     def before_train_iter(self, trainer):
-        if not self.by_epoch:
+        if not self.by_epoch and trainer.iter > 0:
             if self.warmup_lr_scheduler is not None:
                 self.warmup_lr_scheduler.step()
             else:
diff --git a/modelscope/trainers/trainer.py b/modelscope/trainers/trainer.py
index 9eaff762..61d11aa6 100644
--- a/modelscope/trainers/trainer.py
+++ b/modelscope/trainers/trainer.py
@@ -651,7 +651,7 @@ class EpochBasedTrainer(BaseTrainer):
         # TODO: support MsDataset load for cv
         if hasattr(data_cfg, 'name'):
             dataset = MsDataset.load(
-                dataset_name=data_cfg.name,
+                dataset_name=data_cfg.pop('name'),
                 **data_cfg,
             )
             cfg = ConfigDict(type=self.cfg.model.type, mode=mode)
diff --git a/modelscope/utils/regress_test_utils.py b/modelscope/utils/regress_test_utils.py
index 47bbadfe..3c1e5c1c 100644
--- a/modelscope/utils/regress_test_utils.py
+++ b/modelscope/utils/regress_test_utils.py
@@ -65,7 +65,8 @@ class RegressTool:
     def monitor_module_single_forward(self,
                                       module: nn.Module,
                                       file_name: str,
-                                      compare_fn=None):
+                                      compare_fn=None,
+                                      **kwargs):
         """Monitor a pytorch module in a single forward.
 
         @param module: A torch module
@@ -107,7 +108,7 @@ class RegressTool:
             baseline = os.path.join(tempfile.gettempdir(), name)
             self.load(baseline, name)
             with open(baseline, 'rb') as f:
-                baseline_json = pickle.load(f)
+                base = pickle.load(f)
 
             class NumpyEncoder(json.JSONEncoder):
                 """Special json encoder for numpy types
@@ -122,9 +123,9 @@ class RegressTool:
                         return obj.tolist()
                     return json.JSONEncoder.default(self, obj)
 
-            print(f'baseline: {json.dumps(baseline_json, cls=NumpyEncoder)}')
+            print(f'baseline: {json.dumps(base, cls=NumpyEncoder)}')
             print(f'latest  : {json.dumps(io_json, cls=NumpyEncoder)}')
-            if not compare_io_and_print(baseline_json, io_json, compare_fn):
+            if not compare_io_and_print(base, io_json, compare_fn, **kwargs):
                 raise ValueError('Result not match!')
 
     @contextlib.contextmanager
@@ -136,7 +137,8 @@ class RegressTool:
                              ignore_keys=None,
                              compare_random=True,
                              reset_dropout=True,
-                             lazy_stop_callback=None):
+                             lazy_stop_callback=None,
+                             **kwargs):
         """Monitor a pytorch module's backward data and cfg data within a step of the optimizer.
 
         This is usually useful when you try to change some dangerous code
@@ -265,14 +267,15 @@ class RegressTool:
                 baseline_json = pickle.load(f)
 
             if level == 'strict' and not compare_io_and_print(
-                    baseline_json['forward'], io_json, compare_fn):
+                    baseline_json['forward'], io_json, compare_fn, **kwargs):
                 raise RuntimeError('Forward not match!')
             if not compare_backward_and_print(
                     baseline_json['backward'],
                     bw_json,
                     compare_fn=compare_fn,
                     ignore_keys=ignore_keys,
-                    level=level):
+                    level=level,
+                    **kwargs):
                 raise RuntimeError('Backward not match!')
             cfg_opt1 = {
                 'optimizer': baseline_json['optimizer'],
@@ -286,7 +289,8 @@ class RegressTool:
                 'cfg': summary['cfg'],
                 'state': None if not compare_random else summary['state']
             }
-            if not compare_cfg_and_optimizers(cfg_opt1, cfg_opt2, compare_fn):
+            if not compare_cfg_and_optimizers(cfg_opt1, cfg_opt2, compare_fn,
+                                              **kwargs):
                 raise RuntimeError('Cfg or optimizers not match!')
 
 
@@ -303,7 +307,8 @@ class MsRegressTool(RegressTool):
                          compare_fn=None,
                          ignore_keys=None,
                          compare_random=True,
-                         lazy_stop_callback=None):
+                         lazy_stop_callback=None,
+                         **kwargs):
 
         if lazy_stop_callback is None:
 
@@ -319,7 +324,7 @@ class MsRegressTool(RegressTool):
 
                 trainer.register_hook(EarlyStopHook())
 
-        def _train_loop(trainer, *args, **kwargs):
+        def _train_loop(trainer, *args_train, **kwargs_train):
             with self.monitor_module_train(
                     trainer,
                     file_name,
@@ -327,9 +332,11 @@ class MsRegressTool(RegressTool):
                     compare_fn=compare_fn,
                     ignore_keys=ignore_keys,
                     compare_random=compare_random,
-                    lazy_stop_callback=lazy_stop_callback):
+                    lazy_stop_callback=lazy_stop_callback,
+                    **kwargs):
                 try:
-                    return trainer.train_loop_origin(*args, **kwargs)
+                    return trainer.train_loop_origin(*args_train,
+                                                     **kwargs_train)
                 except MsRegressTool.EarlyStopError:
                     pass
 
@@ -530,7 +537,8 @@ def compare_arguments_nested(print_content,
                 )
             return False
         if not all([
-                compare_arguments_nested(None, sub_arg1, sub_arg2)
+                compare_arguments_nested(
+                    None, sub_arg1, sub_arg2, rtol=rtol, atol=atol)
                 for sub_arg1, sub_arg2 in zip(arg1, arg2)
         ]):
             if print_content is not None:
@@ -551,7 +559,8 @@ def compare_arguments_nested(print_content,
                 print(f'{print_content}, key diff:{set(keys1) - set(keys2)}')
             return False
         if not all([
-                compare_arguments_nested(None, arg1[key], arg2[key])
+                compare_arguments_nested(
+                    None, arg1[key], arg2[key], rtol=rtol, atol=atol)
                 for key in keys1
         ]):
             if print_content is not None:
@@ -574,7 +583,7 @@ def compare_arguments_nested(print_content,
         raise ValueError(f'type not supported: {type1}')
 
 
-def compare_io_and_print(baseline_json, io_json, compare_fn=None):
+def compare_io_and_print(baseline_json, io_json, compare_fn=None, **kwargs):
     if compare_fn is None:
 
         def compare_fn(*args, **kwargs):
@@ -602,10 +611,10 @@ def compare_io_and_print(baseline_json, io_json, compare_fn=None):
         else:
             match = compare_arguments_nested(
                 f'unmatched module {key} input args', v1input['args'],
-                v2input['args']) and match
+                v2input['args'], **kwargs) and match
             match = compare_arguments_nested(
                 f'unmatched module {key} input kwargs', v1input['kwargs'],
-                v2input['kwargs']) and match
+                v2input['kwargs'], **kwargs) and match
         v1output = numpify_tensor_nested(v1['output'])
         v2output = numpify_tensor_nested(v2['output'])
         res = compare_fn(v1output, v2output, key, 'output')
@@ -615,8 +624,11 @@ def compare_io_and_print(baseline_json, io_json, compare_fn=None):
             )
             match = match and res
         else:
-            match = compare_arguments_nested(f'unmatched module {key} outputs',
-                                             v1output, v2output) and match
+            match = compare_arguments_nested(
+                f'unmatched module {key} outputs',
+                arg1=v1output,
+                arg2=v2output,
+                **kwargs) and match
     return match
 
 
@@ -624,7 +636,8 @@ def compare_backward_and_print(baseline_json,
                                bw_json,
                                level,
                                ignore_keys=None,
-                               compare_fn=None):
+                               compare_fn=None,
+                               **kwargs):
     if compare_fn is None:
 
         def compare_fn(*args, **kwargs):
@@ -653,18 +666,26 @@ def compare_backward_and_print(baseline_json,
             data2, grad2, data_after2 = bw_json[key]['data'], bw_json[key][
                 'grad'], bw_json[key]['data_after']
             match = compare_arguments_nested(
-                f'unmatched module {key} tensor data', data1, data2) and match
+                f'unmatched module {key} tensor data',
+                arg1=data1,
+                arg2=data2,
+                **kwargs) and match
             if level == 'strict':
                 match = compare_arguments_nested(
-                    f'unmatched module {key} grad data', grad1,
-                    grad2) and match
+                    f'unmatched module {key} grad data',
+                    arg1=grad1,
+                    arg2=grad2,
+                    **kwargs) and match
                 match = compare_arguments_nested(
                     f'unmatched module {key} data after step', data_after1,
-                    data_after2) and match
+                    data_after2, **kwargs) and match
     return match
 
 
-def compare_cfg_and_optimizers(baseline_json, cfg_json, compare_fn=None):
+def compare_cfg_and_optimizers(baseline_json,
+                               cfg_json,
+                               compare_fn=None,
+                               **kwargs):
     if compare_fn is None:
 
         def compare_fn(*args, **kwargs):
@@ -686,12 +707,12 @@ def compare_cfg_and_optimizers(baseline_json, cfg_json, compare_fn=None):
             print(
                 f"Optimizer type not equal:{optimizer1['type']} and {optimizer2['type']}"
             )
-        match = compare_arguments_nested('unmatched optimizer defaults',
-                                         optimizer1['defaults'],
-                                         optimizer2['defaults']) and match
-        match = compare_arguments_nested('unmatched optimizer state_dict',
-                                         optimizer1['state_dict'],
-                                         optimizer2['state_dict']) and match
+        match = compare_arguments_nested(
+            'unmatched optimizer defaults', optimizer1['defaults'],
+            optimizer2['defaults'], **kwargs) and match
+        match = compare_arguments_nested(
+            'unmatched optimizer state_dict', optimizer1['state_dict'],
+            optimizer2['state_dict'], **kwargs) and match
 
     res = compare_fn(lr_scheduler1, lr_scheduler2, None, 'lr_scheduler')
     if res is not None:
@@ -703,16 +724,17 @@ def compare_cfg_and_optimizers(baseline_json, cfg_json, compare_fn=None):
             print(
                 f"Optimizer type not equal:{lr_scheduler1['type']} and {lr_scheduler2['type']}"
             )
-        match = compare_arguments_nested('unmatched lr_scheduler state_dict',
-                                         lr_scheduler1['state_dict'],
-                                         lr_scheduler2['state_dict']) and match
+        match = compare_arguments_nested(
+            'unmatched lr_scheduler state_dict', lr_scheduler1['state_dict'],
+            lr_scheduler2['state_dict'], **kwargs) and match
 
     res = compare_fn(cfg1, cfg2, None, 'cfg')
     if res is not None:
         print(f'cfg compared with user compare_fn with result:{res}\n')
         match = match and res
     else:
-        match = compare_arguments_nested('unmatched cfg', cfg1, cfg2) and match
+        match = compare_arguments_nested(
+            'unmatched cfg', arg1=cfg1, arg2=cfg2, **kwargs) and match
 
     res = compare_fn(state1, state2, None, 'state')
     if res is not None:
@@ -721,6 +743,6 @@ def compare_cfg_and_optimizers(baseline_json, cfg_json, compare_fn=None):
         match = match and res
     else:
         match = compare_arguments_nested('unmatched random state', state1,
-                                         state2) and match
+                                         state2, **kwargs) and match
 
     return match
diff --git a/tests/msdatasets/test_ms_dataset.py b/tests/msdatasets/test_ms_dataset.py
index 91a3b5c5..1e537e93 100644
--- a/tests/msdatasets/test_ms_dataset.py
+++ b/tests/msdatasets/test_ms_dataset.py
@@ -52,7 +52,8 @@ class MsDatasetTest(unittest.TestCase):
     @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_ms_csv_basic(self):
         ms_ds_train = MsDataset.load(
-            'afqmc_small', namespace='userxiaoming', split='train')
+            'clue', subset_name='afqmc',
+            split='train').to_hf_dataset().select(range(5))
         print(next(iter(ms_ds_train)))
 
     @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
diff --git a/tests/trainers/test_finetune_sequence_classification.py b/tests/trainers/test_finetune_sequence_classification.py
index f2adfa22..27db1f18 100644
--- a/tests/trainers/test_finetune_sequence_classification.py
+++ b/tests/trainers/test_finetune_sequence_classification.py
@@ -16,7 +16,8 @@ from modelscope.trainers.optimizer.child_tuning_adamw_optimizer import \
     calculate_fisher
 from modelscope.utils.constant import ModelFile, Tasks
 from modelscope.utils.data_utils import to_device
-from modelscope.utils.regress_test_utils import MsRegressTool
+from modelscope.utils.regress_test_utils import (MsRegressTool,
+                                                 compare_arguments_nested)
 from modelscope.utils.test_utils import test_level
 
 
@@ -41,6 +42,33 @@ class TestFinetuneSequenceClassification(unittest.TestCase):
     def test_trainer_repeatable(self):
         import torch  # noqa
 
+        def compare_fn(value1, value2, key, type):
+            # Ignore the differences between optimizers of two torch versions
+            if type != 'optimizer':
+                return None
+
+            match = (value1['type'] == value2['type'])
+            shared_defaults = set(value1['defaults'].keys()).intersection(
+                set(value2['defaults'].keys()))
+            match = all([
+                compare_arguments_nested(f'Optimizer defaults {key} not match',
+                                         value1['defaults'][key],
+                                         value2['defaults'][key])
+                for key in shared_defaults
+            ]) and match
+            match = (len(value1['state_dict']['param_groups']) == len(
+                value2['state_dict']['param_groups'])) and match
+            for group1, group2 in zip(value1['state_dict']['param_groups'],
+                                      value2['state_dict']['param_groups']):
+                shared_keys = set(group1.keys()).intersection(
+                    set(group2.keys()))
+                match = all([
+                    compare_arguments_nested(
+                        f'Optimizer param_groups {key} not match', group1[key],
+                        group2[key]) for key in shared_keys
+                ]) and match
+            return match
+
         def cfg_modify_fn(cfg):
             cfg.task = 'nli'
             cfg['preprocessor'] = {'type': 'nli-tokenizer'}
@@ -98,7 +126,8 @@ class TestFinetuneSequenceClassification(unittest.TestCase):
             name=Trainers.nlp_base_trainer, default_args=kwargs)
 
         with self.regress_tool.monitor_ms_train(
-                trainer, 'sbert-base-tnews', level='strict'):
+                trainer, 'sbert-base-tnews', level='strict',
+                compare_fn=compare_fn):
             trainer.train()
 
     def finetune(self,
diff --git a/tests/trainers/test_trainer_with_nlp.py b/tests/trainers/test_trainer_with_nlp.py
index 6030ada9..8357e778 100644
--- a/tests/trainers/test_trainer_with_nlp.py
+++ b/tests/trainers/test_trainer_with_nlp.py
@@ -29,7 +29,8 @@ class TestTrainerWithNlp(unittest.TestCase):
             os.makedirs(self.tmp_dir)
 
         self.dataset = MsDataset.load(
-            'afqmc_small', namespace='userxiaoming', split='train')
+            'clue', subset_name='afqmc',
+            split='train').to_hf_dataset().select(range(2))
 
     def tearDown(self):
         shutil.rmtree(self.tmp_dir)
@@ -73,7 +74,7 @@ class TestTrainerWithNlp(unittest.TestCase):
         output_dir = os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR)
         pipeline_sentence_similarity(output_dir)
 
-    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 3, 'skip test in current test level')
     def test_trainer_with_backbone_head(self):
         model_id = 'damo/nlp_structbert_sentiment-classification_chinese-base'
         kwargs = dict(
@@ -99,6 +100,8 @@ class TestTrainerWithNlp(unittest.TestCase):
         model_id = 'damo/nlp_structbert_sentiment-classification_chinese-base'
         cfg = read_config(model_id, revision='beta')
         cfg.train.max_epochs = 20
+        cfg.preprocessor.train['label2id'] = {'0': 0, '1': 1}
+        cfg.preprocessor.val['label2id'] = {'0': 0, '1': 1}
         cfg.train.work_dir = self.tmp_dir
         cfg_file = os.path.join(self.tmp_dir, 'config.json')
         cfg.dump(cfg_file)
@@ -120,22 +123,24 @@ class TestTrainerWithNlp(unittest.TestCase):
             checkpoint_path=os.path.join(self.tmp_dir, 'epoch_10.pth'))
         self.assertTrue(Metrics.accuracy in eval_results)
 
-    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_trainer_with_configured_datasets(self):
         model_id = 'damo/nlp_structbert_sentence-similarity_chinese-base'
         cfg: Config = read_config(model_id)
         cfg.train.max_epochs = 20
+        cfg.preprocessor.train['label2id'] = {'0': 0, '1': 1}
+        cfg.preprocessor.val['label2id'] = {'0': 0, '1': 1}
         cfg.train.work_dir = self.tmp_dir
         cfg.dataset = {
             'train': {
-                'name': 'afqmc_small',
+                'name': 'clue',
+                'subset_name': 'afqmc',
                 'split': 'train',
-                'namespace': 'userxiaoming'
             },
             'val': {
-                'name': 'afqmc_small',
+                'name': 'clue',
+                'subset_name': 'afqmc',
                 'split': 'train',
-                'namespace': 'userxiaoming'
             },
         }
         cfg_file = os.path.join(self.tmp_dir, 'config.json')
@@ -159,6 +164,11 @@ class TestTrainerWithNlp(unittest.TestCase):
         model_id = 'damo/nlp_structbert_sentence-similarity_chinese-base'
         cfg: Config = read_config(model_id)
         cfg.train.max_epochs = 3
+        cfg.preprocessor.first_sequence = 'sentence1'
+        cfg.preprocessor.second_sequence = 'sentence2'
+        cfg.preprocessor.label = 'label'
+        cfg.preprocessor.train['label2id'] = {'0': 0, '1': 1}
+        cfg.preprocessor.val['label2id'] = {'0': 0, '1': 1}
         cfg.train.work_dir = self.tmp_dir
         cfg_file = os.path.join(self.tmp_dir, 'config.json')
         cfg.dump(cfg_file)

From 8ec90ccbf8926235ee3176d4fd6b7f10dbdf09ad Mon Sep 17 00:00:00 2001
From: "xiangpeng.wxp" <xiangpeng.wxp@alibaba-inc.com>
Date: Thu, 20 Oct 2022 17:35:27 +0800
Subject: [PATCH 15/15] [to #42322933] Add uttest for en2fr and fr2en tasks    
     Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10467797

    * add uttest for en2fr and fr2en tasks
---
 tests/pipelines/test_csanmt_translation.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tests/pipelines/test_csanmt_translation.py b/tests/pipelines/test_csanmt_translation.py
index f7ec81cd..83827813 100644
--- a/tests/pipelines/test_csanmt_translation.py
+++ b/tests/pipelines/test_csanmt_translation.py
@@ -26,6 +26,20 @@ class TranslationTest(unittest.TestCase, DemoCompatibilityCheck):
         pipeline_ins = pipeline(self.task, model=model_id)
         print(pipeline_ins(input=inputs))
 
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    def test_run_with_model_name_for_en2fr(self):
+        model_id = 'damo/nlp_csanmt_translation_en2fr'
+        inputs = 'When I was in my 20s, I saw my very first psychotherapy client.'
+        pipeline_ins = pipeline(self.task, model=model_id)
+        print(pipeline_ins(input=inputs))
+
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    def test_run_with_model_name_for_fr2en(self):
+        model_id = 'damo/nlp_csanmt_translation_fr2en'
+        inputs = "Quand j'avais la vingtaine, j'ai vu mes tout premiers clients comme psychothérapeute."
+        pipeline_ins = pipeline(self.task, model=model_id)
+        print(pipeline_ins(input=inputs))
+
     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_run_with_default_model(self):
         inputs = '声明补充说，沃伦的同事都深感震惊，并且希望他能够投案自首。'