diff --git a/README.md b/README.md index 68f03744..c9b071ab 100644 --- a/README.md +++ b/README.md @@ -108,9 +108,9 @@ Audio: * [speech_charctc_kws_phone-xiaoyun](https://modelscope.cn/models/damo/speech_charctc_kws_phone-xiaoyun) * [u2pp_conformer-asr-cn-16k-online](https://modelscope.cn/models/wenet/u2pp_conformer-asr-cn-16k-online) - + * [speech_fsmn_vad_zh-cn-16k-common-pytorch](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary) - + * [punc_ct-transformer_zh-cn-common-vocab272727-pytorch](https://modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/summary) * [speech_frcrn_ans_cirm_16k](https://modelscope.cn/models/damo/speech_frcrn_ans_cirm_16k) diff --git a/examples/pytorch/image_classification/finetune_image_classification.py b/examples/pytorch/image_classification/finetune_image_classification.py index 4e96c2cd..e5bb9bdd 100644 --- a/examples/pytorch/image_classification/finetune_image_classification.py +++ b/examples/pytorch/image_classification/finetune_image_classification.py @@ -1,13 +1,12 @@ import os from dataclasses import dataclass, field +from modelscope import MsDataset, TrainingArgs from modelscope.metainfo import Trainers -from modelscope.msdatasets.ms_dataset import MsDataset from modelscope.trainers.builder import build_trainer -from modelscope.trainers.training_args import TrainingArgs -@dataclass +@dataclass(init=False) class ImageClassificationTrainingArgs(TrainingArgs): num_classes: int = field( default=None, @@ -46,26 +45,35 @@ def create_dataset(name, split): dataset_name, namespace=namespace, subset_name='default', split=split) -def train(): - args = ImageClassificationTrainingArgs.from_cli( - model='damo/cv_vit-base_image-classification_ImageNet-labels', - max_epochs=1, - lr=1e-4, - optimizer='AdamW', - warmup_iters=1, - topk=(1, )) - if args.dataset_name is not None: - train_dataset = create_dataset(args.dataset_name, split='train') - val_dataset = create_dataset(args.dataset_name, split='validation') +training_args = ImageClassificationTrainingArgs( + model='damo/cv_vit-base_image-classification_ImageNet-labels', + max_epochs=1, + lr=1e-4, + optimizer='AdamW', + warmup_iters=1, + topk=(1, )).parse_cli() +config, args = training_args.to_config() + + +def cfg_modify_fn(cfg): + if args.use_model_config: + cfg.merge_from_dict(config) else: - train_dataset = create_dataset(args.train_dataset_name, split='train') - val_dataset = create_dataset(args.val_dataset_name, split='validation') + cfg = config + return cfg + + +def train(): + train_dataset = create_dataset( + training_args.train_dataset_name, split=training_args.train_split) + val_dataset = create_dataset( + training_args.val_dataset_name, split=training_args.val_split) kwargs = dict( model=args.model, # model id train_dataset=train_dataset, # training dataset eval_dataset=val_dataset, # validation dataset - cfg_modify_fn=args # callback to modify configuration + cfg_modify_fn=cfg_modify_fn # callback to modify configuration ) # in distributed training, specify pytorch launcher diff --git a/examples/pytorch/image_classification/run_train.sh b/examples/pytorch/image_classification/run_train.sh index 5a7b3a09..ad560424 100644 --- a/examples/pytorch/image_classification/run_train.sh +++ b/examples/pytorch/image_classification/run_train.sh @@ -2,4 +2,7 @@ PYTHONPATH=. 
python -m torch.distributed.launch --nproc_per_node=2 \ examples/pytorch/image_classification/finetune_image_classification.py \ --num_classes 2 \ --train_dataset_name 'tany0699/cats_and_dogs' \ - --val_dataset_name 'tany0699/cats_and_dogs' + --val_dataset_name 'tany0699/cats_and_dogs' \ + --train_split train \ + --val_split validation \ + --use_model_config true \ diff --git a/examples/pytorch/multi_modal_embedding/finetune_multi_modal_embedding.py b/examples/pytorch/multi_modal_embedding/finetune_multi_modal_embedding.py index cc7da842..7b4cfbb8 100644 --- a/examples/pytorch/multi_modal_embedding/finetune_multi_modal_embedding.py +++ b/examples/pytorch/multi_modal_embedding/finetune_multi_modal_embedding.py @@ -1,15 +1,13 @@ import os from dataclasses import dataclass, field -from functools import partial +from modelscope import MsDataset, TrainingArgs from modelscope.metainfo import Trainers -from modelscope.msdatasets import MsDataset from modelscope.trainers import build_trainer -from modelscope.trainers.training_args import (TrainingArgs, get_flatten_value, - set_flatten_value) +from modelscope.trainers.training_args import set_flatten_value -@dataclass +@dataclass(init=False) class MultiModalEmbeddingArguments(TrainingArgs): trainer: str = field( @@ -17,6 +15,12 @@ class MultiModalEmbeddingArguments(TrainingArgs): 'help': 'The trainer used', }) + work_dir: str = field( + default='./tmp', + metadata={ + 'help': 'The working path for saving checkpoint', + }) + use_fp16: bool = field( default=None, metadata={ @@ -35,7 +39,6 @@ class MultiModalEmbeddingArguments(TrainingArgs): default=None, metadata={ 'cfg_node': 'train.optimizer_hparams', - 'cfg_getter': partial(get_flatten_value, exclusions=['lr']), 'cfg_setter': set_flatten_value, 'help': 'The optimizer init params except `lr`', }) @@ -51,7 +54,6 @@ class MultiModalEmbeddingArguments(TrainingArgs): default=None, metadata={ 'cfg_node': 'dataset.column_map', - 'cfg_getter': get_flatten_value, 'cfg_setter': set_flatten_value, 'help': 'The column map for dataset', }) @@ -67,7 +69,6 @@ class MultiModalEmbeddingArguments(TrainingArgs): default=None, metadata={ 'cfg_node': 'train.lr_scheduler_hook', - 'cfg_getter': get_flatten_value, 'cfg_setter': set_flatten_value, 'help': 'The parameters for lr scheduler hook', }) @@ -76,7 +77,6 @@ class MultiModalEmbeddingArguments(TrainingArgs): default=None, metadata={ 'cfg_node': 'train.optimizer_hook', - 'cfg_getter': get_flatten_value, 'cfg_setter': set_flatten_value, 'help': 'The parameters for optimizer hook', }) @@ -92,23 +92,28 @@ class MultiModalEmbeddingArguments(TrainingArgs): 'help': 'The data parallel world size', }) - def __call__(self, config): - config = super().__call__(config) - config.merge_from_dict({'pretrained_model.model_name': self.model}) - if self.clip_clamp: - config.train.hooks.append({'type': 'ClipClampLogitScaleHook'}) - if self.world_size > 1: - config.train.launcher = 'pytorch' - return config + +config, args = MultiModalEmbeddingArguments().parse_cli().to_config() +print(config, args) -args = MultiModalEmbeddingArguments.from_cli(task='multi-modal-embedding') -print(args) +def cfg_modify_fn(cfg): + if args.use_model_config: + cfg.merge_from_dict(config) + else: + cfg = config + cfg.merge_from_dict({'pretrained_model.model_name': args.model}) + if args.clip_clamp: + cfg.train.hooks.append({'type': 'ClipClampLogitScaleHook'}) + if args.world_size > 1: + cfg.train.launcher = 'pytorch' + return cfg + train_dataset = MsDataset.load( - args.dataset_name, 
namespace='modelscope', split='train') + args.train_dataset_name, namespace='modelscope', split='train') eval_dataset = MsDataset.load( - args.dataset_name, namespace='modelscope', split='validation') + args.train_dataset_name, namespace='modelscope', split='validation') os.makedirs(args.work_dir, exist_ok=True) kwargs = dict( @@ -116,6 +121,6 @@ kwargs = dict( train_dataset=train_dataset, eval_dataset=eval_dataset, work_dir=args.work_dir, - cfg_modify_fn=args) + cfg_modify_fn=cfg_modify_fn) trainer = build_trainer(name=args.trainer, default_args=kwargs) trainer.train() diff --git a/examples/pytorch/multi_modal_embedding/run_train.sh b/examples/pytorch/multi_modal_embedding/run_train.sh index 89eef73e..3974405b 100644 --- a/examples/pytorch/multi_modal_embedding/run_train.sh +++ b/examples/pytorch/multi_modal_embedding/run_train.sh @@ -6,14 +6,16 @@ PYTHONPATH=. torchrun --nproc_per_node $DATA_PARALLEL_SIZE \ --trainer 'clip-multi-modal-embedding' \ --work_dir './workspace/ckpts/clip' \ --model 'damo/multi-modal_clip-vit-base-patch16_zh' \ - --dataset_name 'muge' \ + --train_dataset_name 'muge' \ --dataset_column_map 'img=image,text=query' \ --max_epochs 1 \ --use_fp16 true \ --per_device_train_batch_size 180 \ + --train_data_worker 0 \ --train_shuffle true \ --train_drop_last true \ --per_device_eval_batch_size 128 \ + --eval_data_worker 0 \ --eval_shuffle true \ --eval_drop_last true \ --save_ckpt_best true \ @@ -33,3 +35,4 @@ PYTHONPATH=. torchrun --nproc_per_node $DATA_PARALLEL_SIZE \ --optimizer_hook 'type=TorchAMPOptimizerHook,cumulative_iters=1,loss_keys=loss' \ --clip_clamp true \ --world_size $DATA_PARALLEL_SIZE \ + --use_model_config true \ diff --git a/examples/pytorch/stable_diffusion/finetune_stable_diffusion.py b/examples/pytorch/stable_diffusion/finetune_stable_diffusion.py index bd05097d..28ba853c 100644 --- a/examples/pytorch/stable_diffusion/finetune_stable_diffusion.py +++ b/examples/pytorch/stable_diffusion/finetune_stable_diffusion.py @@ -4,30 +4,32 @@ from modelscope.msdatasets import MsDataset from modelscope.trainers import EpochBasedTrainer, build_trainer from modelscope.trainers.training_args import TrainingArgs - -@dataclass -class StableDiffusionArguments(TrainingArgs): - - def __call__(self, config): - config = super().__call__(config) - config.train.lr_scheduler.T_max = self.max_epochs - config.model.inference = False - return config - - -args = StableDiffusionArguments.from_cli(task='efficient-diffusion-tuning') +training_args = TrainingArgs(task='efficient-diffusion-tuning').parse_cli() +config, args = training_args.to_config() print(args) -dataset = MsDataset.load(args.dataset_name, namespace=args.namespace) +dataset = MsDataset.load( + args.train_dataset_name, namespace=args.train_dataset_namespace) train_dataset = dataset['train'] validation_dataset = dataset['validation'] + +def cfg_modify_fn(cfg): + if args.use_model_config: + cfg.merge_from_dict(config) + else: + cfg = config + cfg.train.lr_scheduler.T_max = training_args.max_epochs + cfg.model.inference = False + return cfg + + kwargs = dict( - model=args.model, - work_dir=args.work_dir, + model=training_args.model, + work_dir=training_args.work_dir, train_dataset=train_dataset, eval_dataset=validation_dataset, - cfg_modify_fn=args) + cfg_modify_fn=cfg_modify_fn) trainer: EpochBasedTrainer = build_trainer(name='trainer', default_args=kwargs) trainer.train() diff --git a/examples/pytorch/stable_diffusion/run_train.sh b/examples/pytorch/stable_diffusion/run_train.sh index c8bfa26c..0e551942 100644 --- 
a/examples/pytorch/stable_diffusion/run_train.sh +++ b/examples/pytorch/stable_diffusion/run_train.sh @@ -1,11 +1,12 @@ PYTHONPATH=. torchrun examples/pytorch/stable_diffusion/finetune_stable_diffusion.py \ --model 'damo/multi-modal_efficient-diffusion-tuning-lora' \ --work_dir './tmp/stable_diffusion_tuning' \ - --namespace 'damo' \ - --dataset_name 'buptwq/lora-stable-diffusion-finetune-dog' \ - --max_epochs 150 \ + --train_dataset_namespace 'damo' \ + --train_dataset_name 'controlnet_dataset_condition_fill50k' \ + --max_epochs 1 \ --save_ckpt_strategy 'by_epoch' \ --logging_interval 100 \ --train.dataloader.workers_per_gpu 0 \ --evaluation.dataloader.workers_per_gpu 0 \ - --train.optimizer.lr 1e-4 + --train.optimizer.lr 1e-5 \ + --use_model_config true diff --git a/examples/pytorch/text_classification/finetune_text_classification.py b/examples/pytorch/text_classification/finetune_text_classification.py index 7747bc25..dfcb7b4d 100644 --- a/examples/pytorch/text_classification/finetune_text_classification.py +++ b/examples/pytorch/text_classification/finetune_text_classification.py @@ -1,26 +1,18 @@ import os from dataclasses import dataclass, field -from modelscope.msdatasets import MsDataset -from modelscope.trainers import EpochBasedTrainer, build_trainer -from modelscope.trainers.training_args import TrainingArgs +from modelscope import (EpochBasedTrainer, MsDataset, TrainingArgs, + build_dataset_from_file) +from modelscope.trainers import build_trainer -def get_labels(cfg, metadata): - label2id = cfg.safe_get(metadata['cfg_node']) - if label2id is not None: - return ','.join(label2id.keys()) - - -def set_labels(cfg, labels, metadata): +def set_labels(labels): if isinstance(labels, str): labels = labels.split(',') - cfg.merge_from_dict( - {metadata['cfg_node']: {label: id - for id, label in enumerate(labels)}}) + return {label: id for id, label in enumerate(labels)} -@dataclass +@dataclass(init=False) class TextClassificationArguments(TrainingArgs): first_sequence: str = field( @@ -49,7 +41,6 @@ class TextClassificationArguments(TrainingArgs): metadata={ 'help': 'The labels of the dataset', 'cfg_node': 'preprocessor.label2id', - 'cfg_getter': get_labels, 'cfg_setter': set_labels, }) @@ -60,30 +51,39 @@ class TextClassificationArguments(TrainingArgs): 'cfg_node': 'preprocessor.type' }) - def __call__(self, config): - config = super().__call__(config) - config.model['num_labels'] = len(self.labels) - if config.train.lr_scheduler.type == 'LinearLR': - config.train.lr_scheduler['total_iters'] = \ - int(len(train_dataset) / self.per_device_train_batch_size) * self.max_epochs - return config + +config, args = TextClassificationArguments().parse_cli().to_config() + +print(config, args) -args = TextClassificationArguments.from_cli( - task='text-classification', eval_metrics='seq-cls-metric') +def cfg_modify_fn(cfg): + if args.use_model_config: + cfg.merge_from_dict(config) + else: + cfg = config + cfg.model['num_labels'] = len(cfg.preprocessor.label2id) + if cfg.train.lr_scheduler.type == 'LinearLR': + cfg.train.lr_scheduler['total_iters'] = \ + int(len(train_dataset) / cfg.train.dataloader.batch_size_per_gpu) * cfg.train.max_epochs + return cfg -print(args) -dataset = MsDataset.load(args.dataset_name, subset_name=args.subset_name) -train_dataset = dataset['train'] -validation_dataset = dataset['validation'] +if args.dataset_json_file is None: + dataset = MsDataset.load( + args.train_dataset_name, subset_name=args.train_subset_name) + train_dataset = dataset['train'] + validation_dataset = 
dataset['validation'] +else: + train_dataset, validation_dataset = build_dataset_from_file( + args.dataset_json_file) kwargs = dict( model=args.model, train_dataset=train_dataset, eval_dataset=validation_dataset, seed=args.seed, - cfg_modify_fn=args) + cfg_modify_fn=cfg_modify_fn) os.environ['LOCAL_RANK'] = str(args.local_rank) trainer: EpochBasedTrainer = build_trainer(name='trainer', default_args=kwargs) diff --git a/examples/pytorch/text_classification/run_train.sh b/examples/pytorch/text_classification/run_train.sh index 93c23d0d..e91a9996 100644 --- a/examples/pytorch/text_classification/run_train.sh +++ b/examples/pytorch/text_classification/run_train.sh @@ -1,12 +1,16 @@ PYTHONPATH=. python examples/pytorch/text_classification/finetune_text_classification.py \ + --task 'text-classification' \ --model 'damo/nlp_structbert_backbone_base_std' \ - --dataset_name 'clue' \ - --subset_name 'tnews' \ + --train_dataset_name 'clue' \ + --train_subset_name 'tnews' \ --first_sequence 'sentence' \ --preprocessor.label label \ --model.num_labels 15 \ --labels '0,1,2,3,4,5,6,7,8,9,10,11,12,13,14' \ --preprocessor 'sen-cls-tokenizer' \ + --use_model_config True \ + --max_epochs 1 \ --train.dataloader.workers_per_gpu 0 \ --evaluation.dataloader.workers_per_gpu 0 \ --train.optimizer.lr 1e-5 \ + --eval_metrics 'seq-cls-metric' \ diff --git a/examples/pytorch/text_generation/finetune_text_generation.py b/examples/pytorch/text_generation/finetune_text_generation.py index 7a140a0c..a89970e8 100644 --- a/examples/pytorch/text_generation/finetune_text_generation.py +++ b/examples/pytorch/text_generation/finetune_text_generation.py @@ -1,12 +1,11 @@ from dataclasses import dataclass, field +from modelscope import EpochBasedTrainer, MsDataset, TrainingArgs from modelscope.metainfo import Trainers -from modelscope.msdatasets import MsDataset -from modelscope.trainers import EpochBasedTrainer, build_trainer -from modelscope.trainers.training_args import TrainingArgs +from modelscope.trainers import build_trainer -@dataclass +@dataclass(init=False) class TextGenerationArguments(TrainingArgs): trainer: str = field( @@ -67,30 +66,35 @@ class TextGenerationArguments(TrainingArgs): 'help': 'Whether to use MegatronHook', }) - def __call__(self, config): - config = super().__call__(config) - if config.train.lr_scheduler.type == 'noam': - config.train.lr_scheduler = { - 'type': 'LambdaLR', - 'lr_lambda': noam_lambda, - 'options': { - 'by_epoch': False - } - } - if self.use_megatron: - config.train.hooks.append({'type': 'MegatronHook'}) - return config - def noam_lambda(current_step: int): current_step += 1 return min(current_step**(-0.5), current_step * 100**(-1.5)) -args = TextGenerationArguments.from_cli(task='text-generation') -print(args) +config, args = TextGenerationArguments().parse_cli().to_config() +print(config, args) -dataset = MsDataset.load(args.dataset_name) + +def cfg_modify_fn(cfg): + if args.use_model_config: + cfg.merge_from_dict(config) + else: + cfg = config + if cfg.train.lr_scheduler.type == 'noam': + cfg.train.lr_scheduler = { + 'type': 'LambdaLR', + 'lr_lambda': noam_lambda, + 'options': { + 'by_epoch': False + } + } + if args.use_megatron: + cfg.train.hooks.append({'type': 'MegatronHook'}) + return cfg + + +dataset = MsDataset.load(args.train_dataset_name) train_dataset = dataset['train'] eval_dataset = dataset['validation' if 'validation' in dataset else 'test'] @@ -100,7 +104,7 @@ kwargs = dict( eval_dataset=eval_dataset, seed=args.seed, work_dir=args.work_dir, - cfg_modify_fn=args) + 
cfg_modify_fn=cfg_modify_fn) trainer: EpochBasedTrainer = build_trainer( name=args.trainer, default_args=kwargs) diff --git a/examples/pytorch/text_generation/run_train_gpt3.sh b/examples/pytorch/text_generation/run_train_gpt3.sh index a20a5bb2..fd37b42c 100644 --- a/examples/pytorch/text_generation/run_train_gpt3.sh +++ b/examples/pytorch/text_generation/run_train_gpt3.sh @@ -8,7 +8,7 @@ PYTHONPATH=. torchrun --nproc_per_node $WORLD_SIZE examples/pytorch/text_generat --trainer 'nlp-gpt3-trainer' \ --work_dir './tmp' \ --model 'damo/nlp_gpt3_text-generation_1.3B' \ - --dataset_name 'chinese-poetry-collection' \ + --train_dataset_name 'chinese-poetry-collection' \ --preprocessor 'text-gen-jieba-tokenizer' \ --src_txt 'text1' \ --tgt_txt 'text2' \ @@ -20,4 +20,5 @@ PYTHONPATH=. torchrun --nproc_per_node $WORLD_SIZE examples/pytorch/text_generat --world_size $WORLD_SIZE \ --tensor_model_parallel_size $TENSOR_MODEL_PARALLEL_SIZE \ --use_megatron true \ - # --dataset_name 'DuReader_robust-QG' \ # input&output + --use_model_config true \ + # --train_dataset_name 'DuReader_robust-QG' \ # input&output diff --git a/examples/pytorch/text_generation/run_train_mt5.sh b/examples/pytorch/text_generation/run_train_mt5.sh new file mode 100644 index 00000000..6d032d6e --- /dev/null +++ b/examples/pytorch/text_generation/run_train_mt5.sh @@ -0,0 +1,13 @@ +PYTHONPATH=. torchrun examples/pytorch/text_generation/finetune_text_generation.py \ + --trainer 'text-generation-trainer' \ + --work_dir './tmp' \ + --task 'text2text-generation' \ + --model 'damo/nlp_mt5_zero-shot-augment_chinese-base' \ + --train_dataset_name 'DuReader_robust-QG' \ + --src_txt 'text1' \ + --tgt_txt 'text2' \ + --max_epochs 1 \ + --use_model_config True \ + --per_device_train_batch_size 8 \ + --lr 1e-3 \ + --lr_scheduler 'noam' \ diff --git a/examples/pytorch/text_generation/run_train_palm.sh b/examples/pytorch/text_generation/run_train_palm.sh index ff88ce7d..68b9e89d 100644 --- a/examples/pytorch/text_generation/run_train_palm.sh +++ b/examples/pytorch/text_generation/run_train_palm.sh @@ -2,10 +2,11 @@ PYTHONPATH=. torchrun examples/pytorch/text_generation/finetune_text_generation. 
--trainer 'text-generation-trainer' \ --work_dir './tmp' \ --model 'damo/nlp_palm2.0_pretrained_chinese-base' \ - --dataset_name 'DuReader_robust-QG' \ + --train_dataset_name 'DuReader_robust-QG' \ --src_txt 'text1' \ --tgt_txt 'text2' \ - --max_epochs 15 \ + --max_epochs 1 \ + --use_model_config True \ --per_device_train_batch_size 8 \ --lr 1e-3 \ --lr_scheduler 'noam' \ diff --git a/examples/pytorch/token_classification/finetune_token_classification.py b/examples/pytorch/token_classification/finetune_token_classification.py index cf51ed22..3f9de791 100644 --- a/examples/pytorch/token_classification/finetune_token_classification.py +++ b/examples/pytorch/token_classification/finetune_token_classification.py @@ -1,20 +1,22 @@ from dataclasses import dataclass, field -from modelscope.metainfo import Trainers -from modelscope.msdatasets import MsDataset -from modelscope.trainers import build_trainer -from modelscope.trainers.training_args import (TrainingArgs, get_flatten_value, - set_flatten_value) +from modelscope import (EpochBasedTrainer, MsDataset, TrainingArgs, + build_dataset_from_file) -@dataclass +@dataclass(init=False) class TokenClassificationArguments(TrainingArgs): - trainer: str = field( - default=Trainers.default, metadata={ + default=None, metadata={ 'help': 'The trainer used', }) + work_dir: str = field( + default='./tmp', + metadata={ + 'help': 'The working path for saving checkpoint', + }) + preprocessor: str = field( default=None, metadata={ @@ -29,60 +31,99 @@ class TokenClassificationArguments(TrainingArgs): 'cfg_node': 'preprocessor.padding' }) - train_dataset_params: str = field( + mode: str = field( + default='inference', + metadata={ + 'help': 'The preprocessor padding', + 'cfg_node': 'preprocessor.mode' + }) + + first_sequence: str = field( default=None, metadata={ - 'cfg_node': 'dataset.train', - 'cfg_getter': get_flatten_value, - 'cfg_setter': set_flatten_value, + 'cfg_node': 'preprocessor.first_sequence', 'help': 'The parameters for train dataset', }) - def __call__(self, config): - config = super().__call__(config) - if config.safe_get('dataset.train.label') == 'ner_tags': - ner_tags_labels = train_dataset['ner_tags'] + eval_dataset[ - 'ner_tags'] - label_enumerate_values = self._get_label_list(ner_tags_labels) - config.merge_from_dict( - {'dataset.train.labels': label_enumerate_values}) - if config.train.lr_scheduler.type == 'LinearLR': - config.train.lr_scheduler['total_iters'] = \ - int(len(train_dataset) / self.per_device_train_batch_size) * self.max_epochs - return config + label: str = field( + default=None, + metadata={ + 'cfg_node': 'preprocessor.label', + 'help': 'The parameters for train dataset', + }) - # TODO: Future performance optimization in MsDataset - @staticmethod - def _get_label_list(labels): - unique_labels = set() - for label in labels: - unique_labels = unique_labels | set(label) - label_list = list(unique_labels) - label_list.sort() - return label_list + sequence_length: int = field( + default=128, + metadata={ + 'cfg_node': 'preprocessor.sequence_length', + 'help': 'The parameters for train dataset', + }) -args = TokenClassificationArguments.from_cli(task='token-classification') +training_args = TokenClassificationArguments().parse_cli() +config, args = training_args.to_config() print(args) -# load dataset -train_dataset = MsDataset.load( - args.dataset_name, - subset_name=args.subset_name, - split='train', - namespace='damo')['train'] -eval_dataset = MsDataset.load( - args.dataset_name, - subset_name=args.subset_name, - 
split='validation', - namespace='damo')['validation'] + +def get_label_list(labels): + unique_labels = set() + for label in labels: + unique_labels = unique_labels | set(label) + label_list = list(unique_labels) + label_list.sort() + return label_list + + +def cfg_modify_fn(cfg): + if args.use_model_config: + cfg.merge_from_dict(config) + else: + cfg = config + labels = train_dataset[training_args.label] + validation_dataset[ + training_args.label] + label_enumerate_values = get_label_list(labels) + cfg.merge_from_dict({ + 'preprocessor.label2id': + {label: id + for id, label in enumerate(label_enumerate_values)} + }) + cfg.merge_from_dict({'model.num_labels': len(label_enumerate_values)}) + cfg.merge_from_dict({'preprocessor.use_fast': True}) + cfg.merge_from_dict({ + 'evaluation.metrics': { + 'type': 'token-cls-metric', + 'label2id': + {label: id + for id, label in enumerate(label_enumerate_values)} + } + }) + if cfg.train.lr_scheduler.type == 'LinearLR': + cfg.train.lr_scheduler['total_iters'] = \ + int(len(train_dataset) / cfg.train.dataloader.batch_size_per_gpu) * cfg.train.max_epochs + return cfg + + +if args.dataset_json_file is None: + train_dataset = MsDataset.load( + args.train_dataset_name, + subset_name=args.train_subset_name, + split='train', + namespace=args.train_dataset_namespace)['train'] + validation_dataset = MsDataset.load( + args.train_dataset_name, + subset_name=args.train_subset_name, + split='validation', + namespace=args.train_dataset_namespace)['validation'] +else: + train_dataset, validation_dataset = build_dataset_from_file( + args.dataset_json_file) kwargs = dict( model=args.model, train_dataset=train_dataset, - eval_dataset=eval_dataset, + eval_dataset=validation_dataset, work_dir=args.work_dir, - cfg_modify_fn=args) + cfg_modify_fn=cfg_modify_fn) -trainer = build_trainer(name=args.trainer, default_args=kwargs) +trainer = EpochBasedTrainer(**kwargs) trainer.train() diff --git a/examples/pytorch/token_classification/run_train_mgeo.sh b/examples/pytorch/token_classification/run_train_mgeo.sh index f80af84f..1e384ec5 100644 --- a/examples/pytorch/token_classification/run_train_mgeo.sh +++ b/examples/pytorch/token_classification/run_train_mgeo.sh @@ -1,15 +1,22 @@ -PYTHONPATH=. torchrun examples/pytorch/token_classification/finetune_token_classification.py \ +PYTHONPATH=. 
python examples/pytorch/token_classification/finetune_token_classification.py \ + --task 'token-classification' \ --trainer 'nlp-base-trainer' \ --work_dir './tmp' \ --model 'damo/mgeo_backbone_chinese_base' \ - --dataset_name 'GeoGLUE' \ - --subset_name 'GeoETA' \ - --train_dataset_params 'first_sequence=tokens,label=ner_tags,sequence_length=128' \ + --train_dataset_name 'GeoGLUE' \ + --train_subset_name 'GeoETA' \ + --train_dataset_namespace 'damo' \ + --first_sequence 'tokens' \ + --eval_strategy by_step \ + --eval_interval 10 \ + --label 'ner_tags' \ + --sequence_length 128 \ --preprocessor 'token-cls-tokenizer' \ --preprocessor_padding 'max_length' \ --max_epochs 1 \ + --mode 'inference' \ + --use_model_config True \ --per_device_train_batch_size 32 \ + --train_data_worker 0 \ + --eval_data_worker 0 \ --lr 3e-5 \ - --save_ckpt_strategy 'by_epoch' \ - --logging_interval 100 \ - --eval_strategy 'by_epoch' \ diff --git a/examples/pytorch/token_classification/run_train_structbert.sh b/examples/pytorch/token_classification/run_train_structbert.sh index 28967f60..a44c4519 100644 --- a/examples/pytorch/token_classification/run_train_structbert.sh +++ b/examples/pytorch/token_classification/run_train_structbert.sh @@ -1,16 +1,22 @@ -PYTHONPATH=. torchrun examples/pytorch/token_classification/finetune_token_classification.py \ +PYTHONPATH=. python examples/pytorch/token_classification/finetune_token_classification.py \ + --task 'token-classification' \ --trainer 'nlp-base-trainer' \ --work_dir './tmp' \ --model 'damo/nlp_structbert_backbone_base_std' \ - --dataset_name 'GeoGLUE' \ - --subset_name 'GeoETA' \ - --train_dataset_params 'first_sequence=tokens,label=ner_tags,sequence_length=128' \ + --train_dataset_name 'GeoGLUE' \ + --train_subset_name 'GeoETA' \ + --train_dataset_namespace 'damo' \ + --first_sequence 'tokens' \ + --eval_strategy by_step \ + --eval_interval 20 \ + --label 'ner_tags' \ + --sequence_length 128 \ --preprocessor 'token-cls-tokenizer' \ --preprocessor_padding 'max_length' \ --max_epochs 2 \ + --mode 'inference' \ + --use_model_config True \ --per_device_train_batch_size 32 \ + --train_data_worker 0 \ + --eval_data_worker 0 \ --lr 3e-5 \ - --save_ckpt_strategy 'by_epoch' \ - --logging_interval 1 \ - --eval_strategy 'by_step' \ - --eval_interval 20 \ diff --git a/examples/pytorch/transformers/configuration.json b/examples/pytorch/transformers/configuration.json deleted file mode 100644 index df6a73c8..00000000 --- a/examples/pytorch/transformers/configuration.json +++ /dev/null @@ -1 +0,0 @@ -{"framework":"pytorch","train":{"work_dir":"/tmp","max_epochs":10,"dataloader":{"batch_size_per_gpu":16,"workers_per_gpu":0},"optimizer":{"type":"SGD","lr":0.001},"lr_scheduler":{"type":"StepLR","step_size":2},"hooks":[{"type":"CheckpointHook","interval":1}]},"evaluation":{"dataloader":{"batch_size_per_gpu":16,"workers_per_gpu":0,"shuffle":false}}} diff --git a/examples/pytorch/transformers/finetune_transformers_model.py b/examples/pytorch/transformers/finetune_transformers_model.py index bbfb807a..5110f751 100644 --- a/examples/pytorch/transformers/finetune_transformers_model.py +++ b/examples/pytorch/transformers/finetune_transformers_model.py @@ -5,11 +5,11 @@ from datasets import load_dataset from transformers import (BertForSequenceClassification, BertTokenizerFast, default_data_collator) +from modelscope import TrainingArgs from modelscope.trainers import EpochBasedTrainer, build_trainer -from modelscope.trainers.default_config import DEFAULT_CONFIG, TrainingArgs -@dataclass 
+@dataclass(init=False) class TransformersArguments(TrainingArgs): num_labels: int = field( @@ -17,13 +17,27 @@ class TransformersArguments(TrainingArgs): 'help': 'The number of labels', }) + sentence: str = field( + default=None, metadata={ + 'help': 'The sentence key', + }) -args = TransformersArguments.from_cli( - task='text-classification', eval_metrics='seq-cls-metric') + label: str = field( + default=None, metadata={ + 'help': 'The label key', + }) -print(args) -dataset = load_dataset(args.dataset_name, args.subset_name) +training_args = TransformersArguments( + task='text-classification', eval_metrics='seq-cls-metric').parse_cli() +config, args = training_args.to_config() + +print(config, args) + +train_dataset = load_dataset( + args.train_dataset_name, args.train_subset_name, split=args.train_split) +val_dataset = load_dataset( + args.val_dataset_name, args.val_subset_name, split=args.val_split) model = BertForSequenceClassification.from_pretrained( args.model, num_labels=args.num_labels) @@ -31,26 +45,30 @@ tokenizer = BertTokenizerFast.from_pretrained(args.model) def tokenize_sentence(row): - return tokenizer(row['sentence'], padding='max_length', max_length=128) + return tokenizer( + row[training_args.sentence], padding='max_length', max_length=128) # Extra columns, Rename columns -dataset = dataset.map(tokenize_sentence).remove_columns(['sentence', - 'idx']).rename_column( - 'label', 'labels') +train_dataset = train_dataset.map(tokenize_sentence) +val_dataset = val_dataset.map(tokenize_sentence) +if training_args.label != 'labels': + train_dataset = train_dataset.rename_columns( + {training_args.label: 'labels'}) + val_dataset = val_dataset.rename_columns({training_args.label: 'labels'}) cfg_file = os.path.join(args.work_dir or './', 'configuration.json') -DEFAULT_CONFIG.dump(cfg_file) +config.dump(cfg_file) kwargs = dict( model=model, cfg_file=cfg_file, # data_collator data_collator=default_data_collator, - train_dataset=dataset['train'], - eval_dataset=dataset['validation'], - seed=args.seed, - cfg_modify_fn=args) + train_dataset=train_dataset, + eval_dataset=val_dataset, + remove_unused_data=True, + seed=args.seed) os.environ['LOCAL_RANK'] = str(args.local_rank) trainer: EpochBasedTrainer = build_trainer(name='trainer', default_args=kwargs) diff --git a/examples/pytorch/transformers/run_train.sh b/examples/pytorch/transformers/run_train.sh index c76c4636..94e5ef75 100644 --- a/examples/pytorch/transformers/run_train.sh +++ b/examples/pytorch/transformers/run_train.sh @@ -1,5 +1,14 @@ PYTHONPATH=. python examples/pytorch/transformers/finetune_transformers_model.py \ --model bert-base-uncased \ --num_labels 15 \ - --dataset_name clue \ - --subset_name tnews + --train_dataset_name clue \ + --train_subset_name tnews \ + --train_split train \ + --val_dataset_name clue \ + --val_subset_name tnews \ + --train_split train \ + --val_split validation \ + --sentence sentence \ + --label label \ + --eval_strategy by_step \ + --eval_interval 100 diff --git a/modelscope/__init__.py b/modelscope/__init__.py index 81fdf505..f7553958 100644 --- a/modelscope/__init__.py +++ b/modelscope/__init__.py @@ -1,4 +1,79 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
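
# ---------------------------------------------------------------------------
# Illustrative sketch of the TrainingArgs flow that the refactored example
# scripts above share: build the arguments with programmatic defaults, overlay
# command-line flags via parse_cli(), split the result into a Config plus a
# plain namespace via to_config(), and hand a cfg_modify_fn to build_trainer().
# The model id and dataset handling mirror the image-classification example;
# the generic trainer name 'trainer' is an assumption and may need to be
# replaced by a task-specific trainer.
# ---------------------------------------------------------------------------
from modelscope import MsDataset, TrainingArgs
from modelscope.trainers import build_trainer

training_args = TrainingArgs(
    model='damo/cv_vit-base_image-classification_ImageNet-labels',
    max_epochs=1).parse_cli()
config, args = training_args.to_config()


def cfg_modify_fn(cfg):
    # --use_model_config true: merge CLI values into the model's own config,
    # otherwise replace the model config with the CLI-derived one.
    if args.use_model_config:
        cfg.merge_from_dict(config)
    else:
        cfg = config
    return cfg


namespace, dataset_name = args.train_dataset_name.split('/')
train_dataset = MsDataset.load(
    dataset_name, namespace=namespace, subset_name='default', split='train')
eval_dataset = MsDataset.load(
    dataset_name, namespace=namespace, subset_name='default',
    split='validation')

trainer = build_trainer(
    name='trainer',
    default_args=dict(
        model=args.model,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        cfg_modify_fn=cfg_modify_fn))
trainer.train()
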
-from .version import __release_datetime__, __version__ +from typing import TYPE_CHECKING -__all__ = ['__version__', '__release_datetime__'] +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .version import __release_datetime__, __version__ + from .trainers import EpochBasedTrainer, TrainingArgs, build_dataset_from_file + from .trainers import Hook, Priority + from .exporters import Exporter + from .exporters import TfModelExporter + from .exporters import TorchModelExporter + from .hub.api import HubApi + from .hub.snapshot_download import snapshot_download + from .hub.push_to_hub import push_to_hub, push_to_hub_async + from .hub.check_model import check_model_is_id, check_local_model_is_latest + from .metrics import AudioNoiseMetric, Metric, task_default_metrics, ImageColorEnhanceMetric, ImageDenoiseMetric, \ + ImageInstanceSegmentationCOCOMetric, ImagePortraitEnhancementMetric, SequenceClassificationMetric, \ + TextGenerationMetric, TokenClassificationMetric, VideoSummarizationMetric, MovieSceneSegmentationMetric, \ + AccuracyMetric, BleuMetric, ImageInpaintingMetric, ReferringVideoObjectSegmentationMetric, \ + VideoFrameInterpolationMetric, VideoStabilizationMetric, VideoSuperResolutionMetric, PplMetric, \ + ImageQualityAssessmentDegradationMetric, ImageQualityAssessmentMosMetric, TextRankingMetric, \ + LossMetric, ImageColorizationMetric, OCRRecognitionMetric + from .models import Model, TorchModel + from .preprocessors import Preprocessor + from .pipelines import Pipeline, pipeline + from .utils.hub import read_config, create_model_if_not_exist + from .utils.logger import get_logger + from .msdatasets import MsDataset + +else: + _import_structure = { + 'version': ['__release_datetime__', '__version__'], + 'trainers': [ + 'EpochBasedTrainer', 'TrainingArgs', 'Hook', 'Priority', + 'build_dataset_from_file' + ], + 'exporters': [ + 'Exporter', + 'TfModelExporter', + 'TorchModelExporter', + ], + 'hub.api': ['HubApi'], + 'hub.snapshot_download': ['snapshot_download'], + 'hub.push_to_hub': ['push_to_hub', 'push_to_hub_async'], + 'hub.check_model': + ['check_model_is_id', 'check_local_model_is_latest'], + 'metrics': [ + 'AudioNoiseMetric', 'Metric', 'task_default_metrics', + 'ImageColorEnhanceMetric', 'ImageDenoiseMetric', + 'ImageInstanceSegmentationCOCOMetric', + 'ImagePortraitEnhancementMetric', 'SequenceClassificationMetric', + 'TextGenerationMetric', 'TokenClassificationMetric', + 'VideoSummarizationMetric', 'MovieSceneSegmentationMetric', + 'AccuracyMetric', 'BleuMetric', 'ImageInpaintingMetric', + 'ReferringVideoObjectSegmentationMetric', + 'VideoFrameInterpolationMetric', 'VideoStabilizationMetric', + 'VideoSuperResolutionMetric', 'PplMetric', + 'ImageQualityAssessmentDegradationMetric', + 'ImageQualityAssessmentMosMetric', 'TextRankingMetric', + 'LossMetric', 'ImageColorizationMetric', 'OCRRecognitionMetric' + ], + 'models': ['Model', 'TorchModel'], + 'preprocessors': ['Preprocessor'], + 'pipelines': ['Pipeline', 'pipeline'], + 'utils.hub': ['read_config', 'create_model_if_not_exist'], + 'utils.logger': ['get_logger'], + 'msdatasets': ['MsDataset'] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/cli/template/template.tpl b/modelscope/cli/template/template.tpl index 0c09a925..78fe339c 100644 --- a/modelscope/cli/template/template.tpl +++ b/modelscope/cli/template/template.tpl @@ -122,10 +122,11 @@ 
class ${pipeline_name}(Pipeline): # Tips: usr_config_path is the temporary save configuration location, after upload modelscope hub, it is the model_id usr_config_path = '${configuration_path}' config = Config({ - 'framework': 'pytorch', - 'task': '${task_name}', - 'model': {'type': 'my-custom-model'}, - "pipeline": {"type": "my-custom-pipeline"} + "framework": 'pytorch', + "task": '${task_name}', + "model": {'type': 'my-custom-model'}, + "pipeline": {"type": "my-custom-pipeline"}, + "allow_remote": True }) config.dump('${configuration_path}' + 'configuration.json') diff --git a/modelscope/models/cv/human_wholebody_keypoint/__init__.py b/modelscope/exporters/audio/__init__.py similarity index 75% rename from modelscope/models/cv/human_wholebody_keypoint/__init__.py rename to modelscope/exporters/audio/__init__.py index 30e23457..883151cd 100644 --- a/modelscope/models/cv/human_wholebody_keypoint/__init__.py +++ b/modelscope/exporters/audio/__init__.py @@ -1,14 +1,14 @@ # Copyright (c) Alibaba, Inc. and its affiliates. + from typing import TYPE_CHECKING from modelscope.utils.import_utils import LazyImportModule if TYPE_CHECKING: - from .human_wholebody_keypoint import HumanWholeBodyKeypoint - + from .ans_dfsmn_exporter import ANSDFSMNExporter else: _import_structure = { - 'human_wholebody_keypoint': ['HumanWholeBodyKeypoint'] + 'ans_dfsmn_exporter': ['ANSDFSMNExporter'], } import sys diff --git a/modelscope/exporters/audio/ans_dfsmn_exporter.py b/modelscope/exporters/audio/ans_dfsmn_exporter.py new file mode 100644 index 00000000..976f983f --- /dev/null +++ b/modelscope/exporters/audio/ans_dfsmn_exporter.py @@ -0,0 +1,62 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os + +import torch + +from modelscope.exporters.builder import EXPORTERS +from modelscope.exporters.torch_model_exporter import TorchModelExporter +from modelscope.metainfo import Models +from modelscope.utils.constant import ModelFile, Tasks + +INPUT_NAME = 'input' +OUTPUT_NAME = 'output' + + +@EXPORTERS.register_module( + Tasks.acoustic_noise_suppression, module_name=Models.speech_dfsmn_ans) +class ANSDFSMNExporter(TorchModelExporter): + + def export_onnx(self, output_dir: str, opset=9, **kwargs): + """Export the model as onnx format files. + + Args: + output_dir: The output dir. + opset: The version of the ONNX operator set to use. + kwargs: + device: The device used to forward. + Returns: + A dict containing the model key - model file path pairs. 
+ """ + model = self.model if 'model' not in kwargs else kwargs.pop('model') + device_name = 'cpu' if 'device' not in kwargs else kwargs.pop('device') + model_bin_file = os.path.join(model.model_dir, + ModelFile.TORCH_MODEL_BIN_FILE) + if os.path.exists(model_bin_file): + checkpoint = torch.load(model_bin_file, map_location='cpu') + model.load_state_dict(checkpoint) + onnx_file = os.path.join(output_dir, ModelFile.ONNX_MODEL_FILE) + + with torch.no_grad(): + model.eval() + device = torch.device(device_name) + model.to(device) + model_script = torch.jit.script(model) + fbank_input = torch.zeros((1, 3, 120), dtype=torch.float32) + torch.onnx.export( + model_script, + fbank_input, + onnx_file, + opset_version=opset, + input_names=[INPUT_NAME], + output_names=[OUTPUT_NAME], + dynamic_axes={ + INPUT_NAME: { + 0: 'batch_size', + 1: 'number_of_frame' + }, + OUTPUT_NAME: { + 0: 'batch_size', + 1: 'number_of_frame' + } + }) + return {'model': onnx_file} diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index ad8d0c5d..e3436aea 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -6,6 +6,7 @@ import functools import os import pickle import platform +import re import shutil import tempfile import uuid @@ -15,10 +16,10 @@ from http.cookiejar import CookieJar from os.path import expanduser from typing import Dict, List, Optional, Tuple, Union +import requests from requests import Session from requests.adapters import HTTPAdapter, Retry -from modelscope import __version__ from modelscope.hub.constants import (API_HTTP_CLIENT_TIMEOUT, API_RESPONSE_FIELD_DATA, API_RESPONSE_FIELD_EMAIL, @@ -45,7 +46,7 @@ from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, MASTER_MODEL_BRANCH, DatasetFormations, DatasetMetaFormats, DatasetVisibilityMap, DownloadChannel, - ModelFile) + ModelFile, VirgoDatasetConfig) from modelscope.utils.logger import get_logger from .utils.utils import (get_endpoint, get_release_datetime, model_id_to_group_owner_name) @@ -160,6 +161,7 @@ class HubApi: 'Visibility': visibility, # server check 'License': license, 'OriginalModelId': original_model_id, + 'TrainId': os.environ.get('MODELSCOPE_TRAIN_ID', ''), } r = self.session.post( path, json=body, cookies=cookies, headers=self.headers) @@ -236,8 +238,10 @@ class HubApi: license: Optional[str] = Licenses.APACHE_V2, chinese_name: Optional[str] = None, commit_message: Optional[str] = 'upload model', + tag: Optional[str] = None, revision: Optional[str] = DEFAULT_REPOSITORY_REVISION, - original_model_id: Optional[str] = None): + original_model_id: Optional[str] = None, + ignore_file_pattern: Optional[Union[List[str], str]] = None): """Upload model from a given directory to given repository. A valid model directory must contain a configuration.json file. @@ -268,10 +272,13 @@ class HubApi: chinese name of the new created model. commit_message(`str`, *optional*, defaults to `None`): commit message of the push request. + tag(`str`, *optional*, defaults to `None`): + The tag on this commit revision (`str`, *optional*, default to DEFAULT_MODEL_REVISION): which branch to push. If the branch is not exists, It will create a new branch and push to it. original_model_id (str, optional): The base model id which this model is trained from + ignore_file_pattern (`Union[List[str], str]`, optional): The file pattern to ignore uploading Raises: InvalidParameter: Parameter invalid. 
@@ -292,6 +299,10 @@ class HubApi: if cookies is None: raise NotLoginException('Must login before upload!') files_to_save = os.listdir(model_dir) + if ignore_file_pattern is None: + ignore_file_pattern = [] + if isinstance(ignore_file_pattern, str): + ignore_file_pattern = [ignore_file_pattern] try: self.get_model(model_id=model_id) except Exception: @@ -325,6 +336,8 @@ class HubApi: shutil.rmtree(src, ignore_errors=True) for f in files_to_save: if f[0] != '.': + if any([re.search(pattern, f) is not None for pattern in ignore_file_pattern]): + continue src = os.path.join(model_dir, f) if os.path.isdir(src): shutil.copytree(src, os.path.join(tmp_dir, f)) @@ -338,6 +351,8 @@ class HubApi: commit_message=commit_message, local_branch=revision, remote_branch=revision) + if tag is not None: + repo.tag_and_push(tag, tag) except Exception: raise finally: @@ -581,6 +596,17 @@ class HubApi: file_list = file_list['Files'] return file_list + @staticmethod + def dump_datatype_file(dataset_type: int, meta_cache_dir: str): + """ + Dump the data_type as a local file, in order to get the dataset formation without calling the datahub. + More details, please refer to the class `modelscope.utils.constant.DatasetFormations`. + """ + dataset_type_file_path = os.path.join(meta_cache_dir, + f'{str(dataset_type)}{DatasetFormations.formation_mark_ext.value}') + with open(dataset_type_file_path, 'w') as fp: + fp.write('*** Automatically-generated file, do not modify ***') + def get_dataset_meta_files_local_paths(self, dataset_name: str, namespace: str, revision: str, @@ -591,10 +617,7 @@ class HubApi: cookies = ModelScopeConfig.get_cookies() # Dump the data_type as a local file - dataset_type_file_path = os.path.join(meta_cache_dir, - f'{str(dataset_type)}{DatasetFormations.formation_mark_ext.value}') - with open(dataset_type_file_path, 'w') as fp: - fp.write('*** Automatically-generated file, do not modify ***') + HubApi.dump_datatype_file(dataset_type=dataset_type, meta_cache_dir=meta_cache_dir) for file_info in file_list: file_path = file_info['Path'] @@ -661,7 +684,6 @@ class HubApi: cookies = self._check_cookie(use_cookies=True) else: cookies = ModelScopeConfig.get_cookies() - r = self.session.get(url=datahub_url, cookies=cookies, headers=self.headers) r = self.session.get( url=datahub_url, cookies=cookies, headers=self.headers) @@ -669,6 +691,31 @@ class HubApi: raise_on_error(resp) return resp['Data'] + def get_virgo_meta(self, dataset_id: str, version: int = 1) -> dict: + """ + Get virgo dataset meta info. 
+ """ + virgo_endpoint = os.environ.get(VirgoDatasetConfig.env_virgo_endpoint, '') + if not virgo_endpoint: + raise RuntimeError(f'Virgo endpoint is not set in env: {VirgoDatasetConfig.env_virgo_endpoint}') + + virgo_dataset_url = f'{virgo_endpoint}/data/set/download' + cookies = requests.utils.dict_from_cookiejar(ModelScopeConfig.get_cookies()) + + dataset_info = dict( + dataSetId=dataset_id, + dataSetVersion=version + ) + data = dict( + data=dataset_info, + ) + r = self.session.post(url=virgo_dataset_url, json=data, cookies=cookies, headers=self.headers, timeout=900) + resp = r.json() + if resp['code'] != 0: + raise RuntimeError(f'Failed to get virgo dataset: {resp}') + + return resp['data'] + def get_dataset_access_config_for_unzipped(self, dataset_name: str, namespace: str, @@ -895,6 +942,7 @@ class ModelScopeConfig: if MODELSCOPE_CLOUD_USERNAME in os.environ: user_name = os.environ[MODELSCOPE_CLOUD_USERNAME] + from modelscope import __version__ ua = 'modelscope/%s; python/%s; session_id/%s; platform/%s; processor/%s; env/%s; user/%s' % ( __version__, platform.python_version(), diff --git a/modelscope/hub/errors.py b/modelscope/hub/errors.py index be94d7fd..4bf2f935 100644 --- a/modelscope/hub/errors.py +++ b/modelscope/hub/errors.py @@ -2,6 +2,7 @@ from http import HTTPStatus +import requests from requests.exceptions import HTTPError from modelscope.utils.logger import get_logger @@ -57,13 +58,22 @@ def is_ok(rsp): return rsp['Code'] == HTTPStatus.OK and rsp['Success'] +def _decode_response_error(response: requests.Response): + if 'application/json' in response.headers.get('content-type', ''): + message = response.json() + else: + message = response.content.decode('utf-8') + return message + + def handle_http_post_error(response, url, request_body): try: response.raise_for_status() except HTTPError as error: logger.error('Request %s with body: %s exception' % (url, request_body)) - logger.error('Response details: %s' % response.content) + message = _decode_response_error(response) + logger.error('Response details: %s' % message) raise error @@ -75,7 +85,8 @@ def handle_http_response(response, logger, cookies, model_id): logger.error( f'Authentication token does not exist, failed to access model {model_id} which may not exist or may be \ private. Please login first.') - logger.error('Response details: %s' % response.content) + message = _decode_response_error(response) + logger.error('Response details: %s' % message) raise error diff --git a/modelscope/hub/file_download.py b/modelscope/hub/file_download.py index 380d2432..6d3ad63d 100644 --- a/modelscope/hub/file_download.py +++ b/modelscope/hub/file_download.py @@ -12,7 +12,6 @@ import requests from requests.adapters import Retry from tqdm import tqdm -from modelscope import __version__ from modelscope.hub.api import HubApi, ModelScopeConfig from modelscope.hub.constants import (API_FILE_DOWNLOAD_CHUNK_SIZE, API_FILE_DOWNLOAD_RETRY_TIMES, diff --git a/modelscope/hub/git.py b/modelscope/hub/git.py index 80887738..b0fae148 100644 --- a/modelscope/hub/git.py +++ b/modelscope/hub/git.py @@ -55,16 +55,10 @@ class GitCommandWrapper(metaclass=Singleton): response.check_returncode() return response except subprocess.CalledProcessError as error: - if response.returncode == 1: - logger.info('Nothing to commit.') - return response - else: - logger.error( - 'There are error run git command, you may need to login first.' 
- ) - raise GitError('stdout: %s, stderr: %s' % - (response.stdout.decode('utf8'), - error.stderr.decode('utf8'))) + logger.error('There are error run git command.') + raise GitError( + 'stdout: %s, stderr: %s' % + (response.stdout.decode('utf8'), error.stderr.decode('utf8'))) def config_auth_token(self, repo_dir, auth_token): url = self.get_repo_remote_url(repo_dir) @@ -199,8 +193,11 @@ class GitCommandWrapper(metaclass=Singleton): else: return ['/'.join(line.split('/')[1:]) for line in info[1:]] - def pull(self, repo_dir: str): - cmds = ['-C', repo_dir, 'pull'] + def pull(self, + repo_dir: str, + remote: str = 'origin', + branch: str = 'master'): + cmds = ['-C', repo_dir, 'pull', remote, branch] return self._run_git_command(*cmds) def push(self, diff --git a/modelscope/hub/push_to_hub.py b/modelscope/hub/push_to_hub.py index ee7b240e..d117cc7f 100644 --- a/modelscope/hub/push_to_hub.py +++ b/modelscope/hub/push_to_hub.py @@ -4,8 +4,8 @@ import concurrent.futures import os from modelscope.hub.api import HubApi -from modelscope.hub.constants import Licenses, ModelVisibility -from modelscope.hub.errors import NotExistError +from modelscope.hub.constants import ModelVisibility +from modelscope.utils.constant import DEFAULT_REPOSITORY_REVISION from modelscope.utils.logger import get_logger logger = get_logger() @@ -18,7 +18,10 @@ def _api_push_to_hub(repo_name, token, private=True, commit_message='', - source_repo=''): + tag=None, + source_repo='', + ignore_file_pattern=None, + revision=DEFAULT_REPOSITORY_REVISION): try: api = HubApi() api.login(token) @@ -29,7 +32,10 @@ def _api_push_to_hub(repo_name, if not private else ModelVisibility.PRIVATE, chinese_name=repo_name, commit_message=commit_message, - original_model_id=source_repo) + tag=tag, + original_model_id=source_repo, + ignore_file_pattern=ignore_file_pattern, + revision=revision) commit_message = commit_message or 'No commit message' logger.info( f'Successfully upload the model to {repo_name} with message: {commit_message}' @@ -48,7 +54,10 @@ def push_to_hub(repo_name, private=True, retry=3, commit_message='', - source_repo=''): + tag=None, + source_repo='', + ignore_file_pattern=None, + revision=DEFAULT_REPOSITORY_REVISION): """ Args: repo_name: The repo name for the modelhub repo @@ -57,13 +66,18 @@ def push_to_hub(repo_name, private: If is a private repo, default True retry: Retry times if something error in uploading, default 3 commit_message: The commit message + tag: The tag of this commit source_repo: The source repo (model id) which this model comes from - + ignore_file_pattern: The file pattern to be ignored in uploading. + revision: The branch to commit to Returns: The boolean value to represent whether the model is uploaded. """ if token is None: token = os.environ.get('MODELSCOPE_API_TOKEN') + if ignore_file_pattern is None: + ignore_file_pattern = os.environ.get('UPLOAD_IGNORE_FILE_PATTERN') + assert repo_name is not None assert token is not None, 'Either pass in a token or to set `MODELSCOPE_API_TOKEN` in the environment variables.' 
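
# ---------------------------------------------------------------------------
# Illustrative call of the push_to_hub wrapper with the new arguments. When no
# token is passed, MODELSCOPE_API_TOKEN is read from the environment, and
# UPLOAD_IGNORE_FILE_PATTERN can supply the ignore pattern, as the defaults
# above show. Repository name, paths and tag are placeholders.
# ---------------------------------------------------------------------------
import os

from modelscope.hub.push_to_hub import push_to_hub

os.environ.setdefault('MODELSCOPE_API_TOKEN', '<your-token>')
uploaded = push_to_hub(
    repo_name='my-namespace/my-finetuned-model',
    output_dir='./tmp/finetune_output',  # needs configuration.json or .yaml
    commit_message='nightly checkpoint',
    tag='nightly',
    revision='master')
print('uploaded:', uploaded)
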
assert os.path.isdir(output_dir) assert 'configuration.json' in os.listdir(output_dir) or 'configuration.yaml' in os.listdir(output_dir) \ @@ -73,7 +87,8 @@ def push_to_hub(repo_name, f'Uploading {output_dir} to {repo_name} with message {commit_message}') for i in range(retry): if _api_push_to_hub(repo_name, output_dir, token, private, - commit_message, source_repo): + commit_message, tag, source_repo, + ignore_file_pattern, revision): return True return False @@ -83,7 +98,10 @@ def push_to_hub_async(repo_name, token=None, private=True, commit_message='', - source_repo=''): + tag=None, + source_repo='', + ignore_file_pattern=None, + revision=DEFAULT_REPOSITORY_REVISION): """ Args: repo_name: The repo name for the modelhub repo @@ -91,13 +109,18 @@ def push_to_hub_async(repo_name, token: The user api token, function will check the `MODELSCOPE_API_TOKEN` variable if this argument is None private: If is a private repo, default True commit_message: The commit message + tag: The tag of this commit source_repo: The source repo (model id) which this model comes from - + ignore_file_pattern: The file pattern to be ignored in uploading + revision: The branch to commit to Returns: A handler to check the result and the status """ if token is None: token = os.environ.get('MODELSCOPE_API_TOKEN') + if ignore_file_pattern is None: + ignore_file_pattern = os.environ.get('UPLOAD_IGNORE_FILE_PATTERN') + assert repo_name is not None assert token is not None, 'Either pass in a token or to set `MODELSCOPE_API_TOKEN` in the environment variables.' assert os.path.isdir(output_dir) assert 'configuration.json' in os.listdir(output_dir) or 'configuration.yaml' in os.listdir(output_dir) \ @@ -106,4 +129,5 @@ def push_to_hub_async(repo_name, logger.info( f'Uploading {output_dir} to {repo_name} with message {commit_message}') return _executor.submit(_api_push_to_hub, repo_name, output_dir, token, - private, commit_message, source_repo) + private, commit_message, tag, source_repo, + ignore_file_pattern, revision) diff --git a/modelscope/hub/repository.py b/modelscope/hub/repository.py index 1d107a3c..3fc6da2b 100644 --- a/modelscope/hub/repository.py +++ b/modelscope/hub/repository.py @@ -88,6 +88,26 @@ class Repository: remote = None return remote + def pull(self, remote: str = 'origin', branch: str = 'master'): + """Pull remote branch + + Args: + remote (str, optional): The remote name. Defaults to 'origin'. + branch (str, optional): The remote branch. Defaults to 'master'. + """ + self.git_wrapper.pull(self.model_dir, remote=remote, branch=branch) + + def add_lfs_type(self, file_name_suffix: str): + """Add file suffix to lfs list. + + Args: + file_name_suffix (str): The file name suffix. 
+ examples '*.safetensors' + """ + os.system( + "printf '%s filter=lfs diff=lfs merge=lfs -text\n'>>%s" % + (file_name_suffix, os.path.join(self.model_dir, '.gitattributes'))) + def push(self, commit_message: str, local_branch: Optional[str] = DEFAULT_REPOSITORY_REVISION, @@ -120,7 +140,6 @@ class Repository: self.model_repo_name) url = self.git_wrapper.get_repo_remote_url(self.model_dir) - self.git_wrapper.pull(self.model_dir) self.git_wrapper.add(self.model_dir, all_files=True) self.git_wrapper.commit(self.model_dir, commit_message) diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py index 60ad6d85..c4057314 100644 --- a/modelscope/metainfo.py +++ b/modelscope/metainfo.py @@ -116,15 +116,9 @@ class Models(object): bad_image_detecting = 'bad-image-detecting' controllable_image_generation = 'controllable-image-generation' longshortnet = 'longshortnet' + fastinst = 'fastinst' pedestrian_attribute_recognition = 'pedestrian-attribute-recognition' - # EasyCV models - yolox = 'YOLOX' - segformer = 'Segformer' - hand_2d_keypoints = 'HRNet-Hand2D-Keypoints' - image_object_detection_auto = 'image-object-detection-auto' - dino = 'DINO' - # nlp models bert = 'bert' palm = 'palm-v2' @@ -177,6 +171,7 @@ class Models(object): speech_frcrn_ans_cirm_16k = 'speech_frcrn_ans_cirm_16k' speech_dfsmn_ans = 'speech_dfsmn_ans' speech_dfsmn_kws_char_farfield = 'speech_dfsmn_kws_char_farfield' + speech_dfsmn_kws_char_farfield_iot = 'speech_dfsmn_kws_char_farfield_iot' speech_kws_fsmn_char_ctc_nearfield = 'speech_kws_fsmn_char_ctc_nearfield' speech_mossformer_separation_temporal_8k = 'speech_mossformer_separation_temporal_8k' kws_kwsbp = 'kws-kwsbp' @@ -187,6 +182,9 @@ class Models(object): generic_sv = 'generic-sv' ecapa_tdnn_sv = 'ecapa-tdnn-sv' campplus_sv = 'cam++-sv' + eres2net_sv = 'eres2net-sv' + scl_sd = 'scl-sd' + rdino_tdnn_sv = 'rdino_ecapa-tdnn-sv' generic_lm = 'generic-lm' # multi-modal models @@ -205,6 +203,8 @@ class Models(object): hitea = 'hitea' soonet = 'soonet' efficient_diffusion_tuning = 'efficient-diffusion-tuning' + mplug_owl = 'mplug-owl' + clip_interrogator = 'clip-interrogator' # science models unifold = 'unifold' @@ -255,6 +255,7 @@ class Pipelines(object): should use task name for this pipeline. For pipeline which suuport only one model, we should use ${Model}-${Task} as its name. 
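
# ---------------------------------------------------------------------------
# Illustrative use of the Repository helpers added above. The constructor
# arguments (local directory plus the model id to clone from) follow the
# existing Repository class and should be treated as assumptions; the model id
# and file suffix are placeholders.
# ---------------------------------------------------------------------------
from modelscope.hub.repository import Repository

repo = Repository(
    model_dir='./my_model_repo',
    clone_from='my-namespace/my-finetuned-model')
repo.add_lfs_type('*.safetensors')           # track a file suffix via git-lfs
repo.pull(remote='origin', branch='master')  # pull an explicit remote/branch
repo.push('add safetensors weights')
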
""" + pipeline_template = 'pipeline-template' # vision tasks portrait_matting = 'unet-image-matting' universal_matting = 'unet-universal-matting' @@ -277,8 +278,6 @@ class Pipelines(object): tbs_detection = 'tbs-detection' object_detection = 'vit-object-detection' abnormal_object_detection = 'abnormal-object-detection' - easycv_detection = 'easycv-detection' - easycv_segmentation = 'easycv-segmentation' face_2d_keypoints = 'mobilenet_face-2d-keypoints_alignment' salient_detection = 'u2net-salient-detection' salient_boudary_detection = 'res2net-salient-detection' @@ -347,7 +346,6 @@ class Pipelines(object): video_single_object_tracking_procontext = 'procontext-vitb-video-single-object-tracking' video_multi_object_tracking = 'video-multi-object-tracking' image_panoptic_segmentation = 'image-panoptic-segmentation' - image_panoptic_segmentation_easycv = 'image-panoptic-segmentation-easycv' video_summarization = 'googlenet_pgl_video_summarization' language_guided_video_summarization = 'clip-it-video-summarization' image_semantic_segmentation = 'image-semantic-segmentation' @@ -402,7 +400,7 @@ class Pipelines(object): nerf_recon_acc = 'nerf-recon-acc' bad_image_detecting = 'bad-image-detecting' controllable_image_generation = 'controllable-image-generation' - + fast_instance_segmentation = 'fast-instance-segmentation' image_quality_assessment_mos = 'image-quality-assessment-mos' image_quality_assessment_man = 'image-quality-assessment-man' image_quality_assessment_degradation = 'image-quality-assessment-degradation' @@ -485,6 +483,9 @@ class Pipelines(object): speaker_diarization_inference = 'speaker-diarization-inference' vad_inference = 'vad-inference' speaker_verification = 'speaker-verification' + speaker_verification_rdino = 'speaker-verification-rdino' + speaker_verification_eres2net = 'speaker-verification-eres2net' + speaker_change_locating = 'speaker-change-locating' lm_inference = 'language-score-prediction' speech_timestamp_inference = 'speech-timestamp-inference' @@ -514,6 +515,7 @@ class Pipelines(object): gridvlp_multi_modal_embedding = 'gridvlp-multi-modal-embedding' soonet_video_temporal_grounding = 'soonet-video-temporal-grounding' efficient_diffusion_tuning = 'efficient-diffusion-tuning' + multimodal_dialogue = 'multimodal-dialogue' # science tasks protein_structure = 'unifold-protein-structure' @@ -881,6 +883,7 @@ class NLPTrainers(object): document_grounded_dialog_rerank_trainer = 'document-grounded-dialog-rerank-trainer' document_grounded_dialog_retrieval_trainer = 'document-grounded-dialog-retrieval-trainer' siamese_uie_trainer = 'siamese-uie-trainer' + translation_evaluation_trainer = 'translation-evaluation-trainer' class MultiModalTrainers(object): @@ -911,7 +914,6 @@ class Trainers(CVTrainers, NLPTrainers, MultiModalTrainers, AudioTrainers): """ default = 'trainer' - easycv = 'easycv' tinynas_damoyolo = 'tinynas-damoyolo' @staticmethod @@ -933,8 +935,6 @@ class Trainers(CVTrainers, NLPTrainers, MultiModalTrainers, AudioTrainers): return Fields.multi_modal elif attribute_or_value == Trainers.default: return Trainers.default - elif attribute_or_value == Trainers.easycv: - return Trainers.easycv else: return 'unknown' @@ -1034,6 +1034,8 @@ class Preprocessors(object): vldoc_preprocessor = 'vldoc-preprocessor' hitea_tasks_preprocessor = 'hitea-tasks-preprocessor' diffusion_image_generation_preprocessor = 'diffusion-image-generation-preprocessor' + mplug_owl_preprocessor = 'mplug-owl-preprocessor' + image_captioning_clip_interrogator_preprocessor = 
'image-captioning-clip-interrogator-preprocessor' # science preprocessor unifold_preprocessor = 'unifold-preprocessor' @@ -1098,6 +1100,8 @@ class Metrics(object): # metric for image-colorization task image_colorization_metric = 'image-colorization-metric' ocr_recognition_metric = 'ocr-recognition-metric' + # metric for translation evaluation + translation_evaluation_metric = 'translation-evaluation-metric' class Optimizers(object): @@ -1165,14 +1169,6 @@ class LR_Schedulers(object): class CustomDatasets(object): """ Names for different datasets. """ - ClsDataset = 'ClsDataset' - Face2dKeypointsDataset = 'FaceKeypointDataset' - HandCocoWholeBodyDataset = 'HandCocoWholeBodyDataset' - HumanWholeBodyKeypointDataset = 'WholeBodyCocoTopDownDataset' - SegDataset = 'SegDataset' - DetDataset = 'DetDataset' - DetImagesMixDataset = 'DetImagesMixDataset' - PanopticDataset = 'PanopticDataset' PairedDataset = 'PairedDataset' SiddDataset = 'SiddDataset' GoproDataset = 'GoproDataset' diff --git a/modelscope/metrics/__init__.py b/modelscope/metrics/__init__.py index 17767001..6f5dfbde 100644 --- a/modelscope/metrics/__init__.py +++ b/modelscope/metrics/__init__.py @@ -31,6 +31,7 @@ if TYPE_CHECKING: from .loss_metric import LossMetric from .image_colorization_metric import ImageColorizationMetric from .ocr_recognition_metric import OCRRecognitionMetric + from .translation_evaluation_metric import TranslationEvaluationMetric else: _import_structure = { 'audio_noise_metric': ['AudioNoiseMetric'], @@ -62,7 +63,8 @@ else: 'text_ranking_metric': ['TextRankingMetric'], 'loss_metric': ['LossMetric'], 'image_colorization_metric': ['ImageColorizationMetric'], - 'ocr_recognition_metric': ['OCRRecognitionMetric'] + 'ocr_recognition_metric': ['OCRRecognitionMetric'], + 'translation_evaluation_metric': ['TranslationEvaluationMetric'] } import sys diff --git a/modelscope/metrics/builder.py b/modelscope/metrics/builder.py index 2bc756e6..43aaea14 100644 --- a/modelscope/metrics/builder.py +++ b/modelscope/metrics/builder.py @@ -42,6 +42,7 @@ class MetricKeys(object): NDCG = 'ndcg' AR = 'AR' Colorfulness = 'colorfulness' + Kendall_Tau_Correlation = 'kendall_tau_correlation' task_default_metrics = { @@ -76,6 +77,7 @@ task_default_metrics = { Tasks.bad_image_detecting: [Metrics.accuracy], Tasks.ocr_recognition: [Metrics.ocr_recognition_metric], Tasks.efficient_diffusion_tuning: [Metrics.loss_metric], + Tasks.translation_evaluation: [Metrics.translation_evaluation_metric] } diff --git a/modelscope/metrics/translation_evaluation_metric.py b/modelscope/metrics/translation_evaluation_metric.py new file mode 100644 index 00000000..81705d3b --- /dev/null +++ b/modelscope/metrics/translation_evaluation_metric.py @@ -0,0 +1,174 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import importlib +from typing import Dict, List, Union + +from pandas import DataFrame + +from modelscope.metainfo import Metrics +from modelscope.metrics.base import Metric +from modelscope.metrics.builder import METRICS, MetricKeys +from modelscope.models.nlp.unite.configuration import InputFormat +from modelscope.utils.logger import get_logger +from modelscope.utils.registry import default_group + +logger = get_logger() + + +@METRICS.register_module( + group_key=default_group, module_name=Metrics.translation_evaluation_metric) +class TranslationEvaluationMetric(Metric): + r"""The metric class for translation evaluation. 
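+
+    It accumulates the segment-level model scores together with the raw human
+    scores, language pairs and input formats collected during evaluation, and
+    reports a Kendall's tau correlation per language pair and per input
+    format, plus an average per input format.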
+ + """ + + def __init__(self, gap_threshold: float = 25.0): + r"""Build a translation evaluation metric, following the designed + Kendall's tau correlation from WMT Metrics Shared Task competitions. + + Args: + gap_threshold: The score gap denoting the available hypothesis pair. + + Returns: + A metric for translation evaluation. + """ + self.gap_threshold = gap_threshold + + self.lp = list() + self.segment_id = list() + self.raw_score = list() + self.score = list() + self.input_format = list() + + def clear(self) -> None: + r"""Clear all the stored variables. + """ + self.lp.clear() + self.segment_id.clear() + self.raw_score.clear() + self.input_format.clear() + + self.score.clear() + + return + + def add(self, outputs: Dict[str, List[float]], + inputs: Dict[str, List[Union[float, int]]]) -> None: + r"""Collect the related results for processing. + + Args: + outputs: Dict containing 'scores' + inputs: Dict containing 'labels' and 'segment_ids' + + """ + + self.lp += inputs['lp'] + self.segment_id += inputs['segment_id'] + self.raw_score += inputs['raw_score'] + self.input_format += inputs['input_format'] + + self.score += outputs['score'] + + return + + def evaluate(self) -> Dict[str, Dict[str, float]]: + r"""Compute the Kendall's tau correlation. + + Returns: + A dict denoting Kendall's tau correlation. + + """ + + data = { + 'lp': self.lp, + 'segment_id': self.segment_id, + 'raw_score': self.raw_score, + 'input_format': self.input_format, + 'score': self.score + } + data = DataFrame(data=data) + correlation = dict() + + for input_format in data.input_format.unique(): + logger.info('Evaluation results for %s input format' + % input_format.value) + input_format_data = data[data.input_format == input_format] + + temp_correlation = dict() + + for lp in sorted(input_format_data.lp.unique()): + sub_data = input_format_data[input_format_data.lp == lp] + temp_correlation[input_format.value + '_' + + lp] = self.compute_kendall_tau(sub_data) + logger.info( + '\t%s: %f' % + (lp, + temp_correlation[input_format.value + '_' + lp] * 100)) + + avg_correlation = sum( + temp_correlation.values()) / len(temp_correlation) + correlation[input_format.value + '_avg'] = avg_correlation + logger.info('Average evaluation result for %s input format: %f' % + (input_format.value, avg_correlation)) + logger.info('') + correlation.update(temp_correlation) + + return correlation + + def merge(self, other: 'TranslationEvaluationMetric') -> None: + r"""Merge the predictions from other TranslationEvaluationMetric objects. + + Args: + other: Another TranslationEvaluationMetric object. + + """ + + self.lp += other.lp + self.segment_id += other.segment_ids + self.raw_score += other.raw_score + self.input_format += other.input_format + + self.score += other.score + + return + + def compute_kendall_tau(self, csv_data: DataFrame) -> float: + r"""Compute kendall's tau correlation. + + Args: + csv_data: The pandas dataframe. + + Returns: + float: THe kendall's Tau correlation. 
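+
+        Example (an illustrative walk-through, not taken from real data):
+            with ``gap_threshold=25``, two hypotheses of the same segment
+            whose raw human scores are 90 and 60 form one available pair;
+            the pair is concordant if the predicted scores rank them the
+            same way (e.g. 0.8 vs. 0.3) and discordant otherwise, and the
+            returned value is
+            (concordant - discordant) / (concordant + discordant).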
+ + """ + concor = discor = 0 + + for segment_id in sorted(csv_data.segment_id.unique()): + group_csv_data = csv_data[csv_data.segment_id == segment_id] + + examples = group_csv_data.to_dict('records') + + for i in range(0, len(examples)): + for j in range(i + 1, len(examples)): + if self.raw_score[i] - self.raw_score[ + j] >= self.gap_threshold: + if self.score[i] > self.score[j]: + concor += 1 + elif self.score[i] < self.score[j]: + discor += 1 + elif self.raw_score[i] - self.raw_score[ + j] <= -self.gap_threshold: + if self.score[i] < self.score[j]: + concor += 1 + elif self.score[i] > self.score[j]: + discor += 1 + + if concor + discor == 0: + logger.warning( + 'We don\'t have available pairs when evaluation. ' + 'Marking the kendall tau correlation as the lowest value (-1.0).' + ) + return -1.0 + else: + return (concor - discor) / (concor + discor) diff --git a/modelscope/models/audio/ans/conv_stft.py b/modelscope/models/audio/ans/conv_stft.py index 4b393a4c..3d37f1aa 100644 --- a/modelscope/models/audio/ans/conv_stft.py +++ b/modelscope/models/audio/ans/conv_stft.py @@ -39,7 +39,7 @@ class ConvSTFT(nn.Module): super(ConvSTFT, self).__init__() if fft_len is None: - self.fft_len = np.int(2**np.ceil(np.log2(win_len))) + self.fft_len = int(2**np.ceil(np.log2(win_len))) else: self.fft_len = fft_len @@ -78,7 +78,7 @@ class ConviSTFT(nn.Module): fix=True): super(ConviSTFT, self).__init__() if fft_len is None: - self.fft_len = np.int(2**np.ceil(np.log2(win_len))) + self.fft_len = int(2**np.ceil(np.log2(win_len))) else: self.fft_len = fft_len kernel, window = init_kernels( diff --git a/modelscope/models/audio/asr/generic_automatic_speech_recognition.py b/modelscope/models/audio/asr/generic_automatic_speech_recognition.py index 25de839e..8dd11982 100644 --- a/modelscope/models/audio/asr/generic_automatic_speech_recognition.py +++ b/modelscope/models/audio/asr/generic_automatic_speech_recognition.py @@ -45,27 +45,5 @@ class GenericAutomaticSpeechRecognition(Model): def forward(self) -> Dict[str, Any]: """preload model and return the info of the model """ - if self.model_cfg['model_config']['type'] == Frameworks.tf: - from easyasr import asr_inference_paraformer_tf - if hasattr(asr_inference_paraformer_tf, 'preload'): - model_workspace = self.model_cfg['model_workspace'] - model_path = os.path.join(model_workspace, - self.model_cfg['am_model']) - vocab_path = os.path.join( - model_workspace, - self.model_cfg['model_config']['vocab_file']) - sampled_ids = 'seq2seq/sampled_ids' - sampled_lengths = 'seq2seq/sampled_lengths' - if 'sampled_ids' in self.model_cfg['model_config']: - sampled_ids = self.model_cfg['model_config']['sampled_ids'] - if 'sampled_lengths' in self.model_cfg['model_config']: - sampled_lengths = self.model_cfg['model_config'][ - 'sampled_lengths'] - asr_inference_paraformer_tf.preload( - ngpu=1, - asr_model_file=model_path, - vocab_file=vocab_path, - sampled_ids=sampled_ids, - sampled_lengths=sampled_lengths) return self.model_cfg diff --git a/modelscope/models/audio/kws/farfield/fsmn_sele_v3.py b/modelscope/models/audio/kws/farfield/fsmn_sele_v3.py new file mode 100644 index 00000000..d57354d0 --- /dev/null +++ b/modelscope/models/audio/kws/farfield/fsmn_sele_v3.py @@ -0,0 +1,233 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
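+
+# The module below defines a multi-channel deep-FSMN keyword spotter:
+# DFSMNUnit is a single expand/shrink/FSMN block (with a skip connection
+# when input and output sizes match), and FSMNSeleNetV3 stacks several of
+# them, runs every input channel through the shared stack, max-pools
+# across channels and maps the result to per-frame keyword scores.
+# A rough shape sketch (all sizes here are illustrative assumptions, not
+# values required by the model):
+#
+#     net = FSMNSeleNetV3(input_dim=120, num_syn=5)
+#     x = torch.randn(4, 100, 2, 120)   # [batch, time, channels, feature]
+#     y = net(x)                        # -> [4, 100, 5]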
+ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .fsmn import AffineTransform, Fsmn, LinearTransform, RectifiedLinear +from .model_def import HEADER_BLOCK_SIZE, ActivationType, LayerType, f32ToI32 + + +class DFSMNUnit(nn.Module): + """ one multi-channel deep fsmn unit + Args: + dimin: input dimension + dimexpand: feature expansion dimension + dimout: output dimension + lorder: left ofder + rorder: right order + """ + + def __init__(self, + dimin=64, + dimexpand=128, + dimout=64, + lorder=10, + rorder=1): + super(DFSMNUnit, self).__init__() + + self.expand = AffineTransform(dimin, dimexpand) + self.shrink = LinearTransform(dimexpand, dimout) + self.fsmn = Fsmn(dimout, dimout, lorder, rorder, 1, 1) + + self.debug = False + self.dataout = None + + def forward(self, input): + """ + Args: + input: [batch, time, feature] + """ + out1 = F.relu(self.expand(input)) + out2 = self.shrink(out1) + out3 = self.fsmn(out2) + + # add skip connection for matched data + if input.shape[-1] == out3.shape[-1]: + out3 = input + out3 + if self.debug: + self.dataout = out3 + return out3 + + def print_model(self): + self.expand.printModel() + self.shrink.printModel() + self.fsmn.printModel() + + def to_kaldi_nnet(self): + re_str = self.expand.toKaldiNNet() + relu = RectifiedLinear(self.expand.linear.out_features, + self.expand.linear.out_features) + re_str += relu.toKaldiNNet() + re_str = self.shrink.toKaldiNNet() + re_str += self.fsmn.toKaldiNNet() + return re_str + + +class FSMNSeleNetV3(nn.Module): + """ Deep FSMN model with channel selection performs multi-channel kws. + Zhang, Shiliang, et al. "Deep-FSMN for large vocabulary continuous speech + recognition." 2018 IEEE International Conference on Acoustics, Speech and + Signal Processing (ICASSP). IEEE, 2018. + + Args: + input_dim: input dimension + linear_dim: fsmn input dimension + proj_dim: fsmn projection dimension + lorder: fsmn left order + rorder: fsmn right order + num_syn: output dimension + fsmn_layers: no. 
of fsmn units + """ + + def __init__(self, + input_dim=120, + linear_dim=128, + proj_dim=64, + lorder=10, + rorder=1, + num_syn=5, + fsmn_layers=5): + super(FSMNSeleNetV3, self).__init__() + + self.mem = [] + # the first unit, mapping input dim to proj dim + unit = DFSMNUnit(input_dim, linear_dim, proj_dim, lorder, rorder) + self.mem.append(unit) + self.add_module('mem_{:d}'.format(0), unit) + + # deep fsmn layers with skip connection + for i in range(1, fsmn_layers): + unit = DFSMNUnit(proj_dim, linear_dim, proj_dim, lorder, rorder) + self.mem.append(unit) + self.add_module('mem_{:d}'.format(i), unit) + + self.expand2 = AffineTransform(proj_dim, linear_dim) + self.decision = AffineTransform(linear_dim, num_syn) + + def forward(self, input): + # multi-channel temp space, [batch, time, channel, feature] + if torch.cuda.is_available(): + x = torch.zeros(input.shape[0], input.shape[1], input.shape[2], + self.expand2.linear.out_features).cuda() + else: + x = torch.zeros(input.shape[0], input.shape[1], input.shape[2], + self.expand2.linear.out_features) + + for n in range(input.shape[2]): + chin = input[:, :, n, :] + + for unit in self.mem: + chout = unit(chin) + chin = chout + + x[:, :, n, :] = F.relu(self.expand2(chout)) + + # perform max pooling + pool = nn.MaxPool2d((x.shape[2], 1), stride=(x.shape[2], 1)) + y = pool(x) + + # remove channel dimension + y = torch.squeeze(y, -2) + z = self.decision(y) + + return z + + def print_model(self): + for unit in self.mem: + unit.print_model() + + self.expand2.printModel() + self.decision.printModel() + + def print_header(self): + """ get DFSMN params + """ + input_dim = self.mem[0].expand.linear.in_features + linear_dim = self.mem[0].expand.linear.out_features + proj_dim = self.mem[0].shrink.linear.out_features + lorder = self.mem[0].fsmn.conv_left.kernel_size[0] + rorder = 0 + if self.mem[0].fsmn.conv_right is not None: + rorder = self.mem[0].fsmn.conv_right.kernel_size[0] + + num_syn = self.decision.linear.out_features + fsmn_layers = len(self.mem) + + # no. 
of output channels, 0.0 means the same as numins + numouts = 1.0 + + # + # write total header + # + header = [0.0] * HEADER_BLOCK_SIZE * 5 + # numins + header[0] = 0.0 + # numouts + header[1] = numouts + # dimins + header[2] = input_dim + # dimouts + header[3] = num_syn + # numlayers + header[4] = 4 + + # + # write each layer's header + # + hidx = 1 + + header[HEADER_BLOCK_SIZE * hidx + 0] = float( + LayerType.LAYER_DFSMN.value) + header[HEADER_BLOCK_SIZE * hidx + 1] = 0.0 + header[HEADER_BLOCK_SIZE * hidx + 2] = input_dim + header[HEADER_BLOCK_SIZE * hidx + 3] = linear_dim + header[HEADER_BLOCK_SIZE * hidx + 4] = proj_dim + header[HEADER_BLOCK_SIZE * hidx + 5] = lorder + header[HEADER_BLOCK_SIZE * hidx + 6] = rorder + header[HEADER_BLOCK_SIZE * hidx + 7] = fsmn_layers + hidx += 1 + + header[HEADER_BLOCK_SIZE * hidx + 0] = float( + LayerType.LAYER_DENSE.value) + header[HEADER_BLOCK_SIZE * hidx + 1] = 0.0 + header[HEADER_BLOCK_SIZE * hidx + 2] = proj_dim + header[HEADER_BLOCK_SIZE * hidx + 3] = linear_dim + header[HEADER_BLOCK_SIZE * hidx + 4] = 1.0 + header[HEADER_BLOCK_SIZE * hidx + 5] = float( + ActivationType.ACTIVATION_RELU.value) + hidx += 1 + + header[HEADER_BLOCK_SIZE * hidx + 0] = float( + LayerType.LAYER_MAX_POOLING.value) + header[HEADER_BLOCK_SIZE * hidx + 1] = 0.0 + header[HEADER_BLOCK_SIZE * hidx + 2] = linear_dim + hidx += 1 + + header[HEADER_BLOCK_SIZE * hidx + 0] = float( + LayerType.LAYER_DENSE.value) + header[HEADER_BLOCK_SIZE * hidx + 1] = numouts + header[HEADER_BLOCK_SIZE * hidx + 2] = linear_dim + header[HEADER_BLOCK_SIZE * hidx + 3] = num_syn + header[HEADER_BLOCK_SIZE * hidx + 4] = 1.0 + header[HEADER_BLOCK_SIZE * hidx + 5] = float( + ActivationType.ACTIVATION_SOFTMAX.value) + + for h in header: + print(f32ToI32(h)) + + def to_kaldi_nnet(self): + re_str = '\n' + for unit in self.mem: + re_str += unit.to_kaldi_nnet() + re_str = self.expand2.toKaldiNNet() + relu = RectifiedLinear(self.expand2.linear.out_features, + self.expand2.linear.out_features) + re_str += relu.toKaldiNNet() + re_str += self.decision.toKaldiNNet() + re_str += ' %d %d\n' % (self.decision.linear.out_features, + self.decision.linear.out_features) + re_str += '\n' + re_str += '\n' + + return re_str diff --git a/modelscope/models/audio/kws/farfield/model.py b/modelscope/models/audio/kws/farfield/model.py index fff88805..670ac97c 100644 --- a/modelscope/models/audio/kws/farfield/model.py +++ b/modelscope/models/audio/kws/farfield/model.py @@ -11,6 +11,7 @@ from modelscope.models.builder import MODELS from modelscope.utils.audio.audio_utils import update_conf from modelscope.utils.constant import Tasks from .fsmn_sele_v2 import FSMNSeleNetV2 +from .fsmn_sele_v3 import FSMNSeleNetV3 @MODELS.register_module( @@ -18,6 +19,7 @@ from .fsmn_sele_v2 import FSMNSeleNetV2 class FSMNSeleNetV2Decorator(TorchModel): r""" A decorator of FSMNSeleNetV2 for integrating into modelscope framework """ + MODEL_CLASS = FSMNSeleNetV2 MODEL_TXT = 'model.txt' SC_CONFIG = 'sound_connect.conf' @@ -33,7 +35,7 @@ class FSMNSeleNetV2Decorator(TorchModel): """ super().__init__(model_dir, *args, **kwargs) if training: - self.model = FSMNSeleNetV2(*args, **kwargs) + self.model = self.MODEL_CLASS(*args, **kwargs) else: sc_config_file = os.path.join(model_dir, self.SC_CONFIG) model_txt_file = os.path.join(model_dir, self.MODEL_TXT) @@ -42,7 +44,7 @@ class FSMNSeleNetV2Decorator(TorchModel): self._sc = None if os.path.exists(model_txt_file): - conf_dict = dict(mode=56542, kws_model=model_txt_file) + conf_dict = 
dict(kws_model=model_txt_file) update_conf(sc_config_file, new_config_file, conf_dict) import py_sound_connect self._sc = py_sound_connect.SoundConnect(new_config_file) @@ -50,8 +52,8 @@ class FSMNSeleNetV2Decorator(TorchModel): self.size_out = self._sc.bytesPerBlockOut() else: raise Exception( - f'Invalid model directory! Failed to load model file: {model_txt_file}.' - ) + f'Invalid model directory! Failed to load model file:' + f' {model_txt_file}.') def __del__(self): if hasattr(self, 'tmp_dir'): @@ -73,3 +75,24 @@ class FSMNSeleNetV2Decorator(TorchModel): 'confidence': self._sc.kwsConfidence() } return result + + +@MODELS.register_module( + Tasks.keyword_spotting, + module_name=Models.speech_dfsmn_kws_char_farfield_iot) +class FSMNSeleNetV3Decorator(FSMNSeleNetV2Decorator): + r""" A decorator of FSMNSeleNetV3 for integrating into modelscope framework """ + + MODEL_CLASS = FSMNSeleNetV3 + + def __init__(self, + model_dir: str, + training: Optional[bool] = False, + *args, + **kwargs): + """initialize the dfsmn model from the `model_dir` path. + + Args: + model_dir (str): the model path. + """ + super().__init__(model_dir, training, *args, **kwargs) diff --git a/modelscope/models/audio/sv/DTDNN.py b/modelscope/models/audio/sv/DTDNN.py index d9e21ce8..d86d6799 100644 --- a/modelscope/models/audio/sv/DTDNN.py +++ b/modelscope/models/audio/sv/DTDNN.py @@ -76,11 +76,13 @@ class CAMPPlus(nn.Module): bn_size=4, init_channels=128, config_str='batchnorm-relu', - memory_efficient=True): + memory_efficient=True, + output_level='segment'): super(CAMPPlus, self).__init__() self.head = FCM(feat_dim=feat_dim) channels = self.head.out_channels + self.output_level = output_level self.xvector = nn.Sequential( OrderedDict([ @@ -118,10 +120,14 @@ class CAMPPlus(nn.Module): self.xvector.add_module('out_nonlinear', get_nonlinear(config_str, channels)) - self.xvector.add_module('stats', StatsPool()) - self.xvector.add_module( - 'dense', - DenseLayer(channels * 2, embedding_size, config_str='batchnorm_')) + if self.output_level == 'segment': + self.xvector.add_module('stats', StatsPool()) + self.xvector.add_module( + 'dense', + DenseLayer( + channels * 2, embedding_size, config_str='batchnorm_')) + else: + assert self.output_level == 'frame', '`output_level` should be set to \'segment\' or \'frame\'. ' for m in self.modules(): if isinstance(m, (nn.Conv1d, nn.Linear)): @@ -133,6 +139,8 @@ class CAMPPlus(nn.Module): x = x.permute(0, 2, 1) # (B,T,F) => (B,F,T) x = self.head(x) x = self.xvector(x) + if self.output_level == 'frame': + x = x.transpose(1, 2) return x diff --git a/modelscope/models/audio/sv/ERes2Net.py b/modelscope/models/audio/sv/ERes2Net.py new file mode 100644 index 00000000..615be064 --- /dev/null +++ b/modelscope/models/audio/sv/ERes2Net.py @@ -0,0 +1,344 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +""" Res2Net implementation is adapted from https://github.com/wenet-e2e/wespeaker. + ERes2Net incorporates both local and global feature fusion techniques to improve the performance. The local feature + fusion (LFF) fuses the features within one single residual block to extract the local signal. + The global feature fusion (GFF) takes acoustic features of different scales as input to aggregate global signal. 
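+
+    A minimal shape-level sketch (the sizes below are illustrative, not
+    mandated by the model):
+
+        model = ERes2Net(feat_dim=80, embed_dim=192)
+        fbank = torch.randn(2, 200, 80)   # (batch, frames, mel bins)
+        embedding = model(fbank)          # -> (2, 192)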
+""" +import math +import os +from typing import Any, Dict, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchaudio.compliance.kaldi as Kaldi + +import modelscope.models.audio.sv.pooling_layers as pooling_layers +from modelscope.metainfo import Models +from modelscope.models import MODELS, TorchModel +from modelscope.models.audio.sv.fusion import AFF +from modelscope.utils.constant import Tasks + + +class ReLU(nn.Hardtanh): + + def __init__(self, inplace=False): + super(ReLU, self).__init__(0, 20, inplace) + + def __repr__(self): + inplace_str = 'inplace' if self.inplace else '' + return self.__class__.__name__ + ' (' \ + + inplace_str + ')' + + +def conv1x1(in_planes, out_planes, stride=1): + '1x1 convolution without padding' + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=1, + stride=stride, + padding=0, + bias=False) + + +def conv3x3(in_planes, out_planes, stride=1): + '3x3 convolution with padding' + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False) + + +class BasicBlockRes2Net(nn.Module): + expansion = 2 + + def __init__(self, in_planes, planes, stride=1, baseWidth=32, scale=2): + super(BasicBlockRes2Net, self).__init__() + width = int(math.floor(planes * (baseWidth / 64.0))) + self.conv1 = conv1x1(in_planes, width * scale, stride) + self.bn1 = nn.BatchNorm2d(width * scale) + self.nums = scale + + convs = [] + bns = [] + for i in range(self.nums): + convs.append(conv3x3(width, width)) + bns.append(nn.BatchNorm2d(width)) + self.convs = nn.ModuleList(convs) + self.bns = nn.ModuleList(bns) + self.relu = ReLU(inplace=True) + + self.conv3 = conv1x1(width * scale, planes * self.expansion) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion * planes: + self.shortcut = nn.Sequential( + nn.Conv2d( + in_planes, + self.expansion * planes, + kernel_size=1, + stride=stride, + bias=False), nn.BatchNorm2d(self.expansion * planes)) + self.stride = stride + self.width = width + self.scale = scale + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + spx = torch.split(out, self.width, 1) + for i in range(self.nums): + if i == 0: + sp = spx[i] + else: + sp = sp + spx[i] + sp = self.convs[i](sp) + sp = self.relu(self.bns[i](sp)) + if i == 0: + out = sp + else: + out = torch.cat((out, sp), 1) + + out = self.conv3(out) + out = self.bn3(out) + + residual = self.shortcut(x) + out += residual + out = self.relu(out) + + return out + + +class BasicBlockRes2Net_diff_AFF(nn.Module): + expansion = 2 + + def __init__(self, in_planes, planes, stride=1, baseWidth=32, scale=2): + super(BasicBlockRes2Net_diff_AFF, self).__init__() + width = int(math.floor(planes * (baseWidth / 64.0))) + self.conv1 = conv1x1(in_planes, width * scale, stride) + self.bn1 = nn.BatchNorm2d(width * scale) + self.nums = scale + + convs = [] + fuse_models = [] + bns = [] + for i in range(self.nums): + convs.append(conv3x3(width, width)) + bns.append(nn.BatchNorm2d(width)) + for j in range(self.nums - 1): + fuse_models.append(AFF(channels=width)) + + self.convs = nn.ModuleList(convs) + self.bns = nn.ModuleList(bns) + self.fuse_models = nn.ModuleList(fuse_models) + self.relu = ReLU(inplace=True) + + self.conv3 = conv1x1(width * scale, planes * self.expansion) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion * 
planes: + self.shortcut = nn.Sequential( + nn.Conv2d( + in_planes, + self.expansion * planes, + kernel_size=1, + stride=stride, + bias=False), nn.BatchNorm2d(self.expansion * planes)) + self.stride = stride + self.width = width + self.scale = scale + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + spx = torch.split(out, self.width, 1) + for i in range(self.nums): + if i == 0: + sp = spx[i] + else: + sp = self.fuse_models[i - 1](sp, spx[i]) + + sp = self.convs[i](sp) + sp = self.relu(self.bns[i](sp)) + if i == 0: + out = sp + else: + out = torch.cat((out, sp), 1) + + out = self.conv3(out) + out = self.bn3(out) + + residual = self.shortcut(x) + out += residual + out = self.relu(out) + + return out + + +class ERes2Net(nn.Module): + + def __init__(self, + block=BasicBlockRes2Net, + block_fuse=BasicBlockRes2Net_diff_AFF, + num_blocks=[3, 4, 6, 3], + m_channels=32, + feat_dim=80, + embed_dim=192, + pooling_func='TSTP', + two_emb_layer=False): + super(ERes2Net, self).__init__() + self.in_planes = m_channels + self.feat_dim = feat_dim + self.embed_dim = embed_dim + self.stats_dim = int(feat_dim / 8) * m_channels * 8 + self.two_emb_layer = two_emb_layer + + self.conv1 = nn.Conv2d( + 1, m_channels, kernel_size=3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(m_channels) + self.layer1 = self._make_layer( + block, m_channels, num_blocks[0], stride=1) + self.layer2 = self._make_layer( + block, m_channels * 2, num_blocks[1], stride=2) + self.layer3 = self._make_layer( + block_fuse, m_channels * 4, num_blocks[2], stride=2) + self.layer4 = self._make_layer( + block_fuse, m_channels * 8, num_blocks[3], stride=2) + + # downsampling + self.layer1_downsample = nn.Conv2d( + m_channels * 2, + m_channels * 4, + kernel_size=3, + stride=2, + padding=1, + bias=False) + self.layer2_downsample = nn.Conv2d( + m_channels * 4, + m_channels * 8, + kernel_size=3, + padding=1, + stride=2, + bias=False) + self.layer3_downsample = nn.Conv2d( + m_channels * 8, + m_channels * 16, + kernel_size=3, + padding=1, + stride=2, + bias=False) + + # bottom-up fusion + self.fuse_mode12 = AFF(channels=m_channels * 4) + self.fuse_mode123 = AFF(channels=m_channels * 8) + self.fuse_mode1234 = AFF(channels=m_channels * 16) + + self.n_stats = 1 if pooling_func == 'TAP' or pooling_func == 'TSDP' else 2 + self.pool = getattr(pooling_layers, pooling_func)( + in_dim=self.stats_dim * block.expansion) + self.seg_1 = nn.Linear(self.stats_dim * block.expansion * self.n_stats, + embed_dim) + if self.two_emb_layer: + self.seg_bn_1 = nn.BatchNorm1d(embed_dim, affine=False) + self.seg_2 = nn.Linear(embed_dim, embed_dim) + else: + self.seg_bn_1 = nn.Identity() + self.seg_2 = nn.Identity() + + def _make_layer(self, block, planes, num_blocks, stride): + strides = [stride] + [1] * (num_blocks - 1) + layers = [] + for stride in strides: + layers.append(block(self.in_planes, planes, stride)) + self.in_planes = planes * block.expansion + return nn.Sequential(*layers) + + def forward(self, x): + x = x.permute(0, 2, 1) + + x = x.unsqueeze_(1) + out = F.relu(self.bn1(self.conv1(x))) + out1 = self.layer1(out) + + # bottom-up fusion + out2 = self.layer2(out1) + out1_downsample = self.layer1_downsample(out1) + fuse_out12 = self.fuse_mode12(out2, out1_downsample) + + out3 = self.layer3(out2) + fuse_out12_downsample = self.layer2_downsample(fuse_out12) + fuse_out123 = self.fuse_mode123(out3, fuse_out12_downsample) + + out4 = self.layer4(out3) + fuse_out123_downsample = 
self.layer3_downsample(fuse_out123) + fuse_out1234 = self.fuse_mode1234(out4, fuse_out123_downsample) + stats = self.pool(fuse_out1234) + + embed_a = self.seg_1(stats) + if self.two_emb_layer: + out = F.relu(embed_a) + out = self.seg_bn_1(out) + embed_b = self.seg_2(out) + return embed_b + else: + return embed_a + + +@MODELS.register_module( + Tasks.speaker_verification, module_name=Models.eres2net_sv) +class SpeakerVerificationERes2Net(TorchModel): + r"""Enhanced Res2Net architecture with local and global feature fusion. ERes2Net is mainly composed + of LFF and GFF. The LFF extracts localization-preserved speaker features and strengthen the local information + interaction. GFF fuses multi-scale feature maps in bottom-up pathway to obtain global information. + Args: + model_dir: A model dir. + model_config: The model config. + """ + + def __init__(self, model_dir, model_config: Dict[str, Any], *args, + **kwargs): + super().__init__(model_dir, model_config, *args, **kwargs) + self.model_config = model_config + self.other_config = kwargs + self.feature_dim = 80 + + self.embedding_model = ERes2Net() + + pretrained_model_name = kwargs['pretrained_model'] + self.__load_check_point(pretrained_model_name) + + self.embedding_model.eval() + + def forward(self, audio): + assert len(audio.shape) == 2 and audio.shape[ + 0] == 1, 'modelscope error: the shape of input audio to model needs to be [1, T]' + # audio shape: [1, T] + feature = self.__extract_feature(audio) + embedding = self.embedding_model(feature) + + return embedding + + def __extract_feature(self, audio): + feature = Kaldi.fbank(audio, num_mel_bins=self.feature_dim) + feature = feature - feature.mean(dim=0, keepdim=True) + feature = feature.unsqueeze(0) + return feature + + def __load_check_point(self, pretrained_model_name, device=None): + if not device: + device = torch.device('cpu') + self.embedding_model.load_state_dict( + torch.load( + os.path.join(self.model_dir, pretrained_model_name), + map_location=device), + strict=True) diff --git a/modelscope/models/audio/sv/fusion.py b/modelscope/models/audio/sv/fusion.py new file mode 100644 index 00000000..615529bd --- /dev/null +++ b/modelscope/models/audio/sv/fusion.py @@ -0,0 +1,32 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import torch +import torch.nn as nn + + +class AFF(nn.Module): + + def __init__(self, channels=64, r=4): + super(AFF, self).__init__() + inter_channels = int(channels // r) + + self.local_att = nn.Sequential( + nn.Conv2d( + channels * 2, + inter_channels, + kernel_size=1, + stride=1, + padding=0), + nn.BatchNorm2d(inter_channels), + nn.SiLU(inplace=True), + nn.Conv2d( + inter_channels, channels, kernel_size=1, stride=1, padding=0), + nn.BatchNorm2d(channels), + ) + + def forward(self, x, ds_y): + xa = torch.cat((x, ds_y), dim=1) + x_att = self.local_att(xa) + x_att = 1.0 + torch.tanh(x_att) + xo = torch.mul(x, x_att) + torch.mul(ds_y, 2.0 - x_att) + + return xo diff --git a/modelscope/models/audio/sv/pooling_layers.py b/modelscope/models/audio/sv/pooling_layers.py new file mode 100644 index 00000000..0fdc44ca --- /dev/null +++ b/modelscope/models/audio/sv/pooling_layers.py @@ -0,0 +1,107 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +""" This implementation is adapted from https://github.com/wenet-e2e/wespeaker. 
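+    It provides temporal pooling layers (TAP, TSDP, TSTP and ASTP) that map
+    frame-level features of shape (B, F, T) (or (B, C, F, T) for ASTP) to
+    fixed-size utterance-level statistics vectors.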
+""" +import torch +import torch.nn as nn + + +class TAP(nn.Module): + """ + Temporal average pooling, only first-order mean is considered + """ + + def __init__(self, **kwargs): + super(TAP, self).__init__() + + def forward(self, x): + pooling_mean = x.mean(dim=-1) + # To be compatable with 2D input + pooling_mean = pooling_mean.flatten(start_dim=1) + return pooling_mean + + +class TSDP(nn.Module): + """ + Temporal standard deviation pooling, only second-order std is considered + """ + + def __init__(self, **kwargs): + super(TSDP, self).__init__() + + def forward(self, x): + # The last dimension is the temporal axis + pooling_std = torch.sqrt(torch.var(x, dim=-1) + 1e-8) + pooling_std = pooling_std.flatten(start_dim=1) + return pooling_std + + +class TSTP(nn.Module): + """ + Temporal statistics pooling, concatenate mean and std, which is used in + x-vector + Comment: simple concatenation can not make full use of both statistics + """ + + def __init__(self, **kwargs): + super(TSTP, self).__init__() + + def forward(self, x): + # The last dimension is the temporal axis + pooling_mean = x.mean(dim=-1) + pooling_std = torch.sqrt(torch.var(x, dim=-1) + 1e-8) + pooling_mean = pooling_mean.flatten(start_dim=1) + pooling_std = pooling_std.flatten(start_dim=1) + + stats = torch.cat((pooling_mean, pooling_std), 1) + return stats + + +class ASTP(nn.Module): + """ Attentive statistics pooling: Channel- and context-dependent + statistics pooling, first used in ECAPA_TDNN. + """ + + def __init__(self, in_dim, bottleneck_dim=128, global_context_att=False): + super(ASTP, self).__init__() + self.global_context_att = global_context_att + + # Use Conv1d with stride == 1 rather than Linear, then we don't + # need to transpose inputs. + if global_context_att: + self.linear1 = nn.Conv1d( + in_dim * 3, bottleneck_dim, + kernel_size=1) # equals W and b in the paper + else: + self.linear1 = nn.Conv1d( + in_dim, bottleneck_dim, + kernel_size=1) # equals W and b in the paper + self.linear2 = nn.Conv1d( + bottleneck_dim, in_dim, + kernel_size=1) # equals V and k in the paper + + def forward(self, x): + """ + x: a 3-dimensional tensor in tdnn-based architecture (B,F,T) + or a 4-dimensional tensor in resnet architecture (B,C,F,T) + 0-dim: batch-dimension, last-dim: time-dimension (frame-dimension) + """ + if len(x.shape) == 4: + x = x.reshape(x.shape[0], x.shape[1] * x.shape[2], x.shape[3]) + assert len(x.shape) == 3 + + if self.global_context_att: + context_mean = torch.mean(x, dim=-1, keepdim=True).expand_as(x) + context_std = torch.sqrt( + torch.var(x, dim=-1, keepdim=True) + 1e-10).expand_as(x) + x_in = torch.cat((x, context_mean, context_std), dim=1) + else: + x_in = x + + # DON'T use ReLU here! ReLU may be hard to converge. + alpha = torch.tanh( + self.linear1(x_in)) # alpha = F.relu(self.linear1(x_in)) + alpha = torch.softmax(self.linear2(alpha), dim=2) + mean = torch.sum(alpha * x, dim=2) + var = torch.sum(alpha * (x**2), dim=2) - mean**2 + std = torch.sqrt(var.clamp(min=1e-10)) + return torch.cat([mean, std], dim=1) diff --git a/modelscope/models/audio/sv/rdino.py b/modelscope/models/audio/sv/rdino.py new file mode 100644 index 00000000..0d51ee7a --- /dev/null +++ b/modelscope/models/audio/sv/rdino.py @@ -0,0 +1,573 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +""" This ECAPA-TDNN implementation is adapted from https://github.com/speechbrain/speechbrain. + RDINOHead implementation is adapted from DINO framework. 
+""" +import math +import os +from typing import Any, Dict, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchaudio.compliance.kaldi as Kaldi + +from modelscope.metainfo import Models +from modelscope.models import MODELS, TorchModel +from modelscope.utils.constant import Tasks + + +def length_to_mask(length, max_len=None, dtype=None, device=None): + assert len(length.shape) == 1 + + if max_len is None: + max_len = length.max().long().item() + mask = torch.arange( + max_len, device=length.device, dtype=length.dtype).expand( + len(length), max_len) < length.unsqueeze(1) + + if dtype is None: + dtype = length.dtype + + if device is None: + device = length.device + + mask = torch.as_tensor(mask, dtype=dtype, device=device) + return mask + + +def get_padding_elem(L_in: int, stride: int, kernel_size: int, dilation: int): + if stride > 1: + n_steps = math.ceil(((L_in - kernel_size * dilation) / stride) + 1) + L_out = stride * (n_steps - 1) + kernel_size * dilation + padding = [kernel_size // 2, kernel_size // 2] + + else: + L_out = (L_in - dilation * (kernel_size - 1) - 1) // stride + 1 + + padding = [(L_in - L_out) // 2, (L_in - L_out) // 2] + return padding + + +class Conv1d(nn.Module): + + def __init__( + self, + out_channels, + kernel_size, + in_channels, + stride=1, + dilation=1, + padding='same', + groups=1, + bias=True, + padding_mode='reflect', + ): + super().__init__() + self.kernel_size = kernel_size + self.stride = stride + self.dilation = dilation + self.padding = padding + self.padding_mode = padding_mode + + self.conv = nn.Conv1d( + in_channels, + out_channels, + self.kernel_size, + stride=self.stride, + dilation=self.dilation, + padding=0, + groups=groups, + bias=bias, + ) + + def forward(self, x): + if self.padding == 'same': + x = self._manage_padding(x, self.kernel_size, self.dilation, + self.stride) + + elif self.padding == 'causal': + num_pad = (self.kernel_size - 1) * self.dilation + x = F.pad(x, (num_pad, 0)) + + elif self.padding == 'valid': + pass + + else: + raise ValueError( + "Padding must be 'same', 'valid' or 'causal'. 
Got " + + self.padding) + + wx = self.conv(x) + + return wx + + def _manage_padding( + self, + x, + kernel_size: int, + dilation: int, + stride: int, + ): + L_in = x.shape[-1] + padding = get_padding_elem(L_in, stride, kernel_size, dilation) + x = F.pad(x, padding, mode=self.padding_mode) + + return x + + +class BatchNorm1d(nn.Module): + + def __init__( + self, + input_size, + eps=1e-05, + momentum=0.1, + ): + super().__init__() + self.norm = nn.BatchNorm1d( + input_size, + eps=eps, + momentum=momentum, + ) + + def forward(self, x): + return self.norm(x) + + +class TDNNBlock(nn.Module): + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + dilation, + activation=nn.ReLU, + groups=1, + ): + super(TDNNBlock, self).__init__() + self.conv = Conv1d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + dilation=dilation, + groups=groups, + ) + self.activation = activation() + self.norm = BatchNorm1d(input_size=out_channels) + + def forward(self, x): + return self.norm(self.activation(self.conv(x))) + + +class Res2NetBlock(torch.nn.Module): + + def __init__(self, + in_channels, + out_channels, + scale=8, + kernel_size=3, + dilation=1): + super(Res2NetBlock, self).__init__() + assert in_channels % scale == 0 + assert out_channels % scale == 0 + + in_channel = in_channels // scale + hidden_channel = out_channels // scale + + self.blocks = nn.ModuleList([ + TDNNBlock( + in_channel, + hidden_channel, + kernel_size=kernel_size, + dilation=dilation, + ) for i in range(scale - 1) + ]) + self.scale = scale + + def forward(self, x): + y = [] + for i, x_i in enumerate(torch.chunk(x, self.scale, dim=1)): + if i == 0: + y_i = x_i + elif i == 1: + y_i = self.blocks[i - 1](x_i) + else: + y_i = self.blocks[i - 1](x_i + y_i) + y.append(y_i) + y = torch.cat(y, dim=1) + return y + + +class SEBlock(nn.Module): + + def __init__(self, in_channels, se_channels, out_channels): + super(SEBlock, self).__init__() + + self.conv1 = Conv1d( + in_channels=in_channels, out_channels=se_channels, kernel_size=1) + self.relu = torch.nn.ReLU(inplace=True) + self.conv2 = Conv1d( + in_channels=se_channels, out_channels=out_channels, kernel_size=1) + self.sigmoid = torch.nn.Sigmoid() + + def forward(self, x, lengths=None): + L = x.shape[-1] + if lengths is not None: + mask = length_to_mask(lengths * L, max_len=L, device=x.device) + mask = mask.unsqueeze(1) + total = mask.sum(dim=2, keepdim=True) + s = (x * mask).sum(dim=2, keepdim=True) / total + else: + s = x.mean(dim=2, keepdim=True) + + s = self.relu(self.conv1(s)) + s = self.sigmoid(self.conv2(s)) + + return s * x + + +class AttentiveStatisticsPooling(nn.Module): + + def __init__(self, channels, attention_channels=128, global_context=True): + super().__init__() + + self.eps = 1e-12 + self.global_context = global_context + if global_context: + self.tdnn = TDNNBlock(channels * 3, attention_channels, 1, 1) + else: + self.tdnn = TDNNBlock(channels, attention_channels, 1, 1) + self.tanh = nn.Tanh() + self.conv = Conv1d( + in_channels=attention_channels, + out_channels=channels, + kernel_size=1) + + def forward(self, x, lengths=None): + L = x.shape[-1] + + def _compute_statistics(x, m, dim=2, eps=self.eps): + mean = (m * x).sum(dim) + std = torch.sqrt( + (m * (x - mean.unsqueeze(dim)).pow(2)).sum(dim).clamp(eps)) + return mean, std + + if lengths is None: + lengths = torch.ones(x.shape[0], device=x.device) + + # Make binary mask of shape [N, 1, L] + mask = length_to_mask(lengths * L, max_len=L, device=x.device) + mask = 
mask.unsqueeze(1) + + # Expand the temporal context of the pooling layer by allowing the + # self-attention to look at global properties of the utterance. + if self.global_context: + # torch.std is unstable for backward computation + # https://github.com/pytorch/pytorch/issues/4320 + total = mask.sum(dim=2, keepdim=True).float() + mean, std = _compute_statistics(x, mask / total) + mean = mean.unsqueeze(2).repeat(1, 1, L) + std = std.unsqueeze(2).repeat(1, 1, L) + attn = torch.cat([x, mean, std], dim=1) + else: + attn = x + + # Apply layers + attn = self.conv(self.tanh(self.tdnn(attn))) + + # Filter out zero-paddings + attn = attn.masked_fill(mask == 0, float('-inf')) + + attn = F.softmax(attn, dim=2) + mean, std = _compute_statistics(x, attn) + # Append mean and std of the batch + pooled_stats = torch.cat((mean, std), dim=1) + pooled_stats = pooled_stats.unsqueeze(2) + + return pooled_stats + + +class SERes2NetBlock(nn.Module): + + def __init__( + self, + in_channels, + out_channels, + res2net_scale=8, + se_channels=128, + kernel_size=1, + dilation=1, + activation=torch.nn.ReLU, + groups=1, + ): + super().__init__() + self.out_channels = out_channels + self.tdnn1 = TDNNBlock( + in_channels, + out_channels, + kernel_size=1, + dilation=1, + activation=activation, + groups=groups, + ) + self.res2net_block = Res2NetBlock(out_channels, out_channels, + res2net_scale, kernel_size, dilation) + self.tdnn2 = TDNNBlock( + out_channels, + out_channels, + kernel_size=1, + dilation=1, + activation=activation, + groups=groups, + ) + self.se_block = SEBlock(out_channels, se_channels, out_channels) + + self.shortcut = None + if in_channels != out_channels: + self.shortcut = Conv1d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + ) + + def forward(self, x, lengths=None): + residual = x + if self.shortcut: + residual = self.shortcut(x) + + x = self.tdnn1(x) + x = self.res2net_block(x) + x = self.tdnn2(x) + x = self.se_block(x, lengths) + + return x + residual + + +class ECAPA_TDNN(nn.Module): + """An implementation of the speaker embedding model in a paper. + "ECAPA-TDNN: Emphasized Channel Attention, Propagation and Aggregation in + TDNN Based Speaker Verification" (https://arxiv.org/abs/2005.07143). 
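+
+    A minimal shape-level sketch (the 80-dim fbank input and the batch size
+    are illustrative assumptions):
+
+        model = ECAPA_TDNN(input_size=80, lin_neurons=512)
+        feats = torch.randn(2, 200, 80)   # (batch, time, channel)
+        emb = model(feats)                # -> (2, 512)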
+ """ + + def __init__( + self, + input_size, + device='cpu', + lin_neurons=512, + activation=torch.nn.ReLU, + channels=[512, 512, 512, 512, 1536], + kernel_sizes=[5, 3, 3, 3, 1], + dilations=[1, 2, 3, 4, 1], + attention_channels=128, + res2net_scale=8, + se_channels=128, + global_context=True, + groups=[1, 1, 1, 1, 1], + ): + + super().__init__() + assert len(channels) == len(kernel_sizes) + assert len(channels) == len(dilations) + self.channels = channels + self.blocks = nn.ModuleList() + + # The initial TDNN layer + self.blocks.append( + TDNNBlock( + input_size, + channels[0], + kernel_sizes[0], + dilations[0], + activation, + groups[0], + )) + + # SE-Res2Net layers + for i in range(1, len(channels) - 1): + self.blocks.append( + SERes2NetBlock( + channels[i - 1], + channels[i], + res2net_scale=res2net_scale, + se_channels=se_channels, + kernel_size=kernel_sizes[i], + dilation=dilations[i], + activation=activation, + groups=groups[i], + )) + + # Multi-layer feature aggregation + self.mfa = TDNNBlock( + channels[-1], + channels[-1], + kernel_sizes[-1], + dilations[-1], + activation, + groups=groups[-1], + ) + + # Attentive Statistical Pooling + self.asp = AttentiveStatisticsPooling( + channels[-1], + attention_channels=attention_channels, + global_context=global_context, + ) + self.asp_bn = BatchNorm1d(input_size=channels[-1] * 2) + + # Final linear transformation + self.fc = Conv1d( + in_channels=channels[-1] * 2, + out_channels=lin_neurons, + kernel_size=1, + ) + + def forward(self, x, lengths=None): + """Returns the embedding vector. + + Arguments + --------- + x : torch.Tensor + Tensor of shape (batch, time, channel). + """ + x = x.transpose(1, 2) + + xl = [] + for layer in self.blocks: + try: + x = layer(x, lengths=lengths) + except TypeError: + x = layer(x) + xl.append(x) + + # Multi-layer feature aggregation + x = torch.cat(xl[1:], dim=1) + x = self.mfa(x) + + # Attentive Statistical Pooling + x = self.asp(x, lengths=lengths) + x = self.asp_bn(x) + + # Final linear transformation + x = self.fc(x) + + x = x.transpose(1, 2).squeeze(1) + return x + + +class RDINOHead(nn.Module): + + def __init__(self, + in_dim, + out_dim, + use_bn=False, + norm_last_layer=True, + nlayers=3, + hidden_dim=2048, + bottleneck_dim=256, + add_dim=8192): + super().__init__() + nlayers = max(nlayers, 1) + if nlayers == 1: + self.mlp = nn.Linear(in_dim, bottleneck_dim) + else: + layers = [nn.Linear(in_dim, hidden_dim)] + if use_bn: + layers.append(nn.BatchNorm1d(hidden_dim)) + layers.append(nn.GELU()) + for _ in range(nlayers - 2): + layers.append(nn.Linear(hidden_dim, hidden_dim)) + if use_bn: + layers.append(nn.BatchNorm1d(hidden_dim)) + layers.append(nn.GELU()) + + layers.append(nn.Linear(hidden_dim, add_dim)) + self.mlp = nn.Sequential(*layers) + self.add_layer = nn.Linear(add_dim, bottleneck_dim) + self.apply(self._init_weights) + self.last_layer = nn.utils.weight_norm( + nn.Linear(bottleneck_dim, out_dim, bias=False)) + self.last_layer.weight_g.data.fill_(1) + if norm_last_layer: + self.last_layer.weight_g.requires_grad = False + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + torch.nn.init.trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + + def forward(self, x): + vicr_out = self.mlp(x) + x = self.add_layer(vicr_out) + x = nn.functional.normalize(x, dim=-1, p=2) + x = self.last_layer(x) + return vicr_out, x + + +class Combine(nn.Module): + + def __init__(self, backbone, head): + super(Combine, self).__init__() + 
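+        # keep the feature backbone (here the ECAPA-TDNN) and the projection
+        # head (RDINOHead); forward() simply chains them.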
self.backbone = backbone + self.head = head + + def forward(self, x): + x = self.backbone(x) + output = self.head(x) + return output + + +@MODELS.register_module( + Tasks.speaker_verification, module_name=Models.rdino_tdnn_sv) +class SpeakerVerification_RDINO(TorchModel): + + def __init__(self, model_dir, model_config: Dict[str, Any], *args, + **kwargs): + super().__init__(model_dir, model_config, *args, **kwargs) + self.model_config = model_config + self.other_config = kwargs + if self.model_config['channel'] != 1024: + raise ValueError( + 'modelscope error: Currently only 1024-channel ecapa tdnn is supported.' + ) + + self.feature_dim = 80 + channels_config = [1024, 1024, 1024, 1024, 3072] + + self.embedding_model = ECAPA_TDNN( + self.feature_dim, channels=channels_config) + self.embedding_model = Combine(self.embedding_model, + RDINOHead(512, 65536, True)) + + pretrained_model_name = kwargs['pretrained_model'] + self.__load_check_point(pretrained_model_name) + + self.embedding_model.eval() + + def forward(self, audio): + assert len(audio.shape) == 2 and audio.shape[ + 0] == 1, 'modelscope error: the shape of input audio to model needs to be [1, T]' + # audio shape: [1, T] + feature = self.__extract_feature(audio) + embedding = self.embedding_model.backbone(feature) + + return embedding + + def __extract_feature(self, audio): + feature = Kaldi.fbank(audio, num_mel_bins=self.feature_dim) + feature = feature - feature.mean(dim=0, keepdim=True) + feature = feature.unsqueeze(0) + return feature + + def __load_check_point(self, pretrained_model_name, device=None): + if not device: + device = torch.device('cpu') + state_dict = torch.load( + os.path.join(self.model_dir, pretrained_model_name), + map_location=device) + state_dict_tea = { + k.replace('module.', ''): v + for k, v in state_dict['teacher'].items() + } + self.embedding_model.load_state_dict(state_dict_tea, strict=True) diff --git a/modelscope/models/audio/sv/speaker_change_locator.py b/modelscope/models/audio/sv/speaker_change_locator.py new file mode 100644 index 00000000..c22e4c1b --- /dev/null +++ b/modelscope/models/audio/sv/speaker_change_locator.py @@ -0,0 +1,319 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
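+
+# The classes below implement a transformer-based speaker change locator:
+# a frame-level CAM++ encoder turns the input waveform into per-frame
+# features, each frame is concatenated with one of two speaker anchor
+# embeddings, and a small transformer predicts a per-frame activity score
+# for each anchor, from which change points can be located.  Roughly
+# (shapes are illustrative assumptions): audio of shape [1, T] plus
+# anchors of shape [1, 2, D] yield a [frames, 2] score matrix.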
+ +import os +from collections import OrderedDict +from typing import Any, Dict, Union + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchaudio.compliance.kaldi as Kaldi + +from modelscope.metainfo import Models +from modelscope.models import MODELS, TorchModel +from modelscope.models.audio.sv.DTDNN import CAMPPlus +from modelscope.utils.constant import Tasks + + +class MultiHeadSelfAttention(nn.Module): + + def __init__(self, n_units, h=8, dropout=0.1): + super(MultiHeadSelfAttention, self).__init__() + self.linearQ = nn.Linear(n_units, n_units) + self.linearK = nn.Linear(n_units, n_units) + self.linearV = nn.Linear(n_units, n_units) + self.linearO = nn.Linear(n_units, n_units) + self.d_k = n_units // h + self.h = h + self.dropout = nn.Dropout(p=dropout) + self.att = None + + def forward(self, x, batch_size): + # x: (BT, F) + q = self.linearQ(x).reshape(batch_size, -1, self.h, self.d_k) + k = self.linearK(x).reshape(batch_size, -1, self.h, self.d_k) + v = self.linearV(x).reshape(batch_size, -1, self.h, self.d_k) + scores = torch.matmul(q.transpose(1, 2), k.permute( + 0, 2, 3, 1)) / np.sqrt(self.d_k) + # scores: (B, h, T, T) + self.att = F.softmax(scores, dim=3) + p_att = self.dropout(self.att) + # v : (B, T, h, d_k) + # p_att : (B, h, T, T) + x = torch.matmul(p_att, v.transpose(1, 2)) + # x : (B, h, T, d_k) + x = x.transpose(1, 2).reshape(-1, self.h * self.d_k) + return self.linearO(x) + + +class PositionwiseFeedForward(nn.Module): + + def __init__(self, n_units, d_units, dropout): + super(PositionwiseFeedForward, self).__init__() + self.linear1 = nn.Linear(n_units, d_units) + self.linear2 = nn.Linear(d_units, n_units) + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x): + return self.linear2(self.dropout(F.relu(self.linear1(x)))) + + +class PosEncoding(nn.Module): + + def __init__(self, max_seq_len, d_word_vec): + super(PosEncoding, self).__init__() + pos_enc = np.array([[ + pos / np.power(10000, 2.0 * (j // 2) / d_word_vec) + for j in range(d_word_vec) + ] for pos in range(max_seq_len)]) + pos_enc[:, 0::2] = np.sin(pos_enc[:, 0::2]) + pos_enc[:, 1::2] = np.cos(pos_enc[:, 1::2]) + pad_row = np.zeros([1, d_word_vec]) + pos_enc = np.concatenate([pad_row, pos_enc]).astype(np.float32) + + self.pos_enc = torch.nn.Embedding(max_seq_len + 1, d_word_vec) + self.pos_enc.weight = torch.nn.Parameter( + torch.from_numpy(pos_enc), requires_grad=False) + + def forward(self, input_len): + max_len = torch.max(input_len) + input_pos = torch.LongTensor([ + list(range(1, len + 1)) + [0] * (max_len - len) + for len in input_len + ]) + + return self.pos_enc(input_pos) + + +class TransformerEncoder(nn.Module): + + def __init__(self, + idim, + n_units=256, + n_layers=2, + e_units=512, + h=4, + dropout=0.1): + super(TransformerEncoder, self).__init__() + self.linear_in = nn.Linear(idim, n_units) + self.lnorm_in = nn.LayerNorm(n_units) + + self.n_layers = n_layers + self.dropout = nn.Dropout(p=dropout) + for i in range(n_layers): + setattr(self, '{}{:d}'.format('lnorm1_', i), nn.LayerNorm(n_units)) + setattr(self, '{}{:d}'.format('self_att_', i), + MultiHeadSelfAttention(n_units, h)) + setattr(self, '{}{:d}'.format('lnorm2_', i), nn.LayerNorm(n_units)) + setattr(self, '{}{:d}'.format('ff_', i), + PositionwiseFeedForward(n_units, e_units, dropout)) + self.lnorm_out = nn.LayerNorm(n_units) + + def forward(self, x): + # x: [B, num_anchors, T, n_in] + bs, num, tframe, dim = x.size() + x = x.reshape(bs * num, tframe, -1) # [B*num_anchors, T, dim] + # 
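+        # batch and anchor dimensions are merged here so that the shared
+        # encoder below processes each anchor-conditioned stream
+        # independently; the output is reshaped back to
+        # [B, num_anchors, T, dim] after the final layer norm.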
x: (B, T, F) ... batch, time, (mel)freq + B_size, T_size, _ = x.shape + # e: (BT, F) + e = self.linear_in(x.reshape(B_size * T_size, -1)) + # Encoder stack + for i in range(self.n_layers): + # layer normalization + e = getattr(self, '{}{:d}'.format('lnorm1_', i))(e) + # self-attention + s = getattr(self, '{}{:d}'.format('self_att_', i))(e, x.shape[0]) + # residual + e = e + self.dropout(s) + # layer normalization + e = getattr(self, '{}{:d}'.format('lnorm2_', i))(e) + # positionwise feed-forward + s = getattr(self, '{}{:d}'.format('ff_', i))(e) + # residual + e = e + self.dropout(s) + # final layer normalization + # output: (BT, F) + # output: (B, F, T) + output = self.lnorm_out(e).reshape(B_size, T_size, -1) + output = output.reshape(bs, num, tframe, + -1) # [B, num_anchors, T, dim] + return output + + +class TransformerEncoder_out(nn.Module): + + def __init__(self, + idim, + n_units=256, + n_layers=2, + e_units=512, + h=4, + dropout=0.1): + super(TransformerEncoder_out, self).__init__() + self.linear_in = nn.Linear(idim, n_units) + self.lnorm_in = nn.LayerNorm(n_units) + + self.n_layers = n_layers + self.dropout = nn.Dropout(p=dropout) + for i in range(n_layers): + setattr(self, '{}{:d}'.format('lnorm1_', i), nn.LayerNorm(n_units)) + setattr(self, '{}{:d}'.format('self_att_', i), + MultiHeadSelfAttention(n_units, h)) + setattr(self, '{}{:d}'.format('lnorm2_', i), nn.LayerNorm(n_units)) + setattr(self, '{}{:d}'.format('ff_', i), + PositionwiseFeedForward(n_units, e_units, dropout)) + self.lnorm_out = nn.LayerNorm(n_units) + + def forward(self, x): + # x: (B, T, F) + B_size, T_size, _ = x.shape + # e: (BT, F) + e = self.linear_in(x.reshape(B_size * T_size, -1)) + # Encoder stack + for i in range(self.n_layers): + # layer normalization + e = getattr(self, '{}{:d}'.format('lnorm1_', i))(e) + # self-attention + s = getattr(self, '{}{:d}'.format('self_att_', i))(e, x.shape[0]) + # residual + e = e + self.dropout(s) + # layer normalization + e = getattr(self, '{}{:d}'.format('lnorm2_', i))(e) + # positionwise feed-forward + s = getattr(self, '{}{:d}'.format('ff_', i))(e) + # residual + e = e + self.dropout(s) + # final layer normalization + # output: (BT, F) + # output: (B, T, F) + output = self.lnorm_out(e).reshape(B_size, T_size, -1) + return output + + +class OutLayer(nn.Module): + + def __init__(self, n_units=256, num_anchors=2): + super(OutLayer, self).__init__() + self.combine = TransformerEncoder_out(num_anchors * n_units, n_units) + self.out_linear = nn.Linear(n_units // num_anchors, 1) + + def forward(self, input): + # input: [B, num_anchors, T, dim] + bs, num, tframe, dim = input.size() + output = input.permute(0, 2, 1, + 3).reshape(bs, tframe, + -1) # [Bs, t, num_anchors*dim] + output = self.combine(output) # [Bs, t, n_units] + output = output.reshape( + bs, tframe, num, -1) # [Bs, t, num_anchors, n_units//num_anchors] + output = self.out_linear(output).squeeze(-1) # [Bs, t, num_anchors] + + return output + + +class TransformerDetector(nn.Module): + + def __init__(self, + frame_dim=512, + anchor_dim=192, + hidden_dim=256, + max_seq_len=1000): + super(TransformerDetector, self).__init__() + self.detection = TransformerEncoder( + idim=frame_dim + anchor_dim, n_units=hidden_dim) + self.output = OutLayer(n_units=hidden_dim) + self.pos_enc = PosEncoding(max_seq_len, hidden_dim) + + def forward(self, feats, anchors): + # feats: [1, t, fdim] + num_frames = feats.shape[1] + num_anchors = anchors.shape[1] + bs = feats.shape[0] + feats = feats.unsqueeze(1).repeat( + 1, num_anchors, 1, 1) # 
shape: [Bs, num_anchors, t, fdim] + anchors = anchors.unsqueeze(2).repeat( + 1, 1, num_frames, 1) # shape: [Bs, num_anchors, t, xdim] + sd_in = torch.cat((feats, anchors), + dim=-1) # shape: [Bs, num_anchors, t, fdim+xdim] + sd_out = self.detection(sd_in) # shape: [Bs, num_anchors, t, sd_dim] + + # pos + pos_emb = self.pos_enc(torch.tensor([num_frames] * (bs * num_anchors))) + pos_emb = pos_emb.reshape(bs, num_anchors, num_frames, -1) + sd_out += pos_emb + + # output + output = self.output(sd_out) # shape: [Bs, t, num_anchors] + + return output + + +@MODELS.register_module(Tasks.speaker_diarization, module_name=Models.scl_sd) +class SpeakerChangeLocatorTransformer(TorchModel): + r"""A speaekr change locator using the transformer architecture as the backbone. + Args: + model_dir: A model dir. + model_config: The model config. + """ + + def __init__(self, model_dir, model_config: Dict[str, Any], *args, + **kwargs): + super().__init__(model_dir, model_config, *args, **kwargs) + self.model_config = model_config + + self.feature_dim = self.model_config['fbank_dim'] + frame_size = self.model_config['frame_size'] + anchor_size = self.model_config['anchor_size'] + + self.encoder = CAMPPlus(self.feature_dim, output_level='frame') + self.backend = TransformerDetector( + frame_dim=frame_size, anchor_dim=anchor_size) + + pretrained_encoder = kwargs['pretrained_encoder'] + pretrained_backend = kwargs['pretrained_backend'] + + self.__load_check_point(pretrained_encoder, pretrained_backend) + + self.encoder.eval() + self.backend.eval() + + def forward(self, audio, anchors): + assert len(audio.shape) == 2 and audio.shape[ + 0] == 1, 'modelscope error: the shape of input audio to model needs to be [1, T]' + assert len( + anchors.shape + ) == 3 and anchors.shape[0] == 1 and anchors.shape[ + 1] == 2, 'modelscope error: the shape of input anchors to model needs to be [1, 2, D]' + # audio shape: [1, T] + feature = self.__extract_feature(audio) + frame_state = self.encoder(feature) + output = self.backend(frame_state, anchors) + output = output.squeeze(0).detach().cpu().sigmoid() + + time_scale_factor = int(np.ceil(feature.shape[1] / output.shape[0])) + output = output.unsqueeze(1).expand(-1, time_scale_factor, + -1).reshape(-1, output.shape[-1]) + return output + + def __extract_feature(self, audio): + feature = Kaldi.fbank(audio, num_mel_bins=self.feature_dim) + feature = feature - feature.mean(dim=0, keepdim=True) + feature = feature.unsqueeze(0) + return feature + + def __load_check_point(self, + pretrained_encoder, + pretrained_backend, + device=None): + if not device: + device = torch.device('cpu') + self.encoder.load_state_dict( + torch.load( + os.path.join(self.model_dir, pretrained_encoder), + map_location=device)) + + self.backend.load_state_dict( + torch.load( + os.path.join(self.model_dir, pretrained_backend), + map_location=device)) diff --git a/modelscope/models/audio/tts/voice.py b/modelscope/models/audio/tts/voice.py index 645a528f..ed9edf43 100644 --- a/modelscope/models/audio/tts/voice.py +++ b/modelscope/models/audio/tts/voice.py @@ -17,11 +17,9 @@ from kantts.train.trainer import GAN_Trainer, Sambert_Trainer, distributed_init from kantts.utils.ling_unit.ling_unit import KanTtsLinguisticUnit from torch.utils.data import DataLoader -from modelscope import __version__ from modelscope.utils.audio.audio_utils import TtsCustomParams from modelscope.utils.audio.tts_exceptions import ( TtsModelConfigurationException, TtsModelNotExistsException) -from modelscope.utils.constant import ModelFile, Tasks 
from modelscope.utils.logger import get_logger logger = get_logger() @@ -394,6 +392,7 @@ class Voice: logger.info(f'TRAINING steps: {train_max_steps}') config['create_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()) + from modelscope import __version__ config['modelscope_version'] = __version__ with open(os.path.join(stage_dir, 'config.yaml'), 'w') as f: @@ -558,6 +557,7 @@ class Voice: logger.info(f'resume from: {resume_from}') config['create_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()) + from modelscope import __version__ config['modelscope_version'] = __version__ with open(os.path.join(stage_dir, 'config.yaml'), 'w') as f: diff --git a/modelscope/models/cv/__init__.py b/modelscope/models/cv/__init__.py index 21487216..39acec69 100644 --- a/modelscope/models/cv/__init__.py +++ b/modelscope/models/cv/__init__.py @@ -4,9 +4,8 @@ from . import (action_recognition, animal_recognition, bad_image_detecting, body_2d_keypoints, body_3d_keypoints, cartoon, cmdssl_video_embedding, controllable_image_generation, - crowd_counting, face_2d_keypoints, face_detection, - face_generation, face_reconstruction, human_reconstruction, - human_wholebody_keypoint, image_classification, + crowd_counting, face_detection, face_generation, + face_reconstruction, human_reconstruction, image_classification, image_color_enhance, image_colorization, image_defrcn_fewshot, image_denoise, image_inpainting, image_instance_segmentation, image_matching, image_mvs_depth_estimation, diff --git a/modelscope/models/cv/body_2d_keypoints/hrnet_v2.py b/modelscope/models/cv/body_2d_keypoints/hrnet_v2.py index ebd69adb..19e426b2 100644 --- a/modelscope/models/cv/body_2d_keypoints/hrnet_v2.py +++ b/modelscope/models/cv/body_2d_keypoints/hrnet_v2.py @@ -72,7 +72,7 @@ class PoseHighResolutionNetV2(TorchModel): self.stage4, pre_stage_channels = self._make_stage( self.stage4_cfg, num_channels, multi_scale_output=True) """final four layers""" - last_inp_channels = np.int(np.sum(pre_stage_channels)) + last_inp_channels = int(np.sum(pre_stage_channels)) self.final_layer = nn.Sequential( nn.Conv2d( in_channels=last_inp_channels, diff --git a/modelscope/models/cv/cartoon/facelib/face_landmark.py b/modelscope/models/cv/cartoon/facelib/face_landmark.py index 3b7cc1b9..3c53f3a6 100644 --- a/modelscope/models/cv/cartoon/facelib/face_landmark.py +++ b/modelscope/models/cv/cartoon/facelib/face_landmark.py @@ -81,7 +81,7 @@ class FaceLandmark: bbox[2] = center[0] + one_edge // 2 bbox[3] = center[1] + one_edge // 2 - bbox = bbox.astype(np.int) + bbox = bbox.astype(int) crop_image = bimg[bbox[1]:bbox[3], bbox[0]:bbox[2], :] h, w, _ = crop_image.shape crop_image = cv2.resize( diff --git a/modelscope/models/cv/crowd_counting/hrnet_aspp_relu.py b/modelscope/models/cv/crowd_counting/hrnet_aspp_relu.py index 0d1bd3ca..64f40da0 100644 --- a/modelscope/models/cv/crowd_counting/hrnet_aspp_relu.py +++ b/modelscope/models/cv/crowd_counting/hrnet_aspp_relu.py @@ -356,7 +356,7 @@ class HighResolutionNet(nn.Module): num_channels) self.stage3, pre_stage_channels = self._make_stage( self.stage3_cfg, num_channels) - last_inp_channels = np.int(np.sum(pre_stage_channels)) + 256 + last_inp_channels = int(np.sum(pre_stage_channels)) + 256 self.redc_layer = nn.Sequential( nn.Conv2d( in_channels=last_inp_channels, diff --git a/modelscope/models/cv/easycv_base.py b/modelscope/models/cv/easycv_base.py deleted file mode 100644 index 7bc35e84..00000000 --- a/modelscope/models/cv/easycv_base.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 
Alibaba, Inc. and its affiliates. -from easycv.models.base import BaseModel -from easycv.utils.ms_utils import EasyCVMeta - -from modelscope.models.base import TorchModel - - -class EasyCVBaseModel(BaseModel, TorchModel): - """Base model for EasyCV.""" - - def __init__(self, model_dir=None, args=(), kwargs={}): - kwargs.pop(EasyCVMeta.ARCH, None) # pop useless keys - BaseModel.__init__(self) - TorchModel.__init__(self, model_dir=model_dir) - - def forward(self, img, mode='train', **kwargs): - if self.training: - losses = self.forward_train(img, **kwargs) - loss, log_vars = self._parse_losses(losses) - return dict(loss=loss, log_vars=log_vars) - else: - return self.forward_test(img, **kwargs) - - def __call__(self, *args, **kwargs): - return self.forward(*args, **kwargs) diff --git a/modelscope/models/cv/face_2d_keypoints/__init__.py b/modelscope/models/cv/face_2d_keypoints/__init__.py deleted file mode 100644 index 636ba0f4..00000000 --- a/modelscope/models/cv/face_2d_keypoints/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule - -if TYPE_CHECKING: - from .face_2d_keypoints_align import Face2DKeypoints - -else: - _import_structure = {'face_2d_keypoints_align': ['Face2DKeypoints']} - - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/models/cv/face_2d_keypoints/face_2d_keypoints_align.py b/modelscope/models/cv/face_2d_keypoints/face_2d_keypoints_align.py deleted file mode 100644 index 468662a0..00000000 --- a/modelscope/models/cv/face_2d_keypoints/face_2d_keypoints_align.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from easycv.models.face.face_keypoint import FaceKeypoint - -from modelscope.metainfo import Models -from modelscope.models.builder import MODELS -from modelscope.models.cv.easycv_base import EasyCVBaseModel -from modelscope.utils.constant import Tasks - - -@MODELS.register_module( - group_key=Tasks.face_2d_keypoints, module_name=Models.face_2d_keypoints) -class Face2DKeypoints(EasyCVBaseModel, FaceKeypoint): - - def __init__(self, model_dir=None, *args, **kwargs): - EasyCVBaseModel.__init__(self, model_dir, args, kwargs) - FaceKeypoint.__init__(self, *args, **kwargs) diff --git a/modelscope/models/cv/face_detection/peppa_pig_face/face_landmark.py b/modelscope/models/cv/face_detection/peppa_pig_face/face_landmark.py index 03a3b5b7..e7e2ddaf 100644 --- a/modelscope/models/cv/face_detection/peppa_pig_face/face_landmark.py +++ b/modelscope/models/cv/face_detection/peppa_pig_face/face_landmark.py @@ -82,7 +82,7 @@ class FaceLandmark: bbox[2] = center[0] + one_edge // 2 bbox[3] = center[1] + one_edge // 2 - bbox = bbox.astype(np.int) + bbox = bbox.astype(int) crop_image = bimg[bbox[1]:bbox[3], bbox[0]:bbox[2], :] h, w, _ = crop_image.shape crop_image = cv2.resize(crop_image, diff --git a/modelscope/models/cv/hand_2d_keypoints/__init__.py b/modelscope/models/cv/hand_2d_keypoints/__init__.py deleted file mode 100644 index 2b06f19a..00000000 --- a/modelscope/models/cv/hand_2d_keypoints/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule - -if TYPE_CHECKING: - from .hand_2d_keypoints import Hand2dKeyPoints - -else: - _import_structure = {'hand_2d_keypoints': ['Hand2dKeyPoints']} - - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/models/cv/hand_2d_keypoints/hand_2d_keypoints.py b/modelscope/models/cv/hand_2d_keypoints/hand_2d_keypoints.py deleted file mode 100644 index 15a97c30..00000000 --- a/modelscope/models/cv/hand_2d_keypoints/hand_2d_keypoints.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from easycv.models.pose import TopDown - -from modelscope.metainfo import Models -from modelscope.models.builder import MODELS -from modelscope.models.cv.easycv_base import EasyCVBaseModel -from modelscope.utils.constant import Tasks - - -@MODELS.register_module( - group_key=Tasks.hand_2d_keypoints, module_name=Models.hand_2d_keypoints) -class Hand2dKeyPoints(EasyCVBaseModel, TopDown): - - def __init__(self, model_dir=None, *args, **kwargs): - EasyCVBaseModel.__init__(self, model_dir, args, kwargs) - TopDown.__init__(self, *args, **kwargs) diff --git a/modelscope/models/cv/human_reconstruction/models/human_segmenter.py b/modelscope/models/cv/human_reconstruction/models/human_segmenter.py index 3f0261e7..29bf6f70 100644 --- a/modelscope/models/cv/human_reconstruction/models/human_segmenter.py +++ b/modelscope/models/cv/human_reconstruction/models/human_segmenter.py @@ -31,7 +31,7 @@ class human_segmenter(object): img = np.dstack((img, img, img)) elif img.shape[2] == 4: img = img[:, :, :3] - img = img.astype(np.float) + img = img.astype(float) return img def run(self, img): diff --git a/modelscope/models/cv/human_reconstruction/utils.py b/modelscope/models/cv/human_reconstruction/utils.py index 45653dc6..67e1efdb 100644 --- a/modelscope/models/cv/human_reconstruction/utils.py +++ b/modelscope/models/cv/human_reconstruction/utils.py @@ -69,8 +69,8 @@ def eval_grid(coords, num_samples=512 * 512 * 512): resolution = coords.shape[1:4] sdf = np.zeros(resolution) - dirty = np.ones(resolution, dtype=np.bool) - grid_mask = np.zeros(resolution, dtype=np.bool) + dirty = np.ones(resolution, dtype=bool) + grid_mask = np.zeros(resolution, dtype=bool) reso = resolution[0] // init_resolution while reso > 0: diff --git a/modelscope/models/cv/human_wholebody_keypoint/human_wholebody_keypoint.py b/modelscope/models/cv/human_wholebody_keypoint/human_wholebody_keypoint.py deleted file mode 100644 index dd3c0290..00000000 --- a/modelscope/models/cv/human_wholebody_keypoint/human_wholebody_keypoint.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-from easycv.models.pose.top_down import TopDown - -from modelscope.metainfo import Models -from modelscope.models.builder import MODELS -from modelscope.models.cv.easycv_base import EasyCVBaseModel -from modelscope.utils.constant import Tasks - - -@MODELS.register_module( - group_key=Tasks.human_wholebody_keypoint, - module_name=Models.human_wholebody_keypoint) -class HumanWholeBodyKeypoint(EasyCVBaseModel, TopDown): - - def __init__(self, model_dir=None, *args, **kwargs): - EasyCVBaseModel.__init__(self, model_dir, args, kwargs) - TopDown.__init__(self, *args, **kwargs) diff --git a/modelscope/models/cv/image_defrcn_fewshot/utils/voc_register.py b/modelscope/models/cv/image_defrcn_fewshot/utils/voc_register.py index 7a94066e..0b043493 100644 --- a/modelscope/models/cv/image_defrcn_fewshot/utils/voc_register.py +++ b/modelscope/models/cv/image_defrcn_fewshot/utils/voc_register.py @@ -163,7 +163,7 @@ def load_filtered_voc_instances(name: str, root: str, dirname: str, split: str, os.path.join(split_dir, 'box_{}shot_{}_train.txt'.format(shot, cls))) as f: - fileids_ = np.loadtxt(f, dtype=np.str).tolist() + fileids_ = np.loadtxt(f, dtype=np.str_).tolist() if isinstance(fileids_, str): fileids_ = [fileids_] fileids_ = [ @@ -219,7 +219,7 @@ def load_filtered_voc_instances(name: str, root: str, dirname: str, split: str, with PathManager.open( os.path.join(root, dirname, 'ImageSets', 'Main', split + '.txt')) as f: - fileids = np.loadtxt(f, dtype=np.str) + fileids = np.loadtxt(f, dtype=np.str_) for fileid in fileids: anno_file = os.path.join(root, dirname, 'Annotations', diff --git a/modelscope/models/cv/image_instance_segmentation/__init__.py b/modelscope/models/cv/image_instance_segmentation/__init__.py index 60e688eb..8041a7e7 100644 --- a/modelscope/models/cv/image_instance_segmentation/__init__.py +++ b/modelscope/models/cv/image_instance_segmentation/__init__.py @@ -8,10 +8,12 @@ if TYPE_CHECKING: from .maskdino_swin import MaskDINOSwin from .model import CascadeMaskRCNNSwinModel from .maskdino_model import MaskDINOSwinModel + from .fastinst_model import FastInst from .postprocess_utils import get_img_ins_seg_result, get_maskdino_ins_seg_result else: _import_structure = { 'cascade_mask_rcnn_swin': ['CascadeMaskRCNNSwin'], + 'fastinst_model': ['FastInst'], 'maskdino_swin': ['MaskDINOSwin'], 'model': ['CascadeMaskRCNNSwinModel'], 'maskdino_model': ['MaskDINOSwinModel'], diff --git a/modelscope/models/cv/image_instance_segmentation/backbones/__init__.py b/modelscope/models/cv/image_instance_segmentation/backbones/__init__.py index bbeac51e..1e7325f3 100644 --- a/modelscope/models/cv/image_instance_segmentation/backbones/__init__.py +++ b/modelscope/models/cv/image_instance_segmentation/backbones/__init__.py @@ -6,10 +6,12 @@ from modelscope.utils.import_utils import LazyImportModule if TYPE_CHECKING: from .swin_transformer import SwinTransformer from .swin_transformer import D2SwinTransformer + from .resnet import build_resnet_backbone else: _import_structure = { 'swin_transformer': ['SwinTransformer', 'D2SwinTransformer'], + 'resnet': ['build_resnet_backbone'] } import sys diff --git a/modelscope/models/cv/image_instance_segmentation/backbones/resnet.py b/modelscope/models/cv/image_instance_segmentation/backbones/resnet.py new file mode 100644 index 00000000..4e2a5ec1 --- /dev/null +++ b/modelscope/models/cv/image_instance_segmentation/backbones/resnet.py @@ -0,0 +1,114 @@ +# Part of the implementation is borrowed and modified from Detectron2, publicly available at +# 
https://github.com/facebookresearch/detectron2/blob/main/projects/DeepLab/deeplab/resnet.py + +import torch.nn.functional as F +from torch import nn + +from modelscope.models.cv.image_human_parsing.backbone.deeplab_resnet import ( + BottleneckBlock, DeeplabResNet, get_norm) +from modelscope.models.cv.image_instance_segmentation.maskdino.utils import \ + Conv2d + + +class BasicStem(nn.Module): + """ + The standard ResNet stem (layers before the first residual block), + with a conv, relu and max_pool. + """ + + def __init__(self, in_channels=3, out_channels=64, norm='BN'): + """ + Args: + norm (str or callable): norm after the first conv layer. + See :func:`layers.get_norm` for supported format. + """ + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.stride = 4 + self.conv1 = Conv2d( + in_channels, + out_channels, + kernel_size=7, + stride=2, + padding=3, + bias=False, + norm=get_norm(norm, out_channels), + ) + + def forward(self, x): + x = self.conv1(x) + x = F.relu_(x) + x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1) + return x + + +def build_resnet_backbone(out_features, depth, num_groups, width_per_group, + norm, stem_out_channels, res2_out_channels, + stride_in_1x1, res4_dilation, res5_dilation, + res5_multi_grid, input_shape): + stem = BasicStem( + in_channels=input_shape['channels'], + out_channels=stem_out_channels, + norm=norm) + bottleneck_channels = num_groups * width_per_group + in_channels = stem_out_channels + out_channels = res2_out_channels + + assert res4_dilation in { + 1, 2 + }, 'res4_dilation cannot be {}.'.format(res4_dilation) + assert res5_dilation in { + 1, 2, 4 + }, 'res5_dilation cannot be {}.'.format(res5_dilation) + if res4_dilation == 2: + # Always dilate res5 if res4 is dilated. + assert res5_dilation == 4 + + num_blocks_per_stage = { + 50: [3, 4, 6, 3], + 101: [3, 4, 23, 3], + 152: [3, 8, 36, 3] + }[depth] + + stages = [] + out_stage_idx = [{ + 'res2': 2, + 'res3': 3, + 'res4': 4, + 'res5': 5 + }[f] for f in out_features] + max_stage_idx = max(out_stage_idx) + for idx, stage_idx in enumerate(range(2, max_stage_idx + 1)): + if stage_idx == 4: + dilation = res4_dilation + elif stage_idx == 5: + dilation = res5_dilation + else: + dilation = 1 + first_stride = 1 if idx == 0 or dilation > 1 else 2 + stride_per_block = [first_stride] + stride_per_block += [1] * (num_blocks_per_stage[idx] - 1) + stage_kargs = { + 'num_blocks': num_blocks_per_stage[idx], + 'stride_per_block': stride_per_block, + 'in_channels': in_channels, + 'out_channels': out_channels, + 'norm': norm, + 'bottleneck_channels': bottleneck_channels, + 'stride_in_1x1': stride_in_1x1, + 'dilation': dilation, + 'num_groups': num_groups, + 'block_class': BottleneckBlock + } + if stage_idx == 5: + stage_kargs.pop('dilation') + stage_kargs['dilation_per_block'] = [ + dilation * mg for mg in res5_multi_grid + ] + blocks = DeeplabResNet.make_stage(**stage_kargs) + in_channels = out_channels + out_channels *= 2 + bottleneck_channels *= 2 + stages.append(blocks) + return DeeplabResNet(stem, stages, out_features=out_features) diff --git a/modelscope/models/cv/image_instance_segmentation/fastinst/__init__.py b/modelscope/models/cv/image_instance_segmentation/fastinst/__init__.py new file mode 100644 index 00000000..b937315b --- /dev/null +++ b/modelscope/models/cv/image_instance_segmentation/fastinst/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
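As a rough illustration (not part of this patch), the new `build_resnet_backbone` factory above could be wired up as follows for a ResNet-50 trunk. The function and parameter names come from the diff; every concrete value (depth, channel widths, dilation settings, input channels) is an assumed placeholder that would normally come from the model's configuration, not from this change.

    # Sketch only: all argument values below are illustrative assumptions.
    from modelscope.models.cv.image_instance_segmentation.backbones import \
        build_resnet_backbone

    backbone = build_resnet_backbone(
        out_features=['res2', 'res3', 'res4', 'res5'],  # stages exposed downstream
        depth=50,                   # 50 -> [3, 4, 6, 3] bottleneck blocks per stage
        num_groups=1,               # 1 = plain ResNet; >1 = ResNeXt-style grouped conv
        width_per_group=64,
        norm='BN',
        stem_out_channels=64,
        res2_out_channels=256,
        stride_in_1x1=False,
        res4_dilation=1,
        res5_dilation=1,
        res5_multi_grid=[1, 1, 1],  # per-block dilation multipliers for res5
        input_shape={'channels': 3})

    # Each requested stage is then available by name (e.g. 'res5') in the
    # backbone's output dict, which is how the FastInst encoder below indexes
    # its input features.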
diff --git a/modelscope/models/cv/image_instance_segmentation/fastinst/fastinst_decoder.py b/modelscope/models/cv/image_instance_segmentation/fastinst/fastinst_decoder.py new file mode 100644 index 00000000..aa4300f6 --- /dev/null +++ b/modelscope/models/cv/image_instance_segmentation/fastinst/fastinst_decoder.py @@ -0,0 +1,351 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import math + +import torch +from torch import nn +from torch.nn import functional as F + +from modelscope.models.cv.image_colorization.ddcolor.utils.transformer_utils import ( + MLP, CrossAttentionLayer, FFNLayer, SelfAttentionLayer) + + +class QueryProposal(nn.Module): + + def __init__(self, num_features, num_queries, num_classes): + super().__init__() + self.topk = num_queries + self.num_classes = num_classes + + self.conv_proposal_cls_logits = nn.Sequential( + nn.Conv2d( + num_features, num_features, kernel_size=3, stride=1, + padding=1), + nn.ReLU(inplace=True), + nn.Conv2d( + num_features, + num_classes + 1, + kernel_size=1, + stride=1, + padding=0), + ) + + @torch.no_grad() + def compute_coordinates(self, x): + h, w = x.size(2), x.size(3) + y_loc = torch.linspace(0, 1, h, device=x.device) + x_loc = torch.linspace(0, 1, w, device=x.device) + y_loc, x_loc = torch.meshgrid(y_loc, x_loc) + locations = torch.stack([x_loc, y_loc], 0).unsqueeze(0) + return locations + + def seek_local_maximum(self, x, epsilon=1e-6): + """ + inputs: + x: torch.tensor, shape [b, c, h, w] + return: + torch.tensor, shape [b, c, h, w] + """ + x_pad = F.pad(x, (1, 1, 1, 1), 'constant', 0) + # top, bottom, left, right, top-left, top-right, bottom-left, bottom-right + maximum = (x >= x_pad[:, :, :-2, 1:-1]) & \ + (x >= x_pad[:, :, 2:, 1:-1]) & \ + (x >= x_pad[:, :, 1:-1, :-2]) & \ + (x >= x_pad[:, :, 1:-1, 2:]) & \ + (x >= x_pad[:, :, :-2, :-2]) & \ + (x >= x_pad[:, :, :-2, 2:]) & \ + (x >= x_pad[:, :, 2:, :-2]) & \ + (x >= x_pad[:, :, 2:, 2:]) & \ + (x >= epsilon) + return maximum.to(x) + + def forward(self, x, pos_embeddings): + + proposal_cls_logits = self.conv_proposal_cls_logits(x) # b, c, h, w + proposal_cls_probs = proposal_cls_logits.softmax(dim=1) # b, c, h, w + proposal_cls_one_hot = F.one_hot( + proposal_cls_probs[:, :-1, :, :].max(1)[1], + num_classes=self.num_classes + 1).permute(0, 3, 1, 2) # b, c, h, w + proposal_cls_probs = proposal_cls_probs.mul(proposal_cls_one_hot) + proposal_local_maximum_map = self.seek_local_maximum( + proposal_cls_probs) # b, c, h, w + proposal_cls_probs = proposal_cls_probs + proposal_local_maximum_map # b, c, h, w + + # top-k indices + topk_indices = torch.topk( + proposal_cls_probs[:, :-1, :, :].flatten(2).max(1)[0], + self.topk, + dim=1)[1] # b, q + topk_indices = topk_indices.unsqueeze(1) # b, 1, q + + # topk queries + topk_proposals = torch.gather( + x.flatten(2), dim=2, index=topk_indices.repeat(1, x.shape[1], + 1)) # b, c, q + pos_embeddings = pos_embeddings.repeat(x.shape[0], 1, 1, 1).flatten(2) + topk_pos_embeddings = torch.gather( + pos_embeddings, + dim=2, + index=topk_indices.repeat(1, pos_embeddings.shape[1], + 1)) # b, c, q + if self.training: + locations = self.compute_coordinates(x).repeat(x.shape[0], 1, 1, 1) + topk_locations = torch.gather( + locations.flatten(2), + dim=2, + index=topk_indices.repeat(1, locations.shape[1], 1)) + topk_locations = topk_locations.transpose(-1, -2) # b, q, 2 + else: + topk_locations = None + return topk_proposals, topk_pos_embeddings, topk_locations, proposal_cls_logits + + +class FastInstDecoder(nn.Module): + + def __init__(self, in_channels, *, 
num_classes: int, hidden_dim: int, + num_queries: int, num_aux_queries: int, nheads: int, + dim_feedforward: int, dec_layers: int, pre_norm: bool, + mask_dim: int): + """ + Args: + in_channels: channels of the input features + num_classes: number of classes + hidden_dim: Transformer feature dimension + num_queries: number of queries + num_aux_queries: number of auxiliary queries + nheads: number of heads + dim_feedforward: feature dimension in feedforward network + dec_layers: number of Transformer decoder layers + pre_norm: whether to use pre-LayerNorm or not + mask_dim: mask feature dimension + """ + super().__init__() + self.num_heads = nheads + self.num_layers = dec_layers + self.num_queries = num_queries + self.num_aux_queries = num_aux_queries + self.num_classes = num_classes + + meta_pos_size = int(round(math.sqrt(self.num_queries))) + self.meta_pos_embed = nn.Parameter( + torch.empty(1, hidden_dim, meta_pos_size, meta_pos_size)) + if num_aux_queries > 0: + self.empty_query_features = nn.Embedding(num_aux_queries, + hidden_dim) + self.empty_query_pos_embed = nn.Embedding(num_aux_queries, + hidden_dim) + + self.query_proposal = QueryProposal(hidden_dim, num_queries, + num_classes) + + self.transformer_query_cross_attention_layers = nn.ModuleList() + self.transformer_query_self_attention_layers = nn.ModuleList() + self.transformer_query_ffn_layers = nn.ModuleList() + self.transformer_mask_cross_attention_layers = nn.ModuleList() + self.transformer_mask_ffn_layers = nn.ModuleList() + for idx in range(self.num_layers): + self.transformer_query_cross_attention_layers.append( + CrossAttentionLayer( + d_model=hidden_dim, + nhead=nheads, + dropout=0.0, + normalize_before=pre_norm)) + self.transformer_query_self_attention_layers.append( + SelfAttentionLayer( + d_model=hidden_dim, + nhead=nheads, + dropout=0.0, + normalize_before=pre_norm)) + self.transformer_query_ffn_layers.append( + FFNLayer( + d_model=hidden_dim, + dim_feedforward=dim_feedforward, + dropout=0.0, + normalize_before=pre_norm)) + self.transformer_mask_cross_attention_layers.append( + CrossAttentionLayer( + d_model=hidden_dim, + nhead=nheads, + dropout=0.0, + normalize_before=pre_norm)) + self.transformer_mask_ffn_layers.append( + FFNLayer( + d_model=hidden_dim, + dim_feedforward=dim_feedforward, + dropout=0.0, + normalize_before=pre_norm)) + + self.decoder_query_norm_layers = nn.ModuleList() + self.class_embed_layers = nn.ModuleList() + self.mask_embed_layers = nn.ModuleList() + self.mask_features_layers = nn.ModuleList() + for idx in range(self.num_layers + 1): + self.decoder_query_norm_layers.append(nn.LayerNorm(hidden_dim)) + self.class_embed_layers.append( + MLP(hidden_dim, hidden_dim, num_classes + 1, 3)) + self.mask_embed_layers.append( + MLP(hidden_dim, hidden_dim, mask_dim, 3)) + self.mask_features_layers.append(nn.Linear(hidden_dim, mask_dim)) + + def forward(self, x, mask_features, targets=None): + bs = x[0].shape[0] + proposal_size = x[1].shape[-2:] + pixel_feature_size = x[2].shape[-2:] + + pixel_pos_embeds = F.interpolate( + self.meta_pos_embed, + size=pixel_feature_size, + mode='bilinear', + align_corners=False) + proposal_pos_embeds = F.interpolate( + self.meta_pos_embed, + size=proposal_size, + mode='bilinear', + align_corners=False) + + pixel_features = x[2].flatten(2).permute(2, 0, 1) + pixel_pos_embeds = pixel_pos_embeds.flatten(2).permute(2, 0, 1) + + query_features, query_pos_embeds, query_locations, proposal_cls_logits = self.query_proposal( + x[1], proposal_pos_embeds) + query_features = 
query_features.permute(2, 0, 1) + query_pos_embeds = query_pos_embeds.permute(2, 0, 1) + if self.num_aux_queries > 0: + aux_query_features = self.empty_query_features.weight.unsqueeze( + 1).repeat(1, bs, 1) + aux_query_pos_embed = self.empty_query_pos_embed.weight.unsqueeze( + 1).repeat(1, bs, 1) + query_features = torch.cat([query_features, aux_query_features], + dim=0) + query_pos_embeds = torch.cat( + [query_pos_embeds, aux_query_pos_embed], dim=0) + + outputs_class, outputs_mask, attn_mask, _, _ = self.forward_prediction_heads( + query_features, + pixel_features, + pixel_feature_size, + -1, + return_attn_mask=True) + predictions_class = [outputs_class] + predictions_mask = [outputs_mask] + predictions_matching_index = [None] + query_feature_memory = [query_features] + pixel_feature_memory = [pixel_features] + + for i in range(self.num_layers): + query_features, pixel_features = self.forward_one_layer( + query_features, pixel_features, query_pos_embeds, + pixel_pos_embeds, attn_mask, i) + if i < self.num_layers - 1: + outputs_class, outputs_mask, attn_mask, _, _ = self.forward_prediction_heads( + query_features, + pixel_features, + pixel_feature_size, + i, + return_attn_mask=True, + ) + else: + outputs_class, outputs_mask, _, matching_indices, gt_attn_mask = self.forward_prediction_heads( + query_features, + pixel_features, + pixel_feature_size, + i, + ) + predictions_class.append(outputs_class) + predictions_mask.append(outputs_mask) + predictions_matching_index.append(None) + query_feature_memory.append(query_features) + pixel_feature_memory.append(pixel_features) + + out = { + 'proposal_cls_logits': + proposal_cls_logits, + 'query_locations': + query_locations, + 'pred_logits': + predictions_class[-1], + 'pred_masks': + predictions_mask[-1], + 'pred_indices': + predictions_matching_index[-1], + 'aux_outputs': + self._set_aux_loss(predictions_class, predictions_mask, + predictions_matching_index, query_locations) + } + return out + + def forward_one_layer(self, query_features, pixel_features, + query_pos_embeds, pixel_pos_embeds, attn_mask, i): + pixel_features = self.transformer_mask_cross_attention_layers[i]( + pixel_features, + query_features, + query_pos=pixel_pos_embeds, + pos=query_pos_embeds) + pixel_features = self.transformer_mask_ffn_layers[i](pixel_features) + + query_features = self.transformer_query_cross_attention_layers[i]( + query_features, + pixel_features, + memory_mask=attn_mask, + query_pos=query_pos_embeds, + pos=pixel_pos_embeds) + query_features = self.transformer_query_self_attention_layers[i]( + query_features, query_pos=query_pos_embeds) + query_features = self.transformer_query_ffn_layers[i](query_features) + return query_features, pixel_features + + def forward_prediction_heads(self, + query_features, + pixel_features, + pixel_feature_size, + idx_layer, + return_attn_mask=False, + return_gt_attn_mask=False, + targets=None, + query_locations=None): + decoder_query_features = self.decoder_query_norm_layers[idx_layer + 1]( + query_features[:self.num_queries]) + decoder_query_features = decoder_query_features.transpose(0, 1) + if idx_layer + 1 == self.num_layers: + outputs_class = self.class_embed_layers[idx_layer + 1]( + decoder_query_features) + else: + outputs_class = None + outputs_mask_embed = self.mask_embed_layers[idx_layer + 1]( + decoder_query_features) + outputs_mask_features = self.mask_features_layers[idx_layer + 1]( + pixel_features.transpose(0, 1)) + + outputs_mask = torch.einsum('bqc,blc->bql', outputs_mask_embed, + outputs_mask_features) + 
outputs_mask = outputs_mask.reshape(-1, self.num_queries, + *pixel_feature_size) + + if return_attn_mask: + # outputs_mask.shape: b, q, h, w + attn_mask = F.pad(outputs_mask, + (0, 0, 0, 0, 0, self.num_aux_queries), + 'constant', 1) + attn_mask = (attn_mask < 0.).flatten(2) # b, q, hw + invalid_query = attn_mask.all(-1, keepdim=True) # b, q, 1 + attn_mask = (~invalid_query) & attn_mask # b, q, hw + attn_mask = attn_mask.unsqueeze(1).repeat(1, self.num_heads, 1, + 1).flatten(0, 1) + attn_mask = attn_mask.detach() + else: + attn_mask = None + + matching_indices = None + gt_attn_mask = None + + return outputs_class, outputs_mask, attn_mask, matching_indices, gt_attn_mask + + @torch.jit.unused + def _set_aux_loss(self, outputs_class, outputs_seg_masks, output_indices, + output_query_locations): + return [{ + 'query_locations': output_query_locations, + 'pred_logits': a, + 'pred_masks': b, + 'pred_matching_indices': c + } for a, b, c in zip(outputs_class[:-1], outputs_seg_masks[:-1], + output_indices[:-1])] diff --git a/modelscope/models/cv/image_instance_segmentation/fastinst/fastinst_encoder.py b/modelscope/models/cv/image_instance_segmentation/fastinst/fastinst_encoder.py new file mode 100644 index 00000000..46b3f74d --- /dev/null +++ b/modelscope/models/cv/image_instance_segmentation/fastinst/fastinst_encoder.py @@ -0,0 +1,180 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import logging +from typing import Callable, Optional, Union + +import torch +from torch import nn +from torch.nn import functional as F + +from modelscope.models.cv.image_instance_segmentation.maskdino.utils import \ + Conv2d + + +# This is a modified FPN decoder. +class BaseFPN(nn.Module): + + def __init__( + self, + input_shape, + *, + convs_dim: int, + mask_dim: int, + norm: Optional[Union[str, Callable]] = None, + ): + """ + Args: + input_shape: shapes (channels and stride) of the input features + convs_dim: number of output channels for the intermediate conv layers. + mask_dim: number of output channels for the final conv layer. + norm (str or callable): normalization for all conv layers + """ + super().__init__() + + input_shape = sorted(input_shape.items(), key=lambda x: x[1]['stride']) + self.in_features = [k for k, v in input_shape + ] # starting from "res3" to "res5" + feature_channels = [v['channels'] for k, v in input_shape] + + lateral_convs = [] + output_convs = [] + + use_bias = norm == '' + for idx, in_channels in enumerate(feature_channels): + lateral_norm = nn.GroupNorm(32, convs_dim) + output_norm = nn.GroupNorm(32, convs_dim) + + lateral_conv = Conv2d( + in_channels, + convs_dim, + kernel_size=1, + bias=use_bias, + norm=lateral_norm) + output_conv = Conv2d( + convs_dim, + convs_dim, + kernel_size=3, + stride=1, + padding=1, + bias=use_bias, + norm=output_norm, + activation=F.relu, + ) + self.add_module('adapter_{}'.format(idx + 1), lateral_conv) + self.add_module('layer_{}'.format(idx + 1), output_conv) + + lateral_convs.append(lateral_conv) + output_convs.append(output_conv) + # Place convs into top-down order (from low to high resolution) + # to make the top-down computation in forward clearer. 
+ self.lateral_convs = lateral_convs[::-1] + self.output_convs = output_convs[::-1] + + self.convs_dim = convs_dim + self.num_feature_levels = 3 # always use 3 scales + + def forward_features(self, features): + multi_scale_features = [] + num_cur_levels = 0 + # Reverse feature maps into top-down order (from low to high resolution) + for idx, f in enumerate(self.in_features[::-1]): + x = features[f] + lateral_conv = self.lateral_convs[idx] + output_conv = self.output_convs[idx] + if idx == 0: + y = lateral_conv(x) + else: + cur_fpn = lateral_conv(x) + y = cur_fpn + F.interpolate( + y, + size=cur_fpn.shape[-2:], + mode='bilinear', + align_corners=False) + y = output_conv(y) + + if num_cur_levels < self.num_feature_levels: + multi_scale_features.append(y) + num_cur_levels += 1 + return None, multi_scale_features + + def forward(self, features, targets=None): + logger = logging.getLogger(__name__) + logger.warning( + 'Calling forward() may cause unpredicted behavior of PixelDecoder module.' + ) + return self.forward_features(features) + + +class PyramidPoolingModule(nn.Module): + + def __init__(self, in_channels, channels=512, sizes=(1, 2, 3, 6)): + super().__init__() + self.stages = [] + self.stages = nn.ModuleList( + [self._make_stage(in_channels, channels, size) for size in sizes]) + self.bottleneck = Conv2d(in_channels + len(sizes) * channels, + in_channels, 1) + + def _make_stage(self, features, out_features, size): + prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) + conv = Conv2d(features, out_features, 1) + return nn.Sequential(prior, conv) + + def forward(self, feats): + h, w = feats.size(2), feats.size(3) + priors = [ + F.interpolate( + input=F.relu_(stage(feats)), + size=(h, w), + mode='bilinear', + align_corners=False) for stage in self.stages + ] + [feats] + out = F.relu_(self.bottleneck(torch.cat(priors, 1))) + return out + + +class PyramidPoolingModuleFPN(BaseFPN): + + def __init__( + self, + input_shape, + *, + convs_dim: int, + mask_dim: int, + norm: Optional[Union[str, Callable]] = None, + ): + """ + NOTE: this interface is experimental. + Args: + input_shape: shapes (channels and stride) of the input features + convs_dim: number of output channels for the intermediate conv layers. + mask_dim: number of output channels for the final conv layer. 
+ norm (str or callable): normalization for all conv layers + """ + super().__init__( + input_shape, convs_dim=convs_dim, mask_dim=mask_dim, norm=norm) + self.ppm = PyramidPoolingModule(convs_dim, convs_dim // 4) + + def forward_features(self, features): + multi_scale_features = [] + num_cur_levels = 0 + # Reverse feature maps into top-down order (from low to high resolution) + for idx, f in enumerate(self.in_features[::-1]): + x = features[f] + lateral_conv = self.lateral_convs[idx] + output_conv = self.output_convs[idx] + if idx == 0: + y = self.ppm(lateral_conv(x)) + else: + cur_fpn = lateral_conv(x) + y = cur_fpn + F.interpolate( + y, + size=cur_fpn.shape[-2:], + mode='bilinear', + align_corners=False) + y = output_conv(y) + + if num_cur_levels < self.num_feature_levels: + multi_scale_features.append(y) + num_cur_levels += 1 + + return None, multi_scale_features diff --git a/modelscope/models/cv/image_instance_segmentation/fastinst_model.py b/modelscope/models/cv/image_instance_segmentation/fastinst_model.py new file mode 100644 index 00000000..f9cfbc4f --- /dev/null +++ b/modelscope/models/cv/image_instance_segmentation/fastinst_model.py @@ -0,0 +1,221 @@ +# Part of implementation is borrowed and modified from Mask2Former, publicly available at +# https://github.com/facebookresearch/Mask2Former. +import os +from typing import Any, Dict, List + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from modelscope.metainfo import Models +from modelscope.models.base import TorchModel +from modelscope.models.builder import MODELS +from modelscope.models.cv.image_instance_segmentation.maskdino_swin import \ + ImageList +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger +from .backbones import build_resnet_backbone +from .fastinst.fastinst_decoder import FastInstDecoder +from .fastinst.fastinst_encoder import PyramidPoolingModuleFPN + +logger = get_logger() + + +@MODELS.register_module(Tasks.image_segmentation, module_name=Models.fastinst) +class FastInst(TorchModel): + + def __init__(self, + model_dir, + backbone=None, + encoder=None, + decoder=None, + pretrained=None, + classes=None, + **kwargs): + """ + Deep Learning Technique for Human Parsing: A Survey and Outlook. See https://arxiv.org/abs/2301.00394 + Args: + backbone (dict): backbone config. + encoder (dict): encoder config. + decoder (dict): decoder config. 
+ pretrained (bool): whether to use pretrained model + classes (list): class names + """ + super(FastInst, self).__init__(model_dir, **kwargs) + + self.backbone = build_resnet_backbone( + **backbone, input_shape={'channels': 3}) + in_features = encoder.pop('in_features') + input_shape = { + k: v + for k, v in self.backbone.output_shape().items() + if k in in_features + } + encoder = PyramidPoolingModuleFPN(input_shape=input_shape, **encoder) + decoder = FastInstDecoder(in_channels=encoder.convs_dim, **decoder) + self.sem_seg_head = FastInstHead( + pixel_decoder=encoder, transformer_predictor=decoder) + + self.num_classes = decoder.num_classes + self.num_queries = decoder.num_queries + self.size_divisibility = 32 + self.register_buffer( + 'pixel_mean', + torch.Tensor([123.675, 116.28, 103.53]).view(-1, 1, 1), False) + self.register_buffer( + 'pixel_std', + torch.Tensor([58.395, 57.12, 57.375]).view(-1, 1, 1), False) + self.classes = classes + self.test_topk_per_image = 100 + + if pretrained: + model_path = os.path.join(model_dir, ModelFile.TORCH_MODEL_FILE) + logger.info(f'loading model from {model_path}') + weight = torch.load(model_path, map_location='cpu')['model'] + tgt_weight = self.state_dict() + for name in list(weight.keys()): + if name in tgt_weight: + load_size = weight[name].size() + tgt_size = tgt_weight[name].size() + mis_match = False + if len(load_size) != len(tgt_size): + mis_match = True + else: + for n1, n2 in zip(load_size, tgt_size): + if n1 != n2: + mis_match = True + break + if mis_match: + logger.info( + f'size mismatch for {name} ' + f'({load_size} -> {tgt_size}), skip loading.') + del weight[name] + else: + logger.info( + f'{name} doesn\'t exist in current model, skip loading.' + ) + + self.load_state_dict(weight, strict=False) + logger.info('load model done') + + def forward(self, batched_inputs: List[dict]) -> Dict[str, Any]: + images = [x['image'].to(self.device) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors(images, self.size_divisibility) + + features = self.backbone(images.tensor) + outputs = self.sem_seg_head(features) + + return dict( + outputs=outputs, batched_inputs=batched_inputs, images=images) + + def postprocess(self, input: Dict[str, Any]) -> Dict[str, Any]: + outputs = input['outputs'] + batched_inputs = input['batched_inputs'] + images = input['images'] + if self.training: + raise NotImplementedError + else: + mask_cls_results = outputs['pred_logits'] # (B, Q, C+1) + mask_pred_results = outputs['pred_masks'] # (B, Q, H, W) + # upsample masks + mask_pred_results = F.interpolate( + mask_pred_results, + size=(images.tensor.shape[-2], images.tensor.shape[-1]), + mode='bilinear', + align_corners=False, + ) + + del outputs + + processed_results = [] + for mask_cls_result, mask_pred_result, input_per_image, image_size in zip( + mask_cls_results, mask_pred_results, batched_inputs, + images.image_sizes): + height = input_per_image.get('height', image_size[0]) + width = input_per_image.get('width', image_size[1]) + processed_results.append({}) # for each image + + mask_pred_result = self.sem_seg_postprocess( + mask_pred_result, image_size, height, width) + mask_cls_result = mask_cls_result.to(mask_pred_result) + + instance_r = self.instance_inference(mask_cls_result, + mask_pred_result) + processed_results[-1]['instances'] = instance_r + + return dict(eval_result=processed_results) + + @property + def device(self): + return self.pixel_mean.device + + def sem_seg_postprocess(self, 
result, img_size, output_height, + output_width): + result = result[:, :img_size[0], :img_size[1]].expand(1, -1, -1, -1) + result = F.interpolate( + result, + size=(output_height, output_width), + mode='bilinear', + align_corners=False)[0] + return result + + def instance_inference(self, mask_cls, mask_pred): + # mask_pred is already processed to have the same shape as original input + image_size = mask_pred.shape[-2:] + + # [Q, K] + scores = F.softmax(mask_cls, dim=-1)[:, :-1] + labels = torch.arange( + self.num_classes, + device=self.device).unsqueeze(0).repeat(self.num_queries, + 1).flatten(0, 1) + scores_per_image, topk_indices = scores.flatten(0, 1).topk( + self.test_topk_per_image, sorted=False) + labels_per_image = labels[topk_indices] + + topk_indices = topk_indices // self.num_classes + mask_pred = mask_pred[topk_indices] + + result = {'image_size': image_size} + # mask (before sigmoid) + mask_pred_sigmoid = mask_pred.sigmoid() + result['pred_masks'] = (mask_pred_sigmoid > 0.5).float() + + # calculate average mask prob + mask_scores_per_image = (mask_pred_sigmoid.flatten(1) + * result['pred_masks'].flatten(1)).sum(1) / ( + result['pred_masks'].flatten(1).sum(1) + + 1e-6) + result['scores'] = scores_per_image * mask_scores_per_image + result['pred_classes'] = labels_per_image + return result + + +class FastInstHead(nn.Module): + + def __init__( + self, + *, + pixel_decoder: nn.Module, + # extra parameters + transformer_predictor: nn.Module): + """ + NOTE: this interface is experimental. + Args: + pixel_decoder: the pixel decoder module + transformer_predictor: the transformer decoder that makes prediction + """ + super().__init__() + self.pixel_decoder = pixel_decoder + self.predictor = transformer_predictor + + def forward(self, features, targets=None): + return self.layers(features, targets) + + def layers(self, features, targets=None): + mask_features, multi_scale_features = self.pixel_decoder.forward_features( + features) + predictions = self.predictor(multi_scale_features, mask_features, + targets) + return predictions diff --git a/modelscope/models/cv/image_instance_segmentation/postprocess_utils.py b/modelscope/models/cv/image_instance_segmentation/postprocess_utils.py index fdbb2fb0..aad7d8e9 100644 --- a/modelscope/models/cv/image_instance_segmentation/postprocess_utils.py +++ b/modelscope/models/cv/image_instance_segmentation/postprocess_utils.py @@ -108,16 +108,16 @@ def get_img_ins_seg_result(img_seg_result=None, for seg_result in img_seg_result: box = [ - np.int(seg_result[0]), - np.int(seg_result[1]), - np.int(seg_result[2]), - np.int(seg_result[3]) + int(seg_result[0]), + int(seg_result[1]), + int(seg_result[2]), + int(seg_result[3]) ] - score = np.float(seg_result[4]) + score = float(seg_result[4]) category = seg_result[5] mask = np.array(seg_result[6], order='F', dtype='uint8') - mask = mask.astype(np.float) + mask = mask.astype(float) results_dict[OutputKeys.BOXES].append(box) results_dict[OutputKeys.MASKS].append(mask) diff --git a/modelscope/models/cv/image_mvs_depth_estimation/colmap2mvsnet.py b/modelscope/models/cv/image_mvs_depth_estimation/colmap2mvsnet.py index feda4430..37d92c13 100644 --- a/modelscope/models/cv/image_mvs_depth_estimation/colmap2mvsnet.py +++ b/modelscope/models/cv/image_mvs_depth_estimation/colmap2mvsnet.py @@ -382,7 +382,7 @@ def processing_single_scene(args): points3d[p3d_id].xyz[0], points3d[p3d_id].xyz[1], points3d[p3d_id].xyz[2], 1 ]) - zs.append(np.asscalar(transformed[2])) + zs.append(transformed[2].item()) zs_sorted = sorted(zs) # 
relaxed depth range max_ratio = 0.1 diff --git a/modelscope/models/cv/image_mvs_depth_estimation/depth_filter.py b/modelscope/models/cv/image_mvs_depth_estimation/depth_filter.py index 16cdedf4..4ef6275a 100644 --- a/modelscope/models/cv/image_mvs_depth_estimation/depth_filter.py +++ b/modelscope/models/cv/image_mvs_depth_estimation/depth_filter.py @@ -40,7 +40,7 @@ def read_mask(filename): # save a binary mask def save_mask(filename, mask): - assert mask.dtype == np.bool + assert mask.dtype == bool mask = mask.astype(np.uint8) * 255 Image.fromarray(mask).save(filename) diff --git a/modelscope/models/cv/image_panoptic_segmentation/__init__.py b/modelscope/models/cv/image_panoptic_segmentation/__init__.py index 1af5b6f8..2b2be4b7 100644 --- a/modelscope/models/cv/image_panoptic_segmentation/__init__.py +++ b/modelscope/models/cv/image_panoptic_segmentation/__init__.py @@ -5,7 +5,6 @@ from modelscope.utils.import_utils import LazyImportModule if TYPE_CHECKING: from .panseg_model import SwinLPanopticSegmentation - from .r50_panseg_model import R50PanopticSegmentation else: _import_structure = { diff --git a/modelscope/models/cv/image_panoptic_segmentation/r50_panseg_model.py b/modelscope/models/cv/image_panoptic_segmentation/r50_panseg_model.py deleted file mode 100644 index 73b6b76c..00000000 --- a/modelscope/models/cv/image_panoptic_segmentation/r50_panseg_model.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. - -from easycv.models.segmentation import Mask2Former - -from modelscope.metainfo import Models -from modelscope.models.builder import MODELS -from modelscope.models.cv.easycv_base import EasyCVBaseModel -from modelscope.utils.constant import Tasks - - -@MODELS.register_module( - group_key=Tasks.image_segmentation, - module_name=Models.r50_panoptic_segmentation) -class R50PanopticSegmentation(EasyCVBaseModel, Mask2Former): - - def __init__(self, model_dir=None, *args, **kwargs): - EasyCVBaseModel.__init__(self, model_dir, args, kwargs) - Mask2Former.__init__(self, *args, **kwargs) diff --git a/modelscope/models/cv/image_semantic_segmentation/segformer.py b/modelscope/models/cv/image_semantic_segmentation/segformer.py deleted file mode 100644 index 46303526..00000000 --- a/modelscope/models/cv/image_semantic_segmentation/segformer.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-from easycv.models.segmentation import EncoderDecoder - -from modelscope.metainfo import Models -from modelscope.models.builder import MODELS -from modelscope.models.cv.easycv_base import EasyCVBaseModel -from modelscope.utils.constant import Tasks - - -@MODELS.register_module( - group_key=Tasks.image_segmentation, module_name=Models.segformer) -class Segformer(EasyCVBaseModel, EncoderDecoder): - - def __init__(self, model_dir=None, *args, **kwargs): - EasyCVBaseModel.__init__(self, model_dir, args, kwargs) - EncoderDecoder.__init__(self, *args, **kwargs) diff --git a/modelscope/models/cv/image_semantic_segmentation/semantic_seg_model.py b/modelscope/models/cv/image_semantic_segmentation/semantic_seg_model.py index 2b38ebad..455f29fb 100644 --- a/modelscope/models/cv/image_semantic_segmentation/semantic_seg_model.py +++ b/modelscope/models/cv/image_semantic_segmentation/semantic_seg_model.py @@ -60,7 +60,7 @@ class SemanticSegmentation(TorchModel): ids = ids[legal_indices] segms = (semantic_result[None] == ids[:, None, None]) - masks = [it.astype(np.int) for it in segms] + masks = [it.astype(int) for it in segms] labels_txt = np.array(self.CLASSES)[ids].tolist() results = { diff --git a/modelscope/models/cv/image_skychange/ptsemseg/hrnet_backnone.py b/modelscope/models/cv/image_skychange/ptsemseg/hrnet_backnone.py index 66429d67..8fcb6625 100644 --- a/modelscope/models/cv/image_skychange/ptsemseg/hrnet_backnone.py +++ b/modelscope/models/cv/image_skychange/ptsemseg/hrnet_backnone.py @@ -458,7 +458,7 @@ class HrnetBackBone(nn.Module): self.stage4, pre_stage_channels = self._make_stage( self.stage4_cfg, num_channels, multi_scale_output=True) - self.backbone_last_inp_channels = np.int(np.sum(pre_stage_channels)) + self.backbone_last_inp_channels = int(np.sum(pre_stage_channels)) def _make_transition_layer(self, num_channels_pre_layer, num_channels_cur_layer): diff --git a/modelscope/models/cv/image_skychange/ptsemseg/hrnet_super_and_ocr.py b/modelscope/models/cv/image_skychange/ptsemseg/hrnet_super_and_ocr.py index 09768451..5dbef66e 100644 --- a/modelscope/models/cv/image_skychange/ptsemseg/hrnet_super_and_ocr.py +++ b/modelscope/models/cv/image_skychange/ptsemseg/hrnet_super_and_ocr.py @@ -259,7 +259,7 @@ class HrnetSuperAndOcr(HrnetBackBone): num_channels = [64, last_inp_channels] self.stage_super, super_stage_channels = self._make_stage( self.super_dict, num_channels) - last_inp_channels = np.int(np.sum(super_stage_channels)) + last_inp_channels = int(np.sum(super_stage_channels)) if self.is_contain_aspp: aspp_param = kwargs['aspp'] @@ -372,7 +372,7 @@ class HrnetSuperAndOcr(HrnetBackBone): num_channels = [64, ocr_mid_channels] self.stage_super, super_stage_channels = self._make_stage( self.super_dict, num_channels) - last_inp_channels = np.int(np.sum(super_stage_channels)) + last_inp_channels = int(np.sum(super_stage_channels)) self.cls_head = nn.Sequential( nn.Conv2d( diff --git a/modelscope/models/cv/movie_scene_segmentation/model.py b/modelscope/models/cv/movie_scene_segmentation/model.py index 336af3b3..818a3876 100644 --- a/modelscope/models/cv/movie_scene_segmentation/model.py +++ b/modelscope/models/cv/movie_scene_segmentation/model.py @@ -13,7 +13,8 @@ import torch.nn as nn import torch.nn.functional as F import torchvision.transforms as TF from PIL import Image -from shotdetect_scenedetect_lgss import shot_detect +from shotdetect_scenedetect_lgss import shot_detector +from tqdm import tqdm from modelscope.metainfo import Models from modelscope.models.base.base_torch_model 
import TorchModel @@ -60,6 +61,9 @@ class MovieSceneSegmentationModel(TorchModel): self.head_sbd = nn.Linear(hdim, 2) load_param_with_prefix('head_sbd', self.head_sbd, params) + self.shot_detector = shot_detector() + self.shot_detector.init(**self.cfg.preprocessor.shot_detect) + self.test_transform = TF.Compose([ TF.Resize(size=256, interpolation=Image.BICUBIC), TF.CenterCrop(224), @@ -98,29 +102,45 @@ class MovieSceneSegmentationModel(TorchModel): def inference(self, batch): logger.info('Begin scene detect ......') bs = self.cfg.pipeline.batch_size_per_gpu - sids = batch['sid'] - inputs = batch['shot_feat'] + device = self.crn.attention_mask.device - shot_num = len(sids) + shot_timecode_lst = batch['shot_timecode_lst'] + shot_idx_lst = batch['shot_idx_lst'] + + shot_num = len(shot_timecode_lst) cnt = math.ceil(shot_num / bs) - infer_sid, infer_pred = [], [] + infer_pred = [] infer_result = {} - for i in range(cnt): + self.shot_detector.start() + + for i in tqdm(range(cnt)): start = i * bs end = (i + 1) * bs if (i + 1) * bs < shot_num else shot_num - input_ = inputs[start:end] - sid_ = sids[start:end] - input_ = torch.stack(input_) + + batch_shot_idx_lst = shot_idx_lst[start:end] + + shot_start_idx = batch_shot_idx_lst[0][0] + shot_end_idx = batch_shot_idx_lst[-1][-1] + batch_timecode_lst = { + i: shot_timecode_lst[i] + for i in range(shot_start_idx, shot_end_idx + 1) + } + batch_shot_keyf_lst = self.shot_detector.get_frame_img( + batch_timecode_lst, shot_start_idx, shot_num) + inputs = self.get_batch_input(batch_shot_keyf_lst, shot_start_idx, + batch_shot_idx_lst) + + input_ = torch.stack(inputs).to(device) outputs = self.shared_step(input_) # shape [b,2] prob = F.softmax(outputs, dim=1) - infer_sid.extend(sid_.cpu().detach().numpy()) infer_pred.extend(prob[:, 1].cpu().detach().numpy()) - infer_result.update({'pred': np.stack(infer_pred)}) - infer_result.update({'sid': infer_sid}) - assert len(infer_result['sid']) == len(sids) - assert len(infer_result['pred']) == len(inputs) + infer_result.update({'pred': np.stack(infer_pred)}) + infer_result.update({'sid': np.arange(shot_num)}) + + assert len(infer_result['pred']) == shot_num + self.shot_detector.release() return infer_result def shared_step(self, inputs): @@ -162,38 +182,48 @@ class MovieSceneSegmentationModel(TorchModel): logger.info('Generate scene .......') pred_dict = inputs['feat'] + shot2keyf = inputs['shot2keyf'] thres = self.cfg.pipeline.save_threshold anno_dict = get_pred_boundary(pred_dict, thres) scene_dict_lst, scene_list, shot_num, shot_dict_lst = pred2scene( - self.shot2keyf, anno_dict) + shot2keyf, anno_dict) if self.cfg.pipeline.save_split_scene: re_dir = scene2video(inputs['input_video_pth'], scene_list, thres) print(f'Split scene video saved to {re_dir}') return len(scene_list), scene_dict_lst, shot_num, shot_dict_lst - def preprocess(self, inputs): - logger.info('Begin shot detect......') - shot_keyf_lst, anno, shot2keyf = shot_detect( - inputs, **self.cfg.preprocessor.shot_detect) - logger.info('Shot detect done!') + def get_batch_input(self, shot_keyf_lst, shot_start_idx, shot_idx_lst): - single_shot_feat, sid = [], [] + single_shot_feat = [] for idx, one_shot in enumerate(shot_keyf_lst): one_shot = [ self.test_transform(one_frame) for one_frame in one_shot ] one_shot = torch.stack(one_shot, dim=0) single_shot_feat.append(one_shot) - sid.append(idx) + single_shot_feat = torch.stack(single_shot_feat, dim=0) + shot_feat = [] + for idx, shot_idx in enumerate(shot_idx_lst): + shot_idx_ = shot_idx - shot_start_idx + 
_one_shot = single_shot_feat[shot_idx_] + shot_feat.append(_one_shot) + + return shot_feat + + def preprocess(self, inputs): + logger.info('Begin shot detect......') + shot_timecode_lst, anno, shot2keyf = self.shot_detector.shot_detect( + inputs, **self.cfg.preprocessor.shot_detect) + logger.info('Shot detect done!') + + shot_idx_lst = [] for idx, one_shot in enumerate(anno): shot_idx = int(one_shot['shot_id']) + np.arange( -self.neighbor_size, self.neighbor_size + 1) - shot_idx = np.clip(shot_idx, 0, one_shot['num_shot']) - _one_shot = single_shot_feat[shot_idx] - shot_feat.append(_one_shot) - self.shot2keyf = shot2keyf - self.anno = anno - return shot_feat, sid + shot_idx = np.clip(shot_idx, 0, one_shot['num_shot'] - 1) + shot_idx_lst.append(shot_idx) + + return shot2keyf, anno, shot_timecode_lst, shot_idx_lst diff --git a/modelscope/models/cv/movie_scene_segmentation/utils/save_op.py b/modelscope/models/cv/movie_scene_segmentation/utils/save_op.py index 49155716..34bebce0 100644 --- a/modelscope/models/cv/movie_scene_segmentation/utils/save_op.py +++ b/modelscope/models/cv/movie_scene_segmentation/utils/save_op.py @@ -10,11 +10,12 @@ from tqdm import tqdm def get_pred_boundary(pred_dict, threshold=0.5): - pred = pred_dict['pred'] + pred = pred_dict['pred'].cpu().numpy() + sid = pred_dict['sid'].cpu().numpy().astype(np.int32) tmp = (pred > threshold).astype(np.int32) anno_dict = {} for idx in range(len(tmp)): - anno_dict.update({str(pred_dict['sid'][idx]).zfill(4): int(tmp[idx])}) + anno_dict.update({str(sid[idx]).zfill(4): int(tmp[idx])}) return anno_dict diff --git a/modelscope/models/cv/nerf_recon_acc/network/segmenter.py b/modelscope/models/cv/nerf_recon_acc/network/segmenter.py index d71b9f16..e3d0ca8d 100644 --- a/modelscope/models/cv/nerf_recon_acc/network/segmenter.py +++ b/modelscope/models/cv/nerf_recon_acc/network/segmenter.py @@ -31,7 +31,7 @@ class ObjectSegmenter(object): elif img.shape[2] == 4: img = img[:, :, :3] img = img[:, :, ::-1] - img = img.astype(np.float) + img = img.astype(float) return img def run_mask(self, img): diff --git a/modelscope/models/cv/object_detection/dino.py b/modelscope/models/cv/object_detection/dino.py deleted file mode 100644 index e6c652f1..00000000 --- a/modelscope/models/cv/object_detection/dino.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from easycv.models.detection.detectors import Detection as _Detection - -from modelscope.metainfo import Models -from modelscope.models.builder import MODELS -from modelscope.models.cv.easycv_base import EasyCVBaseModel -from modelscope.utils.constant import Tasks - - -@MODELS.register_module( - group_key=Tasks.image_object_detection, module_name=Models.dino) -class DINO(EasyCVBaseModel, _Detection): - - def __init__(self, model_dir=None, *args, **kwargs): - EasyCVBaseModel.__init__(self, model_dir, args, kwargs) - _Detection.__init__(self, *args, **kwargs) diff --git a/modelscope/models/cv/object_detection/yolox_pai.py b/modelscope/models/cv/object_detection/yolox_pai.py deleted file mode 100644 index 7888cf82..00000000 --- a/modelscope/models/cv/object_detection/yolox_pai.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-from easycv.models.detection.detectors import YOLOX as _YOLOX - -from modelscope.metainfo import Models -from modelscope.models.builder import MODELS -from modelscope.models.cv.easycv_base import EasyCVBaseModel -from modelscope.utils.constant import Tasks - - -@MODELS.register_module( - group_key=Tasks.image_object_detection, module_name=Models.yolox) -@MODELS.register_module( - group_key=Tasks.image_object_detection, - module_name=Models.image_object_detection_auto) -@MODELS.register_module( - group_key=Tasks.domain_specific_object_detection, module_name=Models.yolox) -class YOLOX(EasyCVBaseModel, _YOLOX): - - def __init__(self, model_dir=None, *args, **kwargs): - EasyCVBaseModel.__init__(self, model_dir, args, kwargs) - _YOLOX.__init__(self, *args, **kwargs) diff --git a/modelscope/models/cv/object_detection_3d/depe/result_vis.py b/modelscope/models/cv/object_detection_3d/depe/result_vis.py index d577ab68..efaef0b5 100644 --- a/modelscope/models/cv/object_detection_3d/depe/result_vis.py +++ b/modelscope/models/cv/object_detection_3d/depe/result_vis.py @@ -30,7 +30,7 @@ def depth2color(depth): if gray == 1: return tuple(colors[-1].tolist()) num_rank = len(colors) - 1 - rank = np.floor(gray * num_rank).astype(np.int) + rank = np.floor(gray * num_rank).astype(int) diff = (gray - rank / num_rank) * num_rank tmp = colors[rank + 1] - colors[rank] return tuple((colors[rank] + tmp * diff).tolist()) @@ -136,7 +136,7 @@ def plot_result(res_path, l2g = get_lidar2global(infos) corners_lidar = corners_global @ np.linalg.inv(l2g).T corners_lidar = corners_lidar[:, :3] - pred_flag = np.ones((corners_lidar.shape[0] // 8, ), dtype=np.bool) + pred_flag = np.ones((corners_lidar.shape[0] // 8, ), dtype=bool) scores = [ pred_res[rid]['detection_score'] for rid in range(len(pred_res)) ] @@ -151,7 +151,7 @@ def plot_result(res_path, origin=(0.5, 0.5, 0.5)).corners.numpy().reshape(-1, 3) corners_lidar = np.concatenate([corners_lidar, corners_lidar_gt], axis=0) - gt_flag = np.ones((corners_lidar_gt.shape[0] // 8), dtype=np.bool) + gt_flag = np.ones((corners_lidar_gt.shape[0] // 8), dtype=bool) pred_flag = np.concatenate( [pred_flag, np.logical_not(gt_flag)], axis=0) scores = scores + [0 for _ in range(infos['gt_boxes'].shape[0])] @@ -169,7 +169,7 @@ def plot_result(res_path, check_point_in_img(corners_img, img.shape[0], img.shape[1])) valid = valid.reshape( -1, 8) # valid means: d>0 and visible in current view - corners_img = corners_img.reshape(-1, 8, 2).astype(np.int) + corners_img = corners_img.reshape(-1, 8, 2).astype(int) for aid in range(valid.shape[0]): if scores[aid] < vis_thred and pred_flag[aid]: continue diff --git a/modelscope/models/cv/ocr_recognition/model.py b/modelscope/models/cv/ocr_recognition/model.py index 6eb13403..2406b6dc 100644 --- a/modelscope/models/cv/ocr_recognition/model.py +++ b/modelscope/models/cv/ocr_recognition/model.py @@ -90,8 +90,15 @@ class OCRRecognition(TorchModel): f'recognizer should be either ConvNextViT, CRNN, but got {cfgs.model.recognizer}' ) if model_path != '': - self.recognizer.load_state_dict( - torch.load(model_path, map_location='cpu')) + params_pretrained = torch.load(model_path, map_location='cpu') + model_dict = self.recognizer.state_dict() + # remove prefix for finetuned models + check_point = { + k.replace('recognizer.', ''): v + for k, v in params_pretrained.items() + } + model_dict.update(check_point) + self.recognizer.load_state_dict(model_dict) dict_path = os.path.join(model_dir, ModelFile.VOCAB_FILE) self.labelMapping = dict() diff --git 
a/modelscope/models/cv/open_vocabulary_detection_vild/vild.py b/modelscope/models/cv/open_vocabulary_detection_vild/vild.py index 999ec27a..2aea0593 100644 --- a/modelscope/models/cv/open_vocabulary_detection_vild/vild.py +++ b/modelscope/models/cv/open_vocabulary_detection_vild/vild.py @@ -176,8 +176,7 @@ class OpenVocabularyDetectionViLD(Model): # Filter out invalid rois (nmsed rois) valid_indices = np.where( np.logical_and( - np.isin( - np.arange(len(roi_scores), dtype=np.int), nmsed_indices), + np.isin(np.arange(len(roi_scores), dtype=int), nmsed_indices), np.logical_and( np.logical_not(np.all(roi_boxes == 0., axis=-1)), np.logical_and(roi_scores >= min_rpn_score_thresh, diff --git a/modelscope/models/cv/panorama_depth_estimation/networks/layers.py b/modelscope/models/cv/panorama_depth_estimation/networks/layers.py index 99e166aa..52fb3d39 100644 --- a/modelscope/models/cv/panorama_depth_estimation/networks/layers.py +++ b/modelscope/models/cv/panorama_depth_estimation/networks/layers.py @@ -72,7 +72,7 @@ class Cube2Equirec(nn.Module): self.equ_h, 0), 3 * self.equ_w // 8, 1) # Prepare ceil mask - mask = np.zeros((self.equ_h, self.equ_w // 4), np.bool) + mask = np.zeros((self.equ_h, self.equ_w // 4), bool) idx = np.linspace(-np.pi, np.pi, self.equ_w // 4) / 4 idx = self.equ_h // 2 - np.round( np.arctan(np.cos(idx)) * self.equ_h / np.pi).astype(int) diff --git a/modelscope/models/cv/video_depth_estimation/utils/depth.py b/modelscope/models/cv/video_depth_estimation/utils/depth.py index e9f287e7..5fbf6aa6 100644 --- a/modelscope/models/cv/video_depth_estimation/utils/depth.py +++ b/modelscope/models/cv/video_depth_estimation/utils/depth.py @@ -29,7 +29,7 @@ def load_depth(file): elif file.endswith('png'): depth_png = np.array(load_image(file), dtype=int) assert (np.max(depth_png) > 255), 'Wrong .png depth file' - return depth_png.astype(np.float) / 256. + return depth_png.astype(float) / 256. 
else: raise NotImplementedError('Depth extension not supported.') diff --git a/modelscope/models/cv/video_frame_interpolation/utils/scene_change_detection.py b/modelscope/models/cv/video_frame_interpolation/utils/scene_change_detection.py index 4cbe60a7..379fe855 100644 --- a/modelscope/models/cv/video_frame_interpolation/utils/scene_change_detection.py +++ b/modelscope/models/cv/video_frame_interpolation/utils/scene_change_detection.py @@ -85,7 +85,7 @@ def do_scene_detect(F01_tensor, F10_tensor, img0_tensor, img1_tensor): img_diff = ori_img.float() - ref_img.float() img_diff = torch.abs(img_diff) - kernel = np.ones([8, 8], np.float) / 64 + kernel = np.ones([8, 8], float) / 64 kernel = torch.FloatTensor(kernel).to(device).unsqueeze(0).unsqueeze(0) diff = F.conv2d(img_diff, kernel, padding=4) diff --git a/modelscope/models/cv/video_multi_object_tracking/tracker/matching.py b/modelscope/models/cv/video_multi_object_tracking/tracker/matching.py index 45d2f5c0..e5c2e8a9 100644 --- a/modelscope/models/cv/video_multi_object_tracking/tracker/matching.py +++ b/modelscope/models/cv/video_multi_object_tracking/tracker/matching.py @@ -27,7 +27,7 @@ def linear_assignment(cost_matrix, thresh): def ious(atlbrs, btlbrs): - ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) + ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=float) if ious.size == 0: return ious @@ -60,13 +60,13 @@ def embedding_distance(tracks, detections, metric='cosine'): cost_matrix: np.ndarray """ - cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) + cost_matrix = np.zeros((len(tracks), len(detections)), dtype=float) if cost_matrix.size == 0: return cost_matrix det_features = np.asarray([track.curr_feat for track in detections], - dtype=np.float) + dtype=float) track_features = np.asarray([track.smooth_feat for track in tracks], - dtype=np.float) + dtype=float) cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) return cost_matrix diff --git a/modelscope/models/cv/video_multi_object_tracking/tracker/multitracker.py b/modelscope/models/cv/video_multi_object_tracking/tracker/multitracker.py index 1dc3297f..d38477b7 100644 --- a/modelscope/models/cv/video_multi_object_tracking/tracker/multitracker.py +++ b/modelscope/models/cv/video_multi_object_tracking/tracker/multitracker.py @@ -28,7 +28,7 @@ class STrack(BaseTrack): def __init__(self, tlwh, score, temp_feat, buffer_size=30): # wait activate - self._tlwh = np.asarray(tlwh, dtype=np.float) + self._tlwh = np.asarray(tlwh, dtype=float) self.kalman_filter = None self.mean, self.covariance = None, None self.is_activated = False diff --git a/modelscope/models/multi_modal/__init__.py b/modelscope/models/multi_modal/__init__.py index e85c48fb..9fa34baf 100644 --- a/modelscope/models/multi_modal/__init__.py +++ b/modelscope/models/multi_modal/__init__.py @@ -20,6 +20,8 @@ if TYPE_CHECKING: from .vldoc import VLDocForDocVLEmbedding from .video_synthesis import TextToVideoSynthesis from .efficient_diffusion_tuning import EfficientStableDiffusion + from .mplug_owl import MplugOwlForConditionalGeneration + from .clip_interrogator import CLIP_Interrogator else: _import_structure = { @@ -37,7 +39,9 @@ else: ['MultiStageDiffusionForTextToImageSynthesis'], 'vldoc': ['VLDocForDocVLEmbedding'], 'video_synthesis': ['TextToVideoSynthesis'], - 'efficient_diffusion_tuning': ['EfficientStableDiffusion'] + 'efficient_diffusion_tuning': ['EfficientStableDiffusion'], + 'mplug_owl': ['MplugOwlForConditionalGeneration'], + 'clip_interrogator': 
['CLIP_Interrogator'], } import sys diff --git a/modelscope/models/multi_modal/clip_interrogator/__init__.py b/modelscope/models/multi_modal/clip_interrogator/__init__.py new file mode 100644 index 00000000..96fefbf6 --- /dev/null +++ b/modelscope/models/multi_modal/clip_interrogator/__init__.py @@ -0,0 +1 @@ +from .model import CLIP_Interrogator diff --git a/modelscope/models/multi_modal/clip_interrogator/model.py b/modelscope/models/multi_modal/clip_interrogator/model.py new file mode 100644 index 00000000..a7e27cbd --- /dev/null +++ b/modelscope/models/multi_modal/clip_interrogator/model.py @@ -0,0 +1,599 @@ +# This implementation is adopted from CLIP-Interrogator, made pubicly available under the MIT License at +# https://github.com/pharmapsychotic/clip-interrogator/blob/main/clip_interrogator/clip_interrogator.py + +import hashlib +import math +import os +import time +from dataclasses import dataclass +from typing import List, Optional + +import numpy as np +import open_clip +import requests +import torch +import torchvision.transforms as transforms +from PIL import Image +from safetensors.numpy import load_file, save_file +from tqdm import tqdm +from transformers import (AutoModelForCausalLM, AutoProcessor, + Blip2ForConditionalGeneration, + BlipForConditionalGeneration) + +from modelscope.metainfo import Models +from modelscope.models.base import TorchModel +from modelscope.models.builder import MODELS +from modelscope.outputs import OutputKeys +from modelscope.preprocessors import LoadImage +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +logger = get_logger() + +__all__ = ['CLIP_Interrogator'] + +CAPTION_MODELS = { + 'blip-base': 'blip-image-captioning-base', + 'blip-large': 'blip-image-captioning-large', + 'blip2-2.7b': 'blip2-opt-2.7b', + 'blip2-flan-t5-xl': 'blip2-flan-t5-xl', + 'git-large-coco': 'git-large-coco', +} + + +@dataclass +class Config: + # models can optionally be passed in directly + caption_model = None + caption_processor = None + clip_model = None + clip_preprocess = None + + # blip settings + caption_max_length: int = 32 + caption_model_name: Optional[ + str] = 'blip-large' # use a key from CAPTION_MODELS or None + caption_offload: bool = False + + # clip settings + clip_model_name: str = 'ViT-L-14/openai' + clip_model_path: Optional[str] = None + clip_offload: bool = False + + # interrogator settings + cache_path: str = 'cache' # path to store cached text embeddings + download_cache: bool = False # when true, cached embeds are downloaded from huggingface + chunk_size: int = 2048 # batch size for CLIP, use smaller for lower VRAM + data_path: str = os.path.join(os.path.dirname(__file__), 'data') + device: str = ('cuda' if torch.cuda.is_available() else 'cpu') + flavor_intermediate_count: int = 2048 + quiet: bool = False # when quiet progress bars are not shown + + def apply_low_vram_defaults(self): + self.caption_model_name = 'blip-base' + self.caption_offload = True + self.clip_offload = True + self.chunk_size = 1024 + self.flavor_intermediate_count = 1024 + + +# CLIP-Interrogator utilize CLIP and BLIP to generate rich caption for images. +# CLIP is a zero-shot image classifier which can be used to generate image and text embeddings. +# BLIP is a new VLP framework which transfers flexibly to both vision-language understanding and generation tasks. 
+# BLIP effectively utilizes the noisy web data by bootstrapping the captions, where +# a captioner generates synthetic captions and a filter removes the noisy ones. +# Please infer to the paper CLIP: Learning Transferable Visual Models From Natural Language Supervision +# https://arxiv.org/abs/2103.00020 +# BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation +# https://arxiv.org/abs/2201.12086 + + +class Interrogator(): + + def __init__(self, config: Config): + self.config = config + self.device = config.device + self.dtype = torch.float16 if self.device == 'cuda' else torch.float32 + self.caption_offloaded = True + self.clip_offloaded = True + self.load_caption_model() + self.load_clip_model() + + def load_caption_model(self): + if self.config.caption_model is None and self.config.caption_model_name: + if not self.config.quiet: + print( + f'Loading caption model {self.config.caption_model_name}...' + ) + + model_path = CAPTION_MODELS[self.config.caption_model_name] + if self.config.caption_model_name.startswith('git-'): + caption_model = AutoModelForCausalLM.from_pretrained( + os.path.join(self.config.cache_path, model_path), + torch_dtype=torch.float32) + elif self.config.caption_model_name.startswith('blip2-'): + caption_model = Blip2ForConditionalGeneration.from_pretrained( + os.path.join(self.config.cache_path, model_path), + torch_dtype=self.dtype) + else: + caption_model = BlipForConditionalGeneration.from_pretrained( + os.path.join(self.config.cache_path, model_path), + torch_dtype=self.dtype) + self.caption_processor = AutoProcessor.from_pretrained( + os.path.join(self.config.cache_path, model_path)) + + caption_model.eval() + if not self.config.caption_offload: + caption_model = caption_model.to(self.config.device) + self.caption_model = caption_model + else: + self.caption_model = self.config.caption_model + self.caption_processor = self.config.caption_processor + + def load_clip_model(self): + start_time = time.time() + config = self.config + + clip_model_name, clip_model_pretrained_name = config.clip_model_name.split( + '/', 2) + + if config.clip_model is None: + if not config.quiet: + print(f'Loading CLIP model {config.clip_model_name}...') + + self.clip_model, _, self.clip_preprocess = open_clip.create_model_and_transforms( + clip_model_name, + pretrained=clip_model_pretrained_name, + precision='fp16' if config.device == 'cuda' else 'fp32', + device=config.device, + jit=False, + cache_dir=config.clip_model_path) + self.clip_model.eval() + else: + self.clip_model = config.clip_model + self.clip_preprocess = config.clip_preprocess + self.tokenize = open_clip.get_tokenizer(clip_model_name) + + sites = [ + 'Artstation', 'behance', 'cg society', 'cgsociety', 'deviantart', + 'dribbble', 'flickr', 'instagram', 'pexels', 'pinterest', + 'pixabay', 'pixiv', 'polycount', 'reddit', 'shutterstock', + 'tumblr', 'unsplash', 'zbrush central' + ] + trending_list = [site for site in sites] + trending_list.extend(['trending on ' + site for site in sites]) + trending_list.extend(['featured on ' + site for site in sites]) + trending_list.extend([site + ' contest winner' for site in sites]) + + raw_artists = load_list(config.data_path, 'artists.txt') + artists = [f'by {a}' for a in raw_artists] + artists.extend([f'inspired by {a}' for a in raw_artists]) + + self._prepare_clip() + self.artists = LabelTable(artists, 'artists', self) + self.flavors = LabelTable( + load_list(config.data_path, 'flavors.txt'), 'flavors', self) + self.mediums = 
LabelTable( + load_list(config.data_path, 'mediums.txt'), 'mediums', self) + self.movements = LabelTable( + load_list(config.data_path, 'movements.txt'), 'movements', self) + self.trendings = LabelTable(trending_list, 'trendings', self) + self.negative = LabelTable( + load_list(config.data_path, 'negative.txt'), 'negative', self) + + end_time = time.time() + if not config.quiet: + print( + f'Loaded CLIP model and data in {end_time-start_time:.2f} seconds.' + ) + + def chain(self, + image_features: torch.Tensor, + phrases: List[str], + best_prompt: str = '', + best_sim: float = 0, + min_count: int = 8, + max_count: int = 32, + desc='Chaining', + reverse: bool = False) -> str: + self._prepare_clip() + + phrases = set(phrases) + if not best_prompt: + best_prompt = self.rank_top( + image_features, [f for f in phrases], reverse=reverse) + best_sim = self.similarity(image_features, best_prompt) + phrases.remove(best_prompt) + curr_prompt, curr_sim = best_prompt, best_sim + + def check(addition: str, idx: int) -> bool: + nonlocal best_prompt, best_sim, curr_prompt, curr_sim + prompt = curr_prompt + ', ' + addition + sim = self.similarity(image_features, prompt) + if reverse: + sim = -sim + + if sim > best_sim: + best_prompt, best_sim = prompt, sim + if sim > curr_sim or idx < min_count: + curr_prompt, curr_sim = prompt, sim + return True + return False + + for idx in tqdm( + range(max_count), desc=desc, disable=self.config.quiet): + best = self.rank_top( + image_features, [f'{curr_prompt}, {f}' for f in phrases], + reverse=reverse) + flave = best[len(curr_prompt) + 2:] + if not check(flave, idx): + break + if _prompt_at_max_len(curr_prompt, self.tokenize): + break + phrases.remove(flave) + + return best_prompt + + def generate_caption(self, pil_image: Image) -> str: + assert self.caption_model is not None, 'No caption model loaded.' 
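+        # Move the caption model onto the device (offloading the CLIP model if configured),
+        # then run the BLIP / BLIP-2 / GIT captioner on the image and decode the result.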
+ self._prepare_caption() + inputs = self.caption_processor( + images=pil_image, return_tensors='pt').to(self.device) + if not self.config.caption_model_name.startswith('git-'): + inputs = inputs.to(self.dtype) + tokens = self.caption_model.generate( + **inputs, max_new_tokens=self.config.caption_max_length) + return self.caption_processor.batch_decode( + tokens, skip_special_tokens=True)[0].strip() + + def image_to_features(self, image: Image) -> torch.Tensor: + self._prepare_clip() + images = self.clip_preprocess(image).unsqueeze(0).to(self.device) + with torch.no_grad(), torch.cuda.amp.autocast(): + image_features = self.clip_model.encode_image(images) + image_features /= image_features.norm(dim=-1, keepdim=True) + return image_features + + def interrogate_classic(self, + image: Image, + max_flavors: int = 3, + caption: Optional[str] = None) -> str: + """Classic mode creates a prompt in a standard format first describing the image, + then listing the artist, trending, movement, and flavor text modifiers.""" + caption = caption or self.generate_caption(image) + image_features = self.image_to_features(image) + + medium = self.mediums.rank(image_features, 1)[0] + artist = self.artists.rank(image_features, 1)[0] + trending = self.trendings.rank(image_features, 1)[0] + movement = self.movements.rank(image_features, 1)[0] + flaves = ', '.join(self.flavors.rank(image_features, max_flavors)) + + if caption.startswith(medium): + prompt = f'{caption} {artist}, {trending}, {movement}, {flaves}' + else: + prompt = f'{caption}, {medium} {artist}, {trending}, {movement}, {flaves}' + + return _truncate_to_fit(prompt, self.tokenize) + + def interrogate_fast(self, + image: Image, + max_flavors: int = 32, + caption: Optional[str] = None) -> str: + """Fast mode simply adds the top ranked terms after a caption. It generally results in + better similarity between generated prompt and image than classic mode, but the prompts + are less readable.""" + caption = caption or self.generate_caption(image) + image_features = self.image_to_features(image) + merged = _merge_tables([ + self.artists, self.flavors, self.mediums, self.movements, + self.trendings + ], self) + tops = merged.rank(image_features, max_flavors) + return _truncate_to_fit(caption + ', ' + ', '.join(tops), + self.tokenize) + + def interrogate_negative(self, image: Image, max_flavors: int = 32) -> str: + """Negative mode chains together the most dissimilar terms to the image. 
It can be used + to help build a negative prompt to pair with the regular positive prompt and often + improve the results of generated images particularly with Stable Diffusion 2.""" + image_features = self.image_to_features(image) + flaves = self.flavors.rank( + image_features, + self.config.flavor_intermediate_count, + reverse=True) + flaves = flaves + self.negative.labels + return self.chain( + image_features, + flaves, + max_count=max_flavors, + reverse=True, + desc='Negative chain') + + def interrogate(self, + image: Image, + min_flavors: int = 8, + max_flavors: int = 32, + caption: Optional[str] = None) -> str: + caption = caption or self.generate_caption(image) + image_features = self.image_to_features(image) + + merged = _merge_tables([ + self.artists, self.flavors, self.mediums, self.movements, + self.trendings + ], self) + flaves = merged.rank(image_features, + self.config.flavor_intermediate_count) + best_prompt, best_sim = caption, self.similarity( + image_features, caption) + best_prompt = self.chain( + image_features, + flaves, + best_prompt, + best_sim, + min_count=min_flavors, + max_count=max_flavors, + desc='Flavor chain') + + fast_prompt = self.interrogate_fast( + image, max_flavors, caption=caption) + classic_prompt = self.interrogate_classic( + image, max_flavors, caption=caption) + candidates = [caption, classic_prompt, fast_prompt, best_prompt] + return candidates[np.argmax( + self.similarities(image_features, candidates))] + + def rank_top(self, + image_features: torch.Tensor, + text_array: List[str], + reverse: bool = False) -> str: + self._prepare_clip() + text_tokens = self.tokenize([text + for text in text_array]).to(self.device) + with torch.no_grad(), torch.cuda.amp.autocast(): + text_features = self.clip_model.encode_text(text_tokens) + text_features /= text_features.norm(dim=-1, keepdim=True) + similarity = text_features @ image_features.T + if reverse: + similarity = -similarity + return text_array[similarity.argmax().item()] + + def similarity(self, image_features: torch.Tensor, text: str) -> float: + self._prepare_clip() + text_tokens = self.tokenize([text]).to(self.device) + with torch.no_grad(), torch.cuda.amp.autocast(): + text_features = self.clip_model.encode_text(text_tokens) + text_features /= text_features.norm(dim=-1, keepdim=True) + similarity = text_features @ image_features.T + return similarity[0][0].item() + + def similarities(self, image_features: torch.Tensor, + text_array: List[str]) -> List[float]: + self._prepare_clip() + text_tokens = self.tokenize([text + for text in text_array]).to(self.device) + with torch.no_grad(), torch.cuda.amp.autocast(): + text_features = self.clip_model.encode_text(text_tokens) + text_features /= text_features.norm(dim=-1, keepdim=True) + similarity = text_features @ image_features.T + return similarity.T[0].tolist() + + def _prepare_caption(self): + if self.config.clip_offload and not self.clip_offloaded: + self.clip_model = self.clip_model.to('cpu') + self.clip_offloaded = True + if self.caption_offloaded: + self.caption_model = self.caption_model.to(self.device) + self.caption_offloaded = False + + def _prepare_clip(self): + if self.config.caption_offload and not self.caption_offloaded: + self.caption_model = self.caption_model.to('cpu') + self.caption_offloaded = True + if self.clip_offloaded: + self.clip_model = self.clip_model.to(self.device) + self.clip_offloaded = False + + +class LabelTable(): + + def __init__(self, labels: List[str], desc: str, ci: Interrogator): + clip_model, config = ci.clip_model, 
ci.config + self.chunk_size = config.chunk_size + self.config = config + self.device = config.device + self.embeds = [] + self.labels = labels + self.tokenize = ci.tokenize + + hash = hashlib.sha256(','.join(labels).encode()).hexdigest() + sanitized_name = self.config.clip_model_name.replace('/', '_').replace( + '@', '_') + self._load_cached(desc, hash, sanitized_name) + + if len(self.labels) != len(self.embeds): + self.embeds = [] + chunks = np.array_split( + self.labels, max(1, + len(self.labels) / config.chunk_size)) + for chunk in tqdm( + chunks, + desc=f'Preprocessing {desc}' if desc else None, + disable=self.config.quiet): + text_tokens = self.tokenize(chunk).to(self.device) + with torch.no_grad(), torch.cuda.amp.autocast(): + text_features = clip_model.encode_text(text_tokens) + text_features /= text_features.norm(dim=-1, keepdim=True) + text_features = text_features.half().cpu().numpy() + for i in range(text_features.shape[0]): + self.embeds.append(text_features[i]) + + if desc and self.config.cache_path: + os.makedirs(self.config.cache_path, exist_ok=True) + cache_filepath = os.path.join( + self.config.cache_path, + f'{sanitized_name}_{desc}.safetensors') + tensors = { + 'embeds': np.stack(self.embeds), + 'hash': np.array([ord(c) for c in hash], dtype=np.int8) + } + save_file(tensors, cache_filepath) + + if self.device == 'cpu' or self.device == torch.device('cpu'): + self.embeds = [e.astype(np.float32) for e in self.embeds] + + def _load_cached(self, desc: str, hash: str, sanitized_name: str) -> bool: + if self.config.cache_path is None or desc is None: + return False + + cached_safetensors = os.path.join( + self.config.cache_path, f'{sanitized_name}_{desc}.safetensors') + + if os.path.exists(cached_safetensors): + try: + tensors = load_file(cached_safetensors) + except Exception as e: + print(f'Failed to load {cached_safetensors}') + print(e) + return False + if 'hash' in tensors and 'embeds' in tensors: + if np.array_equal( + tensors['hash'], + np.array([ord(c) for c in hash], dtype=np.int8)): + self.embeds = tensors['embeds'] + if len(self.embeds.shape) == 2: + self.embeds = [ + self.embeds[i] for i in range(self.embeds.shape[0]) + ] + return True + + return False + + def _rank(self, + image_features: torch.Tensor, + text_embeds: torch.Tensor, + top_count: int = 1, + reverse: bool = False) -> str: + top_count = min(top_count, len(text_embeds)) + text_embeds = torch.stack([torch.from_numpy(t) + for t in text_embeds]).to(self.device) + with torch.cuda.amp.autocast(): + similarity = image_features @ text_embeds.T + if reverse: + similarity = -similarity + _, top_labels = similarity.float().cpu().topk(top_count, dim=-1) + return [top_labels[0][i].numpy() for i in range(top_count)] + + def rank(self, + image_features: torch.Tensor, + top_count: int = 1, + reverse: bool = False) -> List[str]: + if len(self.labels) <= self.chunk_size: + tops = self._rank( + image_features, + self.embeds, + top_count=top_count, + reverse=reverse) + return [self.labels[i] for i in tops] + + num_chunks = int(math.ceil(len(self.labels) / self.chunk_size)) + keep_per_chunk = int(self.chunk_size / num_chunks) + + top_labels, top_embeds = [], [] + for chunk_idx in tqdm(range(num_chunks), disable=self.config.quiet): + start = chunk_idx * self.chunk_size + stop = min(start + self.chunk_size, len(self.embeds)) + tops = self._rank( + image_features, + self.embeds[start:stop], + top_count=keep_per_chunk, + reverse=reverse) + top_labels.extend([self.labels[start + i] for i in tops]) + 
top_embeds.extend([self.embeds[start + i] for i in tops]) + + tops = self._rank(image_features, top_embeds, top_count=top_count) + return [top_labels[i] for i in tops] + + +def _download_file(url: str, + filepath: str, + chunk_size: int = 4 * 1024 * 1024, + quiet: bool = False): + r = requests.get(url, stream=True) + if r.status_code != 200: + return + + file_size = int(r.headers.get('Content-Length', 0)) + filename = url.split('/')[-1] + progress = tqdm( + total=file_size, + unit='B', + unit_scale=True, + desc=filename, + disable=quiet) + with open(filepath, 'wb') as f: + for chunk in r.iter_content(chunk_size=chunk_size): + if chunk: + f.write(chunk) + progress.update(len(chunk)) + progress.close() + + +def _merge_tables(tables: List[LabelTable], ci: Interrogator) -> LabelTable: + m = LabelTable([], None, ci) + for table in tables: + m.labels.extend(table.labels) + m.embeds.extend(table.embeds) + return m + + +def _prompt_at_max_len(text: str, tokenize) -> bool: + tokens = tokenize([text]) + return tokens[0][-1] != 0 + + +def _truncate_to_fit(text: str, tokenize) -> str: + parts = text.split(', ') + new_text = parts[0] + for part in parts[1:]: + if _prompt_at_max_len(new_text + part, tokenize): + break + new_text += ', ' + part + return new_text + + +def list_caption_models() -> List[str]: + return list(CAPTION_MODELS.keys()) + + +def list_clip_models() -> List[str]: + return ['/'.join(x) for x in open_clip.list_pretrained()] + + +def load_list(data_path: str, filename: Optional[str] = None) -> List[str]: + """Load a list of strings from a file.""" + if filename is not None: + data_path = os.path.join(data_path, filename) + with open(data_path, 'r', encoding='utf-8', errors='replace') as f: + items = [line.strip() for line in f.readlines()] + return items + + +@MODELS.register_module( + Tasks.image_captioning, module_name=Models.clip_interrogator) +class CLIP_Interrogator(TorchModel): + + def __init__(self, model_dir, device='cuda', device_id=0, *args, **kwargs): + super().__init__( + model_dir=model_dir, device_id=device_id, *args, **kwargs) + self.device = device + self.dtype = torch.float16 if self.device == 'cuda' else torch.float32 + cf = Config(clip_model_name='ViT-L-14/openai') + cf.data_path = os.path.join(model_dir, 'data') + cf.clip_model_path = model_dir + cf.cache_path = model_dir + self.ci = Interrogator(cf) + + def forward(self, inputs): + image = transforms.ToPILImage()(inputs) + return {'caption': self.ci.interrogate(image)} diff --git a/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py b/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py index 813f750e..743c049a 100644 --- a/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py +++ b/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py @@ -128,13 +128,13 @@ class VideoCLIPForMultiModalEmbedding(TorchModel): local_transform, s=None, e=None): - video_mask = np.zeros(self.max_frames, dtype=np.long) + video_mask = np.zeros(self.max_frames, dtype=int) max_video_length = 0 # T x 3 x H x W video = np.zeros((self.max_frames, 3, rawVideoExtractor.size, rawVideoExtractor.size), - dtype=np.float) + dtype=float) if s is None: start_time, end_time = None, None diff --git a/modelscope/models/multi_modal/mplug_owl/__init__.py b/modelscope/models/multi_modal/mplug_owl/__init__.py new file mode 100644 index 00000000..76ccfb5a --- /dev/null +++ b/modelscope/models/multi_modal/mplug_owl/__init__.py @@ -0,0 +1,18 @@ +# Copyright 2021-2023 The Alibaba DAMO mPLUG 
Authors. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .configuration_mplug_owl import (MplugOwlConfig, MplugOwlVisionConfig, + MplugOwlVisualAbstractorConfig) +from .modeling_mplug_owl import MplugOwlForConditionalGeneration diff --git a/modelscope/models/multi_modal/mplug_owl/configuration_mplug_owl.py b/modelscope/models/multi_modal/mplug_owl/configuration_mplug_owl.py new file mode 100644 index 00000000..6e32238a --- /dev/null +++ b/modelscope/models/multi_modal/mplug_owl/configuration_mplug_owl.py @@ -0,0 +1,257 @@ +# Copyright 2021-2023 The Alibaba DAMO mPLUG Team Authors. +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" MPLUG OWL model configuration """ +import copy +import os +from typing import Union + +from transformers import PretrainedConfig +from transformers.models.auto import CONFIG_MAPPING +from transformers.utils import logging + +from modelscope.utils.constant import Tasks + +logger = logging.get_logger() + + +class MplugOwlVisionConfig(PretrainedConfig): + r""" + Args: + hidden_size (`int`, *optional*, defaults to 768): + Dimensionality of the encoder layers and the pooler layer. + intermediate_size (`int`, *optional*, defaults to 3072): + Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder. + num_hidden_layers (`int`, *optional*, defaults to 12): + Number of hidden layers in the Transformer encoder. + num_attention_heads (`int`, *optional*, defaults to 12): + Number of attention heads for each attention layer in the Transformer encoder. + image_size (`int`, *optional*, defaults to 224): + The size (resolution) of each image. + patch_size (`int`, *optional*, defaults to 32): + The size (resolution) of each patch. + hidden_act (`str` or `function`, *optional*, defaults to `"quick_gelu"`): + The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`, + `"relu"`, `"selu"` and `"gelu_new"` ``"quick_gelu"` are supported. + layer_norm_eps (`float`, *optional*, defaults to 1e-5): + The epsilon used by the layer normalization layers. + attention_dropout (`float`, *optional*, defaults to 0.0): + The dropout ratio for the attention probabilities. + initializer_range (`float`, *optional*, defaults to 0.02): + The standard deviation of the truncated_normal_initializer for initializing all weight matrices. 
+ initializer_factor (`float`, *optional*, defaults to 1): + A factor for initializing all weight matrices (should be kept to 1, used internally for initialization + testing). + ```""" + + model_type = 'mplug_owl_vision_model' + + def __init__( + self, + hidden_size=1024, + intermediate_size=4096, + projection_dim=768, + num_hidden_layers=24, + num_attention_heads=16, + num_channels=3, + image_size=224, + patch_size=14, + hidden_act='quick_gelu', + layer_norm_eps=1e-6, + attention_dropout=0.0, + initializer_range=0.02, + initializer_factor=1.0, + use_flash_attn=False, + **kwargs, + ): + super().__init__(**kwargs) + + self.hidden_size = hidden_size + self.intermediate_size = intermediate_size + self.projection_dim = projection_dim + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.num_channels = num_channels + self.patch_size = patch_size + self.image_size = image_size + self.initializer_range = initializer_range + self.initializer_factor = initializer_factor + self.attention_dropout = attention_dropout + self.layer_norm_eps = layer_norm_eps + self.hidden_act = hidden_act + self.use_flash_attn = use_flash_attn + + @classmethod + def from_pretrained(cls, pretrained_model_name_or_path: Union[str, + os.PathLike], + **kwargs) -> 'PretrainedConfig': + config_dict, kwargs = cls.get_config_dict( + pretrained_model_name_or_path, **kwargs) + + # get the vision config dict if we are loading from MplugOwlConfig + if config_dict.get('model_type') == 'mplug_owl': + config_dict = config_dict['vision_config'] + + if 'model_type' in config_dict and hasattr( + cls, + 'model_type') and config_dict['model_type'] != cls.model_type: + logger.warning( + f"You are using a model of type {config_dict['model_type']} to instantiate a model of type " + f'{cls.model_type}. This is not supported for all configurations of models and can yield errors.' + ) + + return cls.from_dict(config_dict, **kwargs) + + +class MplugOwlVisualAbstractorConfig(PretrainedConfig): + + model_type = 'MPlugOwlVisualAbstractor' + + def __init__( + self, + hidden_size=1024, + num_hidden_layers=6, + num_attention_heads=16, + intermediate_size=4096, + attention_probs_dropout_prob=0.1, + initializer_range=0.02, + layer_norm_eps=1e-6, + encoder_hidden_size=1024, + **kwargs, + ): + super().__init__(**kwargs) + + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.intermediate_size = intermediate_size + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.initializer_range = initializer_range + self.layer_norm_eps = layer_norm_eps + self.encoder_hidden_size = encoder_hidden_size + + @classmethod + def from_pretrained(cls, pretrained_model_name_or_path: Union[str, + os.PathLike], + **kwargs) -> 'PretrainedConfig': + config_dict, kwargs = cls.get_config_dict( + pretrained_model_name_or_path, **kwargs) + + # get the qformer config dict if we are loading from MplugOwlConfig + if config_dict.get('model_type') == 'mplug_owl': + config_dict = config_dict['abstractor_config'] + + if 'model_type' in config_dict and hasattr( + cls, + 'model_type') and config_dict['model_type'] != cls.model_type: + logger.warning( + f"You are using a model of type {config_dict['model_type']} to instantiate a model of type " + f'{cls.model_type}. This is not supported for all configurations of models and can yield errors.' 
+ ) + + return cls.from_dict(config_dict, **kwargs) + + +class MplugOwlConfig(PretrainedConfig): + r""" + Args: + vision_config (`dict`, *optional*): + Dictionary of configuration options used to initialize [`MplugOwlVisionConfig`]. + qformer_config (`dict`, *optional*): + Dictionary of configuration options used to initialize [`MplugOwlVisualAbstractorConfig`]. + text_config (`dict`, *optional*): + Dictionary of configuration options used to initialize any [`PretrainedConfig`]. + num_query_tokens (`int`, *optional*, defaults to 32): + The number of query tokens passed through the Transformer. + + kwargs (*optional*): + Dictionary of keyword arguments. + """ + + model_type = 'mplug_owl' + is_composition = True + + def __init__(self, + task=Tasks.multimodal_dialogue, + vision_config=None, + visual_abstractor_config=None, + text_config=None, + num_query_tokens=64, + **kwargs): + + super().__init__(**kwargs) + self.task = task + if vision_config is None: + vision_config = MplugOwlVisionConfig().to_dict() + logger.info('vision_config is None.') + + if visual_abstractor_config is None: + visual_abstractor_config = {} + logger.info('abstractor_config is None. ') + + if text_config is None: + # we use LLAMA 7b by default + from transformers.models.llama.configuration_llama import \ + LlamaConfig + text_config = LlamaConfig(pad_token_id=2).to_dict() + logger.info('text_config is None.') + + self.vision_config = MplugOwlVisionConfig(**vision_config) + self.visual_abstractor_config = MplugOwlVisualAbstractorConfig( + **visual_abstractor_config) + text_model_type = text_config[ + 'model_type'] if 'model_type' in text_config else 'llama' + self.text_config = CONFIG_MAPPING[text_model_type](**text_config) + + self.tie_word_embeddings = self.text_config.tie_word_embeddings + + self.num_query_tokens = num_query_tokens + self.initializer_factor = 1.0 + self.initializer_range = 0.02 + + @classmethod + def from_vision_abstractor_text_configs( + cls, + vision_config: MplugOwlVisionConfig, + visual_abstractor_config: MplugOwlVisualAbstractorConfig, + text_config: PretrainedConfig, + **kwargs, + ): + r""" + Returns: + [`MplugOwlConfig`]: An instance of a configuration object + """ + + return cls( + vision_config=vision_config.to_dict(), + visual_abstractor_config=visual_abstractor_config.to_dict(), + text_config=text_config.to_dict(), + **kwargs, + ) + + def to_dict(self): + """ + Serializes this instance to a Python dictionary. Override the default [`~PretrainedConfig.to_dict`]. + + Returns: + `Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance, + """ + output = copy.deepcopy(self.__dict__) + output['vision_config'] = self.vision_config.to_dict() + tmp = self.visual_abstractor_config.to_dict() + output['visual_abstractor_config'] = tmp + output['text_config'] = self.text_config.to_dict() + output['model_type'] = self.__class__.model_type + return output diff --git a/modelscope/models/multi_modal/mplug_owl/modeling_mplug_owl.py b/modelscope/models/multi_modal/mplug_owl/modeling_mplug_owl.py new file mode 100644 index 00000000..21a29185 --- /dev/null +++ b/modelscope/models/multi_modal/mplug_owl/modeling_mplug_owl.py @@ -0,0 +1,1551 @@ +# Copyright 2021-2023 The Alibaba DAMO mPLUG Team Authors. +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch MPLUG OWL model. """ + +import copy +import logging +import math +import os +import os.path as osp +import random +from dataclasses import dataclass +from io import BytesIO +from typing import Any, Dict, List, Optional, Tuple, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint +import transformers +from torch.nn import CrossEntropyLoss +from transformers.activations import ACT2FN +from transformers.modeling_outputs import ( + BaseModelOutput, BaseModelOutputWithPastAndCrossAttentions, + BaseModelOutputWithPooling, BaseModelOutputWithPoolingAndCrossAttentions, + CausalLMOutputWithCrossAttentions) +from transformers.modeling_utils import (PreTrainedModel, + apply_chunking_to_forward, + find_pruneable_heads_and_indices, + prune_linear_layer) +from transformers.models.auto import AutoModelForCausalLM +from transformers.utils import ModelOutput + +from modelscope.metainfo import Models +from modelscope.models import TorchModel +from modelscope.models.base import Tensor +from modelscope.models.builder import MODELS +from modelscope.models.multi_modal.mplug_owl.configuration_mplug_owl import ( + MplugOwlConfig, MplugOwlVisionConfig, MplugOwlVisualAbstractorConfig) +from modelscope.outputs import OutputKeys +from modelscope.utils.config import Config +from modelscope.utils.constant import ModelFile, Tasks + +__all__ = ['MplugOwlForConditionalGeneration'] + + +@dataclass +class MplugOwlForConditionalGenerationModelOutput(ModelOutput): + """ + Class defining the outputs of [`MPlugOwlForConditionalGeneration`]. + + Args: + loss (`torch.FloatTensor`, *optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`): + Language modeling loss from the language model. + logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`): + Prediction scores of the language modeling head of the language model. + vision_outputs (`BaseModelOutputWithPooling`): + Outputs of the vision encoder. + + language_model_outputs (`CausalLMOutputWithPast`): + Outputs of the language model. + """ + + loss: Optional[Tuple[torch.FloatTensor]] = None + logits: Optional[Tuple[torch.FloatTensor]] = None + vision_outputs: Optional[torch.FloatTensor] = None + language_model_outputs: Optional[Tuple[torch.FloatTensor]] = None + + def to_tuple(self) -> Tuple[Any]: + return tuple( + self[k] if k not in ['vision_outputs', 'language_model_outputs' + ] else getattr(self, k).to_tuple() + for k in self.keys()) + + +def get_ltor_masks_and_position_ids_from_embeddings(data): + """Build masks and position id for left to right model.""" + + # Extract batch size and sequence length. + micro_batch_size, seq_length = data.size()[:2] + + # Attention mask (lower triangular). + att_mask_batch = 1 + attention_mask = torch.tril( + torch.ones((att_mask_batch, seq_length, seq_length), + device=data.device)).view(att_mask_batch, 1, seq_length, + seq_length) + + # Loss mask. 
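+    # (all ones: no positions are excluded from the loss in this variant)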
+ loss_mask = torch.ones( + data.size()[:2], dtype=torch.float, device=data.device) + + # Position ids. + position_ids = torch.arange( + seq_length, dtype=torch.long, device=data.device) + position_ids = position_ids.unsqueeze(0).expand_as(data[..., 0]) + + # Convert attention mask to binary: + attention_mask = (attention_mask < 0.5) + + return attention_mask, loss_mask, position_ids + + +class MplugOwlVisionEmbeddings(nn.Module): + + def __init__(self, config: MplugOwlVisionConfig): + super().__init__() + self.config = config + self.hidden_size = config.hidden_size + self.image_size = config.image_size + self.patch_size = config.patch_size + + self.cls_token = nn.Parameter(torch.randn(1, 1, self.hidden_size)) + + self.patch_embed = nn.Conv2d( + in_channels=3, + out_channels=self.hidden_size, + kernel_size=self.patch_size, + stride=self.patch_size, + bias=False) + + self.num_patches = (self.image_size // self.patch_size)**2 + + self.position_embedding = nn.Parameter( + torch.randn(1, self.num_patches + 1, self.hidden_size)) + + self.pre_layernorm = LayerNormFp32( + self.hidden_size, eps=config.layer_norm_eps) + + def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor: + batch_size = pixel_values.size(0) + image_embeds = self.patch_embed(pixel_values) + image_embeds = image_embeds.flatten(2).transpose(1, 2) + + class_embeds = self.cls_token.expand(batch_size, 1, + -1).to(image_embeds.dtype) + embeddings = torch.cat([class_embeds, image_embeds], dim=1) + embeddings = embeddings + \ + self.position_embedding[:, : embeddings.size(1)].to( + image_embeds.dtype) + embeddings = self.pre_layernorm(embeddings) + return embeddings + + +class LayerNormFp32(nn.LayerNorm): + """Subclass torch's LayerNorm to handle fp16 (by casting to float32 and back).""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def forward(self, x: torch.Tensor): + output = torch.nn.functional.layer_norm( + x.float(), + self.normalized_shape, + self.weight.float() if self.weight is not None else None, + self.bias.float() if self.bias is not None else None, + self.eps, + ) + return output.type_as(x) + + +class MplugOwlVisionAttention(nn.Module): + """Multi-headed attention from 'Attention Is All You Need' paper""" + + def __init__(self, config): + super().__init__() + self.config = config + self.hidden_size = config.hidden_size + self.num_heads = config.num_attention_heads + self.head_dim = self.hidden_size // self.num_heads + if self.head_dim * self.num_heads != self.hidden_size: + raise ValueError( + f'hidden_size must be divisible by num_heads (got `hidden_size`: {self.hidden_size} and `num_heads`:' + f' {self.num_heads}).') + self.scale = self.head_dim**-0.5 + self.dropout = nn.Dropout(config.attention_dropout) + + self.query_key_value = nn.Linear(self.hidden_size, + 3 * self.hidden_size) + self.dense = nn.Linear(self.hidden_size, self.hidden_size) + + def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): + return tensor.view(bsz, seq_len, self.num_heads, + self.head_dim).transpose(1, 2).contiguous() + + def forward( + self, + hidden_states: torch.Tensor, + head_mask: Optional[torch.Tensor] = None, + output_attentions: Optional[bool] = False, + ) -> Tuple[torch.Tensor, Optional[torch.Tensor], + Optional[Tuple[torch.Tensor]]]: + """Input shape: Batch x Time x Channel""" + + bsz, seq_len, embed_dim = hidden_states.size() + + mixed_qkv = self.query_key_value(hidden_states) + + mixed_qkv = mixed_qkv.reshape(bsz, seq_len, self.num_heads, 3, + embed_dim // 
self.num_heads).permute( + 3, 0, 2, 1, 4) # [3, b, np, sq, hn] + query_states, key_states, value_states = ( + mixed_qkv[0], + mixed_qkv[1], + mixed_qkv[2], + ) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_states, + key_states.transpose(-1, -2)) + + attention_scores = attention_scores * self.scale + + # Normalize the attention scores to probabilities. + attention_probs = torch.softmax(attention_scores, dim=-1) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. + attention_probs = self.dropout(attention_probs) + + # Mask heads if we want to + if head_mask is not None: + attention_probs = attention_probs * head_mask + + context_layer = torch.matmul(attention_probs, + value_states).permute(0, 2, 1, 3) + + new_context_layer_shape = context_layer.size()[:-2] + ( + self.hidden_size, ) + context_layer = context_layer.reshape(new_context_layer_shape) + + output = self.dense(context_layer) + + outputs = (output, attention_probs) if output_attentions else (output, + None) + + return outputs + + +class QuickGELU(nn.Module): + + def forward(self, x: torch.Tensor): + return x * torch.sigmoid(1.702 * x) + + +class MplugOwlMLP(nn.Module): + + def __init__(self, config): + super().__init__() + self.config = config + self.activation_fn = QuickGELU() + self.fc1 = nn.Linear(config.hidden_size, config.intermediate_size) + self.fc2 = nn.Linear(config.intermediate_size, config.hidden_size) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.fc1(hidden_states) + hidden_states = self.activation_fn(hidden_states) + hidden_states = self.fc2(hidden_states) + return hidden_states + + +class MplugOwlVisionEncoderLayer(nn.Module): + + def __init__(self, config: MplugOwlVisionConfig): + super().__init__() + self.hidden_size = config.hidden_size + self.self_attn = MplugOwlVisionAttention(config) + self.input_layernorm = LayerNormFp32( + self.hidden_size, eps=config.layer_norm_eps) + self.mlp = MplugOwlMLP(config) + self.post_attention_layernorm = LayerNormFp32( + self.hidden_size, eps=config.layer_norm_eps) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: torch.Tensor, + output_attentions: Optional[bool] = False, + ) -> Tuple[torch.FloatTensor]: + """ + Args: + hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)` + attention_mask (`torch.FloatTensor`): attention mask of size + `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values. + `(config.encoder_attention_heads,)`. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under + returned tensors for more detail. 
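+        Returns:
+            `Tuple[torch.FloatTensor]`: the layer output `(hidden_states,)`, plus the attention
+            weights when `output_attentions=True`.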
+ """ + residual = hidden_states + + hidden_states = self.input_layernorm(hidden_states) + hidden_states, attn_weights = self.self_attn( + hidden_states=hidden_states, + head_mask=attention_mask, + output_attentions=output_attentions, + ) + hidden_states = hidden_states + residual + residual = hidden_states + hidden_states = self.post_attention_layernorm(hidden_states) + hidden_states = self.mlp(hidden_states) + + hidden_states = hidden_states + residual + + outputs = (hidden_states, ) + + if output_attentions: + outputs += (attn_weights, ) + + return outputs + + +class MplugOwlPreTrainedModel(PreTrainedModel): + """ + An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained + models. + """ + + config_class = MplugOwlConfig + base_model_prefix = 'mplug_owl' + supports_gradient_checkpointing = True + _keys_to_ignore_on_load_missing = [ + r'position_ids', + r'language_model.encoder.embed_tokens.weight', + r'language_model.decoder.embed_tokens.weight', + r'language_model.lm_head.weight', + ] + _no_split_modules = ['MplugOwlAttention'] + _keep_in_fp32_modules = ['wo'] + + def _init_weights(self, module): + """Initialize the weights""" + factor = self.config.initializer_range + if isinstance(module, nn.Conv2d) or isinstance( + module, nn.Embedding) or isinstance(module, nn.Linear): + module.weight.data.normal_(mean=0.0, std=factor) + if hasattr(module, 'bias') and module.bias is not None: + module.bias.data.zero_() + + if isinstance(module, MplugOwlVisionEmbeddings): + if hasattr(self.config, 'vision_config'): + factor = self.config.vision_config.initializer_range + nn.init.trunc_normal_( + module.position_embedding, mean=0.0, std=factor) + nn.init.trunc_normal_(module.cls_token, mean=0.0, std=factor) + + elif isinstance(module, nn.LayerNorm): + module.bias.data.zero_() + module.weight.data.fill_(1.0) + elif isinstance(module, nn.Linear) and module.bias is not None: + module.bias.data.zero_() + elif isinstance(module, nn.Parameter): + nn.init.trunc_normal_(module.data, mean=0.0, std=factor) + + def _set_gradient_checkpointing(self, module, value=False): + if isinstance(module, MplugOwlVisionEncoder): + module.gradient_checkpointing = value + + +MPLUG_OWL_START_DOCSTRING = r""" + This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the + library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads + etc.) + + This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass. + Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage + and behavior. + + Parameters: + config ([`MplugOwlConfig`]): Model configuration class with all the parameters of the model. + Initializing with a config file does not load the weights associated with the model, only the + configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights. +""" + +MPLUG_OWL_VISION_INPUTS_DOCSTRING = r""" + Args: + pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`): + Pixel values. Pixel values can be obtained using [`MplugOwlPreprocessor`]. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. 
+ output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. +""" + +MPLUG_OWL_TEXT_INPUTS_DOCSTRING = r""" + Args: + input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`): + Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide + it. Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. [What are input IDs?](../glossary#input-ids) + attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + [What are attention masks?](../glossary#attention-mask) + decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*): + Indices of decoder input sequence tokens in the vocabulary. + + Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. + + [What are decoder input IDs?](../glossary#decoder-input-ids) + + T5 uses the `pad_token_id` as the starting token for `decoder_input_ids` generation. If `past_key_values` + is used, optionally only the last `decoder_input_ids` have to be input (see `past_key_values`). + + To know more on how to prepare `decoder_input_ids` for pretraining take a look at [T5 + Training](./t5#training). + decoder_attention_mask (`torch.BoolTensor` of shape `(batch_size, target_sequence_length)`, *optional*): + Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also + be used by default. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. +""" + +MPLUG_OWL_INPUTS_DOCSTRING = r""" + Args: + pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`): + Pixel values. Pixel values can be obtained using [`MplugOwlPreprocessor`]. + input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): + Indices of input sequence tokens in the vocabulary of the language model. Input tokens can optionally be + provided to serve as text prompt, which the language model can continue. + + Indices can be obtained using [`MplugOwlPreprocessor`]. See [`MplugOwlPreprocessor.__call__`] for details. + + [What are input IDs?](../glossary#input-ids) + attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. 
+ + [What are attention masks?](../glossary#attention-mask) + + decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*): + Indices of decoder input sequence tokens in the vocabulary of the language model. Only relevant in case an + encoder-decoder language model (like T5) is used. + + Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. [What are decoder input IDs?](../glossary#decoder-input-ids) + + decoder_attention_mask (`torch.BoolTensor` of shape `(batch_size, target_sequence_length)`, *optional*): + Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also + be used by default. + + Only relevant in case an encoder-decoder language model (like T5) is used. + + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. +""" + + +class MplugOwlVisionEncoder(nn.Module): + """ + Transformer encoder consisting of `config.num_hidden_layers` self attention layers. Each layer is a + [`MplugOwlVisionEncoderLayer`]. + + Args: + config (`MplugOwlVisionConfig`): + The corresponding vision configuration for the `MplugOwlEncoder`. + """ + + def __init__(self, config: MplugOwlVisionConfig): + super().__init__() + self.config = config + self.layers = nn.ModuleList([ + MplugOwlVisionEncoderLayer(config) + for _ in range(config.num_hidden_layers) + ]) + self.gradient_checkpointing = False + + def forward( + self, + inputs_embeds, + attention_mask: Optional[torch.Tensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, BaseModelOutput]: + r""" + Args: + inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`): + Embedded representation of the inputs. Should be float, not int tokens. + attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + [What are attention masks?](../glossary#attention-mask) + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under + returned tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors + for more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. 
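+        Returns:
+            [`BaseModelOutput`] or `tuple`: the final hidden states, together with all intermediate
+            hidden states and attention weights when requested.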
+ """ + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else + self.config.output_hidden_states) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + encoder_states = () if output_hidden_states else None + all_attentions = () if output_attentions else None + + hidden_states = inputs_embeds + for idx, encoder_layer in enumerate(self.layers): + if output_hidden_states: + encoder_states = encoder_states + (hidden_states, ) + if self.gradient_checkpointing and self.training: + + def create_custom_forward(module): + + def custom_forward(*inputs): + return module(*inputs, output_attentions) + + return custom_forward + + layer_outputs = torch.utils.checkpoint.checkpoint( + create_custom_forward(encoder_layer), + hidden_states, + attention_mask, + ) + else: + layer_outputs = encoder_layer( + hidden_states, + attention_mask, + output_attentions=output_attentions, + ) + + hidden_states = layer_outputs[0] + + if output_attentions: + all_attentions = all_attentions + (layer_outputs[1], ) + + if output_hidden_states: + encoder_states = encoder_states + (hidden_states, ) + + if not return_dict: + return tuple( + v for v in [hidden_states, encoder_states, all_attentions] + if v is not None) + return BaseModelOutput( + last_hidden_state=hidden_states, + hidden_states=encoder_states, + attentions=all_attentions) + + +class MplugOwlVisionModel(MplugOwlPreTrainedModel): + main_input_name = 'pixel_values' + config_class = MplugOwlVisionConfig + + def __init__(self, config: MplugOwlVisionConfig): + super().__init__(config) + self.config = config + self.hidden_size = config.hidden_size + + self.embeddings = MplugOwlVisionEmbeddings(config) + self.encoder = MplugOwlVisionEncoder(config) + self.post_layernorm = LayerNormFp32( + self.hidden_size, eps=config.layer_norm_eps) + + self.post_init() + + def forward( + self, + pixel_values: Optional[torch.FloatTensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, BaseModelOutputWithPooling]: + r""" + Returns: + + """ + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else + self.config.output_hidden_states) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + if pixel_values is None: + raise ValueError('You have to specify pixel_values') + + hidden_states = self.embeddings(pixel_values) + + encoder_outputs = self.encoder( + inputs_embeds=hidden_states, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + last_hidden_state = encoder_outputs[0] + last_hidden_state = self.post_layernorm(last_hidden_state) + + pooled_output = last_hidden_state[:, 0, :] + pooled_output = self.post_layernorm(pooled_output) + + if not return_dict: + return (last_hidden_state, pooled_output) + encoder_outputs[1:] + + return BaseModelOutputWithPooling( + last_hidden_state=last_hidden_state, + pooler_output=pooled_output, + hidden_states=encoder_outputs.hidden_states, + attentions=encoder_outputs.attentions, + ) + + def get_input_embeddings(self): + return self.embeddings + + +class MplugOwlVisualAbstractorMLP(nn.Module): + + def __init__(self, config: 
MplugOwlVisualAbstractorConfig): + super().__init__() + self.config = config + in_features = config.hidden_size + hidden_features = config.intermediate_size + hidden_features = int(2 * hidden_features / 3) + multiple_of = 256 + hidden_features = multiple_of * \ + ((hidden_features + multiple_of - 1) // multiple_of) + self.act = nn.SiLU() + + self.w1 = nn.Linear(in_features, hidden_features) + self.w2 = nn.Linear(hidden_features, in_features) + self.w3 = nn.Linear(in_features, hidden_features) + self.ffn_ln = LayerNormFp32(hidden_features, eps=config.layer_norm_eps) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.act( + self.w1(hidden_states)) * self.w3(hidden_states) + hidden_states = self.ffn_ln(hidden_states) + hidden_states = self.w2(hidden_states) + return hidden_states + + +class MplugOwlVisualAbstractorMultiHeadAttention(nn.Module): + + def __init__(self, config: MplugOwlVisualAbstractorConfig): + super().__init__() + self.config = config + if config.hidden_size % config.num_attention_heads != 0: + raise ValueError( + 'The hidden size (%d) is not a multiple of the number of attention heads (%d)' + % (config.hidden_size, config.num_attention_heads)) + + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int(config.hidden_size + / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.encoder_hidden_size, self.all_head_size) + self.value = nn.Linear(config.encoder_hidden_size, self.all_head_size) + + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + self.save_attention = False + + def save_attn_gradients(self, attn_gradients): + self.attn_gradients = attn_gradients + + def get_attn_gradients(self): + return self.attn_gradients + + def save_attention_map(self, attention_map): + self.attention_map = attention_map + + def get_attention_map(self): + return self.attention_map + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, + self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward( + self, + hidden_states, + attention_mask=None, + head_mask=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + past_key_value=None, + output_attentions=False, + ): + # If this is instantiated as a cross-attention module, the keys + # and values come from an encoder; the attention mask needs to be + # such that the encoder's padding tokens are not attended to. + key_layer = self.transpose_for_scores(self.key(encoder_hidden_states)) + value_layer = self.transpose_for_scores( + self.value(encoder_hidden_states)) + attention_mask = encoder_attention_mask + + mixed_query_layer = self.query(hidden_states) + + query_layer = self.transpose_for_scores(mixed_query_layer) + + past_key_value = (key_layer, value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_layer, + key_layer.transpose(-1, -2)) + + attention_scores = attention_scores / \ + math.sqrt(self.attention_head_size) + + if attention_mask is not None: + # Apply the attention mask is (precomputed for all layers in BertModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. 
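+        # Note that the keys/values at this point are the (normalized) query embeddings
+        # concatenated with the image patch features (see
+        # MplugOwlVisualAbstractorAttention.forward), so each query token attends over both
+        # the other queries and the full visual sequence.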
+        attention_probs = nn.Softmax(dim=-1)(attention_scores)
+
+        if self.save_attention:
+            self.save_attention_map(attention_probs)
+            attention_probs.register_hook(self.save_attn_gradients)
+
+        # This is actually dropping out entire tokens to attend to, which might
+        # seem a bit unusual, but is taken from the original Transformer paper.
+        attention_probs_dropped = self.dropout(attention_probs)
+
+        # Mask heads if we want to
+        if head_mask is not None:
+            attention_probs_dropped = attention_probs_dropped * head_mask
+
+        context_layer = torch.matmul(attention_probs_dropped, value_layer)
+
+        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
+        new_context_layer_shape = context_layer.size()[:-2] + (
+            self.all_head_size, )
+        context_layer = context_layer.view(*new_context_layer_shape)
+
+        outputs = (context_layer,
+                   attention_probs) if output_attentions else (context_layer, )
+
+        outputs = outputs + (past_key_value, )
+        return outputs
+
+
+class MplugOwlVisualAbstractorCrossOutput(nn.Module):
+
+    def __init__(self, config: MplugOwlVisualAbstractorConfig):
+        super().__init__()
+        dim = config.hidden_size
+        self.out_proj = nn.Linear(dim, dim, bias=True)
+        self.norm2 = LayerNormFp32(dim)
+        self.mlp = MplugOwlVisualAbstractorMLP(config)
+
+    def forward(self, hidden_states: torch.Tensor,
+                input_tensor: torch.Tensor) -> torch.Tensor:
+        input_tensor = input_tensor + self.out_proj(hidden_states)
+        input_tensor = input_tensor + self.mlp(self.norm2(input_tensor))
+        return input_tensor
+
+
+class MplugOwlVisualAbstractorAttention(nn.Module):
+
+    def __init__(self, config: MplugOwlVisualAbstractorConfig):
+        super().__init__()
+        self.attention = MplugOwlVisualAbstractorMultiHeadAttention(config)
+        self.output = MplugOwlVisualAbstractorCrossOutput(config)
+        self.pruned_heads = set()
+        self.norm1 = LayerNormFp32(config.hidden_size)
+        self.normk = LayerNormFp32(config.hidden_size)
+
+    def prune_heads(self, heads):
+        if len(heads) == 0:
+            return
+        heads, index = find_pruneable_heads_and_indices(
+            heads, self.attention.num_attention_heads,
+            self.attention.attention_head_size, self.pruned_heads)
+
+        # Prune linear layers
+        self.attention.query = prune_linear_layer(self.attention.query, index)
+        self.attention.key = prune_linear_layer(self.attention.key, index)
+        self.attention.value = prune_linear_layer(self.attention.value, index)
+        self.output.out_proj = prune_linear_layer(
+            self.output.out_proj, index, dim=1)
+
+        # Update hyper params and store pruned heads
+        self.attention.num_attention_heads = self.attention.num_attention_heads - \
+            len(heads)
+        self.attention.all_head_size = self.attention.attention_head_size * \
+            self.attention.num_attention_heads
+        self.pruned_heads = self.pruned_heads.union(heads)
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        attention_mask: Optional[torch.FloatTensor] = None,
+        head_mask: Optional[torch.FloatTensor] = None,
+        encoder_hidden_states: Optional[torch.FloatTensor] = None,
+        encoder_attention_mask: Optional[torch.FloatTensor] = None,
+        past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+        output_attentions: Optional[bool] = False,
+    ) -> Tuple[torch.Tensor]:
+        # HACK we apply norm on q and k
+        hidden_states = self.norm1(hidden_states)
+        encoder_hidden_states = self.normk(encoder_hidden_states)
+        encoder_hidden_states = torch.cat(
+            [hidden_states, encoder_hidden_states], dim=1)
+        encoder_attention_mask = torch.cat(
+            [attention_mask, encoder_attention_mask], dim=-1)
+        self_outputs = self.attention(
+            hidden_states,
+            attention_mask,
+
head_mask, + encoder_hidden_states, + encoder_attention_mask, + past_key_value, + output_attentions, + ) + attention_output = self.output(self_outputs[0], hidden_states) + # add attentions if we output them + outputs = (attention_output, ) + self_outputs[1:] + return outputs + + +class MplugOwlVisualAbstractorLayer(nn.Module): + + def __init__(self, config, layer_idx): + super().__init__() + self.chunk_size_feed_forward = config.chunk_size_feed_forward + self.seq_len_dim = 1 + + self.layer_idx = layer_idx + + self.crossattention = MplugOwlVisualAbstractorAttention(config) + self.has_cross_attention = True + + def forward( + self, + hidden_states, + attention_mask=None, + head_mask=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + output_attentions=False, + ): + if encoder_hidden_states is None: + raise ValueError( + 'encoder_hidden_states must be given for cross-attention layers' + ) + cross_attention_outputs = self.crossattention( + hidden_states, + attention_mask, + head_mask, + encoder_hidden_states, + encoder_attention_mask, + output_attentions=output_attentions, + ) + query_attention_output = cross_attention_outputs[0] + + outputs = (query_attention_output, ) + return outputs + + +class MplugOwlVisualAbstractorEncoder(nn.Module): + + def __init__(self, config): + super().__init__() + self.config = config + self.layers = nn.ModuleList([ + MplugOwlVisualAbstractorLayer(config, layer_idx) + for layer_idx in range(config.num_hidden_layers) + ]) + self.gradient_checkpointing = False + + def forward( + self, + hidden_states, + attention_mask=None, + head_mask=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + past_key_values=None, + output_attentions=False, + output_hidden_states=False, + return_dict=True, + ): + all_hidden_states = () if output_hidden_states else None + + for i in range(self.config.num_hidden_layers): + layer_module = self.layers[i] + if output_hidden_states: + all_hidden_states = all_hidden_states + (hidden_states, ) + + layer_head_mask = head_mask[i] if head_mask is not None else None + past_key_value = past_key_values[ + i] if past_key_values is not None else None + + if getattr(self.config, 'gradient_checkpointing', + False) and self.training: + + def create_custom_forward(module): + + def custom_forward(*inputs): + return module(*inputs, past_key_value, + output_attentions) + + return custom_forward + + layer_outputs = torch.utils.checkpoint.checkpoint( + create_custom_forward(layer_module), + hidden_states, + attention_mask, + layer_head_mask, + encoder_hidden_states, + encoder_attention_mask, + ) + else: + layer_outputs = layer_module( + hidden_states, + attention_mask, + layer_head_mask, + encoder_hidden_states, + encoder_attention_mask, + output_attentions, + ) + + hidden_states = layer_outputs[0] + + return BaseModelOutput(last_hidden_state=hidden_states, ) + + +class MplugOwlVisualAbstractorModel(MplugOwlPreTrainedModel): + + def __init__(self, config: MplugOwlVisualAbstractorConfig, + language_hidden_size): + super().__init__(config) + self.config = config + + self.encoder = MplugOwlVisualAbstractorEncoder(config) + self.visual_fc = torch.nn.Linear(config.hidden_size, + language_hidden_size) + self.vit_eos = torch.nn.Parameter( + torch.randn(1, 1, language_hidden_size)) + self.post_init() + + def _prune_heads(self, heads_to_prune): + """ + Prunes heads of the model. 
heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
+            class PreTrainedModel
+        """
+        for layer, heads in heads_to_prune.items():
+            self.encoder.layers[layer].crossattention.prune_heads(heads)
+
+    def get_extended_attention_mask(
+        self,
+        attention_mask: torch.Tensor,
+        input_shape: Tuple[int],
+        device: torch.device,
+    ) -> torch.Tensor:
+        """
+        Makes broadcastable attention and causal masks so that future and masked tokens are ignored.
+
+        Arguments:
+            attention_mask (`torch.Tensor`):
+                Mask with ones indicating tokens to attend to, zeros for tokens to ignore.
+            input_shape (`Tuple[int]`):
+                The shape of the input to the model.
+            device (`torch.device`):
+                The device of the input to the model.
+
+        Returns:
+            `torch.Tensor` The extended attention mask, with the same dtype as `attention_mask.dtype`.
+        """
+        # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
+        # ourselves in which case we just need to make it broadcastable to all heads.
+        if attention_mask.dim() == 3:
+            extended_attention_mask = attention_mask[:, None, :, :]
+        elif attention_mask.dim() == 2:
+            extended_attention_mask = attention_mask[:, None, None, :]
+        else:
+            raise ValueError(
+                'Wrong shape for input_ids (shape {}) or attention_mask (shape {})'
+                .format(input_shape, attention_mask.shape))
+
+        # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
+        # masked positions, this operation will create a tensor which is 0.0 for
+        # positions we want to attend and -10000.0 for masked positions.
+        # Since we are adding it to the raw scores before the softmax, this is
+        # effectively the same as removing these entirely.
+        extended_attention_mask = extended_attention_mask.to(
+            dtype=self.dtype)  # fp16 compatibility
+        extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0
+        return extended_attention_mask
+
+    def forward(
+        self,
+        query_embeds,
+        attention_mask=None,
+        head_mask=None,
+        encoder_hidden_states=None,
+        encoder_attention_mask=None,
+        past_key_values=None,
+        output_attentions=None,
+        output_hidden_states=None,
+        return_dict=None,
+    ):
+        r"""
+        encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, `optional`):
+            Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
+            the model is configured as a decoder.
+        encoder_attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, `optional`):
+            Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in
+            the cross-attention if the model is configured as a decoder. Mask values selected in `[0, 1]`:
+            - 1 for tokens that are **not masked**,
+            - 0 for tokens that are **masked**.
+        past_key_values (`tuple(tuple(torch.FloatTensor))` of length `config.n_layers` with each tuple having 4 tensors:
+            shape `(batch_size, num_heads, sequence_length - 1, embed_size_per_head)`): Contains precomputed key and
+            value hidden states of the attention blocks. Can be used to speed up decoding. If `past_key_values` are
+            used, the user can optionally input only the last `decoder_input_ids` (those that don't have their past key
+            value states given to this model) of shape `(batch_size, 1)` instead of all `decoder_input_ids` of shape
+            `(batch_size, sequence_length)`.
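+
+        Example (a rough shape-level sketch; the 64 query tokens, the 257 visual features and the
+        4096-dim language hidden size are illustrative placeholders, and
+        `MplugOwlVisualAbstractorConfig()` is assumed to provide usable defaults):
+
+        ```python
+        import torch
+
+        cfg = MplugOwlVisualAbstractorConfig()  # assumed default hyper-parameters
+        abstractor = MplugOwlVisualAbstractorModel(cfg, language_hidden_size=4096)
+        queries = torch.randn(1, 64, cfg.hidden_size)
+        image_feats = torch.randn(1, 257, cfg.encoder_hidden_size)
+        out = abstractor(query_embeds=queries, encoder_hidden_states=image_feats)
+        # 64 projected query tokens plus the learned `vit_eos` token appended at the end
+        out.last_hidden_state.shape  # torch.Size([1, 65, 4096])
+        ```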
+        """
+        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+        output_hidden_states = (
+            output_hidden_states if output_hidden_states is not None else
+            self.config.output_hidden_states)
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+        embedding_output = query_embeds
+        input_shape = embedding_output.size()[:-1]
+        batch_size, seq_length = input_shape
+        device = embedding_output.device
+
+        # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
+        # ourselves in which case we just need to make it broadcastable to all heads.
+        if attention_mask is None:
+            attention_mask = torch.ones(
+                (query_embeds.shape[0], query_embeds.shape[1]),
+                dtype=torch.long,
+                device=query_embeds.device)
+        extended_attention_mask = self.get_extended_attention_mask(
+            attention_mask, input_shape, device)
+
+        # If a 2D or 3D attention mask is provided for the cross-attention
+        # we need to make it broadcastable to [batch_size, num_heads, seq_length, seq_length]
+        if encoder_hidden_states is not None:
+            if type(encoder_hidden_states) == list:
+                encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states[
+                    0].size()
+            else:
+                (
+                    encoder_batch_size,
+                    encoder_sequence_length,
+                    _,
+                ) = encoder_hidden_states.size()
+            encoder_hidden_shape = (encoder_batch_size,
+                                    encoder_sequence_length)
+
+            if type(encoder_attention_mask) == list:
+                encoder_extended_attention_mask = [
+                    self.invert_attention_mask(mask)
+                    for mask in encoder_attention_mask
+                ]
+            elif encoder_attention_mask is None:
+                encoder_attention_mask = torch.ones(
+                    encoder_hidden_shape, device=device)
+                encoder_extended_attention_mask = self.invert_attention_mask(
+                    encoder_attention_mask)
+            else:
+                encoder_extended_attention_mask = self.invert_attention_mask(
+                    encoder_attention_mask)
+        else:
+            encoder_extended_attention_mask = None
+
+        # Prepare head mask if needed
+        # 1.0 in head_mask indicates we keep the head
+        # attention_probs has shape bsz x n_heads x N x N
+        # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
+        # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
+        head_mask = self.get_head_mask(head_mask,
+                                       self.config.num_hidden_layers)
+
+        encoder_outputs = self.encoder(
+            embedding_output,
+            attention_mask=extended_attention_mask,
+            head_mask=head_mask,
+            encoder_hidden_states=encoder_hidden_states,
+            encoder_attention_mask=encoder_extended_attention_mask,
+            past_key_values=past_key_values,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+        )
+        sequence_output = encoder_outputs[0]
+        pooled_output = sequence_output[:, 0, :]
+
+        sequence_output = self.visual_fc(sequence_output)
+        eos_repeat = self.vit_eos.repeat(sequence_output.shape[0], 1, 1)
+        sequence_output = torch.cat([sequence_output, eos_repeat], dim=1)
+
+        return BaseModelOutputWithPooling(
+            last_hidden_state=sequence_output,
+            pooler_output=pooled_output,
+            hidden_states=encoder_outputs.hidden_states,
+        )
+
+
+class MplugOwlModel(MplugOwlPreTrainedModel):
+    r"""The mPLUG-Owl model is a multi-modal conversation model that supports various modalities as input.
+    mPLUG-Owl consists of a visual encoder, a visual abstractor module and a language decoder model, which enables
+    both image and text inputs.
+    This model is implemented based on mPLUG-Owl: Modularization Empowers Large Language Models with Multimodality.
+    `Paper `.
+ """ + config_class = MplugOwlConfig + main_input_name = 'pixel_values' + + def __init__(self, config: MplugOwlConfig): + super().__init__(config) + + self.vision_model = MplugOwlVisionModel(config.vision_config) + + self.query_tokens = nn.Parameter( + torch.zeros(1, config.num_query_tokens, + config.visual_abstractor_config.hidden_size)) + self.abstractor = MplugOwlVisualAbstractorModel( + config.visual_abstractor_config, config.text_config.hidden_size) + + # if config.use_decoder_only_language_model: + language_model = AutoModelForCausalLM.from_config(config.text_config) + self.language_model = language_model + + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.language_model.get_input_embeddings() + + def set_input_embeddings(self, value): + self.language_model.set_input_embeddings(value) + + def set_output_embeddings(self, new_embeddings): + self.language_model.set_output_embeddings(new_embeddings) + + def get_output_embeddings(self) -> nn.Module: + return self.language_model.get_output_embeddings() + + def get_encoder(self): + return self.language_model.get_encoder() + + def get_decoder(self): + return self.language_model.get_decoder() + + def _tie_weights(self): + if not self.config.use_decoder_only_language_model: + self.language_model.encoder.embed_tokens = self.language_model.shared + self.language_model.decoder.embed_tokens = self.language_model.shared + + def get_text_features( + self, + input_ids: Optional[torch.Tensor] = None, + attention_mask: Optional[torch.Tensor] = None, + decoder_input_ids: Optional[torch.Tensor] = None, + decoder_attention_mask: Optional[torch.Tensor] = None, + labels: Optional[torch.Tensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ): + + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else + self.config.output_hidden_states) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + if self.config.use_decoder_only_language_model: + text_outputs = self.language_model( + input_ids=input_ids, + attention_mask=attention_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + else: + inputs_embeds = self.language_model.get_input_embeddings()( + input_ids) + + text_outputs = self.language_model( + inputs_embeds=inputs_embeds, + attention_mask=attention_mask, + decoder_input_ids=decoder_input_ids, + decoder_attention_mask=decoder_attention_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + labels=labels, + ) + + return text_outputs + + def get_image_features( + self, + pixel_values: Optional[torch.FloatTensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ): + + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else + self.config.output_hidden_states) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + vision_outputs = self.vision_model( + pixel_values=pixel_values, + output_attentions=output_attentions, + 
output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + return vision_outputs + + +def get_media_indices(my_list): + if isinstance(my_list, torch.Tensor): + my_list = my_list.cpu().tolist() + result = [] + for i in range(len(my_list)): + if i == 0 and my_list[i] < 0: + result.append(i) + elif my_list[i] != my_list[i - 1] and my_list[i] < 0: + result.append(i) + return result + + +class MplugOwlForConditionalGenerationHF(MplugOwlPreTrainedModel): + config_class = MplugOwlConfig + main_input_name = 'pixel_values' + + def __init__(self, config: MplugOwlConfig, **kwargs): + super().__init__(config) + + self.vision_model = MplugOwlVisionModel(config.vision_config) + + self.query_tokens = nn.Parameter( + torch.zeros(1, config.num_query_tokens, + config.visual_abstractor_config.hidden_size)) + self.abstractor = MplugOwlVisualAbstractorModel( + config.visual_abstractor_config, config.text_config.hidden_size) + + # if config.use_decoder_only_language_model: + language_model = AutoModelForCausalLM.from_config(config.text_config) + self.language_model = language_model + + # Initialize weights and apply final processing + self.post_init() + self.main_input_name = 'input_ids' + + def get_input_embeddings(self): + return self.language_model.get_input_embeddings() + + def set_input_embeddings(self, value): + self.language_model.set_input_embeddings(value) + + def set_output_embeddings(self, new_embeddings): + self.language_model.set_output_embeddings(new_embeddings) + + def get_output_embeddings(self) -> nn.Module: + return self.language_model.get_output_embeddings() + + def get_encoder(self): + return self.language_model.get_encoder() + + def get_decoder(self): + return self.language_model.get_decoder() + + def _tie_weights(self): + if not self.config.use_decoder_only_language_model: + self.language_model.encoder.embed_tokens = self.language_model.shared + self.language_model.decoder.embed_tokens = self.language_model.shared + + def _preprocess_accelerate(self): + r""" + Some pre-processing hacks to make the model `accelerate` compatible. Check + https://github.com/huggingface/transformers/pull/21707 for more details. + """ + hf_device_map = self.hf_device_map + + if len( + hf_device_map + ) > 1 and 'language_model' not in hf_device_map and torch.cuda.device_count( + ) > 1: + # warn users about unexpected behavior when using multi-GPU + mPLUG-Owl + `accelerate`. + logger.warning( + 'The `language_model` is not in the `hf_device_map` dictionary and you are running your script' + ' in a multi-GPU environment. this may lead to unexpected behavior when using `accelerate`.' + ' Please pass a `device_map` that contains `language_model` to remove this warning.' 
+ ' Please refer to https://github.com/huggingface/blog/blob/main/accelerate-large-models.md for' + ' more details on creating a `device_map` for large models.', ) + + if hasattr(self.language_model, '_hf_hook'): + self.language_model._hf_hook.io_same_device = True # For `generate` compatibility + + def forward( + self, + pixel_values: torch.FloatTensor, + input_ids: torch.FloatTensor, + num_images, + non_padding_mask: Optional[torch.LongTensor] = None, + non_media_mask: Optional[torch.LongTensor] = None, + prompt_mask: Optional[torch.LongTensor] = None, + attention_mask: Optional[torch.LongTensor] = None, + decoder_input_ids: Optional[torch.LongTensor] = None, + decoder_attention_mask: Optional[torch.LongTensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + labels: Optional[torch.LongTensor] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, MplugOwlForConditionalGenerationModelOutput]: + + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + # get text embedding + text_tokens_ = input_ids + batch_size = input_ids.shape[0] + + media_token_indices = [ + # [:-1] since we would not use the last token for embedding + get_media_indices(text_tokens_[i][:-1]) for i in range(batch_size) + ] + text_tokens_[text_tokens_ < 0] = 1 # Not used + text_embeds = self.get_input_embeddings()( + text_tokens_) # Temporally Embedding + + if pixel_values is not None: + pixel_values = pixel_values.half() + image_embeds = self.vision_model( + pixel_values, return_dict=True).last_hidden_state + + image_attention_mask = torch.ones( + image_embeds.size()[:-1], + dtype=torch.long, + device=image_embeds.device) + query_tokens = self.query_tokens.expand(image_embeds.shape[0], -1, + -1) + + query_features = self.abstractor( + query_embeds=query_tokens, + encoder_hidden_states=image_embeds, + encoder_attention_mask=image_attention_mask, + )['last_hidden_state'] + img_seq_length = query_features.shape[1] + + num_images_per_sample = num_images.long().cpu().tolist() + + text_chunk_embeds = [] + img_idx = 0 + for b in range(batch_size): + start = 0 + result = [] + if len(media_token_indices[b]) > 0: + for i, pos in enumerate(media_token_indices[b]): + if pos > start: + result.append(text_embeds[b, start:pos]) + result.append(query_features[img_idx + i]) + start = pos + img_seq_length + if start < text_embeds.shape[1]: + result.append(text_embeds[b, start:]) + + img_idx += num_images_per_sample[b] + text_chunk_embeds.append(torch.cat(result, dim=0)) + + # Actual Input Embeddings + input_embeds = torch.stack(text_chunk_embeds, dim=0) + + # Create causal mask and position ids + _, loss_mask, position_ids = \ + get_ltor_masks_and_position_ids_from_embeddings(input_embeds) + + # Calculate the loss_mask + non_padding_mask = non_padding_mask.long() + non_media_mask = non_media_mask.long() + prompt_mask = prompt_mask.long() # TODO How to deal with prompt mask + loss_mask = loss_mask[:, :-1] + + loss_mask = loss_mask * non_padding_mask * non_media_mask * prompt_mask + + # Forward into GPT + outputs = self.language_model( + inputs_embeds=input_embeds, + attention_mask=attention_mask, + labels=labels, + ) + outputs.loss = (outputs.loss + * loss_mask.view(-1)).sum() / loss_mask.sum() + return outputs + + @torch.no_grad() + def generate( + self, + pixel_values: torch.FloatTensor, + input_ids: Optional[torch.LongTensor] = None, + attention_mask: Optional[torch.LongTensor] = None, + **generate_kwargs, + ) -> torch.LongTensor: + """ + 
Overrides `generate` function to be able to use the model as a conditional generator. + + Args: + pixel_values (`torch.FloatTensor` of shape (batch_size, num_channels, height, width)): + Input images to be processed. + input_ids (`torch.LongTensor` of shape (batch_size, sequence_length), *optional*): + The sequence used as a prompt for the generation. + attention_mask (`torch.LongTensor` of shape (batch_size, sequence_length), *optional*): + Mask to avoid performing attention on padding token indices + + Returns: + captions (list): A list of strings of length batch_size * num_captions. + """ + + if input_ids is not None: + batch_size = input_ids.size(0) + media_token_indices = [ + get_media_indices(input_ids[i]) for i in range(batch_size) + ] + num_images_per_sample = [len(x) for x in media_token_indices] + input_ids = input_ids.clone() + input_ids[input_ids < 0] = 0 # Not used + + if attention_mask is None: + attention_mask = torch.ones_like(input_ids).long().to( + input_ids.device) + + if hasattr(self, 'hf_device_map'): + # preprocess for `accelerate` + self._preprocess_accelerate() + batch_size = input_ids.shape[0] + # get text embedding + inputs_embeds = self.get_input_embeddings()(input_ids) + # get visual embedding + if pixel_values is not None: + pixel_values = pixel_values.half() + pixel_values = pixel_values.to(input_ids.device) + with torch.no_grad(): + image_embeds = self.vision_model( + pixel_values, return_dict=True).last_hidden_state + image_attention_mask = torch.ones( + image_embeds.size()[:-1], + dtype=torch.long, + device=image_embeds.device) + query_tokens = self.query_tokens.expand( + image_embeds.shape[0], -1, -1) + query_outputs = self.abstractor( + query_embeds=query_tokens, + encoder_hidden_states=image_embeds, + encoder_attention_mask=image_attention_mask, + return_dict=True, + ) + query_output = query_outputs['last_hidden_state'] + image_embeds = query_output + img_seq_length = image_embeds.shape[1] + + # =================== + # Get actual input embeddings + # =================== + text_chunk_embeds = [] + text_chunk_attns = [] + img_idx = 0 + + for b in range(batch_size): + start = 0 + result = [] + result_attn = [] + for i, pos in enumerate(media_token_indices[b]): + if pos > start: + result.append(inputs_embeds[b, start:pos]) + result_attn.append(attention_mask[b, start:pos]) + result.append(image_embeds[img_idx + i]) + result_attn.append( + torch.ones( + image_embeds[img_idx + i].shape[0], + device=inputs_embeds.device)) + start = pos + img_seq_length + if start < inputs_embeds.shape[1]: + result.append(inputs_embeds[b, start:]) + result_attn.append(attention_mask[b, start:]) + + img_idx += num_images_per_sample[b] + text_chunk_embeds.append(torch.cat(result, dim=0)) + text_chunk_attns.append(torch.cat(result_attn, dim=0)) + inputs_embeds = torch.stack(text_chunk_embeds, dim=0) + attention_mask = torch.stack(text_chunk_attns, dim=0) + + outputs = self.language_model.generate( + inputs_embeds=inputs_embeds, + attention_mask=attention_mask, + **generate_kwargs, + ) + + return outputs + + +@MODELS.register_module( + Tasks.multimodal_dialogue, module_name=Models.mplug_owl) +class MplugOwlForConditionalGeneration(TorchModel): + + def __init__(self, model_dir: str, *args, **kwargs): + """initialize the mPLUG-Owl model from the `model_dir` path. + Args: + model_dir (str): the model path. 
+ """ + + super().__init__(model_dir, *args, **kwargs) + self.model = MplugOwlForConditionalGenerationHF.from_pretrained( + model_dir, + torch_dtype=torch.half, + ) + + def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]: + output = self.model.generate(**input) + return output diff --git a/modelscope/models/nlp/mglm/blocklm_utils.py b/modelscope/models/nlp/mglm/blocklm_utils.py index b05cd2c2..e75aea92 100644 --- a/modelscope/models/nlp/mglm/blocklm_utils.py +++ b/modelscope/models/nlp/mglm/blocklm_utils.py @@ -212,10 +212,10 @@ class ConstructBlockStrategy: block_spans, rng, task='bert'): - position_ids = np.arange(len(tokens), dtype=np.long) + position_ids = np.arange(len(tokens), dtype=int) targets = copy.deepcopy(tokens) mask_id = self.tokenizer.get_command('MASK').Id - mlm_masks = np.zeros(len(tokens), dtype=np.long) + mlm_masks = np.zeros(len(tokens), dtype=int) for start, end in block_spans: for idx in range(start, end): tokens[idx] = mask_id @@ -231,7 +231,7 @@ class ConstructBlockStrategy: rng, task='bert'): text_length = len(tokens) - position_ids = np.ones(len(tokens), dtype=np.long) + position_ids = np.ones(len(tokens), dtype=int) for start, end in block_spans: position_ids[start + 1:end] = 0 position_ids = np.cumsum(position_ids) - 1 @@ -270,7 +270,7 @@ class ConstructBlockStrategy: (end - start + 1)) if self.block_position_encoding: target_block_position_ids.append( - np.arange(1, end - start + 2, dtype=np.long)) + np.arange(1, end - start + 2, dtype=int)) else: target_block_position_ids.append([1] * (end - start + 1)) block_spans.sort(key=lambda x: x[0]) @@ -307,7 +307,7 @@ class ConstructBlockStrategy: target_tokens = target_tokens + [ self.tokenizer.get_command('eop').Id ] - loss_masks = np.ones(len(target_tokens), dtype=np.long) + loss_masks = np.ones(len(target_tokens), dtype=int) return source_tokens, target_tokens, loss_masks else: tokens = np.concatenate(source_tokens + target_tokens) @@ -326,12 +326,12 @@ class ConstructBlockStrategy: for pos in mask_pos: tokens[pos] = self.tokenizer.get_command('dBLOCK').Id targets = np.concatenate(source_tokens + targets) - loss_masks = np.ones(len(tokens), dtype=np.long) + loss_masks = np.ones(len(tokens), dtype=int) loss_masks[:source_length] = 0 position_ids = np.concatenate(source_position_ids + target_position_ids) block_position_ids = np.concatenate( - [np.zeros(source_length, dtype=np.long)] + [np.zeros(source_length, dtype=int)] + target_block_position_ids) position_ids = np.stack([position_ids, block_position_ids], axis=0) if attention_mask is not None: @@ -539,22 +539,21 @@ class ConstructBlockStrategy: (source_tokens, [self.generation_mask], target_tokens)) loss_masks = np.concatenate( (np.zeros(len(source_tokens) + 1, - dtype=np.long), target_masks)) + dtype=int), target_masks)) token_batch.append(tokens) target_batch.append(targets) loss_mask_batch.append(loss_masks) position_ids = np.arange( - len(source_tokens) + len(target_tokens) + 1, - dtype=np.long) + len(source_tokens) + len(target_tokens) + 1, dtype=int) position_ids[len(source_tokens) + 1:] = len(source_tokens) if self.block_position_encoding: block_position_ids = np.concatenate( - (np.zeros(len(source_tokens), dtype=np.long), - np.arange(len(target_tokens) + 1, dtype=np.long))) + (np.zeros(len(source_tokens), dtype=int), + np.arange(len(target_tokens) + 1, dtype=int))) else: block_position_ids = np.concatenate( - (np.zeros(len(source_tokens) + 1, dtype=np.long), - np.ones(len(target_tokens) + 1, dtype=np.long))) + (np.zeros(len(source_tokens) + 
1, dtype=int), + np.ones(len(target_tokens) + 1, dtype=int))) position_id_batch.append( np.stack([position_ids, block_position_ids], axis=0)) else: @@ -597,27 +596,25 @@ class ConstructBlockStrategy: max_length = max(seq_lengths) token_batch = [ np.concatenate( - (tokens, np.zeros(max_length - len(tokens), - dtype=np.long))) + (tokens, np.zeros(max_length - len(tokens), dtype=int))) for tokens in token_batch ] target_batch = [ np.concatenate( - (targets, - np.zeros(max_length - len(targets), dtype=np.long))) + (targets, np.zeros(max_length - len(targets), dtype=int))) for targets in target_batch ] loss_mask_batch = [ np.concatenate( (loss_masks, - np.zeros(max_length - len(loss_masks), dtype=np.long))) + np.zeros(max_length - len(loss_masks), dtype=int))) for loss_masks in loss_mask_batch ] position_id_batch = [ - np.concatenate((position_ids, - np.zeros( - (2, max_length - position_ids.shape[1]), - dtype=np.long)), - axis=1) for position_ids in position_id_batch + np.concatenate( + (position_ids, + np.zeros( + (2, max_length - position_ids.shape[1]), dtype=int)), + axis=1) for position_ids in position_id_batch ] return token_batch, target_batch, loss_mask_batch, position_id_batch diff --git a/modelscope/models/nlp/mglm/data_utils/datasets.py b/modelscope/models/nlp/mglm/data_utils/datasets.py index 39ffaea3..37bfbcc2 100644 --- a/modelscope/models/nlp/mglm/data_utils/datasets.py +++ b/modelscope/models/nlp/mglm/data_utils/datasets.py @@ -583,8 +583,8 @@ class XLDataset(data.Dataset): def getidx(self, idx): tokens, targets, loss_masks = [], [], [] attention_mask = np.concatenate( - (np.zeros((self.max_seq_len, self.mem_len), dtype=np.long), - np.ones((self.max_seq_len, self.max_seq_len), dtype=np.long)), + (np.zeros((self.max_seq_len, self.mem_len), dtype=int), + np.ones((self.max_seq_len, self.max_seq_len), dtype=int)), axis=1) sample_idx = bisect_right(self.indices, idx * self.max_seq_len) last_end = 0 if sample_idx == 0 else self.indices[sample_idx - 1] diff --git a/modelscope/models/nlp/mglm/test/test_block.py b/modelscope/models/nlp/mglm/test/test_block.py index ed4225da..eb630835 100644 --- a/modelscope/models/nlp/mglm/test/test_block.py +++ b/modelscope/models/nlp/mglm/test/test_block.py @@ -28,7 +28,7 @@ def main(): counts = np.array([0] * 10) for _ in range(10000): spans = strategy.sample_span_in_document( - np.array([1, 2, 3, 0, 4, 5, 6, 7, 9, 0], dtype=np.long), [1, 1], + np.array([1, 2, 3, 0, 4, 5, 6, 7, 9, 0], dtype=int), [1, 1], random.Random()) for start, end in spans: counts[start:end] += 1 diff --git a/modelscope/models/nlp/mglm/test/test_rel_shift.py b/modelscope/models/nlp/mglm/test/test_rel_shift.py index 00cbb9fe..ad68b15e 100644 --- a/modelscope/models/nlp/mglm/test/test_rel_shift.py +++ b/modelscope/models/nlp/mglm/test/test_rel_shift.py @@ -17,7 +17,7 @@ def main(): num_iters=300000, decay_style='cosine', decay_ratio=0.1) - steps = np.arange(0, 400000, 10, dtype=np.long) + steps = np.arange(0, 400000, 10, dtype=int) rates = [] for step in steps: lr_scheduler.num_iters = step diff --git a/modelscope/models/nlp/unite/__init__.py b/modelscope/models/nlp/unite/__init__.py index 06c2146e..939f0ab7 100644 --- a/modelscope/models/nlp/unite/__init__.py +++ b/modelscope/models/nlp/unite/__init__.py @@ -5,12 +5,12 @@ from typing import TYPE_CHECKING from modelscope.utils.import_utils import LazyImportModule if TYPE_CHECKING: - from .configuration_unite import UniTEConfig - from .modeling_unite import UniTEForTranslationEvaluation + from .configuration import UniTEConfig + 
from .translation_evaluation import UniTEForTranslationEvaluation else: _import_structure = { - 'configuration_unite': ['UniTEConfig'], - 'modeling_unite': ['UniTEForTranslationEvaluation'], + 'configuration': ['UniTEConfig'], + 'translation_evaluation': ['UniTEForTranslationEvaluation'], } import sys diff --git a/modelscope/models/nlp/unite/configuration_unite.py b/modelscope/models/nlp/unite/configuration.py similarity index 93% rename from modelscope/models/nlp/unite/configuration_unite.py rename to modelscope/models/nlp/unite/configuration.py index b0a48585..402538f7 100644 --- a/modelscope/models/nlp/unite/configuration_unite.py +++ b/modelscope/models/nlp/unite/configuration.py @@ -9,7 +9,7 @@ from modelscope.utils.config import Config logger = logging.get_logger() -class EvaluationMode(Enum): +class InputFormat(Enum): SRC = 'src' REF = 'ref' SRC_REF = 'src-ref' diff --git a/modelscope/models/nlp/unite/modeling_unite.py b/modelscope/models/nlp/unite/translation_evaluation.py similarity index 61% rename from modelscope/models/nlp/unite/modeling_unite.py rename to modelscope/models/nlp/unite/translation_evaluation.py index deea737d..c7e96027 100644 --- a/modelscope/models/nlp/unite/modeling_unite.py +++ b/modelscope/models/nlp/unite/translation_evaluation.py @@ -20,6 +20,8 @@ from transformers.activations import ACT2FN from modelscope.metainfo import Models from modelscope.models.base import TorchModel from modelscope.models.builder import MODELS +from modelscope.models.nlp.unite.configuration import InputFormat +from modelscope.outputs.nlp_outputs import TranslationEvaluationOutput from modelscope.utils.constant import Tasks from modelscope.utils.logger import get_logger @@ -71,8 +73,16 @@ class LayerwiseAttention(Module): mask: torch.Tensor = None, ) -> torch.Tensor: tensors = torch.cat(list(x.unsqueeze(dim=0) for x in tensors), dim=0) - normed_weights = softmax( - self.scalar_parameters, dim=0).view(-1, 1, 1, 1) + + if self.training and self.dropout: + normed_weights = softmax( + torch.where(self.dropout_mask.uniform_() > self.dropout, + self.scalar_parameters, self.dropout_fill), + dim=-1) + else: + normed_weights = softmax(self.scalar_parameters, dim=-1) + + normed_weights = normed_weights.view(-1, 1, 1, 1) mask_float = mask.float() weighted_sum = (normed_weights @@ -97,18 +107,18 @@ class FeedForward(Module): Feed Forward Neural Network. Args: - in_dim (:obj:`int`): - Number of input features. - out_dim (:obj:`int`, defaults to 1): - Number of output features. Default is 1 -- a single scalar. - hidden_sizes (:obj:`List[int]`, defaults to `[3072, 768]`): - List with hidden layer sizes. - activations (:obj:`str`, defaults to `Sigmoid`): - Name of the activation function to be used in the hidden layers. - final_activation (:obj:`str`, Optional, defaults to `None`): - Name of the final activation function if any. - dropout (:obj:`float`, defaults to 0.1): - Dropout ratio to be used in the hidden layers. + in_dim (:obj:`int`): + Number of input features. + out_dim (:obj:`int`, defaults to 1): + Number of output features. Default is 1 -- a single scalar. + hidden_sizes (:obj:`List[int]`, defaults to `[3072, 768]`): + List with hidden layer sizes. + activations (:obj:`str`, defaults to `Sigmoid`): + Name of the activation function to be used in the hidden layers. + final_activation (:obj:`str`, Optional, defaults to `None`): + Name of the final activation function if any. + dropout (:obj:`float`, defaults to 0.1): + Dropout ratio to be used in the hidden layers. 
""" super().__init__() modules = [] @@ -266,8 +276,11 @@ class UniTEForTranslationEvaluation(TorchModel): return - def forward(self, input_sentences: List[torch.Tensor]): - input_ids = self.combine_input_sentences(input_sentences) + def forward(self, + input_ids: torch.Tensor, + input_format: Optional[List[InputFormat]] = None, + score: Optional[torch.Tensor] = None, + **kwargs) -> TranslationEvaluationOutput: attention_mask = input_ids.ne(self.pad_token_id).long() outputs = self.encoder( input_ids=input_ids, @@ -276,125 +289,138 @@ class UniTEForTranslationEvaluation(TorchModel): return_dict=True) mix_states = self.layerwise_attention(outputs['hidden_states'], attention_mask) - pred = self.estimator(mix_states) - return pred.squeeze(dim=-1) + pred = self.estimator(mix_states).squeeze(dim=-1) + output = TranslationEvaluationOutput( + score=pred.cpu().tolist(), input_format=input_format) - def load_checkpoint(self, path: str, device: torch.device): - state_dict = torch.load(path, map_location=device) - self.load_state_dict(state_dict) + if score is not None: + loss = (pred - score).pow(2).mean() + output['loss'] = loss + + return output + + def load_checkpoint(self, path: str, device: torch.device, plm_only: bool): + if plm_only: + self.encoder = self.encoder.from_pretrained(path).to(device) + self.encoder.pooler = None + else: + state_dict = torch.load(path, map_location=device) + self.load_state_dict(state_dict) logger.info('Loading checkpoint parameters from %s' % path) return - def combine_input_sentences(self, input_sent_groups: List[torch.Tensor]): - for input_sent_group in input_sent_groups[1:]: - input_sent_group[:, 0] = self.eos_token_id - if len(input_sent_groups) == 3: - cutted_sents = self.cut_long_sequences3(input_sent_groups) - else: - cutted_sents = self.cut_long_sequences2(input_sent_groups) - return cutted_sents - - @staticmethod - def cut_long_sequences2(all_input_concat: List[List[torch.Tensor]], +def combine_input_sentences(all_input_concat: List[List[torch.Tensor]], maximum_length: int = 512, - pad_idx: int = 1): - all_input_concat = list(zip(*all_input_concat)) - collected_tuples = list() - for tensor_tuple in all_input_concat: - all_lens = tuple(len(x) for x in tensor_tuple) + pad_idx: int = 1, + eos_idx: int = 2): + for group in all_input_concat[1:]: + group[:, 0] = eos_idx - if sum(all_lens) > maximum_length: - lengths = dict(enumerate(all_lens)) - lengths_sorted_idxes = list(x[0] for x in sorted( - lengths.items(), key=lambda d: d[1], reverse=True)) + if len(all_input_concat) == 3: + return cut_long_sequences3(all_input_concat, maximum_length, pad_idx) + else: + return cut_long_sequences2(all_input_concat, maximum_length, pad_idx) - offset = ceil((sum(lengths.values()) - maximum_length) / 2) - if min(all_lens) > (maximum_length - // 2) and min(all_lens) > offset: - lengths = dict((k, v - offset) for k, v in lengths.items()) - else: - lengths[lengths_sorted_idxes[ - 0]] = maximum_length - lengths[lengths_sorted_idxes[1]] +def cut_long_sequences2(all_input_concat: List[List[torch.Tensor]], + maximum_length: int = 512, + pad_idx: int = 1): + all_input_concat = list(zip(*all_input_concat)) + collected_tuples = list() + for tensor_tuple in all_input_concat: + tensor_tuple = tuple( + x.masked_select(x.ne(pad_idx)) for x in tensor_tuple) + all_lens = tuple(len(x) for x in tensor_tuple) - new_lens = list(lengths[k] - for k in range(0, len(tensor_tuple))) - new_tensor_tuple = tuple( - x[:y] for x, y in zip(tensor_tuple, new_lens)) - for x, y in zip(new_tensor_tuple, 
tensor_tuple): - x[-1] = y[-1] - collected_tuples.append(new_tensor_tuple) + if sum(all_lens) > maximum_length: + lengths = dict(enumerate(all_lens)) + lengths_sorted_idxes = list(x[0] for x in sorted( + lengths.items(), key=lambda d: d[1], reverse=True)) + + offset = ceil((sum(lengths.values()) - maximum_length) / 2) + + if min(all_lens) > (maximum_length + // 2) and min(all_lens) > offset: + lengths = dict((k, v - offset) for k, v in lengths.items()) else: - collected_tuples.append(tensor_tuple) + lengths[lengths_sorted_idxes[0]] = maximum_length - lengths[ + lengths_sorted_idxes[1]] - concat_tensor = list(torch.cat(x, dim=0) for x in collected_tuples) - all_input_concat_padded = pad_sequence( - concat_tensor, batch_first=True, padding_value=pad_idx) + new_lens = list(lengths[k] for k in range(0, len(tensor_tuple))) + new_tensor_tuple = tuple(x[:y] + for x, y in zip(tensor_tuple, new_lens)) + for x, y in zip(new_tensor_tuple, tensor_tuple): + x[-1] = y[-1] + collected_tuples.append(new_tensor_tuple) + else: + collected_tuples.append(tensor_tuple) - return all_input_concat_padded + concat_tensor = list(torch.cat(x, dim=0) for x in collected_tuples) + all_input_concat_padded = pad_sequence( + concat_tensor, batch_first=True, padding_value=pad_idx) + return all_input_concat_padded - @staticmethod - def cut_long_sequences3(all_input_concat: List[List[torch.Tensor]], - maximum_length: int = 512, - pad_idx: int = 1): - all_input_concat = list(zip(*all_input_concat)) - collected_tuples = list() - for tensor_tuple in all_input_concat: - all_lens = tuple(len(x) for x in tensor_tuple) - if sum(all_lens) > maximum_length: - lengths = dict(enumerate(all_lens)) - lengths_sorted_idxes = list(x[0] for x in sorted( - lengths.items(), key=lambda d: d[1], reverse=True)) +def cut_long_sequences3(all_input_concat: List[List[torch.Tensor]], + maximum_length: int = 512, + pad_idx: int = 1): + all_input_concat = list(zip(*all_input_concat)) + collected_tuples = list() + for tensor_tuple in all_input_concat: + tensor_tuple = tuple( + x.masked_select(x.ne(pad_idx)) for x in tensor_tuple) + all_lens = tuple(len(x) for x in tensor_tuple) - offset = ceil((sum(lengths.values()) - maximum_length) / 3) + if sum(all_lens) > maximum_length: + lengths = dict(enumerate(all_lens)) + lengths_sorted_idxes = list(x[0] for x in sorted( + lengths.items(), key=lambda d: d[1], reverse=True)) - if min(all_lens) > (maximum_length - // 3) and min(all_lens) > offset: - lengths = dict((k, v - offset) for k, v in lengths.items()) - else: - while sum(lengths.values()) > maximum_length: - if lengths[lengths_sorted_idxes[0]] > lengths[ - lengths_sorted_idxes[1]]: - offset = maximum_length - lengths[ - lengths_sorted_idxes[1]] - lengths[ - lengths_sorted_idxes[2]] - if offset > lengths[lengths_sorted_idxes[1]]: - lengths[lengths_sorted_idxes[0]] = offset - else: - lengths[lengths_sorted_idxes[0]] = lengths[ - lengths_sorted_idxes[1]] - elif lengths[lengths_sorted_idxes[0]] == lengths[ - lengths_sorted_idxes[1]] > lengths[ - lengths_sorted_idxes[2]]: - offset = (maximum_length - - lengths[lengths_sorted_idxes[2]]) // 2 - if offset > lengths[lengths_sorted_idxes[2]]: - lengths[lengths_sorted_idxes[0]] = lengths[ - lengths_sorted_idxes[1]] = offset - else: - lengths[lengths_sorted_idxes[0]] = lengths[ - lengths_sorted_idxes[1]] = lengths[ - lengths_sorted_idxes[2]] + offset = ceil((sum(lengths.values()) - maximum_length) / 3) + + if min(all_lens) > (maximum_length + // 3) and min(all_lens) > offset: + lengths = dict((k, v - offset) for k, v 
in lengths.items()) + else: + while sum(lengths.values()) > maximum_length: + if lengths[lengths_sorted_idxes[0]] > lengths[ + lengths_sorted_idxes[1]]: + offset = maximum_length - lengths[lengths_sorted_idxes[ + 1]] - lengths[lengths_sorted_idxes[2]] + if offset > lengths[lengths_sorted_idxes[1]]: + lengths[lengths_sorted_idxes[0]] = offset + else: + lengths[lengths_sorted_idxes[0]] = lengths[ + lengths_sorted_idxes[1]] + elif lengths[lengths_sorted_idxes[0]] == lengths[ + lengths_sorted_idxes[1]] > lengths[ + lengths_sorted_idxes[2]]: + offset = (maximum_length + - lengths[lengths_sorted_idxes[2]]) // 2 + if offset > lengths[lengths_sorted_idxes[2]]: + lengths[lengths_sorted_idxes[0]] = lengths[ + lengths_sorted_idxes[1]] = offset else: lengths[lengths_sorted_idxes[0]] = lengths[ lengths_sorted_idxes[1]] = lengths[ - lengths_sorted_idxes[ - 2]] = maximum_length // 3 + lengths_sorted_idxes[2]] + else: + lengths[lengths_sorted_idxes[0]] = lengths[ + lengths_sorted_idxes[1]] = lengths[ + lengths_sorted_idxes[2]] = maximum_length // 3 - new_lens = list(lengths[k] for k in range(0, len(lengths))) - new_tensor_tuple = tuple( - x[:y] for x, y in zip(tensor_tuple, new_lens)) + new_lens = list(lengths[k] for k in range(0, len(lengths))) + new_tensor_tuple = tuple(x[:y] + for x, y in zip(tensor_tuple, new_lens)) - for x, y in zip(new_tensor_tuple, tensor_tuple): - x[-1] = y[-1] - collected_tuples.append(new_tensor_tuple) - else: - collected_tuples.append(tensor_tuple) + for x, y in zip(new_tensor_tuple, tensor_tuple): + x[-1] = y[-1] + collected_tuples.append(new_tensor_tuple) + else: + collected_tuples.append(tensor_tuple) - concat_tensor = list(torch.cat(x, dim=0) for x in collected_tuples) - all_input_concat_padded = pad_sequence( - concat_tensor, batch_first=True, padding_value=pad_idx) - - return all_input_concat_padded + concat_tensor = list(torch.cat(x, dim=0) for x in collected_tuples) + all_input_concat_padded = pad_sequence( + concat_tensor, batch_first=True, padding_value=pad_idx) + return all_input_concat_padded diff --git a/modelscope/models/science/unifold/data/msa_pairing.py b/modelscope/models/science/unifold/data/msa_pairing.py index cc65962c..77c4e9a6 100644 --- a/modelscope/models/science/unifold/data/msa_pairing.py +++ b/modelscope/models/science/unifold/data/msa_pairing.py @@ -115,7 +115,7 @@ def pad_features(feature: np.ndarray, feature_name: str) -> np.ndarray: Returns: The feature with an additional padding row. 
""" - assert feature.dtype != np.dtype(np.string_) + assert feature.dtype != np.dtype(np.str_) if feature_name in ( 'msa_all_seq', 'msa_mask_all_seq', diff --git a/modelscope/models/science/unifold/msa/templates.py b/modelscope/models/science/unifold/msa/templates.py index d1ff8cf1..f2d3d79c 100644 --- a/modelscope/models/science/unifold/msa/templates.py +++ b/modelscope/models/science/unifold/msa/templates.py @@ -1100,9 +1100,9 @@ class HmmsearchHitFeaturizer(TemplateHitFeaturizer): np.zeros((1, num_res, residue_constants.atom_type_num, 3), np.float32), 'template_domain_names': - np.array([''.encode()], dtype=np.object), + np.array([''.encode()], dtype=np.object_), 'template_sequence': - np.array([''.encode()], dtype=np.object), + np.array([''.encode()], dtype=np.object_), 'template_sum_probs': np.array([0], dtype=np.float32), } diff --git a/modelscope/msdatasets/auth/auth_config.py b/modelscope/msdatasets/auth/auth_config.py index 576a6efd..e09db93c 100644 --- a/modelscope/msdatasets/auth/auth_config.py +++ b/modelscope/msdatasets/auth/auth_config.py @@ -23,6 +23,15 @@ class OssAuthConfig(BaseAuthConfig): cookies=cookies, git_token=git_token, user_info=user_info) +class VirgoAuthConfig(BaseAuthConfig): + """The authorization config for virgo dataset.""" + + def __init__(self, cookies: CookieJar, git_token: str, + user_info: Tuple[str, str]): + super().__init__( + cookies=cookies, git_token=git_token, user_info=user_info) + + class MaxComputeAuthConfig(BaseAuthConfig): # TODO: MaxCompute dataset to be supported. def __init__(self, cookies: CookieJar, git_token: str, diff --git a/modelscope/msdatasets/context/dataset_context_config.py b/modelscope/msdatasets/context/dataset_context_config.py index 26b05f7d..4007d60c 100644 --- a/modelscope/msdatasets/context/dataset_context_config.py +++ b/modelscope/msdatasets/context/dataset_context_config.py @@ -42,6 +42,7 @@ class DatasetContextConfig: self.data_files = data_files self.cache_root_dir = cache_root_dir self.use_streaming = use_streaming + self.download_virgo_files: bool = False @property def config_kwargs(self) -> dict: diff --git a/modelscope/msdatasets/data_loader/data_loader.py b/modelscope/msdatasets/data_loader/data_loader.py index 1ef92372..b1450c61 100644 --- a/modelscope/msdatasets/data_loader/data_loader.py +++ b/modelscope/msdatasets/data_loader/data_loader.py @@ -1,11 +1,12 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
+import os from abc import ABC, abstractmethod from typing import Optional, Union from datasets import (Dataset, DatasetBuilder, DatasetDict, IterableDataset, IterableDatasetDict) -from datasets import load_dataset as hf_data_loader +from datasets import load_dataset as hf_load_dataset from modelscope.hub.api import ModelScopeConfig from modelscope.msdatasets.auth.auth_config import OssAuthConfig @@ -13,13 +14,18 @@ from modelscope.msdatasets.context.dataset_context_config import \ DatasetContextConfig from modelscope.msdatasets.data_files.data_files_manager import \ DataFilesManager -from modelscope.msdatasets.dataset_cls.dataset import ExternalDataset +from modelscope.msdatasets.dataset_cls import ExternalDataset from modelscope.msdatasets.meta.data_meta_manager import DataMetaManager -from modelscope.utils.constant import DatasetFormations +from modelscope.utils.constant import (DatasetFormations, DatasetPathName, + DownloadMode, VirgoDatasetConfig) +from modelscope.utils.logger import get_logger +from modelscope.utils.url_utils import valid_url + +logger = get_logger() -class BaseDataLoader(ABC): - """Base dataset loader to load data.""" +class BaseDownloader(ABC): + """Base dataset downloader to load data.""" def __init__(self, dataset_context_config: DatasetContextConfig): self.dataset_context_config = dataset_context_config @@ -28,35 +34,35 @@ class BaseDataLoader(ABC): def process(self): """The entity processing pipeline for fetching the data. """ raise NotImplementedError( - f'No default implementation provided for {BaseDataLoader.__name__}.process.' + f'No default implementation provided for {BaseDownloader.__name__}.process.' ) @abstractmethod def _authorize(self): raise NotImplementedError( - f'No default implementation provided for {BaseDataLoader.__name__}._authorize.' + f'No default implementation provided for {BaseDownloader.__name__}._authorize.' ) @abstractmethod def _build(self): raise NotImplementedError( - f'No default implementation provided for {BaseDataLoader.__name__}._build.' + f'No default implementation provided for {BaseDownloader.__name__}._build.' ) @abstractmethod def _prepare_and_download(self): raise NotImplementedError( - f'No default implementation provided for {BaseDataLoader.__name__}._prepare_and_download.' + f'No default implementation provided for {BaseDownloader.__name__}._prepare_and_download.' ) @abstractmethod def _post_process(self): raise NotImplementedError( - f'No default implementation provided for {BaseDataLoader.__name__}._post_process.' + f'No default implementation provided for {BaseDownloader.__name__}._post_process.' ) -class OssDataLoader(BaseDataLoader): +class OssDownloader(BaseDownloader): def __init__(self, dataset_context_config: DatasetContextConfig): super().__init__(dataset_context_config) @@ -127,7 +133,7 @@ class OssDataLoader(BaseDataLoader): raise f'meta-file: {dataset_name}.py not found on the modelscope hub.' 
if dataset_py_script and dataset_formation == DatasetFormations.hf_compatible: - self.dataset = hf_data_loader( + self.dataset = hf_load_dataset( dataset_py_script, name=subset_name, revision=version, @@ -147,8 +153,151 @@ class OssDataLoader(BaseDataLoader): self.dataset.custom_map = self.dataset_context_config.data_meta_config.meta_type_map -class MaxComputeDataLoader(BaseDataLoader): - """Data loader for MaxCompute data source.""" +class VirgoDownloader(BaseDownloader): + """Data downloader for Virgo data source.""" + + def __init__(self, dataset_context_config: DatasetContextConfig): + super().__init__(dataset_context_config) + self.dataset = None + + def process(self): + """ + Sequential data fetching virgo dataset process: authorize -> build -> prepare_and_download -> post_process + """ + self._authorize() + self._build() + self._prepare_and_download() + self._post_process() + + def _authorize(self): + """Authorization of virgo dataset.""" + from modelscope.msdatasets.auth.auth_config import VirgoAuthConfig + + cookies = ModelScopeConfig.get_cookies() + user_info = ModelScopeConfig.get_user_info() + + if not self.dataset_context_config.auth_config: + auth_config = VirgoAuthConfig( + cookies=cookies, git_token='', user_info=user_info) + else: + auth_config = self.dataset_context_config.auth_config + auth_config.cookies = cookies + auth_config.git_token = '' + auth_config.user_info = user_info + + self.dataset_context_config.auth_config = auth_config + + def _build(self): + """ + Fetch virgo meta and build virgo dataset. + """ + from modelscope.msdatasets.dataset_cls.dataset import VirgoDataset + import pandas as pd + + meta_manager = DataMetaManager(self.dataset_context_config) + meta_manager.fetch_virgo_meta() + self.dataset_context_config = meta_manager.dataset_context_config + self.dataset = VirgoDataset( + **self.dataset_context_config.config_kwargs) + + virgo_cache_dir = os.path.join( + self.dataset_context_config.cache_root_dir, + self.dataset_context_config.namespace, + self.dataset_context_config.dataset_name, + self.dataset_context_config.version) + os.makedirs( + os.path.join(virgo_cache_dir, DatasetPathName.META_NAME), + exist_ok=True) + meta_content_cache_file = os.path.join(virgo_cache_dir, + DatasetPathName.META_NAME, + 'meta_content.csv') + + if isinstance(self.dataset.meta, pd.DataFrame): + meta_content_df = self.dataset.meta + meta_content_df.to_csv(meta_content_cache_file, index=False) + self.dataset.meta_content_cache_file = meta_content_cache_file + self.dataset.virgo_cache_dir = virgo_cache_dir + logger.info( + f'Virgo meta content saved to {meta_content_cache_file}') + + def _prepare_and_download(self): + """ + Fetch data-files from oss-urls in the virgo meta content. 
+ """ + + download_virgo_files = self.dataset_context_config.config_kwargs.pop( + 'download_virgo_files', '') + + if self.dataset.data_type == 0 and download_virgo_files: + import requests + import json + import shutil + from urllib.parse import urlparse + from functools import partial + + def download_file(meta_info_val, data_dir): + file_url_list = [] + file_path_list = [] + try: + meta_info_val = json.loads(meta_info_val) + # get url first, if not exist, try to get inner_url + file_url = meta_info_val.get('url', '') + if file_url: + file_url_list.append(file_url) + else: + tmp_inner_member_list = meta_info_val.get( + 'inner_url', '') + for item in tmp_inner_member_list: + file_url = item.get('url', '') + if file_url: + file_url_list.append(file_url) + + for one_file_url in file_url_list: + is_url = valid_url(one_file_url) + if is_url: + url_parse_res = urlparse(file_url) + file_name = os.path.basename(url_parse_res.path) + else: + raise ValueError(f'Unsupported url: {file_url}') + file_path = os.path.join(data_dir, file_name) + file_path_list.append((one_file_url, file_path)) + + except Exception as e: + logger.error(f'parse virgo meta info error: {e}') + file_path_list = [] + + for file_url_item, file_path_item in file_path_list: + if file_path_item and not os.path.exists(file_path_item): + logger.info(f'Downloading file to {file_path_item}') + os.makedirs(data_dir, exist_ok=True) + with open(file_path_item, 'wb') as f: + f.write(requests.get(file_url_item).content) + + return file_path_list + + self.dataset.download_virgo_files = True + download_mode = self.dataset_context_config.download_mode + data_files_dir = os.path.join(self.dataset.virgo_cache_dir, + DatasetPathName.DATA_FILES_NAME) + + if download_mode == DownloadMode.FORCE_REDOWNLOAD: + shutil.rmtree(data_files_dir, ignore_errors=True) + + from tqdm import tqdm + tqdm.pandas(desc='apply download_file') + self.dataset.meta[ + VirgoDatasetConfig. + col_cache_file] = self.dataset.meta.progress_apply( + lambda row: partial( + download_file, data_dir=data_files_dir)(row.meta_info), + axis=1) + + def _post_process(self): + ... + + +class MaxComputeDownloader(BaseDownloader): + """Data downloader for MaxCompute data source.""" # TODO: MaxCompute data source to be supported . 
def __init__(self, dataset_context_config: DatasetContextConfig): diff --git a/modelscope/msdatasets/data_loader/data_loader_manager.py b/modelscope/msdatasets/data_loader/data_loader_manager.py index 3c8a638a..5be32de1 100644 --- a/modelscope/msdatasets/data_loader/data_loader_manager.py +++ b/modelscope/msdatasets/data_loader/data_loader_manager.py @@ -9,7 +9,7 @@ from datasets import load_dataset as hf_data_loader from modelscope.hub.api import HubApi from modelscope.msdatasets.context.dataset_context_config import \ DatasetContextConfig -from modelscope.msdatasets.data_loader.data_loader import OssDataLoader +from modelscope.msdatasets.data_loader.data_loader import OssDownloader from modelscope.utils.constant import EXTENSIONS_TO_LOAD from modelscope.utils.logger import get_logger @@ -127,7 +127,7 @@ class RemoteDataLoaderManager(DataLoaderManager): return dataset_ret # To use the modelscope data loader elif data_loader_type == RemoteDataLoaderType.MS_DATA_LOADER: - oss_data_loader = OssDataLoader( + oss_data_loader = OssDownloader( dataset_context_config=self.dataset_context_config) oss_data_loader.process() # download statistics diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/__init__.py index 9eb62168..a367fe79 100644 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/__init__.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/__init__.py @@ -27,12 +27,6 @@ if TYPE_CHECKING: from .video_frame_interpolation import VideoFrameInterpolationDataset from .video_stabilization import VideoStabilizationDataset from .video_super_resolution import VideoSuperResolutionDataset - from .image_semantic_segmentation import SegDataset - from .face_2d_keypoins import FaceKeypointDataset - from .hand_2d_keypoints import HandCocoWholeBodyDataset - from .human_wholebody_keypoint import WholeBodyCocoTopDownDataset - from .image_classification import ClsDataset - from .object_detection import DetDataset, DetImagesMixDataset from .ocr_detection import DataLoader, ImageDataset, QuadMeasurer from .ocr_recognition_dataset import OCRRecognitionDataset from .image_colorization import ImageColorizationDataset @@ -66,12 +60,6 @@ else: 'video_frame_interpolation': ['VideoFrameInterpolationDataset'], 'video_stabilization': ['VideoStabilizationDataset'], 'video_super_resolution': ['VideoSuperResolutionDataset'], - 'image_semantic_segmentation': ['SegDataset'], - 'face_2d_keypoins': ['FaceKeypointDataset'], - 'hand_2d_keypoints': ['HandCocoWholeBodyDataset'], - 'human_wholebody_keypoint': ['WholeBodyCocoTopDownDataset'], - 'image_classification': ['ClsDataset'], - 'object_detection': ['DetDataset', 'DetImagesMixDataset'], 'ocr_detection': ['DataLoader', 'ImageDataset', 'QuadMeasurer'], 'ocr_recognition_dataset': ['OCRRecognitionDataset'], 'image_colorization': ['ImageColorizationDataset'], diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/__init__.py deleted file mode 100644 index e9d76b7e..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule - -if TYPE_CHECKING: - from .face_2d_keypoints_dataset import FaceKeypointDataset - -else: - _import_structure = {'face_2d_keypoints_dataset': ['FaceKeypointDataset']} - - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/face_2d_keypoints_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/face_2d_keypoints_dataset.py deleted file mode 100644 index 9f55901f..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/face_2d_keypoints_dataset.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from easycv.datasets.face import FaceKeypointDataset as _FaceKeypointDataset - -from modelscope.metainfo import CustomDatasets -from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS -from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \ - EasyCVBaseDataset -from modelscope.utils.constant import Tasks - - -@CUSTOM_DATASETS.register_module( - group_key=Tasks.face_2d_keypoints, - module_name=CustomDatasets.Face2dKeypointsDataset) -class FaceKeypointDataset(EasyCVBaseDataset, _FaceKeypointDataset): - """EasyCV dataset for face 2d keypoints. - - Args: - split_config (dict): Dataset root path from MSDataset, e.g. - {"train":"local cache path"} or {"evaluation":"local cache path"}. - preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for - the model if supplied. Not support yet. - mode: Training or Evaluation. - """ - - def __init__(self, - split_config=None, - preprocessor=None, - mode=None, - *args, - **kwargs) -> None: - EasyCVBaseDataset.__init__( - self, - split_config=split_config, - preprocessor=preprocessor, - mode=mode, - args=args, - kwargs=kwargs) - _FaceKeypointDataset.__init__(self, *args, **kwargs) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/__init__.py deleted file mode 100644 index 3af670e3..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule - -if TYPE_CHECKING: - from .hand_2d_keypoints_dataset import HandCocoWholeBodyDataset - -else: - _import_structure = { - 'hand_2d_keypoints_dataset': ['HandCocoWholeBodyDataset'] - } - - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/hand_2d_keypoints_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/hand_2d_keypoints_dataset.py deleted file mode 100644 index c6163715..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/hand_2d_keypoints_dataset.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-from easycv.datasets.pose import \ - HandCocoWholeBodyDataset as _HandCocoWholeBodyDataset - -from modelscope.metainfo import CustomDatasets -from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS -from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \ - EasyCVBaseDataset -from modelscope.utils.constant import Tasks - - -@CUSTOM_DATASETS.register_module( - group_key=Tasks.hand_2d_keypoints, - module_name=CustomDatasets.HandCocoWholeBodyDataset) -class HandCocoWholeBodyDataset(EasyCVBaseDataset, _HandCocoWholeBodyDataset): - """EasyCV dataset for human hand 2d keypoints. - - Args: - split_config (dict): Dataset root path from MSDataset, e.g. - {"train":"local cache path"} or {"evaluation":"local cache path"}. - preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for - the model if supplied. Not support yet. - mode: Training or Evaluation. - """ - - def __init__(self, - split_config=None, - preprocessor=None, - mode=None, - *args, - **kwargs) -> None: - EasyCVBaseDataset.__init__( - self, - split_config=split_config, - preprocessor=preprocessor, - mode=mode, - args=args, - kwargs=kwargs) - _HandCocoWholeBodyDataset.__init__(self, *args, **kwargs) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/__init__.py deleted file mode 100644 index 472ed2d8..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule - -if TYPE_CHECKING: - from .human_wholebody_keypoint_dataset import WholeBodyCocoTopDownDataset - -else: - _import_structure = { - 'human_wholebody_keypoint_dataset': ['WholeBodyCocoTopDownDataset'] - } - - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/human_wholebody_keypoint_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/human_wholebody_keypoint_dataset.py deleted file mode 100644 index 59c97af8..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/human_wholebody_keypoint_dataset.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from easycv.datasets.pose import \ - WholeBodyCocoTopDownDataset as _WholeBodyCocoTopDownDataset - -from modelscope.metainfo import CustomDatasets -from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS -from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \ - EasyCVBaseDataset -from modelscope.utils.constant import Tasks - - -@CUSTOM_DATASETS.register_module( - group_key=Tasks.human_wholebody_keypoint, - module_name=CustomDatasets.HumanWholeBodyKeypointDataset) -class WholeBodyCocoTopDownDataset(EasyCVBaseDataset, - _WholeBodyCocoTopDownDataset): - """EasyCV dataset for human whole body 2d keypoints. - - Args: - split_config (dict): Dataset root path from MSDataset, e.g. - {"train":"local cache path"} or {"evaluation":"local cache path"}. - preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for - the model if supplied. 
Not support yet. - mode: Training or Evaluation. - """ - - def __init__(self, - split_config=None, - preprocessor=None, - mode=None, - *args, - **kwargs) -> None: - EasyCVBaseDataset.__init__( - self, - split_config=split_config, - preprocessor=preprocessor, - mode=mode, - args=args, - kwargs=kwargs) - _WholeBodyCocoTopDownDataset.__init__(self, *args, **kwargs) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/__init__.py deleted file mode 100644 index 95e8d7a1..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule - -if TYPE_CHECKING: - from .classification_dataset import ClsDataset - -else: - _import_structure = {'classification_dataset': ['ClsDataset']} - - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/classification_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/classification_dataset.py deleted file mode 100644 index 386810c7..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/classification_dataset.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from easycv.datasets.classification import ClsDataset as _ClsDataset - -from modelscope.metainfo import CustomDatasets -from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS -from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \ - EasyCVBaseDataset -from modelscope.utils.constant import Tasks - - -@CUSTOM_DATASETS.register_module( - group_key=Tasks.image_classification, - module_name=CustomDatasets.ClsDataset) -class ClsDataset(_ClsDataset): - """EasyCV dataset for classification. - - Args: - split_config (dict): Dataset root path from MSDataset, e.g. - {"train":"local cache path"} or {"evaluation":"local cache path"}. - preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for - the model if supplied. Not support yet. - mode: Training or Evaluation. - """ - - def __init__(self, - split_config=None, - preprocessor=None, - mode=None, - *args, - **kwargs) -> None: - EasyCVBaseDataset.__init__( - self, - split_config=split_config, - preprocessor=preprocessor, - mode=mode, - args=args, - kwargs=kwargs) - _ClsDataset.__init__(self, *args, **kwargs) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/__init__.py deleted file mode 100644 index 26121bdb..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule - -if TYPE_CHECKING: - from .segmentation_dataset import SegDataset - -else: - _import_structure = {'easycv_segmentation': ['SegDataset']} - - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/segmentation_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/segmentation_dataset.py deleted file mode 100644 index 71e7c42b..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/segmentation_dataset.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from easycv.datasets.segmentation import SegDataset as _SegDataset - -from modelscope.metainfo import CustomDatasets -from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS -from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \ - EasyCVBaseDataset -from modelscope.utils.constant import Tasks - - -@CUSTOM_DATASETS.register_module( - group_key=Tasks.image_segmentation, module_name=CustomDatasets.SegDataset) -class SegDataset(EasyCVBaseDataset, _SegDataset): - """EasyCV dataset for Sementic segmentation. - For more details, please refer to : - https://github.com/alibaba/EasyCV/blob/master/easycv/datasets/segmentation/raw.py . - - Args: - split_config (dict): Dataset root path from MSDataset, e.g. - {"train":"local cache path"} or {"evaluation":"local cache path"}. - preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for - the model if supplied. Not support yet. - mode: Training or Evaluation. - data_source: Data source config to parse input data. - pipeline: Sequence of transform object or config dict to be composed. - ignore_index (int): Label index to be ignored. - profiling: If set True, will print transform time. - """ - - def __init__(self, - split_config=None, - preprocessor=None, - mode=None, - *args, - **kwargs) -> None: - EasyCVBaseDataset.__init__( - self, - split_config=split_config, - preprocessor=preprocessor, - mode=mode, - args=args, - kwargs=kwargs) - _SegDataset.__init__(self, *args, **kwargs) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/__init__.py deleted file mode 100644 index 403163e9..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule - -if TYPE_CHECKING: - from .detection_dataset import DetDataset, DetImagesMixDataset - -else: - _import_structure = { - 'detection_dataset': ['DetDataset', 'DetImagesMixDataset'] - } - - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/detection_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/detection_dataset.py deleted file mode 100644 index 66c11f64..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/detection_dataset.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. - -from easycv.datasets.detection import DetDataset as _DetDataset -from easycv.datasets.detection import \ - DetImagesMixDataset as _DetImagesMixDataset - -from modelscope.metainfo import CustomDatasets -from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS -from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \ - EasyCVBaseDataset -from modelscope.utils.constant import Tasks - - -@CUSTOM_DATASETS.register_module( - group_key=Tasks.image_object_detection, - module_name=CustomDatasets.DetDataset) -@CUSTOM_DATASETS.register_module( - group_key=Tasks.image_segmentation, module_name=CustomDatasets.DetDataset) -class DetDataset(EasyCVBaseDataset, _DetDataset): - """EasyCV dataset for object detection. - For more details, please refer to https://github.com/alibaba/EasyCV/blob/master/easycv/datasets/detection/raw.py . - - Args: - split_config (dict): Dataset root path from MSDataset, e.g. - {"train":"local cache path"} or {"evaluation":"local cache path"}. - preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for - the model if supplied. Not support yet. - mode: Training or Evaluation. - data_source: Data source config to parse input data. - pipeline: Transform config list - profiling: If set True, will print pipeline time - classes: A list of class names, used in evaluation for result and groundtruth visualization - """ - - def __init__(self, - split_config=None, - preprocessor=None, - mode=None, - *args, - **kwargs) -> None: - EasyCVBaseDataset.__init__( - self, - split_config=split_config, - preprocessor=preprocessor, - mode=mode, - args=args, - kwargs=kwargs) - _DetDataset.__init__(self, *args, **kwargs) - - -@CUSTOM_DATASETS.register_module( - group_key=Tasks.image_object_detection, - module_name=CustomDatasets.DetImagesMixDataset) -@CUSTOM_DATASETS.register_module( - group_key=Tasks.domain_specific_object_detection, - module_name=CustomDatasets.DetImagesMixDataset) -class DetImagesMixDataset(EasyCVBaseDataset, _DetImagesMixDataset): - """EasyCV dataset for object detection, a wrapper of multiple images mixed dataset. - Suitable for training on multiple images mixed data augmentation like - mosaic and mixup. For the augmentation pipeline of mixed image data, - the `get_indexes` method needs to be provided to obtain the image - indexes, and you can set `skip_flags` to change the pipeline running - process. At the same time, we provide the `dynamic_scale` parameter - to dynamically change the output image size. 
- output boxes format: cx, cy, w, h - - For more details, please refer to https://github.com/alibaba/EasyCV/blob/master/easycv/datasets/detection/mix.py . - - Args: - split_config (dict): Dataset root path from MSDataset, e.g. - {"train":"local cache path"} or {"evaluation":"local cache path"}. - preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for - the model if supplied. Not support yet. - mode: Training or Evaluation. - data_source (:obj:`DetSourceCoco`): Data source config to parse input data. - pipeline (Sequence[dict]): Sequence of transform object or - config dict to be composed. - dynamic_scale (tuple[int], optional): The image scale can be changed - dynamically. Default to None. - skip_type_keys (list[str], optional): Sequence of type string to - be skip pipeline. Default to None. - label_padding: out labeling padding [N, 120, 5] - """ - - def __init__(self, - split_config=None, - preprocessor=None, - mode=None, - *args, - **kwargs) -> None: - EasyCVBaseDataset.__init__( - self, - split_config=split_config, - preprocessor=preprocessor, - mode=mode, - args=args, - kwargs=kwargs) - _DetImagesMixDataset.__init__(self, *args, **kwargs) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_recognition_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_recognition_dataset.py index bc9cd3ca..bfbb6eb3 100644 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_recognition_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_recognition_dataset.py @@ -34,10 +34,12 @@ def Q2B(uchar): Tasks.ocr_recognition, module_name=Models.ocr_recognition) class OCRRecognitionDataset(TorchCustomDataset): - def __init__(self, **kwargs): + def __init__(self, local_lmdb=None, preprocessor=None, **kwargs): split_config = kwargs['split_config'] cache_root = next(iter(split_config.values())) lmdb_path = os.path.join(cache_root, DATASET_STRUCTURE['lmdb']) + if local_lmdb is not None: + lmdb_path = local_lmdb self.env = lmdb.open( lmdb_path, max_readers=1, @@ -51,7 +53,7 @@ class OCRRecognitionDataset(TorchCustomDataset): self.nSamples = 0 with self.env.begin(write=False) as txn: self.nSamples = int(txn.get('num-samples'.encode())) - self.reco_preprocess = kwargs['preprocessor'] + self.reco_preprocess = preprocessor def __len__(self): return self.nSamples diff --git a/modelscope/msdatasets/dataset_cls/dataset.py b/modelscope/msdatasets/dataset_cls/dataset.py index 4acf51b1..9114285e 100644 --- a/modelscope/msdatasets/dataset_cls/dataset.py +++ b/modelscope/msdatasets/dataset_cls/dataset.py @@ -4,11 +4,15 @@ import copy import os import datasets +import pandas as pd from datasets import IterableDataset -from PIL import Image -from modelscope.utils.constant import EXTENSIONS_TO_LOAD +from modelscope.msdatasets.utils.maxcompute_utils import MaxComputeUtil +from modelscope.utils.constant import (DEFAULT_MAXCOMPUTE_ENDPOINT, + EXTENSIONS_TO_LOAD, MaxComputeEnvs, + VirgoDatasetConfig) from modelscope.utils.logger import get_logger +from modelscope.utils.url_utils import fetch_csv_with_url, valid_url logger = get_logger() @@ -97,6 +101,7 @@ class NativeIterableDataset(IterableDataset): ex_cache_path = dl_manager.download_and_extract(v) ret[k] = ex_cache_path if k.endswith('Image:FILE'): + from PIL import Image ret[k + ':Object'] = Image.open(fp=ex_cache_path) if k.endswith('Audio:FILE'): import torchaudio @@ -108,3 +113,154 @@ class NativeIterableDataset(IterableDataset): def __len__(self): return 1 + + +class 
VirgoDataset(object): + """Dataset class for Virgo. + + Attributes: + _meta_content (str): Virgo meta data content, could be a url that contains csv file. + _data_type (int): Virgo dataset type, 0-Standard virgo dataset; Others-User define dataset (to be supported) + + Examples: + >>> from modelscope.msdatasets.dataset_cls.dataset import VirgoDataset + >>> input_kwargs = {'metaContent': 'http://xxx-xxx/xxx.csv', 'samplingType': 0} + >>> virgo_dataset = VirgoDataset(**input_kwargs) + >>> print(virgo_dataset[1]) + >>> print(len(virgo_dataset)) + >>> for line in virgo_dataset: + >>> print(line) + + Note: If you set `download_virgo_files` to True by using + MsDataset.load(dataset_name='your-virgo-dataset-id', hub=Hubs.virgo, download_virgo_files=True), + you can get the cache file path of the virgo dataset, the column name is `cache_file`. + >>> if virgo_dataset.download_virgo_files: + >>> print(virgo_dataset[1].get('cache_file')) + """ + + def __init__(self, **kwargs): + + self._meta_content: str = '' + self.data_type: int = 0 + self.odps_table_name: str = '' + self.odps_table_partition: str = None + self._odps_utils: MaxComputeUtil = None + self.config_kwargs = kwargs + + self._meta: pd.DataFrame = pd.DataFrame() + + self._meta_content = self.config_kwargs.pop( + VirgoDatasetConfig.meta_content, '') + self.data_type = self.config_kwargs.pop( + VirgoDatasetConfig.sampling_type, 0) + + self._check_variables() + self._parse_meta() + + self.meta_content_cache_file = '' + self.virgo_cache_dir = '' + self.download_virgo_files: bool = False + + self.odps_table_ins = None + self.odps_reader_ins = None + self.odps_batch_size = self.config_kwargs.pop('odps_batch_size', 100) + self.odps_limit = self.config_kwargs.pop('odps_limit', None) + self.odps_drop_last = self.config_kwargs.pop('odps_drop_last', False) + if self._odps_utils: + self.odps_table_ins, self.odps_reader_ins = self._odps_utils.get_table_reader_ins( + self.odps_table_name, self.odps_table_partition) + + def __getitem__(self, index): + if self.odps_reader_ins: + return MaxComputeUtil.gen_reader_item( + reader=self.odps_reader_ins, + index=index, + batch_size_in=self.odps_batch_size, + limit_in=self.odps_limit, + drop_last_in=self.odps_drop_last, + partitions=self.odps_table_ins.table_schema.partitions, + columns=self.odps_table_ins.table_schema.names) + return self._meta.iloc[index].to_dict() + + def __len__(self): + if isinstance(self._meta, dict): + return self._meta.get('odpsCount', 0) + return len(self._meta) + + def __iter__(self): + if self.odps_reader_ins: + odps_batch_data = MaxComputeUtil.gen_reader_batch( + reader=self.odps_reader_ins, + batch_size_in=self.odps_batch_size, + limit_in=self.odps_limit, + drop_last_in=self.odps_drop_last, + partitions=self.odps_table_ins.table_schema.partitions, + columns=self.odps_table_ins.table_schema.names) + for batch in odps_batch_data: + yield batch + else: + for _, row in self._meta.iterrows(): + yield row.to_dict() + + @property + def meta(self) -> pd.DataFrame: + """ + Virgo meta data. Contains columns: id, meta_info, analysis_result, external_info and + cache_file (if download_virgo_files is True). 
+ """ + return self._meta + + def _parse_meta(self): + # Fetch csv content + if isinstance(self._meta_content, str) and valid_url( + self._meta_content): + meta_content_df = fetch_csv_with_url(self._meta_content) + self._meta = meta_content_df + elif isinstance(self._meta_content, dict): + self._meta = self._meta_content + self.odps_table_name = self._meta.get('odpsTableName', '') + self.odps_table_partition = self._meta.get('odpsTablePartition', + None) + self._odps_utils = self._get_odps_info() + else: + raise 'The meta content must be url or dict.' + + @staticmethod + def _get_odps_info() -> MaxComputeUtil: + """ + Get MaxComputeUtil instance. + + Args: + None + + Returns: + MaxComputeUtil instance. + """ + access_id = os.environ.get(MaxComputeEnvs.ACCESS_ID, '') + access_key = os.environ.get(MaxComputeEnvs.ACCESS_SECRET_KEY, '') + proj_name = os.environ.get(MaxComputeEnvs.PROJECT_NAME, '') + endpoint = os.environ.get(MaxComputeEnvs.ENDPOINT, + DEFAULT_MAXCOMPUTE_ENDPOINT) + + if not access_id or not access_key or not proj_name: + raise ValueError( + f'Please set MaxCompute envs for Virgo: {MaxComputeEnvs.ACCESS_ID}, ' + f'{MaxComputeEnvs.ACCESS_SECRET_KEY}, {MaxComputeEnvs.PROJECT_NAME}, ' + f'{MaxComputeEnvs.ENDPOINT}(default: http://service-corp.odps.aliyun-inc.com/api)' + ) + + return MaxComputeUtil(access_id, access_key, proj_name, endpoint) + + def _check_variables(self): + """Check member variables in this class. + 1. Condition-1: self._meta_content cannot be empty + 2. Condition-2: self._meta_content must be url when self._data_type is 0 + """ + if not self._meta_content: + raise 'Them meta content cannot be empty.' + if self.data_type not in [0, 1]: + raise 'Supported samplingType should be 0 or 1, others are not supported yet.' + if self.data_type == 0 and not valid_url(self._meta_content): + raise 'The meta content must be url when data type is 0.' + if self.data_type == 1 and not isinstance(self._meta_content, dict): + raise 'The meta content must be dict when data type is 1.' 
diff --git a/modelscope/msdatasets/download/dataset_builder.py b/modelscope/msdatasets/download/dataset_builder.py index 73a3a1a1..8ad5243a 100644 --- a/modelscope/msdatasets/download/dataset_builder.py +++ b/modelscope/msdatasets/download/dataset_builder.py @@ -18,8 +18,8 @@ from datasets.utils.py_utils import map_nested from modelscope.hub.api import HubApi from modelscope.msdatasets.context.dataset_context_config import \ DatasetContextConfig -from modelscope.msdatasets.dataset_cls.dataset import (ExternalDataset, - NativeIterableDataset) +from modelscope.msdatasets.dataset_cls import (ExternalDataset, + NativeIterableDataset) from modelscope.msdatasets.download.download_manager import \ DataStreamingDownloadManager from modelscope.msdatasets.utils.dataset_utils import \ diff --git a/modelscope/msdatasets/meta/data_meta_manager.py b/modelscope/msdatasets/meta/data_meta_manager.py index d90b8d5e..0fa74c37 100644 --- a/modelscope/msdatasets/meta/data_meta_manager.py +++ b/modelscope/msdatasets/meta/data_meta_manager.py @@ -140,6 +140,14 @@ class DataMetaManager(object): self.dataset_context_config.data_meta_config = data_meta_config + def fetch_virgo_meta(self) -> None: + virgo_dataset_id = self.dataset_context_config.dataset_name + version = int(self.dataset_context_config.version) + + meta_content = self.api.get_virgo_meta( + dataset_id=virgo_dataset_id, version=version) + self.dataset_context_config.config_kwargs.update(meta_content) + def _fetch_meta_from_cache(self, meta_cache_dir): local_paths = defaultdict(list) dataset_type = None diff --git a/modelscope/msdatasets/ms_dataset.py b/modelscope/msdatasets/ms_dataset.py index 0a88eb91..912e061d 100644 --- a/modelscope/msdatasets/ms_dataset.py +++ b/modelscope/msdatasets/ms_dataset.py @@ -13,13 +13,14 @@ from datasets.utils.file_utils import is_relative_path from modelscope.hub.repository import DatasetRepository from modelscope.msdatasets.context.dataset_context_config import \ DatasetContextConfig +from modelscope.msdatasets.data_loader.data_loader import VirgoDownloader from modelscope.msdatasets.data_loader.data_loader_manager import ( LocalDataLoaderManager, LocalDataLoaderType, RemoteDataLoaderManager, RemoteDataLoaderType) +from modelscope.msdatasets.dataset_cls import (ExternalDataset, + NativeIterableDataset) from modelscope.msdatasets.dataset_cls.custom_datasets.builder import \ build_custom_dataset -from modelscope.msdatasets.dataset_cls.dataset import (ExternalDataset, - NativeIterableDataset) from modelscope.msdatasets.utils.delete_utils import DatasetDeleteManager from modelscope.msdatasets.utils.upload_utils import DatasetUploadManager from modelscope.preprocessors import build_preprocessor @@ -28,7 +29,7 @@ from modelscope.utils.config_ds import MS_DATASETS_CACHE from modelscope.utils.constant import (DEFAULT_DATASET_NAMESPACE, DEFAULT_DATASET_REVISION, ConfigFields, DownloadMode, Hubs, ModeKeys, Tasks, - UploadMode) + UploadMode, VirgoDatasetConfig) from modelscope.utils.import_utils import is_tf_available, is_torch_available from modelscope.utils.logger import get_logger @@ -188,9 +189,6 @@ class MsDataset: data_files (str or Sequence or Mapping, optional): Path(s) to source data file(s). split (str, optional): Which split of the data to load. hub (Hubs or str, optional): When loading from a remote hub, where it is from. default Hubs.modelscope - download_mode (DownloadMode or str, optional): - How to treat existing datasets. 
default DownloadMode.REUSE_DATASET_IF_EXISTS - config_kwargs (additional keyword arguments): Keyword arguments to be passed download_mode (DownloadMode or str, optional): How to treat existing datasets. default DownloadMode.REUSE_DATASET_IF_EXISTS cache_dir (str, Optional): User-define local cache directory. @@ -287,6 +285,23 @@ class MsDataset: custom_cfg=custom_cfg, **config_kwargs) dataset_inst.is_custom = True return dataset_inst + elif hub == Hubs.virgo: + # Rewrite the namespace, version and cache_dir for virgo dataset. + if namespace == DEFAULT_DATASET_NAMESPACE: + dataset_context_config.namespace = VirgoDatasetConfig.default_virgo_namespace + if version == DEFAULT_DATASET_REVISION: + dataset_context_config.version = VirgoDatasetConfig.default_dataset_version + if cache_dir == MS_DATASETS_CACHE: + from modelscope.utils.config_ds import CACHE_HOME + cache_dir = os.path.join(CACHE_HOME, 'virgo', 'hub', + 'datasets') + dataset_context_config.cache_root_dir = cache_dir + + virgo_downloader = VirgoDownloader(dataset_context_config) + virgo_downloader.process() + + return virgo_downloader.dataset + else: raise 'Please adjust input args to specify a loading mode, we support following scenes: ' \ 'loading from local disk, huggingface hub and modelscope hub.' diff --git a/modelscope/msdatasets/utils/maxcompute_utils.py b/modelscope/msdatasets/utils/maxcompute_utils.py new file mode 100644 index 00000000..83c6370d --- /dev/null +++ b/modelscope/msdatasets/utils/maxcompute_utils.py @@ -0,0 +1,160 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import math + +import pandas as pd + + +class MaxComputeUtil: + """ + MaxCompute util class. + + Args: + access_id: your access id of MaxCompute + access_key: access key of MaxCompute + project_name: your project name of MaxCompute + endpoint: endpoint of MaxCompute + + Attributes: + _odps: ODPS object + + """ + + def __init__(self, access_id, access_key, project_name, endpoint): + from odps import ODPS + self._odps = ODPS(access_id, access_key, project_name, endpoint) + + def _get_table(self, table_name): + """ + Get MaxCompute table object. + """ + return self._odps.get_table(table_name) + + def _read_data(self, table_name: str, pt_condition: str) -> pd.DataFrame: + """ + Read data from MaxCompute table. + :param table_name: table name + :param pt_condition: partition condition, + Example: pt_condition = 'dt=20230331' + :return: pandas dataframe with all data + """ + t = self._get_table(table_name) + + with t.open_reader(partition=pt_condition, limit=False) as reader: + pd_df = reader.to_pandas() + + return pd_df + + def fetch_data_to_csv(self, table_name: str, pt_condition: str, + output_path: str) -> None: + """ + Fetch data from MaxCompute table to local file. + :param table_name: table name + :param pt_condition: partition condition, + Example: pt_condition = 'dt=20230331' + :param output_path: output path + :return: None + """ + pd_df = self._read_data(table_name, pt_condition) + pd_df.to_csv(output_path, index=False) + print(f'Fetch data to {output_path} successfully.') + + @staticmethod + def _check_batch_args(reader, batch_size, limit): + if not limit: + limit = reader.count + if batch_size <= 0: + raise ValueError( + f'batch_size must be positive, but got {batch_size}') + if batch_size > limit: + batch_size = limit + return batch_size, limit + + @staticmethod + def gen_reader_batch(reader, batch_size_in: int, limit_in: int, + drop_last_in: bool, partitions: list, columns: list): + """ + Generate batch data from MaxCompute table. 
+ + Args: + reader: MaxCompute table reader + batch_size_in: batch size + limit_in: limit of data, None means fetch all data + drop_last_in: whether drop last incomplete batch data + partitions: table partitions + columns: table columns + + Returns: + batch data generator + """ + + batch_size_in, limit_in = MaxComputeUtil._check_batch_args( + reader, batch_size_in, limit_in) + + batch_num = math.floor(limit_in / batch_size_in) + for i in range(batch_num + 1): + if i == batch_num and not drop_last_in and limit_in % batch_size_in > 0: + batch_records = reader[i * batch_size_in:( + i * batch_size_in + (limit_in % batch_size_in))] + else: + batch_records = reader[i * batch_size_in:(i + 1) + * batch_size_in] + batch_data_list = [] + for record in batch_records: + tmp_vals = [val for _, val in list(record)] + tmp_vals = tmp_vals[:(len(tmp_vals) - len(partitions))] + batch_data_list.append(tmp_vals) + yield pd.DataFrame(batch_data_list, columns=columns) + + @staticmethod + def gen_reader_item(reader, index: int, batch_size_in: int, limit_in: int, + drop_last_in: bool, partitions: list, columns: list): + """ + Get single batch data from MaxCompute table by indexing. + + Args: + reader: MaxCompute table reader + index: index of batch data + batch_size_in: batch size + limit_in: limit of data, None means fetch all data + drop_last_in: whether drop last incomplete batch data + partitions: table partitions + columns: table columns + + Returns: + single batch data (dataframe) + """ + batch_size_in, limit_in = MaxComputeUtil._check_batch_args( + reader, batch_size_in, limit_in) + + if drop_last_in: + batch_num = math.floor(limit_in / batch_size_in) + else: + batch_num = math.ceil(limit_in / batch_size_in) + + if index < 0: + raise ValueError(f'index must be non-negative, but got {index}') + if index >= batch_num: + raise ValueError( + f'index must be less than batch_num, but got index={index}, batch_num={batch_num}' + ) + + start = index * batch_size_in + end = (index + 1) * batch_size_in + if end > limit_in: + end = limit_in + batch_item = reader[start:end] + + batch_data_list = [] + for record in batch_item: + tmp_vals = [val for _, val in list(record)] + tmp_vals = tmp_vals[:(len(tmp_vals) - len(partitions))] + batch_data_list.append(tmp_vals) + + return pd.DataFrame(batch_data_list, columns=columns) + + def get_table_reader_ins(self, table_name: str, pt_condition: str = None): + + table_ins = self._get_table(table_name) + with table_ins.open_reader(partition=pt_condition) as reader: + return table_ins, reader diff --git a/modelscope/outputs/nlp_outputs.py b/modelscope/outputs/nlp_outputs.py index e288df70..d6b934c2 100644 --- a/modelscope/outputs/nlp_outputs.py +++ b/modelscope/outputs/nlp_outputs.py @@ -454,3 +454,13 @@ class SentencEmbeddingModelOutput(ModelOutputBase): query_embeddings: Tensor = None doc_embeddings: Tensor = None loss: Tensor = None + + +@dataclass +class TranslationEvaluationOutput(ModelOutputBase): + """The output class for translation evaluation models. + """ + + score: Tensor = None + loss: Tensor = None + input_format: List[str] = None diff --git a/modelscope/outputs/outputs.py b/modelscope/outputs/outputs.py index ddbe4593..ab24a34c 100644 --- a/modelscope/outputs/outputs.py +++ b/modelscope/outputs/outputs.py @@ -1,6 +1,10 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
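# Looking back at the `MaxComputeUtil` helpers added above: `get_table_reader_ins` opens a table reader once, and `gen_reader_batch` / `gen_reader_item` slice it into pandas frames of `batch_size` rows with partition values stripped. A hedged usage sketch; the env variable names, table name and partition below are placeholders, not the constants defined in `MaxComputeEnvs`:

```python
import os

from modelscope.msdatasets.utils.maxcompute_utils import MaxComputeUtil

# Placeholder credentials and table; real values come from your MaxCompute project.
util = MaxComputeUtil(
    access_id=os.environ['ODPS_ACCESS_ID'],
    access_key=os.environ['ODPS_ACCESS_SECRET_KEY'],
    project_name=os.environ['ODPS_PROJECT_NAME'],
    endpoint=os.environ.get('ODPS_ENDPOINT',
                            'http://service-corp.odps.aliyun-inc.com/api'))

table_ins, reader = util.get_table_reader_ins(
    'my_table', pt_condition='dt=20230331')
for batch_df in MaxComputeUtil.gen_reader_batch(
        reader,
        batch_size_in=100,
        limit_in=1000,
        drop_last_in=False,
        partitions=table_ins.table_schema.partitions,
        columns=table_ins.table_schema.names):
    print(batch_df.shape)
```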
from collections import OrderedDict, namedtuple from dataclasses import dataclass, fields +from typing import Dict, List, Tuple + +import numpy as np +import torch from modelscope.utils.constant import Tasks @@ -50,7 +54,7 @@ class OutputKeys(object): SQL_STRING = 'sql_string' SQL_QUERY = 'sql_query' HISTORY = 'history' - QUERT_RESULT = 'query_result' + QUERY_RESULT = 'query_result' TIMESTAMPS = 'timestamps' SHOT_NUM = 'shot_num' SCENE_NUM = 'scene_num' @@ -62,8 +66,343 @@ class OutputKeys(object): TBOUNDS = 'tbounds' -TASK_OUTPUTS = { +OutputTypes = { + OutputKeys.LOSS: float, # checked + OutputKeys.LOGITS: np.ndarray, # checked. + OutputKeys.SCORES: List[float], # checked + OutputKeys.SCORE: float, # checked + OutputKeys.LABEL: str, # checked + OutputKeys.LABELS: List[str], # checked + OutputKeys.INPUT_IDS: np.ndarray, # checked + OutputKeys.LABEL_POS: np.ndarray, # checked + OutputKeys.POSES: + List[np.ndarray], # [Tuple(np.ndarray, np.ndarray)] # checked doubtful + OutputKeys.CAPTION: str, + OutputKeys.BOXES: np.ndarray, # checked + OutputKeys.KEYPOINTS: np.ndarray, # checked + OutputKeys.MASKS: np.ndarray, # checked + OutputKeys.DEPTHS: List[np.ndarray], # checked + OutputKeys.DEPTHS_COLOR: List[np.ndarray], # checked + OutputKeys.LAYOUT: np.ndarray, # checked + OutputKeys.TEXT: str, # checked + OutputKeys.POLYGONS: np.array, # checked + OutputKeys.OUTPUT: Dict, + OutputKeys.OUTPUT_IMG: 'image', # checked + OutputKeys.OUTPUT_IMGS: List[np.ndarray], # checked + OutputKeys.OUTPUT_VIDEO: 'bytes', + OutputKeys.OUTPUT_PCM: np.ndarray, + OutputKeys.OUTPUT_PCM_LIST: List[np.ndarray], + OutputKeys.OUTPUT_WAV: np.ndarray, + OutputKeys.OUTPUT_OBJ: Dict, + OutputKeys.OUTPUT_MESH: np.ndarray, + OutputKeys.IMG_EMBEDDING: np.ndarray, + OutputKeys.SPK_EMBEDDING: np.ndarray, + OutputKeys.SPO_LIST: List[float], + OutputKeys.TEXT_EMBEDDING: np.ndarray, + OutputKeys.TRANSLATION: str, + OutputKeys.RESPONSE: Dict, + OutputKeys.PREDICTION: np.ndarray, # checked + OutputKeys.PREDICTIONS: List[np.ndarray], + OutputKeys.PROBABILITIES: np.ndarray, + OutputKeys.DIALOG_STATES: object, + OutputKeys.VIDEO_EMBEDDING: np.ndarray, + OutputKeys.UUID: str, + OutputKeys.WORD: str, + OutputKeys.KWS_LIST: List[str], + OutputKeys.SQL_STRING: str, # checked + OutputKeys.SQL_QUERY: str, # checked + OutputKeys.HISTORY: Dict, # checked + OutputKeys.QUERY_RESULT: Dict, # checked + OutputKeys.TIMESTAMPS: str, + OutputKeys.SHOT_NUM: int, + OutputKeys.SCENE_NUM: int, + OutputKeys.SCENE_META_LIST: List[int], + OutputKeys.SHOT_META_LIST: List[int], + OutputKeys.MATCHES: List[np.ndarray], + OutputKeys.PCD12: np.ndarray, + OutputKeys.PCD12_ALIGN: np.ndarray, + OutputKeys.TBOUNDS: Dict, +} +OutputTypeSchema = { + OutputKeys.LOSS: { + 'type': 'number' + }, # checked + OutputKeys.LOGITS: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, # checked. 
+ OutputKeys.SCORES: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, # checked + OutputKeys.SCORE: { + 'type': 'number' + }, # checked + OutputKeys.LABEL: { + 'type': 'string' + }, # checked + OutputKeys.LABELS: { + 'type': 'array', + 'items': { + 'type': 'string' + } + }, # checked + OutputKeys.INPUT_IDS: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, # checked + OutputKeys.LABEL_POS: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, # checked + OutputKeys.POSES: { + 'type': 'array', + 'items': { + 'type': 'array', + 'items': { + 'type': 'number' + } + } + }, # [Tuple(np.ndarray, np.ndarray)] # checked doubtful + OutputKeys.CAPTION: { + 'type': 'string' + }, + OutputKeys.BOXES: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, # checked + OutputKeys.KEYPOINTS: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, # checked + OutputKeys.MASKS: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, # checked + OutputKeys.DEPTHS: { + 'type': 'array', + 'items': { + 'type': 'array', + 'items': { + 'type': 'number' + } + } + }, # checked + OutputKeys.DEPTHS_COLOR: { + 'type': 'array', + 'items': { + 'type': 'array', + 'items': { + 'type': 'number' + } + } + }, # checked + OutputKeys.LAYOUT: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, # checked + OutputKeys.TEXT: { + 'type': 'string' + }, # checked + OutputKeys.POLYGONS: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, # checked + OutputKeys.OUTPUT: { + 'type': 'object' + }, + OutputKeys.OUTPUT_IMG: { + 'type': 'string', + 'description': 'The base64 encoded image.', + }, # checked + OutputKeys.OUTPUT_IMGS: { + 'type': 'array', + 'items': { + 'type': 'string', + 'description': 'The base64 encoded image.', + } + }, # checked + OutputKeys.OUTPUT_VIDEO: { + 'type': 'string', + 'description': 'The base64 encoded video.', + }, + OutputKeys.OUTPUT_PCM: { + 'type': 'string', + 'description': 'The base64 encoded PCM.', + }, + OutputKeys.OUTPUT_PCM_LIST: { + 'type': 'array', + 'items': { + 'type': 'string', + 'description': 'The base64 encoded PCM.', + } + }, + OutputKeys.OUTPUT_WAV: { + 'type': 'string', + 'description': 'The base64 encoded WAV.', + }, + OutputKeys.OUTPUT_OBJ: { + 'type': 'object' + }, + OutputKeys.OUTPUT_MESH: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, + OutputKeys.IMG_EMBEDDING: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, + OutputKeys.SPK_EMBEDDING: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, + OutputKeys.SPO_LIST: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, + OutputKeys.TEXT_EMBEDDING: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, + OutputKeys.TRANSLATION: { + 'type': 'string' + }, + OutputKeys.RESPONSE: { + 'type': 'object' + }, + OutputKeys.PREDICTION: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, # checked + OutputKeys.PREDICTIONS: { + 'type': 'array', + 'items': { + 'type': 'array', + 'items': { + 'type': 'number' + } + } + }, + OutputKeys.PROBABILITIES: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, + OutputKeys.DIALOG_STATES: { + 'type': 'object' + }, + OutputKeys.VIDEO_EMBEDDING: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, + OutputKeys.UUID: { + 'type': 'string' + }, + OutputKeys.WORD: { + 'type': 'string' + }, + OutputKeys.KWS_LIST: { + 'type': 'array', + 'items': { + 'type': 'string' + } + }, + OutputKeys.SQL_STRING: { + 'type': 'string' + }, # checked + OutputKeys.SQL_QUERY: { + 'type': 
'string' + }, # checked + OutputKeys.HISTORY: { + 'type': 'object' + }, # checked + OutputKeys.QUERY_RESULT: { + 'type': 'object' + }, # checked + OutputKeys.TIMESTAMPS: { + 'type': 'string' + }, + OutputKeys.SHOT_NUM: { + 'type': 'integer' + }, + OutputKeys.SCENE_NUM: { + 'type': 'integer' + }, + OutputKeys.SCENE_META_LIST: { + 'type': 'array', + 'items': { + 'type': 'integer' + } + }, + OutputKeys.SHOT_META_LIST: { + 'type': 'array', + 'items': { + 'type': 'integer' + } + }, + OutputKeys.MATCHES: { + 'type': 'array', + 'items': { + 'type': 'array', + 'items': { + 'type': 'number' + } + } + }, + OutputKeys.PCD12: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, + OutputKeys.PCD12_ALIGN: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, + OutputKeys.TBOUNDS: { + 'type': 'object' + }, +} + +TASK_OUTPUTS = { + Tasks.task_template: + [OutputKeys.BOXES, OutputKeys.OUTPUT_IMG, OutputKeys.TEXT_EMBEDDING], # ============ vision tasks =================== # ocr detection result for single sample @@ -388,8 +727,9 @@ TASK_OUTPUTS = { # "scores": [0.885272, 0.014790631, 0.014558001] # "labels": ['噪声强度', '模糊程度', '压缩强度'], # } - Tasks.image_quality_assessment_degradation: - [OutputKeys.SCORES, OutputKeys.LABELS], + Tasks.image_quality_assessment_degradation: [ + OutputKeys.SCORES, OutputKeys.LABELS + ], # live category recognition result for single video # { @@ -1029,6 +1369,10 @@ TASK_OUTPUTS = { # {"text": "this is a text answser. "} Tasks.video_question_answering: [OutputKeys.TEXT], + # Multimodal Dialogue result for a sample + # {"text": "this is a text response. "} + Tasks.multimodal_dialogue: [OutputKeys.TEXT], + # auto_speech_recognition result for a single sample # { # "text": "每天都要快乐喔" @@ -1107,9 +1451,9 @@ TASK_OUTPUTS = { # } Tasks.image_skychange: [OutputKeys.OUTPUT_IMG], # { - # 'scores': [0.1, 0.2, 0.3, ...] + # 'score': [0.1, 0.2, 0.3, ...] # } - Tasks.translation_evaluation: [OutputKeys.SCORES], + Tasks.translation_evaluation: [OutputKeys.SCORE], # video object segmentation result for a single video # { @@ -1140,6 +1484,7 @@ TASK_OUTPUTS = { Tasks.document_grounded_dialog_rerank: [OutputKeys.OUTPUT], Tasks.document_grounded_dialog_retrieval: [OutputKeys.OUTPUT], Tasks.video_temporal_grounding: [OutputKeys.SCORES, OutputKeys.TBOUNDS], + Tasks.text_to_video_synthesis: [OutputKeys.OUTPUT_VIDEO], } diff --git a/modelscope/pipeline_inputs.py b/modelscope/pipeline_inputs.py index 032bdff6..8cb031e7 100644 --- a/modelscope/pipeline_inputs.py +++ b/modelscope/pipeline_inputs.py @@ -20,7 +20,7 @@ class InputType(object): BOX = 'box' DICT = 'dict' LIST = 'list' - INT = 'int' + NUMBER = 'number' INPUT_TYPE = { @@ -31,7 +31,42 @@ INPUT_TYPE = { InputType.BOX: (list, np.ndarray), InputType.DICT: (dict, type(None)), InputType.LIST: (list, type(None)), - InputType.INT: int, + InputType.NUMBER: int, +} + +INPUT_TYPE_SCHEMA = { + InputType.IMAGE: { + 'type': 'string', + 'description': 'Base64 encoded image file or url string.' + }, # support url or base64 encoded file. + InputType.AUDIO: { + 'type': 'string', + 'description': 'Base64 encoded audio file or url string..' + }, # support url or base64 encoded file. + InputType.VIDEO: { + 'type': 'string', + 'description': 'Base64 encoded video file or url string..' + }, # support url or base64 encoded file. + InputType.TEXT: { + 'type': 'string', + 'description': 'The input text.' 
+ }, + InputType.BOX: { + 'type': 'array', + 'description': 'Box coordinate, should be int.', + 'items': { + 'type': 'number' + } + }, + InputType.DICT: { # unknown properties + 'type': 'object', + }, + InputType.LIST: { + 'type': 'array' + }, # unknown item type. + InputType.NUMBER: { + 'type': 'integer' + }, } @@ -47,12 +82,19 @@ def check_input_type(input_type, input): TASK_INPUTS = { + + Tasks.task_template: { + 'image': InputType.IMAGE, + 'text': InputType.TEXT + }, # if task input is single var, value is InputType # if task input is a tuple, value is tuple of InputType # if task input is a dict, value is a dict of InputType, where key # equals the one needed in pipeline input dict # if task input is a list, value is a set of input format, in which - # each element corresponds to one input format as described above. + # each element corresponds to one input format as described above and + # must include a dict format. + # ============ vision tasks =================== Tasks.ocr_detection: InputType.IMAGE, @@ -73,7 +115,7 @@ TASK_INPUTS = { Tasks.human_detection: InputType.IMAGE, Tasks.face_image_generation: - InputType.INT, + InputType.NUMBER, Tasks.image_classification: InputType.IMAGE, Tasks.image_object_detection: @@ -191,8 +233,7 @@ TASK_INPUTS = { Tasks.nli: (InputType.TEXT, InputType.TEXT), Tasks.sentiment_classification: InputType.TEXT, - Tasks.zero_shot_classification: - InputType.TEXT, + Tasks.zero_shot_classification: InputType.TEXT, Tasks.relation_extraction: InputType.TEXT, Tasks.translation: @@ -212,7 +253,13 @@ TASK_INPUTS = { 'source_sentence': InputType.LIST, 'sentences_to_compare': InputType.LIST, }, - Tasks.text_ranking: (InputType.TEXT, InputType.TEXT), + Tasks.text_ranking: [ + (InputType.TEXT, InputType.TEXT), + { + 'source_sentence': InputType.LIST, + 'sentences_to_compare': InputType.LIST + } + ], Tasks.text_generation: InputType.TEXT, Tasks.fid_dialogue: { @@ -261,7 +308,7 @@ TASK_INPUTS = { }, # ============ audio tasks =================== - Tasks.auto_speech_recognition: + Tasks.auto_speech_recognition: # input can be audio, or audio and text. [InputType.AUDIO, { 'wav': InputType.AUDIO, 'text': InputType.TEXT @@ -290,6 +337,9 @@ TASK_INPUTS = { Tasks.video_captioning: [InputType.VIDEO, { 'video': InputType.VIDEO, }], + Tasks.multimodal_dialogue: { + 'messages': InputType.LIST, + }, Tasks.visual_grounding: { 'image': InputType.IMAGE, 'text': InputType.TEXT @@ -332,5 +382,9 @@ TASK_INPUTS = { 'video_input_path': InputType.TEXT, 'video_output_path': InputType.TEXT, 'mask_path': InputType.TEXT, - } + }, + Tasks.text_to_video_synthesis: { + 'text': InputType.TEXT + }, + Tasks.video_summarization: InputType.TEXT, } diff --git a/modelscope/pipelines/__init__.py b/modelscope/pipelines/__init__.py index 71fe307b..d98a7af9 100644 --- a/modelscope/pipelines/__init__.py +++ b/modelscope/pipelines/__init__.py @@ -1,7 +1,5 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -from typing import TYPE_CHECKING -from modelscope.utils.import_utils import LazyImportModule from . 
import audio, cv, multi_modal, nlp from .base import Pipeline from .builder import pipeline diff --git a/modelscope/pipelines/audio/asr_inference_pipeline.py b/modelscope/pipelines/audio/asr_inference_pipeline.py index b5a4cba7..b9c0bd03 100644 --- a/modelscope/pipelines/audio/asr_inference_pipeline.py +++ b/modelscope/pipelines/audio/asr_inference_pipeline.py @@ -54,6 +54,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): lm_model_revision: Optional[str] = None, timestamp_model: Optional[Union[Model, str]] = None, timestamp_model_revision: Optional[str] = None, + ngpu: int = 1, **kwargs): """ Use `model` and `preprocessor` to create an asr pipeline for prediction @@ -87,7 +88,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): beam_size('int'): beam size for decoding ctc_weight('float'): - CTC weight in joint decoding + the CTC weight in joint decoding lm_weight('float'): lm weight decoding_ind('int', defaults to 0): @@ -119,48 +120,48 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): self.model_cfg = self.model.forward() self.cmd = self.get_cmd(kwargs, model) - if self.cmd['code_base'] == 'funasr': - from funasr.bin import asr_inference_launch - self.funasr_infer_modelscope = asr_inference_launch.inference_launch( - mode=self.cmd['mode'], - maxlenratio=self.cmd['maxlenratio'], - minlenratio=self.cmd['minlenratio'], - batch_size=self.cmd['batch_size'], - beam_size=self.cmd['beam_size'], - ngpu=self.cmd['ngpu'], - ctc_weight=self.cmd['ctc_weight'], - lm_weight=self.cmd['lm_weight'], - penalty=self.cmd['penalty'], - log_level=self.cmd['log_level'], - asr_train_config=self.cmd['asr_train_config'], - asr_model_file=self.cmd['asr_model_file'], - cmvn_file=self.cmd['cmvn_file'], - lm_file=self.cmd['lm_file'], - token_type=self.cmd['token_type'], - key_file=self.cmd['key_file'], - lm_train_config=self.cmd['lm_train_config'], - bpemodel=self.cmd['bpemodel'], - allow_variable_data_keys=self.cmd['allow_variable_data_keys'], - output_dir=self.cmd['output_dir'], - dtype=self.cmd['dtype'], - seed=self.cmd['seed'], - ngram_weight=self.cmd['ngram_weight'], - nbest=self.cmd['nbest'], - num_workers=self.cmd['num_workers'], - vad_infer_config=self.cmd['vad_infer_config'], - vad_model_file=self.cmd['vad_model_file'], - vad_cmvn_file=self.cmd['vad_cmvn_file'], - punc_model_file=self.cmd['punc_model_file'], - punc_infer_config=self.cmd['punc_infer_config'], - timestamp_model_file=self.cmd['timestamp_model_file'], - timestamp_infer_config=self.cmd['timestamp_infer_config'], - timestamp_cmvn_file=self.cmd['timestamp_cmvn_file'], - outputs_dict=self.cmd['outputs_dict'], - param_dict=self.cmd['param_dict'], - token_num_relax=self.cmd['token_num_relax'], - decoding_ind=self.cmd['decoding_ind'], - decoding_mode=self.cmd['decoding_mode'], - ) + from funasr.bin import asr_inference_launch + self.funasr_infer_modelscope = asr_inference_launch.inference_launch( + mode=self.cmd['mode'], + maxlenratio=self.cmd['maxlenratio'], + minlenratio=self.cmd['minlenratio'], + batch_size=self.cmd['batch_size'], + beam_size=self.cmd['beam_size'], + ngpu=self.cmd['ngpu'], + ctc_weight=self.cmd['ctc_weight'], + lm_weight=self.cmd['lm_weight'], + penalty=self.cmd['penalty'], + log_level=self.cmd['log_level'], + asr_train_config=self.cmd['asr_train_config'], + asr_model_file=self.cmd['asr_model_file'], + cmvn_file=self.cmd['cmvn_file'], + lm_file=self.cmd['lm_file'], + token_type=self.cmd['token_type'], + key_file=self.cmd['key_file'], + lm_train_config=self.cmd['lm_train_config'], + bpemodel=self.cmd['bpemodel'], + 
allow_variable_data_keys=self.cmd['allow_variable_data_keys'], + output_dir=self.cmd['output_dir'], + dtype=self.cmd['dtype'], + seed=self.cmd['seed'], + ngram_weight=self.cmd['ngram_weight'], + nbest=self.cmd['nbest'], + num_workers=self.cmd['num_workers'], + vad_infer_config=self.cmd['vad_infer_config'], + vad_model_file=self.cmd['vad_model_file'], + vad_cmvn_file=self.cmd['vad_cmvn_file'], + punc_model_file=self.cmd['punc_model_file'], + punc_infer_config=self.cmd['punc_infer_config'], + timestamp_model_file=self.cmd['timestamp_model_file'], + timestamp_infer_config=self.cmd['timestamp_infer_config'], + timestamp_cmvn_file=self.cmd['timestamp_cmvn_file'], + outputs_dict=self.cmd['outputs_dict'], + param_dict=self.cmd['param_dict'], + token_num_relax=self.cmd['token_num_relax'], + decoding_ind=self.cmd['decoding_ind'], + decoding_mode=self.cmd['decoding_mode'], + **kwargs, + ) def __call__(self, audio_in: Union[str, bytes], @@ -197,7 +198,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): """ # code base - code_base = self.cmd['code_base'] + # code_base = self.cmd['code_base'] self.recog_type = recog_type self.audio_format = audio_format self.audio_fs = None @@ -207,31 +208,21 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): self.cmd['output_dir'] = output_dir self.cmd['param_dict'] = param_dict - if code_base == 'funasr': - if isinstance(audio_in, str): - # for funasr code, generate wav.scp from url or local path - self.audio_in, self.raw_inputs = generate_scp_from_url( - audio_in) - elif isinstance(audio_in, bytes): - self.audio_in = audio_in - self.raw_inputs = None - else: - import numpy - import torch - if isinstance(audio_in, torch.Tensor): - self.audio_in = None - self.raw_inputs = audio_in - elif isinstance(audio_in, numpy.ndarray): - self.audio_in = None - self.raw_inputs = audio_in - elif isinstance(audio_in, str): - # load pcm data from url if audio_in is url str - self.audio_in, checking_audio_fs = load_bytes_from_url(audio_in) + if isinstance(audio_in, str): + # for funasr code, generate wav.scp from url or local path + self.audio_in, self.raw_inputs = generate_scp_from_url(audio_in) elif isinstance(audio_in, bytes): - # load pcm data from wav data if audio_in is wave format - self.audio_in, checking_audio_fs = extract_pcm_from_wav(audio_in) - else: self.audio_in = audio_in + self.raw_inputs = None + else: + import numpy + import torch + if isinstance(audio_in, torch.Tensor): + self.audio_in = None + self.raw_inputs = audio_in + elif isinstance(audio_in, numpy.ndarray): + self.audio_in = None + self.raw_inputs = audio_in # set the sample_rate of audio_in if checking_audio_fs is valid if checking_audio_fs is not None: @@ -265,12 +256,6 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): if self.preprocessor is None: self.preprocessor = WavToScp() - # pipeline() from pipelines/builder.py passes 'device' but 'ngpu' needed here - device = extra_args.get('device') - if device == 'cpu': - extra_args['ngpu'] = 0 - elif device == 'gpu': - extra_args['ngpu'] = 1 outputs = self.preprocessor.config_checking(self.model_cfg) # generate asr inference command cmd = { @@ -323,109 +308,88 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): } } - if self.framework == Frameworks.torch: - frontend_conf = None - token_num_relax = None - decoding_ind = None - decoding_mode = None - if os.path.exists(outputs['am_model_config']): - config_file = open( - outputs['am_model_config'], encoding='utf-8') - root = yaml.full_load(config_file) - config_file.close() - if 'frontend_conf' in 
root: - frontend_conf = root['frontend_conf'] - if os.path.exists(outputs['asr_model_config']): - config_file = open( - outputs['asr_model_config'], encoding='utf-8') - root = yaml.full_load(config_file) - config_file.close() - if 'token_num_relax' in root: - token_num_relax = root['token_num_relax'] - if 'decoding_ind' in root: - decoding_ind = root['decoding_ind'] - if 'decoding_mode' in root: - decoding_mode = root['decoding_mode'] + frontend_conf = None + token_num_relax = None + decoding_ind = None + decoding_mode = None + if os.path.exists(outputs['am_model_config']): + config_file = open(outputs['am_model_config'], encoding='utf-8') + root = yaml.full_load(config_file) + config_file.close() + if 'frontend_conf' in root: + frontend_conf = root['frontend_conf'] + if os.path.exists(outputs['asr_model_config']): + config_file = open(outputs['asr_model_config'], encoding='utf-8') + root = yaml.full_load(config_file) + config_file.close() + if 'token_num_relax' in root: + token_num_relax = root['token_num_relax'] + if 'decoding_ind' in root: + decoding_ind = root['decoding_ind'] + if 'decoding_mode' in root: + decoding_mode = root['decoding_mode'] - cmd['beam_size'] = root['beam_size'] - cmd['penalty'] = root['penalty'] - cmd['maxlenratio'] = root['maxlenratio'] - cmd['minlenratio'] = root['minlenratio'] - cmd['ctc_weight'] = root['ctc_weight'] - cmd['lm_weight'] = root['lm_weight'] - cmd['asr_train_config'] = outputs['am_model_config'] - cmd['lm_file'] = outputs['lm_model_path'] - cmd['lm_train_config'] = outputs['lm_model_config'] - cmd['batch_size'] = outputs['model_config']['batch_size'] - cmd['frontend_conf'] = frontend_conf - if frontend_conf is not None and 'fs' in frontend_conf: - cmd['fs']['model_fs'] = frontend_conf['fs'] - cmd['token_num_relax'] = token_num_relax - cmd['decoding_ind'] = decoding_ind - cmd['decoding_mode'] = decoding_mode - if outputs.__contains__('mvn_file'): - cmd['cmvn_file'] = outputs['mvn_file'] - model_config = self.model_cfg['model_config'] - if model_config.__contains__('vad_model') and self.vad_model != '': - self.vad_model = model_config['vad_model'] - if model_config.__contains__('vad_model_revision'): - self.vad_model_revision = model_config['vad_model_revision'] - if model_config.__contains__( - 'punc_model') and self.punc_model != '': - self.punc_model = model_config['punc_model'] - if model_config.__contains__('punc_model_revision'): - self.punc_model_revision = model_config['punc_model_revision'] - if model_config.__contains__( - 'timestamp_model') and self.timestamp_model != '': - self.timestamp_model = model_config['timestamp_model'] - if model_config.__contains__('timestamp_model_revision'): - self.timestamp_model_revision = model_config[ - 'timestamp_model_revision'] - update_local_model(model_config, model_path, extra_args) - self.load_vad_model(cmd) - self.load_punc_model(cmd) - self.load_lm_model(cmd) - self.load_timestamp_model(cmd) + cmd['beam_size'] = root['beam_size'] + cmd['penalty'] = root['penalty'] + cmd['maxlenratio'] = root['maxlenratio'] + cmd['minlenratio'] = root['minlenratio'] + cmd['ctc_weight'] = root['ctc_weight'] + cmd['lm_weight'] = root['lm_weight'] + cmd['asr_train_config'] = outputs['am_model_config'] + cmd['lm_file'] = outputs['lm_model_path'] + cmd['lm_train_config'] = outputs['lm_model_config'] + cmd['batch_size'] = outputs['model_config']['batch_size'] + cmd['frontend_conf'] = frontend_conf + if frontend_conf is not None and 'fs' in frontend_conf: + cmd['fs']['model_fs'] = frontend_conf['fs'] + 
cmd['token_num_relax'] = token_num_relax + cmd['decoding_ind'] = decoding_ind + cmd['decoding_mode'] = decoding_mode + if outputs.__contains__('mvn_file'): + cmd['cmvn_file'] = outputs['mvn_file'] + model_config = self.model_cfg['model_config'] + if model_config.__contains__('vad_model') and self.vad_model != '': + self.vad_model = model_config['vad_model'] + if model_config.__contains__('vad_model_revision'): + self.vad_model_revision = model_config['vad_model_revision'] + if model_config.__contains__('punc_model') and self.punc_model != '': + self.punc_model = model_config['punc_model'] + if model_config.__contains__('punc_model_revision'): + self.punc_model_revision = model_config['punc_model_revision'] + if model_config.__contains__( + 'timestamp_model') and self.timestamp_model != '': + self.timestamp_model = model_config['timestamp_model'] + if model_config.__contains__('timestamp_model_revision'): + self.timestamp_model_revision = model_config[ + 'timestamp_model_revision'] + update_local_model(model_config, model_path, extra_args) + self.load_vad_model(cmd) + self.load_punc_model(cmd) + self.load_lm_model(cmd) + self.load_timestamp_model(cmd) - user_args_dict = [ - 'output_dir', - 'batch_size', - 'mode', - 'ngpu', - 'beam_size', - 'ctc_weight', - 'lm_weight', - 'decoding_ind', - 'decoding_mode', - 'vad_model_file', - 'vad_infer_config', - 'vad_cmvn_file', - 'punc_model_file', - 'punc_infer_config', - 'param_dict', - ] + user_args_dict = [ + 'output_dir', + 'batch_size', + 'mode', + 'ngpu', + 'beam_size', + 'ctc_weight', + 'lm_weight', + 'decoding_ind', + 'decoding_mode', + 'vad_model_file', + 'vad_infer_config', + 'vad_cmvn_file', + 'punc_model_file', + 'punc_infer_config', + 'param_dict', + ] - for user_args in user_args_dict: - if user_args in extra_args and extra_args[ - user_args] is not None: - cmd[user_args] = extra_args[user_args] - - elif self.framework == Frameworks.tf: - cmd['fs']['model_fs'] = outputs['model_config']['fs'] - cmd['hop_length'] = outputs['model_config']['hop_length'] - cmd['feature_dims'] = outputs['model_config']['feature_dims'] - cmd['predictions_file'] = 'text' - cmd['cmvn_file'] = outputs['am_mvn_file'] - cmd['vocab_file'] = outputs['vocab_file'] - if 'idx_text' in outputs: - cmd['idx_text'] = outputs['idx_text'] - if 'sampled_ids' in outputs['model_config']: - cmd['sampled_ids'] = outputs['model_config']['sampled_ids'] - if 'sampled_lengths' in outputs['model_config']: - cmd['sampled_lengths'] = outputs['model_config'][ - 'sampled_lengths'] - else: - raise ValueError('model type is mismatching') + for user_args in user_args_dict: + if user_args in extra_args and extra_args[user_args] is not None: + cmd[user_args] = extra_args[user_args] + del extra_args[user_args] return cmd @@ -520,23 +484,12 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): logger.info(f"Decoding with {inputs['audio_format']} files ...") data_cmd: Sequence[Tuple[str, str, str]] - if self.cmd['code_base'] == 'funasr': - if isinstance(self.audio_in, bytes): - data_cmd = [self.audio_in, 'speech', 'bytes'] - elif isinstance(self.audio_in, str): - data_cmd = [self.audio_in, 'speech', 'sound'] - elif self.raw_inputs is not None: - data_cmd = None - else: - if inputs['audio_format'] == 'wav' or inputs[ - 'audio_format'] == 'pcm': - data_cmd = ['speech', 'sound'] - elif inputs['audio_format'] == 'kaldi_ark': - data_cmd = ['speech', 'kaldi_ark'] - elif inputs['audio_format'] == 'tfrecord': - data_cmd = ['speech', 'tfrecord'] - if inputs.__contains__('mvn_file'): - 
data_cmd.append(inputs['mvn_file']) + if isinstance(self.audio_in, bytes): + data_cmd = [self.audio_in, 'speech', 'bytes'] + elif isinstance(self.audio_in, str): + data_cmd = [self.audio_in, 'speech', 'sound'] + elif self.raw_inputs is not None: + data_cmd = None # generate asr inference command self.cmd['name_and_type'] = data_cmd @@ -618,34 +571,9 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): return ref_list def run_inference(self, cmd, **kwargs): - asr_result = [] - if self.framework == Frameworks.torch and cmd['code_base'] == 'funasr': - asr_result = self.funasr_infer_modelscope( - cmd['name_and_type'], cmd['raw_inputs'], cmd['output_dir'], - cmd['fs'], cmd['param_dict'], **kwargs) - - elif self.framework == Frameworks.tf: - from easyasr import asr_inference_paraformer_tf - if hasattr(asr_inference_paraformer_tf, 'set_parameters'): - asr_inference_paraformer_tf.set_parameters( - language=cmd['lang']) - else: - # in order to support easyasr-0.0.2 - cmd['fs'] = cmd['fs']['model_fs'] - - asr_result = asr_inference_paraformer_tf.asr_inference( - ngpu=cmd['ngpu'], - name_and_type=cmd['name_and_type'], - audio_lists=cmd['audio_in'], - idx_text_file=cmd['idx_text'], - asr_model_file=cmd['asr_model_file'], - vocab_file=cmd['vocab_file'], - am_mvn_file=cmd['cmvn_file'], - predictions_file=cmd['predictions_file'], - fs=cmd['fs'], - hop_length=cmd['hop_length'], - feature_dims=cmd['feature_dims'], - sampled_ids=cmd['sampled_ids'], - sampled_lengths=cmd['sampled_lengths']) + asr_result = self.funasr_infer_modelscope(cmd['name_and_type'], + cmd['raw_inputs'], + cmd['output_dir'], cmd['fs'], + cmd['param_dict'], **kwargs) return asr_result diff --git a/modelscope/pipelines/audio/lm_infer_pipeline.py b/modelscope/pipelines/audio/lm_infer_pipeline.py index f271ea45..75d835d6 100644 --- a/modelscope/pipelines/audio/lm_infer_pipeline.py +++ b/modelscope/pipelines/audio/lm_infer_pipeline.py @@ -35,7 +35,10 @@ class LanguageModelPipeline(Pipeline): """ - def __init__(self, model: Union[Model, str] = None, **kwargs): + def __init__(self, + model: Union[Model, str] = None, + ngpu: int = 1, + **kwargs): """ Use `model` to create a LM pipeline for prediction Args: @@ -88,7 +91,9 @@ class LanguageModelPipeline(Pipeline): split_with_space=self.cmd['split_with_space'], seg_dict_file=self.cmd['seg_dict_file'], output_dir=self.cmd['output_dir'], - param_dict=self.cmd['param_dict']) + param_dict=self.cmd['param_dict'], + **kwargs, + ) def __call__(self, text_in: str = None, @@ -189,6 +194,7 @@ class LanguageModelPipeline(Pipeline): for user_args in user_args_dict: if user_args in extra_args and extra_args[user_args] is not None: cmd[user_args] = extra_args[user_args] + del extra_args[user_args] return cmd diff --git a/modelscope/pipelines/audio/punctuation_processing_pipeline.py b/modelscope/pipelines/audio/punctuation_processing_pipeline.py index 2f4dee7a..3ab3481d 100644 --- a/modelscope/pipelines/audio/punctuation_processing_pipeline.py +++ b/modelscope/pipelines/audio/punctuation_processing_pipeline.py @@ -39,7 +39,10 @@ class PunctuationProcessingPipeline(Pipeline): """ - def __init__(self, model: Union[Model, str] = None, **kwargs): + def __init__(self, + model: Union[Model, str] = None, + ngpu: int = 1, + **kwargs): """use `model` to create an asr pipeline for prediction """ super().__init__(model=model, **kwargs) @@ -59,7 +62,9 @@ class PunctuationProcessingPipeline(Pipeline): train_config=self.cmd['train_config'], model_file=self.cmd['model_file'], output_dir=self.cmd['output_dir'], - 
param_dict=self.cmd['param_dict'])
+            param_dict=self.cmd['param_dict'],
+            **kwargs,
+        )

     def __call__(self,
                  text_in: str = None,
@@ -141,6 +146,7 @@ class PunctuationProcessingPipeline(Pipeline):
         for user_args in user_args_dict:
             if user_args in extra_args and extra_args[user_args] is not None:
                 cmd[user_args] = extra_args[user_args]
+                del extra_args[user_args]

         return cmd

diff --git a/modelscope/pipelines/audio/speaker_change_locating_pipeline.py b/modelscope/pipelines/audio/speaker_change_locating_pipeline.py
new file mode 100644
index 00000000..0bab08ac
--- /dev/null
+++ b/modelscope/pipelines/audio/speaker_change_locating_pipeline.py
@@ -0,0 +1,105 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import io
+from typing import Any, Dict, List, Union
+
+import numpy as np
+import soundfile as sf
+import torch
+
+from modelscope.fileio import File
+from modelscope.metainfo import Pipelines
+from modelscope.outputs import OutputKeys
+from modelscope.pipelines.base import InputModel, Pipeline
+from modelscope.pipelines.builder import PIPELINES
+from modelscope.utils.constant import Tasks
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+__all__ = ['SpeakerChangeLocatingPipeline']
+
+
+@PIPELINES.register_module(
+    Tasks.speaker_diarization, module_name=Pipelines.speaker_change_locating)
+class SpeakerChangeLocatingPipeline(Pipeline):
+    """Speaker Change Locating Inference Pipeline
+    use `model` to create a speaker change locating pipeline.
+
+    Args:
+        model (SpeakerChangeLocatingPipeline): A model instance, or a model local dir, or a model id in the model hub.
+        kwargs (dict, `optional`):
+            Extra kwargs passed into the pipeline's constructor.
+    Example:
+    >>> from modelscope.pipelines import pipeline
+    >>> from modelscope.utils.constant import Tasks
+    >>> p = pipeline(
+    >>>    task=Tasks.speaker_diarization, model='damo/speech_campplus-transformer_scl_zh-cn_16k-common')
+    >>> print(p(audio))
+
+    """
+
+    def __init__(self, model: InputModel, **kwargs):
+        """use `model` to create a speaker change locating pipeline for prediction
+        Args:
+            model (str): a valid official model id
+        """
+        super().__init__(model=model, **kwargs)
+        self.model_config = self.model.model_config
+        self.config = self.model.model_config
+        self.anchor_size = self.config['anchor_size']
+
+    def __call__(self, audio: str, embds: List = None) -> Dict[str, Any]:
+        if embds is not None:
+            assert len(embds) == 2
+            assert isinstance(embds[0], np.ndarray) and isinstance(
+                embds[1], np.ndarray)
+            assert embds[0].shape == (
+                self.anchor_size, ) and embds[1].shape == (self.anchor_size, )
+        else:
+            embd1 = np.zeros(self.anchor_size // 2)
+            embd2 = np.ones(self.anchor_size - self.anchor_size // 2)
+            embd3 = np.ones(self.anchor_size // 2)
+            embd4 = np.zeros(self.anchor_size - self.anchor_size // 2)
+            embds = [
+                np.stack([embd1, embd2], axis=1).flatten(),
+                np.stack([embd3, embd4], axis=1).flatten(),
+            ]
+        anchors = torch.from_numpy(np.stack(embds,
+                                            axis=0)).float().unsqueeze(0)
+
+        output = self.preprocess(audio)
+        output = self.forward(output, anchors)
+        output = self.postprocess(output)
+
+        return output
+
+    def forward(self, input: torch.Tensor, anchors: torch.Tensor):
+        output = self.model(input, anchors)
+        return output
+
+    def postprocess(self, input: torch.Tensor) -> Dict[str, Any]:
+        predict = np.where(np.diff(input.argmax(-1).numpy()))
+        try:
+            predict = predict[0][0] * 0.01 + 0.02
+            predict = round(predict, 2)
+            return {OutputKeys.TEXT: f'The change point is at {predict}s.'}
+        except Exception:
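+            # np.where found no change in the frame-wise argmax, so the
+            # predict[0][0] lookup above raised; report that nothing was located.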
+            return {OutputKeys.TEXT: 'No change point is found.'}
+
+    def preprocess(self, input: str) -> torch.Tensor:
+        if isinstance(input, str):
+            file_bytes = File.read(input)
+            data, fs = sf.read(io.BytesIO(file_bytes), dtype='float32')
+            if len(data.shape) == 2:
+                data = data[:, 0]
+            if fs != self.model_config['sample_rate']:
+                raise ValueError(
+                    'modelscope error: Only support %d sample rate files'
+                    % self.model_config['sample_rate'])
+            data = torch.from_numpy(data).unsqueeze(0)
+        else:
+            raise ValueError(
+                'modelscope error: The input type is restricted to audio file address')
+        return data

diff --git a/modelscope/pipelines/audio/speaker_diarization_pipeline.py b/modelscope/pipelines/audio/speaker_diarization_pipeline.py
index f4f68cba..71715ecd 100644
--- a/modelscope/pipelines/audio/speaker_diarization_pipeline.py
+++ b/modelscope/pipelines/audio/speaker_diarization_pipeline.py
@@ -48,6 +48,7 @@ class SpeakerDiarizationPipeline(Pipeline):
                  model: Union[Model, str] = None,
                  sv_model: Optional[Union[Model, str]] = None,
                  sv_model_revision: Optional[str] = None,
+                 ngpu: int = 1,
                  **kwargs):
         """use `model` to create a speaker diarization pipeline for prediction
         Args:
@@ -90,6 +91,7 @@ class SpeakerDiarizationPipeline(Pipeline):
             dur_threshold=self.cmd['dur_threshold'],
             out_format=self.cmd['out_format'],
             param_dict=self.cmd['param_dict'],
+            **kwargs,
         )

     def __call__(self,
@@ -203,6 +205,7 @@ class SpeakerDiarizationPipeline(Pipeline):
                     cmd[user_args].update(extra_args[user_args])
                 else:
                     cmd[user_args] = extra_args[user_args]
+                del extra_args[user_args]

         return cmd

diff --git a/modelscope/pipelines/audio/speaker_verification_eres2net_pipeline.py b/modelscope/pipelines/audio/speaker_verification_eres2net_pipeline.py
new file mode 100644
index 00000000..ef91d83b
--- /dev/null
+++ b/modelscope/pipelines/audio/speaker_verification_eres2net_pipeline.py
@@ -0,0 +1,110 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import io
+from typing import Any, Dict, List, Union
+
+import soundfile as sf
+import torch
+
+from modelscope.fileio import File
+from modelscope.metainfo import Pipelines
+from modelscope.outputs import OutputKeys
+from modelscope.pipelines.base import InputModel, Pipeline
+from modelscope.pipelines.builder import PIPELINES
+from modelscope.utils.constant import Tasks
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+
+@PIPELINES.register_module(
+    Tasks.speaker_verification,
+    module_name=Pipelines.speaker_verification_eres2net)
+class ERes2Net_Pipeline(Pipeline):
+    """Speaker Verification Inference Pipeline
+    use `model` to create a Speaker Verification pipeline.
+
+    Args:
+        model (SpeakerVerificationPipeline): A model instance, or a model local dir, or a model id in the model hub.
+        kwargs (dict, `optional`):
+            Extra kwargs passed into the pipeline's constructor.
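+
+    The pipeline expects exactly two input audio files. It returns the cosine
+    similarity score of the two speaker embeddings together with a yes/no decision
+    made against the configured threshold (`yesOrno_thr`, overridable via `thr`).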
+    Example:
+    >>> from modelscope.pipelines import pipeline
+    >>> from modelscope.utils.constant import Tasks
+    >>> p = pipeline(
+    >>>    task=Tasks.speaker_verification, model='damo/speech_ecapa-tdnn_sv_en_voxceleb_16k')
+    >>> print(p([audio_1, audio_2]))
+
+    """
+
+    def __init__(self, model: InputModel, **kwargs):
+        """use `model` to create a speaker verification pipeline for prediction
+        Args:
+            model (str): a valid official model id
+        """
+        super().__init__(model=model, **kwargs)
+        self.model_config = self.model.model_config
+        self.config = self.model.other_config
+        self.thr = self.config['yesOrno_thr']
+
+    def __call__(self,
+                 in_audios: List[str],
+                 thr: float = None) -> Dict[str, Any]:
+        if thr is not None:
+            self.thr = thr
+        if self.thr < -1 or self.thr > 1:
+            raise ValueError(
+                'modelscope error: the thr value should be in [-1, 1], but found to be %f.'
+                % self.thr)
+        outputs = self.preprocess(in_audios)
+        outputs = self.forward(outputs)
+        outputs = self.postprocess(outputs)
+
+        return outputs
+
+    def forward(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+        emb1 = self.model(inputs['data1'])
+        emb2 = self.model(inputs['data2'])
+
+        return {'emb1': emb1, 'emb2': emb2}
+
+    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+        score = self.compute_cos_similarity(inputs['emb1'], inputs['emb2'])
+        score = round(score, 5)
+        if score >= self.thr:
+            ans = 'yes'
+        else:
+            ans = 'no'
+
+        return {OutputKeys.SCORE: score, OutputKeys.TEXT: ans}
+
+    def preprocess(self, inputs: List[str],
+                   **preprocess_params) -> Dict[str, Any]:
+        if len(inputs) != 2:
+            raise ValueError(
+                'modelscope error: Two input audio files are required.')
+        output = {}
+        for i in range(len(inputs)):
+            if isinstance(inputs[i], str):
+                file_bytes = File.read(inputs[i])
+                data, fs = sf.read(io.BytesIO(file_bytes), dtype='float32')
+                if len(data.shape) == 2:
+                    data = data[:, 0]
+                if fs != self.model_config['sample_rate']:
+                    raise ValueError(
+                        'modelscope error: Only support %d sample rate files'
+                        % self.model_config['sample_rate'])
+                output['data%d' %
+                       (i + 1)] = torch.from_numpy(data).unsqueeze(0)
+            else:
+                raise ValueError(
+                    'modelscope error: The input type is temporarily restricted to audio file address')
+        return output
+
+    def compute_cos_similarity(self, emb1: torch.Tensor,
+                               emb2: torch.Tensor) -> float:
+        assert len(emb1.shape) == 2 and len(emb2.shape) == 2
+        cos = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
+        cosine = cos(emb1, emb2)
+        return cosine.item()

diff --git a/modelscope/pipelines/audio/speaker_verification_pipeline.py b/modelscope/pipelines/audio/speaker_verification_pipeline.py
index 97e73627..e576885a 100644
--- a/modelscope/pipelines/audio/speaker_verification_pipeline.py
+++ b/modelscope/pipelines/audio/speaker_verification_pipeline.py
@@ -41,7 +41,10 @@ class SpeakerVerificationPipeline(Pipeline):
     """

-    def __init__(self, model: Union[Model, str] = None, **kwargs):
+    def __init__(self,
+                 model: Union[Model, str] = None,
+                 ngpu: int = 1,
+                 **kwargs):
         """use `model` to create an asr pipeline for prediction
         """
         super().__init__(model=model, **kwargs)
@@ -67,6 +70,7 @@ class SpeakerVerificationPipeline(Pipeline):
             embedding_node=self.cmd['embedding_node'],
             sv_threshold=self.cmd['sv_threshold'],
             param_dict=self.cmd['param_dict'],
+            **kwargs,
         )

     def __call__(self,
@@ -168,6 +172,7 @@ class SpeakerVerificationPipeline(Pipeline):
                     cmd[user_args].update(extra_args[user_args])
                 else:
                     cmd[user_args] = extra_args[user_args]
+                del extra_args[user_args]

         return cmd
diff --git a/modelscope/pipelines/audio/speaker_verification_rdino_pipeline.py b/modelscope/pipelines/audio/speaker_verification_rdino_pipeline.py
new file mode 100644
index 00000000..dd08ccf4
--- /dev/null
+++ b/modelscope/pipelines/audio/speaker_verification_rdino_pipeline.py
@@ -0,0 +1,110 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import io
+from typing import Any, Dict, List, Union
+
+import soundfile as sf
+import torch
+
+from modelscope.fileio import File
+from modelscope.metainfo import Pipelines
+from modelscope.outputs import OutputKeys
+from modelscope.pipelines.base import InputModel, Pipeline
+from modelscope.pipelines.builder import PIPELINES
+from modelscope.utils.constant import Tasks
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+
+@PIPELINES.register_module(
+    Tasks.speaker_verification,
+    module_name=Pipelines.speaker_verification_rdino)
+class RDINO_Pipeline(Pipeline):
+    """Speaker Verification Inference Pipeline
+    use `model` to create a Speaker Verification pipeline.
+
+    Args:
+        model (SpeakerVerificationPipeline): A model instance, or a model local dir, or a model id in the model hub.
+        kwargs (dict, `optional`):
+            Extra kwargs passed into the pipeline's constructor.
+    Example:
+    >>> from modelscope.pipelines import pipeline
+    >>> from modelscope.utils.constant import Tasks
+    >>> p = pipeline(
+    >>>    task=Tasks.speaker_verification, model='damo/speech_ecapa-tdnn_sv_en_voxceleb_16k')
+    >>> print(p([audio_1, audio_2]))
+
+    """
+
+    def __init__(self, model: InputModel, **kwargs):
+        """use `model` to create a speaker verification pipeline for prediction
+        Args:
+            model (str): a valid official model id
+        """
+        super().__init__(model=model, **kwargs)
+        self.model_config = self.model.model_config
+        self.config = self.model.other_config
+        self.thr = self.config['yesOrno_thr']
+
+    def __call__(self,
+                 in_audios: List[str],
+                 thr: float = None) -> Dict[str, Any]:
+        if thr is not None:
+            self.thr = thr
+        if self.thr < -1 or self.thr > 1:
+            raise ValueError(
+                'modelscope error: the thr value should be in [-1, 1], but found to be %f.'
+                % self.thr)
+        outputs = self.preprocess(in_audios)
+        outputs = self.forward(outputs)
+        outputs = self.postprocess(outputs)
+
+        return outputs
+
+    def forward(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+        emb1 = self.model(inputs['data1'])
+        emb2 = self.model(inputs['data2'])
+
+        return {'emb1': emb1, 'emb2': emb2}
+
+    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+        score = self.compute_cos_similarity(inputs['emb1'], inputs['emb2'])
+        score = round(score, 5)
+        if score >= self.thr:
+            ans = 'yes'
+        else:
+            ans = 'no'
+
+        return {OutputKeys.SCORE: score, OutputKeys.TEXT: ans}
+
+    def preprocess(self, inputs: List[str],
+                   **preprocess_params) -> Dict[str, Any]:
+        if len(inputs) != 2:
+            raise ValueError(
+                'modelscope error: Two input audio files are required.')
+        output = {}
+        for i in range(len(inputs)):
+            if isinstance(inputs[i], str):
+                file_bytes = File.read(inputs[i])
+                data, fs = sf.read(io.BytesIO(file_bytes), dtype='float32')
+                if len(data.shape) == 2:
+                    data = data[:, 0]
+                if fs != self.model_config['sample_rate']:
+                    raise ValueError(
+                        'modelscope error: Only support %d sample rate files'
+                        % self.model_config['sample_rate'])
+                output['data%d' %
+                       (i + 1)] = torch.from_numpy(data).unsqueeze(0)
+            else:
+                raise ValueError(
+                    'modelscope error: The input type is temporarily restricted to audio file address')
+        return output
+
+    def compute_cos_similarity(self, emb1: torch.Tensor,
+                               emb2: torch.Tensor) -> float:
+        assert len(emb1.shape) == 2 and len(emb2.shape) == 2
+        cos = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
+        cosine = cos(emb1, emb2)
+        return cosine.item()

diff --git a/modelscope/pipelines/audio/timestamp_pipeline.py b/modelscope/pipelines/audio/timestamp_pipeline.py
index b60fef05..0968b359 100644
--- a/modelscope/pipelines/audio/timestamp_pipeline.py
+++ b/modelscope/pipelines/audio/timestamp_pipeline.py
@@ -40,7 +40,10 @@ class TimestampPipeline(Pipeline):
     """

-    def __init__(self, model: Union[Model, str] = None, **kwargs):
+    def __init__(self,
+                 model: Union[Model, str] = None,
+                 ngpu: int = 1,
+                 **kwargs):
         """
         Use `model` and `preprocessor` to create an asr pipeline for prediction
         Args:
@@ -84,7 +87,9 @@ class TimestampPipeline(Pipeline):
             allow_variable_data_keys=self.cmd['allow_variable_data_keys'],
             split_with_space=self.cmd['split_with_space'],
             seg_dict_file=self.cmd['seg_dict_file'],
-            param_dict=self.cmd['param_dict'])
+            param_dict=self.cmd['param_dict'],
+            **kwargs,
+        )

     def __call__(self,
                  audio_in: Union[str, bytes],
@@ -264,6 +269,7 @@ class TimestampPipeline(Pipeline):
         for user_args in user_args_dict:
             if user_args in extra_args and extra_args[user_args] is not None:
                 cmd[user_args] = extra_args[user_args]
+                del extra_args[user_args]

         return cmd

diff --git a/modelscope/pipelines/audio/voice_activity_detection_pipeline.py b/modelscope/pipelines/audio/voice_activity_detection_pipeline.py
index c1c6e01f..0121b242 100644
--- a/modelscope/pipelines/audio/voice_activity_detection_pipeline.py
+++ b/modelscope/pipelines/audio/voice_activity_detection_pipeline.py
@@ -41,7 +41,10 @@ class VoiceActivityDetectionPipeline(Pipeline):
     """

-    def __init__(self, model: Union[Model, str] = None, **kwargs):
+    def __init__(self,
+                 model: Union[Model, str] = None,
+                 ngpu: int = 1,
+                 **kwargs):
         """use `model` to create an vad pipeline for prediction
         """
         super().__init__(model=model, **kwargs)
@@ -60,7 +63,9 @@ class VoiceActivityDetectionPipeline(Pipeline):
             key_file=self.cmd['key_file'],
             vad_infer_config=self.cmd['vad_infer_config'],
vad_model_file=self.cmd['vad_model_file'], - vad_cmvn_file=self.cmd['vad_cmvn_file']) + vad_cmvn_file=self.cmd['vad_cmvn_file'], + **kwargs, + ) def __call__(self, audio_in: Union[str, bytes], @@ -209,6 +214,7 @@ class VoiceActivityDetectionPipeline(Pipeline): for user_args in user_args_dict: if user_args in extra_args and extra_args[user_args] is not None: cmd[user_args] = extra_args[user_args] + del extra_args[user_args] return cmd diff --git a/modelscope/pipelines/cv/__init__.py b/modelscope/pipelines/cv/__init__.py index 54289644..e9d7a785 100644 --- a/modelscope/pipelines/cv/__init__.py +++ b/modelscope/pipelines/cv/__init__.py @@ -9,7 +9,6 @@ if TYPE_CHECKING: from .animal_recognition_pipeline import AnimalRecognitionPipeline from .body_2d_keypoints_pipeline import Body2DKeypointsPipeline from .body_3d_keypoints_pipeline import Body3DKeypointsPipeline - from .hand_2d_keypoints_pipeline import Hand2DKeypointsPipeline from .cmdssl_video_embedding_pipeline import CMDSSLVideoEmbeddingPipeline from .card_detection_pipeline import CardDetectionPipeline from .hicossl_video_embedding_pipeline import HICOSSLVideoEmbeddingPipeline @@ -29,13 +28,10 @@ if TYPE_CHECKING: from .image_classification_pipeline import GeneralImageClassificationPipeline from .image_color_enhance_pipeline import ImageColorEnhancePipeline from .image_colorization_pipeline import ImageColorizationPipeline - from .image_classification_pipeline import ImageClassificationPipeline from .image_denoise_pipeline import ImageDenoisePipeline from .image_deblur_pipeline import ImageDeblurPipeline from .image_instance_segmentation_pipeline import ImageInstanceSegmentationPipeline from .image_matting_pipeline import ImageMattingPipeline - from .image_panoptic_segmentation_pipeline import ImagePanopticSegmentationPipeline - from .image_semantic_segmentation_pipeline import ImagePanopticSegmentationEasyCVPipeline from .image_portrait_enhancement_pipeline import ImagePortraitEnhancementPipeline from .image_reid_person_pipeline import ImageReidPersonPipeline from .image_semantic_segmentation_pipeline import ImageSemanticSegmentationPipeline @@ -46,7 +42,6 @@ if TYPE_CHECKING: from .image_inpainting_pipeline import ImageInpaintingPipeline from .image_paintbyexample_pipeline import ImagePaintbyexamplePipeline from .product_retrieval_embedding_pipeline import ProductRetrievalEmbeddingPipeline - from .realtime_object_detection_pipeline import RealtimeObjectDetectionPipeline from .live_category_pipeline import LiveCategoryPipeline from .ocr_detection_pipeline import OCRDetectionPipeline from .ocr_recognition_pipeline import OCRRecognitionPipeline @@ -59,10 +54,6 @@ if TYPE_CHECKING: from .video_category_pipeline import VideoCategoryPipeline from .virtual_try_on_pipeline import VirtualTryonPipeline from .shop_segmentation_pipleline import ShopSegmentationPipeline - from .easycv_pipelines import (EasyCVDetectionPipeline, - EasyCVSegmentationPipeline, - Face2DKeypointsPipeline, - HumanWholebodyKeypointsPipeline) from .text_driven_segmentation_pipleline import TextDrivenSegmentationPipeline from .movie_scene_segmentation_pipeline import MovieSceneSegmentationPipeline from .mog_face_detection_pipeline import MogFaceDetectionPipeline @@ -123,7 +114,6 @@ else: 'animal_recognition_pipeline': ['AnimalRecognitionPipeline'], 'body_2d_keypoints_pipeline': ['Body2DKeypointsPipeline'], 'body_3d_keypoints_pipeline': ['Body3DKeypointsPipeline'], - 'hand_2d_keypoints_pipeline': ['Hand2DKeypointsPipeline'], 'card_detection_pipeline': ['CardDetectionPipeline'], 
'cmdssl_video_embedding_pipeline': ['CMDSSLVideoEmbeddingPipeline'], 'hicossl_video_embedding_pipeline': ['HICOSSLVideoEmbeddingPipeline'], @@ -140,7 +130,7 @@ else: 'face_recognition_onnx_fm_pipeline': ['FaceRecognitionOnnxFmPipeline'], 'general_recognition_pipeline': ['GeneralRecognitionPipeline'], 'image_classification_pipeline': - ['GeneralImageClassificationPipeline', 'ImageClassificationPipeline'], + ['GeneralImageClassificationPipeline'], 'image_cartoon_pipeline': ['ImageCartoonPipeline'], 'image_denoise_pipeline': ['ImageDenoisePipeline'], 'image_deblur_pipeline': ['ImageDeblurPipeline'], @@ -149,10 +139,6 @@ else: 'image_instance_segmentation_pipeline': ['ImageInstanceSegmentationPipeline'], 'image_matting_pipeline': ['ImageMattingPipeline'], - 'image_panoptic_segmentation_pipeline': [ - 'ImagePanopticSegmentationPipeline', - 'ImagePanopticSegmentationEasyCVPipeline' - ], 'image_portrait_enhancement_pipeline': ['ImagePortraitEnhancementPipeline'], 'image_reid_person_pipeline': ['ImageReidPersonPipeline'], @@ -164,8 +150,6 @@ else: ['Image2ImageTranslationPipeline'], 'product_retrieval_embedding_pipeline': ['ProductRetrievalEmbeddingPipeline'], - 'realtime_object_detection_pipeline': - ['RealtimeObjectDetectionPipeline'], 'live_category_pipeline': ['LiveCategoryPipeline'], 'image_to_image_generate_pipeline': ['Image2ImageGenerationPipeline'], 'image_inpainting_pipeline': ['ImageInpaintingPipeline'], @@ -180,12 +164,6 @@ else: 'video_category_pipeline': ['VideoCategoryPipeline'], 'virtual_try_on_pipeline': ['VirtualTryonPipeline'], 'shop_segmentation_pipleline': ['ShopSegmentationPipeline'], - 'easycv_pipelines': [ - 'EasyCVDetectionPipeline', - 'EasyCVSegmentationPipeline', - 'Face2DKeypointsPipeline', - 'HumanWholebodyKeypointsPipeline', - ], 'text_driven_segmentation_pipleline': ['TextDrivenSegmentationPipeline'], 'movie_scene_segmentation_pipeline': @@ -202,9 +180,8 @@ else: ['FaceAttributeRecognitionPipeline'], 'mtcnn_face_detection_pipeline': ['MtcnnFaceDetectionPipeline'], 'hand_static_pipeline': ['HandStaticPipeline'], - 'referring_video_object_segmentation_pipeline': [ - 'ReferringVideoObjectSegmentationPipeline' - ], + 'referring_video_object_segmentation_pipeline': + ['ReferringVideoObjectSegmentationPipeline'], 'language_guided_video_summarization_pipeline': [ 'LanguageGuidedVideoSummarizationPipeline' ], diff --git a/modelscope/pipelines/cv/easycv_pipelines/__init__.py b/modelscope/pipelines/cv/easycv_pipelines/__init__.py deleted file mode 100644 index e0209b85..00000000 --- a/modelscope/pipelines/cv/easycv_pipelines/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule - -if TYPE_CHECKING: - from .detection_pipeline import EasyCVDetectionPipeline - from .segmentation_pipeline import EasyCVSegmentationPipeline - from .face_2d_keypoints_pipeline import Face2DKeypointsPipeline - from .human_wholebody_keypoint_pipeline import HumanWholebodyKeypointsPipeline -else: - _import_structure = { - 'detection_pipeline': ['EasyCVDetectionPipeline'], - 'segmentation_pipeline': ['EasyCVSegmentationPipeline'], - 'face_2d_keypoints_pipeline': ['Face2DKeypointsPipeline'], - 'human_wholebody_keypoint_pipeline': - ['HumanWholebodyKeypointsPipeline'], - } - - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/pipelines/cv/easycv_pipelines/base.py b/modelscope/pipelines/cv/easycv_pipelines/base.py deleted file mode 100644 index 0a31be94..00000000 --- a/modelscope/pipelines/cv/easycv_pipelines/base.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import glob -import os -import os.path as osp -from typing import Any - -import numpy as np -from easycv.utils.ms_utils import EasyCVMeta -from PIL import ImageFile - -from modelscope.hub.snapshot_download import snapshot_download -from modelscope.pipelines.util import is_official_hub_path -from modelscope.utils.config import Config -from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, Invoke, - ModelFile, ThirdParty) -from modelscope.utils.device import create_device - - -class EasyCVPipeline(object): - """Base pipeline for EasyCV. - Loading configuration file of modelscope style by default, - but it is actually use the predictor api of easycv to predict. - So here we do some adaptation work for configuration and predict api. - """ - - def __init__(self, model: str, model_file_pattern='*.pt', *args, **kwargs): - """ - model (str): model id on modelscope hub or local model path. - model_file_pattern (str): model file pattern. - - """ - self.model_file_pattern = model_file_pattern - - assert isinstance(model, str) - if osp.exists(model): - model_dir = model - else: - assert is_official_hub_path( - model), 'Only support local model path and official hub path!' - model_dir = snapshot_download( - model_id=model, - revision=DEFAULT_MODEL_REVISION, - user_agent={ - Invoke.KEY: Invoke.PIPELINE, - ThirdParty.KEY: ThirdParty.EASYCV - }) - - assert osp.isdir(model_dir) - model_files = glob.glob( - os.path.join(model_dir, self.model_file_pattern)) - assert len( - model_files - ) == 1, f'Need one model file, but find {len(model_files)}: {model_files}' - - model_path = model_files[0] - self.model_path = model_path - self.model_dir = model_dir - - # get configuration file from source model dir - self.config_file = os.path.join(model_dir, ModelFile.CONFIGURATION) - assert os.path.exists( - self.config_file - ), f'Not find "{ModelFile.CONFIGURATION}" in model directory!' 
- - self.cfg = Config.from_file(self.config_file) - if 'device' in kwargs: - kwargs['device'] = create_device(kwargs['device']) - if 'predictor_config' in kwargs: - kwargs.pop('predictor_config') - self.predict_op = self._build_predict_op(**kwargs) - - def _build_predict_op(self, **kwargs): - """Build EasyCV predictor.""" - from easycv.predictors.builder import build_predictor - - easycv_config = self._to_easycv_config() - pipeline_op = build_predictor(self.cfg.pipeline.predictor_config, { - 'model_path': self.model_path, - 'config_file': easycv_config, - **kwargs - }) - return pipeline_op - - def _to_easycv_config(self): - """Adapt to EasyCV predictor.""" - # TODO: refine config compatibility problems - - easycv_arch = self.cfg.model.pop(EasyCVMeta.ARCH, None) - model_cfg = self.cfg.model - # Revert to the configuration of easycv - if easycv_arch is not None: - model_cfg.update(easycv_arch) - - easycv_config = Config(dict(model=model_cfg)) - - reserved_keys = [] - if hasattr(self.cfg, EasyCVMeta.META): - easycv_meta_cfg = getattr(self.cfg, EasyCVMeta.META) - reserved_keys = easycv_meta_cfg.get(EasyCVMeta.RESERVED_KEYS, []) - for key in reserved_keys: - easycv_config.merge_from_dict({key: getattr(self.cfg, key)}) - if 'test_pipeline' not in reserved_keys: - easycv_config.merge_from_dict( - {'test_pipeline': self.cfg.dataset.val.get('pipeline', [])}) - - return easycv_config - - def _is_single_inputs(self, inputs): - if isinstance(inputs, str) or (isinstance(inputs, list) - and len(inputs) == 1) or isinstance( - inputs, np.ndarray) or isinstance( - inputs, ImageFile.ImageFile): - return True - - return False - - def __call__(self, inputs) -> Any: - outputs = self.predict_op(inputs) - - if self._is_single_inputs(inputs): - outputs = outputs[0] - - return outputs diff --git a/modelscope/pipelines/cv/easycv_pipelines/detection_pipeline.py b/modelscope/pipelines/cv/easycv_pipelines/detection_pipeline.py deleted file mode 100644 index 2a95ebb4..00000000 --- a/modelscope/pipelines/cv/easycv_pipelines/detection_pipeline.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from typing import Any - -from modelscope.metainfo import Pipelines -from modelscope.outputs import OutputKeys -from modelscope.pipelines.builder import PIPELINES -from modelscope.utils.constant import ModelFile, Tasks -from modelscope.utils.cv.image_utils import \ - show_image_object_detection_auto_result -from .base import EasyCVPipeline - - -@PIPELINES.register_module( - Tasks.image_object_detection, module_name=Pipelines.easycv_detection) -@PIPELINES.register_module( - Tasks.image_object_detection, - module_name=Pipelines.image_object_detection_auto) -@PIPELINES.register_module( - Tasks.domain_specific_object_detection, - module_name=Pipelines.hand_detection) -class EasyCVDetectionPipeline(EasyCVPipeline): - """Pipeline for easycv detection task.""" - - def __init__(self, - model: str, - model_file_pattern=ModelFile.TORCH_MODEL_FILE, - *args, - **kwargs): - """ - model (str): model id on modelscope hub or local model path. - model_file_pattern (str): model file pattern. 
- """ - - super(EasyCVDetectionPipeline, self).__init__( - model=model, - model_file_pattern=model_file_pattern, - *args, - **kwargs) - - def show_result(self, img_path, result, save_path=None): - show_image_object_detection_auto_result(img_path, result, save_path) - - def __call__(self, inputs) -> Any: - outputs = self.predict_op(inputs) - - scores = [] - labels = [] - boxes = [] - for output in outputs: - for score, label, box in zip(output['detection_scores'], - output['detection_classes'], - output['detection_boxes']): - scores.append(score) - labels.append(self.cfg.CLASSES[label]) - boxes.append([b for b in box]) - - results = [{ - OutputKeys.SCORES: scores, - OutputKeys.LABELS: labels, - OutputKeys.BOXES: boxes - } for output in outputs] - - if self._is_single_inputs(inputs): - results = results[0] - - return results diff --git a/modelscope/pipelines/cv/easycv_pipelines/face_2d_keypoints_pipeline.py b/modelscope/pipelines/cv/easycv_pipelines/face_2d_keypoints_pipeline.py deleted file mode 100644 index 0ddc6a6c..00000000 --- a/modelscope/pipelines/cv/easycv_pipelines/face_2d_keypoints_pipeline.py +++ /dev/null @@ -1,244 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import copy -import math -from typing import Any - -import cv2 -import numpy as np - -from modelscope.metainfo import Pipelines -from modelscope.outputs import OutputKeys -from modelscope.pipelines import pipeline -from modelscope.pipelines.builder import PIPELINES -from modelscope.preprocessors import LoadImage -from modelscope.utils.constant import ModelFile, Tasks -from modelscope.utils.logger import get_logger -from .base import EasyCVPipeline - -logger = get_logger() - - -@PIPELINES.register_module( - Tasks.face_2d_keypoints, module_name=Pipelines.face_2d_keypoints) -class Face2DKeypointsPipeline(EasyCVPipeline): - """Pipeline for face 2d keypoints detection.""" - - def __init__(self, - model: str, - model_file_pattern=ModelFile.TORCH_MODEL_FILE, - *args, - **kwargs): - """ - model (str): model id on modelscope hub or local model path. - model_file_pattern (str): model file pattern. 
- """ - - super(Face2DKeypointsPipeline, self).__init__( - model=model, - model_file_pattern=model_file_pattern, - *args, - **kwargs) - - # face detect pipeline - det_model_id = 'damo/cv_resnet_facedetection_scrfd10gkps' - self.face_detection = pipeline( - Tasks.face_detection, model=det_model_id) - - def show_result(self, img, points, scale=2, save_path=None): - return self.predict_op.show_result(img, points, scale, save_path) - - def _choose_face(self, det_result, min_face=10): - """ - choose face with maximum area - Args: - det_result: output of face detection pipeline - min_face: minimum size of valid face w/h - """ - bboxes = np.array(det_result[OutputKeys.BOXES]) - landmarks = np.array(det_result[OutputKeys.KEYPOINTS]) - if bboxes.shape[0] == 0: - logger.warning('No face detected!') - return None - # face idx with enough size - face_idx = [] - for i in range(bboxes.shape[0]): - box = bboxes[i] - if (box[2] - box[0]) >= min_face and (box[3] - box[1]) >= min_face: - face_idx += [i] - if len(face_idx) == 0: - logger.warning( - f'Face size not enough, less than {min_face}x{min_face}!') - return None - bboxes = bboxes[face_idx] - landmarks = landmarks[face_idx] - - return bboxes, landmarks - - def expend_box(self, box, w, h, scalex=0.3, scaley=0.5): - x1 = box[0] - y1 = box[1] - wb = box[2] - x1 - hb = box[3] - y1 - deltax = int(wb * scalex) - deltay1 = int(hb * scaley) - deltay2 = int(hb * scalex) - x1 = x1 - deltax - y1 = y1 - deltay1 - if x1 < 0: - deltax = deltax + x1 - x1 = 0 - if y1 < 0: - deltay1 = deltay1 + y1 - y1 = 0 - x2 = x1 + wb + 2 * deltax - y2 = y1 + hb + deltay1 + deltay2 - x2 = np.clip(x2, 0, w - 1) - y2 = np.clip(y2, 0, h - 1) - return [x1, y1, x2, y2] - - def rotate_point(self, angle, center, landmark): - rad = angle * np.pi / 180.0 - alpha = np.cos(rad) - beta = np.sin(rad) - M = np.zeros((2, 3), dtype=np.float32) - M[0, 0] = alpha - M[0, 1] = beta - M[0, 2] = (1 - alpha) * center[0] - beta * center[1] - M[1, 0] = -beta - M[1, 1] = alpha - M[1, 2] = beta * center[0] + (1 - alpha) * center[1] - - landmark_ = np.asarray([(M[0, 0] * x + M[0, 1] * y + M[0, 2], - M[1, 0] * x + M[1, 1] * y + M[1, 2]) - for (x, y) in landmark]) - return M, landmark_ - - def rotate_crop_img(self, img, pts, M): - imgT = cv2.warpAffine(img, M, (int(img.shape[1]), int(img.shape[0]))) - - x1 = pts[5][0] - x2 = pts[5][0] - y1 = pts[5][1] - y2 = pts[5][1] - for i in range(0, 9): - x1 = min(x1, pts[i][0]) - x2 = max(x2, pts[i][0]) - y1 = min(y1, pts[i][1]) - y2 = max(y2, pts[i][1]) - - height, width, _ = imgT.shape - x1 = min(max(0, int(x1)), width) - y1 = min(max(0, int(y1)), height) - x2 = min(max(0, int(x2)), width) - y2 = min(max(0, int(y2)), height) - sub_imgT = imgT[y1:y2, x1:x2] - - return sub_imgT, imgT, [x1, y1, x2, y2] - - def crop_img(self, imgT, pts): - enlarge_ratio = 1.1 - - x1 = np.min(pts[:, 0]) - x2 = np.max(pts[:, 0]) - y1 = np.min(pts[:, 1]) - y2 = np.max(pts[:, 1]) - w = x2 - x1 + 1 - h = y2 - y1 + 1 - x1 = int(x1 - (enlarge_ratio - 1.0) / 2.0 * w) - y1 = int(y1 - (enlarge_ratio - 1.0) / 2.0 * h) - x1 = max(0, x1) - y1 = max(0, y1) - - new_w = int(enlarge_ratio * w) - new_h = int(enlarge_ratio * h) - new_x1 = x1 - new_y1 = y1 - new_x2 = new_x1 + new_w - new_y2 = new_y1 + new_h - - height, width, _ = imgT.shape - - new_x1 = min(max(0, new_x1), width) - new_y1 = min(max(0, new_y1), height) - new_x2 = max(min(width, new_x2), 0) - new_y2 = max(min(height, new_y2), 0) - - sub_imgT = imgT[new_y1:new_y2, new_x1:new_x2] - - return sub_imgT, [new_x1, new_y1, new_x2, new_y2] - - def 
__call__(self, inputs) -> Any: - img = LoadImage.convert_to_ndarray(inputs) - h, w, c = img.shape - img_rgb = copy.deepcopy(img) - img_rgb = img_rgb[:, :, ::-1] - det_result = self.face_detection(img_rgb) - - bboxes = np.array(det_result[OutputKeys.BOXES]) - if bboxes.shape[0] == 0: - logger.warning('No face detected!') - results = { - OutputKeys.KEYPOINTS: [], - OutputKeys.POSES: [], - OutputKeys.BOXES: [] - } - return results - - boxes, keypoints = self._choose_face(det_result) - - output_boxes = [] - output_keypoints = [] - output_poses = [] - for index, box_ori in enumerate(boxes): - box = self.expend_box(box_ori, w, h, scalex=0.1, scaley=0.1) - y0 = int(box[1]) - y1 = int(box[3]) - x0 = int(box[0]) - x1 = int(box[2]) - sub_img = img[y0:y1, x0:x1] - - keypoint = keypoints[index] - pts = [[keypoint[0], keypoint[1]], [keypoint[2], keypoint[3]], - [keypoint[4], keypoint[5]], [keypoint[6], keypoint[7]], - [keypoint[8], keypoint[9]], [box[0], box[1]], - [box[2], box[1]], [box[0], box[3]], [box[2], box[3]]] - # radian - angle = math.atan2((pts[1][1] - pts[0][1]), - (pts[1][0] - pts[0][0])) - # angle - theta = angle * (180 / np.pi) - - center = [w // 2, h // 2] - cx, cy = center - M, landmark_ = self.rotate_point(theta, (cx, cy), pts) - sub_imgT, imgT, bbox = self.rotate_crop_img(img, landmark_, M) - - outputs = self.predict_op([sub_imgT])[0] - tmp_keypoints = outputs['point'] - - for idx in range(0, len(tmp_keypoints)): - tmp_keypoints[idx][0] += bbox[0] - tmp_keypoints[idx][1] += bbox[1] - - for idx in range(0, 6): - sub_img, bbox = self.crop_img(imgT, tmp_keypoints) - outputs = self.predict_op([sub_img])[0] - tmp_keypoints = outputs['point'] - for idx in range(0, len(tmp_keypoints)): - tmp_keypoints[idx][0] += bbox[0] - tmp_keypoints[idx][1] += bbox[1] - - M2, tmp_keypoints = self.rotate_point(-theta, (cx, cy), - tmp_keypoints) - - output_keypoints.append(np.array(tmp_keypoints)) - output_poses.append(np.array(outputs['pose'])) - output_boxes.append(np.array(box_ori)) - - results = { - OutputKeys.KEYPOINTS: output_keypoints, - OutputKeys.POSES: output_poses, - OutputKeys.BOXES: output_boxes - } - - return results diff --git a/modelscope/pipelines/cv/easycv_pipelines/human_wholebody_keypoint_pipeline.py b/modelscope/pipelines/cv/easycv_pipelines/human_wholebody_keypoint_pipeline.py deleted file mode 100644 index 903c4106..00000000 --- a/modelscope/pipelines/cv/easycv_pipelines/human_wholebody_keypoint_pipeline.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import os.path -from typing import Any - -from modelscope.metainfo import Pipelines -from modelscope.outputs import OutputKeys -from modelscope.pipelines.builder import PIPELINES -from modelscope.utils.constant import ModelFile, Tasks -from .base import EasyCVPipeline - - -@PIPELINES.register_module( - Tasks.human_wholebody_keypoint, - module_name=Pipelines.human_wholebody_keypoint) -class HumanWholebodyKeypointsPipeline(EasyCVPipeline): - """Pipeline for human wholebody 2d keypoints detection.""" - - def __init__(self, - model: str, - model_file_pattern=ModelFile.TORCH_MODEL_FILE, - *args, - **kwargs): - """ - model (str): model id on modelscope hub or local model path. - model_file_pattern (str): model file pattern. 
- """ - super(HumanWholebodyKeypointsPipeline, self).__init__( - model=model, - model_file_pattern=model_file_pattern, - *args, - **kwargs) - - def _build_predict_op(self, **kwargs): - """Build EasyCV predictor.""" - from easycv.predictors.builder import build_predictor - detection_predictor_type = self.cfg['DETECTION']['type'] - detection_model_path = os.path.join( - self.model_dir, self.cfg['DETECTION']['model_path']) - detection_cfg_file = os.path.join(self.model_dir, - self.cfg['DETECTION']['config_file']) - detection_score_threshold = self.cfg['DETECTION']['score_threshold'] - self.cfg.pipeline.predictor_config[ - 'detection_predictor_config'] = dict( - type=detection_predictor_type, - model_path=detection_model_path, - config_file=detection_cfg_file, - score_threshold=detection_score_threshold) - easycv_config = self._to_easycv_config() - pipeline_op = build_predictor(self.cfg.pipeline.predictor_config, { - 'model_path': self.model_path, - 'config_file': easycv_config, - **kwargs - }) - return pipeline_op - - def __call__(self, inputs) -> Any: - outputs = self.predict_op(inputs) - - results = [{ - OutputKeys.KEYPOINTS: output['keypoints'], - OutputKeys.BOXES: output['boxes'] - } for output in outputs] - - if self._is_single_inputs(inputs): - results = results[0] - - return results diff --git a/modelscope/pipelines/cv/easycv_pipelines/segmentation_pipeline.py b/modelscope/pipelines/cv/easycv_pipelines/segmentation_pipeline.py deleted file mode 100644 index bd09fc9b..00000000 --- a/modelscope/pipelines/cv/easycv_pipelines/segmentation_pipeline.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from typing import Any - -import numpy as np - -from modelscope.metainfo import Pipelines -from modelscope.outputs import OutputKeys -from modelscope.pipelines.builder import PIPELINES -from modelscope.utils.constant import Tasks -from .base import EasyCVPipeline - - -@PIPELINES.register_module( - Tasks.image_segmentation, module_name=Pipelines.easycv_segmentation) -class EasyCVSegmentationPipeline(EasyCVPipeline): - """Pipeline for easycv segmentation task.""" - - def __init__(self, model: str, model_file_pattern='*.pt', *args, **kwargs): - """ - model (str): model id on modelscope hub or local model path. - model_file_pattern (str): model file pattern. 
- """ - - super(EasyCVSegmentationPipeline, self).__init__( - model=model, - model_file_pattern=model_file_pattern, - *args, - **kwargs) - - def __call__(self, inputs) -> Any: - outputs = self.predict_op(inputs) - - semantic_result = outputs[0]['seg_pred'] - - ids = np.unique(semantic_result)[::-1] - legal_indices = ids != len(self.predict_op.CLASSES) # for VOID label - ids = ids[legal_indices] - segms = (semantic_result[None] == ids[:, None, None]) - masks = [it.astype(np.int) for it in segms] - labels_txt = np.array(self.predict_op.CLASSES)[ids].tolist() - - results = { - OutputKeys.MASKS: masks, - OutputKeys.LABELS: labels_txt, - OutputKeys.SCORES: [0.999 for _ in range(len(labels_txt))] - } - return results diff --git a/modelscope/pipelines/cv/face_reconstruction_pipeline.py b/modelscope/pipelines/cv/face_reconstruction_pipeline.py index f8240fc0..b9a8e320 100644 --- a/modelscope/pipelines/cv/face_reconstruction_pipeline.py +++ b/modelscope/pipelines/cv/face_reconstruction_pipeline.py @@ -134,7 +134,7 @@ class FaceReconstructionPipeline(Pipeline): img = LoadImage.convert_to_ndarray(input) if len(img.shape) == 2: img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) - img = img.astype(np.float) + img = img.astype(float) result = {'img': img} return result diff --git a/modelscope/pipelines/cv/fast_instance_segmentation_pipeline.py b/modelscope/pipelines/cv/fast_instance_segmentation_pipeline.py new file mode 100644 index 00000000..6ee341de --- /dev/null +++ b/modelscope/pipelines/cv/fast_instance_segmentation_pipeline.py @@ -0,0 +1,116 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import Any, Dict, Optional, Union + +import numpy as np +import torch +import torchvision.transforms as T + +from modelscope.metainfo import Pipelines +from modelscope.models.cv.image_instance_segmentation import FastInst +from modelscope.outputs import OutputKeys +from modelscope.pipelines.base import Input, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import LoadImage +from modelscope.utils.constant import Tasks +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +@PIPELINES.register_module( + Tasks.image_segmentation, module_name=Pipelines.fast_instance_segmentation) +class FastInstanceSegmentationPipeline(Pipeline): + + def __init__(self, + model: Union[FastInst, str], + preprocessor: Optional = None, + **kwargs): + r"""The inference pipeline for fastinst models. + + The model outputs a dict with keys of `scores`, `labels`, and `masks`. + + Args: + model (`str` or `Model` or module instance): A model instance or a model local dir + or a model id in the model hub. + preprocessor (`Preprocessor`, `optional`): A Preprocessor instance. + kwargs (dict, `optional`): + Extra kwargs passed into the preprocessor's constructor. 
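+
+        By default, input images are resized so that the short edge is 640 pixels
+        (long edge capped at 1333) before inference, and predicted instances with a
+        score below 0.5 are filtered out in postprocessing.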
+ + Examples: + >>> from modelscope.outputs import OutputKeys + >>> from modelscope.pipelines import pipeline + >>> pipeline_ins = pipeline('image-segmentation', + model='damo/cv_resnet50_fast-instance-segmentation_coco') + >>> input_img = 'https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/image_instance_segmentation.jpg' + >>> print(pipeline_ins(input_img)[OutputKeys.LABELS]) + """ + super().__init__(model=model, preprocessor=preprocessor, **kwargs) + self.model.eval() + + def _get_preprocess_shape(self, oldh, oldw, short_edge_length, max_size): + h, w = oldh, oldw + size = short_edge_length * 1.0 + scale = size / min(h, w) + if h < w: + newh, neww = size, scale * w + else: + newh, neww = scale * h, size + if max(newh, neww) > max_size: + scale = max_size * 1.0 / max(newh, neww) + newh = newh * scale + neww = neww * scale + neww = int(neww + 0.5) + newh = int(newh + 0.5) + return (newh, neww) + + def preprocess(self, + input: Input, + min_size=640, + max_size=1333) -> Dict[str, Any]: + image = LoadImage.convert_to_img(input) + w, h = image.size[:2] + dataset_dict = {'width': w, 'height': h} + new_h, new_w = self._get_preprocess_shape(h, w, min_size, max_size) + test_transforms = T.Compose([ + T.Resize((new_h, new_w)), + T.ToTensor(), + ]) + image = test_transforms(image) + dataset_dict['image'] = image * 255. + result = {'batched_inputs': [dataset_dict]} + return result + + def forward(self, input: Dict[str, Any], + **forward_params) -> Dict[str, Any]: + with torch.no_grad(): + output = self.model(**input) + return output + + def postprocess(self, + inputs: Dict[str, Any], + score_thr=0.5) -> Dict[str, Any]: + predictions = inputs['eval_result'][0]['instances'] + scores = predictions['scores'].detach().cpu().numpy() + pred_masks = predictions['pred_masks'].detach().cpu().numpy() + pred_classes = predictions['pred_classes'].detach().cpu().numpy() + + thresholded_idxs = np.array(scores) >= score_thr + scores = scores[thresholded_idxs] + pred_classes = pred_classes[thresholded_idxs] + pred_masks = pred_masks[thresholded_idxs] + + results_dict = { + OutputKeys.MASKS: [], + OutputKeys.LABELS: [], + OutputKeys.SCORES: [] + } + for score, cls, mask in zip(scores, pred_classes, pred_masks): + score = np.float64(score) + label = self.model.classes[int(cls)] + mask = np.array(mask, dtype=np.float64) + + results_dict[OutputKeys.SCORES].append(score) + results_dict[OutputKeys.LABELS].append(label) + results_dict[OutputKeys.MASKS].append(mask) + + return results_dict diff --git a/modelscope/pipelines/cv/hand_2d_keypoints_pipeline.py b/modelscope/pipelines/cv/hand_2d_keypoints_pipeline.py deleted file mode 100644 index 63281e80..00000000 --- a/modelscope/pipelines/cv/hand_2d_keypoints_pipeline.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import os.path - -from modelscope.metainfo import Pipelines -from modelscope.pipelines.builder import PIPELINES -from modelscope.utils.constant import ModelFile, Tasks -from .easycv_pipelines.base import EasyCVPipeline - - -@PIPELINES.register_module( - Tasks.hand_2d_keypoints, module_name=Pipelines.hand_2d_keypoints) -class Hand2DKeypointsPipeline(EasyCVPipeline): - """Pipeline for hand pose keypoint task.""" - - def __init__(self, - model: str, - model_file_pattern=ModelFile.TORCH_MODEL_FILE, - *args, - **kwargs): - """ - model (str): model id on modelscope hub or local model path. - model_file_pattern (str): model file pattern. 
- """ - super(Hand2DKeypointsPipeline, self).__init__( - model=model, - model_file_pattern=model_file_pattern, - *args, - **kwargs) - - def _build_predict_op(self, **kwargs): - """Build EasyCV predictor.""" - from easycv.predictors.builder import build_predictor - detection_predictor_type = self.cfg['DETECTION']['type'] - detection_model_path = os.path.join( - self.model_dir, self.cfg['DETECTION']['model_path']) - detection_cfg_file = os.path.join(self.model_dir, - self.cfg['DETECTION']['config_file']) - detection_score_threshold = self.cfg['DETECTION']['score_threshold'] - self.cfg.pipeline.predictor_config[ - 'detection_predictor_config'] = dict( - type=detection_predictor_type, - model_path=detection_model_path, - config_file=detection_cfg_file, - score_threshold=detection_score_threshold) - easycv_config = self._to_easycv_config() - pipeline_op = build_predictor(self.cfg.pipeline.predictor_config, { - 'model_path': self.model_path, - 'config_file': easycv_config, - **kwargs - }) - return pipeline_op diff --git a/modelscope/pipelines/cv/image_detection_pipeline.py b/modelscope/pipelines/cv/image_detection_pipeline.py index 86963c37..2b8275c2 100644 --- a/modelscope/pipelines/cv/image_detection_pipeline.py +++ b/modelscope/pipelines/cv/image_detection_pipeline.py @@ -30,7 +30,7 @@ class ImageDetectionPipeline(Pipeline): def preprocess(self, input: Input) -> Dict[str, Any]: img = LoadImage.convert_to_ndarray(input) - img = img.astype(np.float) + img = img.astype(np.float64) img = self.model.preprocess(img) result = {'img': img} return result diff --git a/modelscope/pipelines/cv/image_matting_pipeline.py b/modelscope/pipelines/cv/image_matting_pipeline.py index 5f5d1d56..bee655c5 100644 --- a/modelscope/pipelines/cv/image_matting_pipeline.py +++ b/modelscope/pipelines/cv/image_matting_pipeline.py @@ -53,7 +53,7 @@ class ImageMattingPipeline(Pipeline): def preprocess(self, input: Input) -> Dict[str, Any]: img = LoadImage.convert_to_ndarray(input) - img = img.astype(np.float) + img = img.astype(float) result = {'img': img} return result diff --git a/modelscope/pipelines/cv/image_panoptic_segmentation_pipeline.py b/modelscope/pipelines/cv/image_panoptic_segmentation_pipeline.py deleted file mode 100644 index fe941d9f..00000000 --- a/modelscope/pipelines/cv/image_panoptic_segmentation_pipeline.py +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from typing import Any, Dict, Union - -import cv2 -import numpy as np -import PIL -import torch - -from modelscope.metainfo import Pipelines -from modelscope.outputs import OutputKeys -from modelscope.pipelines.base import Input, Pipeline -from modelscope.pipelines.builder import PIPELINES -from modelscope.pipelines.cv.easycv_pipelines.base import EasyCVPipeline -from modelscope.preprocessors import load_image -from modelscope.utils.constant import Tasks -from modelscope.utils.logger import get_logger - -logger = get_logger() - - -@PIPELINES.register_module( - Tasks.image_segmentation, - module_name=Pipelines.image_panoptic_segmentation) -class ImagePanopticSegmentationPipeline(Pipeline): - - def __init__(self, model: str, **kwargs): - """ - use `model` to create a image panoptic segmentation pipeline for prediction - Args: - model: model id on modelscope hub. 
- """ - super().__init__(model=model, **kwargs) - - logger.info('panoptic segmentation model, pipeline init') - - def preprocess(self, input: Input) -> Dict[str, Any]: - from mmdet.datasets.pipelines import Compose - from mmcv.parallel import collate, scatter - from mmdet.datasets import replace_ImageToTensor - - cfg = self.model.cfg - # build the data pipeline - - if isinstance(input, str): - cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam' - img = np.array(load_image(input)) - img = img[:, :, ::-1] # convert to bgr - elif isinstance(input, PIL.Image.Image): - cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam' - img = np.array(input.convert('RGB')) - elif isinstance(input, np.ndarray): - cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam' - if len(input.shape) == 2: - img = cv2.cvtColor(input, cv2.COLOR_GRAY2BGR) - else: - img = input - else: - raise TypeError(f'input should be either str, PIL.Image,' - f' np.array, but got {type(input)}') - - # collect data - data = dict(img=img) - cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline) - test_pipeline = Compose(cfg.data.test.pipeline) - - data = test_pipeline(data) - # copy from mmdet_model collect data - data = collate([data], samples_per_gpu=1) - data['img_metas'] = [ - img_metas.data[0] for img_metas in data['img_metas'] - ] - data['img'] = [img.data[0] for img in data['img']] - if next(self.model.parameters()).is_cuda: - # scatter to specified GPU - data = scatter(data, [next(self.model.parameters()).device])[0] - - return data - - def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: - results = self.model.inference(input) - - return results - - def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: - # bz=1, tcguo - pan_results = inputs[0]['pan_results'] - INSTANCE_OFFSET = 1000 - - ids = np.unique(pan_results)[::-1] - legal_indices = ids != self.model.num_classes # for VOID label - ids = ids[legal_indices] - labels = np.array([id % INSTANCE_OFFSET for id in ids], dtype=np.int64) - segms = (pan_results[None] == ids[:, None, None]) - masks = [it.astype(np.int) for it in segms] - labels_txt = np.array(self.model.CLASSES)[labels].tolist() - - outputs = { - OutputKeys.MASKS: masks, - OutputKeys.LABELS: labels_txt, - OutputKeys.SCORES: [0.999 for _ in range(len(labels_txt))] - } - return outputs - - -@PIPELINES.register_module( - Tasks.image_segmentation, - module_name=Pipelines.image_panoptic_segmentation_easycv) -class ImagePanopticSegmentationEasyCVPipeline(EasyCVPipeline): - """Pipeline built upon easycv for image segmentation.""" - - def __init__(self, model: str, model_file_pattern='*.pt', *args, **kwargs): - """ - model (str): model id on modelscope hub or local model path. - model_file_pattern (str): model file pattern. 
- """ - super(ImagePanopticSegmentationEasyCVPipeline, self).__init__( - model=model, - model_file_pattern=model_file_pattern, - *args, - **kwargs) - - def __call__(self, inputs) -> Any: - outputs = self.predict_op(inputs) - easycv_results = outputs[0] - - results = { - OutputKeys.MASKS: - easycv_results[OutputKeys.MASKS], - OutputKeys.LABELS: - easycv_results[OutputKeys.LABELS], - OutputKeys.SCORES: - [0.999 for _ in range(len(easycv_results[OutputKeys.LABELS]))] - } - - return results diff --git a/modelscope/pipelines/cv/image_style_transfer_pipeline.py b/modelscope/pipelines/cv/image_style_transfer_pipeline.py index e5fd0d48..49a0bff0 100644 --- a/modelscope/pipelines/cv/image_style_transfer_pipeline.py +++ b/modelscope/pipelines/cv/image_style_transfer_pipeline.py @@ -73,12 +73,12 @@ class ImageStyleTransferPipeline(Pipeline): content = LoadImage.convert_to_ndarray(content) if len(content.shape) == 2: content = cv2.cvtColor(content, cv2.COLOR_GRAY2BGR) - content_img = content.astype(np.float) + content_img = content.astype(float) style_img = LoadImage.convert_to_ndarray(style) if len(style_img.shape) == 2: style_img = cv2.cvtColor(style_img, cv2.COLOR_GRAY2BGR) - style_img = style_img.astype(np.float) + style_img = style_img.astype(float) result = {'content': content_img, 'style': style_img} return result diff --git a/modelscope/pipelines/cv/movie_scene_segmentation_pipeline.py b/modelscope/pipelines/cv/movie_scene_segmentation_pipeline.py index 3fffc546..3cef5c28 100644 --- a/modelscope/pipelines/cv/movie_scene_segmentation_pipeline.py +++ b/modelscope/pipelines/cv/movie_scene_segmentation_pipeline.py @@ -43,23 +43,32 @@ class MovieSceneSegmentationPipeline(Pipeline): """ self.input_video_pth = input if isinstance(input, str): - shot_feat, sid = self.model.preprocess(input) + self.shot2keyf, self.anno, self.shot_timecode_lst, self.shot_idx_lst = self.model.preprocess( + input) else: raise TypeError(f'input should be a str,' f' but got {type(input)}') - result = {'sid': sid, 'shot_feat': shot_feat} + result = { + 'shot_timecode_lst': self.shot_timecode_lst, + 'shot_idx_lst': self.shot_idx_lst + } - return result + with torch.no_grad(): + output = self.model.inference(result) + + return output def forward(self, input: Dict[str, Any], **forward_params) -> Dict[str, Any]: - with torch.no_grad(): - output = self.model.inference(input) - return output + return input def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: - data = {'input_video_pth': self.input_video_pth, 'feat': inputs} + data = { + 'input_video_pth': self.input_video_pth, + 'feat': inputs, + 'shot2keyf': self.shot2keyf + } scene_num, scene_meta_lst, shot_num, shot_meta_lst = self.model.postprocess( data) result = { diff --git a/modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py b/modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py index 39195bcd..123057f5 100644 --- a/modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py +++ b/modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py @@ -225,7 +225,7 @@ class ReferringVideoObjectSegmentationPipeline(Pipeline): def apply_mask(image, mask, color, transparency=0.7): mask = mask[..., np.newaxis].repeat(repeats=3, axis=2) mask = mask * transparency - color_matrix = np.ones(image.shape, dtype=np.float) * color + color_matrix = np.ones(image.shape, dtype=np.float64) * color out_image = color_matrix * mask + image * (1.0 - mask) return out_image diff --git 
a/modelscope/pipelines/cv/skin_retouching_pipeline.py b/modelscope/pipelines/cv/skin_retouching_pipeline.py index b2b5f4ca..da9b912f 100644 --- a/modelscope/pipelines/cv/skin_retouching_pipeline.py +++ b/modelscope/pipelines/cv/skin_retouching_pipeline.py @@ -105,7 +105,7 @@ class SkinRetouchingPipeline(Pipeline): img = LoadImage.convert_to_ndarray(input) if len(img.shape) == 2: img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) - img = img.astype(np.float) + img = img.astype(float) result = {'img': img} return result diff --git a/modelscope/pipelines/cv/tbs_detection_pipeline.py b/modelscope/pipelines/cv/tbs_detection_pipeline.py index 58831846..8bbac9c8 100644 --- a/modelscope/pipelines/cv/tbs_detection_pipeline.py +++ b/modelscope/pipelines/cv/tbs_detection_pipeline.py @@ -116,7 +116,7 @@ class TBSDetectionPipeline(Pipeline): - **labels** (`List[str]`, optional) -- The boxes's class_names of detected object in image. """ img = LoadImage.convert_to_ndarray(input) - img = img.astype(np.float) + img = img.astype(float) result = {'img': img, 'img_path': input} return result diff --git a/modelscope/pipelines/multi_modal/__init__.py b/modelscope/pipelines/multi_modal/__init__.py index 2e496952..b28e9a71 100644 --- a/modelscope/pipelines/multi_modal/__init__.py +++ b/modelscope/pipelines/multi_modal/__init__.py @@ -21,6 +21,7 @@ if TYPE_CHECKING: from .diffusers_wrapped import StableDiffusionWrapperPipeline, ChineseStableDiffusionPipeline from .soonet_video_temporal_grounding_pipeline import SOONetVideoTemporalGroundingPipeline from .text_to_video_synthesis_pipeline import TextToVideoSynthesisPipeline + from .multimodal_dialogue_pipeline import MultimodalDialoguePipeline else: _import_structure = { 'image_captioning_pipeline': ['ImageCaptioningPipeline'], @@ -45,6 +46,7 @@ else: 'soonet_video_temporal_grounding_pipeline': ['SOONetVideoTemporalGroundingPipeline'], 'text_to_video_synthesis_pipeline': ['TextToVideoSynthesisPipeline'], + 'multimodal_dialogue_pipeline': ['MultimodalDialoguePipeline'] } import sys diff --git a/modelscope/pipelines/multi_modal/diffusers_wrapped/diffusers_pipeline.py b/modelscope/pipelines/multi_modal/diffusers_wrapped/diffusers_pipeline.py index d1e3a2ae..ce0455b6 100644 --- a/modelscope/pipelines/multi_modal/diffusers_wrapped/diffusers_pipeline.py +++ b/modelscope/pipelines/multi_modal/diffusers_wrapped/diffusers_pipeline.py @@ -39,10 +39,10 @@ class DiffusersPipeline(Pipeline): self.models = [self.model] self.has_multiple_models = len(self.models) > 1 - def preprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + def preprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: return inputs - def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: return inputs def __call__(self, input: Union[Input, List[Input]], *args, diff --git a/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/chinese_stable_diffusion_pipeline.py b/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/chinese_stable_diffusion_pipeline.py index d1627962..539fd4ba 100644 --- a/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/chinese_stable_diffusion_pipeline.py +++ b/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/chinese_stable_diffusion_pipeline.py @@ -46,7 +46,9 @@ class ChineseStableDiffusionPipeline(DiffusersPipeline): torch_dtype = kwargs.get('torch_dtype', torch.float32) self.pipeline = 
_DiffuersChineseStableDiffusionPipeline.from_pretrained( - model, torch_dtype=torch_dtype).to(self.device) + model, torch_dtype=torch_dtype) + self.pipeline.text_encoder.pooler = None + self.pipeline.to(self.device) def forward(self, inputs: Dict[str, Any], **forward_params) -> Dict[str, Any]: @@ -73,7 +75,7 @@ class ChineseStableDiffusionPipeline(DiffusersPipeline): callback=inputs.get('callback'), callback_steps=inputs.get('callback_steps', 1)) - def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: images = [] for img in inputs.images: if isinstance(img, Image.Image): diff --git a/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/stable_diffusion_pipeline.py b/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/stable_diffusion_pipeline.py index b6d9d3bd..49b4ef37 100644 --- a/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/stable_diffusion_pipeline.py +++ b/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/stable_diffusion_pipeline.py @@ -65,7 +65,7 @@ class StableDiffusionWrapperPipeline(DiffusersPipeline): callback=inputs.get('callback'), callback_steps=inputs.get('callback_steps', 1)) - def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: images = [] for img in inputs.images: if isinstance(img, Image.Image): diff --git a/modelscope/pipelines/multi_modal/image_captioning_pipeline.py b/modelscope/pipelines/multi_modal/image_captioning_pipeline.py index fbab88fd..17b850da 100644 --- a/modelscope/pipelines/multi_modal/image_captioning_pipeline.py +++ b/modelscope/pipelines/multi_modal/image_captioning_pipeline.py @@ -1,15 +1,18 @@ # Copyright (c) Alibaba, Inc. and its affiliates. from typing import Any, Dict, Optional, Union +import numpy as np import torch from modelscope.metainfo import Pipelines -from modelscope.models.multi_modal import MPlugForAllTasks, OfaForAllTasks +from modelscope.models.multi_modal import (CLIP_Interrogator, MPlugForAllTasks, + OfaForAllTasks) from modelscope.pipelines.base import Model, Pipeline from modelscope.pipelines.builder import PIPELINES from modelscope.pipelines.util import batch_process -from modelscope.preprocessors import (MPlugPreprocessor, OfaPreprocessor, - Preprocessor) +from modelscope.preprocessors import ( + ImageCaptioningClipInterrogatorPreprocessor, MPlugPreprocessor, + OfaPreprocessor, Preprocessor, load_image) from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.logger import get_logger @@ -28,6 +31,17 @@ class ImageCaptioningPipeline(Pipeline): use `model` and `preprocessor` to create a image captioning pipeline for prediction Args: model: model id on modelscope hub. 
+ Examples: + from modelscope.pipelines import pipeline + from modelscope.utils.constant import Tasks + + model_id = 'damo/cv_clip-interrogator' + input_image = "test.png" + + pipeline_ci = pipeline(Tasks.image_captioning, model=model_id) + print(pipeline_ci(input_image)) + + """ super().__init__(model=model, preprocessor=preprocessor, **kwargs) self.model.eval() @@ -39,6 +53,9 @@ class ImageCaptioningPipeline(Pipeline): self.preprocessor = OfaPreprocessor(self.model.model_dir) elif isinstance(self.model, MPlugForAllTasks): self.preprocessor = MPlugPreprocessor(self.model.model_dir) + elif isinstance(self.model, CLIP_Interrogator): + self.preprocessor = ImageCaptioningClipInterrogatorPreprocessor( + ) def _batch(self, data): if isinstance(self.model, OfaForAllTasks): diff --git a/modelscope/pipelines/multi_modal/multimodal_dialogue_pipeline.py b/modelscope/pipelines/multi_modal/multimodal_dialogue_pipeline.py new file mode 100644 index 00000000..31df19fc --- /dev/null +++ b/modelscope/pipelines/multi_modal/multimodal_dialogue_pipeline.py @@ -0,0 +1,90 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import Any, Dict, Optional, Union + +import torch + +from modelscope.metainfo import Pipelines +from modelscope.models.multi_modal import MplugOwlForConditionalGeneration +from modelscope.outputs import OutputKeys, TokenGeneratorOutput +from modelscope.pipelines.base import Model, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import MplugOwlPreprocessor, Preprocessor +from modelscope.utils.constant import Tasks +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +@PIPELINES.register_module( + Tasks.multimodal_dialogue, module_name=Pipelines.multimodal_dialogue) +class MultimodalDialoguePipeline(Pipeline): + r""" Multimodal Dialogue Pipeline. + + Examples: + >>> from modelscope.pipelines import pipeline + >>> chatbot = pipeline('multimodal-dialogue', 'damo/multi-modal_mplug_owl_multimodal-dialogue_7b') + >>> image = 'data/resource/portrait_input.png' + >>> system_prompt_1 = 'The following is a conversation between a curious human and AI assistant.' + >>> system_prompt_2 = "The assistant gives helpful, detailed, and polite answers to the user's questions." + >>> messages = { + >>> 'messages': [ + >>> { + >>> 'role': 'system', + >>> 'content': system_prompt_1 + ' ' + system_prompt_2 + >>> }, + >>> { + >>> 'role': 'user', + >>> 'content': [{ + >>> 'image': image + >>> }] + >>> }, + >>> { + >>> 'role': 'user', + >>> 'content': 'Describe the facial expression of the man.' + >>> }, + >>> ] + >>> } + >>> chatbot(messages) + >>> { + >>> "text": he is angry. + >>> } + >>> + """ + + def __init__(self, + model: Union[Model, str], + preprocessor: Optional[Preprocessor] = None, + **kwargs): + """ + use `model` and `preprocessor` to create a multimodal dialogue pipeline for prediction + Args: + model: model id on modelscope hub. + """ + super().__init__(model=model, preprocessor=preprocessor, **kwargs) + self.model.eval() + if preprocessor is None: + if isinstance(self.model, MplugOwlForConditionalGeneration): + self.preprocessor = MplugOwlPreprocessor(self.model.model_dir) + + def forward(self, inputs: Dict[str, Any], + **forward_params) -> Dict[str, Any]: + """ + the `forward_params` can be the generation configurations listed in transformers library. 
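+        For example, common generation arguments from the transformers library,
+        such as `max_new_tokens`, `do_sample` and `top_k`, can usually be passed
+        through here (illustrative values, assuming the underlying model's
+        `generate` method accepts them):
+
+        >>> chatbot(messages, max_new_tokens=512, do_sample=True, top_k=5)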
+ """ + with torch.no_grad(): + return super().forward(inputs, **forward_params) + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]: + """process the prediction results + + Args: + inputs (Dict[str, Any]): _description_ + + Returns: + Dict[str, str]: the prediction results + """ + if isinstance(self.model, MplugOwlForConditionalGeneration): + output = self.preprocessor.tokenizer.decode( + inputs[0], skip_special_tokens=True) + inputs = {OutputKeys.TEXT: output} + return inputs diff --git a/modelscope/pipelines/multi_modal/text_to_video_synthesis_pipeline.py b/modelscope/pipelines/multi_modal/text_to_video_synthesis_pipeline.py index ee6635a6..50e2437b 100644 --- a/modelscope/pipelines/multi_modal/text_to_video_synthesis_pipeline.py +++ b/modelscope/pipelines/multi_modal/text_to_video_synthesis_pipeline.py @@ -1,5 +1,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. +import os import tempfile from typing import Any, Dict, Optional @@ -62,8 +63,10 @@ class TextToVideoSynthesisPipeline(Pipeline): **post_params) -> Dict[str, Any]: video = tensor2vid(inputs['video']) output_video_path = post_params.get('output_video', None) + temp_video_file = False if output_video_path is None: output_video_path = tempfile.NamedTemporaryFile(suffix='.mp4').name + temp_video_file = True fourcc = cv2.VideoWriter_fourcc(*'mp4v') h, w, c = video[0].shape @@ -72,7 +75,15 @@ class TextToVideoSynthesisPipeline(Pipeline): for i in range(len(video)): img = cv2.cvtColor(video[i], cv2.COLOR_RGB2BGR) video_writer.write(img) - return {OutputKeys.OUTPUT_VIDEO: output_video_path} + video_writer.release() + if temp_video_file: + video_file_content = b'' + with open(output_video_path, 'rb') as f: + video_file_content = f.read() + os.remove(output_video_path) + return {OutputKeys.OUTPUT_VIDEO: video_file_content} + else: + return {OutputKeys.OUTPUT_VIDEO: output_video_path} def tensor2vid(video, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]): diff --git a/modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py b/modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py index fa7b23b8..a0e75638 100644 --- a/modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py +++ b/modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py @@ -41,7 +41,8 @@ class DialogIntentPredictionPipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) if preprocessor is None: self.preprocessor = DialogIntentPredictionPreprocessor( self.model.model_dir, **kwargs) diff --git a/modelscope/pipelines/nlp/document_grounded_dialog_generate_pipeline.py b/modelscope/pipelines/nlp/document_grounded_dialog_generate_pipeline.py index 8c773dfe..dfcd95e6 100644 --- a/modelscope/pipelines/nlp/document_grounded_dialog_generate_pipeline.py +++ b/modelscope/pipelines/nlp/document_grounded_dialog_generate_pipeline.py @@ -47,7 +47,8 @@ class DocumentGroundedDialogGeneratePipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) if preprocessor is None: self.preprocessor = DocumentGroundedDialogGeneratePreprocessor( diff --git a/modelscope/pipelines/nlp/document_grounded_dialog_rerank_pipeline.py b/modelscope/pipelines/nlp/document_grounded_dialog_rerank_pipeline.py index 8fdef380..29993594 100644 --- a/modelscope/pipelines/nlp/document_grounded_dialog_rerank_pipeline.py 
+++ b/modelscope/pipelines/nlp/document_grounded_dialog_rerank_pipeline.py @@ -65,7 +65,8 @@ class DocumentGroundedDialogRerankPipeline(Pipeline): device=device, auto_collate=auto_collate, seed=seed, - **kwarg) + compile=kwarg.pop('compile', False), + compile_options=kwarg.pop('compile_options', {})) self.model = model self.preprocessor = preprocessor self.device = device diff --git a/modelscope/pipelines/nlp/document_grounded_dialog_retrieval_pipeline.py b/modelscope/pipelines/nlp/document_grounded_dialog_retrieval_pipeline.py index c3fb1a32..31890a73 100644 --- a/modelscope/pipelines/nlp/document_grounded_dialog_retrieval_pipeline.py +++ b/modelscope/pipelines/nlp/document_grounded_dialog_retrieval_pipeline.py @@ -56,7 +56,8 @@ class DocumentGroundedDialogRetrievalPipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) if preprocessor is None: self.preprocessor = DocumentGroundedDialogRetrievalPreprocessor( diff --git a/modelscope/pipelines/nlp/document_segmentation_pipeline.py b/modelscope/pipelines/nlp/document_segmentation_pipeline.py index 6e195ed0..d528eee0 100644 --- a/modelscope/pipelines/nlp/document_segmentation_pipeline.py +++ b/modelscope/pipelines/nlp/document_segmentation_pipeline.py @@ -6,11 +6,9 @@ from typing import Any, Dict, List, Union import numpy as np import torch from datasets import Dataset -from transformers.models.bert.modeling_bert import BertConfig from modelscope.metainfo import Pipelines from modelscope.models import Model -from modelscope.models.nlp.ponet.configuration import PoNetConfig from modelscope.outputs import OutputKeys from modelscope.pipelines.base import Pipeline, Tensor from modelscope.pipelines.builder import PIPELINES @@ -51,11 +49,9 @@ class DocumentSegmentationPipeline(Pipeline): auto_collate=auto_collate, **kwargs) - kwargs = kwargs - if 'compile' in kwargs.keys(): - kwargs.pop('compile') - if 'compile_options' in kwargs.keys(): - kwargs.pop('compile_options') + kwargs.pop('compile', None) + kwargs.pop('compile_options', None) + self.model_dir = self.model.model_dir self.model_cfg = self.model.model_cfg if preprocessor is None: diff --git a/modelscope/pipelines/nlp/extractive_summarization_pipeline.py b/modelscope/pipelines/nlp/extractive_summarization_pipeline.py index c01f28fc..a4e67607 100644 --- a/modelscope/pipelines/nlp/extractive_summarization_pipeline.py +++ b/modelscope/pipelines/nlp/extractive_summarization_pipeline.py @@ -44,11 +44,8 @@ class ExtractiveSummarizationPipeline(Pipeline): auto_collate=auto_collate, **kwargs) - kwargs = kwargs - if 'compile' in kwargs.keys(): - kwargs.pop('compile') - if 'compile_options' in kwargs.keys(): - kwargs.pop('compile_options') + kwargs.pop('compile', None) + kwargs.pop('compile_options', None) self.model_dir = self.model.model_dir self.model_cfg = self.model.model_cfg diff --git a/modelscope/pipelines/nlp/feature_extraction_pipeline.py b/modelscope/pipelines/nlp/feature_extraction_pipeline.py index 0f6979ba..c82db03c 100644 --- a/modelscope/pipelines/nlp/feature_extraction_pipeline.py +++ b/modelscope/pipelines/nlp/feature_extraction_pipeline.py @@ -54,7 +54,8 @@ class FeatureExtractionPipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) assert isinstance(self.model, Model), \ f'please check whether model config exists 
in {ModelFile.CONFIGURATION}' diff --git a/modelscope/pipelines/nlp/fill_mask_pipeline.py b/modelscope/pipelines/nlp/fill_mask_pipeline.py index 6bc7622f..7b034786 100644 --- a/modelscope/pipelines/nlp/fill_mask_pipeline.py +++ b/modelscope/pipelines/nlp/fill_mask_pipeline.py @@ -63,7 +63,8 @@ class FillMaskPipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) assert isinstance(self.model, Model), \ f'please check whether model config exists in {ModelFile.CONFIGURATION}' diff --git a/modelscope/pipelines/nlp/named_entity_recognition_pipeline.py b/modelscope/pipelines/nlp/named_entity_recognition_pipeline.py index 2cf30037..d035802b 100644 --- a/modelscope/pipelines/nlp/named_entity_recognition_pipeline.py +++ b/modelscope/pipelines/nlp/named_entity_recognition_pipeline.py @@ -56,7 +56,8 @@ class NamedEntityRecognitionPipeline(TokenClassificationPipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) assert isinstance(self.model, Model), \ f'please check whether model config exists in {ModelFile.CONFIGURATION}' diff --git a/modelscope/pipelines/nlp/sentence_embedding_pipeline.py b/modelscope/pipelines/nlp/sentence_embedding_pipeline.py index 4e01397d..9d5cc80f 100644 --- a/modelscope/pipelines/nlp/sentence_embedding_pipeline.py +++ b/modelscope/pipelines/nlp/sentence_embedding_pipeline.py @@ -43,7 +43,8 @@ class SentenceEmbeddingPipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) assert isinstance(self.model, Model), \ f'please check whether model config exists in {ModelFile.CONFIGURATION}' diff --git a/modelscope/pipelines/nlp/siamese_uie_pipeline.py b/modelscope/pipelines/nlp/siamese_uie_pipeline.py index cdbd9119..d548d2e8 100644 --- a/modelscope/pipelines/nlp/siamese_uie_pipeline.py +++ b/modelscope/pipelines/nlp/siamese_uie_pipeline.py @@ -21,7 +21,7 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines.base import Pipeline from modelscope.pipelines.builder import PIPELINES from modelscope.preprocessors import Preprocessor, SiameseUiePreprocessor -from modelscope.utils.constant import Tasks +from modelscope.utils.constant import ModelFile, Tasks Input = Union[str, tuple, MsDataset, 'Image.Image', 'numpy.ndarray'] @@ -68,7 +68,8 @@ class SiameseUiePipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) assert isinstance(self.model, Model), \ f'please check whether model config exists in {ModelFile.CONFIGURATION}' diff --git a/modelscope/pipelines/nlp/table_question_answering_pipeline.py b/modelscope/pipelines/nlp/table_question_answering_pipeline.py index 0472ecb8..7c064f57 100644 --- a/modelscope/pipelines/nlp/table_question_answering_pipeline.py +++ b/modelscope/pipelines/nlp/table_question_answering_pipeline.py @@ -52,7 +52,8 @@ class TableQuestionAnsweringPipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) assert isinstance(self.model, Model), \ f'please check whether model config exists in 
{ModelFile.CONFIGURATION}' @@ -402,7 +403,7 @@ class TableQuestionAnsweringPipeline(Pipeline): OutputKeys.SQL_STRING: sql.string, OutputKeys.SQL_QUERY: sql.query, OutputKeys.HISTORY: result['sql'], - OutputKeys.QUERT_RESULT: tabledata, + OutputKeys.QUERY_RESULT: tabledata, } return {OutputKeys.OUTPUT: output} diff --git a/modelscope/pipelines/nlp/text_classification_pipeline.py b/modelscope/pipelines/nlp/text_classification_pipeline.py index a300b008..3b06f435 100644 --- a/modelscope/pipelines/nlp/text_classification_pipeline.py +++ b/modelscope/pipelines/nlp/text_classification_pipeline.py @@ -76,7 +76,7 @@ class TextClassificationPipeline(Pipeline): field=Fields.multi_modal, **kwargs) else: - first_sequence = kwargs.pop('first_sequence', 'first_sequence') + first_sequence = kwargs.pop('first_sequence', 'text') second_sequence = kwargs.pop('second_sequence', None) sequence_length = kwargs.pop('sequence_length', 512) self.preprocessor = Preprocessor.from_pretrained( diff --git a/modelscope/pipelines/nlp/text_generation_pipeline.py b/modelscope/pipelines/nlp/text_generation_pipeline.py index 2b851dc4..d1aa5ff6 100644 --- a/modelscope/pipelines/nlp/text_generation_pipeline.py +++ b/modelscope/pipelines/nlp/text_generation_pipeline.py @@ -59,7 +59,8 @@ class TextGenerationPipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) assert isinstance(self.model, Model), \ f'please check whether model config exists in {ModelFile.CONFIGURATION}' diff --git a/modelscope/pipelines/nlp/text_ranking_pipeline.py b/modelscope/pipelines/nlp/text_ranking_pipeline.py index a42baaa2..7539634e 100644 --- a/modelscope/pipelines/nlp/text_ranking_pipeline.py +++ b/modelscope/pipelines/nlp/text_ranking_pipeline.py @@ -44,7 +44,8 @@ class TextRankingPipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) assert isinstance(self.model, Model), \ f'please check whether model config exists in {ModelFile.CONFIGURATION}' diff --git a/modelscope/pipelines/nlp/token_classification_pipeline.py b/modelscope/pipelines/nlp/token_classification_pipeline.py index daa4823c..9fd8e325 100644 --- a/modelscope/pipelines/nlp/token_classification_pipeline.py +++ b/modelscope/pipelines/nlp/token_classification_pipeline.py @@ -51,7 +51,9 @@ class TokenClassificationPipeline(Pipeline): preprocessor=preprocessor, config_file=config_file, device=device, - auto_collate=auto_collate) + auto_collate=auto_collate, + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) assert isinstance(self.model, Model), \ f'please check whether model config exists in {ModelFile.CONFIGURATION}' diff --git a/modelscope/pipelines/nlp/translation_evaluation_pipeline.py b/modelscope/pipelines/nlp/translation_evaluation_pipeline.py index 8a339517..4450aad7 100644 --- a/modelscope/pipelines/nlp/translation_evaluation_pipeline.py +++ b/modelscope/pipelines/nlp/translation_evaluation_pipeline.py @@ -9,12 +9,11 @@ import torch from modelscope.metainfo import Pipelines from modelscope.models.base import Model -from modelscope.models.nlp.unite.configuration_unite import EvaluationMode +from modelscope.models.nlp.unite.configuration import InputFormat from modelscope.outputs import OutputKeys from modelscope.pipelines.base import InputModel, Pipeline from 
modelscope.pipelines.builder import PIPELINES -from modelscope.preprocessors import (Preprocessor, - TranslationEvaluationPreprocessor) +from modelscope.preprocessors import Preprocessor from modelscope.utils.config import Config from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.logger import get_logger @@ -31,16 +30,18 @@ class TranslationEvaluationPipeline(Pipeline): def __init__(self, model: InputModel, preprocessor: Optional[Preprocessor] = None, - eval_mode: EvaluationMode = EvaluationMode.SRC_REF, + input_format: InputFormat = InputFormat.SRC_REF, device: str = 'gpu', **kwargs): - r"""Build a translation pipeline with a model dir or a model id in the model hub. + r"""Build a translation evaluation pipeline with a model dir or a model id in the model hub. Args: model: A Model instance. - eval_mode: Evaluation mode, choosing one from `"EvaluationMode.SRC_REF"`, - `"EvaluationMode.SRC"`, `"EvaluationMode.REF"`. Aside from hypothesis, the + preprocessor: The preprocessor for this pipeline. + input_format: Input format, choosing one from `"InputFormat.SRC_REF"`, + `"InputFormat.SRC"`, `"InputFormat.REF"`. Aside from hypothesis, the source/reference/source+reference can be presented during evaluation. + device: Used device for this pipeline. """ super().__init__( model=model, @@ -48,44 +49,40 @@ class TranslationEvaluationPipeline(Pipeline): compile=kwargs.pop('compile', False), compile_options=kwargs.pop('compile_options', {})) - self.eval_mode = eval_mode - self.checking_eval_mode() + self.input_format = input_format + self.checking_input_format() assert isinstance(self.model, Model), \ f'please check whether model config exists in {ModelFile.CONFIGURATION}' - self.preprocessor = TranslationEvaluationPreprocessor( - self.model.model_dir, - self.eval_mode) if preprocessor is None else preprocessor - self.model.load_checkpoint( osp.join(self.model.model_dir, ModelFile.TORCH_MODEL_BIN_FILE), - self.device) + device=self.device, + plm_only=False) self.model.eval() return - def checking_eval_mode(self): - if self.eval_mode == EvaluationMode.SRC: + def checking_input_format(self): + if self.input_format == InputFormat.SRC: logger.info('Evaluation mode: source-only') - elif self.eval_mode == EvaluationMode.REF: + elif self.input_format == InputFormat.REF: logger.info('Evaluation mode: reference-only') - elif self.eval_mode == EvaluationMode.SRC_REF: + elif self.input_format == InputFormat.SRC_REF: logger.info('Evaluation mode: source-reference-combined') else: - raise ValueError( - 'Evaluation mode should be one choice among' - '\'EvaluationMode.SRC\', \'EvaluationMode.REF\', and' - '\'EvaluationMode.SRC_REF\'.') + raise ValueError('Evaluation mode should be one choice among' + '\'InputFormat.SRC\', \'InputFormat.REF\', and' + '\'InputFormat.SRC_REF\'.') - def change_eval_mode(self, - eval_mode: EvaluationMode = EvaluationMode.SRC_REF): + def change_input_format(self, + input_format: InputFormat = InputFormat.SRC_REF): logger.info('Changing the evaluation mode.') - self.eval_mode = eval_mode - self.checking_eval_mode() - self.preprocessor.eval_mode = eval_mode + self.input_format = input_format + self.checking_input_format() + self.preprocessor.change_input_format(input_format) return - def __call__(self, input: Dict[str, Union[str, List[str]]], **kwargs): + def __call__(self, input_dict: Dict[str, Union[str, List[str]]], **kwargs): r"""Implementation of __call__ function. 
Args: @@ -108,12 +105,12 @@ class TranslationEvaluationPipeline(Pipeline): } ``` """ - return super().__call__(input=input, **kwargs) + return super().__call__(input=input_dict, **kwargs) - def forward(self, - input_ids: List[torch.Tensor]) -> Dict[str, torch.Tensor]: - return self.model(input_ids) + def forward( + self, input_dict: Dict[str, + torch.Tensor]) -> Dict[str, torch.Tensor]: + return self.model(**input_dict) def postprocess(self, output: torch.Tensor) -> Dict[str, Any]: - result = {OutputKeys.SCORES: output.cpu().tolist()} - return result + return output diff --git a/modelscope/pipelines/nlp/user_satisfaction_estimation_pipeline.py b/modelscope/pipelines/nlp/user_satisfaction_estimation_pipeline.py index 76fcd7a8..197a941f 100644 --- a/modelscope/pipelines/nlp/user_satisfaction_estimation_pipeline.py +++ b/modelscope/pipelines/nlp/user_satisfaction_estimation_pipeline.py @@ -51,7 +51,8 @@ class UserSatisfactionEstimationPipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) if hasattr(self.preprocessor, 'id2label'): self.id2label = self.preprocessor.id2label diff --git a/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py b/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py index 9cd27adc..18ba40c8 100644 --- a/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py +++ b/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py @@ -67,7 +67,8 @@ class ZeroShotClassificationPipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) self.entailment_id = 0 self.contradiction_id = 2 diff --git a/modelscope/pipelines/pipeline_template.py b/modelscope/pipelines/pipeline_template.py new file mode 100644 index 00000000..a29ce5d7 --- /dev/null +++ b/modelscope/pipelines/pipeline_template.py @@ -0,0 +1,87 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from typing import Any, Dict + +import numpy as np + +from modelscope.metainfo import Pipelines +from modelscope.models.base.base_model import Model +from modelscope.outputs.outputs import OutputKeys +from modelscope.pipelines.base import Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.utils.constant import Tasks + +__all__ = ['PipelineTemplate'] + + +@PIPELINES.register_module( + Tasks.task_template, module_name=Pipelines.pipeline_template) +class PipelineTemplate(Pipeline): + """A pipeline template explain how to define parameters and input and + output information. As a rule, the first parameter is the input, + followed by the request parameters. The parameter must add type + hint information, and set the default value if necessary, + for the convenience of use. + """ + + def __init__(self, model: Model, **kwargs): + """A pipeline template to describe input and + output and parameter processing + + Args: + model: A Model instance. + """ + # call base init. + super().__init__(model=model, **kwargs) + + def preprocess(self, + input: Any, + max_length: int = 1024, + top_p: float = 0.8) -> Any: + """Pipeline preprocess interface. + + Args: + input (Any): The pipeline input, ref Tasks.task_template TASK_INPUTS. + max_length (int, optional): The max_length parameter. Defaults to 1024. + top_p (float, optional): The top_p parameter. Defaults to 0.8. + + Returns: + Any: Return result process by forward. 
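+            For illustration only (the model id below is a placeholder and the
+            parameter values are arbitrary), request parameters reach this method
+            from the pipeline call:
+
+            >>> pipe = pipeline(Tasks.task_template, model='<your-model-id>')
+            >>> pipe(input, max_length=1024, top_p=0.8)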
+ """ + pass + + def forward(self, + input: Any, + max_length: int = 1024, + top_p: float = 0.8) -> Any: + """The forward interface. + + Args: + input (Any): The output of the preprocess. + max_length (int, optional): max_length. Defaults to 1024. + top_p (float, optional): top_p. Defaults to 0.8. + + Returns: + Any: Return result process by postprocess. + """ + pass + + def postprocess(self, + inputs: Any, + postprocess_param1: str = None) -> Dict[str, Any]: + """The postprocess interface. + + Args: + input (Any): The output of the forward. + max_length (int, optional): max_length. Defaults to 1024. + top_p (float, optional): top_p. Defaults to 0.8. + + Returns: + Any: Return result process by postprocess. + """ + result = { + OutputKeys.BOXES: np.zeros(4), + OutputKeys.OUTPUT_IMG: np.zeros(10, 4), + OutputKeys.TEXT_EMBEDDING: np.zeros(1, 1000) + } + return result diff --git a/modelscope/preprocessors/__init__.py b/modelscope/preprocessors/__init__.py index a35f130a..dbcb0813 100644 --- a/modelscope/preprocessors/__init__.py +++ b/modelscope/preprocessors/__init__.py @@ -20,7 +20,8 @@ if TYPE_CHECKING: from .tts import KanttsDataPreprocessor from .multi_modal import (DiffusionImageGenerationPreprocessor, OfaPreprocessor, MPlugPreprocessor, - HiTeAPreprocessor) + HiTeAPreprocessor, MplugOwlPreprocessor, + ImageCaptioningClipInterrogatorPreprocessor) from .nlp import ( DocumentSegmentationTransformersPreprocessor, FaqQuestionAnsweringTransformersPreprocessor, @@ -34,16 +35,16 @@ if TYPE_CHECKING: TextErrorCorrectionPreprocessor, TextGenerationT5Preprocessor, WordAlignmentPreprocessor, TextGenerationTransformersPreprocessor, Tokenize, WordSegmentationBlankSetToLabelPreprocessor, - CodeGeeXPreprocessor, MGLMSummarizationPreprocessor, + MGLMSummarizationPreprocessor, ZeroShotClassificationTransformersPreprocessor, TextGenerationJiebaPreprocessor, SentencePiecePreprocessor, DialogIntentPredictionPreprocessor, DialogModelingPreprocessor, DialogStateTrackingPreprocessor, ConversationalTextToSqlPreprocessor, TableQuestionAnsweringPreprocessor, NERPreprocessorViet, NERPreprocessorThai, WordSegmentationPreprocessorThai, - TranslationEvaluationPreprocessor, CanmtTranslationPreprocessor, - DialogueClassificationUsePreprocessor, SiameseUiePreprocessor, - DocumentGroundedDialogGeneratePreprocessor, + TranslationEvaluationTransformersPreprocessor, + CanmtTranslationPreprocessor, DialogueClassificationUsePreprocessor, + SiameseUiePreprocessor, DocumentGroundedDialogGeneratePreprocessor, DocumentGroundedDialogRetrievalPreprocessor, DocumentGroundedDialogRerankPreprocessor) from .video import ReadVideoData, MovieSceneSegmentationPreprocessor @@ -70,7 +71,8 @@ else: 'tts': ['KanttsDataPreprocessor'], 'multi_modal': [ 'DiffusionImageGenerationPreprocessor', 'OfaPreprocessor', - 'MPlugPreprocessor', 'HiTeAPreprocessor' + 'MPlugPreprocessor', 'HiTeAPreprocessor', 'MplugOwlPreprocessor', + 'ImageCaptioningClipInterrogatorPreprocessor' ], 'nlp': [ 'DocumentSegmentationTransformersPreprocessor', @@ -96,7 +98,7 @@ else: 'DialogStateTrackingPreprocessor', 'ConversationalTextToSqlPreprocessor', 'TableQuestionAnsweringPreprocessor', - 'TranslationEvaluationPreprocessor', + 'TranslationEvaluationTransformersPreprocessor', 'CanmtTranslationPreprocessor', 'DialogueClassificationUsePreprocessor', 'SiameseUiePreprocessor', 'DialogueClassificationUsePreprocessor', diff --git a/modelscope/preprocessors/asr.py b/modelscope/preprocessors/asr.py index ea867775..4696c675 100644 --- a/modelscope/preprocessors/asr.py +++ 
b/modelscope/preprocessors/asr.py @@ -74,14 +74,6 @@ class WavToScp(Preprocessor): if code_base != 'funasr': cmd = self.config_checking(cmd) cmd = self.env_setting(cmd) - if audio_format == 'wav': - cmd['audio_lists'] = self.scp_generation_from_wav(cmd) - elif audio_format == 'kaldi_ark': - cmd['audio_lists'] = self.scp_generation_from_ark(cmd) - elif audio_format == 'tfrecord': - cmd['audio_lists'] = os.path.join(cmd['wav_path'], 'data.records') - elif audio_format == 'pcm' or audio_format == 'scp': - cmd['audio_lists'] = audio_in return cmd @@ -235,63 +227,4 @@ class WavToScp(Preprocessor): inputs['model_lang'] = inputs['model_config']['lang'] else: inputs['model_lang'] = 'zh-cn' - return inputs - - def scp_generation_from_wav(self, inputs: Dict[str, Any]) -> List[Any]: - """scp generation from waveform files - """ - - # find all waveform files - wav_list = [] - if inputs['recog_type'] == 'wav': - file_path = inputs['wav_path'] - if os.path.isfile(file_path): - if file_path.endswith('.wav') or file_path.endswith('.WAV'): - wav_list.append(file_path) - else: - from easyasr.common import asr_utils - wav_dir: str = inputs['wav_path'] - wav_list = asr_utils.recursion_dir_all_wav(wav_list, wav_dir) - - list_count: int = len(wav_list) - inputs['wav_count'] = list_count - - # store all wav into audio list - audio_lists = [] - j: int = 0 - while j < list_count: - wav_file = wav_list[j] - wave_key: str = os.path.splitext(os.path.basename(wav_file))[0] - item = {'key': wave_key, 'file': wav_file} - audio_lists.append(item) - j += 1 - - return audio_lists - - def scp_generation_from_ark(self, inputs: Dict[str, Any]) -> List[Any]: - """scp generation from kaldi ark file - """ - - ark_scp_path = os.path.join(inputs['wav_path'], 'data.scp') - ark_file_path = os.path.join(inputs['wav_path'], 'data.ark') - assert os.path.exists(ark_scp_path), 'data.scp does not exist' - assert os.path.exists(ark_file_path), 'data.ark does not exist' - - with open(ark_scp_path, 'r', encoding='utf-8') as f: - lines = f.readlines() - - # store all ark item into audio list - audio_lists = [] - for line in lines: - outs = line.strip().split(' ') - if len(outs) == 2: - key = outs[0] - sub = outs[1].split(':') - if len(sub) == 2: - nums = sub[1] - content = ark_file_path + ':' + nums - item = {'key': key, 'file': content} - audio_lists.append(item) - - return audio_lists diff --git a/modelscope/preprocessors/multi_modal.py b/modelscope/preprocessors/multi_modal.py index bd37c620..faf796f4 100644 --- a/modelscope/preprocessors/multi_modal.py +++ b/modelscope/preprocessors/multi_modal.py @@ -1,5 +1,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import os.path as osp +import re from io import BytesIO from typing import Any, Dict, List, Tuple, Union @@ -29,7 +30,7 @@ from .ofa.utils.constant import OFA_TASK_KEY_MAPPING __all__ = [ 'DiffusionImageGenerationPreprocessor', 'OfaPreprocessor', - 'MPlugPreprocessor', 'HiTeAPreprocessor' + 'MPlugPreprocessor', 'HiTeAPreprocessor', 'MplugOwlPreprocessor' ] @@ -642,3 +643,159 @@ class HiTeAPreprocessor(Preprocessor): 'answer_attention_mask': answer.attention_mask.squeeze(), } return output + + +@PREPROCESSORS.register_module( + Fields.multi_modal, module_name=Preprocessors.mplug_owl_preprocessor) +class MplugOwlPreprocessor(Preprocessor): + + def __init__(self, + model_dir: str, + mode: str = ModeKeys.INFERENCE, + *args, + **kwargs): + super().__init__(*args, **kwargs) + self.model_dir = model_dir + self.mode = mode + + self._tokenizer = None + self._patch_resize_transform = None + self.media_token = {'': 65} + self._image_map = {} + + @property + def tokenizer(self): + from modelscope.models.nlp.llama import LlamaTokenizer + + if self._tokenizer is None: + self._tokenizer = LlamaTokenizer.from_pretrained(self.model_dir) + return self._tokenizer + + @property + def patch_resize_transform(self): + if self._patch_resize_transform is None: + from torchvision import transforms + + mean = (0.48145466, 0.4578275, 0.40821073) + std = (0.26862954, 0.26130258, 0.27577711) + + self._patch_resize_transform = transforms.Compose([ + transforms.Resize((224, 224), interpolation=Image.BICUBIC), + transforms.ToTensor(), + transforms.Normalize(mean=mean, std=std), + ]) + return self._patch_resize_transform + + def image_open(self, path: str) -> Tuple[Image.Image, int]: + if path not in self._image_map: + index = len(self._image_map) + self._image_map[path] = (load_image(path), index) + return self._image_map[path] + + def tokenize_text(self, text: str) -> List[int]: + media_tokens = { + k: -int(i + 1) + for i, k in enumerate(self.media_token.keys()) + } + media_lengths = self.media_token.copy() + + prompt_chunk = [self.tokenizer.bos_token_id] + + # Pure Text + condition = [ + media_token not in text for media_token in media_tokens.keys() + ] + if all(condition): + enc_chunk = prompt_chunk + \ + self.tokenizer(text, add_special_tokens=False)['input_ids'] + + # Multi-Modal Text + else: + enc_chunk = prompt_chunk + pattern = '|'.join(map(re.escape, list(media_tokens.keys()))) + chunk_strs = re.split(f'({pattern})', text) + chunk_strs = [x for x in chunk_strs if len(x) > 0] + for idx, chunk_str in enumerate(chunk_strs): + if chunk_str in media_tokens: + enc_chunk += [media_tokens[chunk_str]] * \ + media_lengths[chunk_str] + else: + tmp_chunk = self.tokenizer( + chunk_str, add_special_tokens=False)['input_ids'] + enc_chunk += tmp_chunk + return enc_chunk + + def convert(self, messages: Dict[str, List[Dict]]) -> str: + texts = [] + image = [] + messages = messages['messages'] + for turn in messages: + if turn['role'] == 'system': + role = '' + elif turn['role'] == 'user': + role = 'Human: ' + else: + role = 'AI: ' + if isinstance(turn['content'], str): + text = f"{role}{turn['content']}" + texts.append(text) + else: + for t in turn['content']: + if isinstance(t, str): + text = f'{role}{t}' + else: + text = f'{role}' + image.append(t['image']) + texts.append(text) + texts = '\n'.join(texts) + texts += '\nAI: ' + return image, texts + + def __call__(self, messages: Dict[str, Any]) -> Dict[str, Any]: + """ + Args: + messages: {[ + {'role': 'system', 'content': 'message1'}, + {'role': 'user', 'content': 'message2'}, + 
{'role': 'user', 'content': ['message2', {"image": 'image_path'}, 'message3', ...]}, + ]} + The 'role' should be choose from ['system', 'user', 'assistant']. + The 'content' can be either str or List[Union[str, Dict]] + Return: + output: Dict[str, Tensor] + """ + output = {} + images, text = self.convert(messages) + + if len(images) > 0: + pixel_values = [] + for image in images: + pixel_values.append( + self.patch_resize_transform(self.image_open(image)[0])) + pixel_values = torch.stack(pixel_values, dim=0) + else: + pixel_values = None + + input_ids = self.tokenize_text(text) + input_ids = torch.LongTensor([input_ids]) + + output = { + 'pixel_values': pixel_values, + 'input_ids': input_ids, + } + + return output + + +@PREPROCESSORS.register_module( + Fields.multi_modal, + module_name=Preprocessors.image_captioning_clip_interrogator_preprocessor) +class ImageCaptioningClipInterrogatorPreprocessor(Preprocessor): + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def __call__(self, data) -> Dict[str, Any]: + image = load_image(data) + data = np.array(image).transpose(2, 0, 1) + return data diff --git a/modelscope/preprocessors/nlp/__init__.py b/modelscope/preprocessors/nlp/__init__.py index 5904d65e..19421fa0 100644 --- a/modelscope/preprocessors/nlp/__init__.py +++ b/modelscope/preprocessors/nlp/__init__.py @@ -29,7 +29,7 @@ if TYPE_CHECKING: from .space_T_en import ConversationalTextToSqlPreprocessor from .space_T_cn import TableQuestionAnsweringPreprocessor from .mglm_summarization_preprocessor import MGLMSummarizationPreprocessor - from .translation_evaluation_preprocessor import TranslationEvaluationPreprocessor + from .translation_evaluation_preprocessor import TranslationEvaluationTransformersPreprocessor from .canmt_translation import CanmtTranslationPreprocessor from .dialog_classification_use_preprocessor import DialogueClassificationUsePreprocessor from .siamese_uie_preprocessor import SiameseUiePreprocessor @@ -90,7 +90,7 @@ else: 'space_T_en': ['ConversationalTextToSqlPreprocessor'], 'space_T_cn': ['TableQuestionAnsweringPreprocessor'], 'translation_evaluation_preprocessor': - ['TranslationEvaluationPreprocessor'], + ['TranslationEvaluationTransformersPreprocessor'], 'canmt_translation': [ 'CanmtTranslationPreprocessor', ], diff --git a/modelscope/preprocessors/nlp/token_classification_preprocessor.py b/modelscope/preprocessors/nlp/token_classification_preprocessor.py index 66e57cc8..4b4fee1f 100644 --- a/modelscope/preprocessors/nlp/token_classification_preprocessor.py +++ b/modelscope/preprocessors/nlp/token_classification_preprocessor.py @@ -201,7 +201,7 @@ class TokenClassificationTransformersPreprocessor( def __init__(self, model_dir: str = None, - first_sequence: str = None, + first_sequence: str = 'text', label: str = 'label', label2id: Dict = None, label_all_tokens: bool = False, diff --git a/modelscope/preprocessors/nlp/translation_evaluation_preprocessor.py b/modelscope/preprocessors/nlp/translation_evaluation_preprocessor.py index 0bf62cdc..b0b2efd1 100644 --- a/modelscope/preprocessors/nlp/translation_evaluation_preprocessor.py +++ b/modelscope/preprocessors/nlp/translation_evaluation_preprocessor.py @@ -2,10 +2,13 @@ from typing import Any, Dict, List, Union +import torch from transformers import AutoTokenizer from modelscope.metainfo import Preprocessors -from modelscope.models.nlp.unite.configuration_unite import EvaluationMode +from modelscope.models.nlp.unite.configuration import InputFormat +from 
modelscope.models.nlp.unite.translation_evaluation import \ + combine_input_sentences from modelscope.preprocessors import Preprocessor from modelscope.preprocessors.builder import PREPROCESSORS from modelscope.utils.constant import Fields, ModeKeys @@ -14,43 +17,98 @@ from .transformers_tokenizer import NLPTokenizer @PREPROCESSORS.register_module( Fields.nlp, module_name=Preprocessors.translation_evaluation) -class TranslationEvaluationPreprocessor(Preprocessor): +class TranslationEvaluationTransformersPreprocessor(Preprocessor): r"""The tokenizer preprocessor used for translation evaluation. """ def __init__(self, model_dir: str, - eval_mode: EvaluationMode, + max_len: int, + pad_token_id: int, + eos_token_id: int, + input_format: InputFormat = InputFormat.SRC_REF, mode=ModeKeys.INFERENCE, *args, **kwargs): - r"""preprocess the data via the vocab file from the `model_dir` path + r"""Preprocessing the data for the model in `model_dir` path Args: model_dir: A Model instance. - eval_mode: Evaluation mode, choosing one from `"EvaluationMode.SRC_REF"`, - `"EvaluationMode.SRC"`, `"EvaluationMode.REF"`. Aside from hypothesis, the + max_len: Maximum length for input sequence. + pad_token_id: Token id for padding token. + eos_token_id: Token id for the ending-of-sequence (eos) token. + input_format: Input format, choosing one from `"InputFormat.SRC_REF"`, + `"InputFormat.SRC"`, `"InputFormat.REF"`. Aside from hypothesis, the source/reference/source+reference can be presented during evaluation. + mode: The mode for this preprocessor. """ super().__init__(mode=mode) self.tokenizer = NLPTokenizer( model_dir=model_dir, use_fast=False, tokenize_kwargs=kwargs) - self.eval_mode = eval_mode + self.input_format = input_format + + self.max_len = max_len + self.pad_token_id = pad_token_id + self.eos_token_id = eos_token_id return - def __call__(self, input_dict: Dict[str, Any]) -> List[List[str]]: - if self.eval_mode == EvaluationMode.SRC and 'src' not in input_dict.keys( + def change_input_format(self, input_format: InputFormat): + r"""Change the input format for the preprocessor. + + Args: + input_format: Any choice in InputFormat.SRC_REF, InputFormat.SRC and InputFormat.REF. + + """ + self.input_format = input_format + return + + def collect_input_ids(self, input_dict: Dict[str, Any]): + r"""Collect the input ids for the given examples. + + Args: + input_dict: A dict containing hyp/src/ref sentences. + + Returns: + The token ids for each example. + + """ + output_sents = [ + self.tokenizer( + input_dict['hyp'], return_tensors='pt', + padding=True)['input_ids'] + ] + if self.input_format == InputFormat.SRC or self.input_format == InputFormat.SRC_REF: + output_sents += [ + self.tokenizer( + input_dict['src'], return_tensors='pt', + padding=True)['input_ids'] + ] + if self.input_format == InputFormat.REF or self.input_format == InputFormat.SRC_REF: + output_sents += [ + self.tokenizer( + input_dict['ref'], return_tensors='pt', + padding=True)['input_ids'] + ] + + input_ids = combine_input_sentences(output_sents, self.max_len, + self.pad_token_id, + self.eos_token_id) + + return input_ids + + def __call__(self, input_dict: Dict[str, Any]) -> Dict[str, Any]: + if self.input_format == InputFormat.SRC and 'src' not in input_dict.keys( ): raise ValueError( 'Source sentences are required for source-only evaluation mode.' 
) - if self.eval_mode == EvaluationMode.REF and 'ref' not in input_dict.keys( + if self.input_format == InputFormat.REF and 'ref' not in input_dict.keys( ): raise ValueError( 'Reference sentences are required for reference-only evaluation mode.' ) - if self.eval_mode == EvaluationMode.SRC_REF and ( + if self.input_format == InputFormat.SRC_REF and ( 'src' not in input_dict.keys() or 'ref' not in input_dict.keys()): raise ValueError( @@ -59,29 +117,58 @@ class TranslationEvaluationPreprocessor(Preprocessor): if type(input_dict['hyp']) == str: input_dict['hyp'] = [input_dict['hyp']] - if (self.eval_mode == EvaluationMode.SRC or self.eval_mode - == EvaluationMode.SRC_REF) and type(input_dict['src']) == str: + if (self.input_format == InputFormat.SRC or self.input_format + == InputFormat.SRC_REF) and type(input_dict['src']) == str: input_dict['src'] = [input_dict['src']] - if (self.eval_mode == EvaluationMode.REF or self.eval_mode - == EvaluationMode.SRC_REF) and type(input_dict['ref']) == str: + if (self.input_format == InputFormat.REF or self.input_format + == InputFormat.SRC_REF) and type(input_dict['ref']) == str: input_dict['ref'] = [input_dict['ref']] - output_sents = [ - self.tokenizer( - input_dict['hyp'], return_tensors='pt', - padding=True)['input_ids'] - ] - if self.eval_mode == EvaluationMode.SRC or self.eval_mode == EvaluationMode.SRC_REF: - output_sents += [ - self.tokenizer( - input_dict['src'], return_tensors='pt', - padding=True)['input_ids'] - ] - if self.eval_mode == EvaluationMode.REF or self.eval_mode == EvaluationMode.SRC_REF: - output_sents += [ - self.tokenizer( - input_dict['ref'], return_tensors='pt', - padding=True)['input_ids'] - ] + if (self.input_format == InputFormat.SRC + or self.input_format == InputFormat.SRC_REF) and (len( + input_dict['hyp']) != len(input_dict['src'])): + raise ValueError( + 'The number of given hyp sentences (%d) is not equal to that of src (%d).' + % (len(input_dict['hyp']), len(input_dict['src']))) + if (self.input_format == InputFormat.REF + or self.input_format == InputFormat.SRC_REF) and (len( + input_dict['hyp']) != len(input_dict['ref'])): + raise ValueError( + 'The number of given hyp sentences (%d) is not equal to that of ref (%d).' + % (len(input_dict['hyp']), len(input_dict['ref']))) - return output_sents + output_dict = {'input_ids': self.collect_input_ids(input_dict)} + + if self.mode == ModeKeys.TRAIN or self.mode == ModeKeys.EVAL: + if 'score' not in input_dict.keys(): + raise KeyError( + 'During training or evaluating, \'score\' should be provided.' + ) + if (isinstance(input_dict['score'], List) and len(input_dict['score']) != len(output_dict['input_ids'])) \ + or (isinstance(input_dict['score'], float) and len(output_dict['input_ids']) != 1): + raise ValueError( + 'The number of scores is not equal to that of the given examples. ' + 'Required %d, given %d.'
% + (len(output_dict['input_ids']), len(input_dict['score']))) + + output_dict['score'] = [input_dict['score']] if isinstance( + input_dict['score'], float) else input_dict['score'] + + if self.mode == ModeKeys.EVAL: + if 'lp' not in input_dict.keys(): + raise ValueError( + 'Language pair should be provided for evaluation.') + + if 'segment_id' not in input_dict.keys(): + raise ValueError( + 'Segment id should be provided for evaluation.') + + if 'raw_score' not in input_dict.keys(): + raise ValueError( + 'Raw scores should be provided for evaluation.') + + output_dict['lp'] = input_dict['lp'] + output_dict['segment_id'] = input_dict['segment_id'] + output_dict['raw_score'] = input_dict['raw_score'] + + return output_dict diff --git a/modelscope/trainers/__init__.py b/modelscope/trainers/__init__.py index 90f73a7f..0d20fe00 100644 --- a/modelscope/trainers/__init__.py +++ b/modelscope/trainers/__init__.py @@ -15,6 +15,8 @@ if TYPE_CHECKING: from .nlp import SequenceClassificationTrainer, TextRankingTrainer, SiameseUIETrainer from .nlp_trainer import NlpEpochBasedTrainer, VecoTrainer from .trainer import EpochBasedTrainer + from .training_args import TrainingArgs, build_dataset_from_file + from .hooks import Hook, Priority else: _import_structure = { @@ -32,7 +34,9 @@ 'SiameseUIETrainer' ], 'nlp_trainer': ['NlpEpochBasedTrainer', 'VecoTrainer'], - 'trainer': ['EpochBasedTrainer'] + 'trainer': ['EpochBasedTrainer'], + 'training_args': ['TrainingArgs', 'build_dataset_from_file'], + 'hooks': ['Hook', 'Priority'] } import sys diff --git a/modelscope/trainers/cli_argument_parser.py b/modelscope/trainers/cli_argument_parser.py new file mode 100644 index 00000000..f183b9ea --- /dev/null +++ b/modelscope/trainers/cli_argument_parser.py @@ -0,0 +1,151 @@ +from argparse import Action, ArgumentDefaultsHelpFormatter, ArgumentParser +from dataclasses import fields +from typing import List + + +class CliArgumentParser(ArgumentParser): + """ Argument Parser to define and parse command-line args for training. + + Args: + training_args: dict or list of dict which defines different + parameters for training. + """ + + def __init__(self, training_args=None, **kwargs): + if 'formatter_class' not in kwargs: + kwargs['formatter_class'] = ArgumentDefaultsHelpFormatter + super().__init__(**kwargs) + self.training_args = training_args + self.define_args() + + def get_manual_args(self, args): + return [arg[2:] for arg in args if arg.startswith('--')] + + def _parse_known_args(self, args: List = None, namespace=None): + self.model_id = namespace.model if namespace is not None else None + if '--model' in args: + self.model_id = args[args.index('--model') + 1] + self.manual_args = self.get_manual_args(args) + return super()._parse_known_args(args, namespace) + + def print_help(self, file=None): + return super().print_help(file) + + def define_args(self): + if self.training_args is not None: + for f in fields(self.training_args): + arg_name = f.name + arg_attr = getattr(self.training_args, f.name) + name = f'--{arg_name}' + kwargs = dict(type=f.type, help=f.metadata['help']) + kwargs['default'] = arg_attr + + if 'choices' in f.metadata: + kwargs['choices'] = f.metadata['choices'] + + kwargs['action'] = SingleAction + self.add_argument(name, **kwargs) + + +class DictAction(Action): + """ + argparse action to split an argument into KEY=VALUE form + on the first = and append to a dictionary. List options can + be passed as comma separated values, i.e. 'KEY=V1,V2,V3', or with explicit + brackets, i.e. 'KEY=[V1,V2,V3]'.
It also supports nested brackets to build + list/tuple values. e.g. 'KEY=[(V1,V2),(V3,V4)]' + """ + + @staticmethod + def parse_int_float_bool_str(val): + try: + return int(val) + except ValueError: + pass + try: + return float(val) + except ValueError: + pass + if val.lower() in ['true', 'false']: + return val.lower() == 'true' + if val == 'None': + return None + return val + + @staticmethod + def parse_iterable(val): + """Parse iterable values in the string. + All elements inside '()' or '[]' are treated as iterable values. + Args: + val (str): Value string. + Returns: + list | tuple: The expanded list or tuple from the string. + Examples: + >>> DictAction.parse_iterable('1,2,3') + [1, 2, 3] + >>> DictAction.parse_iterable('[a, b, c]') + ['a', 'b', 'c'] + >>> DictAction.parse_iterable('[(1, 2, 3), [a, b], c]') + [(1, 2, 3), ['a', 'b'], 'c'] + """ + + def find_next_comma(string): + """Find the position of next comma in the string. + If no ',' is found in the string, return the string length. All + chars inside '()' and '[]' are treated as one element and thus ',' + inside these brackets are ignored. + """ + assert (string.count('(') == string.count(')')) and ( + string.count('[') + == string.count(']')), f'Imbalanced brackets exist in {string}' + end = len(string) + for idx, char in enumerate(string): + pre = string[:idx] + # The string before this ',' is balanced + if ((char == ',') and (pre.count('(') == pre.count(')')) + and (pre.count('[') == pre.count(']'))): + end = idx + break + return end + + # Strip ' and " characters and replace whitespace. + val = val.strip('\'\"').replace(' ', '') + is_tuple = False + if val.startswith('(') and val.endswith(')'): + is_tuple = True + val = val[1:-1] + elif val.startswith('[') and val.endswith(']'): + val = val[1:-1] + elif ',' not in val: + # val is a single value + return DictAction.parse_int_float_bool_str(val) + + values = [] + while len(val) > 0: + comma_idx = find_next_comma(val) + element = DictAction.parse_iterable(val[:comma_idx]) + values.append(element) + val = val[comma_idx + 1:] + if is_tuple: + values = tuple(values) + return values + + def __call__(self, parser, namespace, values, option_string): + options = {} + for kv in values: + key, val = kv.split('=', maxsplit=1) + options[key] = self.parse_iterable(val) + setattr(namespace, self.dest, options) + + +class SingleAction(DictAction): + """ Argparse action to convert value to tuple or list or nested structure of + list and tuple, i.e. 'V1,V2,V3', or with explicit brackets, i.e. '[V1,V2,V3]'. + It also supports nested brackets to build list/tuple values. e.g.
'[(V1,V2),(V3,V4)]' + """ + + def __call__(self, parser, namespace, value, option_string): + if isinstance(value, str): + setattr(namespace, self.dest, self.parse_iterable(value)) + else: + setattr(namespace, self.dest, value) diff --git a/modelscope/trainers/default_config.py b/modelscope/trainers/default_config.py index 51a0df40..bb272695 100644 --- a/modelscope/trainers/default_config.py +++ b/modelscope/trainers/default_config.py @@ -4,38 +4,6 @@ from typing import Dict, List, Optional, Tuple from modelscope.utils.config import Config -DEFAULT_CONFIG = Config({ - 'framework': 'pytorch', - 'train': { - 'work_dir': '/tmp', - 'max_epochs': 10, - 'dataloader': { - 'batch_size_per_gpu': 16, - 'workers_per_gpu': 0 - }, - 'optimizer': { - 'type': 'SGD', - 'lr': 1e-3 - }, - 'lr_scheduler': { - 'type': 'StepLR', - 'step_size': 2 - }, - 'checkpoint': { - 'period': { - 'interval': 1 - } - } - }, - 'evaluation': { - 'dataloader': { - 'batch_size_per_gpu': 16, - 'workers_per_gpu': 0, - 'shuffle': False - }, - } -}) - DEFAULT_HOOKS_CONFIG = { 'train.hooks': [{ 'type': 'CheckpointHook', @@ -68,7 +36,7 @@ def merge_cfg(cfg: Config): def merge_hooks(cfg: Config) -> List[Dict]: - hooks = cfg.train.hooks.copy() + hooks = getattr(cfg.train, 'hooks', []).copy() for hook_type, key_chain in _HOOK_KEY_CHAIN_MAP.items(): hook = _key_chain_to_hook(cfg, key_chain, hook_type) if hook is not None: @@ -107,7 +75,8 @@ def _check_basic_hook(cfg: Config, key_chain: str, hook_type: str) -> bool: if cfg.safe_get(key_chain) is None: return False hooks = list( - filter(lambda hook: hook['type'] == hook_type, cfg.train.hooks)) + filter(lambda hook: hook['type'] == hook_type, + getattr(cfg.train, 'hooks', []))) assert len(hooks) == 0, f'The key_chain {key_chain} and the traditional hook ' \ f'cannot exist at the same time, ' \ f'please delete {hook_type} in the configuration file.' diff --git a/modelscope/trainers/easycv/__init__.py b/modelscope/trainers/easycv/__init__.py deleted file mode 100644 index b1b8fc15..00000000 --- a/modelscope/trainers/easycv/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule - -if TYPE_CHECKING: - from .utils import AddLrLogHook, EasyCVMetric -else: - _import_structure = {'utils': ['AddLrLogHook', 'EasyCVMetric']} - - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/trainers/easycv/trainer.py b/modelscope/trainers/easycv/trainer.py deleted file mode 100644 index 58d6a440..00000000 --- a/modelscope/trainers/easycv/trainer.py +++ /dev/null @@ -1,183 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-from copy import deepcopy -from functools import partial -from typing import Callable, Optional, Tuple, Union - -import torch -from easycv.utils.checkpoint import load_checkpoint as ev_load_checkpoint -from torch import nn -from torch.utils.data import Dataset - -from modelscope.metainfo import Trainers -from modelscope.models.base import TorchModel -from modelscope.msdatasets import MsDataset -from modelscope.preprocessors import Preprocessor -from modelscope.trainers import EpochBasedTrainer -from modelscope.trainers.base import TRAINERS -from modelscope.trainers.easycv.utils import register_util -from modelscope.trainers.hooks import HOOKS -from modelscope.trainers.parallel.builder import build_parallel -from modelscope.trainers.parallel.utils import is_parallel -from modelscope.utils.config import Config -from modelscope.utils.constant import DEFAULT_MODEL_REVISION -from modelscope.utils.import_utils import LazyImportModule -from modelscope.utils.registry import default_group - - -@TRAINERS.register_module(module_name=Trainers.easycv) -class EasyCVEpochBasedTrainer(EpochBasedTrainer): - """Epoch based Trainer for EasyCV. - - Args: - cfg_file(str): The config file of EasyCV. - model (:obj:`torch.nn.Module` or :obj:`TorchModel` or `str`): The model to be run, or a valid model dir - or a model id. If model is None, build_model method will be called. - train_dataset (`MsDataset` or `torch.utils.data.Dataset`, *optional*): - The dataset to use for training. - Note that if it's a `torch.utils.data.IterableDataset` with some randomization and you are training in a - distributed fashion, your iterable dataset should either use a internal attribute `generator` that is a - `torch.Generator` for the randomization that must be identical on all processes (and the Trainer will - manually set the seed of this `generator` at each epoch) or have a `set_epoch()` method that internally - sets the seed of the RNGs used. - eval_dataset (`MsDataset` or `torch.utils.data.Dataset`, *optional*): The dataset to use for evaluation. - preprocessor (:obj:`Preprocessor`, *optional*): The optional preprocessor. - NOTE: If the preprocessor has been called before the dataset fed into this trainer by user's custom code, - this parameter should be None, meanwhile remove the 'preprocessor' key from the cfg_file. - Else the preprocessor will be instantiated from the cfg_file or assigned from this parameter and - this preprocessing action will be executed every time the dataset's __getitem__ is called. - optimizers (`Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler._LRScheduler]`, *optional*): A tuple - containing the optimizer and the scheduler to use. - max_epochs: (int, optional): Total training epochs. 
- """ - - def __init__( - self, - cfg_file: Optional[str] = None, - model: Optional[Union[TorchModel, nn.Module, str]] = None, - arg_parse_fn: Optional[Callable] = None, - train_dataset: Optional[Union[MsDataset, Dataset]] = None, - eval_dataset: Optional[Union[MsDataset, Dataset]] = None, - preprocessor: Optional[Preprocessor] = None, - optimizers: Tuple[torch.optim.Optimizer, - torch.optim.lr_scheduler._LRScheduler] = (None, - None), - model_revision: Optional[str] = DEFAULT_MODEL_REVISION, - **kwargs): - - register_util.register_parallel() - register_util.register_part_mmcv_hooks_to_ms() - - super(EasyCVEpochBasedTrainer, self).__init__( - model=model, - cfg_file=cfg_file, - arg_parse_fn=arg_parse_fn, - preprocessor=preprocessor, - optimizers=optimizers, - model_revision=model_revision, - train_dataset=train_dataset, - eval_dataset=eval_dataset, - **kwargs) - - # reset data_collator - from mmcv.parallel import collate - - self.train_data_collator = partial( - collate, - samples_per_gpu=self.cfg.train.dataloader.batch_size_per_gpu) - self.eval_data_collator = partial( - collate, - samples_per_gpu=self.cfg.evaluation.dataloader.batch_size_per_gpu) - - # load pretrained model - load_from = self.cfg.get('load_from', None) - if load_from is not None: - ev_load_checkpoint( - self.model, - filename=load_from, - map_location=self.device, - strict=False, - ) - - # reset parallel - if not self._dist: - assert not is_parallel( - self.model - ), 'Not support model wrapped by custom parallel if not in distributed mode!' - dp_cfg = dict( - type='MMDataParallel', - module=self.model, - device_ids=[torch.cuda.current_device()]) - self.model = build_parallel(dp_cfg) - - def rebuild_config(self, cfg: Config): - cfg = super().rebuild_config(cfg) - # Register easycv hooks dynamicly. If the hook already exists in modelscope, - # the hook in modelscope will be used, otherwise register easycv hook into ms. - # We must manually trigger lazy import to detect whether the hook is in modelscope. - # TODO: use ast index to detect whether the hook is in modelscope - for h_i in cfg.train.get('hooks', []): - sig = ('HOOKS', default_group, h_i['type']) - LazyImportModule.import_module(sig) - if h_i['type'] not in HOOKS._modules[default_group]: - if h_i['type'] in [ - 'TensorboardLoggerHookV2', 'WandbLoggerHookV2' - ]: - raise ValueError( - 'Not support hook %s now, we will support it in the future!' - % h_i['type']) - register_util.register_hook_to_ms(h_i['type']) - return cfg - - def create_optimizer_and_scheduler(self): - """ Create optimizer and lr scheduler - """ - optimizer, lr_scheduler = self.optimizers - if optimizer is None: - optimizer_cfg = self.cfg.train.get('optimizer', None) - else: - optimizer_cfg = None - - optim_options = {} - if optimizer_cfg is not None: - optim_options = optimizer_cfg.pop('options', {}) - from easycv.apis.train import build_optimizer - optimizer = build_optimizer(self.model, optimizer_cfg) - - if lr_scheduler is None: - lr_scheduler_cfg = self.cfg.train.get('lr_scheduler', None) - else: - lr_scheduler_cfg = None - - lr_options = {} - # Adapt to mmcv lr scheduler hook. 
- # Please refer to: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py - if lr_scheduler_cfg is not None: - assert optimizer is not None - lr_options = lr_scheduler_cfg.pop('options', {}) - assert 'policy' in lr_scheduler_cfg - policy_type = lr_scheduler_cfg.pop('policy') - if policy_type == policy_type.lower(): - policy_type = policy_type.title() - hook_type = policy_type + 'LrUpdaterHook' - lr_scheduler_cfg['type'] = hook_type - - self.cfg.train.lr_scheduler_hook = lr_scheduler_cfg - - self.optimizer = optimizer - self.lr_scheduler = lr_scheduler - - return self.optimizer, self.lr_scheduler, optim_options, lr_options - - def to_parallel(self, model) -> Union[nn.Module, TorchModel]: - if self.cfg.get('parallel', None) is not None: - dp_cfg = deepcopy(self.cfg['parallel']) - dp_cfg.update( - dict(module=model, device_ids=[torch.cuda.current_device()])) - return build_parallel(dp_cfg) - - dp_cfg = dict( - type='MMDistributedDataParallel', - module=model, - device_ids=[torch.cuda.current_device()]) - - return build_parallel(dp_cfg) diff --git a/modelscope/trainers/easycv/utils/__init__.py b/modelscope/trainers/easycv/utils/__init__.py deleted file mode 100644 index 23cfa36a..00000000 --- a/modelscope/trainers/easycv/utils/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule - -if TYPE_CHECKING: - from .hooks import AddLrLogHook - from .metric import EasyCVMetric - -else: - _import_structure = {'hooks': ['AddLrLogHook'], 'metric': ['EasyCVMetric']} - - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/trainers/easycv/utils/hooks.py b/modelscope/trainers/easycv/utils/hooks.py deleted file mode 100644 index 1f1a5c95..00000000 --- a/modelscope/trainers/easycv/utils/hooks.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from modelscope.trainers.hooks import HOOKS, Priority -from modelscope.trainers.hooks.lr_scheduler_hook import LrSchedulerHook -from modelscope.utils.constant import LogKeys - - -@HOOKS.register_module(module_name='AddLrLogHook') -class AddLrLogHook(LrSchedulerHook): - """For EasyCV to adapt to ModelScope, the lr log of EasyCV is added in the trainer, - but the trainer of ModelScope does not and it is added in the lr scheduler hook. - But The lr scheduler hook used by EasyCV is the hook of mmcv, and there is no lr log. - It will be deleted in the future. - """ - PRIORITY = Priority.NORMAL - - def __init__(self): - pass - - def before_run(self, trainer): - pass - - def after_train_iter(self, trainer): - trainer.log_buffer.output[LogKeys.LR] = self._get_log_lr(trainer) - - def before_train_epoch(self, trainer): - trainer.log_buffer.output[LogKeys.LR] = self._get_log_lr(trainer) - - def after_train_epoch(self, trainer): - pass diff --git a/modelscope/trainers/easycv/utils/metric.py b/modelscope/trainers/easycv/utils/metric.py deleted file mode 100644 index d952ec3e..00000000 --- a/modelscope/trainers/easycv/utils/metric.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import itertools -from typing import Dict - -import numpy as np -import torch - -from modelscope.metrics.base import Metric -from modelscope.metrics.builder import METRICS - - -@METRICS.register_module(module_name='EasyCVMetric') -class EasyCVMetric(Metric): - """Adapt to ModelScope Metric for EasyCV evaluator. - """ - - def __init__(self, trainer=None, evaluators=None, *args, **kwargs): - from easycv.core.evaluation.builder import build_evaluator - - self.trainer = trainer - self.evaluators = build_evaluator(evaluators) - self.preds = [] - self.grountruths = [] - - def add(self, outputs: Dict, inputs: Dict): - self.preds.append(outputs) - del inputs - - def evaluate(self): - results = {} - for _, batch in enumerate(self.preds): - for k, v in batch.items(): - if k not in results: - results[k] = [] - results[k].append(v) - - for k, v in results.items(): - if len(v) == 0: - raise ValueError(f'empty result for {k}') - - if isinstance(v[0], torch.Tensor): - results[k] = torch.cat(v, 0) - elif isinstance(v[0], (list, np.ndarray)): - results[k] = list(itertools.chain.from_iterable(v)) - else: - raise ValueError( - f'value of batch prediction dict should only be tensor or list, {k} type is {v[0]}' - ) - - metric_values = self.trainer.eval_dataset.evaluate( - results, self.evaluators) - return metric_values - - def merge(self, other: 'EasyCVMetric'): - self.preds.extend(other.preds) - - def __getstate__(self): - return self.preds - - def __setstate__(self, state): - self.__init__() - self.preds = state diff --git a/modelscope/trainers/easycv/utils/register_util.py b/modelscope/trainers/easycv/utils/register_util.py deleted file mode 100644 index 04bf719b..00000000 --- a/modelscope/trainers/easycv/utils/register_util.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import inspect -import logging - -from modelscope.trainers.hooks import HOOKS -from modelscope.trainers.parallel.builder import PARALLEL -from modelscope.utils.registry import default_group - - -class _RegisterManager: - - def __init__(self): - self.registries = {} - - def add(self, module, name, group_key=default_group): - if module.name not in self.registries: - self.registries[module.name] = {} - if group_key not in self.registries[module.name]: - self.registries[module.name][group_key] = [] - - self.registries[module.name][group_key].append(name) - - def exists(self, module, name, group_key=default_group): - if self.registries.get(module.name, None) is None: - return False - if self.registries[module.name].get(group_key, None) is None: - return False - if name in self.registries[module.name][group_key]: - return True - - return False - - -_dynamic_register = _RegisterManager() - - -def register_parallel(): - from mmcv.parallel import MMDistributedDataParallel, MMDataParallel - - mmddp = 'MMDistributedDataParallel' - mmdp = 'MMDataParallel' - - if not _dynamic_register.exists(PARALLEL, mmddp): - _dynamic_register.add(PARALLEL, mmddp) - PARALLEL.register_module( - module_name=mmddp, module_cls=MMDistributedDataParallel) - if not _dynamic_register.exists(PARALLEL, mmdp): - _dynamic_register.add(PARALLEL, mmdp) - PARALLEL.register_module(module_name=mmdp, module_cls=MMDataParallel) - - -def register_hook_to_ms(hook_name, logger=None): - """Register EasyCV hook to ModelScope.""" - from easycv.hooks import HOOKS as _EV_HOOKS - - if hook_name not in _EV_HOOKS._module_dict: - raise ValueError( - f'Not found hook "{hook_name}" in EasyCV hook registries!') - - if _dynamic_register.exists(HOOKS, hook_name): - return - _dynamic_register.add(HOOKS, hook_name) - - obj = _EV_HOOKS._module_dict[hook_name] - HOOKS.register_module(module_name=hook_name, module_cls=obj) - - log_str = f'Register hook "{hook_name}" to modelscope hooks.' - logger.info(log_str) if logger is not None else logging.info(log_str) - - -def register_part_mmcv_hooks_to_ms(): - """Register required mmcv hooks to ModelScope. - Currently we only registered all lr scheduler hooks in EasyCV and mmcv. 
- Please refer to: - EasyCV: https://github.com/alibaba/EasyCV/blob/master/easycv/hooks/lr_update_hook.py - mmcv: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py - """ - from mmcv.runner.hooks import lr_updater - from mmcv.runner.hooks import HOOKS as _MMCV_HOOKS - from easycv.hooks import StepFixCosineAnnealingLrUpdaterHook, YOLOXLrUpdaterHook - - mmcv_hooks_in_easycv = [('StepFixCosineAnnealingLrUpdaterHook', - StepFixCosineAnnealingLrUpdaterHook), - ('YOLOXLrUpdaterHook', YOLOXLrUpdaterHook)] - - members = inspect.getmembers(lr_updater) - members.extend(mmcv_hooks_in_easycv) - - for name, obj in members: - if name in _MMCV_HOOKS._module_dict: - if _dynamic_register.exists(HOOKS, name): - continue - _dynamic_register.add(HOOKS, name) - HOOKS.register_module( - module_name=name, - module_cls=obj, - ) diff --git a/modelscope/trainers/hooks/__init__.py b/modelscope/trainers/hooks/__init__.py index 51677f25..072105be 100644 --- a/modelscope/trainers/hooks/__init__.py +++ b/modelscope/trainers/hooks/__init__.py @@ -5,7 +5,6 @@ from modelscope.utils.import_utils import LazyImportModule if TYPE_CHECKING: from .builder import HOOKS, build_hook - from .checkpoint_hook import BestCkptSaverHook, CheckpointHook, LoadCheckpointHook from .early_stop_hook import EarlyStopHook from .compression import SparsityHook from .evaluation_hook import EvaluationHook @@ -16,6 +15,10 @@ if TYPE_CHECKING: from .optimizer import (ApexAMPOptimizerHook, NoneOptimizerHook, OptimizerHook, TorchAMPOptimizerHook) from .priority import Priority, get_priority + from .checkpoint import CheckpointHook, LoadCheckpointHook, BestCkptSaverHook + from .distributed.ddp_hook import DDPHook + from .distributed.deepspeed_hook import DeepspeedHook + from .distributed.megatron_hook import MegatronHook else: _import_structure = { @@ -32,7 +35,12 @@ else: 'ApexAMPOptimizerHook', 'NoneOptimizerHook', 'OptimizerHook', 'TorchAMPOptimizerHook' ], - 'priority': ['Priority', 'get'] + 'checkpoint': + ['CheckpointHook', 'LoadCheckpointHook', 'BestCkptSaverHook'], + 'distributed.ddp_hook': ['DDPHook'], + 'distributed.deepspeed_hook': ['DeepspeedHook'], + 'distributed.megatron_hook': ['MegatronHook'], + 'priority': ['Priority', 'get_priority'] } import sys diff --git a/modelscope/trainers/hooks/checkpoint/__init__.py b/modelscope/trainers/hooks/checkpoint/__init__.py new file mode 100644 index 00000000..e2abb272 --- /dev/null +++ b/modelscope/trainers/hooks/checkpoint/__init__.py @@ -0,0 +1,2 @@ +from .checkpoint_hook import BestCkptSaverHook, CheckpointHook +from .load_checkpoint_hook import LoadCheckpointHook diff --git a/modelscope/trainers/hooks/checkpoint/checkpoint_hook.py b/modelscope/trainers/hooks/checkpoint/checkpoint_hook.py new file mode 100644 index 00000000..4b14a13f --- /dev/null +++ b/modelscope/trainers/hooks/checkpoint/checkpoint_hook.py @@ -0,0 +1,435 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import os +import random +import time +from typing import Optional + +import numpy as np +import torch + +from modelscope.hub.check_model import check_model_is_id +from modelscope.hub.push_to_hub import push_to_hub_async +from modelscope.metainfo import Hooks +from modelscope.trainers.hooks.builder import HOOKS +from modelscope.trainers.hooks.checkpoint.checkpoint_processor import \ + CheckpointProcessor +from modelscope.trainers.hooks.hook import Hook +from modelscope.trainers.hooks.priority import Priority +from modelscope.utils.constant import (DEFAULT_REPOSITORY_REVISION, LogKeys, + ModelFile) +from modelscope.utils.logger import get_logger +from modelscope.utils.torch_utils import is_master + + +class CheckpointStrategy: + by_epoch = 'by_epoch' + by_step = 'by_step' + no = 'no' + + +@HOOKS.register_module(module_name=Hooks.CheckpointHook) +class CheckpointHook(Hook): + """Save checkpoints periodically. + + Args: + save_strategy(str): The strategy to save checkpoints, can be `by_epoch`, `by_step` or `no`. + interval (int): The frequency to save the model. If `save_strategy` is `by_epoch`, + it means the number of epochs, else it means the number of iterations. + save_dir (str): The directory to save checkpoints. If it is None, use `trainer.work_dir`. + output_dir (str): The absolute path to save the output files for inference. If it's not specified, + the default dir is `{sub_dir}/output`. + save_last (bool): Whether to save the last checkpoint. Default: True. + max_checkpoint_num (int): The max number of checkpoint files, default None which means never delete anything. + If the number exceeds the limit, earlier checkpoints will be deleted first. + push_to_hub (bool): Whether to push the checkpoint to the modelhub. + hub_repo_id (str): The hub repo id. + hub_token (str): The token of the modelhub. You can also set the environment variable `MODELSCOPE_API_TOKEN`. + private_hub (bool): Whether to push to a private hub, default True. + hub_revision (str): Which branch to push the model to, default is `master`. + kwargs: + by_epoch (bool): Same as `save_strategy`, but with a higher priority; a legacy argument. + output_sub_dir (str): The folder under the `save_dir` to save the output checkpoint for inference. + This argument is kept to fit the existing configs.
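+
+    Example:
+        An illustrative sketch of building the hook directly; in practice it is usually
+        configured through the `train.checkpoint` section of the configuration file, and
+        the argument values below are placeholders.
+        >>> hook = CheckpointHook(save_strategy='by_step', interval=500, max_checkpoint_num=3)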
+ """ + + PRIORITY = Priority.LOW + + EVAL_RESULT_FILE = 'eval_result.txt' + + def __init__(self, + save_strategy: Optional[str] = CheckpointStrategy.by_epoch, + interval: Optional[int] = 0, + save_dir: Optional[str] = None, + output_dir: Optional[str] = None, + save_last: Optional[bool] = True, + max_checkpoint_num: Optional[int] = None, + push_to_hub: Optional[bool] = False, + hub_repo_id: Optional[str] = None, + hub_token: Optional[str] = None, + private_hub: Optional[bool] = True, + hub_revision: Optional[str] = DEFAULT_REPOSITORY_REVISION, + **kwargs): + self.interval = interval + self.save_dir = save_dir + if 'by_epoch' in kwargs: + self.save_strategy = CheckpointStrategy.by_epoch if kwargs[ + 'by_epoch'] else CheckpointStrategy.by_step + else: + self.save_strategy = save_strategy + if 'output_sub_dir' in kwargs: + self.output_sub_dir = kwargs['output_sub_dir'] + self.output_dir = None + else: + self.output_sub_dir = None + self.output_dir = output_dir + self.save_last = save_last + self.rng_state = None + self.push_to_hub = push_to_hub + self.hub_repo_id = hub_repo_id + self.hub_token = hub_token + self.private_hub = private_hub + self.hub_revision = hub_revision + self.tag = -1 + self.is_model_id = None + self.push_to_hub_future = None + self.max_checkpoint_num = None + if max_checkpoint_num is not None: + self.max_checkpoint_num = max(int(max_checkpoint_num), 1) + self.history_checkpoints = [] + self.processor = CheckpointProcessor() + + def set_processor(self, processor): + """ + The checkpoint hook accepts a processor to finish the actual saving/deleting action. + """ + self.processor = processor + + def before_run(self, trainer): + self.tag = -1 + if not self.save_dir: + self.save_dir = trainer.work_dir + if not self.output_dir: + if self.output_sub_dir: + self.output_dir = os.path.join(self.save_dir, + self.output_sub_dir) + else: + self.output_dir = os.path.join(self.save_dir, + ModelFile.TRAIN_OUTPUT_DIR) + + if not os.path.exists(self.save_dir): + os.makedirs(self.save_dir, exist_ok=True) + + if not hasattr(trainer, 'logger'): + self.logger = get_logger() + else: + self.logger = trainer.logger + + if is_master(): + output_dir = self.output_dir + # only global master prepares the output folder + self.processor.prepare_output(trainer, output_dir) + self.logger.info(f'Checkpoints will be saved to {self.save_dir}') + + def generate_prefix(self, trainer, save_strategy): + if save_strategy == CheckpointStrategy.by_epoch: + return f'{LogKeys.EPOCH}_{trainer.epoch + 1}' + else: + return f'{LogKeys.ITER}_{trainer.iter + 1}' + + def _do_save(self, trainer, save_strategy): + # prefix like 'epoch-1' or 'iter-1' + prefix = self.generate_prefix(trainer, save_strategy) + if self.processor.should_save_on_rank(trainer): + if is_master(): + if save_strategy == CheckpointStrategy.by_epoch: + self.logger.info( + f'Saving checkpoint at {trainer.epoch + 1} epoch') + else: + self.logger.info( + f'Saving checkpoint at {trainer.iter + 1} iter') + self._save_checkpoint(trainer, prefix) + if is_master() and self.push_to_hub: + if self.push_to_hub_future is not None and not self.push_to_hub_future.done( + ): + self.logger.error( + f'Another uploading is running, ' + f'this uploading with message {prefix} will be canceled.') + return + self.push_to_hub_future = self._push_to_hub(trainer, prefix) + + def after_train_epoch(self, trainer): + if self.save_strategy != CheckpointStrategy.by_epoch: + return + + if self._should_save(trainer): + self._do_save(trainer, CheckpointStrategy.by_epoch) + + def 
after_train_iter(self, trainer): + if self.save_strategy != CheckpointStrategy.by_step: + return + + if self._should_save(trainer): + self._do_save(trainer, CheckpointStrategy.by_step) + + def after_run(self, trainer): + if self.push_to_hub_future is not None and not self.push_to_hub_future.done( + ): + self.logger.info('Train finished. Uploading models, waiting...') + while not self.push_to_hub_future.done(): + time.sleep(1) + self.logger.info('Uploading models done.') + + def _push_to_hub(self, trainer, prefix): + if self.is_model_id is None: + self.is_model_id = check_model_is_id(trainer.input_model_id, + self.hub_token) + self.tag += 1 + return push_to_hub_async( + self.hub_repo_id, + self.output_dir, + token=self.hub_token, + private=self.private_hub, + commit_message=prefix, + tag=f'v1.{self.tag}', + revision=self.hub_revision, + source_repo=trainer.input_model_id if self.is_model_id else '') + + def save_evaluate_results(self, trainer): + with open(os.path.join(self.output_dir, self.EVAL_RESULT_FILE), + 'w') as f: + f.write(str(trainer.metric_values)) + + def _save_checkpoint(self, trainer, prefix): + """Save checkpoint files and remove obsolete ones + """ + checkpoint_path_prefix = os.path.join(self.save_dir, prefix) + meta = self._create_training_state(trainer) + self.processor.save_checkpoints(trainer, checkpoint_path_prefix, + self.output_dir, meta) + self.save_evaluate_results(trainer) + self.history_checkpoints.append(checkpoint_path_prefix) + self._remove_obsolete_checkpoints(trainer) + return prefix + + def _remove_obsolete_checkpoints(self, trainer): + if self.max_checkpoint_num is not None and \ + len(self.history_checkpoints) > self.max_checkpoint_num: + history_checkpoints = [ckpt for ckpt in self.history_checkpoints] + self.history_checkpoints.clear() + for i, checkpoint_path_prefix in enumerate(history_checkpoints): + if i < len(history_checkpoints) - self.max_checkpoint_num: + self.logger.info( + f'deleting checkpoint: {checkpoint_path_prefix}') + self.processor.remove_checkpoints( + trainer, checkpoint_path_prefix=checkpoint_path_prefix) + else: + self.history_checkpoints.append(checkpoint_path_prefix) + + def _should_save(self, trainer): + if self.save_strategy == CheckpointStrategy.by_epoch: + check_last = self.is_last_epoch + check_frequency = self.every_n_epochs + elif self.save_strategy == CheckpointStrategy.by_step: + check_last = self.is_last_iter + check_frequency = self.every_n_iters + else: + return False + + if check_frequency(trainer, + self.interval) or (self.save_last + and check_last(trainer)): + return True + return False + + def _create_training_state(self, trainer): + self.rng_state = { + 'random': random.getstate(), + 'numpy': np.random.get_state(), + 'cpu': torch.random.get_rng_state(), + 'cuda': torch.cuda.get_rng_state_all(), + } + + # keep epoch/iter/inner_iter/random_state + meta = { + 'epoch': trainer.epoch, + 'iter': trainer.iter + 1, + 'inner_iter': trainer.inner_iter + 1, + 'rng_state': self.rng_state, + } + + # keep hooks state + i = 0 + for hook in trainer.hooks: + if hasattr(hook, 'state_dict') and getattr(hook, '_should_save', + True): + meta[f'{hook.__class__}-{i}'] = hook.state_dict() + i += 1 + + return meta + + +@HOOKS.register_module(module_name=Hooks.BestCkptSaverHook) +class BestCkptSaverHook(CheckpointHook): + """ + Save best checkpoints hook. + + Args: + metric_key (str): Metric key to compare rule for best score. + save_best(bool): Save the best checkpoint, if set to False, this hook will have no effect. 
+ rule (str): Comparison rule for best score. Supports "max" and "min". If rule is "max", the checkpoint + at the maximum `metric_key` will be saved. If rule is "min", the checkpoint at the minimum `metric_key` + will be saved. + save_file_name: The manually specified saving file name. + restore_best (bool): Whether to restore the best checkpoint after training. + max_checkpoint_num (int): The max number of checkpoint files, default None which means never delete anything. + If the number exceeds the limit, checkpoints with worse metric will be deleted, which is judged by the + `rule` and `metric_key` arguments. + + The `BestCkptSaverHook` class accepts the `output_sub_dir` and `output_dir` arguments as its super class does. + If neither of them is passed, the default value is `{save_dir}/output_best`. + + This class will not accept the `interval` or `save_strategy` or `by_epoch` argument, because the saving interval + will follow the `EvaluationHook`. + """ + + PRIORITY = Priority.LOW + rule_map = {'max': lambda x, y: x > y, 'min': lambda x, y: x < y} + + def __init__(self, + metric_key: str, + save_best: Optional[bool] = True, + rule: Optional[str] = 'max', + save_file_name: Optional[str] = None, + restore_best: Optional[bool] = False, + max_checkpoint_num: Optional[int] = 1, + **kwargs): + assert rule in ['max', 'min'], 'Only support "max" or "min" rule now.' + output_kwargs = {} + if 'output_sub_dir' not in kwargs and 'output_dir' not in kwargs: + output_kwargs['output_sub_dir'] = ModelFile.TRAIN_BEST_OUTPUT_DIR + kwargs.pop('interval', None) + kwargs.pop('save_strategy', None) + super().__init__( + max_checkpoint_num=max_checkpoint_num, + **kwargs, + **output_kwargs, + ) + self.save_best = save_best + self.metric_key = metric_key + self.rule = rule + self._best_metric = None + self._best_ckpt_file = None + self.save_file_name = save_file_name + self.restore_best = restore_best + self.history_checkpoints = set() + + def after_train_epoch(self, trainer): + from modelscope.trainers.hooks import EvaluationHook + eval_hook = trainer.get_hook(EvaluationHook) + if len(eval_hook) == 0: + self.logger.error( + 'Trying to save the best checkpoint, but there is no evaluation, skipping.' + ) + return + + if eval_hook[0].last_eval_tag == ( + 'epoch', trainer.epoch) and self._should_save(trainer): + self._do_save(trainer, 'by_epoch') + + def after_train_iter(self, trainer): + from modelscope.trainers.hooks import EvaluationHook + eval_hook = trainer.get_hook(EvaluationHook) + if len(eval_hook) == 0: + self.logger.error( + 'Trying to save the best checkpoint, but there is no evaluation, skipping.'
) + return + + if eval_hook[0].last_eval_tag == ( + 'iter', trainer.iter) and self._should_save(trainer): + self._do_save(trainer, 'by_step') + + def _should_save(self, trainer): + return self.save_best and self._is_best_metric(trainer.metric_values) + + def _is_best_metric(self, metric_values): + if metric_values is None: + return False + + if self.metric_key not in metric_values: + raise ValueError( + f'Cannot find metric_key: {self.metric_key} in {metric_values}') + + if self._best_metric is None: + self._best_metric = metric_values[self.metric_key] + return True + + else: + compare_fn = self.rule_map[self.rule] + if compare_fn(metric_values[self.metric_key], self._best_metric): + self._best_metric = metric_values[self.metric_key] + return True + return False + + def generate_prefix(self, trainer, save_strategy): + if save_strategy == CheckpointStrategy.by_epoch: + return f'best_{LogKeys.EPOCH}{trainer.epoch + 1}_{self.metric_key}{self._best_metric}' + else: + return f'best_{LogKeys.ITER}{trainer.iter + 1}_{self.metric_key}{self._best_metric}' + + def _save_checkpoint(self, trainer, prefix): + checkpoint_path_prefix = self.save_file_name + if checkpoint_path_prefix is None: + checkpoint_path_prefix = os.path.join(self.save_dir, prefix) + else: + checkpoint_path_prefix = os.path.join(self.save_dir, + checkpoint_path_prefix) + + self._best_ckpt_file = checkpoint_path_prefix + meta = self._create_training_state(trainer) + self.processor.save_checkpoints(trainer, checkpoint_path_prefix, + self.output_dir, meta) + self.save_evaluate_results(trainer) + self.history_checkpoints.add(checkpoint_path_prefix) + self._remove_obsolete_checkpoints(trainer) + return prefix + + def _remove_obsolete_checkpoints(self, trainer): + + def extract_metric_from_filename(name1): + metric1 = float(name1.split(self.metric_key)[1]) + if self.rule == 'max': + return -metric1 + else: + return metric1 + + if self.max_checkpoint_num is not None and \ + len(self.history_checkpoints) > self.max_checkpoint_num: + history_checkpoints = sorted( + self.history_checkpoints, key=extract_metric_from_filename) + self.history_checkpoints.clear() + for i, checkpoint_path_prefix in enumerate(history_checkpoints): + if i < self.max_checkpoint_num: + self.history_checkpoints.add(checkpoint_path_prefix) + else: + self.logger.info( + f'deleting checkpoint: {checkpoint_path_prefix}') + self.processor.remove_checkpoints( + trainer, checkpoint_path_prefix=checkpoint_path_prefix) + + def state_dict(self): + return { + 'best_metric': self._best_metric, + } + + def load_state_dict(self, state_dict): + if state_dict is not None and len(state_dict) > 0: + self._best_metric = state_dict.get('best_metric') + else: + self.logger.warning( + 'The state_dict is not available, the best metric value will be affected.' + ) + + def after_run(self, trainer): + if self.restore_best: + # If restore_best is True, will call the LoadCheckpointHook to load the best checkpoint + # for later evaluation or prediction. + from modelscope.trainers.hooks.checkpoint.load_checkpoint_hook import LoadCheckpointHook + LoadCheckpointHook.load_checkpoint(self._best_ckpt_file, trainer) diff --git a/modelscope/trainers/hooks/checkpoint/checkpoint_processor.py b/modelscope/trainers/hooks/checkpoint/checkpoint_processor.py new file mode 100644 index 00000000..f28fc397 --- /dev/null +++ b/modelscope/trainers/hooks/checkpoint/checkpoint_processor.py @@ -0,0 +1,276 @@ +# Copyright (c) Alibaba, Inc. and its affiliates.
+import os +import re +import shutil + +from modelscope.metainfo import Pipelines +from modelscope.utils.checkpoint import (load_checkpoint, save_checkpoint, + save_configuration) +from modelscope.utils.constant import ModelFile +from modelscope.utils.logger import get_logger +from modelscope.utils.torch_utils import is_master + + +class CheckpointProcessor: + + TRAINER_STATE_SUFFIX = '_trainer_state.pth' + + MODEL_STATE_SUFFIX = '.pth' + + def prepare_output(self, trainer, output_dir): + """Prepares the output of target folder. + + This is a strategic function which can be registered by other hook's function. + + Args: + trainer: The trainer instance. + output_dir: The target folder used in inference. + """ + model = trainer.unwrap_module(trainer.model) + config = trainer.cfg + + # override pipeline by tasks name after finetune done, + # avoid case like fill mask pipeline with a text cls task + if config['task'] in [ + getattr(Pipelines, attr) for attr in dir(Pipelines) + if not attr.startswith('__') + ]: + # TODO a temp fix to avoid pipeline_name and task mismatch + config['pipeline'] = {'type': config['task']} + + self.copy_files_and_dump_config(trainer, output_dir, config, + self._bin_file(model)) + + @staticmethod + def copy_files_and_dump_config(trainer, output_dir, config, bin_file): + """Copy useful files to target output folder and dumps the target configuration.json. + """ + model = trainer.unwrap_module(trainer.model) + + class SaveConfig: + + def __init__(self, output_dir, config): + self.output_dir = output_dir + self.config = config + + def __call__(self, _output_dir, _config): + self.config = _config + + def save_config(self): + save_configuration(self.output_dir, self.config) + + for pop_key in [ + 'push_to_hub', 'hub_repo_id', 'hub_token', 'private_hub' + ]: + if config.safe_get('train.checkpoint.period.' + + pop_key) is not None: + config.safe_get('train.checkpoint.period').pop(pop_key) + if config.safe_get('train.checkpoint.best.' + pop_key) is not None: + config.safe_get('train.checkpoint.best').pop(pop_key) + + save_config_fn = SaveConfig(output_dir, config) + + if hasattr(model, 'save_pretrained'): + # Save pretrained of model, skip saving checkpoint + model.save_pretrained( + output_dir, + bin_file, + save_function=lambda *args, **kwargs: None, + config=save_config_fn.config, + save_config_function=save_config_fn) + + if trainer.train_preprocessor is not None: + trainer.train_preprocessor.save_pretrained( + output_dir, + save_config_fn.config, + save_config_function=save_config_fn) + if trainer.eval_preprocessor is not None: + trainer.eval_preprocessor.save_pretrained( + output_dir, + save_config_fn.config, + save_config_function=save_config_fn) + save_config_fn.save_config() + + @staticmethod + def _bin_file(model): + """Get bin file path. + """ + default_bin_file = ModelFile.TORCH_MODEL_BIN_FILE + if hasattr(model, + 'model_dir') and ModelFile.TORCH_MODEL_FILE in os.listdir( + model.model_dir): + default_bin_file = ModelFile.TORCH_MODEL_FILE + return default_bin_file + + def save_checkpoints(self, + trainer, + checkpoint_path_prefix, + output_dir, + meta=None): + """Save the state dict for trainer and model. + + This is a strategic function which can be registered by other hook's function. + + Args: + trainer(`EpochBasedTrainer`): The trainer instance. + checkpoint_path_prefix(`str`): The saving dir with a prefix. + like: /tmp/test/epoch_0 + output_dir(`str`): The output dir for inference. + meta: (`dict`): The meta info needed to be saved into files. 
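+
+        Example:
+            An illustrative call; `trainer` is assumed to be an `EpochBasedTrainer`
+            instance and the paths are placeholders.
+            >>> processor = CheckpointProcessor()
+            >>> processor.save_checkpoints(trainer, '/tmp/test/epoch_1', '/tmp/test/output')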
+ """ + model = trainer.unwrap_module(trainer.model) + _model_file, _train_state_file = self._get_state_file_name( + checkpoint_path_prefix) + + # Save pth file without model state_dict + self.save_trainer_state(trainer, model, _train_state_file, meta) + self.save_model_state(model, _model_file) + self.link(model, _model_file, output_dir) + + def remove_checkpoints(self, trainer, checkpoint_path_prefix): + """Remove obsolete checkpoint files. + + This is a strategic function which can be registered by other hook's function. + + Args: + trainer(`EpochBasedTrainer`): The trainer instance. + checkpoint_path_prefix(`str`): The saving dir with a prefix. + like: /tmp/test/epoch_0 + """ + _model_file, _train_state_file = self._get_state_file_name( + checkpoint_path_prefix) + if os.path.isfile(_train_state_file): + os.remove(_train_state_file) + + if os.path.isfile(_model_file): + os.remove(_model_file) + + def should_save_on_rank(self, trainer): + """Used in ddp or other distributed training scenario, returns whether do saving in current rank. + + This is a strategic function which can be registered by other hook's function. + + Args: + trainer(`EpochBasedTrainer`): The trainer instance. + """ + return is_master() + + def link(self, model, src_file, output_dir): + """Links the src bin file to the output folder. + + Args: + model: The model instance. + src_file: The src bin file path. + output_dir: The target folder used in inference. + """ + + bin_file = self._bin_file(model) + dest_file = os.path.join(output_dir, bin_file) + if os.path.isfile(dest_file): + os.unlink(dest_file) + + try: + os.link(src_file, dest_file) + except OSError as e: + get_logger().error( + f'Link {src_file} to {dest_file} error: {e}, ' + 'changing to copy the bin file, this may use more disk space.') + shutil.copyfile(src_file, dest_file) + + def save_trainer_state(self, trainer, model, train_state_file, meta): + """Save the trainer state, including optimizer/lr_scheduler's state dict, random states etc. + + Args: + trainer: The trainer instance. + model: The model instance. + train_state_file: The target file name for saving trainer states. + meta: Some extra meta info. + """ + save_checkpoint( + model, + train_state_file, + trainer.optimizer, + trainer.lr_scheduler, + meta=meta, + with_model=False) + + def save_model_state(self, model, model_file): + """Save the model state. + + Args: + model: The model instance. + model_file: The target file name for saving model states. + """ + save_checkpoint( + model, model_file, None, None, meta=None, with_meta=False) + + def load_checkpoints(self, checkpoint_path_prefix, trainer, load_all_state, + strict): + """Load checkpoint files of trainer state and model state. + + This is a strategic function which can be registered by other hook's function. + + Args: + checkpoint_path_prefix(str): The checkpoint dir with prefix or a model state file. + Example: '/tmp/test/epoch_0' or '/tmp/test/epoch_0.pth' + trainer(`EpochBasedTrainer`): The trainer instance. + load_all_state(`boolean`): Load all states (else load only module states). + strict(`boolean`): If strict, any unmatched keys will cause an error. + + Returns: + The meta info in json. 
+ """ + _model_file, _train_state_file = self._get_state_file_name( + checkpoint_path_prefix) + meta = {} + if os.path.isfile(_train_state_file): + meta = self.load_trainer_state(trainer, _train_state_file, + load_all_state) + else: + print(f'No trainer state file {_train_state_file} found, skip.') + self.load_model_state(trainer, _model_file, strict) + return meta + + @staticmethod + def load_trainer_state(trainer, train_state_file, load_all_state): + """Load trainer state file. + """ + + optimizer = getattr(trainer, 'optimizer', + None) if load_all_state else None + lr_scheduler = getattr(trainer, 'lr_scheduler', + None) if load_all_state else None + return load_checkpoint(train_state_file, None, optimizer, lr_scheduler) + + def load_model_state(self, trainer, model_file, strict): + """Load model state file. + """ + return load_checkpoint(model_file, + trainer.unwrap_module(trainer.model), None, + None) + + @staticmethod + def _get_state_file_name(checkpoint_path_prefix): + """Get the default file name for state files. + + If the input is a checkpoint dir with prefix, this function will append suffix for both checkpoint files. + If the input is an absolute file name, this function will return it as the model file name, and append + suffix for the trainer file name. + + NOTE: a best checkpoint filename with float or int metric value inside + will not be judged as having a extension file name. like: '/tmp/test/epoch_0_accuracy0.85' + + Args: + checkpoint_path_prefix(`str`): The checkpoint dir with prefix or a model state file + with extension file name. like: '/tmp/test/epoch_0' + + Returns: + A tuple of model state file name and trainer state file name. + """ + base, ext = os.path.splitext(checkpoint_path_prefix) + if len(ext) == 0 or re.match(r'^\d+$', ext[1:]): + return checkpoint_path_prefix + CheckpointProcessor.MODEL_STATE_SUFFIX, \ + checkpoint_path_prefix + CheckpointProcessor.TRAINER_STATE_SUFFIX # noqa + else: + return checkpoint_path_prefix, base + CheckpointProcessor.TRAINER_STATE_SUFFIX.split( + '.')[0] + '.' + ext[1:] diff --git a/modelscope/trainers/hooks/checkpoint/load_checkpoint_hook.py b/modelscope/trainers/hooks/checkpoint/load_checkpoint_hook.py new file mode 100644 index 00000000..3ccb800f --- /dev/null +++ b/modelscope/trainers/hooks/checkpoint/load_checkpoint_hook.py @@ -0,0 +1,138 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import random +from typing import Optional + +import numpy as np +import torch +from packaging import version + +from modelscope.metainfo import Hooks +from modelscope.trainers.hooks.builder import HOOKS +from modelscope.trainers.hooks.checkpoint.checkpoint_processor import \ + CheckpointProcessor +from modelscope.trainers.hooks.hook import Hook +from modelscope.trainers.hooks.priority import Priority +from modelscope.utils.logger import get_logger + + +@HOOKS.register_module(module_name=Hooks.LoadCheckpointHook) +class LoadCheckpointHook(Hook): + """Load a checkpoint file at the beginning of training or evaluating. + + This hook does not need to be configured or saved in the config file. + User should use it by: + >>> trainer.train('some-checkpoint', load_all_state=True) + or + >>> trainer.evaluate('some-checkpoint') + instead. + + Args: + checkpoint_file (str): The checkpoint file to be loaded. + load_all_state (bool): Load all states(optimizer, epoch, lr_scheduler, random_state, etc.) when loading old + training state file or not. The model's state dict will only be loaded if False. 
+ strict (bool): If strict, any unmatched keys will cause an error. + """ + + PRIORITY = Priority.HIGH + + _should_save = False + + # From 1.3.1 version we split one pth file to two files: trainer state pth file/model state pth file. + _TWO_PTH_FILE_VERSION = '1.3.1' + + def __init__( + self, + checkpoint_file: Optional[str] = None, + load_all_state: Optional[bool] = True, + strict: Optional[bool] = False, + ): + self.checkpoint_file = checkpoint_file + self.rng_state = None + self.need_load_rng_state = False + self.load_all_state = load_all_state + self.strict = strict + self.processor = CheckpointProcessor() + + def before_run(self, trainer): + if not hasattr(trainer, 'logger'): + self.logger = get_logger() + else: + self.logger = trainer.logger + + if self.checkpoint_file is not None: + meta = self.load_checkpoint(self.checkpoint_file, trainer, + self.load_all_state, self.strict) + self.rng_state = meta.get('rng_state') + self.need_load_rng_state = self.load_all_state + + def before_train_iter(self, trainer): + if self.need_load_rng_state: + if self.rng_state is not None: + random.setstate(self.rng_state['random']) + np.random.set_state(self.rng_state['numpy']) + torch.random.set_rng_state(self.rng_state['cpu']) + if torch.cuda.is_available(): + torch.cuda.random.set_rng_state_all(self.rng_state['cuda']) + self.need_load_rng_state = False + else: + self.logger.info( + 'Random state cannot be found in checkpoint file, ' + 'this may cause a random data order or model initialization.' + ) + + @staticmethod + def _restore_training_state(trainer, meta): + trainer._epoch = meta.get('epoch', trainer._epoch) + trainer._iter = meta.get('iter', trainer._iter) + trainer._inner_iter = meta.get('inner_iter', trainer._inner_iter) + + i = 0 + for hook in trainer.hooks: + if hasattr(hook, 'load_state_dict') and getattr( + hook, '_should_save', True): + key = f'{hook.__class__}-{i}' + if key in meta: + hook.load_state_dict(meta.get(key, {})) + else: + trainer.logger.warning( + f'The state_dict of hook {hook.__class__} at index {i} is not found in the checkpoint file.' + ) + i += 1 + + @classmethod + def load_checkpoint(cls, + filename, + trainer, + load_all_state=True, + strict=False): + """A static method to load checkpoint files. + + Args: + filename(str): An absolute model bin file(pth or bin) or a dir path with a file prefix(like epoch_1). + trainer(`EpochBasedTrainer`): The trainer instance. + load_all_state(`bool`): Load all states including the trainer states. + strict(`bool`): Load module state dict strictly. + + Returns: + A dict containing the train states saved by `_create_training_state` + """ + meta = cls().processor.load_checkpoints(filename, trainer, + load_all_state, strict) + if load_all_state: + cls._restore_training_state(trainer, meta) + + if meta is not None: + _version = meta.get('modelscope') + if _version is not None and version.parse( + _version) < version.parse( + LoadCheckpointHook._TWO_PTH_FILE_VERSION): + trainer.logger.warning( + 'The unique pth file is split into a model file and ' + f'a trainer file since version {LoadCheckpointHook._TWO_PTH_FILE_VERSION},' + 'consider re-training your model or ' + 'using a converting script to split the single pth file into two.' 
+ ) + trainer.logger.info( + f'Checkpoint {filename} saving time: {meta.get("time")}, modelscope version: {_version}' + ) + return meta diff --git a/modelscope/trainers/hooks/checkpoint_hook.py b/modelscope/trainers/hooks/checkpoint_hook.py deleted file mode 100644 index 59832105..00000000 --- a/modelscope/trainers/hooks/checkpoint_hook.py +++ /dev/null @@ -1,749 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import os -import random -import re -import time - -import numpy as np -import torch -from packaging import version - -from modelscope.hub.check_model import check_model_is_id -from modelscope.hub.push_to_hub import push_to_hub_async -from modelscope.metainfo import Hooks, Pipelines -from modelscope.utils.checkpoint import (load_checkpoint, save_checkpoint, - save_configuration) -from modelscope.utils.constant import LogKeys, ModelFile -from modelscope.utils.logger import get_logger -from modelscope.utils.torch_utils import is_master -from .builder import HOOKS -from .hook import Hook -from .priority import Priority - - -@HOOKS.register_module(module_name=Hooks.CheckpointHook) -class CheckpointHook(Hook): - """Save checkpoints periodically. - - Args: - interval (int): The frequency to save model. If `by_epoch=True`, - it means the number of epochs, else means the number of iterations - by_epoch (bool): Saving checkpoints by epoch or by iteration. - save_optimizer (bool): Whether to save optimizer state dict. Default: True. - save_dir (str): The directory to save checkpoints. If is None, use `trainer.work_dir` - output_sub_dir (str): The sub folder under the `save_dir` to save the output checkpoint for inference. - Default 'output'. - save_last (bool): Whether to save the last checkpoint. Default: True. - max_checkpoint_num (int): The max number of checkpoint files, default None which means never delete anything. - If the number exceeding the limit, earlier checkpoints will be deleted first. 
- """ - - PRIORITY = Priority.LOW - - TRAINER_STATE_SUFFIX = '_trainer_state.pth' - - MODEL_STATE_SUFFIX = '.pth' - - def __init__(self, - interval=0, - by_epoch=True, - save_optimizer=True, - save_dir=None, - output_sub_dir=ModelFile.TRAIN_OUTPUT_DIR, - save_last=True, - max_checkpoint_num=None, - push_to_hub=False, - model_id_with_org=None, - hub_token=None, - private_hub=True, - **kwargs): - self.interval = interval - self.by_epoch = by_epoch - self.save_optimizer = save_optimizer - self.save_dir = save_dir - self.output_sub_dir = output_sub_dir - self.save_last = save_last - self.rng_state = None - self.max_checkpoint_num = None - self.push_to_hub = push_to_hub - self.model_id_with_org = model_id_with_org - self.hub_token = hub_token - self.private_hub = private_hub - self.is_model_id = None - self.push_to_hub_future = None - if max_checkpoint_num is not None: - self.max_checkpoint_num = max(int(max_checkpoint_num), 1) - self.history_checkpoints = [] - - def before_run(self, trainer): - if not self.save_dir: - self.save_dir = trainer.work_dir - - if not os.path.exists(self.save_dir): - os.makedirs(self.save_dir, exist_ok=True) - - if not hasattr(trainer, 'logger'): - self.logger = get_logger() - else: - self.logger = trainer.logger - - if is_master(): - output_dir = os.path.join(self.save_dir, self.output_sub_dir) - # only global master prepares the output folder - self.prepare_output(trainer, output_dir) - self.logger.info(f'Checkpoints will be saved to {self.save_dir}') - - def generate_prefix(self, trainer): - if self.by_epoch: - return f'{LogKeys.EPOCH}_{trainer.epoch + 1}' - else: - return f'{LogKeys.ITER}_{trainer.iter + 1}' - - def after_train_epoch(self, trainer): - if not self.by_epoch: - return - - if self._should_save(trainer): - # prefix like 'epoch-1' or 'iter-1' - prefix = self.generate_prefix(trainer) - if self.should_save_on_rank(trainer): - if is_master(): - self.logger.info( - f'Saving checkpoint at {trainer.epoch + 1} epoch') - self._save_checkpoint(trainer, prefix) - if is_master() and self.push_to_hub: - if self.push_to_hub_future is not None and not self.push_to_hub_future.done( - ): - self.logger.error( - f'Another uploading is running, ' - f'this uploading with message {prefix} will be canceled.' - ) - return - self.push_to_hub_future = self._push_to_hub(trainer, prefix) - - def after_train_iter(self, trainer): - if self.by_epoch: - return - - if self._should_save(trainer): - # prefix like 'epoch-1' or 'iter-1' - prefix = self.generate_prefix(trainer) - if self.should_save_on_rank(trainer): - if is_master(): - self.logger.info( - f'Saving checkpoint at {trainer.iter + 1} iter') - self._save_checkpoint(trainer, prefix) - if is_master() and self.push_to_hub: - if self.push_to_hub_future is not None and not self.push_to_hub_future.done( - ): - self.logger.error( - f'Another uploading is running, ' - f'this uploading with message {prefix} will be canceled.' - ) - return - self.push_to_hub_future = self._push_to_hub(trainer, prefix) - - def after_run(self, trainer): - if self.push_to_hub_future is not None and not self.push_to_hub_future.done( - ): - self.logger.info('Train finished. 
Uploading models, waiting...') - while not self.push_to_hub_future.done(): - time.sleep(1) - self.logger.info('Uploading models done.') - - def _push_to_hub(self, trainer, prefix): - if self.is_model_id is None: - self.is_model_id = check_model_is_id(trainer.input_model_id, - self.hub_token) - - return push_to_hub_async( - self.model_id_with_org, - os.path.join(self.save_dir, self.output_sub_dir), - token=self.hub_token, - private=self.private_hub, - commit_message=prefix, - source_repo=trainer.input_model_id if self.is_model_id else '') - - def _save_checkpoint(self, trainer, prefix): - """Save checkpoint files and remove obsolete ones - """ - checkpoint_path_prefix = os.path.join(self.save_dir, prefix) - meta = self._create_training_state(trainer) - self.save_checkpoints(trainer, checkpoint_path_prefix, - self.output_sub_dir, meta) - self.history_checkpoints.append(checkpoint_path_prefix) - self._remove_obsolete_checkpoints(trainer) - return prefix - - def _remove_obsolete_checkpoints(self, trainer): - if self.max_checkpoint_num is not None and \ - len(self.history_checkpoints) > self.max_checkpoint_num: - history_checkpoints = [ckpt for ckpt in self.history_checkpoints] - self.history_checkpoints.clear() - for i, checkpoint_path_prefix in enumerate(history_checkpoints): - if i < len(history_checkpoints) - self.max_checkpoint_num: - self.logger.info( - f'deleting checkpoint: {checkpoint_path_prefix}') - self.remove_checkpoints( - trainer, checkpoint_path_prefix=checkpoint_path_prefix) - else: - self.history_checkpoints.append(checkpoint_path_prefix) - - def _should_save(self, trainer): - if self.by_epoch: - check_last = self.is_last_epoch - check_frequency = self.every_n_epochs - else: - check_last = self.is_last_iter - check_frequency = self.every_n_iters - - if check_frequency(trainer, - self.interval) or (self.save_last - and check_last(trainer)): - return True - return False - - def _create_training_state(self, trainer): - self.rng_state = { - 'random': random.getstate(), - 'numpy': np.random.get_state(), - 'cpu': torch.random.get_rng_state(), - 'cuda': torch.cuda.get_rng_state_all(), - } - - # keep epoch/iter/inner_iter/random_state - meta = { - 'epoch': trainer.epoch, - 'iter': trainer.iter + 1, - 'inner_iter': trainer.inner_iter + 1, - 'rng_state': self.rng_state, - } - - # keep hooks state - i = 0 - for hook in trainer.hooks: - if hasattr(hook, 'state_dict') and getattr(hook, '_should_save', - True): - meta[f'{hook.__class__}-{i}'] = hook.state_dict() - i += 1 - - return meta - - @staticmethod - def copy_files_and_dump_config(trainer, output_dir, config, bin_file): - """Copy useful files to target output folder and dumps the target configuration.json. - """ - model = trainer.unwrap_module(trainer.model) - - class SaveConfig: - - def __init__(self, output_dir, config): - self.output_dir = output_dir - self.config = config - - def __call__(self, _output_dir, _config): - self.config = _config - - def save_config(self): - save_configuration(self.output_dir, self.config) - - for pop_key in [ - 'push_to_hub', 'model_id_with_org', 'hub_token', 'private_hub' - ]: - if config.safe_get('train.checkpoint.period.' - + pop_key) is not None: - config.safe_get('train.checkpoint.period').pop(pop_key) - if config.safe_get('train.checkpoint.best.' 
+ pop_key) is not None: - config.safe_get('train.checkpoint.best').pop(pop_key) - - save_config_fn = SaveConfig(output_dir, config) - - if hasattr(model, 'save_pretrained'): - # Save pretrained of model, skip saving checkpoint - model.save_pretrained( - output_dir, - bin_file, - save_function=lambda *args, **kwargs: None, - config=save_config_fn.config, - save_config_function=save_config_fn) - - if trainer.train_preprocessor is not None: - trainer.train_preprocessor.save_pretrained( - output_dir, - save_config_fn.config, - save_config_function=save_config_fn) - if trainer.eval_preprocessor is not None: - trainer.eval_preprocessor.save_pretrained( - output_dir, - save_config_fn.config, - save_config_function=save_config_fn) - save_config_fn.save_config() - - @staticmethod - def _bin_file(model): - """Get bin file path. - """ - default_bin_file = ModelFile.TORCH_MODEL_BIN_FILE - if hasattr(model, - 'model_dir') and ModelFile.TORCH_MODEL_FILE in os.listdir( - model.model_dir): - default_bin_file = ModelFile.TORCH_MODEL_FILE - return default_bin_file - - @Hook.overload_func(name='CheckpointHook.prepare_output') - def prepare_output(self, trainer, output_dir): - """Prepares the output of target folder. - - This is a strategic function which can be registered by other hook's function. - - Args: - trainer: The trainer instance. - output_dir: The target folder used in inference. - """ - model = trainer.unwrap_module(trainer.model) - config = trainer.cfg - - # override pipeline by tasks name after finetune done, - # avoid case like fill mask pipeline with a text cls task - if config['task'] in [ - getattr(Pipelines, attr) for attr in dir(Pipelines) - if not attr.startswith('__') - ]: - # TODO a temp fix to avoid pipeline_name and task mismatch - config['pipeline'] = {'type': config['task']} - - self.copy_files_and_dump_config(trainer, output_dir, config, - self._bin_file(model)) - - def link(self, model, src_file, output_dir): - """Links the src bin file to the output folder. - - Args: - model: The model instance. - src_file: The src bin file path. - output_dir: The target folder used in inference. - """ - - bin_file = self._bin_file(model) - dest_file = os.path.join(output_dir, bin_file) - if os.path.isfile(dest_file): - os.unlink(dest_file) - - os.link(src_file, dest_file) - - def save_trainer_state(self, trainer, model, train_state_file, meta): - """Save the trainer state, including optimizer/lr_scheduler's state dict, random states etc. - - Args: - trainer: The trainer instance. - model: The model instance. - train_state_file: The target file name for saving trainer states. - meta: Some extra meta info. - """ - save_checkpoint( - model, - train_state_file, - trainer.optimizer, - trainer.lr_scheduler, - meta=meta, - with_model=False) - - def save_model_state(self, model, model_file): - """Save the model state. - - Args: - model: The model instance. - model_file: The target file name for saving model states. - """ - save_checkpoint( - model, model_file, None, None, meta=None, with_meta=False) - - @Hook.overload_func(name='CheckpointHook.save_checkpoints') - def save_checkpoints(self, - trainer, - checkpoint_path_prefix, - output_sub_dir, - meta=None): - """Save the state dict for trainer and model. - - This is a strategic function which can be registered by other hook's function. - - Args: - trainer(`EpochBasedTrainer`): The trainer instance. - checkpoint_path_prefix(`str`): The saving dir with a prefix. 
- like: /tmp/test/epoch_0 - output_sub_dir(`str`): The sub-dir in the saving dir used in inference. - meta: (`dict`): The meta info needed to be saved into files. - """ - model = trainer.unwrap_module(trainer.model) - _model_file, _train_state_file = _get_state_file_name( - checkpoint_path_prefix) - - # Save pth file without model state_dict - self.save_trainer_state(trainer, model, _train_state_file, meta) - self.save_model_state(model, _model_file) - output_dir = os.path.join(self.save_dir, output_sub_dir) - self.link(model, _model_file, output_dir) - - @Hook.overload_func(name='CheckpointHook.remove_checkpoints') - def remove_checkpoints(self, trainer, checkpoint_path_prefix): - """Remove obsolete checkpoint files. - - This is a strategic function which can be registered by other hook's function. - - Args: - trainer(`EpochBasedTrainer`): The trainer instance. - checkpoint_path_prefix(`str`): The saving dir with a prefix. - like: /tmp/test/epoch_0 - """ - _model_file, _train_state_file = _get_state_file_name( - checkpoint_path_prefix) - if os.path.isfile(_train_state_file): - os.remove(_train_state_file) - - if os.path.isfile(_model_file): - os.remove(_model_file) - - @Hook.overload_func(name='CheckpointHook.should_save_on_rank') - def should_save_on_rank(self, trainer): - """Used in ddp or other distributed training scenario, returns whether do saving in current rank. - - This is a strategic function which can be registered by other hook's function. - - Args: - trainer(`EpochBasedTrainer`): The trainer instance. - """ - return is_master() - - -@HOOKS.register_module(module_name=Hooks.BestCkptSaverHook) -class BestCkptSaverHook(CheckpointHook): - """ - Save best checkpoints hook. - - Args: - metric_key (str): Metric key to compare rule for best score. - rule (str): Comparison rule for best score. Support "max" and "min". If rule is "max", the checkpoint - at the maximum `metric_key` will be saved, If rule is "min", the checkpoint at the minimum `metric_key` - will be saved. - by_epoch (bool): Save best checkpoints by epoch or by iteration. - save_optimizer (bool): Whether to save optimizer state dict. Default: True. - save_dir (str): Output directory to save best checkpoint. - output_sub_dir (str): The sub folder under the `save_dir` to save the output checkpoint for inference. - Default 'output_best'. - restore_best (bool): Whether to restore the best checkpoint after training. - max_checkpoint_num (int): The max number of checkpoint files, default None which means never delete anything. - If the number exceeding the limit, checkpoints with worse metric will be deleted, which is judged by the - `rule` and `metric_key` arguments. - """ - - PRIORITY = Priority.LOW - rule_map = {'max': lambda x, y: x > y, 'min': lambda x, y: x < y} - - def __init__(self, - metric_key, - rule='max', - by_epoch=True, - save_optimizer=True, - save_dir=None, - output_sub_dir=ModelFile.TRAIN_BEST_OUTPUT_DIR, - save_file_name=None, - restore_best=False, - max_checkpoint_num=1, - interval=0, - **kwargs): - assert rule in ['max', 'min'], 'Only support "max" or "min" rule now.' 
- super().__init__( - interval=interval, - by_epoch=by_epoch, - save_optimizer=save_optimizer, - save_dir=save_dir, - output_sub_dir=output_sub_dir, - max_checkpoint_num=max_checkpoint_num, - **kwargs, - ) - self.metric_key = metric_key - self.rule = rule - self._best_metric = None - self._best_ckpt_file = None - self.save_file_name = save_file_name - self.restore_best = restore_best - self.history_checkpoints = set() - - def _should_save(self, trainer): - return self._is_best_metric(trainer.metric_values) - - def _is_best_metric(self, metric_values): - if metric_values is None: - return False - - if self.metric_key not in metric_values: - raise ValueError( - f'Not find metric_key: {self.metric_key} in {metric_values}') - - if self._best_metric is None: - self._best_metric = metric_values[self.metric_key] - return True - else: - compare_fn = self.rule_map[self.rule] - if compare_fn(metric_values[self.metric_key], self._best_metric): - self._best_metric = metric_values[self.metric_key] - return True - return False - - def generate_prefix(self, trainer): - if self.by_epoch: - return f'best_{LogKeys.EPOCH}{trainer.epoch + 1}_{self.metric_key}{self._best_metric}' - else: - return f'best_{LogKeys.ITER}{trainer.iter + 1}_{self.metric_key}{self._best_metric}' - - def _save_checkpoint(self, trainer, prefix): - checkpoint_path_prefix = self.save_file_name - if checkpoint_path_prefix is None: - checkpoint_path_prefix = os.path.join(self.save_dir, prefix) - else: - checkpoint_path_prefix = os.path.join(self.save_dir, - checkpoint_path_prefix) - - self._best_ckpt_file = checkpoint_path_prefix - meta = self._create_training_state(trainer) - self.save_checkpoints(trainer, checkpoint_path_prefix, - self.output_sub_dir, meta) - self.history_checkpoints.add(checkpoint_path_prefix) - self._remove_obsolete_checkpoints(trainer) - return prefix - - def _remove_obsolete_checkpoints(self, trainer): - - def extract_metric_from_filename(name1): - metric1 = float(name1.split(self.metric_key)[1]) - if self.rule == 'max': - return -metric1 - else: - return metric1 - - if self.max_checkpoint_num is not None and \ - len(self.history_checkpoints) > self.max_checkpoint_num: - history_checkpoints = sorted( - self.history_checkpoints, key=extract_metric_from_filename) - self.history_checkpoints.clear() - for i, checkpoint_path_prefix in enumerate(history_checkpoints): - if i < self.max_checkpoint_num: - self.history_checkpoints.add(checkpoint_path_prefix) - else: - self.logger.info( - f'deleting checkpoint: {checkpoint_path_prefix}') - self.remove_checkpoints( - trainer, checkpoint_path_prefix=checkpoint_path_prefix) - - def state_dict(self): - return { - 'best_metric': self._best_metric, - } - - def load_state_dict(self, state_dict): - if state_dict is not None and len(state_dict) > 0: - self._best_metric = state_dict.get('best_metric') - else: - self.logger.warning( - 'The state_dict is not available, the best metric value will be affected.' - ) - - def after_run(self, trainer): - if self.restore_best: - # If restore_best is True, will call the LoadCheckpointHook to load the best checkpoint - # for later evaluation or prediction. - LoadCheckpointHook.load_checkpoint(self._best_ckpt_file, trainer) - - -@HOOKS.register_module(module_name=Hooks.LoadCheckpointHook) -class LoadCheckpointHook(Hook): - """Load a checkpoint file at the beginning of training or evaluating. - - This hook does not need to be configured or saved in the config file. 
- User should use it by: - >>> trainer.train('some-checkpoint', load_all_state=True) - or - >>> trainer.evaluate('some-checkpoint') - instead. - - Args: - checkpoint_file (str): The checkpoint file to be loaded. - load_all_state (bool): Load all states(optimizer, epoch, lr_scheduler, random_state, etc.) when loading old - training state file or not. The model's state dict will only be loaded if False. - strict (bool): If strict, any unmatched keys will cause an error. - """ - - PRIORITY = Priority.HIGH - - _should_save = False - - _TWO_PTH_FILE_VERSION = '1.3.1' - - def __init__( - self, - checkpoint_file=None, - load_all_state=True, - strict=False, - ): - self.checkpoint_file = checkpoint_file - self.rng_state = None - self.need_load_rng_state = False - self.load_all_state = load_all_state - self.strict = strict - - def before_run(self, trainer): - if not hasattr(trainer, 'logger'): - self.logger = get_logger() - else: - self.logger = trainer.logger - - if self.checkpoint_file is not None: - meta = self.load_checkpoint(self.checkpoint_file, trainer, - self.load_all_state, self.strict) - self.rng_state = meta.get('rng_state') - self.need_load_rng_state = self.load_all_state - - def before_train_iter(self, trainer): - if self.need_load_rng_state: - if self.rng_state is not None: - random.setstate(self.rng_state['random']) - np.random.set_state(self.rng_state['numpy']) - torch.random.set_rng_state(self.rng_state['cpu']) - if torch.cuda.is_available(): - torch.cuda.random.set_rng_state_all(self.rng_state['cuda']) - self.need_load_rng_state = False - else: - self.logger.info( - 'Random state cannot be found in checkpoint file, ' - 'this may cause a random data order or model initialization.' - ) - - @staticmethod - def _restore_training_state(trainer, meta): - trainer._epoch = meta.get('epoch', trainer._epoch) - trainer._iter = meta.get('iter', trainer._iter) - trainer._inner_iter = meta.get('inner_iter', trainer._inner_iter) - - i = 0 - for hook in trainer.hooks: - if hasattr(hook, 'load_state_dict') and getattr( - hook, '_should_save', True): - key = f'{hook.__class__}-{i}' - if key in meta: - hook.load_state_dict(meta.get(key, {})) - else: - trainer.logger.warning( - f'The state_dict of hook {hook.__class__} at index {i} is not found in the checkpoint file.' - ) - i += 1 - - @classmethod - def load_checkpoint(cls, - filename, - trainer, - load_all_state=True, - strict=False): - """A static method to load checkpoint files. - - Args: - filename(str): An absolute model bin file(pth or bin) or a dir path with a file prefix(like epoch_1). - trainer(`EpochBasedTrainer`): The trainer instance. - load_all_state(`bool`): Load all states including the trainer states. - strict(`bool`): Load module state dict strictly. - - Returns: - A dict containing the train states saved by `_create_training_state` - """ - meta = cls().load_checkpoints(filename, trainer, load_all_state, - strict) - if load_all_state: - cls._restore_training_state(trainer, meta) - - if meta is not None: - _version = meta.get('modelscope') - if _version is not None and version.parse( - _version) < version.parse( - LoadCheckpointHook._TWO_PTH_FILE_VERSION): - trainer.logger.warning( - 'The unique pth file is split into a model file and ' - f'a trainer file since version {LoadCheckpointHook._TWO_PTH_FILE_VERSION},' - 'consider re-training your model or ' - 'using a converting script to split the single pth file into two.' 
- ) - trainer.logger.info( - f'Checkpoint {filename} saving time: {meta.get("time")}, modelscope version: {_version}' - ) - return meta - - @staticmethod - def load_trainer_state(trainer, train_state_file, load_all_state): - """Load trainer state file. - """ - - optimizer = getattr(trainer, 'optimizer', - None) if load_all_state else None - lr_scheduler = getattr(trainer, 'lr_scheduler', - None) if load_all_state else None - return load_checkpoint(train_state_file, None, optimizer, lr_scheduler) - - def load_model_state(self, trainer, model_file, strict): - """Load model state file. - """ - return load_checkpoint(model_file, - trainer.unwrap_module(trainer.model), None, - None) - - @Hook.overload_func(name='LoadCheckpointHook.load_checkpoints') - def load_checkpoints(self, checkpoint_path_prefix, trainer, load_all_state, - strict): - """Load checkpoint files of trainer state and model state. - - This is a strategic function which can be registered by other hook's function. - - Args: - checkpoint_path_prefix(str): The checkpoint dir with prefix or a model state file. - Example: '/tmp/test/epoch_0' or '/tmp/test/epoch_0.pth' - trainer(`EpochBasedTrainer`): The trainer instance. - load_all_state(`boolean`): Load all states (else load only module states). - strict(`boolean`): If strict, any unmatched keys will cause an error. - - Returns: - The meta info in json. - """ - _model_file, _train_state_file = _get_state_file_name( - checkpoint_path_prefix) - meta = {} - if os.path.isfile(_train_state_file): - meta = self.load_trainer_state(trainer, _train_state_file, - load_all_state) - else: - print(f'No trainer state file {_train_state_file} found, skip.') - self.load_model_state(trainer, _model_file, strict) - return meta - - -def _get_state_file_name(checkpoint_path_prefix): - """Get the default file name for state files. - - If the input is a checkpoint dir with prefix, this function will append suffix for both checkpoint files. - If the input is an absolute file name, this function will return it as the model file name, and append - suffix for the trainer file name. - - NOTE: a best checkpoint filename with float or int metric value inside - will not be judged as having a extension file name. like: '/tmp/test/epoch_0_accuracy0.85' - - Args: - checkpoint_path_prefix(`str`): The checkpoint dir with prefix or a model state file with extension file name. - like: '/tmp/test/epoch_0' - - Returns: - A tuple of model state file name and trainer state file name. - """ - base, ext = os.path.splitext(checkpoint_path_prefix) - if len(ext) == 0 or re.match(r'^\d+$', ext[1:]): - return checkpoint_path_prefix + CheckpointHook.MODEL_STATE_SUFFIX, \ - checkpoint_path_prefix + CheckpointHook.TRAINER_STATE_SUFFIX - else: - return checkpoint_path_prefix, base + CheckpointHook.TRAINER_STATE_SUFFIX.split( - '.')[0] + '.' + ext[1:] diff --git a/modelscope/trainers/hooks/compression/sparsity_hook.py b/modelscope/trainers/hooks/compression/sparsity_hook.py index 993488d8..e71c269a 100644 --- a/modelscope/trainers/hooks/compression/sparsity_hook.py +++ b/modelscope/trainers/hooks/compression/sparsity_hook.py @@ -1,7 +1,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import os -from modelscope import __version__ from modelscope.metainfo import Hooks from modelscope.trainers.hooks.builder import HOOKS from modelscope.trainers.hooks.hook import Hook diff --git a/tests/pipelines/easycv_pipelines/__init__.py b/modelscope/trainers/hooks/distributed/__init__.py similarity index 100% rename from tests/pipelines/easycv_pipelines/__init__.py rename to modelscope/trainers/hooks/distributed/__init__.py diff --git a/modelscope/trainers/hooks/ddp_hook.py b/modelscope/trainers/hooks/distributed/ddp_hook.py similarity index 89% rename from modelscope/trainers/hooks/ddp_hook.py rename to modelscope/trainers/hooks/distributed/ddp_hook.py index eaae2d89..2bdbe939 100644 --- a/modelscope/trainers/hooks/ddp_hook.py +++ b/modelscope/trainers/hooks/distributed/ddp_hook.py @@ -1,11 +1,11 @@ # Copyright (c) Alibaba, Inc. and its affiliates. from modelscope.metainfo import Hooks +from modelscope.trainers.hooks.builder import HOOKS +from modelscope.trainers.hooks.hook import Hook +from modelscope.trainers.hooks.priority import Priority from modelscope.utils.constant import DistributedParallelType from modelscope.utils.device import create_device from modelscope.utils.torch_utils import get_local_rank, init_dist -from .builder import HOOKS -from .hook import Hook -from .priority import Priority @HOOKS.register_module(module_name=Hooks.DDPHook) diff --git a/modelscope/trainers/hooks/deepspeed_hook.py b/modelscope/trainers/hooks/distributed/deepspeed_hook.py similarity index 64% rename from modelscope/trainers/hooks/deepspeed_hook.py rename to modelscope/trainers/hooks/distributed/deepspeed_hook.py index a34b3f6f..7dddc5d9 100644 --- a/modelscope/trainers/hooks/deepspeed_hook.py +++ b/modelscope/trainers/hooks/distributed/deepspeed_hook.py @@ -8,72 +8,48 @@ from deepspeed import DeepSpeedEngine from megatron_util import mpu, print_rank_0 from modelscope.metainfo import Hooks +from modelscope.trainers.hooks import LoadCheckpointHook from modelscope.trainers.hooks.builder import HOOKS +from modelscope.trainers.hooks.checkpoint.checkpoint_hook import ( + BestCkptSaverHook, CheckpointHook) from modelscope.trainers.hooks.hook import Hook from modelscope.trainers.hooks.priority import Priority from modelscope.utils.checkpoint import save_checkpoint from modelscope.utils.logger import get_logger -from .checkpoint_hook import CheckpointHook, LoadCheckpointHook -from .megatron_hook import MegatronHook +from ..checkpoint.checkpoint_processor import CheckpointProcessor +from ..lr_scheduler_hook import LrSchedulerProcessor +from ..optimizer.base import OptimizerHook, OptimizerProcessor -@HOOKS.register_module(module_name=Hooks.DeepspeedHook) -class DeepspeedHook(MegatronHook): - PRIORITY = Priority.VERY_HIGH +class DeepspeedProcessor(CheckpointProcessor, LrSchedulerProcessor, + OptimizerProcessor): - def __init__(self, - deepspeed_activation_checkpointing=True, - save_zero_checkpoint=False, - with_mpu=True): - self.save_zero_checkpoint = save_zero_checkpoint - self.deepspeed_activation_checkpointing = deepspeed_activation_checkpointing - # TODO without mpu - self.with_mpu = with_mpu - assert with_mpu, 'DeepspeedHook now is only for mpu models.' 
+    _BIN_FILE_DIR = 'model'
 
-    def register_strategy(self):
-        Hook.overload(name='OptimizerHook.backward', function=self.backward)
-        Hook.overload(
-            name='OptimizerHook.initialize_optimizer', function=self.idle)
-        Hook.overload(name='LrSchedulerHook.step', function=self.idle)
-        Hook.overload(
-            name='CheckpointHook.save_checkpoints',
-            function=self.save_checkpoints)
-        Hook.overload(
-            name='LoadCheckpointHook.load_checkpoints',
-            function=self.load_checkpoints)
-        Hook.overload(
-            name='CheckpointHook.remove_checkpoints',
-            function=self.remove_checkpoints)
-        Hook.overload(
-            name='CheckpointHook.prepare_output', function=self.prepare_output)
-        if self.with_mpu:
-            Hook.overload(
-                name='CheckpointHook.should_save_on_rank',
-                function=self.should_save_on_rank)
+    def rank_name(self):
+        # TODO
+        try:
+            tp_world_size = mpu.get_tensor_model_parallel_world_size()
+            if tp_world_size == 1:
+                return ''
+            mp_rank = mpu.get_tensor_model_parallel_rank()
+            return '_mp_rank_{:02d}'.format(mp_rank)
+        except (ImportError, AssertionError):
+            return ''
 
-    def backward(self, trainer, loss_keys, cumulative_iters, grad_clip):
-        # assert cumulative_iters == 1, 'DeepSpeed only support cumulative_iters=1'
-        # The `trainer.model` here is actually a deepspeed engine object.
-        # backward step
-        for k in loss_keys:
-            loss = trainer.train_outputs[k]
-            trainer.model.backward(loss)
-
-        # update parameters
-        trainer.model.step()
-
-    def idle(self, *args, **kwargs):
-        pass
+    def get_bin_file(self):
+        mp_rank = mpu.get_tensor_model_parallel_rank()
+        rank = '{:02d}'.format(mp_rank)
+        return f'mp_rank_{rank}_model_states.pt'
 
     def save_checkpoints(self,
                          trainer,
                          checkpoint_path_prefix,
-                         output_sub_dir,
+                         output_dir,
                          meta=None):
         model = trainer.unwrap_module(trainer.model)
         _train_state_file = checkpoint_path_prefix + self.rank_name(
-        ) + CheckpointHook.TRAINER_STATE_SUFFIX
+        ) + CheckpointProcessor.TRAINER_STATE_SUFFIX
         # Save pth file without model state_dict
         save_checkpoint(
             model, _train_state_file, None, None, meta=meta, with_model=False)
@@ -84,16 +60,22 @@ class DeepspeedHook(MegatronHook):
         bin_file = self.get_bin_file()
         src_file = os.path.join(checkpoint_path_prefix, bin_file)
-        dest_file = os.path.join(save_dir, output_sub_dir, self._BIN_FILE_DIR,
-                                 bin_file)
+        dest_file = os.path.join(output_dir, self._BIN_FILE_DIR, bin_file)
 
         if os.path.isfile(dest_file):
             os.unlink(dest_file)
 
-        os.link(src_file, dest_file)
+        try:
+            os.link(src_file, dest_file)
+        except OSError as e:
+            get_logger().error(
+                f'Link {src_file} to {dest_file} error: {e}, '
+                'changing to copy the bin file, this may cause more space usage.'
+ ) + shutil.copyfile(src_file, dest_file) def remove_checkpoints(self, trainer, checkpoint_path_prefix): _train_state_file = checkpoint_path_prefix + self.rank_name( - ) + CheckpointHook.TRAINER_STATE_SUFFIX + ) + CheckpointProcessor.TRAINER_STATE_SUFFIX if os.path.isfile(_train_state_file): os.remove(_train_state_file) @@ -107,10 +89,10 @@ class DeepspeedHook(MegatronHook): meta = {} _train_state_file = checkpoint_path_prefix + self.rank_name( - ) + CheckpointHook.TRAINER_STATE_SUFFIX + ) + CheckpointProcessor.TRAINER_STATE_SUFFIX if os.path.isfile(_train_state_file): - meta = LoadCheckpointHook.load_trainer_state( - trainer, _train_state_file, load_all_state) + meta = self.load_trainer_state(trainer, _train_state_file, + load_all_state) if isinstance(trainer.model, DeepSpeedEngine): # DeepSpeedEngine is initialized @@ -138,6 +120,57 @@ class DeepspeedHook(MegatronHook): checkpoint, strict=strict) return meta + def backward(self, trainer, loss_keys, cumulative_iters, grad_clip): + # assert cumulative_iters == 1, 'DeepSpeed only support cumulative_iters=1' + # The `trainer.model` here is actually a deepspeed engine object. + # backward step + for k in loss_keys: + loss = trainer.train_outputs[k] + trainer.model.backward(loss) + + # update parameters + trainer.model.step() + + def initialize_optimizer(self, trainer): + pass + + def step(self, trainer): + pass + + +@HOOKS.register_module(module_name=Hooks.DeepspeedHook) +class DeepspeedHook(Hook): + PRIORITY = Priority.VERY_HIGH + + def __init__(self, + deepspeed_activation_checkpointing=True, + save_zero_checkpoint=False, + with_mpu=True): + self.save_zero_checkpoint = save_zero_checkpoint + self.deepspeed_activation_checkpointing = deepspeed_activation_checkpointing + # TODO without mpu + self.with_mpu = with_mpu + assert with_mpu, 'DeepspeedHook now is only for mpu models.' 
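For orientation, the file naming used by these processors can be reproduced with plain string handling. The sketch below is illustrative only and not part of this patch: the work dir, the epoch prefix, the rank of 0 and a tensor-model-parallel world size of 2 are all made-up assumptions. It mirrors CheckpointProcessor's '.pth' / '_trainer_state.pth' suffixes and the 'mp_rank_XX_model_states.pt' bin file produced and linked by DeepspeedProcessor above.

import os

# Suffix constants as defined on CheckpointProcessor (see checkpoint_processor.py above).
MODEL_STATE_SUFFIX = '.pth'
TRAINER_STATE_SUFFIX = '_trainer_state.pth'


def default_state_files(prefix):
    # Default processor: one model-state file plus one trainer-state file per prefix.
    return prefix + MODEL_STATE_SUFFIX, prefix + TRAINER_STATE_SUFFIX


def deepspeed_state_files(prefix, mp_rank=0, tp_world_size=2):
    # DeepspeedProcessor: the trainer state gets a per-tensor-parallel-rank suffix,
    # while the model weights live in a DeepSpeed bin file under the prefix directory,
    # which save_checkpoints then links (or copies) into '<output_dir>/model'.
    rank_name = '' if tp_world_size == 1 else '_mp_rank_{:02d}'.format(mp_rank)
    bin_file = 'mp_rank_{:02d}_model_states.pt'.format(mp_rank)
    return prefix + rank_name + TRAINER_STATE_SUFFIX, os.path.join(prefix, bin_file)


# Hypothetical prefix, e.g. written by CheckpointHook after the third epoch.
print(default_state_files('/tmp/work_dir/epoch_3'))
# ('/tmp/work_dir/epoch_3.pth', '/tmp/work_dir/epoch_3_trainer_state.pth')
print(deepspeed_state_files('/tmp/work_dir/epoch_3'))
# ('/tmp/work_dir/epoch_3_mp_rank_00_trainer_state.pth',
#  '/tmp/work_dir/epoch_3/mp_rank_00_model_states.pt')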
+ + def register_processor(self, trainer): + processor = DeepspeedProcessor() + optimizer_hook = trainer.get_hook(OptimizerHook) + if len(optimizer_hook) > 0 and not isinstance( + optimizer_hook[0].processor, DeepspeedProcessor): + optimizer_hook[0].set_processor(processor) + ckpt_hook = trainer.get_hook(CheckpointHook) + if len(ckpt_hook) > 0 and not isinstance(ckpt_hook[0].processor, + DeepspeedProcessor): + ckpt_hook[0].set_processor(processor) + best_ckpt_hook = trainer.get_hook(BestCkptSaverHook) + if len(best_ckpt_hook) > 0 and not isinstance( + best_ckpt_hook[0].processor, DeepspeedProcessor): + best_ckpt_hook[0].set_processor(processor) + load_ckpt_hook = trainer.get_hook(LoadCheckpointHook) + if len(load_ckpt_hook) > 0 and not isinstance( + load_ckpt_hook[0].processor, DeepspeedProcessor): + load_ckpt_hook[0].set_processor(processor) + def before_val(self, trainer): pass diff --git a/modelscope/trainers/hooks/megatron_hook.py b/modelscope/trainers/hooks/distributed/megatron_hook.py similarity index 70% rename from modelscope/trainers/hooks/megatron_hook.py rename to modelscope/trainers/hooks/distributed/megatron_hook.py index f01288de..c4aeaf19 100644 --- a/modelscope/trainers/hooks/megatron_hook.py +++ b/modelscope/trainers/hooks/distributed/megatron_hook.py @@ -1,19 +1,129 @@ import os -from copy import deepcopy +import shutil import torch from megatron_util import mpu from modelscope.metainfo import Hooks +from modelscope.trainers import EpochBasedTrainer from modelscope.trainers.hooks.builder import HOOKS +from modelscope.trainers.hooks.checkpoint.checkpoint_hook import ( + BestCkptSaverHook, CheckpointHook, CheckpointProcessor) +from modelscope.trainers.hooks.checkpoint.load_checkpoint_hook import \ + LoadCheckpointHook from modelscope.trainers.hooks.hook import Hook -from modelscope.trainers.parallel.builder import build_parallel from modelscope.utils.checkpoint import load_checkpoint, save_checkpoint from modelscope.utils.constant import DistributedParallelType from modelscope.utils.device import create_device +from modelscope.utils.logger import get_logger from modelscope.utils.megatron_utils import is_megatron_initialized from modelscope.utils.torch_utils import get_local_rank -from .checkpoint_hook import CheckpointHook, LoadCheckpointHook + + +class MpuProcessor(CheckpointProcessor): + + _BIN_FILE_DIR = 'model' + + def rank_name(self): + # TODO + try: + tp_world_size = mpu.get_tensor_model_parallel_world_size() + if tp_world_size == 1: + return '' + mp_rank = mpu.get_tensor_model_parallel_rank() + return '_mp_rank_{:02d}'.format(mp_rank) + except (ImportError, AssertionError): + return '' + + def get_bin_file(self): + mp_rank = mpu.get_tensor_model_parallel_rank() + rank = '{:02d}'.format(mp_rank) + return f'mp_rank_{rank}_model_states.pt' + + def should_save_on_rank(self, trainer): + # TODO + return (not torch.distributed.is_initialized() + ) or mpu.get_data_parallel_rank() == 0 + + def prepare_output(self, trainer, output_dir): + config = trainer.cfg + CheckpointProcessor.copy_files_and_dump_config(trainer, output_dir, + config, + self._BIN_FILE_DIR) + os.makedirs( + os.path.join(output_dir, self._BIN_FILE_DIR), exist_ok=True) + + def save_checkpoints(self, + trainer, + checkpoint_path_prefix, + output_dir, + meta=None): + model = trainer.unwrap_module(trainer.model) + _train_state_file = checkpoint_path_prefix + self.rank_name( + ) + CheckpointProcessor.TRAINER_STATE_SUFFIX + # Save pth file without model state_dict + save_checkpoint( + model, + _train_state_file, + 
trainer.optimizer,
+            trainer.lr_scheduler,
+            meta=meta,
+            with_model=False)
+
+        save_dir = os.path.dirname(checkpoint_path_prefix)
+        prefix = os.path.basename(checkpoint_path_prefix)
+        bin_file = self.get_bin_file()
+        prefix_bin_file = os.path.join(save_dir, prefix + '_' + bin_file)
+        save_checkpoint(model, prefix_bin_file, with_meta=False)
+
+        src_file = prefix_bin_file
+        dest_file = os.path.join(output_dir, self._BIN_FILE_DIR, bin_file)
+        if os.path.isfile(dest_file):
+            os.unlink(dest_file)
+
+        try:
+            os.link(src_file, dest_file)
+        except OSError as e:
+            get_logger().error(
+                f'Link {src_file} to {dest_file} error: {e}, '
+                'changing to copy the bin file, this may cause more space usage.'
+            )
+            shutil.copyfile(src_file, dest_file)
+
+    def remove_checkpoints(self, trainer, checkpoint_path_prefix):
+        _train_state_file = checkpoint_path_prefix + self.rank_name(
+        ) + CheckpointProcessor.TRAINER_STATE_SUFFIX
+        if os.path.isfile(_train_state_file):
+            os.remove(_train_state_file)
+
+        save_dir = os.path.dirname(checkpoint_path_prefix)
+        prefix = os.path.basename(checkpoint_path_prefix)
+        bin_file = self.get_bin_file()
+        absolute_file = os.path.join(save_dir, prefix + '_' + bin_file)
+        if os.path.isfile(absolute_file):
+            os.remove(absolute_file)
+
+    def load_checkpoints(self, checkpoint_path_prefix, trainer, load_all_state,
+                         strict):
+        model = trainer.unwrap_module(trainer.model)
+        if os.path.isdir(checkpoint_path_prefix):
+            save_dir = checkpoint_path_prefix
+            bin_file = self.get_bin_file()
+            model_file = os.path.join(save_dir, bin_file)
+            load_checkpoint(model_file, model, None, None)
+        else:
+            _train_state_file = checkpoint_path_prefix + self.rank_name(
+            ) + CheckpointProcessor.TRAINER_STATE_SUFFIX
+            meta = LoadCheckpointHook.load_trainer_state(
+                trainer, _train_state_file, load_all_state)
+
+            save_dir = os.path.dirname(checkpoint_path_prefix)
+            prefix = os.path.basename(checkpoint_path_prefix)
+            bin_file = self.get_bin_file()
+
+            model_file = os.path.join(save_dir, prefix + '_' + bin_file)
+            load_checkpoint(model_file, model, None, None)
+        return meta
 
 
 @HOOKS.register_module(module_name=Hooks.MegatronHook)
@@ -24,21 +134,20 @@ class MegatronHook(Hook):
     def __init__(self):
         self.wrapped = False
 
-    def register_strategy(self):
-        Hook.overload(
-            name='CheckpointHook.should_save_on_rank',
-            function=self.should_save_on_rank)
-        Hook.overload(
-            name='CheckpointHook.save_checkpoints',
-            function=self.save_checkpoints)
-        Hook.overload(
-            name='LoadCheckpointHook.load_checkpoints',
-            function=self.load_checkpoints)
-        Hook.overload(
-            name='CheckpointHook.remove_checkpoints',
-            function=self.remove_checkpoints)
-        Hook.overload(
-            name='CheckpointHook.prepare_output', function=self.prepare_output)
+    def register_processor(self, trainer: EpochBasedTrainer):
+        processor = MpuProcessor()
+        ckpt_hook = trainer.get_hook(CheckpointHook)
+        if len(ckpt_hook) > 0 and not isinstance(ckpt_hook[0].processor,
+                                                 MpuProcessor):
+            ckpt_hook[0].set_processor(processor)
+        best_ckpt_hook = trainer.get_hook(BestCkptSaverHook)
+        if len(best_ckpt_hook) > 0 and not isinstance(
+                best_ckpt_hook[0].processor, MpuProcessor):
+            best_ckpt_hook[0].set_processor(processor)
+        load_ckpt_hook = trainer.get_hook(LoadCheckpointHook)
+        if len(load_ckpt_hook) > 0 and not isinstance(
+                load_ckpt_hook[0].processor, MpuProcessor):
+            load_ckpt_hook[0].set_processor(processor)
 
     def after_init(self, trainer):
         assert is_megatron_initialized()
@@ -63,97 +172,3 @@
         if not self.wrapped:
             trainer.model =
trainer.to_parallel(trainer.model) self.wrapped = True - - def should_save_on_rank(self, trainer): - # TODO - return (not torch.distributed.is_initialized() - ) or mpu.get_data_parallel_rank() == 0 - - def rank_name(self): - # TODO - try: - tp_world_size = mpu.get_tensor_model_parallel_world_size() - if tp_world_size == 1: - return '' - mp_rank = mpu.get_tensor_model_parallel_rank() - return '_mp_rank_{:02d}'.format(mp_rank) - except (ImportError, AssertionError): - return '' - - def get_bin_file(self): - mp_rank = mpu.get_tensor_model_parallel_rank() - rank = '{:02d}'.format(mp_rank) - return f'mp_rank_{rank}_model_states.pt' - - def save_checkpoints(self, - trainer, - checkpoint_path_prefix, - output_sub_dir, - meta=None): - model = trainer.unwrap_module(trainer.model) - _train_state_file = checkpoint_path_prefix + self.rank_name( - ) + CheckpointHook.TRAINER_STATE_SUFFIX - # Save pth file without model state_dict - save_checkpoint( - model, - _train_state_file, - trainer.optimizer, - trainer.lr_scheduler, - meta=meta, - with_model=False) - - save_dir = os.path.dirname(checkpoint_path_prefix) - prefix = os.path.basename(checkpoint_path_prefix) - bin_file = self.get_bin_file() - prefix_bin_file = os.path.join(save_dir, prefix + '_' + bin_file) - save_checkpoint(model, prefix_bin_file, with_meta=False) - - src_file = prefix_bin_file - dest_file = os.path.join(save_dir, output_sub_dir, self._BIN_FILE_DIR, - bin_file) - if os.path.isfile(dest_file): - os.unlink(dest_file) - - os.link(src_file, dest_file) - - def remove_checkpoints(self, trainer, checkpoint_path_prefix): - _train_state_file = checkpoint_path_prefix + self.rank_name( - ) + CheckpointHook.TRAINER_STATE_SUFFIX - if os.path.isfile(_train_state_file): - os.remove(_train_state_file) - - save_dir = os.path.dirname(checkpoint_path_prefix) - prefix = os.path.basename(checkpoint_path_prefix) - bin_file = self.get_bin_file() - absolute_file = os.path.join(save_dir, prefix + '_' + bin_file) - if os.path.isfile(absolute_file): - os.remove(absolute_file) - - def load_checkpoints(self, checkpoint_path_prefix, trainer, load_all_state, - strict): - model = trainer.unwrap_module(trainer.model) - if os.path.isdir(checkpoint_path_prefix): - save_dir = checkpoint_path_prefix - bin_file = self.get_bin_file() - model_file = os.path.join(save_dir, bin_file) - load_checkpoint(model_file, model, None, None) - else: - _train_state_file = checkpoint_path_prefix + self.rank_name( - ) + CheckpointHook.TRAINER_STATE_SUFFIX - meta = LoadCheckpointHook.load_trainer_state( - trainer, _train_state_file, load_all_state) - - save_dir = os.path.dirname(checkpoint_path_prefix) - prefix = os.path.basename(checkpoint_path_prefix) - bin_file = self.get_bin_file() - - model_file = os.path.join(save_dir, prefix + '_' + bin_file) - load_checkpoint(model_file, model, None, None) - return meta - - def prepare_output(self, trainer, output_dir): - config = trainer.cfg - CheckpointHook.copy_files_and_dump_config(trainer, output_dir, config, - self._BIN_FILE_DIR) - os.makedirs( - os.path.join(output_dir, self._BIN_FILE_DIR), exist_ok=True) diff --git a/modelscope/trainers/hooks/early_stop_hook.py b/modelscope/trainers/hooks/early_stop_hook.py index b15e8e5a..7aba69a4 100644 --- a/modelscope/trainers/hooks/early_stop_hook.py +++ b/modelscope/trainers/hooks/early_stop_hook.py @@ -9,6 +9,12 @@ from .hook import Hook from .priority import Priority +class EarlyStopStrategy: + by_epoch = 'by_epoch' + by_step = 'by_step' + no = 'no' + + 
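The small strategy holder above replaces the old `by_epoch` boolean for early stopping; the same pattern reappears below as EvaluationStrategy for EvaluationHook and LrStrategy for LrSchedulerHook, with the legacy boolean still accepted through `**kwargs`. A minimal illustration of the two equivalent ways to configure the hook defined next; this snippet is not part of the patch, and the metric name and interval values are made up:

from modelscope.trainers.hooks.early_stop_hook import (EarlyStopHook,
                                                       EarlyStopStrategy)

# New style: name the check strategy explicitly.
hook_new = EarlyStopHook(
    metric_key='accuracy',          # metric monitored after each evaluation
    rule='max',                     # stop once it has stopped increasing
    patience=3,
    early_stop_strategy=EarlyStopStrategy.by_step,
    interval=100)                   # check every 100 training iterations

# Legacy style: the old boolean is still accepted and mapped in __init__.
hook_old = EarlyStopHook(
    metric_key='accuracy',
    by_epoch=False,                 # becomes EarlyStopStrategy.by_step
    interval=100)

In a real training run these options would normally appear as an entry under the trainer's `train.hooks` config rather than be constructed directly; the direct calls above only show which arguments map to which.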
@HOOKS.register_module(module_name=Hooks.EarlyStopHook) class EarlyStopHook(Hook): """Early stop when a specific metric stops improving. @@ -16,14 +22,13 @@ class EarlyStopHook(Hook): Args: metric_key (str): Metric key to be monitored. rule (str): Comparison rule for best score. Support "max" and "min". - If rule is "max", the training will stop when `metric_key` has stopped increaing. + If rule is "max", the training will stop when `metric_key` has stopped increasing. If rule is "min", the training will stop when `metric_key` has stopped decreasing. patience (int): Trainer will stop if the monitored metric did not improve for the last `patience` times. - min_delta (float): Minimum change in the monitored metric to quailfy as an improvement. + min_delta (float): Minimum change in the monitored metric to qualify as an improvement. check_finite (bool): If true, stops training when the metric becomes NaN or infinite. - by_epoch (int): Saving checkpoints by epoch or by iteration. - interval (int): The frequency to trigger early stop check. If `by_epoch=True`, - it means the number of epochs, else means the number of iterations. + early_stop_strategy (str): The strategy to early stop, can be by_epoch/by_step/none + interval (int): The frequency to trigger early stop check, by epoch or step. """ PRIORITY = Priority.VERY_LOW @@ -35,14 +40,19 @@ class EarlyStopHook(Hook): patience: int = 3, min_delta: float = 0.0, check_finite: bool = True, - by_epoch: bool = True, - interval: int = 1): + early_stop_strategy: str = EarlyStopStrategy.by_epoch, + interval: int = 1, + **kwargs): self.metric_key = metric_key self.rule = rule self.patience = patience self.min_delta = min_delta self.check_finite = check_finite - self.by_epoch = by_epoch + if 'by_epoch' in kwargs: + self.early_stop_strategy = EarlyStopStrategy.by_epoch if kwargs[ + 'by_epoch'] else EarlyStopStrategy.by_step + else: + self.early_stop_strategy = early_stop_strategy self.interval = interval self.wait_count = 0 @@ -89,7 +99,7 @@ class EarlyStopHook(Hook): trainer._stop_training = True def after_train_epoch(self, trainer): - if not self.by_epoch: + if self.early_stop_strategy != EarlyStopStrategy.by_epoch: return if not self.every_n_epochs(trainer, self.interval): @@ -99,7 +109,7 @@ class EarlyStopHook(Hook): self._stop_training(trainer) def after_train_iter(self, trainer): - if self.by_epoch: + if self.early_stop_strategy != EarlyStopStrategy.by_step: return if not self.every_n_iters(trainer, self.interval): diff --git a/modelscope/trainers/hooks/evaluation_hook.py b/modelscope/trainers/hooks/evaluation_hook.py index 80c8c31a..c29a6d6a 100644 --- a/modelscope/trainers/hooks/evaluation_hook.py +++ b/modelscope/trainers/hooks/evaluation_hook.py @@ -1,11 +1,18 @@ # Copyright (c) Alibaba, Inc. and its affiliates. from collections import OrderedDict +from typing import Optional from modelscope.metainfo import Hooks from .builder import HOOKS from .hook import Hook +class EvaluationStrategy: + by_epoch = 'by_epoch' + by_step = 'by_step' + no = 'no' + + @HOOKS.register_module(module_name=Hooks.EvaluationHook) class EvaluationHook(Hook): """ @@ -18,21 +25,34 @@ class EvaluationHook(Hook): Default: None, validate every interval epochs/iterations from scratch. 
""" - def __init__(self, interval=1, by_epoch=True, start_idx=None): + def __init__(self, + interval: Optional[int] = 1, + eval_strategy: Optional[str] = EvaluationStrategy.by_epoch, + start_idx: Optional[int] = None, + **kwargs): assert interval > 0, 'interval must be a positive number' self.interval = interval self.start_idx = start_idx - self.by_epoch = by_epoch + self.last_eval_tag = (None, None) + if 'by_epoch' in kwargs: + self.eval_strategy = EvaluationStrategy.by_epoch if kwargs[ + 'by_epoch'] else EvaluationStrategy.by_step + else: + self.eval_strategy = eval_strategy def after_train_iter(self, trainer): """Called after every training iter to evaluate the results.""" - if not self.by_epoch and self._should_evaluate(trainer): + if self.eval_strategy == EvaluationStrategy.by_step and self._should_evaluate( + trainer): self.do_evaluate(trainer) + self.last_eval_tag = ('iter', trainer.iter) def after_train_epoch(self, trainer): """Called after every training epoch to evaluate the results.""" - if self.by_epoch and self._should_evaluate(trainer): + if self.eval_strategy == EvaluationStrategy.by_epoch and self._should_evaluate( + trainer): self.do_evaluate(trainer) + self.last_eval_tag = ('epoch', trainer.epoch) def add_visualization_info(self, trainer, results): if trainer.visualization_buffer.output.get('eval_results', @@ -64,7 +84,7 @@ class EvaluationHook(Hook): Returns: bool: The flag indicating whether to perform evaluation. """ - if self.by_epoch: + if self.eval_strategy == EvaluationStrategy.by_epoch: current = trainer.epoch check_time = self.every_n_epochs else: diff --git a/modelscope/trainers/hooks/hook.py b/modelscope/trainers/hooks/hook.py index 70e06fbd..93ea8541 100644 --- a/modelscope/trainers/hooks/hook.py +++ b/modelscope/trainers/hooks/hook.py @@ -22,9 +22,6 @@ class Hook: PRIORITY = Priority.NORMAL - # The strategic function dict. - _strategies = dict() - def after_init(self, trainer): """ Will be called at the end of the trainer's `__init__` method @@ -201,42 +198,48 @@ class Hook: """ self.after_iter(trainer) - def every_n_epochs(self, trainer, n): + @staticmethod + def every_n_epochs(trainer, n): """ Whether to reach every ``n`` epochs Returns: bool """ return (trainer.epoch + 1) % n == 0 if n > 0 else False - def every_n_inner_iters(self, runner, n): + @staticmethod + def every_n_inner_iters(runner, n): """ Whether to reach every ``n`` iterations at every epoch Returns: bool """ return (runner.inner_iter + 1) % n == 0 if n > 0 else False - def every_n_iters(self, trainer, n): + @staticmethod + def every_n_iters(trainer, n): """ Whether to reach every ``n`` iterations Returns: bool """ return (trainer.iter + 1) % n == 0 if n > 0 else False - def end_of_epoch(self, trainer): + @staticmethod + def end_of_epoch(trainer): """ Whether to reach the end of every epoch Returns: bool """ return trainer.inner_iter + 1 == trainer.iters_per_epoch - def is_last_epoch(self, trainer): + @staticmethod + def is_last_epoch(trainer): """ Whether to reach the last epoch Returns: bool """ return trainer.epoch + 1 == trainer.max_epochs - def is_last_iter(self, trainer): + @staticmethod + def is_last_iter(trainer): """ Whether to reach the last iteration in the entire training process Returns: bool @@ -256,54 +259,3 @@ class Hook: def load_state_dict(self, state_dict): pass - - @staticmethod - def clear_strategies(): - Hook._strategies.clear() - - @staticmethod - def overload(function, name=None): - """Register a function to a strategic function. 
- - Args: - function(`method` or `Callable`): The function instance. - name(`str`): The name of the strategic function, which specifies by the method `consume` - """ - - _name = name or function.__name__ - if _name not in Hook._strategies: - Hook._strategies[_name] = [] - - Hook._strategies[_name].append(function) - - @staticmethod - def overload_func(name=None): - """Declare a function as a strategic function, which can be replaced by some other functions. - - This function should be used in annotations. - - Args: - name(str): The strategic function name. - """ - - def _register(function): - - @wraps(function) - def _call(*args, **kwargs): - _name = name or function.__name__ - producers = Hook._strategies.get(_name, []) - - if len(producers) == 0: - return function(*args, **kwargs) - else: - if len(producers) > 1: - raise ValueError( - f'Multiple functions registered to {_name}, ' - f'here is the list: {producers}') - if isinstance(args[0], Hook): - args = args[1:] - return producers[0](*args, **kwargs) - - return _call - - return _register diff --git a/modelscope/trainers/hooks/lr_scheduler_hook.py b/modelscope/trainers/hooks/lr_scheduler_hook.py index 28ce250c..51a8e858 100644 --- a/modelscope/trainers/hooks/lr_scheduler_hook.py +++ b/modelscope/trainers/hooks/lr_scheduler_hook.py @@ -1,4 +1,5 @@ # Copyright (c) Alibaba, Inc. and its affiliates. + from modelscope.metainfo import Hooks from modelscope.trainers.lrscheduler.builder import build_lr_scheduler from modelscope.utils.constant import LogKeys @@ -9,6 +10,42 @@ from .hook import Hook from .priority import Priority +class LrSchedulerProcessor: + + def __init__(self): + self.lr_strategy = None + self.warmup_lr_scheduler = None + + def set_lr_strategy(self, lr_strategy): + self.lr_strategy = lr_strategy + + def set_warmup_lr_scheduler(self, warmup_lr_scheduler): + self.warmup_lr_scheduler = warmup_lr_scheduler + + def initialize_lr_scheduler(self, trainer): + """Initialize the lr scheduler. + + This is a strategic function which can be registered by other hook's function. + """ + pass + + def step(self, trainer): + """Do lr scheduler's step. + + This is a strategic function which can be registered by other hook's function. + """ + if self.warmup_lr_scheduler is not None: + self.warmup_lr_scheduler.step() + else: + trainer.lr_scheduler.step() + + +class LrStrategy: + by_epoch = 'by_epoch' + by_step = 'by_step' + no = 'no' + + @HOOKS.register_module(module_name=Hooks.LrSchedulerHook) class LrSchedulerHook(Hook): """Lr scheduler. 
@@ -19,38 +56,33 @@ class LrSchedulerHook(Hook): """ PRIORITY = Priority.LOW - def __init__(self, by_epoch=True, warmup=None, **kwargs) -> None: + def __init__(self, + lr_strategy=LrStrategy.by_epoch, + warmup=None, + **kwargs) -> None: super().__init__() - self.by_epoch = by_epoch + if 'by_epoch' in kwargs: + self.lr_strategy = LrStrategy.by_epoch if kwargs[ + 'by_epoch'] else LrStrategy.by_step + else: + self.lr_strategy = lr_strategy self.warmup = warmup self.warmup_lr_scheduler = None + self.processor = LrSchedulerProcessor() + + def set_processor(self, processor): + self.processor = processor def before_run(self, trainer): - self.initialize_lr_scheduler(trainer) + self.processor.set_lr_strategy(self.lr_strategy) if self.warmup is not None: assert isinstance(self.warmup, dict) and 'type' in self.warmup self.warmup_lr_scheduler = build_lr_scheduler( cfg=self.warmup, default_args={'base_scheduler': trainer.lr_scheduler}) + self.processor.set_warmup_lr_scheduler(self.warmup_lr_scheduler) - @Hook.overload_func(name='LrSchedulerHook.initialize_lr_scheduler') - def initialize_lr_scheduler(self, trainer): - """Initialize the lr scheduler. - - This is a strategic function which can be registered by other hook's function. - """ - pass - - @Hook.overload_func(name='LrSchedulerHook.step') - def step(self, trainer): - """Do lr scheduler's step. - - This is a strategic function which can be registered by other hook's function. - """ - if self.warmup_lr_scheduler is not None: - self.warmup_lr_scheduler.step() - else: - trainer.lr_scheduler.step() + self.processor.initialize_lr_scheduler(trainer) def get_current_lr(self, trainer): import torch @@ -67,17 +99,17 @@ class LrSchedulerHook(Hook): return lr def after_train_iter(self, trainer): - if not self.by_epoch and trainer.iter >= getattr( + if self.lr_strategy == LrStrategy.by_step and trainer.iter >= getattr( trainer, 'cumulative_iters', 1) - 1: - self.step(trainer) + self.processor.step(trainer) trainer.log_buffer.output[LogKeys.LR] = self._get_log_lr(trainer) def before_train_epoch(self, trainer): trainer.log_buffer.output[LogKeys.LR] = self._get_log_lr(trainer) def after_train_epoch(self, trainer): - if self.by_epoch: - self.step(trainer) + if self.lr_strategy == LrStrategy.by_epoch: + self.processor.step(trainer) def _get_log_lr(self, trainer): cur_lr = self.get_current_lr(trainer) @@ -94,6 +126,29 @@ class LrSchedulerHook(Hook): return lr +class PlateauLrSchedulerProcessor(LrSchedulerProcessor): + + def __init__(self, metric_key): + super().__init__() + self.metric_key = metric_key + + def step(self, trainer): + # adapt to evaluation interval is greater than 1 + if trainer.metric_values is None: + if is_master(): + print( + f'Current epoch {trainer.epoch} has no evaluation metric values, skip lr_scheduler.step() !' + ) + return + + metrics = trainer.metric_values[self.metric_key] + if self.lr_strategy == LrStrategy.by_epoch: + if self.warmup_lr_scheduler is not None: + self.warmup_lr_scheduler.step(metrics=metrics) + else: + trainer.lr_scheduler.step(metrics=metrics) + + @HOOKS.register_module(module_name=Hooks.PlateauLrSchedulerHook) class PlateauLrSchedulerHook(Hook): """Lr scheduler hook for `ReduceLROnPlateau`. 
@@ -105,10 +160,16 @@ class PlateauLrSchedulerHook(Hook): PRIORITY = Priority.LOW # should be after EvaluationHook def __init__(self, metric_key, **kwargs): + super().__init__() self.metric_key = metric_key - def register_strategy(self): - Hook.overload(name='LrSchedulerHook.step', function=self.step) + def register_processor(self, trainer): + lr_scheduler_hook = trainer.get_hook(LrSchedulerHook) + if len(lr_scheduler_hook) > 0 and type( + lr_scheduler_hook[0].processor) in (type(None), + LrSchedulerProcessor): + lr_scheduler_hook[0].set_processor( + PlateauLrSchedulerProcessor(self.metric_key)) def before_run(self, trainer): if not hasattr(trainer, 'logger'): @@ -116,23 +177,6 @@ class PlateauLrSchedulerHook(Hook): else: self.logger = trainer.logger - def step(self, trainer): - # adapt to evaluation intervel is greater than 1 - if trainer.metric_values is None: - if is_master(): - self.logger.warning( - f'Current epoch {trainer.epoch} has no evaluation metric values, skip lr_scheduler.step() !' - ) - return - - metrics = trainer.metric_values[self.metric_key] - lr_scheduler_hook = trainer.get_hook(LrSchedulerHook)[0] - if lr_scheduler_hook.by_epoch: - if lr_scheduler_hook.warmup_lr_scheduler is not None: - lr_scheduler_hook.warmup_lr_scheduler.step(metrics=metrics) - else: - trainer.lr_scheduler.step(metrics=metrics) - @HOOKS.register_module(module_name=Hooks.NoneLrSchedulerHook) class NoneLrSchedulerHook(LrSchedulerHook): diff --git a/modelscope/trainers/hooks/optimizer/apex_optimizer_hook.py b/modelscope/trainers/hooks/optimizer/apex_optimizer_hook.py index bd1034f3..3c874ccf 100644 --- a/modelscope/trainers/hooks/optimizer/apex_optimizer_hook.py +++ b/modelscope/trainers/hooks/optimizer/apex_optimizer_hook.py @@ -7,40 +7,14 @@ from packaging import version from modelscope.metainfo import Hooks from modelscope.trainers.hooks import Hook from modelscope.trainers.hooks.builder import HOOKS -from .base import OptimizerHook +from .base import OptimizerHook, OptimizerProcessor -@HOOKS.register_module(module_name=Hooks.ApexAMPOptimizerHook) -class ApexAMPOptimizerHook(Hook): - """ - Fp16 optimizer, if torch version is less than 1.6.0, - you must install apex (https://www.github.com/nvidia/apex) else use torch.cuda.amp by default +class ApexOptimizerProcessor(OptimizerProcessor): - Args: - opt_level (str): "O0" and "O3" are not true mixed precision, - but they are useful for establishing accuracy and speed baselines, respectively. - "O1" and "O2" are different implementations of mixed precision. - Try both, and see what gives the best speedup and accuracy for your model. - """ - - PRIORITY = OptimizerHook.PRIORITY - - def __init__(self, opt_level='O1', **kwargs): + def __init__(self, opt_level): self.opt_level = opt_level - try: - from apex import amp - except ImportError: - raise ValueError( - 'apex not installed, please install apex from https://www.github.com/nvidia/apex.' 
- ) - - def register_strategy(self): - Hook.overload( - name='OptimizerHook.initialize_optimizer', - function=self.initialize_optimizer) - Hook.overload(name='OptimizerHook.backward', function=self.backward) - def initialize_optimizer(self, trainer): from apex import amp @@ -68,10 +42,44 @@ class ApexAMPOptimizerHook(Hook): trainer.optimizer) as scaled_loss: scaled_loss.backward() - if self.every_n_iters(trainer, cumulative_iters): + if Hook.every_n_iters(trainer, cumulative_iters): if grad_clip is not None: - OptimizerHook.clip_grads(trainer.model.parameters(), - **grad_clip) + OptimizerProcessor.clip_grads(trainer.model.parameters(), + **grad_clip) trainer.optimizer.step() trainer.optimizer.zero_grad() + + +@HOOKS.register_module(module_name=Hooks.ApexAMPOptimizerHook) +class ApexAMPOptimizerHook(Hook): + """ + Fp16 optimizer, if torch version is less than 1.6.0, + you must install apex (https://www.github.com/nvidia/apex) else use torch.cuda.amp by default + + Args: + opt_level (str): "O0" and "O3" are not true mixed precision, + but they are useful for establishing accuracy and speed baselines, respectively. + "O1" and "O2" are different implementations of mixed precision. + Try both, and see what gives the best speedup and accuracy for your model. + """ + + PRIORITY = OptimizerHook.PRIORITY + + def __init__(self, opt_level='O1', **kwargs): + self.opt_level = opt_level + + try: + from apex import amp + except ImportError: + raise ValueError( + 'apex not installed, please install apex from https://www.github.com/nvidia/apex.' + ) + + def register_processor(self, trainer): + optimizer_hook = trainer.get_hook(OptimizerHook) + if len(optimizer_hook) > 0 and type( + optimizer_hook[0].processor) in (type(None), + OptimizerProcessor): + optimizer_hook[0].set_processor( + ApexOptimizerProcessor(self.opt_level)) diff --git a/modelscope/trainers/hooks/optimizer/base.py b/modelscope/trainers/hooks/optimizer/base.py index f0d62612..ca20720d 100644 --- a/modelscope/trainers/hooks/optimizer/base.py +++ b/modelscope/trainers/hooks/optimizer/base.py @@ -10,6 +10,48 @@ from modelscope.trainers.hooks.hook import Hook from modelscope.trainers.hooks.priority import Priority +class OptimizerProcessor: + + def initialize_optimizer(self, trainer): + """Initialize the optimizer. + + This is a strategic function which can be registered by other hook's function. + """ + trainer.optimizer.zero_grad() + + def before_forward(self, trainer): + pass + + def backward(self, trainer, loss_keys, cumulative_iters, grad_clip): + """Do module backward, optimizer's step and zero_grad and clip the grads. + + This is a strategic function which can be registered by other hook's function. + + Args: + trainer(`EpochBasedTrainer`): The trainer instance. + loss_keys(`list`): The list of loss keys. + cumulative_iters(`int`): The cumulative iters for gradients. + grad_clip(`dict`): The grad clipping options. 
+ """ + for k in loss_keys: + trainer.train_outputs[k] /= cumulative_iters + trainer.train_outputs[k].backward() + + if Hook.every_n_iters(trainer, cumulative_iters): + if grad_clip is not None: + self.clip_grads(trainer.model.parameters(), **grad_clip) + + trainer.optimizer.step() + trainer.optimizer.zero_grad() + + @staticmethod + def clip_grads(params, **clip_args): + params = list( + filter(lambda p: p.requires_grad and p.grad is not None, params)) + if len(params) > 0: + return clip_grad.clip_grad_norm_(params, **clip_args) + + @HOOKS.register_module(module_name=Hooks.OptimizerHook) class OptimizerHook(Hook): """Optimizer hook @@ -36,52 +78,21 @@ class OptimizerHook(Hook): self.loss_keys = loss_keys self.cumulative_iters = cumulative_iters self.grad_clip = grad_clip + self.processor = OptimizerProcessor() - @staticmethod - def clip_grads(params, **clip_args): - params = list( - filter(lambda p: p.requires_grad and p.grad is not None, params)) - if len(params) > 0: - return clip_grad.clip_grad_norm_(params, **clip_args) - - @Hook.overload_func(name='OptimizerHook.initialize_optimizer') - def initialize_optimizer(self, trainer): - """Initialize the optimizer. - - This is a strategic function which can be registered by other hook's function. - """ - trainer.optimizer.zero_grad() + def set_processor(self, processor): + self.processor = processor def before_run(self, trainer): - self.initialize_optimizer(trainer) trainer.cumulative_iters = self.cumulative_iters + self.processor.initialize_optimizer(trainer) - @Hook.overload_func(name='OptimizerHook.backward') - def backward(self, trainer, loss_keys, cumulative_iters, grad_clip): - """Do module backward, optimizer's step and zero_grad and clip the grads. - - This is a strategic function which can be registered by other hook's function. - - Args: - trainer(`EpochBasedTrainer`): The trainer instance. - loss_keys(`list`): The list of loss keys. - cumulative_iters(`int`): The cumulative iters for gradients. - grad_clip(`dict`): The grad clipping options. 
- """ - for k in loss_keys: - trainer.train_outputs[k] /= cumulative_iters - trainer.train_outputs[k].backward() - - if self.every_n_iters(trainer, cumulative_iters): - if grad_clip is not None: - self.clip_grads(trainer.model.parameters(), **grad_clip) - - trainer.optimizer.step() - trainer.optimizer.zero_grad() + def before_train_iter(self, trainer): + self.processor.before_forward(trainer) def after_train_iter(self, trainer): - self.backward(trainer, self.loss_keys, self.cumulative_iters, - self.grad_clip) + self.processor.backward(trainer, self.loss_keys, self.cumulative_iters, + self.grad_clip) @HOOKS.register_module(module_name=Hooks.NoneOptimizerHook) diff --git a/modelscope/trainers/hooks/optimizer/torch_optimizer_hook.py b/modelscope/trainers/hooks/optimizer/torch_optimizer_hook.py index 1ab89720..fc7d2672 100644 --- a/modelscope/trainers/hooks/optimizer/torch_optimizer_hook.py +++ b/modelscope/trainers/hooks/optimizer/torch_optimizer_hook.py @@ -4,7 +4,45 @@ import logging from modelscope.metainfo import Hooks from modelscope.trainers.hooks import Hook from modelscope.trainers.hooks.builder import HOOKS -from .base import OptimizerHook +from .base import OptimizerHook, OptimizerProcessor + + +class TorchAMPOptimizerProcessor(OptimizerProcessor): + + def __init__(self, scaler, scale_update_param): + self.scaler = scaler + self.scale_update_param = scale_update_param + + def before_forward(self, trainer): + from torch.cuda import amp + setattr(self._model, 'forward', amp.autocast()(self._model.forward)) + + def initialize_optimizer(self, trainer): + logging.info('open fp16') + trainer.optimizer.zero_grad() + + model = trainer.unwrap_module(trainer.model) + self._ori_model_forward = model.forward + self._model = model + + def backward(self, trainer, loss_keys, cumulative_iters, grad_clip): + for k in loss_keys: + trainer.train_outputs[k] /= cumulative_iters + + for k in loss_keys: + self.scaler.scale(trainer.train_outputs[k]).backward() + + if Hook.every_n_iters(trainer, cumulative_iters): + self.scaler.unscale_(trainer.optimizer) + if grad_clip is not None: + OptimizerProcessor.clip_grads(trainer.model.parameters(), + **grad_clip) + + self.scaler.step(trainer.optimizer) + self.scaler.update(self.scale_update_param) + trainer.optimizer.zero_grad() + + setattr(self._model, 'forward', self._ori_model_forward) @HOOKS.register_module(module_name=Hooks.TorchAMPOptimizerHook) @@ -44,39 +82,11 @@ class TorchAMPOptimizerHook(Hook): '`loss_scale` type must be in [float, dict], but got {loss_scale}' ) - def register_strategy(self): - Hook.overload( - name='OptimizerHook.initialize_optimizer', - function=self.initialize_optimizer) - Hook.overload(name='OptimizerHook.backward', function=self.backward) - - def initialize_optimizer(self, trainer): - logging.info('open fp16') - trainer.optimizer.zero_grad() - - model = trainer.unwrap_module(trainer.model) - self._ori_model_forward = model.forward - self._model = model - - def before_train_iter(self, trainer): - from torch.cuda import amp - setattr(self._model, 'forward', amp.autocast()(self._model.forward)) - - def backward(self, trainer, loss_keys, cumulative_iters, grad_clip): - for k in loss_keys: - trainer.train_outputs[k] /= cumulative_iters - - for k in loss_keys: - self.scaler.scale(trainer.train_outputs[k]).backward() - - if self.every_n_iters(trainer, cumulative_iters): - self.scaler.unscale_(trainer.optimizer) - if grad_clip is not None: - OptimizerHook.clip_grads(trainer.model.parameters(), - **grad_clip) - - 
self.scaler.step(trainer.optimizer) - self.scaler.update(self._scale_update_param) - trainer.optimizer.zero_grad() - - setattr(self._model, 'forward', self._ori_model_forward) + def register_processor(self, trainer): + optimizer_hook = trainer.get_hook(OptimizerHook) + if len(optimizer_hook) > 0 and type( + optimizer_hook[0].processor) in (type(None), + OptimizerProcessor): + optimizer_hook[0].set_processor( + TorchAMPOptimizerProcessor(self.scaler, + self._scale_update_param)) diff --git a/modelscope/trainers/multi_modal/clip/clip_trainer.py b/modelscope/trainers/multi_modal/clip/clip_trainer.py index b0415bc2..ae00232f 100644 --- a/modelscope/trainers/multi_modal/clip/clip_trainer.py +++ b/modelscope/trainers/multi_modal/clip/clip_trainer.py @@ -176,11 +176,10 @@ class CLIPTrainer(EpochBasedTrainer): self.dataset_cfg = cfg.dataset if hasattr(self.dataset_cfg, 'column_map'): # cases where dataset key names are not "img" and "text" - img_key_name = getattr(self.dataset_cfg.column_map, 'img', 'img') + img_key_name = self.dataset_cfg['column_map'].get('img', 'img') preprocessor[ConfigKeys.train].set_input_img_key(img_key_name) preprocessor[ConfigKeys.val].set_input_img_key(img_key_name) - text_key_name = getattr(self.dataset_cfg.column_map, 'text', - 'text') + text_key_name = self.dataset_cfg['column_map'].get('text', 'text') preprocessor[ConfigKeys.train].set_input_text_key(text_key_name) preprocessor[ConfigKeys.val].set_input_text_key(text_key_name) self.global_batch_size = cfg.train.dataloader.batch_size_per_gpu * world_size diff --git a/modelscope/trainers/nlp/__init__.py b/modelscope/trainers/nlp/__init__.py index 755e5387..ae102efa 100644 --- a/modelscope/trainers/nlp/__init__.py +++ b/modelscope/trainers/nlp/__init__.py @@ -10,6 +10,7 @@ if TYPE_CHECKING: from .text_generation_trainer import TextGenerationTrainer from .sentence_embedding_trainer import SentenceEmbeddingTrainer from .siamese_uie_trainer import SiameseUIETrainer + from .translation_evaluation_trainer import TranslationEvaluationTrainer else: _import_structure = { 'sequence_classification_trainer': ['SequenceClassificationTrainer'], @@ -17,7 +18,8 @@ else: 'text_ranking_trainer': ['TextRankingTrainer'], 'text_generation_trainer': ['TextGenerationTrainer'], 'sentence_emebedding_trainer': ['SentenceEmbeddingTrainer'], - 'siamese_uie_trainer': ['SiameseUIETrainer'] + 'siamese_uie_trainer': ['SiameseUIETrainer'], + 'translation_evaluation_trainer': ['TranslationEvaluationTrainer'] } import sys diff --git a/modelscope/trainers/nlp/translation_evaluation_trainer.py b/modelscope/trainers/nlp/translation_evaluation_trainer.py new file mode 100644 index 00000000..05e9db89 --- /dev/null +++ b/modelscope/trainers/nlp/translation_evaluation_trainer.py @@ -0,0 +1,396 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
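
As an aside before the body of this new file: once the trainer below is registered under `Trainers.translation_evaluation_trainer`, it is expected to be built like any other trainer in this codebase. A rough usage sketch only; the model id is a hypothetical placeholder, not a real hub id:

from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer

# 'damo/nlp_unite_translation_evaluation' is a placeholder model id.
trainer = build_trainer(
    name=Trainers.translation_evaluation_trainer,
    default_args=dict(model='damo/nlp_unite_translation_evaluation'))
trainer.train()
metric_values = trainer.evaluate()
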
+"""PyTorch trainer for UniTE model.""" + +import os.path as osp +import random +from math import ceil +from os import mkdir +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +import torch +from pandas import DataFrame +from torch.nn.functional import pad +from torch.nn.utils import clip_grad_norm_ +from torch.optim import AdamW, Optimizer +from torch.utils.data import (BatchSampler, DataLoader, Dataset, Sampler, + SequentialSampler, SubsetRandomSampler) +from torch.utils.tensorboard import SummaryWriter +from tqdm import tqdm +from transformers import AutoTokenizer + +from modelscope.metainfo import Metrics, Trainers +from modelscope.metrics import Metric +from modelscope.metrics.builder import MetricKeys, build_metric +from modelscope.models.base import TorchModel +from modelscope.models.nlp.unite.configuration import InputFormat +from modelscope.models.nlp.unite.translation_evaluation import ( + UniTEForTranslationEvaluation, combine_input_sentences) +from modelscope.msdatasets import MsDataset +from modelscope.preprocessors import Preprocessor +from modelscope.trainers.builder import TRAINERS +from modelscope.trainers.hooks import Hook +from modelscope.trainers.trainer import EpochBasedTrainer +from modelscope.utils.config import ConfigDict +from modelscope.utils.constant import (ConfigKeys, Fields, ModeKeys, ModelFile, + TrainerStages) +from modelscope.utils.device import create_device +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +class TranslationEvaluationTrainingSampler(Sampler): + + def __init__(self, num_of_samples: int, + batch_size_for_each_input_format: int): + r"""Build a sampler for model training with translation evaluation trainer. + The trainer should derive samples for each subset of the entire dataset. + + Args: + num_of_samples: The number of samples in total. + batch_size_for_each_input_format: During training, the batch size for each input format + + Returns: + A data sampler for translation evaluation model training. + + """ + + self.num_of_samples = num_of_samples + self.batch_size_for_each_input_format = batch_size_for_each_input_format + + self.num_of_samples_for_each_input_format = self.num_of_samples // 3 + num_of_samples_to_use = self.num_of_samples_for_each_input_format * 3 + + logger.info( + '%d samples are given for training. ' + 'Using %d samples for each input format. ' + 'Leaving the last %d samples unused.' 
% + (self.num_of_samples, self.num_of_samples_for_each_input_format, + self.num_of_samples - num_of_samples_to_use)) + self.num_of_samples = num_of_samples_to_use + + random_permutations = torch.randperm( + self.num_of_samples).cpu().tolist() + + self.subset_iterators = dict() + self.subset_samplers = dict() + self.indices_for_each_input_format = dict() + for input_format_index, input_format in \ + enumerate((InputFormat.SRC_REF, InputFormat.SRC, InputFormat.REF)): + start_idx = input_format_index * self.num_of_samples_for_each_input_format + end_idx = start_idx + self.num_of_samples_for_each_input_format + self.indices_for_each_input_format[ + input_format] = random_permutations[start_idx:end_idx] + self.subset_samplers[input_format] = \ + BatchSampler(SubsetRandomSampler(self.indices_for_each_input_format[input_format]), + batch_size=self.batch_size_for_each_input_format, + drop_last=True) + self.subset_iterators[input_format] = iter( + self.subset_samplers[input_format]) + + self.num_of_sampled_batches = 0 + + if self.__len__() == 0: + raise ValueError( + 'The dataset doesn\'t contain enough examples to form a single batch.', + 'Please reduce the batch_size or use more examples for training.' + ) + + return + + def __iter__(self): + while True: + try: + if self.num_of_sampled_batches == self.__len__(): + for input_format in (InputFormat.SRC_REF, InputFormat.SRC, + InputFormat.REF): + while True: + try: + next(self.subset_iterators[input_format]) + except StopIteration: + self.subset_iterators[input_format] = \ + iter(self.subset_samplers[input_format]) + break + + self.num_of_sampled_batches = 0 + + output = list() + for input_format_idx, input_format in \ + enumerate((InputFormat.SRC_REF, InputFormat.SRC, InputFormat.REF)): + output += next(self.subset_iterators[input_format]) + + self.num_of_sampled_batches += 1 + + yield output + except StopIteration: + break + + def __len__(self) -> int: + return self.num_of_samples_for_each_input_format // self.batch_size_for_each_input_format + + +def convert_csv_dict_to_input( + batch: List[Dict[str, Any]], + preprocessor: Preprocessor) -> Tuple[List[torch.Tensor]]: + + input_dict = dict() + + for key in batch[0].keys(): + input_dict[key] = list(x[key] for x in batch) + + input_dict = preprocessor(input_dict) + + return input_dict + + +def data_collate_fn(batch: List[Dict[str, Any]], batch_size: int, + preprocessor: Preprocessor) -> List[Dict[str, Any]]: + + output_dict = dict() + output_dict['input_format'] = list() + + if preprocessor.mode == ModeKeys.TRAIN: + for input_format_index, input_format in \ + enumerate((InputFormat.SRC_REF, InputFormat.SRC, InputFormat.REF)): + start_idx = input_format_index * batch_size + end_idx = start_idx + batch_size + batch_to_process = batch[start_idx:end_idx] + output_dict['input_format'] += [input_format] * batch_size + preprocessor.change_input_format(input_format) + batch_to_process = convert_csv_dict_to_input( + batch_to_process, preprocessor) + + for key, value in batch_to_process.items(): + if key not in output_dict.keys(): + output_dict[key] = list() + output_dict[key].append(value) + elif preprocessor.mode == ModeKeys.EVAL: + output_dict['input_format'] += [preprocessor.input_format] * len(batch) + batch = convert_csv_dict_to_input(batch, preprocessor) + + for key, value in batch.items(): + if key not in output_dict.keys(): + output_dict[key] = list() + output_dict[key].append(value) + else: + raise ValueError( + 'During training, %s mode is not allowed for preprocessor.' 
+ % preprocessor.mode) + + input_max_lengths = max(x.size(-1) for x in output_dict['input_ids']) + output_dict['input_ids'] = list( + pad(x, + pad=(0, input_max_lengths - x.size(-1)), + value=preprocessor.pad_token_id) for x in output_dict['input_ids']) + + output_dict['input_ids'] = torch.cat(output_dict['input_ids'], dim=0) + output_dict['score'] = torch.Tensor(output_dict['score']).view(-1) + + if preprocessor.mode == ModeKeys.EVAL: + output_dict['lp'] = sum(output_dict['lp'], list()) + output_dict['raw_score'] = sum(output_dict['raw_score'], list()) + output_dict['segment_id'] = sum(output_dict['segment_id'], list()) + + return output_dict + + +@TRAINERS.register_module(module_name=Trainers.translation_evaluation_trainer) +class TranslationEvaluationTrainer(EpochBasedTrainer): + + def __init__(self, + model: Optional[Union[TorchModel, torch.nn.Module, + str]] = None, + cfg_file: Optional[str] = None, + device: str = 'gpu', + *args, + **kwargs): + r"""Build a translation evaluation trainer with a model dir or a model id in the model hub. + + Args: + model: A Model instance. + cfg_file: The path for the configuration file (configuration.json). + device: Used device for this trainer. + + """ + + def data_collator_for_train(x): + return data_collate_fn( + x, + batch_size=self.cfg.train.batch_size, + preprocessor=self.train_preprocessor) + + def data_collator_for_eval(x): + return data_collate_fn( + x, + batch_size=self.cfg.evaluation.batch_size, + preprocessor=self.eval_preprocessor) + + data_collator = { + ConfigKeys.train: data_collator_for_train, + ConfigKeys.val: data_collator_for_eval + } + + super().__init__( + model, + cfg_file=cfg_file, + data_collator=data_collator, + *args, + **kwargs) + + self.train_dataloader = None + self.eval_dataloader = None + + return + + def build_optimizer(self, cfg: ConfigDict) -> Optimizer: + r"""Sets the optimizers to be used during training.""" + if self.cfg.train.optimizer.type != 'AdamW': + return super().build_optimizer(cfg) + + # Freezing embedding layers for more efficient training. + for param in self.model.encoder.embeddings.parameters(): + param.requires_grad = False + + logger.info('Building AdamW optimizer ...') + learning_rates_and_parameters = list({ + 'params': + self.model.encoder.encoder.layer[i].parameters(), + 'lr': + self.cfg.train.optimizer.plm_lr + * self.cfg.train.optimizer.plm_lr_layerwise_decay**i, + } for i in range(0, self.cfg.model.num_hidden_layers)) + + learning_rates_and_parameters.append({ + 'params': + self.model.encoder.embeddings.parameters(), + 'lr': + self.cfg.train.optimizer.plm_lr, + }) + + learning_rates_and_parameters.append({ + 'params': + self.model.estimator.parameters(), + 'lr': + self.cfg.train.optimizer.mlp_lr + }) + + learning_rates_and_parameters.append({ + 'params': + self.model.layerwise_attention.parameters(), + 'lr': + self.cfg.train.optimizer.mlp_lr, + }) + + optimizer = AdamW( + learning_rates_and_parameters, + lr=self.cfg.train.optimizer.plm_lr, + betas=self.cfg.train.optimizer.betas, + eps=self.cfg.train.optimizer.eps, + weight_decay=self.cfg.train.optimizer.weight_decay, + ) + + return optimizer + + def get_train_dataloader(self) -> DataLoader: + logger.info('Building dataloader for training ...') + + if self.train_dataset is None: + logger.info('Reading train csv file from %s ...' 
+ % self.cfg.dataset.train.name) + self.train_dataset = MsDataset.load( + osp.join(self.model_dir, self.cfg.dataset.train.name), + split=self.cfg.dataset.train.split) + + train_dataloader = DataLoader( + self.train_dataset, + batch_sampler=TranslationEvaluationTrainingSampler( + len(self.train_dataset), + batch_size_for_each_input_format=self.cfg.train.batch_size), + num_workers=4, + collate_fn=self.train_data_collator, + generator=None) + + logger.info('Reading done, %d items in total' + % len(self.train_dataset)) + + return train_dataloader + + def get_eval_data_loader(self) -> DataLoader: + logger.info('Building dataloader for evaluating ...') + + if self.eval_dataset is None: + logger.info('Reading eval csv file from %s ...' + % self.cfg.dataset.valid.name) + + self.eval_dataset = MsDataset.load( + osp.join(self.model_dir, self.cfg.dataset.valid.name), + split=self.cfg.dataset.valid.split) + + eval_dataloader = DataLoader( + self.eval_dataset, + batch_sampler=BatchSampler( + SequentialSampler(range(0, len(self.eval_dataset))), + batch_size=self.cfg.evaluation.batch_size, + drop_last=False), + num_workers=4, + collate_fn=self.eval_data_collator, + generator=None) + + logger.info('Reading done, %d items in total' % len(self.eval_dataset)) + + return eval_dataloader + + def evaluation_loop(self, data_loader, metric_classes): + """ Evaluation loop used by `TranslationEvaluationTrainer.evaluate()`. + + The evaluation process of UniTE model should be arranged with three loops, + corresponding to the input formats of `InputFormat.SRC_REF`, `InputFormat.REF`, + and `InputFormat.SRC`. + + Here we directly copy the codes of `EpochBasedTrainer.evaluation_loop`, and change + the input format during each evaluation subloop. + """ + vis_closure = None + if hasattr(self.cfg.evaluation, 'visualization'): + vis_cfg = self.cfg.evaluation.visualization + vis_closure = partial( + self.visualization, dataset=self.eval_dataset, **vis_cfg) + + self.invoke_hook(TrainerStages.before_val) + metric_values = dict() + + for input_format in (InputFormat.SRC_REF, InputFormat.SRC, + InputFormat.REF): + self.eval_preprocessor.change_input_format(input_format) + + if self._dist: + from modelscope.trainers.utils.inference import multi_gpu_test + # list of batched result and data samples + metric_values.update( + multi_gpu_test( + self, + data_loader, + device=self.device, + metric_classes=metric_classes, + vis_closure=vis_closure, + tmpdir=self.cfg.evaluation.get('cache_dir', None), + gpu_collect=self.cfg.evaluation.get( + 'gpu_collect', False), + data_loader_iters_per_gpu=self._eval_iters_per_epoch)) + else: + from modelscope.trainers.utils.inference import single_gpu_test + metric_values.update( + single_gpu_test( + self, + data_loader, + device=self.device, + metric_classes=metric_classes, + vis_closure=vis_closure, + data_loader_iters=self._eval_iters_per_epoch)) + + for m in metric_classes: + if hasattr(m, 'clear') and callable(m.clear): + m.clear() + + self.invoke_hook(TrainerStages.after_val) + return metric_values diff --git a/modelscope/trainers/trainer.py b/modelscope/trainers/trainer.py index 683ff2f5..c980de04 100644 --- a/modelscope/trainers/trainer.py +++ b/modelscope/trainers/trainer.py @@ -11,7 +11,7 @@ import json import torch from torch import distributed as dist from torch import nn -from torch.utils.data import DataLoader, Dataset +from torch.utils.data import DataLoader, Dataset, Sampler from torch.utils.data.dataloader import default_collate from torch.utils.data.distributed import 
DistributedSampler @@ -88,7 +88,7 @@ class EpochBasedTrainer(BaseTrainer): compile_options (dict, optional): The compile options if compile=True, default None to use the default params of 'TorchModel.compile'. efficient_tuners (dict, optional): The tuners to use to train the model - + samplers: (:obj:`Sampler` or `Dict[Sampler]`, *optional*): samplers used in the train/eval DataLoader. Examples of cfg_modify_fn: >>> def cfg_modify_fn(cfg): >>> cfg.preprocessor.first_sequence= 'text1' @@ -114,6 +114,7 @@ class EpochBasedTrainer(BaseTrainer): model_revision: Optional[str] = DEFAULT_MODEL_REVISION, seed: int = 42, callbacks: Optional[List[Hook]] = None, + samplers: Optional[Union[Sampler, Dict[str, Sampler]]] = None, efficient_tuners: List[Dict] = None, **kwargs): @@ -132,6 +133,7 @@ class EpochBasedTrainer(BaseTrainer): self.train_dataloader = None self.eval_dataloader = None self.data_loader = None + self._samplers = samplers if isinstance(model, str): third_party = kwargs.get(ThirdParty.KEY) @@ -224,9 +226,6 @@ class EpochBasedTrainer(BaseTrainer): # Please check the DDPHook and MegatronHook for details. self.parallel_groups = {} - # Clear the Hook overload functions to avoid duplication. - Hook.clear_strategies() - if self.launcher is not None and not self.cfg.safe_get( 'train.hooks.DDPHook'): # A logic to fit the current code @@ -681,6 +680,7 @@ class EpochBasedTrainer(BaseTrainer): self.train_dataloader = self.get_train_dataloader() self.data_loader = self.train_dataloader self.register_optimizers_hook() + self.register_processors() self.print_hook_info() self.set_checkpoint_file_to_hook(checkpoint_path, load_all_state, kwargs.get('strict', False)) @@ -720,6 +720,7 @@ class EpochBasedTrainer(BaseTrainer): strict(`boolean`): If strict, any unmatched keys will cause an error. """ + self.register_processors() self.print_hook_info() if checkpoint_path is not None: from modelscope.trainers.hooks import LoadCheckpointHook @@ -758,6 +759,7 @@ class EpochBasedTrainer(BaseTrainer): kwargs: strict(`boolean`): If strict, any unmatched keys will cause an error. """ + self.register_processors() self.print_hook_info() if checkpoint_path is not None: from modelscope.trainers.hooks import LoadCheckpointHook @@ -897,11 +899,18 @@ class EpochBasedTrainer(BaseTrainer): """ if self.train_dataset is None: raise 'The train_dataset cannot be None.' + + sampler_cfg = {} + if self._samplers is not None: + sampler_cfg['sampler'] = self._samplers[ + ConfigKeys.train] if isinstance(self._samplers, + dict) else self._samplers data_loader = self._build_dataloader_with_dataset( self.train_dataset, dist=self._dist, seed=self._seed, collate_fn=self.train_data_collator, + **sampler_cfg, **self.cfg.train.get('dataloader', {})) return data_loader @@ -915,6 +924,11 @@ class EpochBasedTrainer(BaseTrainer): if self.eval_dataset is None: raise 'The eval_dataset cannot be None.' 
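
The `samplers` argument documented above is threaded through the dataloader builders here. A short sketch (not part of this patch) of supplying per-split samplers; the datasets and model id are stand-ins, and only the `samplers` kwarg and the `ConfigKeys.train` / `ConfigKeys.val` keys come from this patch:

import torch
from torch.utils.data import SequentialSampler, WeightedRandomSampler

from modelscope.trainers import EpochBasedTrainer
from modelscope.utils.constant import ConfigKeys

# Stand-in map-style datasets and a placeholder model id, for illustration only.
train_ds = [{'text': f'sample {i}', 'label': i % 2} for i in range(100)]
val_ds = [{'text': f'sample {i}', 'label': i % 2} for i in range(20)]
weights = torch.ones(len(train_ds))  # real use: upweight rare classes

trainer = EpochBasedTrainer(
    model='damo/some-model-id',
    train_dataset=train_ds,
    eval_dataset=val_ds,
    samplers={
        ConfigKeys.train: WeightedRandomSampler(weights, len(train_ds)),
        ConfigKeys.val: SequentialSampler(val_ds),
    })
trainer.train()
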
+ sampler_cfg = {} + if self._samplers is not None: + sampler_cfg['sampler'] = self._samplers[ + ConfigKeys.val] if isinstance(self._samplers, + dict) else self._samplers default_config = {'shuffle': False} default_config.update(self.cfg.evaluation.get('dataloader', {})) data_loader = self._build_dataloader_with_dataset( @@ -922,6 +936,7 @@ class EpochBasedTrainer(BaseTrainer): dist=self._dist, seed=self._seed, collate_fn=self.eval_data_collator, + **sampler_cfg, **default_config) return data_loader @@ -938,6 +953,11 @@ class EpochBasedTrainer(BaseTrainer): mode=ModeKeys.EVAL, preprocessor=self.eval_preprocessor) + sampler_cfg = {} + if self._samplers is not None: + sampler_cfg['sampler'] = self._samplers[ + ConfigKeys.val] if isinstance(self._samplers, + dict) else self._samplers default_config = {'shuffle': False} default_config.update(self.cfg.evaluation.get('dataloader', {})) data_loader = self._build_dataloader_with_dataset( @@ -945,6 +965,7 @@ class EpochBasedTrainer(BaseTrainer): dist=self._dist, seed=self._seed, collate_fn=self.eval_data_collator, + **sampler_cfg, **default_config) return data_loader @@ -1132,13 +1153,19 @@ class EpochBasedTrainer(BaseTrainer): batch_size = batch_size_per_gpu num_workers = workers_per_gpu - if dist and not isinstance(dataset, torch.utils.data.IterableDataset): - sampler = DistributedSampler( - dataset, num_replicas=world_size, rank=rank, shuffle=shuffle) - else: - sampler = None - if not isinstance(dataset, torch.utils.data.IterableDataset): - kwargs['shuffle'] = shuffle + sampler = kwargs.pop('sampler', None) + if sampler is None: + if dist and not isinstance(dataset, + torch.utils.data.IterableDataset): + sampler = DistributedSampler( + dataset, + num_replicas=world_size, + rank=rank, + shuffle=shuffle) + else: + sampler = None + if not isinstance(dataset, torch.utils.data.IterableDataset): + kwargs['shuffle'] = shuffle batch_sampler = None @@ -1169,7 +1196,6 @@ class EpochBasedTrainer(BaseTrainer): """ Training loop used by `EpochBasedTrainer.train()` """ self.invoke_hook(TrainerStages.before_run) - kwargs = {} self.model.train() for _ in range(self._epoch, self._max_epochs): self.invoke_hook(TrainerStages.before_train_epoch) @@ -1181,7 +1207,7 @@ class EpochBasedTrainer(BaseTrainer): self.data_batch = data_batch self._inner_iter = i self.invoke_hook(TrainerStages.before_train_iter) - self.train_step(self.model, data_batch, **kwargs) + self.train_step(self.model, data_batch) self.invoke_hook(TrainerStages.after_train_iter) # Value changed after the hooks are invoked, do not move them above the invoke_hook code. del self.data_batch @@ -1320,12 +1346,17 @@ class EpochBasedTrainer(BaseTrainer): hooks = [] for cfg_i in hook_cfg: hook = build_from_cfg(cfg_i, HOOKS) - if hasattr(hook, 'register_strategy'): - hook.register_strategy() self.register_hook(hook) hooks.append(hook) return hooks + def register_processors(self): + """Register processors to hooks + """ + for hook in self.hooks: + if hasattr(hook, 'register_processor'): + hook.register_processor(self) + def get_hook(self, cls): return [h for h in self._hooks if h.__class__ == cls] @@ -1381,14 +1412,7 @@ class EpochBasedTrainer(BaseTrainer): info += '\n -------------------- ' stage_hook_infos.append(info) stage_hook_infos = '\n'.join(stage_hook_infos) - - strategy_info = '\n --- Hook strategies info --- \n' - for consumer, methods in Hook._strategies.items(): - strategy_info += f'Method: {consumer} ' \ - f'replaced by: ' \ - f'{[method.__self__.__class__.__name__ + "." 
+ method.__name__ for method in methods]}\n' - strategy_info += '\n --- Hook strategies info end --- \n' - return stage_hook_infos + strategy_info + return stage_hook_infos def worker_init_fn(worker_id, num_workers, rank, seed): diff --git a/modelscope/trainers/training_args.py b/modelscope/trainers/training_args.py index f4e4e138..b7236163 100644 --- a/modelscope/trainers/training_args.py +++ b/modelscope/trainers/training_args.py @@ -1,108 +1,560 @@ # Copyright (c) Alibaba, Inc. and its affiliates. - import re -from argparse import Action, ArgumentDefaultsHelpFormatter, ArgumentParser +from copy import deepcopy from dataclasses import dataclass, field, fields -from functools import partial -from typing import Any, Dict, List, Tuple, Union +from typing import List, Union -from modelscope.trainers.default_config import DEFAULT_CONFIG -from modelscope.utils.config import Config, ConfigDict -from modelscope.utils.hub import read_config +import addict +import json + +from modelscope.trainers.cli_argument_parser import CliArgumentParser +from modelscope.utils.config import Config -def get_flatten_value(config: Config, metadata: Dict, exclusions=None): - cfg_node = metadata['cfg_node'] - if exclusions is None: - exclusions = [] - - values = config.safe_get(cfg_node) - if isinstance(values, dict): - param_map = [] - for key, value in values.items(): - if key in exclusions or not isinstance(value, - (str, int, float, bool)): - continue - value = add_quotes_for_str(value) - param_map.append(f'{key}={value}') - return ','.join(param_map) - else: - return values - - -def set_flatten_value(config: Config, values: Union[str, List[str]], - metadata: Dict): - cfg_node = metadata['cfg_node'] - if values is None: - return config - +def set_flatten_value(values: Union[str, List[str]]): pairs = values.split(',') if isinstance(values, str) else values - for kv in pairs: + _params = {} + for kv in pairs or []: if len(kv.strip()) == 0: continue key, value = kv.split('=') - value = parse_value(value) - config.merge_from_dict({cfg_node + '.' 
+ key: value})
-    return config
+        _params[key] = parse_value(value)
+    return _params


-def get_base_hook_args(config: Config, metadata: Dict):
-    cfg_node = metadata['cfg_node']
-    hook_type = metadata['hook_type']
-    key = metadata['key']
-    value = config.safe_get(cfg_node)
-    if value is None:
-        return get_hook_param(config, hook_type, key)
-    else:
-        return True if key == 'type' else value
+@dataclass
+class DatasetArgs:
+
+    train_dataset_name: str = field(
+        default=None,
+        metadata={
+            'help':
+            'The dataset name used for training, can be an id in the datahub or a local dir',
+        })
+
+    val_dataset_name: str = field(
+        default=None,
+        metadata={
+            'help':
+            'The dataset name used for evaluating, can be an id in the datahub or a local dir',
+        })
+
+    train_subset_name: str = field(
+        default=None,
+        metadata={
+            'help': 'The subset name used for training, can be None',
+        })
+
+    val_subset_name: str = field(
+        default=None,
+        metadata={
+            'help': 'The subset name used for evaluating, can be None',
+        })
+
+    train_split: str = field(
+        default=None, metadata={
+            'help': 'The split of train dataset',
+        })
+
+    val_split: str = field(
+        default=None, metadata={
+            'help': 'The split of val dataset',
+        })
+
+    train_dataset_namespace: str = field(
+        default=None,
+        metadata={
+            'help': 'The dataset namespace used for training',
+        })
+
+    val_dataset_namespace: str = field(
+        default=None,
+        metadata={
+            'help': 'The dataset namespace used for evaluating',
+        })
+
+    dataset_json_file: str = field(
+        default=None,
+        metadata={
+            'help':
+            'The json file to parse all datasets from, used in a complex dataset scenario, '
+            'the json format should be like:'
+            '''
+            [
+                {
+                    "dataset": {
+                        # All args used in the MsDataset.load function
+                        "dataset_name": "xxx",
+                        ...
+                    },
+                    # All columns used, mapping the column names in each dataset to the same names. 
+ "column_mapping": { + "text1": "sequence1", + "text2": "sequence2", + "label": "label", + }, + # float or str, float means to split the dataset into train/val, + # or just str(train/val) + "split": 0.8, + } + ] + ''', + }) -def set_base_hook_args(config: Config, value: Any, metadata: Dict): - cfg_node = metadata['cfg_node'] - hook_type = metadata['hook_type'] - key = metadata['key'] - if 'hooks' in config.train: - config.train.hooks = [ - hook for hook in config.train.hooks if hook['type'] != hook_type +@dataclass +class ModelArgs: + task: str = field( + default=None, + metadata={ + 'help': 'The task code to be used', + 'cfg_node': 'task' + }) + + model: str = field( + default=None, metadata={ + 'help': 'A model id or model dir', + }) + + model_type: str = field( + default=None, + metadata={ + 'help': + 'The mode type, if load_model_config is False, user need to fill this field', + 'cfg_node': 'model.type' + }) + + +@dataclass +class TrainArgs: + + seed: int = field( + default=42, metadata={ + 'help': 'The random seed', + }) + + per_device_train_batch_size: int = field( + default=16, + metadata={ + 'cfg_node': 'train.dataloader.batch_size_per_gpu', + 'help': + 'The `batch_size_per_gpu` argument for the train dataloader', + }) + + train_data_worker: int = field( + default=0, + metadata={ + 'cfg_node': 'train.dataloader.workers_per_gpu', + 'help': 'The `workers_per_gpu` argument for the train dataloader', + }) + + train_shuffle: bool = field( + default=False, + metadata={ + 'cfg_node': 'train.dataloader.shuffle', + 'help': 'The `shuffle` argument for the train dataloader', + }) + + train_drop_last: bool = field( + default=False, + metadata={ + 'cfg_node': 'train.dataloader.drop_last', + 'help': 'The `drop_last` argument for the train dataloader', + }) + + per_device_eval_batch_size: int = field( + default=16, + metadata={ + 'cfg_node': 'evaluation.dataloader.batch_size_per_gpu', + 'help': + 'The `batch_size_per_gpu` argument for the eval dataloader', + }) + + eval_data_worker: int = field( + default=0, + metadata={ + 'cfg_node': 'evaluation.dataloader.workers_per_gpu', + 'help': 'The `workers_per_gpu` argument for the eval dataloader', + }) + + eval_shuffle: bool = field( + default=False, + metadata={ + 'cfg_node': 'evaluation.dataloader.shuffle', + 'help': 'The `shuffle` argument for the eval dataloader', + }) + + eval_drop_last: bool = field( + default=False, + metadata={ + 'cfg_node': 'evaluation.dataloader.drop_last', + 'help': 'The `drop_last` argument for the eval dataloader', + }) + + max_epochs: int = field( + default=5, + metadata={ + 'cfg_node': 'train.max_epochs', + 'help': 'The training epochs', + }) + + work_dir: str = field( + default='./train_target', + metadata={ + 'cfg_node': 'train.work_dir', + 'help': 'The directory to save models and logs', + }) + + lr: float = field( + default=5e-5, + metadata={ + 'cfg_node': 'train.optimizer.lr', + 'help': 'The learning rate of the optimizer', + }) + + lr_scheduler: str = field( + default='LinearLR', + metadata={ + 'cfg_node': 'train.lr_scheduler.type', + 'help': 'The lr_scheduler type in torch', + }) + + optimizer: str = field( + default='AdamW', + metadata={ + 'cfg_node': 'train.optimizer.type', + 'help': 'The optimizer type in PyTorch, like `AdamW`', + }) + + optimizer_params: str = field( + default=None, + metadata={ + 'cfg_node': 'train.optimizer', + 'help': 'The optimizer params', + 'cfg_setter': set_flatten_value, + }) + + lr_scheduler_params: str = field( + default=None, + metadata={ + 'cfg_node': 'train.lr_scheduler', + 
'help': 'The lr scheduler params', + 'cfg_setter': set_flatten_value, + }) + + lr_strategy: str = field( + default='by_epoch', + metadata={ + 'cfg_node': 'train.lr_scheduler.options.lr_strategy', + 'help': 'The lr decay strategy', + 'choices': ['by_epoch', 'by_step', 'no'], + }) + + local_rank: int = field( + default=0, metadata={ + 'help': 'The local rank', + }) + + logging_interval: int = field( + default=5, + metadata={ + 'help': 'The interval of iter of logging information', + 'cfg_node': 'train.logging.interval', + }) + + eval_strategy: str = field( + default='by_epoch', + metadata={ + 'help': 'Eval strategy, can be `by_epoch` or `by_step` or `no`', + 'cfg_node': 'evaluation.period.eval_strategy', + 'choices': ['by_epoch', 'by_step', 'no'], + }) + + eval_interval: int = field( + default=1, + metadata={ + 'help': 'Eval interval', + 'cfg_node': 'evaluation.period.interval', + }) + + eval_metrics: str = field( + default=None, + metadata={ + 'help': 'The metric name for evaluation', + 'cfg_node': 'evaluation.metrics' + }) + + save_strategy: str = field( + default='by_epoch', + metadata={ + 'help': + 'Checkpointing strategy, can be `by_epoch` or `by_step` or `no`', + 'cfg_node': 'train.checkpoint.period.save_strategy', + 'choices': ['by_epoch', 'by_step', 'no'], + }) + + save_interval: int = field( + default=1, + metadata={ + 'help': + 'The interval of epoch or iter of saving checkpoint period', + 'cfg_node': 'train.checkpoint.period.interval', + }) + + save_best_checkpoint: bool = field( + default=False, + metadata={ + 'help': + 'Save the checkpoint(if it\'s the best) after the evaluation.', + 'cfg_node': 'train.checkpoint.best.save_best', + }) + + metric_for_best_model: str = field( + default=None, + metadata={ + 'help': 'The metric used to measure the model.', + 'cfg_node': 'train.checkpoint.best.metric_key', + }) + + metric_rule_for_best_model: str = field( + default='max', + metadata={ + 'help': + 'The rule to measure the model with the metric, can be `max` or `min`', + 'cfg_node': 'train.checkpoint.best.rule', + }) + + max_checkpoint_num: int = field( + default=None, + metadata={ + 'help': + 'The max number of checkpoints to keep, older ones will be deleted.', + 'cfg_node': 'train.checkpoint.period.max_checkpoint_num', + }) + + max_checkpoint_num_best: int = field( + default=1, + metadata={ + 'help': + 'The max number of best checkpoints to keep, worse ones will be deleted.', + 'cfg_node': 'train.checkpoint.best.max_checkpoint_num', + }) + + push_to_hub: bool = field( + default=False, + metadata={ + 'help': 'Push to hub after each checkpointing', + 'cfg_node': 'train.checkpoint.period.push_to_hub', + }) + + repo_id: str = field( + default=None, + metadata={ + 'help': + 'The repo id in modelhub, usually the format is "group/model"', + 'cfg_node': 'train.checkpoint.period.hub_repo_id', + }) + + hub_token: str = field( + default=None, + metadata={ + 'help': + 'The modelhub token, you can also set the token to the env variable `MODELSCOPE_API_TOKEN`', + 'cfg_node': 'train.checkpoint.period.hub_token', + }) + + private_hub: bool = field( + default=True, + metadata={ + 'help': 'Upload to a private hub', + 'cfg_node': 'train.checkpoint.period.private_hub', + }) + + hub_revision: str = field( + default='master', + metadata={ + 'help': 'Which branch to commit to', + 'cfg_node': 'train.checkpoint.period.hub_revision', + }) + + push_to_hub_best: bool = field( + default=False, + metadata={ + 'help': 'Push to hub after each checkpointing', + 'cfg_node': 'train.checkpoint.best.push_to_hub', + }) + 
+ repo_id_best: str = field( + default=None, + metadata={ + 'help': + 'The repo id in modelhub, usually the format is "group/model"', + 'cfg_node': 'train.checkpoint.best.hub_repo_id', + }) + + hub_token_best: str = field( + default=None, + metadata={ + 'help': + 'The modelhub token, you can also set the token to the env variable `MODELSCOPE_API_TOKEN`', + 'cfg_node': 'train.checkpoint.best.hub_token', + }) + + private_hub_best: bool = field( + default=True, + metadata={ + 'help': 'Upload to a private hub', + 'cfg_node': 'train.checkpoint.best.private_hub', + }) + + hub_revision_best: str = field( + default='master', + metadata={ + 'help': 'Which branch to commit to', + 'cfg_node': 'train.checkpoint.best.hub_revision', + }) + + +@dataclass(init=False) +class TrainingArgs(DatasetArgs, TrainArgs, ModelArgs): + + use_model_config: bool = field( + default=False, + metadata={ + 'help': + 'Use the configuration of the model, ' + 'default will only use the parameters in the CLI and the dataclass', + }) + + def __init__(self, **kwargs): + self.manual_args = list(kwargs.keys()) + for f in fields(self): + if f.name in kwargs: + setattr(self, f.name, kwargs[f.name]) + self._unknown_args = {} + + def parse_cli(self, parser_args=None): + """Construct a TrainingArg class by the parameters of CLI. + + Returns: + Self + """ + parser = CliArgumentParser(self) + args, unknown = parser.parse_known_args(parser_args) + unknown = [ + item for item in unknown + if item not in ('\\', '\n') and '--local-rank=' not in item ] - if key == 'type': - if value and config.safe_get(cfg_node) is None: - config.merge_from_dict({cfg_node: {}}) - else: - config.merge_from_dict({cfg_node: value}) + _unknown = {} + for i in range(0, len(unknown), 2): + _unknown[unknown[i].replace('-', '')] = parse_value(unknown[i + 1]) + args_dict = vars(args) + self.manual_args += parser.manual_args + for key, value in deepcopy(args_dict).items(): + if key is not None and hasattr(self, key): + setattr(self, key, value) + return self -def get_strategy(config: Config, - metadata: Dict, - value_pair: Tuple[str] = ('by_epoch', 'by_step')): - flag = get_base_hook_args(config, metadata) - if flag is None: + def to_config(self, ignore_default_config=None): + """Convert the TrainingArgs to the `Config` + + Returns: + The Config, and extra parameters in dict. + """ + cfg = Config() + args_dict = addict.Dict() + + if ignore_default_config is None: + ignore_default_config = self.use_model_config + + for f in fields(self): + cfg_node = f.metadata.get('cfg_node') + cfg_setter = f.metadata.get('cfg_setter') or (lambda x: x) + if cfg_node is not None: + if f.name in self.manual_args or not ignore_default_config: + if isinstance(cfg_node, str): + cfg_node = [cfg_node] + for _node in cfg_node: + cfg.merge_from_dict( + {_node: cfg_setter(getattr(self, f.name))}) + else: + args_dict[f.name] = getattr(self, f.name) + + cfg.merge_from_dict(self._unknown_args) + return cfg, args_dict + + def get_metadata(self, key): + _fields = fields(self) + for f in _fields: + if f.name == key: + return f return None - return value_pair[0] if flag else value_pair[1] -def set_strategy(config: Config, - value: Any, - metadata: Dict, - value_pair: Tuple[str] = ('by_epoch', 'by_step')): - set_base_hook_args(config, value == value_pair[0], metadata) +def build_dataset_from_file(filename): + """ + The filename format: + [ + { + "dataset": { + "dataset_name": "xxx", + ... 
+ }, + "column_mapping": { + "text1": "sequence1", + "text2": "sequence2", + "label": "label", + } + "split": 0.8, + } + ] + """ + from modelscope import MsDataset + train_set = [] + eval_set = [] + with open(filename, 'r') as f: + ds_json = json.load(f) + for ds in ds_json: + dataset = MsDataset.load(**ds['dataset']).to_hf_dataset() + all_columns = dataset.column_names + keep_columns = ds['column_mapping'].keys() + remove_columns = [ + column for column in all_columns if column not in keep_columns + ] + from datasets import Features + from datasets import Value + from datasets import ClassLabel + features = [ + f for f in dataset.features.items() if f[0] in keep_columns + ] + new_features = {} + for f in features: + if isinstance(f[1], ClassLabel): + new_features[f[0]] = Value(f[1].dtype) + else: + new_features[f[0]] = f[1] + new_features = Features(new_features) + dataset = dataset.map( + lambda x: x, + remove_columns=remove_columns, + features=new_features).rename_columns(ds['column_mapping']) + split = ds['split'] + if isinstance(split, str): + assert split in ('train', 'val') + if split == 'train': + train_set.append(dataset) + else: + eval_set.append(dataset) + else: + assert isinstance(split, float) and 0 < split < 1 + ds_dict = dataset.train_test_split(train_size=split) + train_set.append(ds_dict['train']) + eval_set.append(ds_dict['test']) -def get_hook_param(config, hook_type: str, key='type'): - hooks = config.safe_get('train.hooks', []) - _hooks = list(filter(lambda hook: hook['type'] == hook_type, hooks)) - if key == 'type': - return len(_hooks) > 0 - elif len(_hooks) > 0: - return getattr(_hooks[0], key, None) - return None - - -def add_quotes_for_str(value: Union[str, float, bool, None]) -> str: - if isinstance(value, str): - return f'"{value}"' - else: - return str(value) + from datasets import concatenate_datasets + return concatenate_datasets(train_set), concatenate_datasets(eval_set) def parse_value(value: str) -> Union[str, float, bool, None]: @@ -126,717 +578,3 @@ def parse_value(value: str) -> Union[str, float, bool, None]: return float(value) else: return value - - -@dataclass -class TrainingArgs: - model: str = field( - default=None, metadata={ - 'help': 'A model id or model dir', - }) - - seed: int = field( - default=42, metadata={ - 'help': 'The random seed', - }) - - task: str = field( - default=None, - metadata={ - 'help': 'The task code to be used', - 'cfg_node': 'task' - }) - - dataset_name: str = field( - default=None, metadata={ - 'help': 'The dataset name', - }) - - subset_name: str = field( - default=None, metadata={ - 'help': 'The subset name of the dataset', - }) - - train_dataset_name: str = field( - default=None, metadata={ - 'help': 'The train dataset name', - }) - - val_dataset_name: str = field( - default=None, metadata={ - 'help': 'The validation dataset name', - }) - - per_device_train_batch_size: int = field( - default=None, - metadata={ - 'cfg_node': 'train.dataloader.batch_size_per_gpu', - 'help': 'The training batch size per GPU', - }) - - train_data_worker: int = field( - default=0, - metadata={ - 'cfg_node': 'train.dataloader.workers_per_gpu', - 'help': 'The number of data workers for train dataloader', - }) - - train_shuffle: bool = field( - default=None, - metadata={ - 'cfg_node': 'train.dataloader.shuffle', - 'help': 'Shuffle the train dataset or not', - }) - - train_drop_last: bool = field( - default=None, - metadata={ - 'cfg_node': - 'train.dataloader.drop_last', - 'help': - 'Whether to drop out the last set of data in the 
train_dataset', - }) - - per_device_eval_batch_size: int = field( - default=None, - metadata={ - 'cfg_node': 'evaluation.dataloader.batch_size_per_gpu', - 'help': 'The eval batch size per GPU', - }) - - eval_data_worker: int = field( - default=0, - metadata={ - 'cfg_node': 'evaluation.dataloader.workers_per_gpu', - 'help': 'The number of data workers for eval dataloader', - }) - - eval_shuffle: bool = field( - default=None, - metadata={ - 'cfg_node': 'evaluation.dataloader.shuffle', - 'help': 'Shuffle the eval dataset or not', - }) - - eval_drop_last: bool = field( - default=None, - metadata={ - 'cfg_node': 'evaluation.dataloader.drop_last', - 'help': - 'Whether to drop out the last set of data in the eval_dataset', - }) - - max_epochs: int = field( - default=None, - metadata={ - 'cfg_node': 'train.max_epochs', - 'help': 'The training epochs', - }) - - work_dir: str = field( - default=None, - metadata={ - 'cfg_node': 'train.work_dir', - 'help': 'The training dir to save models and logs', - }) - - lr: float = field( - default=None, - metadata={ - 'cfg_node': 'train.optimizer.lr', - 'help': 'The learning rate of the optimizer', - }) - - optimizer: str = field( - default=None, - metadata={ - 'cfg_node': 'train.optimizer.type', - 'help': 'The optimizer type', - }) - - optimizer_params: str = field( - default=None, - metadata={ - 'cfg_node': - 'train.optimizer', - 'cfg_getter': - partial(get_flatten_value, exclusions=['type', 'lr', 'options']), - 'cfg_setter': - set_flatten_value, - 'help': - 'The optimizer init params except `lr`', - }) - - lr_scheduler_params: str = field( - default=None, - metadata={ - 'cfg_node': - 'train.lr_scheduler', - 'cfg_getter': - partial(get_flatten_value, exclusions=['type', 'lr', 'options']), - 'cfg_setter': - set_flatten_value, - 'help': - 'The lr_scheduler init params', - }) - - local_rank: int = field( - default=0, metadata={ - 'help': 'The training local rank', - }) - - save_ckpt: bool = field( - default=True, - metadata={ - 'help': - 'Periodically save checkpoint when True, corresponding to CheckpointHook', - 'cfg_node': 'train.checkpoint.period', - 'hook_type': 'CheckpointHook', - 'key': 'type', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - save_ckpt_best: bool = field( - default=None, - metadata={ - 'help': - 'Save best checkpoint when True, corresponding to BestCkptSaverHook', - 'cfg_node': 'train.checkpoint.best', - 'hook_type': 'BestCkptSaverHook', - 'key': 'type', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - evaluate: bool = field( - default=True, - metadata={ - 'help': 'Evaluate when True, corresponding to EvaluationHook', - 'cfg_node': 'evaluation.period', - 'hook_type': 'EvaluationHook', - 'key': 'type', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - save_ckpt_strategy: str = field( - default=None, - metadata={ - 'help': 'Periodically save checkpoint by epoch or by step' - 'use with `CheckpointHook`, can be `by_epoch` or `by_step`', - 'cfg_node': 'train.checkpoint.period.by_epoch', - 'hook_type': 'CheckpointHook', - 'key': 'by_epoch', - 'choices': ['by_epoch', 'by_step'], - 'cfg_getter': get_strategy, - 'cfg_setter': set_strategy, - }) - - save_ckpt_best_strategy: str = field( - default=None, - metadata={ - 'help': 'Save best checkpoint by epoch or by step' - 'use with `BestCkptSaverHook`, can be `by_epoch` or `by_step`', - 'cfg_node': 'train.checkpoint.best.by_epoch', - 'hook_type': 'BestCkptSaverHook', - 'key': 'by_epoch', - 'choices': 
['by_epoch', 'by_step'], - 'cfg_getter': get_strategy, - 'cfg_setter': set_strategy, - }) - - push_to_hub: bool = field( - default=None, - metadata={ - 'help': - 'Push to hub after one checkpoint saved by CheckpointHook in the local disk', - 'cfg_node': 'train.checkpoint.period.push_to_hub', - 'hook_type': 'CheckpointHook', - 'key': 'push_to_hub', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - model_id_with_org: str = field( - default=None, - metadata={ - 'help': - 'The repo id in modelhub, usually it\'s like "group/model"', - 'cfg_node': 'train.checkpoint.period.model_id_with_org', - 'hook_type': 'CheckpointHook', - 'key': 'model_id_with_org', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - hub_token: str = field( - default=None, - metadata={ - 'help': - 'The token to push to hub, you can also set the token to the env variable `MODELSCOPE_API_TOKEN`', - 'cfg_node': 'train.checkpoint.period.hub_token', - 'hook_type': 'CheckpointHook', - 'key': 'hub_token', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - private_hub: bool = field( - default=None, - metadata={ - 'help': 'Upload to a private hub', - 'cfg_node': 'train.checkpoint.period.private_hub', - 'hook_type': 'CheckpointHook', - 'key': 'private_hub', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - push_to_hub_best_model: bool = field( - default=None, - metadata={ - 'help': - 'Push to hub after one checkpoint saved by BestCkptSaverHook in the local disk', - 'cfg_node': 'train.checkpoint.best.push_to_hub', - 'hook_type': 'BestCkptSaverHook', - 'key': 'push_to_hub', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - model_id_with_org_best_model: str = field( - default=None, - metadata={ - 'help': - 'The repo id in modelhub, usually it\'s like "group/model"', - 'cfg_node': 'train.checkpoint.best.model_id_with_org', - 'hook_type': 'BestCkptSaverHook', - 'key': 'model_id_with_org', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - hub_token_best_model: str = field( - default=None, - metadata={ - 'help': - 'The token to push to hub, you can also set the token to the env variable `MODELSCOPE_API_TOKEN`', - 'cfg_node': 'train.checkpoint.best.hub_token', - 'hook_type': 'BestCkptSaverHook', - 'key': 'hub_token', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - private_hub_best_model: bool = field( - default=None, - metadata={ - 'help': 'Upload to a private hub', - 'cfg_node': 'train.checkpoint.best.private_hub', - 'hook_type': 'BestCkptSaverHook', - 'key': 'private_hub', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - ckpt_period_interval: int = field( - default=1, - metadata={ - 'help': - 'The interval of epoch or iter of saving checkpoint period', - 'cfg_node': 'train.checkpoint.period.interval', - 'hook_type': 'CheckpointHook', - 'key': 'interval', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - ckpt_best_interval: int = field( - default=None, - metadata={ - 'help': 'The interval of epoch or iter of saving checkpoint best', - 'cfg_node': 'train.checkpoint.best.interval', - 'hook_type': 'BestCkptSaverHook', - 'key': 'interval', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - metric_for_best_model: str = field( - default=None, - metadata={ - 'help': - 'Which metric key to judge the checkpoint is better or not, use with 
`BestCkptSaverHook`, ' - 'please make sure this key is returned by the `evaluation_metrics` classes', - 'cfg_node': - 'train.checkpoint.best.metric_key', - 'hook_type': - 'BestCkptSaverHook', - 'key': - 'metric_key', - 'cfg_getter': - get_base_hook_args, - 'cfg_setter': - set_base_hook_args, - }) - - metric_rule_for_best_model: str = field( - default=None, - metadata={ - 'help': - 'Which rule to compare the value of `checkpoint_saving_metric`, ' - 'use with `BestCkptSaverHook`, can be `max` or `min`', - 'cfg_node': - 'train.checkpoint.best.rule', - 'hook_type': - 'BestCkptSaverHook', - 'key': - 'rule', - 'cfg_getter': - get_base_hook_args, - 'cfg_setter': - set_base_hook_args, - }) - - save_ckpt_peroid_limit: int = field( - default=None, - metadata={ - 'help': - 'The max saving number of checkpoint, older checkpoints will be deleted.', - 'cfg_node': 'train.checkpoint.period.max_checkpoint_num', - 'hook_type': 'CheckpointHook', - 'key': 'max_checkpoint_num', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - save_ckpt_best_limit: int = field( - default=None, - metadata={ - 'help': - 'The max saving number of checkpoint, worse checkpoints will be deleted.', - 'cfg_node': 'train.checkpoint.best.max_checkpoint_num', - 'hook_type': 'BestCkptSaverHook', - 'key': 'max_checkpoint_num', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - logging_interval: int = field( - default=None, - metadata={ - 'help': 'The interval of iter of logging information', - 'cfg_node': 'train.logging.interval', - 'hook_type': 'TextLoggerHook', - 'key': 'interval', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - eval_strategy: str = field( - default=None, - metadata={ - 'help': 'Evaluate model by epoch or by step' - 'use with `EvaluationHook`, can be `by_epoch` or `by_step`', - 'cfg_node': 'evaluation.period.by_epoch', - 'hook_type': 'EvaluationHook', - 'key': 'by_epoch', - 'choices': ['by_epoch', 'by_step'], - 'cfg_getter': get_strategy, - 'cfg_setter': set_strategy, - }) - - eval_interval: int = field( - default=None, - metadata={ - 'help': 'Evaluation interval by epoch or iter', - 'cfg_node': 'evaluation.period.interval', - 'hook_type': 'EvaluationHook', - 'key': 'interval', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - eval_metrics: str = field( - default=None, - metadata={ - 'help': 'The metric module name used in evaluation', - 'cfg_node': 'evaluation.metrics' - }) - - namespace: str = field( - default=None, metadata={'help': 'The namespace of dataset'}) - - @classmethod - def from_cli(cls, parser_args=None, **extra_kwargs): - """Construct a TrainingArg class by the parameters of CLI. - - Args: - **extra_kwargs: Extra args which can be defined in code. - - Returns: - The output TrainingArg class with the parameters from CLI. 
- """ - self = cls(**extra_kwargs) - parser = CliArgumentParser(self) - args, unknown = parser.parse_known_args(parser_args) - unknown = [item for item in unknown if item not in ('\\', '\n')] - _unknown = {} - for i in range(0, len(unknown), 2): - _unknown[unknown[i].replace('-', '')] = parse_value(unknown[i + 1]) - cfg_dict = vars(args) - - if args.model is not None: - try: - cfg = read_config(args.model) - except Exception as e: - print('Read config failed with error:', e) - else: - self = cls.from_config(cfg, **extra_kwargs) - for key, value in cfg_dict.items(): - if key is not None and hasattr(self, - key) and key in parser.manual_args: - setattr(self, key, value) - self.extra_args = _unknown - return self - - def to_args(self): - """Convert the TrainingArg class to key-value pairs. - - Returns: The key-value pair. - - """ - _args = {} - for f in fields(self): - _args[f.name] = getattr(self, f.name) - return _args - - @classmethod - def from_config(cls, config=DEFAULT_CONFIG, **kwargs): - """Construct the TrainingArg class by a `Config` class. - - Args: - config: The Config class. By default, `DEFAULT_CONFIG` is used. - **kwargs: Extra args which can be defined in code. - - Returns: The output TrainingArg class with the parameters from the config. - - """ - - self = cls(**kwargs) - for f in fields(self): - if 'cfg_node' in f.metadata and getattr(self, f.name) is None: - self._to_field(f, config) - return self - - def _to_field(self, f, config): - assert 'cfg_node' in f.metadata - if 'cfg_getter' in f.metadata: - cfg_getter = f.metadata['cfg_getter'] - setattr(self, f.name, cfg_getter(config, f.metadata)) - else: - cfg_node = f.metadata['cfg_node'] - setattr(self, f.name, config.safe_get(cfg_node)) - - def _to_config(self, f, config: Config): - assert 'cfg_node' in f.metadata - value = getattr(self, f.name) - if 'cfg_setter' in f.metadata: - cfg_setter = f.metadata['cfg_setter'] - config = cfg_setter(config, value, f.metadata) - else: - cfg_node = f.metadata['cfg_node'] - if isinstance(cfg_node, str): - cfg_node = [cfg_node] - for _node in cfg_node: - config.merge_from_dict({_node: value}) - return config - - def __call__(self, cfg: Config): - for f in fields(self): - if 'cfg_node' not in f.metadata: - continue - - value = getattr(self, f.name) - if value is not None: - self._to_config(f, cfg) - if hasattr(self, 'extra_args'): - cfg.merge_from_dict(self.extra_args) - else: - self._to_field(f, cfg) - return cfg - - -class CliArgumentParser(ArgumentParser): - """ Argument Parser to define and parse command-line args for training. - - Args: - training_args (TrainingArgs): dict or list of dict which defines different - paramters for training. 
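# --- Editor's aside (illustrative only, not part of the patch) ----------------
# The removed `_to_config`/`__call__` pair above maps every dataclass field that
# carries a `cfg_node` entry in its metadata onto the trainer Config, either via
# a custom `cfg_setter` or a plain `merge_from_dict` on the dotted node path.
# A minimal, self-contained approximation of that pattern, using plain dicts
# instead of the modelscope `Config` class (all names below are hypothetical):
from dataclasses import dataclass, field, fields


def merge_node(cfg: dict, dotted_key: str, value):
    """Set cfg['a']['b']['c'] = value for dotted_key 'a.b.c'."""
    *parents, leaf = dotted_key.split('.')
    node = cfg
    for key in parents:
        node = node.setdefault(key, {})
    node[leaf] = value


@dataclass
class ToyArgs:
    lr: float = field(default=None, metadata={'cfg_node': 'train.optimizer.lr'})
    max_epochs: int = field(default=None, metadata={'cfg_node': 'train.max_epochs'})

    def apply_to(self, cfg: dict) -> dict:
        # Mirror of the removed __call__: only fields that have a value and a
        # `cfg_node` entry are written back into the configuration tree (the
        # original also reads config values back into empty fields).
        for f in fields(self):
            value = getattr(self, f.name)
            if value is not None and 'cfg_node' in f.metadata:
                merge_node(cfg, f.metadata['cfg_node'], value)
        return cfg


# ToyArgs(lr=1e-4, max_epochs=3).apply_to({}) yields
# {'train': {'optimizer': {'lr': 0.0001}, 'max_epochs': 3}}
# -----------------------------------------------------------------------------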
- """ - - def __init__(self, training_args: TrainingArgs = None, **kwargs): - if 'formatter_class' not in kwargs: - kwargs['formatter_class'] = ArgumentDefaultsHelpFormatter - super().__init__(**kwargs) - self.training_args = training_args - self.define_args() - - def get_manual_args(self, args): - return [arg[2:] for arg in args if arg.startswith('--')] - - def _parse_known_args(self, args: List = None, namespace=None): - self.model_id = namespace.model if namespace is not None else None - if '--model' in args: - self.model_id = args[args.index('--model') + 1] - self.manual_args = self.get_manual_args(args) - return super()._parse_known_args(args, namespace) - - def print_help(self, file=None): - config = DEFAULT_CONFIG - if self.model_id is not None: - try: - config = read_config(self.model_id) - except Exception as e: - print('Read config failed with error:', e) - - if config is not None: - for action_group in self._optionals._group_actions: - if hasattr(self.training_args, action_group.dest): - value = getattr(self.training_args, action_group.dest) - f = {f.name: f - for f in fields(self.training_args) - }.get(action_group.dest) - if value is not None: - action_group.default = value - elif 'cfg_node' in f.metadata: - cfg_node = f.metadata['cfg_node'] - if isinstance(cfg_node, str): - cfg_node = [cfg_node] - - assert isinstance(cfg_node, (list, tuple)) - if isinstance(cfg_node[0], str): - action_group.default = config.safe_get(cfg_node[0]) - else: - action_group.default = cfg_node[0](config) - return super().print_help(file) - - def define_args(self): - if self.training_args is not None: - for f in fields(self.training_args): - arg_name = f.name - arg_attr = getattr(self.training_args, f.name) - name = f'--{arg_name}' - kwargs = dict(type=f.type, help=f.metadata['help']) - kwargs['default'] = arg_attr - - if 'choices' in f.metadata: - kwargs['choices'] = f.metadata['choices'] - - kwargs['action'] = SingleAction - self.add_argument(name, **kwargs) - - -class DictAction(Action): - """ - argparse action to split an argument into KEY=VALUE form - on the first = and append to a dictionary. List options can - be passed as comma separated values, i.e 'KEY=V1,V2,V3', or with explicit - brackets, i.e. 'KEY=[V1,V2,V3]'. It also support nested brackets to build - list/tuple values. e.g. 'KEY=[(V1,V2),(V3,V4)]' - """ - - @staticmethod - def parse_int_float_bool_str(val): - try: - return int(val) - except ValueError: - pass - try: - return float(val) - except ValueError: - pass - if val.lower() in ['true', 'false']: - return val.lower() == 'true' - if val == 'None': - return None - return val - - @staticmethod - def parse_iterable(val): - """Parse iterable values in the string. - All elements inside '()' or '[]' are treated as iterable values. - Args: - val (str): Value string. - Returns: - list | tuple: The expanded list or tuple from the string. - Examples: - >>> DictAction._parse_iterable('1,2,3') - [1, 2, 3] - >>> DictAction._parse_iterable('[a, b, c]') - ['a', 'b', 'c'] - >>> DictAction._parse_iterable('[(1, 2, 3), [a, b], c]') - [(1, 2, 3), ['a', 'b'], 'c'] - """ - - def find_next_comma(string): - """Find the position of next comma in the string. - If no ',' is found in the string, return the string length. All - chars inside '()' and '[]' are treated as one element and thus ',' - inside these brackets are ignored. 
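# --- Editor's aside (illustrative only, not part of the patch) ----------------
# `DictAction.parse_iterable` in the surrounding removed code does bracket-aware
# splitting, so nested strings such as '[(1,2),(3,4)]' become real tuples and
# lists, with scalars coerced by `parse_int_float_bool_str`. A hedged usage
# sketch, assuming the removed `SingleAction`/`DictAction` classes are in scope
# and that `DictAction` is registered with `nargs='+'` (its loop over `values`
# implies multiple tokens); the `--topk`/`--column_map` option names are made
# up for the example:
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--topk', action=SingleAction, type=str)
parser.add_argument('--column_map', action=DictAction, nargs='+')

ns = parser.parse_args(['--topk', '(1,5)', '--column_map', 'img=image', 'text=query'])
assert ns.topk == (1, 5)                                   # tuple rebuilt from '(1,5)'
assert ns.column_map == {'img': 'image', 'text': 'query'}  # KEY=VALUE pairs -> dict
# -----------------------------------------------------------------------------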
- """ - assert (string.count('(') == string.count(')')) and ( - string.count('[') - == string.count(']')), f'Imbalanced brackets exist in {string}' - end = len(string) - for idx, char in enumerate(string): - pre = string[:idx] - # The string before this ',' is balanced - if ((char == ',') and (pre.count('(') == pre.count(')')) - and (pre.count('[') == pre.count(']'))): - end = idx - break - return end - - # Strip ' and " characters and replace whitespace. - val = val.strip('\'\"').replace(' ', '') - is_tuple = False - if val.startswith('(') and val.endswith(')'): - is_tuple = True - val = val[1:-1] - elif val.startswith('[') and val.endswith(']'): - val = val[1:-1] - elif ',' not in val: - # val is a single value - return DictAction.parse_int_float_bool_str(val) - - values = [] - while len(val) > 0: - comma_idx = find_next_comma(val) - element = DictAction.parse_iterable(val[:comma_idx]) - values.append(element) - val = val[comma_idx + 1:] - if is_tuple: - values = tuple(values) - return values - - def __call__(self, parser, namespace, values, option_string): - options = {} - for kv in values: - key, val = kv.split('=', maxsplit=1) - options[key] = self.parse_iterable(val) - setattr(namespace, self.dest, options) - - -class SingleAction(DictAction): - """ Argparse action to convert value to tuple or list or nested structure of - list and tuple, i.e 'V1,V2,V3', or with explicit brackets, i.e. '[V1,V2,V3]'. - It also support nested brackets to build list/tuple values. e.g. '[(V1,V2),(V3,V4)]' - """ - - def __call__(self, parser, namespace, value, option_string): - if isinstance(value, str): - setattr(namespace, self.dest, self.parse_iterable(value)) - else: - setattr(namespace, self.dest, value) diff --git a/modelscope/utils/ast_index_file.py b/modelscope/utils/ast_index_file.py new file mode 100644 index 00000000..5aedf1bb --- /dev/null +++ b/modelscope/utils/ast_index_file.py @@ -0,0 +1 @@ +{"index": {"('MODELS', 'protein-structure', 'unifold')": {"filepath": "TEMPLATE_PATH/models/science/unifold/model.py", "imports": ["torch", "os", "typing", "argparse"], "module": "modelscope.models.science.unifold.model"}, "('MODELS', 'acoustic-noise-suppression', 'speech_dfsmn_ans')": {"filepath": "TEMPLATE_PATH/models/audio/ans/denoise_net.py", "imports": ["torch"], "module": "modelscope.models.audio.ans.denoise_net"}, "('MODELS', 'acoustic-noise-suppression', 'speech_frcrn_ans_cirm_16k')": {"filepath": "TEMPLATE_PATH/models/audio/ans/frcrn.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.audio.ans.frcrn"}, "('MODELS', 'speaker-verification', 'ecapa-tdnn-sv')": {"filepath": "TEMPLATE_PATH/models/audio/sv/ecapa_tdnn.py", "imports": ["torch", "torchaudio", "math", "os", "typing"], "module": "modelscope.models.audio.sv.ecapa_tdnn"}, "('MODELS', 'speaker-verification', 'eres2net-sv')": {"filepath": "TEMPLATE_PATH/models/audio/sv/ERes2Net.py", "imports": ["torch", "torchaudio", "math", "os", "typing"], "module": "modelscope.models.audio.sv.ERes2Net"}, "('MODELS', 'speaker-verification', 'cam++-sv')": {"filepath": "TEMPLATE_PATH/models/audio/sv/DTDNN.py", "imports": ["torch", "torchaudio", "collections", "os", "typing"], "module": "modelscope.models.audio.sv.DTDNN"}, "('MODELS', 'speaker-verification', 'generic-sv')": {"filepath": "TEMPLATE_PATH/models/audio/sv/generic_speaker_verification.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.sv.generic_speaker_verification"}, "('MODELS', 'speaker-diarization', 'generic-sv')": {"filepath": 
"TEMPLATE_PATH/models/audio/sv/generic_speaker_verification.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.sv.generic_speaker_verification"}, "('MODELS', 'speaker-diarization', 'scl-sd')": {"filepath": "TEMPLATE_PATH/models/audio/sv/speaker_change_locator.py", "imports": ["numpy", "torch", "torchaudio", "collections", "os", "typing"], "module": "modelscope.models.audio.sv.speaker_change_locator"}, "('MODELS', 'speaker-verification', 'rdino_ecapa-tdnn-sv')": {"filepath": "TEMPLATE_PATH/models/audio/sv/rdino.py", "imports": ["torch", "torchaudio", "math", "os", "typing"], "module": "modelscope.models.audio.sv.rdino"}, "('MODELS', 'inverse-text-processing', 'generic-itn')": {"filepath": "TEMPLATE_PATH/models/audio/itn/generic_inverse_text_processing.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.itn.generic_inverse_text_processing"}, "('MODELS', 'auto-speech-recognition', 'wenet-asr')": {"filepath": "TEMPLATE_PATH/models/audio/asr/wenet_automatic_speech_recognition.py", "imports": ["json", "os", "wenetruntime", "typing"], "module": "modelscope.models.audio.asr.wenet_automatic_speech_recognition"}, "('MODELS', 'auto-speech-recognition', 'generic-asr')": {"filepath": "TEMPLATE_PATH/models/audio/asr/generic_automatic_speech_recognition.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.asr.generic_automatic_speech_recognition"}, "('MODELS', 'voice-activity-detection', 'generic-asr')": {"filepath": "TEMPLATE_PATH/models/audio/asr/generic_automatic_speech_recognition.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.asr.generic_automatic_speech_recognition"}, "('MODELS', 'language-score-prediction', 'generic-asr')": {"filepath": "TEMPLATE_PATH/models/audio/asr/generic_automatic_speech_recognition.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.asr.generic_automatic_speech_recognition"}, "('MODELS', 'speech-timestamp', 'generic-asr')": {"filepath": "TEMPLATE_PATH/models/audio/asr/generic_automatic_speech_recognition.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.asr.generic_automatic_speech_recognition"}, "('MODELS', 'punctuation', 'generic-punc')": {"filepath": "TEMPLATE_PATH/models/audio/punc/generic_punctuation.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.punc.generic_punctuation"}, "('MODELS', 'text-to-speech', 'sambert-hifigan')": {"filepath": "TEMPLATE_PATH/models/audio/tts/sambert_hifi.py", "imports": ["shutil", "numpy", "json", "__future__", "wave", "matplotlib", "datetime", "yaml", "os", "zipfile"], "module": "modelscope.models.audio.tts.sambert_hifi"}, "('MODELS', 'speech-separation', 'speech_mossformer_separation_temporal_8k')": {"filepath": "TEMPLATE_PATH/models/audio/separation/mossformer.py", "imports": ["torch", "os", "copy", "typing"], "module": "modelscope.models.audio.separation.mossformer"}, "('MODELS', 'keyword-spotting', 'speech_dfsmn_kws_char_farfield')": {"filepath": "TEMPLATE_PATH/models/audio/kws/farfield/model.py", "imports": ["os", "typing", "tempfile"], "module": "modelscope.models.audio.kws.farfield.model"}, "('MODELS', 'keyword-spotting', 'speech_dfsmn_kws_char_farfield_iot')": {"filepath": "TEMPLATE_PATH/models/audio/kws/farfield/model.py", "imports": ["os", "typing", "tempfile"], "module": "modelscope.models.audio.kws.farfield.model"}, "('MODELS', 'keyword-spotting', 'kws-kwsbp')": {"filepath": "TEMPLATE_PATH/models/audio/kws/generic_key_word_spotting.py", "imports": ["os", "typing"], "module": 
"modelscope.models.audio.kws.generic_key_word_spotting"}, "('MODELS', 'keyword-spotting', 'speech_kws_fsmn_char_ctc_nearfield')": {"filepath": "TEMPLATE_PATH/models/audio/kws/nearfield/model.py", "imports": ["torch", "tempfile", "sys", "os", "typing"], "module": "modelscope.models.audio.kws.nearfield.model"}, "('MODELS', 'image-captioning', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'ocr-recognition', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'visual-grounding', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'visual-question-answering', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'visual-entailment', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'image-classification', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'text-summarization', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'text-classification', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'auto-speech-recognition', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'sudoku', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'text2sql', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'multi-modal-embedding', 'clip-multi-modal-embedding')": {"filepath": "TEMPLATE_PATH/models/multi_modal/clip/model.py", "imports": ["numpy", "json", "torch", "collections", "os", "typing"], "module": "modelscope.models.multi_modal.clip.model"}, "('MODELS', 'visual-question-answering', 'mplug')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mplug_for_all_tasks.py", "imports": ["os", "typing"], "module": 
"modelscope.models.multi_modal.mplug_for_all_tasks"}, "('MODELS', 'image-captioning', 'mplug')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mplug_for_all_tasks.py", "imports": ["os", "typing"], "module": "modelscope.models.multi_modal.mplug_for_all_tasks"}, "('MODELS', 'image-text-retrieval', 'mplug')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mplug_for_all_tasks.py", "imports": ["os", "typing"], "module": "modelscope.models.multi_modal.mplug_for_all_tasks"}, "('MODELS', 'video-question-answering', 'hitea')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mplug_for_all_tasks.py", "imports": ["os", "typing"], "module": "modelscope.models.multi_modal.mplug_for_all_tasks"}, "('MODELS', 'video-captioning', 'hitea')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mplug_for_all_tasks.py", "imports": ["os", "typing"], "module": "modelscope.models.multi_modal.mplug_for_all_tasks"}, "('MODELS', 'text-to-image-synthesis', 'multi-stage-diffusion-text-to-image-synthesis')": {"filepath": "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/model.py", "imports": ["PIL", "numpy", "json", "torch", "math", "os", "typing"], "module": "modelscope.models.multi_modal.multi_stage_diffusion.model"}, "('MODELS', 'text-to-image-synthesis', 'diffusion-text-to-image-synthesis')": {"filepath": "TEMPLATE_PATH/models/multi_modal/diffusion/model.py", "imports": ["numpy", "json", "torch", "os", "typing"], "module": "modelscope.models.multi_modal.diffusion.model"}, "('MODELS', 'efficient-diffusion-tuning', 'efficient-diffusion-tuning')": {"filepath": "TEMPLATE_PATH/models/multi_modal/efficient_diffusion_tuning/efficient_stable_diffusion.py", "imports": ["transformers", "torch", "functools", "diffusers", "os", "typing"], "module": "modelscope.models.multi_modal.efficient_diffusion_tuning.efficient_stable_diffusion"}, "('MODELS', 'generative-multi-modal-embedding', 'gemm-generative-multi-modal')": {"filepath": "TEMPLATE_PATH/models/multi_modal/gemm/gemm_model.py", "imports": ["PIL", "numpy", "json", "torch", "torchvision", "os", "typing"], "module": "modelscope.models.multi_modal.gemm.gemm_model"}, "('MODELS', 'video-multi-modal-embedding', 'video-clip-multi-modal-embedding')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py", "imports": ["urllib", "PIL", "random", "numpy", "json", "torch", "decord", "tempfile", "os", "typing", "uuid"], "module": "modelscope.models.multi_modal.mmr.models.clip_for_mm_video_embedding"}, "('MODELS', 'multi-modal-similarity', 'team-multi-modal-similarity')": {"filepath": "TEMPLATE_PATH/models/multi_modal/team/team_model.py", "imports": ["PIL", "numpy", "torch", "cv2", "tokenizers", "torchvision", "typing"], "module": "modelscope.models.multi_modal.team.team_model"}, "('MODELS', 'document-vl-embedding', 'vldoc')": {"filepath": "TEMPLATE_PATH/models/multi_modal/vldoc/model.py", "imports": ["json", "torch", "logging", "re", "math", "sys", "copy", "torchvision", "os"], "module": "modelscope.models.multi_modal.vldoc.model"}, "('MODELS', 'video-temporal-grounding', 'soonet')": {"filepath": "TEMPLATE_PATH/models/multi_modal/soonet/model.py", "imports": ["torch", "os"], "module": "modelscope.models.multi_modal.soonet.model"}, "('MODELS', 'text-ranking', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/text_ranking.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.text_ranking"}, "('MODELS', 'backbone', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/backbone.py", "imports": ["dataclasses", 
"transformers", "random", "torch", "math", "warnings", "os", "typing"], "module": "modelscope.models.multi_modal.mgeo.backbone"}, "('MODELS', 'text-classification', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/text_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.text_classification"}, "('MODELS', 'nli', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/text_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.text_classification"}, "('MODELS', 'sentiment-classification', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/text_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.text_classification"}, "('MODELS', 'sentence-similarity', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/text_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.text_classification"}, "('MODELS', 'zero-shot-classification', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/text_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.text_classification"}, "('MODELS', 'token-classification', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/token_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.token_classification"}, "('MODELS', 'part-of-speech', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/token_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.token_classification"}, "('MODELS', 'word-segmentation', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/token_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.token_classification"}, "('MODELS', 'multimodal-dialogue', 'mplug-owl')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mplug_owl/modeling_mplug_owl.py", "imports": ["dataclasses", "transformers", "random", "torch", "logging", "math", "copy", "io", "os", "typing"], "module": "modelscope.models.multi_modal.mplug_owl.modeling_mplug_owl"}, "('MODELS', 'text-to-image-synthesis', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_text_to_image_synthesis_model.py", "imports": ["PIL", "pkg_resources", "numpy", "json", "torch", "taming", "torchvision", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_text_to_image_synthesis_model"}, "('MODELS', 'text-to-video-synthesis', 'latent-text-to-video-synthesis')": {"filepath": "TEMPLATE_PATH/models/multi_modal/video_synthesis/text_to_video_synthesis_model.py", "imports": ["open_clip", "torch", "einops", "os", "typing"], "module": "modelscope.models.multi_modal.video_synthesis.text_to_video_synthesis_model"}, "('MODELS', 'image-captioning', 'clip-interrogator')": {"filepath": "TEMPLATE_PATH/models/multi_modal/clip_interrogator/model.py", "imports": ["PIL", "hashlib", "numpy", "open_clip", "torch", "dataclasses", "os", "typing", "requests", "transformers", "safetensors", "tqdm", "math", "time", "torchvision"], "module": "modelscope.models.multi_modal.clip_interrogator.model"}, "('MODELS', 'generative-multi-modal-embedding', 'rleg-generative-multi-modal')": {"filepath": "TEMPLATE_PATH/models/multi_modal/rleg/rleg.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.models.multi_modal.rleg.rleg"}, "('MODELS', 'translation-evaluation', 'unite')": {"filepath": "TEMPLATE_PATH/models/nlp/unite/translation_evaluation.py", "imports": 
["transformers", "numpy", "torch", "math", "warnings", "packaging", "dataclasses", "typing"], "module": "modelscope.models.nlp.unite.translation_evaluation"}, "('MODELS', 'text-generation', 'palm-v2')": {"filepath": "TEMPLATE_PATH/models/nlp/palm_v2/text_generation.py", "imports": ["dataclasses", "subprocess", "codecs", "transformers", "numpy", "json", "torch", "math", "copy", "os", "typing"], "module": "modelscope.models.nlp.palm_v2.text_generation"}, "('MODELS', 'fill-mask', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/fill_mask.py", "imports": ["torch", "transformers"], "module": "modelscope.models.nlp.structbert.fill_mask"}, "('MODELS', 'backbone', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/backbone.py", "imports": ["transformers", "torch", "math", "packaging", "dataclasses", "typing"], "module": "modelscope.models.nlp.structbert.backbone"}, "('MODELS', 'faq-question-answering', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/faq_question_answering.py", "imports": ["torch", "math", "collections", "os", "typing"], "module": "modelscope.models.nlp.structbert.faq_question_answering"}, "('MODELS', 'text-classification', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.text_classification"}, "('MODELS', 'nli', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.text_classification"}, "('MODELS', 'sentiment-classification', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.text_classification"}, "('MODELS', 'sentence-similarity', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.text_classification"}, "('MODELS', 'zero-shot-classification', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.text_classification"}, "('MODELS', 'token-classification', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/token_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.token_classification"}, "('MODELS', 'word-segmentation', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/token_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.token_classification"}, "('MODELS', 'part-of-speech', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/token_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.token_classification"}, "('MODELS', 'backbone', 'transformers')": {"filepath": "TEMPLATE_PATH/models/nlp/hf_transformers/backbone.py", "imports": ["transformers"], "module": "modelscope.models.nlp.hf_transformers.backbone"}, "('MODELS', 'fill-mask', 'fill-mask')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/fill_mask.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.models.nlp.task_models.fill_mask"}, "('MODELS', 'text-ranking', 'text-ranking')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/text_ranking.py", "imports": ["typing", "numpy"], "module": "modelscope.models.nlp.task_models.text_ranking"}, "('MODELS', 'feature-extraction', 'feature-extraction')": 
{"filepath": "TEMPLATE_PATH/models/nlp/task_models/feature_extraction.py", "imports": ["typing", "numpy"], "module": "modelscope.models.nlp.task_models.feature_extraction"}, "('MODELS', 'text-classification', 'text-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/text_classification.py", "imports": ["typing", "numpy"], "module": "modelscope.models.nlp.task_models.text_classification"}, "('MODELS', 'text-generation', 'text-generation')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/text_generation.py", "imports": ["torch", "typing", "transformers", "numpy"], "module": "modelscope.models.nlp.task_models.text_generation"}, "('MODELS', 'information-extraction', 'information-extraction')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/information_extraction.py", "imports": ["typing", "numpy"], "module": "modelscope.models.nlp.task_models.information_extraction"}, "('MODELS', 'relation-extraction', 'information-extraction')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/information_extraction.py", "imports": ["typing", "numpy"], "module": "modelscope.models.nlp.task_models.information_extraction"}, "('MODELS', 'token-classification', 'token-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'part-of-speech', 'token-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'named-entity-recognition', 'token-classification-for-ner')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'transformer-crf', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'token-classification', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'token-classification', 'transformer-crf-for-word-segmentation')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'named-entity-recognition', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'part-of-speech', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'word-segmentation', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'word-segmentation', 'transformer-crf-for-word-segmentation')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, 
"('MODELS', 'fill-mask', 'veco')": {"filepath": "TEMPLATE_PATH/models/nlp/veco/fill_mask.py", "imports": ["transformers"], "module": "modelscope.models.nlp.veco.fill_mask"}, "('MODELS', 'backbone', 'veco')": {"filepath": "TEMPLATE_PATH/models/nlp/veco/backbone.py", "imports": ["transformers"], "module": "modelscope.models.nlp.veco.backbone"}, "('MODELS', 'nli', 'veco')": {"filepath": "TEMPLATE_PATH/models/nlp/veco/text_classification.py", "imports": ["transformers"], "module": "modelscope.models.nlp.veco.text_classification"}, "('MODELS', 'sentiment-classification', 'veco')": {"filepath": "TEMPLATE_PATH/models/nlp/veco/text_classification.py", "imports": ["transformers"], "module": "modelscope.models.nlp.veco.text_classification"}, "('MODELS', 'sentence-similarity', 'veco')": {"filepath": "TEMPLATE_PATH/models/nlp/veco/text_classification.py", "imports": ["transformers"], "module": "modelscope.models.nlp.veco.text_classification"}, "('MODELS', 'text-classification', 'veco')": {"filepath": "TEMPLATE_PATH/models/nlp/veco/text_classification.py", "imports": ["transformers"], "module": "modelscope.models.nlp.veco.text_classification"}, "('MODELS', 'token-classification', 'veco')": {"filepath": "TEMPLATE_PATH/models/nlp/veco/token_classification.py", "imports": ["torch", "transformers"], "module": "modelscope.models.nlp.veco.token_classification"}, "('MODELS', 'text-generation', 'glm130b')": {"filepath": "TEMPLATE_PATH/models/nlp/glm_130b/text_generation.py", "imports": ["random", "stat", "torch", "SwissArmyTransformer", "re", "functools", "sys", "copy", "time", "os", "typing"], "module": "modelscope.models.nlp.glm_130b.text_generation"}, "('MODELS', 'text-summarization', 'mglm')": {"filepath": "TEMPLATE_PATH/models/nlp/mglm/mglm_for_text_summarization.py", "imports": ["random", "numpy", "torch", "megatron_util", "os", "typing"], "module": "modelscope.models.nlp.mglm.mglm_for_text_summarization"}, "('MODELS', 'backbone', 'plug-mental')": {"filepath": "TEMPLATE_PATH/models/nlp/plug_mental/backbone.py", "imports": ["transformers", "torch", "math", "packaging", "dataclasses", "typing"], "module": "modelscope.models.nlp.plug_mental.backbone"}, "('MODELS', 'text-classification', 'plug-mental')": {"filepath": "TEMPLATE_PATH/models/nlp/plug_mental/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.plug_mental.text_classification"}, "('MODELS', 'nli', 'plug-mental')": {"filepath": "TEMPLATE_PATH/models/nlp/plug_mental/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.plug_mental.text_classification"}, "('MODELS', 'sentiment-classification', 'plug-mental')": {"filepath": "TEMPLATE_PATH/models/nlp/plug_mental/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.plug_mental.text_classification"}, "('MODELS', 'sentence-similarity', 'plug-mental')": {"filepath": "TEMPLATE_PATH/models/nlp/plug_mental/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.plug_mental.text_classification"}, "('MODELS', 'zero-shot-classification', 'plug-mental')": {"filepath": "TEMPLATE_PATH/models/nlp/plug_mental/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.plug_mental.text_classification"}, "('MODELS', 'text-generation', 'gpt-moe')": {"filepath": "TEMPLATE_PATH/models/nlp/gpt_moe/text_generation.py", "imports": ["typing", "transformers"], "module": "modelscope.models.nlp.gpt_moe.text_generation"}, "('MODELS', 'translation', 'csanmt-translation')": {"filepath": 
"TEMPLATE_PATH/models/nlp/csanmt/translation.py", "imports": ["tensorflow", "typing", "math", "collections"], "module": "modelscope.models.nlp.csanmt.translation"}, "('MODELS', 'text2text-generation', 'T5')": {"filepath": "TEMPLATE_PATH/models/nlp/T5/text2text_generation.py", "imports": ["transformers", "torch", "copy", "warnings", "typing"], "module": "modelscope.models.nlp.T5.text2text_generation"}, "('MODELS', 'backbone', 'T5')": {"filepath": "TEMPLATE_PATH/models/nlp/T5/backbone.py", "imports": ["transformers", "torch", "math", "copy", "warnings", "os", "typing"], "module": "modelscope.models.nlp.T5.backbone"}, "('HEADS', 'text-classification', 'text-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/text_classification_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.text_classification_head"}, "('HEADS', 'sentence-similarity', 'text-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/text_classification_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.text_classification_head"}, "('HEADS', 'nli', 'text-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/text_classification_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.text_classification_head"}, "('HEADS', 'sentiment-classification', 'text-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/text_classification_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.text_classification_head"}, "('HEADS', 'information-extraction', 'information-extraction')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/infromation_extraction_head.py", "imports": ["torch"], "module": "modelscope.models.nlp.heads.infromation_extraction_head"}, "('HEADS', 'relation-extraction', 'information-extraction')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/infromation_extraction_head.py", "imports": ["torch"], "module": "modelscope.models.nlp.heads.infromation_extraction_head"}, "('HEADS', 'token-classification', 'token-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/token_classification_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.token_classification_head"}, "('HEADS', 'named-entity-recognition', 'token-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/token_classification_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.token_classification_head"}, "('HEADS', 'part-of-speech', 'token-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/token_classification_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.token_classification_head"}, "('HEADS', 'text-generation', 'text-generation')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/text_generation_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.text_generation_head"}, "('HEADS', 'token-classification', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'named-entity-recognition', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'word-segmentation', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": 
"modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'part-of-speech', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'transformer-crf', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'token-classification', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'named-entity-recognition', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'word-segmentation', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'part-of-speech', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'fill-mask', 'roberta-mlm')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/torch_pretrain_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.torch_pretrain_head"}, "('HEADS', 'fill-mask', 'bert-mlm')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/fill_mask_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.fill_mask_head"}, "('HEADS', 'fill-mask', 'fill-mask')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/fill_mask_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.fill_mask_head"}, "('HEADS', 'fill-mask', 'xlm-roberta-mlm')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/fill_mask_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.fill_mask_head"}, "('HEADS', 'text-ranking', 'text-ranking')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/text_ranking_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.text_ranking_head"}, "('BACKBONES', 'backbone', 'bloom')": {"filepath": "TEMPLATE_PATH/models/nlp/bloom/backbone.py", "imports": ["transformers"], "module": "modelscope.models.nlp.bloom.backbone"}, "('MODELS', 'backbone', 'xlm-roberta')": {"filepath": "TEMPLATE_PATH/models/nlp/xlm_roberta/backbone.py", "imports": ["torch", "transformers", "math", "packaging"], "module": "modelscope.models.nlp.xlm_roberta.backbone"}, "('MODELS', 'text-classification', 'peer')": {"filepath": "TEMPLATE_PATH/models/nlp/peer/text_classification.py", "imports": ["copy", "torch"], "module": "modelscope.models.nlp.peer.text_classification"}, "('MODELS', 'nli', 'peer')": {"filepath": "TEMPLATE_PATH/models/nlp/peer/text_classification.py", "imports": ["copy", "torch"], "module": "modelscope.models.nlp.peer.text_classification"}, "('MODELS', 'sentiment-classification', 'peer')": {"filepath": "TEMPLATE_PATH/models/nlp/peer/text_classification.py", "imports": ["copy", "torch"], "module": "modelscope.models.nlp.peer.text_classification"}, "('MODELS', 'sentence-similarity', 'peer')": {"filepath": "TEMPLATE_PATH/models/nlp/peer/text_classification.py", "imports": ["copy", "torch"], "module": 
"modelscope.models.nlp.peer.text_classification"}, "('MODELS', 'zero-shot-classification', 'peer')": {"filepath": "TEMPLATE_PATH/models/nlp/peer/text_classification.py", "imports": ["copy", "torch"], "module": "modelscope.models.nlp.peer.text_classification"}, "('MODELS', 'fid-dialogue', 'fid-T5')": {"filepath": "TEMPLATE_PATH/models/nlp/fid_T5/text_generation.py", "imports": ["torch", "os", "io", "transformers"], "module": "modelscope.models.nlp.fid_T5.text_generation"}, "('MODELS', 'table-question-answering', 'space-T-en')": {"filepath": "TEMPLATE_PATH/models/nlp/space_T_en/text_to_sql.py", "imports": ["torch", "os", "typing", "text2sql_lgesql"], "module": "modelscope.models.nlp.space_T_en.text_to_sql"}, "('MODELS', 'competency-aware-translation', 'canmt')": {"filepath": "TEMPLATE_PATH/models/nlp/canmt/canmt_translation.py", "imports": ["numpy", "torch", "math", "os", "typing"], "module": "modelscope.models.nlp.canmt.canmt_translation"}, "('MODELS', 'text-error-correction', 'bart')": {"filepath": "TEMPLATE_PATH/models/nlp/bart/text_error_correction.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.nlp.bart.text_error_correction"}, "('MODELS', 'text-classification', 'user-satisfaction-estimation')": {"filepath": "TEMPLATE_PATH/models/nlp/use/user_satisfaction_estimation.py", "imports": ["transformers", "numpy", "torch", "os", "typing"], "module": "modelscope.models.nlp.use.user_satisfaction_estimation"}, "('BACKBONES', 'backbone', 'gpt-neo')": {"filepath": "TEMPLATE_PATH/models/nlp/gpt_neo/backbone.py", "imports": ["transformers"], "module": "modelscope.models.nlp.gpt_neo.backbone"}, "('MODELS', 'siamese-uie', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/siamese_uie.py", "imports": ["torch", "copy"], "module": "modelscope.models.nlp.bert.siamese_uie"}, "('MODELS', 'fill-mask', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/fill_mask.py", "imports": [], "module": "modelscope.models.nlp.bert.fill_mask"}, "('MODELS', 'word-alignment', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/word_alignment.py", "imports": ["torch"], "module": "modelscope.models.nlp.bert.word_alignment"}, "('MODELS', 'text-ranking', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/text_ranking.py", "imports": [], "module": "modelscope.models.nlp.bert.text_ranking"}, "('MODELS', 'backbone', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/backbone.py", "imports": ["torch", "transformers", "math", "packaging"], "module": "modelscope.models.nlp.bert.backbone"}, "('MODELS', 'text-classification', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/text_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.text_classification"}, "('MODELS', 'nli', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/text_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.text_classification"}, "('MODELS', 'sentiment-classification', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/text_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.text_classification"}, "('MODELS', 'sentence-similarity', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/text_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.text_classification"}, "('MODELS', 'zero-shot-classification', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/text_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.text_classification"}, "('MODELS', 'sentence-embedding', 'bert')": {"filepath": 
"TEMPLATE_PATH/models/nlp/bert/sentence_embedding.py", "imports": ["torch"], "module": "modelscope.models.nlp.bert.sentence_embedding"}, "('MODELS', 'document-segmentation', 'bert-for-document-segmentation')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/document_segmentation.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.bert.document_segmentation"}, "('MODELS', 'token-classification', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/token_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.token_classification"}, "('MODELS', 'part-of-speech', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/token_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.token_classification"}, "('MODELS', 'word-segmentation', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/token_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.token_classification"}, "('MODELS', 'document-grounded-dialog-rerank', 'doc2bot')": {"filepath": "TEMPLATE_PATH/models/nlp/dgds/document_grounded_dialog_rerank.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.nlp.dgds.document_grounded_dialog_rerank"}, "('MODELS', 'document-grounded-dialog-generate', 'doc2bot')": {"filepath": "TEMPLATE_PATH/models/nlp/dgds/document_grounded_dialog_generate.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.nlp.dgds.document_grounded_dialog_generate"}, "('MODELS', 'document-grounded-dialog-retrieval', 'doc2bot')": {"filepath": "TEMPLATE_PATH/models/nlp/dgds/document_grounded_dialog_retrieval.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.nlp.dgds.document_grounded_dialog_retrieval"}, "('MODELS', 'text-generation', 'gpt3')": {"filepath": "TEMPLATE_PATH/models/nlp/gpt3/text_generation.py", "imports": ["torch", "typing", "transformers", "collections"], "module": "modelscope.models.nlp.gpt3.text_generation"}, "('MODELS', 'fill-mask', 'deberta_v2')": {"filepath": "TEMPLATE_PATH/models/nlp/deberta_v2/fill_mask.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.deberta_v2.fill_mask"}, "('MODELS', 'backbone', 'deberta_v2')": {"filepath": "TEMPLATE_PATH/models/nlp/deberta_v2/backbone.py", "imports": ["torch", "typing", "transformers", "collections"], "module": "modelscope.models.nlp.deberta_v2.backbone"}, "('MODELS', 'code-translation', 'codegeex')": {"filepath": "TEMPLATE_PATH/models/nlp/codegeex/codegeex_for_code_translation.py", "imports": ["torch", "copy", "typing"], "module": "modelscope.models.nlp.codegeex.codegeex_for_code_translation"}, "('MODELS', 'code-generation', 'codegeex')": {"filepath": "TEMPLATE_PATH/models/nlp/codegeex/codegeex_for_code_generation.py", "imports": ["torch", "copy", "typing"], "module": "modelscope.models.nlp.codegeex.codegeex_for_code_generation"}, "('MODELS', 'task-oriented-conversation', 'space-modeling')": {"filepath": "TEMPLATE_PATH/models/nlp/space/dialog_modeling.py", "imports": ["os", "typing"], "module": "modelscope.models.nlp.space.dialog_modeling"}, "('MODELS', 'task-oriented-conversation', 'space-dst')": {"filepath": "TEMPLATE_PATH/models/nlp/space/dialog_state_tracking.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.space.dialog_state_tracking"}, "('MODELS', 'task-oriented-conversation', 'space-intent')": {"filepath": "TEMPLATE_PATH/models/nlp/space/dialog_intent_prediction.py", "imports": ["os", "typing"], "module": "modelscope.models.nlp.space.dialog_intent_prediction"}, 
"('MODELS', 'fid-dialogue', 'fid-plug')": {"filepath": "TEMPLATE_PATH/models/nlp/fid_plug/text_generation.py", "imports": ["torch", "os", "io", "transformers"], "module": "modelscope.models.nlp.fid_plug.text_generation"}, "('BACKBONES', 'backbone', 'gpt2')": {"filepath": "TEMPLATE_PATH/models/nlp/gpt2/backbone.py", "imports": ["transformers"], "module": "modelscope.models.nlp.gpt2.backbone"}, "('MODELS', 'fill-mask', 'megatron-bert')": {"filepath": "TEMPLATE_PATH/models/nlp/megatron_bert/fill_mask.py", "imports": ["torch", "transformers"], "module": "modelscope.models.nlp.megatron_bert.fill_mask"}, "('MODELS', 'backbone', 'megatron-bert')": {"filepath": "TEMPLATE_PATH/models/nlp/megatron_bert/backbone.py", "imports": ["torch", "transformers", "math"], "module": "modelscope.models.nlp.megatron_bert.backbone"}, "('MODELS', 'table-question-answering', 'space-T-cn')": {"filepath": "TEMPLATE_PATH/models/nlp/space_T_cn/table_question_answering.py", "imports": ["transformers", "numpy", "torch", "os", "typing"], "module": "modelscope.models.nlp.space_T_cn.table_question_answering"}, "('MODELS', 'fill-mask', 'ponet')": {"filepath": "TEMPLATE_PATH/models/nlp/ponet/fill_mask.py", "imports": ["torch", "transformers"], "module": "modelscope.models.nlp.ponet.fill_mask"}, "('MODELS', 'backbone', 'ponet')": {"filepath": "TEMPLATE_PATH/models/nlp/ponet/backbone.py", "imports": ["distutils", "transformers", "torch", "math", "packaging"], "module": "modelscope.models.nlp.ponet.backbone"}, "('MODELS', 'document-segmentation', 'ponet-for-document-segmentation')": {"filepath": "TEMPLATE_PATH/models/nlp/ponet/document_segmentation.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.ponet.document_segmentation"}, "('MODELS', 'extractive-summarization', 'ponet-for-document-segmentation')": {"filepath": "TEMPLATE_PATH/models/nlp/ponet/document_segmentation.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.ponet.document_segmentation"}, "('MODELS', 'backbone', 'llama')": {"filepath": "TEMPLATE_PATH/models/nlp/llama/backbone.py", "imports": ["torch", "typing", "transformers", "math"], "module": "modelscope.models.nlp.llama.backbone"}, "('MODELS', 'text-generation', 'llama')": {"filepath": "TEMPLATE_PATH/models/nlp/llama/text_generation.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.llama.text_generation"}, "('MODELS', 'backbone', 'lstm')": {"filepath": "TEMPLATE_PATH/models/nlp/lstm/backbone.py", "imports": ["torch"], "module": "modelscope.models.nlp.lstm.backbone"}, "('MODELS', 'token-classification', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/lstm/token_classification.py", "imports": [], "module": "modelscope.models.nlp.lstm.token_classification"}, "('MODELS', 'named-entity-recognition', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/lstm/token_classification.py", "imports": [], "module": "modelscope.models.nlp.lstm.token_classification"}, "('MODELS', 'part-of-speech', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/lstm/token_classification.py", "imports": [], "module": "modelscope.models.nlp.lstm.token_classification"}, "('MODELS', 'word-segmentation', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/lstm/token_classification.py", "imports": [], "module": "modelscope.models.nlp.lstm.token_classification"}, "('MODELS', 'word-segmentation', 'lstm-crf-for-word-segmentation')": {"filepath": "TEMPLATE_PATH/models/nlp/lstm/token_classification.py", "imports": [], "module": "modelscope.models.nlp.lstm.token_classification"}, 
"('MODELS', 'image-deblurring', 'nafnet')": {"filepath": "TEMPLATE_PATH/models/cv/image_deblur/nafnet_for_image_deblur.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_deblur.nafnet_for_image_deblur"}, "('MODELS', 'image-segmentation', 'vision-middleware')": {"filepath": "TEMPLATE_PATH/models/cv/vision_middleware/model.py", "imports": ["json", "torch", "typing", "os"], "module": "modelscope.models.cv.vision_middleware.model"}, "('MODELS', 'image-quality-assessment-mos', 'image-quality-assessment-man')": {"filepath": "TEMPLATE_PATH/models/cv/image_quality_assessment_man/image_quality_assessment_man.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_quality_assessment_man.image_quality_assessment_man"}, "('MODELS', 'product-retrieval-embedding', 'product-retrieval-embedding')": {"filepath": "TEMPLATE_PATH/models/cv/product_retrieval_embedding/item_model.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.models.cv.product_retrieval_embedding.item_model"}, "('MODELS', 'body-2d-keypoints', 'body-2d-keypoints')": {"filepath": "TEMPLATE_PATH/models/cv/body_2d_keypoints/hrnet_v2.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.body_2d_keypoints.hrnet_v2"}, "('MODELS', 'indoor-layout-estimation', 'panovit-layout-estimation')": {"filepath": "TEMPLATE_PATH/models/cv/indoor_layout_estimation/panovit.py", "imports": ["torch", "os", "yacs", "numpy"], "module": "modelscope.models.cv.indoor_layout_estimation.panovit"}, "('MODELS', 'semantic-segmentation', 'detection')": {"filepath": "TEMPLATE_PATH/models/cv/salient_detection/salient_model.py", "imports": ["PIL", "torch", "cv2", "torchvision", "os"], "module": "modelscope.models.cv.salient_detection.salient_model"}, "('MODELS', 'image-quality-assessment-degradation', 'image-quality-assessment-degradation')": {"filepath": "TEMPLATE_PATH/models/cv/image_quality_assessment_degradation/image_quality_assessment_degradation.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_quality_assessment_degradation.image_quality_assessment_degradation"}, "('MODELS', 'image-portrait-enhancement', 'gpen')": {"filepath": "TEMPLATE_PATH/models/cv/image_portrait_enhancement/image_portrait_enhancement.py", "imports": ["torch", "os", "typing", "math"], "module": "modelscope.models.cv.image_portrait_enhancement.image_portrait_enhancement"}, "('HEADS', 'default', 'MaskScoringNRoIHead')": {"filepath": "TEMPLATE_PATH/models/cv/abnormal_object_detection/mmdet_ms/roi_head/mask_scoring_roi_head.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.abnormal_object_detection.mmdet_ms.roi_head.mask_scoring_roi_head"}, "('ROI_EXTRACTORS', 'default', 'SingleRoINExtractor')": {"filepath": "TEMPLATE_PATH/models/cv/abnormal_object_detection/mmdet_ms/roi_head/roi_extractors/single_level_roi_extractor.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.abnormal_object_detection.mmdet_ms.roi_head.roi_extractors.single_level_roi_extractor"}, "('MODELS', 'image-object-detection', 'MaskScoring')": {"filepath": "TEMPLATE_PATH/models/cv/abnormal_object_detection/mmdet_model.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.abnormal_object_detection.mmdet_model"}, "('MODELS', 'image-classification', 'image-probing-model')": {"filepath": "TEMPLATE_PATH/models/cv/image_probing_model/model.py", "imports": ["json", "torch", "typing", "os"], "module": "modelscope.models.cv.image_probing_model.model"}, 
"('MODELS', 'video-human-matting', 'video-human-matting')": {"filepath": "TEMPLATE_PATH/models/cv/video_human_matting/model.py", "imports": ["numpy", "torch", "torchvision", "os", "typing"], "module": "modelscope.models.cv.video_human_matting.model"}, "('MODELS', 'language-guided-video-summarization', 'clip-it-language-guided-video-summarization')": {"filepath": "TEMPLATE_PATH/models/cv/language_guided_video_summarization/summarizer.py", "imports": ["numpy", "videofeatures_clipit", "torch", "bmt_clipit", "os", "typing", "argparse"], "module": "modelscope.models.cv.language_guided_video_summarization.summarizer"}, "('MODELS', 'face-2d-keypoints', 'flc')": {"filepath": "TEMPLATE_PATH/models/cv/facial_landmark_confidence/flc/facial_landmark_confidence.py", "imports": ["PIL", "numpy", "torch", "cv2", "os"], "module": "modelscope.models.cv.facial_landmark_confidence.flc.facial_landmark_confidence"}, "('MODELS', 'image-body-reshaping', 'image-body-reshaping')": {"filepath": "TEMPLATE_PATH/models/cv/image_body_reshaping/image_body_reshaping.py", "imports": ["numpy", "cv2", "torch", "os", "typing"], "module": "modelscope.models.cv.image_body_reshaping.image_body_reshaping"}, "('MODELS', 'image-segmentation', 'm2fp')": {"filepath": "TEMPLATE_PATH/models/cv/image_human_parsing/m2fp_net.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_human_parsing.m2fp_net"}, "('PREPROCESSORS', 'cv', 'image-sky-change-preprocessor')": {"filepath": "TEMPLATE_PATH/models/cv/image_skychange/preprocessor.py", "imports": ["numbers", "pdb", "numpy", "cv2", "json", "torch", "torchvision", "typing"], "module": "modelscope.models.cv.image_skychange.preprocessor"}, "('MODELS', 'image-skychange', 'image-skychange')": {"filepath": "TEMPLATE_PATH/models/cv/image_skychange/skychange_model.py", "imports": ["pdb", "cv2", "torch", "json", "math", "collections", "time", "os", "typing"], "module": "modelscope.models.cv.image_skychange.skychange_model"}, "('MODELS', 'video-object-segmentation', 'video-object-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/video_object_segmentation/model.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.video_object_segmentation.model"}, "('MODELS', 'face-reconstruction', 'face_reconstruction')": {"filepath": "TEMPLATE_PATH/models/cv/face_reconstruction/models/facerecon_model.py", "imports": ["numpy", "cv2", "torch", "collections", "os"], "module": "modelscope.models.cv.face_reconstruction.models.facerecon_model"}, "('MODELS', 'facial-expression-recognition', 'fer')": {"filepath": "TEMPLATE_PATH/models/cv/facial_expression_recognition/fer/facial_expression_recognition.py", "imports": ["PIL", "numpy", "torch", "cv2", "os"], "module": "modelscope.models.cv.facial_expression_recognition.fer.facial_expression_recognition"}, "('MODELS', 'face-recognition', 'rts-backbone')": {"filepath": "TEMPLATE_PATH/models/cv/face_recognition/torchkit/rts_backbone.py", "imports": ["torch", "os", "math", "collections"], "module": "modelscope.models.cv.face_recognition.torchkit.rts_backbone"}, "('MODELS', 'shop-segmentation', 'shop-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/shop_segmentation/shop_seg_model.py", "imports": ["PIL", "numpy", "torch", "json", "os", "typing"], "module": "modelscope.models.cv.shop_segmentation.shop_seg_model"}, "('MODELS', 'image-segmentation', 'fastinst')": {"filepath": "TEMPLATE_PATH/models/cv/image_instance_segmentation/fastinst_model.py", "imports": ["torch", "os", "typing"], "module": 
"modelscope.models.cv.image_instance_segmentation.fastinst_model"}, "('MODELS', 'image-segmentation', 'cascade_mask_rcnn_swin')": {"filepath": "TEMPLATE_PATH/models/cv/image_instance_segmentation/model.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_instance_segmentation.model"}, "('MODELS', 'image-segmentation', 'maskdino_swin')": {"filepath": "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino_model.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_instance_segmentation.maskdino_model"}, "('MODELS', 'video-text-retrieval', 'vop-retrieval-model')": {"filepath": "TEMPLATE_PATH/models/cv/vop_retrieval/model.py", "imports": ["torch", "os"], "module": "modelscope.models.cv.vop_retrieval.model"}, "('MODELS', 'video-text-retrieval', 'vop-retrieval-model-se')": {"filepath": "TEMPLATE_PATH/models/cv/vop_retrieval/model_se.py", "imports": ["torch", "os"], "module": "modelscope.models.cv.vop_retrieval.model_se"}, "('HEADS', 'default', 'KernelUpdateHeadVideo')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/track/kernel_update_head.py", "imports": ["torch", "mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.video_instance_segmentation.track.kernel_update_head"}, "('MATCH_COST', 'default', 'MaskCost')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/track/mask_hungarian_assigner.py", "imports": ["torch", "scipy", "mmdet", "numpy"], "module": "modelscope.models.cv.video_instance_segmentation.track.mask_hungarian_assigner"}, "('BBOX_ASSIGNERS', 'default', 'MaskHungarianAssignerVideo')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/track/mask_hungarian_assigner.py", "imports": ["torch", "scipy", "mmdet", "numpy"], "module": "modelscope.models.cv.video_instance_segmentation.track.mask_hungarian_assigner"}, "('MODELS', 'video-instance-segmentation', 'swinb-video-instance-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/video_knet.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.video_instance_segmentation.video_knet"}, "('TRANSFORMER_LAYER', 'default', 'KernelUpdator')": {"filepath": "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/kernel_updator.py", "imports": ["torch", "mmcv"], "module": "modelscope.models.cv.video_panoptic_segmentation.head.kernel_updator"}, "('HEADS', 'default', 'KernelUpdateHead')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_update_head.py", "imports": ["torch", "mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.video_instance_segmentation.head.kernel_update_head"}, "('HEADS', 'default', 'KernelFrameIterHeadVideo')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_frame_iter_head.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.video_instance_segmentation.head.kernel_frame_iter_head"}, "('HEADS', 'default', 'ConvKernelHeadVideo')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_head.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.video_instance_segmentation.head.kernel_head"}, "('HEADS', 'default', 'KernelIterHeadVideo')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_iter_head.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.video_instance_segmentation.head.kernel_iter_head"}, "('NECKS', 'default', 'MSDeformAttnPixelDecoder')": {"filepath": 
"TEMPLATE_PATH/models/cv/video_instance_segmentation/neck/msdeformattn_decoder.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.video_instance_segmentation.neck.msdeformattn_decoder"}, "('MODELS', 'image-super-resolution', 'ecbsr')": {"filepath": "TEMPLATE_PATH/models/cv/super_resolution/ecbsr_model.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.super_resolution.ecbsr_model"}, "('PREPROCESSORS', 'cv', 'ocr-detection')": {"filepath": "TEMPLATE_PATH/models/cv/ocr_detection/preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "module": "modelscope.models.cv.ocr_detection.preprocessor"}, "('MODELS', 'ocr-detection', 'OCRDetection')": {"filepath": "TEMPLATE_PATH/models/cv/ocr_detection/model.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.models.cv.ocr_detection.model"}, "('MODELS', 'panorama-depth-estimation', 'unifuse-depth-estimation')": {"filepath": "TEMPLATE_PATH/models/cv/panorama_depth_estimation/unifuse_model.py", "imports": ["torch", "os", "torchvision", "numpy"], "module": "modelscope.models.cv.panorama_depth_estimation.unifuse_model"}, "('MODELS', 'video-object-detection', 'realtime-video-object-detection')": {"filepath": "TEMPLATE_PATH/models/cv/stream_yolo/realtime_video_detector.py", "imports": ["numpy", "cv2", "torch", "logging", "json", "tqdm", "time", "os", "argparse"], "module": "modelscope.models.cv.stream_yolo.realtime_video_detector"}, "('MODELS', 'bad-image-detecting', 'bad-image-detecting')": {"filepath": "TEMPLATE_PATH/models/cv/bad_image_detecting/bad_image_detecting.py", "imports": ["numpy", "torch", "torchvision", "os", "typing"], "module": "modelscope.models.cv.bad_image_detecting.bad_image_detecting"}, "('MODELS', 'human-reconstruction', 'human-reconstruction')": {"filepath": "TEMPLATE_PATH/models/cv/human_reconstruction/Reconstruction.py", "imports": ["PIL", "skimage", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.models.cv.human_reconstruction.Reconstruction"}, "('PREPROCESSORS', 'cv', 'image-driving-perception-preprocessor')": {"filepath": "TEMPLATE_PATH/models/cv/image_driving_perception/preprocessor.py", "imports": ["cv2", "torch", "typing", "numpy"], "module": "modelscope.models.cv.image_driving_perception.preprocessor"}, "('MODELS', 'image-driving-perception', 'yolopv2')": {"filepath": "TEMPLATE_PATH/models/cv/image_driving_perception/image_driving_percetion_model.py", "imports": ["numpy", "cv2", "torch", "os", "typing"], "module": "modelscope.models.cv.image_driving_perception.image_driving_percetion_model"}, "('MODELS', 'video-object-detection', 'longshortnet')": {"filepath": "TEMPLATE_PATH/models/cv/video_streaming_perception/longshortnet/longshortnet.py", "imports": ["numpy", "cv2", "torch", "logging", "json", "tqdm", "time", "os", "argparse"], "module": "modelscope.models.cv.video_streaming_perception.longshortnet.longshortnet"}, "('MODELS', 'image-paintbyexample', 'Stablediffusion-Paintbyexample')": {"filepath": "TEMPLATE_PATH/models/cv/image_paintbyexample/model.py", "imports": ["torch", "paint_ldm", "omegaconf", "os", "typing"], "module": "modelscope.models.cv.image_paintbyexample.model"}, "('MODELS', 'image-inpainting', 'FFTInpainting')": {"filepath": "TEMPLATE_PATH/models/cv/image_inpainting/model.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_inpainting.model"}, "('BBOX_CODERS', 'default', 'NMSFreeCoder')": {"filepath": 
"TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.core.bbox.coders.nms_free_coder"}, "('MATCH_COST', 'default', 'BBox3DL1Cost')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/match_costs/match_cost.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.core.bbox.match_costs.match_cost"}, "('BBOX_ASSIGNERS', 'default', 'HungarianAssigner3D')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py", "imports": ["torch", "scipy", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.core.bbox.assigners.hungarian_assigner_3d"}, "('DATASETS', 'default', 'CustomNuScenesDataset')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/nuscenes_dataset.py", "imports": ["mmdet3d", "mmdet", "numpy"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.nuscenes_dataset"}, "('PIPELINES', 'default', 'LoadMultiViewImageFromMultiSweepsFiles')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/loading.py", "imports": ["mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.pipelines.loading"}, "('PIPELINES', 'default', 'PadMultiViewImage')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/transform_3d.py", "imports": ["PIL", "copy", "mmcv", "numpy", "torch", "mmdet3d", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.pipelines.transform_3d"}, "('PIPELINES', 'default', 'NormalizeMultiviewImage')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/transform_3d.py", "imports": ["PIL", "copy", "mmcv", "numpy", "torch", "mmdet3d", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.pipelines.transform_3d"}, "('PIPELINES', 'default', 'ResizeCropFlipImage')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/transform_3d.py", "imports": ["PIL", "copy", "mmcv", "numpy", "torch", "mmdet3d", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.pipelines.transform_3d"}, "('HEADS', 'default', 'PETRv2DEDNHead')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/dense_heads/petrv2_dednhead.py", "imports": ["mmcv", "numpy", "torch", "math", "copy", "mmdet3d", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.dense_heads.petrv2_dednhead"}, "('NECKS', 'default', 'CPFPN')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/necks/cp_fpn.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.necks.cp_fpn"}, "('TRANSFORMER', 'default', 'PETRDNTransformer')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/petr_transformer.py", "imports": ["copy", "warnings", "mmcv", "torch", "mmdet", "typing", "math"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.petr_transformer"}, "('TRANSFORMER_LAYER', 'default', 
'PETRTransformerDecoderLayer')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/petr_transformer.py", "imports": ["copy", "warnings", "mmcv", "torch", "mmdet", "typing", "math"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.petr_transformer"}, "('ATTENTION', 'default', 'PETRMultiheadAttention')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/petr_transformer.py", "imports": ["copy", "warnings", "mmcv", "torch", "mmdet", "typing", "math"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.petr_transformer"}, "('TRANSFORMER_LAYER_SEQUENCE', 'default', 'PETRTransformerEncoder')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/petr_transformer.py", "imports": ["copy", "warnings", "mmcv", "torch", "mmdet", "typing", "math"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.petr_transformer"}, "('TRANSFORMER_LAYER_SEQUENCE', 'default', 'PETRTransformerDecoder')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/petr_transformer.py", "imports": ["copy", "warnings", "mmcv", "torch", "mmdet", "typing", "math"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.petr_transformer"}, "('POSITIONAL_ENCODING', 'default', 'SinePositionalEncoding3D')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/positional_encoding.py", "imports": ["torch", "mmcv", "math"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.positional_encoding"}, "('BACKBONES', 'default', 'VoVNet')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/backbones/vovnet.py", "imports": ["torch", "mmdet", "mmcv", "collections"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.backbones.vovnet"}, "('DETECTORS', 'default', 'Petr3D')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/detectors/petr3d.py", "imports": ["mmcv", "numpy", "torch", "mmdet3d", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.detectors.petr3d"}, "('MODELS', 'object-detection-3d', 'depe')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/depe_detect.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.models.cv.object_detection_3d.depe.depe_detect"}, "('MODELS', 'image-quality-assessment-mos', 'image-quality-assessment-mos')": {"filepath": "TEMPLATE_PATH/models/cv/image_quality_assessment_mos/image_quality_assessment_mos.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_quality_assessment_mos.image_quality_assessment_mos"}, "('MODELS', 'image-debanding', 'rrdb')": {"filepath": "TEMPLATE_PATH/models/cv/image_debanding/rrdb/rrdb_image_debanding.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_debanding.rrdb.rrdb_image_debanding"}, "('MODELS', 'image-demoireing', 'image-restoration')": {"filepath": "TEMPLATE_PATH/models/cv/image_restoration/image_restoration_model.py", "imports": ["cv2", "torch", "os", "numpy"], "module": "modelscope.models.cv.image_restoration.image_restoration_model"}, "('MODELS', 'vision-efficient-tuning', 'vision-efficient-tuning')": {"filepath": "TEMPLATE_PATH/models/cv/vision_efficient_tuning/model.py", "imports": 
["torch", "typing"], "module": "modelscope.models.cv.vision_efficient_tuning.model"}, "('MODELS', 'movie-scene-segmentation', 'resnet50-bert')": {"filepath": "TEMPLATE_PATH/models/cv/movie_scene_segmentation/model.py", "imports": ["PIL", "numpy", "torch", "einops", "tqdm", "math", "shotdetect_scenedetect_lgss", "torchvision", "os", "typing"], "module": "modelscope.models.cv.movie_scene_segmentation.model"}, "('MODELS', 'video-summarization', 'pgl-video-summarization')": {"filepath": "TEMPLATE_PATH/models/cv/video_summarization/summarizer.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.models.cv.video_summarization.summarizer"}, "('MODELS', 'lineless-table-recognition', 'LoreModel')": {"filepath": "TEMPLATE_PATH/models/cv/table_recognition/model_lore.py", "imports": ["numpy", "torch", "math", "copy", "os", "typing"], "module": "modelscope.models.cv.table_recognition.model_lore"}, "('MODELS', 'image-matching', 'quadtree-attention-image-matching')": {"filepath": "TEMPLATE_PATH/models/cv/image_matching/quadtree_attention_model.py", "imports": ["numpy", "cv2", "torch", "pathlib", "os"], "module": "modelscope.models.cv.image_matching.quadtree_attention_model"}, "('MODELS', 'image-object-detection', 'tinynas-detection')": {"filepath": "TEMPLATE_PATH/models/cv/tinynas_detection/tinynas_detector.py", "imports": [], "module": "modelscope.models.cv.tinynas_detection.tinynas_detector"}, "('MODELS', 'domain-specific-object-detection', 'tinynas-damoyolo')": {"filepath": "TEMPLATE_PATH/models/cv/tinynas_detection/tinynas_damoyolo.py", "imports": [], "module": "modelscope.models.cv.tinynas_detection.tinynas_damoyolo"}, "('MODELS', 'image-object-detection', 'tinynas-damoyolo')": {"filepath": "TEMPLATE_PATH/models/cv/tinynas_detection/tinynas_damoyolo.py", "imports": [], "module": "modelscope.models.cv.tinynas_detection.tinynas_damoyolo"}, "('MODELS', 'nerf-recon-acc', 'nerf-recon-acc')": {"filepath": "TEMPLATE_PATH/models/cv/nerf_recon_acc/nerf_recon_acc.py", "imports": ["numpy", "cv2", "torch", "tqdm", "time", "os", "glob"], "module": "modelscope.models.cv.nerf_recon_acc.nerf_recon_acc"}, "('PREPROCESSORS', 'cv', 'nerf-recon-acc-preprocessor')": {"filepath": "TEMPLATE_PATH/models/cv/nerf_recon_acc/nerf_preprocess.py", "imports": ["subprocess", "tensorflow", "numpy", "cv2", "glob", "os", "typing"], "module": "modelscope.models.cv.nerf_recon_acc.nerf_preprocess"}, "('MODELS', 'video-deinterlace', 'video-deinterlace')": {"filepath": "TEMPLATE_PATH/models/cv/video_deinterlace/UNet_for_video_deinterlace.py", "imports": ["torch", "os", "copy", "typing"], "module": "modelscope.models.cv.video_deinterlace.UNet_for_video_deinterlace"}, "('MODELS', 'image-depth-estimation', 'bts-depth-estimation')": {"filepath": "TEMPLATE_PATH/models/cv/image_depth_estimation_bts/depth_estimation_bts_model.py", "imports": ["torch", "os"], "module": "modelscope.models.cv.image_depth_estimation_bts.depth_estimation_bts_model"}, "('MODELS', 'image-fewshot-detection', 'defrcn')": {"filepath": "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/defrcn_for_fewshot.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_defrcn_fewshot.defrcn_for_fewshot"}, "('PREPROCESSORS', 'cv', 'ocr-recognition')": {"filepath": "TEMPLATE_PATH/models/cv/ocr_recognition/preprocessor.py", "imports": ["PIL", "numpy", "torch", "cv2", "os"], "module": "modelscope.models.cv.ocr_recognition.preprocessor"}, "('MODELS', 'ocr-recognition', 'OCRRecognition')": {"filepath": 
"TEMPLATE_PATH/models/cv/ocr_recognition/model.py", "imports": ["torch", "os"], "module": "modelscope.models.cv.ocr_recognition.model"}, "('TRACKERS', 'default', 'QuasiDenseEmbedTracker')": {"filepath": "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/track/quasi_dense_embed_tracker.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.video_panoptic_segmentation.track.quasi_dense_embed_tracker"}, "('HEADS', 'default', 'VideoKernelUpdateHead')": {"filepath": "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/kernel_update_head.py", "imports": ["torch", "mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.video_panoptic_segmentation.head.kernel_update_head"}, "('NECKS', 'default', 'SemanticFPNWrapper')": {"filepath": "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/semantic_fpn_wrapper.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.video_panoptic_segmentation.head.semantic_fpn_wrapper"}, "('HEADS', 'default', 'VideoKernelIterHead')": {"filepath": "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/kernel_iter_head.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.video_panoptic_segmentation.head.kernel_iter_head"}, "('MODELS', 'video-panoptic-segmentation', 'swinb-video-panoptic-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/video_k_net.py", "imports": ["torch", "mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.video_panoptic_segmentation.video_k_net"}, "('MODELS', 'open-vocabulary-detection', 'open-vocabulary-detection-vild')": {"filepath": "TEMPLATE_PATH/models/cv/open_vocabulary_detection_vild/vild.py", "imports": ["tensorflow", "numpy", "torch", "scipy", "clip", "os", "typing"], "module": "modelscope.models.cv.open_vocabulary_detection_vild.vild"}, "('MODELS', 'image-reid-person', 'passvitb')": {"filepath": "TEMPLATE_PATH/models/cv/image_reid_person/pass_model.py", "imports": ["torch", "os", "enum"], "module": "modelscope.models.cv.image_reid_person.pass_model"}, "('MODELS', 'image-face-fusion', 'image-face-fusion')": {"filepath": "TEMPLATE_PATH/models/cv/image_face_fusion/image_face_fusion.py", "imports": ["PIL", "numpy", "torch", "cv2", "collections", "torchvision", "os", "typing"], "module": "modelscope.models.cv.image_face_fusion.image_face_fusion"}, "('MODELS', 'product-segmentation', 'product-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/product_segmentation/seg_infer.py", "imports": ["PIL", "torch", "cv2", "numpy"], "module": "modelscope.models.cv.product_segmentation.seg_infer"}, "('MODELS', 'controllable-image-generation', 'controllable-image-generation')": {"filepath": "TEMPLATE_PATH/models/cv/controllable_image_generation/controlnet.py", "imports": ["PIL", "random", "numpy", "cv2", "torch", "einops", "tempfile", "sys", "math", "control_ldm", "os", "typing"], "module": "modelscope.models.cv.controllable_image_generation.controlnet"}, "('MODELS', 'video-inpainting', 'video-inpainting')": {"filepath": "TEMPLATE_PATH/models/cv/video_inpainting/inpainting_model.py", "imports": ["torch", "torchvision", "math", "numpy"], "module": "modelscope.models.cv.video_inpainting.inpainting_model"}, "('MODELS', 'image-multi-view-depth-estimation', 'image-casmvs-depth-estimation')": {"filepath": "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/casmvs_model.py", "imports": ["numpy", "cv2", "torch", "os", "easydict"], "module": "modelscope.models.cv.image_mvs_depth_estimation.casmvs_model"}, "('MODELS', 'image-classification', 'bnext')": 
{"filepath": "TEMPLATE_PATH/models/cv/image_binary_quant_classification/binary_quant_model.py", "imports": ["torch", "os", "collections"], "module": "modelscope.models.cv.image_binary_quant_classification.binary_quant_model"}, "('MODELS', 'body-3d-keypoints', 'hdformer')": {"filepath": "TEMPLATE_PATH/models/cv/body_3d_keypoints/hdformer/hdformer_detector.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.models.cv.body_3d_keypoints.hdformer.hdformer_detector"}, "('MODELS', 'body-3d-keypoints', 'body-3d-keypoints')": {"filepath": "TEMPLATE_PATH/models/cv/body_3d_keypoints/cannonical_pose/body_3d_pose.py", "imports": ["numpy", "torch", "logging", "os", "typing"], "module": "modelscope.models.cv.body_3d_keypoints.cannonical_pose.body_3d_pose"}, "('MODELS', 'video-frame-interpolation', 'video-frame-interpolation')": {"filepath": "TEMPLATE_PATH/models/cv/video_frame_interpolation/VFINet_for_video_frame_interpolation.py", "imports": ["torch", "os", "copy", "typing"], "module": "modelscope.models.cv.video_frame_interpolation.VFINet_for_video_frame_interpolation"}, "('HEADS', 'default', 'RPNNHead')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/dense_heads/rpn_head.py", "imports": ["torch", "copy", "mmcv", "mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.dense_heads.rpn_head"}, "('HEADS', 'default', 'AnchorNHead')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/dense_heads/anchor_head.py", "imports": ["mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.dense_heads.anchor_head"}, "('NECKS', 'default', 'FPNF')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/necks/fpn.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.necks.fpn"}, "('BACKBONES', 'default', 'ViT')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/backbones/vit.py", "imports": ["timm", "torch", "functools", "math", "mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.backbones.vit"}, "('HEADS', 'default', 'ConvFCBBoxNHead')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.roi_heads.bbox_heads.convfc_bbox_head"}, "('HEADS', 'default', 'Shared2FCBBoxNHead')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.roi_heads.bbox_heads.convfc_bbox_head"}, "('HEADS', 'default', 'Shared4Conv1FCBBoxNHead')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.roi_heads.bbox_heads.convfc_bbox_head"}, "('HEADS', 'default', 'FCNMaskNHead')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/fcn_mask_head.py", "imports": ["mmcv", "numpy", "torch", "warnings", "mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.roi_heads.mask_heads.fcn_mask_head"}, "('MODELS', 'human-detection', 'detection')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_model.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.object_detection.mmdet_model"}, "('MODELS', 'image-object-detection', 'detection')": {"filepath": 
"TEMPLATE_PATH/models/cv/object_detection/mmdet_model.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.object_detection.mmdet_model"}, "('MODELS', 'pedestrian-attribute-recognition', 'pedestrian-attribute-recognition')": {"filepath": "TEMPLATE_PATH/models/cv/pedestrian_attribute_recognition/model.py", "imports": ["torch", "os", "torchvision", "numpy"], "module": "modelscope.models.cv.pedestrian_attribute_recognition.model"}, "('MODELS', 'pointcloud-sceneflow-estimation', 'rcp-sceneflow-estimation')": {"filepath": "TEMPLATE_PATH/models/cv/pointcloud_sceneflow_estimation/rcp_model.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.pointcloud_sceneflow_estimation.rcp_model"}, "('MODELS', 'video-stabilization', 'video-stabilization')": {"filepath": "TEMPLATE_PATH/models/cv/video_stabilization/DUTRAFTStabilizer.py", "imports": ["numpy", "cv2", "torch", "tempfile", "sys", "math", "os", "typing"], "module": "modelscope.models.cv.video_stabilization.DUTRAFTStabilizer"}, "('MODELS', 'video-depth-estimation', 'dro-resnet18-depth-estimation')": {"filepath": "TEMPLATE_PATH/models/cv/video_depth_estimation/dro_model.py", "imports": ["numpy", "cv2", "torch", "tqdm", "os", "glob"], "module": "modelscope.models.cv.video_depth_estimation.dro_model"}, "('MODELS', 'image-object-detection', 'vidt')": {"filepath": "TEMPLATE_PATH/models/cv/vidt/model.py", "imports": ["torch", "os"], "module": "modelscope.models.cv.vidt.model"}, "('MODELS', 'face-human-hand-detection', 'face-human-hand-detection')": {"filepath": "TEMPLATE_PATH/models/cv/face_human_hand_detection/det_infer.py", "imports": ["cv2", "torch", "numpy"], "module": "modelscope.models.cv.face_human_hand_detection.det_infer"}, "('MODELS', 'referring-video-object-segmentation', 'swinT-referring-video-object-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/model.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.referring_video_object_segmentation.model"}, "('MODELS', 'hand-static', 'hand-static')": {"filepath": "TEMPLATE_PATH/models/cv/hand_static/hand_model.py", "imports": ["PIL", "numpy", "torch", "cv2", "sys", "torchvision", "os"], "module": "modelscope.models.cv.hand_static.hand_model"}, "('MODELS', 'image-depth-estimation', 'newcrfs-depth-estimation')": {"filepath": "TEMPLATE_PATH/models/cv/image_depth_estimation/newcrfs_model.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.image_depth_estimation.newcrfs_model"}, "('MODELS', 'image-colorization', 'ddcolor')": {"filepath": "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/ddcolor_for_image_colorization.py", "imports": ["numpy", "torch", "copy", "os", "typing"], "module": "modelscope.models.cv.image_colorization.ddcolor.ddcolor_for_image_colorization"}, "('MODELS', 'face-detection', 'retinaface')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/retinaface/detection.py", "imports": ["cv2", "torch", "numpy"], "module": "modelscope.models.cv.face_detection.retinaface.detection"}, "('MODELS', 'face-detection', 'mtcnn')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/mtcnn/models/detector.py", "imports": ["PIL", "torch", "os", "numpy"], "module": "modelscope.models.cv.face_detection.mtcnn.models.detector"}, "('MODELS', 'face-detection', 'ulfd')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/detection.py", "imports": ["cv2", "torch", "os", "numpy"], "module": "modelscope.models.cv.face_detection.ulfd_slim.detection"}, "('MODELS', 
'face-detection', 'scrfd')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/scrfd_detect.py", "imports": ["numpy", "torch", "copy", "os", "typing"], "module": "modelscope.models.cv.face_detection.scrfd.scrfd_detect"}, "('MODELS', 'card-detection', 'scrfd')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/scrfd_detect.py", "imports": ["numpy", "torch", "copy", "os", "typing"], "module": "modelscope.models.cv.face_detection.scrfd.scrfd_detect"}, "('DATASETS', 'default', 'RetinaFaceDataset')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/retinaface.py", "imports": ["mmdet", "numpy"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.retinaface"}, "('PIPELINES', 'default', 'RotateV2')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/auto_augment.py", "imports": ["copy", "mmcv", "numpy", "cv2", "mmdet"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.auto_augment"}, "('PIPELINES', 'default', 'ResizeV2')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/transforms.py", "imports": ["mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.transforms"}, "('PIPELINES', 'default', 'RandomFlipV2')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/transforms.py", "imports": ["mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.transforms"}, "('PIPELINES', 'default', 'RandomSquareCrop')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/transforms.py", "imports": ["mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.transforms"}, "('PIPELINES', 'default', 'LoadAnnotationsV2')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/loading.py", "imports": ["os", "mmdet", "numpy", "pycocotools"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.loading"}, "('PIPELINES', 'default', 'DefaultFormatBundleV2')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/formating.py", "imports": ["torch", "mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.formating"}, "('HEADS', 'default', 'SCRFDHead')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/dense_heads/scrfd_head.py", "imports": ["torch", "mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.dense_heads.scrfd_head"}, "('BACKBONES', 'default', 'MasterNet')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/master_net.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.backbones.master_net"}, "('BACKBONES', 'default', 'MobileNetV1')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/mobilenet.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.backbones.mobilenet"}, "('BACKBONES', 'default', 'ResNetV1e')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/resnet.py", "imports": ["torch", "mmcv", "mmdet"], "module": 
"modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.backbones.resnet"}, "('DETECTORS', 'default', 'SCRFD')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/scrfd.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors.scrfd"}, "('DETECTORS', 'default', 'CustomSingleStageDetector')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/single_stage.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors.single_stage"}, "('DETECTORS', 'default', 'TinyMog')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/tinymog.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors.tinymog"}, "('MODELS', 'face-detection', 'tinymog')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/tinymog_detect.py", "imports": ["torch", "os", "copy", "typing"], "module": "modelscope.models.cv.face_detection.scrfd.tinymog_detect"}, "('PREPROCESSORS', 'cv', 'object-detection-scrfd')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/preprocessor.py", "imports": ["PIL", "typing", "numpy"], "module": "modelscope.models.cv.face_detection.scrfd.preprocessor"}, "('MODELS', 'face-detection', 'damofd')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/damofd_detect.py", "imports": ["torch", "os", "copy", "typing"], "module": "modelscope.models.cv.face_detection.scrfd.damofd_detect"}, "('MODELS', 'face-detection', 'mogface')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/mogface/models/detectors.py", "imports": ["cv2", "torch", "os", "numpy"], "module": "modelscope.models.cv.face_detection.mogface.models.detectors"}, "('MODELS', 'image-classification', 'EasyRobustModel')": {"filepath": "TEMPLATE_PATH/models/cv/robust_image_classification/easyrobust_model.py", "imports": ["torch", "os"], "module": "modelscope.models.cv.robust_image_classification.easyrobust_model"}, "('MODELS', 'semantic-segmentation', 'ddpm')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/ddpm_segmentation_model.py", "imports": ["torch", "os", "typing", "ddpm_guided_diffusion"], "module": "modelscope.models.cv.image_semantic_segmentation.ddpm_segmentation_model"}, "('PIPELINES', 'default', 'ResizeToMultiple')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/utils/data_process_func.py", "imports": ["mmcv", "mmdet"], "module": "modelscope.models.cv.image_semantic_segmentation.vit_adapter.utils.data_process_func"}, "('BACKBONES', 'default', 'BEiTAdapter')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/beit_adapter.py", "imports": ["timm", "torch", "logging", "math", "mmdet"], "module": "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.backbone.beit_adapter"}, "('BACKBONES', 'default', 'BASEBEiT')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/base/beit.py", "imports": ["timm", "mmcv", "torch", "mmdet", "functools", "math"], "module": "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.backbone.base.beit"}, "('DETECTORS', 'default', 'EncoderDecoderMask2Former')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/encoder_decoder_mask2former.py", "imports": ["torch", "mmdet"], "module": 
"modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.segmentors.encoder_decoder_mask2former"}, "('HEADS', 'default', 'Mask2FormerHeadFromMMSeg')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/mask2former_head_from_mmseg.py", "imports": ["torch", "copy", "mmcv", "mmdet"], "module": "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.decode_heads.mask2former_head_from_mmseg"}, "('MODELS', 'image-segmentation', 'swinL-semantic-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/semantic_seg_model.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.image_semantic_segmentation.semantic_seg_model"}, "('MODELS', 'image-segmentation', 'vitadapter-semantic-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/semantic_seg_model.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.image_semantic_segmentation.semantic_seg_model"}, "('HEADS', 'default', 'MaskFormerSemanticHead')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/pan_merge/maskformer_semantic_head.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.image_semantic_segmentation.pan_merge.maskformer_semantic_head"}, "('MODELS', 'text-driven-segmentation', 'text-driven-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/text_driven_segmentation/lseg_model.py", "imports": ["PIL", "numpy", "torch", "json", "os", "typing"], "module": "modelscope.models.cv.text_driven_segmentation.lseg_model"}, "('MODELS', 'crowd-counting', 'HRNetCrowdCounting')": {"filepath": "TEMPLATE_PATH/models/cv/crowd_counting/cc_model.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.crowd_counting.cc_model"}, "('MODELS', 'image-segmentation', 'swinL-panoptic-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/image_panoptic_segmentation/panseg_model.py", "imports": ["torch", "os"], "module": "modelscope.models.cv.image_panoptic_segmentation.panseg_model"}, "('MODELS', 'face-emotion', 'face-emotion')": {"filepath": "TEMPLATE_PATH/models/cv/face_emotion/emotion_model.py", "imports": ["torch", "os", "sys"], "module": "modelscope.models.cv.face_emotion.emotion_model"}, "('MODELS', 'video-super-resolution', 'msrresnet-lite')": {"filepath": "TEMPLATE_PATH/models/cv/video_super_resolution/msrresnet_lite_model.py", "imports": ["torch", "os", "functools", "typing"], "module": "modelscope.models.cv.video_super_resolution.msrresnet_lite_model"}, "('MODELS', 'video-super-resolution', 'real-basicvsr')": {"filepath": "TEMPLATE_PATH/models/cv/video_super_resolution/real_basicvsr_for_video_super_resolution.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.video_super_resolution.real_basicvsr_for_video_super_resolution"}, "('MODELS', 'face-attribute-recognition', 'fairface')": {"filepath": "TEMPLATE_PATH/models/cv/face_attribute_recognition/fair_face/face_attribute_recognition.py", "imports": ["PIL", "numpy", "torch", "cv2", "torchvision", "os"], "module": "modelscope.models.cv.face_attribute_recognition.fair_face.face_attribute_recognition"}, "('MODELS', 'image-denoising', 'nafnet')": {"filepath": "TEMPLATE_PATH/models/cv/image_denoise/nafnet_for_image_denoise.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_denoise.nafnet_for_image_denoise"}, "('MODELS', 'image-classification', 'ClassificationModel')": {"filepath": "TEMPLATE_PATH/models/cv/image_classification/mmcls_model.py", 
"imports": ["os"], "module": "modelscope.models.cv.image_classification.mmcls_model"}, "('BACKBONES', 'default', 'BEiTv2')": {"filepath": "TEMPLATE_PATH/models/cv/image_classification/backbones/beit_v2.py", "imports": ["itertools", "mmcv", "torch", "einops", "functools", "mmcls", "math", "collections", "warnings", "os", "typing"], "module": "modelscope.models.cv.image_classification.backbones.beit_v2"}, "('BACKBONES', 'default', 'NextViT')": {"filepath": "TEMPLATE_PATH/models/cv/image_classification/backbones/nextvit.py", "imports": ["itertools", "mmcv", "torch", "einops", "functools", "mmcls", "math", "collections", "warnings", "os", "typing"], "module": "modelscope.models.cv.image_classification.backbones.nextvit"}, "('MODELS', 'image-classification', 'content-check')": {"filepath": "TEMPLATE_PATH/models/cv/image_classification/resnet50_cc.py", "imports": ["torch", "math", "collections", "torchvision", "os"], "module": "modelscope.models.cv.image_classification.resnet50_cc"}, "('MODELS', 'image-color-enhancement', 'deeplpfnet')": {"filepath": "TEMPLATE_PATH/models/cv/image_color_enhance/deeplpf/deeplpf_image_color_enhance.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_color_enhance.deeplpf.deeplpf_image_color_enhance"}, "('MODELS', 'image-color-enhancement', 'csrnet')": {"filepath": "TEMPLATE_PATH/models/cv/image_color_enhance/image_color_enhance.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_color_enhance.image_color_enhance"}, "('MODELS', 'image-color-enhancement', 'adaint')": {"filepath": "TEMPLATE_PATH/models/cv/image_color_enhance/adaint/adaint.py", "imports": ["numbers", "torch", "torchvision", "os", "typing"], "module": "modelscope.models.cv.image_color_enhance.adaint.adaint"}, "('METRICS', 'default', 'image-quality-assessment-degradation-metric')": {"filepath": "TEMPLATE_PATH/metrics/image_quality_assessment_degradation_metric.py", "imports": ["numpy", "cv2", "torch", "scipy", "tempfile", "sys", "collections", "tqdm", "os", "typing"], "module": "modelscope.metrics.image_quality_assessment_degradation_metric"}, "('METRICS', 'default', 'prediction-saving-wrapper')": {"filepath": "TEMPLATE_PATH/metrics/prediction_saving_wrapper.py", "imports": ["typing", "sklearn", "numpy"], "module": "modelscope.metrics.prediction_saving_wrapper"}, "('METRICS', 'default', 'video-stabilization-metric')": {"filepath": "TEMPLATE_PATH/metrics/video_stabilization_metric.py", "imports": ["numpy", "cv2", "tqdm", "tempfile", "sys", "os", "typing"], "module": "modelscope.metrics.video_stabilization_metric"}, "('METRICS', 'default', 'ppl')": {"filepath": "TEMPLATE_PATH/metrics/ppl_metric.py", "imports": ["torch", "typing", "math", "numpy"], "module": "modelscope.metrics.ppl_metric"}, "('METRICS', 'default', 'inbatch_recall')": {"filepath": "TEMPLATE_PATH/metrics/inbatch_recall_metric.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.metrics.inbatch_recall_metric"}, "('METRICS', 'default', 'loss-metric')": {"filepath": "TEMPLATE_PATH/metrics/loss_metric.py", "imports": ["typing", "sklearn", "numpy"], "module": "modelscope.metrics.loss_metric"}, "('METRICS', 'default', 'ocr-recognition-metric')": {"filepath": "TEMPLATE_PATH/metrics/ocr_recognition_metric.py", "imports": ["torch", "edit_distance", "typing", "numpy"], "module": "modelscope.metrics.ocr_recognition_metric"}, "('METRICS', 'default', 'mAP')": {"filepath": "TEMPLATE_PATH/metrics/map_metric.py", "imports": ["typing", "numpy"], "module": 
"modelscope.metrics.map_metric"}, "('METRICS', 'default', 'image-colorization-metric')": {"filepath": "TEMPLATE_PATH/metrics/image_colorization_metric.py", "imports": ["numpy", "cv2", "torch", "scipy", "torchvision", "typing"], "module": "modelscope.metrics.image_colorization_metric"}, "('METRICS', 'default', 'seq-cls-metric')": {"filepath": "TEMPLATE_PATH/metrics/sequence_classification_metric.py", "imports": ["typing", "sklearn", "numpy"], "module": "modelscope.metrics.sequence_classification_metric"}, "('METRICS', 'default', 'audio-noise-metric')": {"filepath": "TEMPLATE_PATH/metrics/audio_noise_metric.py", "imports": ["typing"], "module": "modelscope.metrics.audio_noise_metric"}, "('METRICS', 'default', 'translation-evaluation-metric')": {"filepath": "TEMPLATE_PATH/metrics/translation_evaluation_metric.py", "imports": ["pandas", "typing", "importlib"], "module": "modelscope.metrics.translation_evaluation_metric"}, "('METRICS', 'default', 'video-frame-interpolation-metric')": {"filepath": "TEMPLATE_PATH/metrics/video_frame_interpolation_metric.py", "imports": ["numpy", "torch", "lpips", "math", "typing"], "module": "modelscope.metrics.video_frame_interpolation_metric"}, "('METRICS', 'default', 'image-inpainting-metric')": {"filepath": "TEMPLATE_PATH/metrics/image_inpainting_metric.py", "imports": ["torch", "scipy", "typing", "numpy"], "module": "modelscope.metrics.image_inpainting_metric"}, "('METRICS', 'default', 'image-denoise-metric')": {"filepath": "TEMPLATE_PATH/metrics/image_denoise_metric.py", "imports": ["cv2", "torch", "typing", "numpy"], "module": "modelscope.metrics.image_denoise_metric"}, "('METRICS', 'default', 'referring-video-object-segmentation-metric')": {"filepath": "TEMPLATE_PATH/metrics/referring_video_object_segmentation_metric.py", "imports": ["numpy", "pycocotools", "torch", "tqdm", "typing"], "module": "modelscope.metrics.referring_video_object_segmentation_metric"}, "('METRICS', 'default', 'token-cls-metric')": {"filepath": "TEMPLATE_PATH/metrics/token_classification_metric.py", "imports": ["typing", "numpy", "importlib"], "module": "modelscope.metrics.token_classification_metric"}, "('METRICS', 'default', 'video-summarization-metric')": {"filepath": "TEMPLATE_PATH/metrics/video_summarization_metric.py", "imports": ["typing", "numpy"], "module": "modelscope.metrics.video_summarization_metric"}, "('METRICS', 'default', 'image-quality-assessment-mos-metric')": {"filepath": "TEMPLATE_PATH/metrics/image_quality_assessment_mos_metric.py", "imports": ["numpy", "cv2", "torch", "scipy", "tempfile", "sys", "tqdm", "os", "typing"], "module": "modelscope.metrics.image_quality_assessment_mos_metric"}, "('METRICS', 'default', 'ned')": {"filepath": "TEMPLATE_PATH/metrics/ned_metric.py", "imports": ["typing", "numpy"], "module": "modelscope.metrics.ned_metric"}, "('METRICS', 'default', 'text-ranking-metric')": {"filepath": "TEMPLATE_PATH/metrics/text_ranking_metric.py", "imports": ["typing", "numpy"], "module": "modelscope.metrics.text_ranking_metric"}, "('METRICS', 'default', 'movie-scene-segmentation-metric')": {"filepath": "TEMPLATE_PATH/metrics/movie_scene_segmentation_metric.py", "imports": ["typing", "numpy"], "module": "modelscope.metrics.movie_scene_segmentation_metric"}, "('METRICS', 'default', 'accuracy')": {"filepath": "TEMPLATE_PATH/metrics/accuracy_metric.py", "imports": ["typing", "numpy"], "module": "modelscope.metrics.accuracy_metric"}, "('METRICS', 'default', 'image-ins-seg-coco-metric')": {"filepath": 
"TEMPLATE_PATH/metrics/image_instance_segmentation_metric.py", "imports": ["numpy", "pycocotools", "tempfile", "collections", "os", "typing"], "module": "modelscope.metrics.image_instance_segmentation_metric"}, "('METRICS', 'default', 'video-super-resolution-metric')": {"filepath": "TEMPLATE_PATH/metrics/video_super_resolution_metric/video_super_resolution_metric.py", "imports": ["typing", "numpy"], "module": "modelscope.metrics.video_super_resolution_metric.video_super_resolution_metric"}, "('METRICS', 'default', 'image-color-enhance-metric')": {"filepath": "TEMPLATE_PATH/metrics/image_color_enhance_metric.py", "imports": ["cv2", "typing", "numpy"], "module": "modelscope.metrics.image_color_enhance_metric"}, "('METRICS', 'default', 'image-portrait-enhancement-metric')": {"filepath": "TEMPLATE_PATH/metrics/image_portrait_enhancement_metric.py", "imports": ["cv2", "typing", "numpy"], "module": "modelscope.metrics.image_portrait_enhancement_metric"}, "('METRICS', 'default', 'bleu')": {"filepath": "TEMPLATE_PATH/metrics/bleu_metric.py", "imports": ["typing", "itertools", "sacrebleu"], "module": "modelscope.metrics.bleu_metric"}, "('METRICS', 'default', 'text-gen-metric')": {"filepath": "TEMPLATE_PATH/metrics/text_generation_metric.py", "imports": ["nltk", "rouge", "typing"], "module": "modelscope.metrics.text_generation_metric"}, "('PIPELINES', 'protein-structure', 'unifold-protein-structure')": {"filepath": "TEMPLATE_PATH/pipelines/science/protein_structure_pipeline.py", "imports": ["numpy", "json", "torch", "unicore", "time", "os", "typing"], "module": "modelscope.pipelines.science.protein_structure_pipeline"}, "('PIPELINES', 'task-template', 'pipeline-template')": {"filepath": "TEMPLATE_PATH/pipelines/pipeline_template.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.pipeline_template"}, "('PIPELINES', 'speech-timestamp', 'speech-timestamp-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/timestamp_pipeline.py", "imports": ["json", "typing", "yaml", "os", "funasr"], "module": "modelscope.pipelines.audio.timestamp_pipeline"}, "('PIPELINES', 'keyword-spotting', 'speech_dfsmn_kws_char_farfield')": {"filepath": "TEMPLATE_PATH/pipelines/audio/kws_farfield_pipeline.py", "imports": ["numpy", "wave", "soundfile", "io", "typing"], "module": "modelscope.pipelines.audio.kws_farfield_pipeline"}, "('PIPELINES', 'speaker-verification', 'sv-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/speaker_verification_pipeline.py", "imports": ["os", "typing", "shutil", "yaml"], "module": "modelscope.pipelines.audio.speaker_verification_pipeline"}, "('PIPELINES', 'inverse-text-processing', 'itn-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/inverse_text_processing_pipeline.py", "imports": ["os", "typing", "shutil", "yaml"], "module": "modelscope.pipelines.audio.inverse_text_processing_pipeline"}, "('PIPELINES', 'speech-separation', 'speech-separation')": {"filepath": "TEMPLATE_PATH/pipelines/audio/separation_pipeline.py", "imports": ["numpy", "torch", "soundfile", "io", "typing"], "module": "modelscope.pipelines.audio.separation_pipeline"}, "('PIPELINES', 'voice-activity-detection', 'vad-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/voice_activity_detection_pipeline.py", "imports": ["json", "typing", "yaml", "os", "funasr"], "module": "modelscope.pipelines.audio.voice_activity_detection_pipeline"}, "('PIPELINES', 'text-to-speech', 'sambert-hifigan-tts')": {"filepath": "TEMPLATE_PATH/pipelines/audio/text_to_speech_pipeline.py", "imports": ["typing", 
"numpy"], "module": "modelscope.pipelines.audio.text_to_speech_pipeline"}, "('PIPELINES', 'keyword-spotting', 'kws-kwsbp')": {"filepath": "TEMPLATE_PATH/pipelines/audio/kws_kwsbp_pipeline.py", "imports": ["json", "os", "typing"], "module": "modelscope.pipelines.audio.kws_kwsbp_pipeline"}, "('PIPELINES', 'acoustic-echo-cancellation', 'speech-dfsmn-aec-psm-16k')": {"filepath": "TEMPLATE_PATH/pipelines/audio/linear_aec_pipeline.py", "imports": ["numpy", "torch", "scipy", "yaml", "importlib", "os", "typing"], "module": "modelscope.pipelines.audio.linear_aec_pipeline"}, "('PIPELINES', 'acoustic-noise-suppression', 'speech_frcrn_ans_cirm_16k')": {"filepath": "TEMPLATE_PATH/pipelines/audio/ans_pipeline.py", "imports": ["numpy", "torch", "librosa", "soundfile", "io", "typing"], "module": "modelscope.pipelines.audio.ans_pipeline"}, "('PIPELINES', 'speaker-verification', 'speaker-verification-eres2net')": {"filepath": "TEMPLATE_PATH/pipelines/audio/speaker_verification_eres2net_pipeline.py", "imports": ["torch", "io", "typing", "soundfile"], "module": "modelscope.pipelines.audio.speaker_verification_eres2net_pipeline"}, "('PIPELINES', 'language-score-prediction', 'language-score-prediction')": {"filepath": "TEMPLATE_PATH/pipelines/audio/lm_infer_pipeline.py", "imports": ["os", "typing"], "module": "modelscope.pipelines.audio.lm_infer_pipeline"}, "('PIPELINES', 'acoustic-noise-suppression', 'speech_dfsmn_ans_psm_48k_causal')": {"filepath": "TEMPLATE_PATH/pipelines/audio/ans_dfsmn_pipeline.py", "imports": ["numpy", "torch", "sys", "collections", "librosa", "soundfile", "io", "os", "typing"], "module": "modelscope.pipelines.audio.ans_dfsmn_pipeline"}, "('PIPELINES', 'auto-speech-recognition', 'asr-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/asr_inference_pipeline.py", "imports": ["json", "os", "typing", "yaml"], "module": "modelscope.pipelines.audio.asr_inference_pipeline"}, "('PIPELINES', 'speaker-diarization', 'speaker-diarization-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/speaker_diarization_pipeline.py", "imports": ["shutil", "numpy", "json", "yaml", "os", "typing"], "module": "modelscope.pipelines.audio.speaker_diarization_pipeline"}, "('PIPELINES', 'speaker-verification', 'speaker-verification-rdino')": {"filepath": "TEMPLATE_PATH/pipelines/audio/speaker_verification_rdino_pipeline.py", "imports": ["torch", "io", "typing", "soundfile"], "module": "modelscope.pipelines.audio.speaker_verification_rdino_pipeline"}, "('PIPELINES', 'punctuation', 'punc-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/punctuation_processing_pipeline.py", "imports": ["os", "typing", "shutil", "yaml"], "module": "modelscope.pipelines.audio.punctuation_processing_pipeline"}, "('PIPELINES', 'speaker-verification', 'speaker-verification')": {"filepath": "TEMPLATE_PATH/pipelines/audio/speaker_verification_light_pipeline.py", "imports": ["torch", "io", "typing", "soundfile"], "module": "modelscope.pipelines.audio.speaker_verification_light_pipeline"}, "('PIPELINES', 'speaker-diarization', 'speaker-change-locating')": {"filepath": "TEMPLATE_PATH/pipelines/audio/speaker_change_locating_pipeline.py", "imports": ["numpy", "torch", "soundfile", "io", "typing"], "module": "modelscope.pipelines.audio.speaker_change_locating_pipeline"}, "('PIPELINES', 'auto-speech-recognition', 'asr-wenet-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/asr_wenet_inference_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.audio.asr_wenet_inference_pipeline"}, "('PIPELINES', 
'auto-speech-recognition', 'ofa-asr')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/asr_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.asr_pipeline"}, "('PIPELINES', 'image-captioning', 'image-captioning')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/image_captioning_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.multi_modal.image_captioning_pipeline"}, "('PIPELINES', 'text-to-video-synthesis', 'latent-text-to-video-synthesis')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/text_to_video_synthesis_pipeline.py", "imports": ["cv2", "torch", "einops", "tempfile", "os", "typing"], "module": "modelscope.pipelines.multi_modal.text_to_video_synthesis_pipeline"}, "('PIPELINES', 'text-ranking', 'mgeo-ranking')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/mgeo_ranking_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.multi_modal.mgeo_ranking_pipeline"}, "('PIPELINES', 'generative-multi-modal-embedding', 'generative-multi-modal-embedding')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/generative_multi_modal_embedding_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.multi_modal.generative_multi_modal_embedding_pipeline"}, "('PIPELINES', 'multimodal-dialogue', 'multimodal-dialogue')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/multimodal_dialogue_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.multimodal_dialogue_pipeline"}, "('PIPELINES', 'text-to-image-synthesis', 'text-to-image-synthesis')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/text_to_image_synthesis_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.text_to_image_synthesis_pipeline"}, "('PIPELINES', 'text2sql', 'ofa-text2sql')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/text2sql_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.text2sql_pipeline"}, "('PIPELINES', 'visual-entailment', 'visual-entailment')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/visual_entailment_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.visual_entailment_pipeline"}, "('PIPELINES', 'text-to-image-synthesis', 'disco_guided_diffusion')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/disco_guided_diffusion_pipeline/disco_guided_diffusion.py", "imports": ["PIL", "gc", "numpy", "cv2", "json", "torch", "math", "clip", "importlib", "torchvision", "os"], "module": "modelscope.pipelines.multi_modal.disco_guided_diffusion_pipeline.disco_guided_diffusion"}, "('PIPELINES', 'visual-question-answering', 'visual-question-answering')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/visual_question_answering_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.visual_question_answering_pipeline"}, "('PIPELINES', 'video-question-answering', 'video-question-answering')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/video_question_answering_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.video_question_answering_pipeline"}, "('PIPELINES', 'video-captioning', 'video-captioning')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/video_captioning_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.video_captioning_pipeline"}, "('PIPELINES', 'video-multi-modal-embedding', 'video-multi-modal-embedding')": 
{"filepath": "TEMPLATE_PATH/pipelines/multi_modal/video_multi_modal_embedding_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.multi_modal.video_multi_modal_embedding_pipeline"}, "('PIPELINES', 'efficient-diffusion-tuning', 'efficient-diffusion-tuning')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/efficient_diffusion_tuning_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "torchvision", "typing"], "module": "modelscope.pipelines.multi_modal.efficient_diffusion_tuning_pipeline"}, "('PIPELINES', 'multi-modal-similarity', 'multi-modal-similarity')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/team_multi_modal_similarity_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.multi_modal.team_multi_modal_similarity_pipeline"}, "('PIPELINES', 'text-to-image-synthesis', 'diffusers-stable-diffusion')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/stable_diffusion_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "diffusers", "typing"], "module": "modelscope.pipelines.multi_modal.diffusers_wrapped.stable_diffusion.stable_diffusion_pipeline"}, "('PIPELINES', 'text-to-image-synthesis', 'chinese-stable-diffusion')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/chinese_stable_diffusion_pipeline.py", "imports": ["PIL", "transformers", "numpy", "cv2", "torch", "diffusers", "typing"], "module": "modelscope.pipelines.multi_modal.diffusers_wrapped.stable_diffusion.chinese_stable_diffusion_pipeline"}, "('PIPELINES', 'image-text-retrieval', 'multi-modal-embedding')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/multi_modal_embedding_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.multi_modal.multi_modal_embedding_pipeline"}, "('PIPELINES', 'multi-modal-embedding', 'multi-modal-embedding')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/multi_modal_embedding_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.multi_modal.multi_modal_embedding_pipeline"}, "('PIPELINES', 'ocr-recognition', 'ofa-ocr-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/ocr_recognition_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.ocr_recognition_pipeline"}, "('PIPELINES', 'document-vl-embedding', 'document-vl-embedding')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/document_vl_embedding_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.document_vl_embedding_pipeline"}, "('PIPELINES', 'image-text-retrieval', 'image-text-retrieval')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/image_text_retrieval_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.image_text_retrieval_pipeline"}, "('PIPELINES', 'visual-question-answering', 'gridvlp-multi-modal-classification')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/gridvlp_pipeline.py", "imports": ["PIL", "transformers", "numpy", "json", "torch", "time", "os", "traceback", "typing"], "module": "modelscope.pipelines.multi_modal.gridvlp_pipeline"}, "('PIPELINES', 'multi-modal-embedding', 'gridvlp-multi-modal-embedding')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/gridvlp_pipeline.py", "imports": ["PIL", "transformers", "numpy", "json", "torch", "time", "os", "traceback", "typing"], "module": "modelscope.pipelines.multi_modal.gridvlp_pipeline"}, "('PIPELINES', 'visual-grounding', 'visual-grounding')": {"filepath": 
"TEMPLATE_PATH/pipelines/multi_modal/visual_grounding_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.visual_grounding_pipeline"}, "('PIPELINES', 'video-temporal-grounding', 'soonet-video-temporal-grounding')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/soonet_video_temporal_grounding_pipeline.py", "imports": ["numpy", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.multi_modal.soonet_video_temporal_grounding_pipeline"}, "('PIPELINES', 'sudoku', 'ofa-sudoku')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/sudoku_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.sudoku_pipeline"}, "('PIPELINES', 'translation-evaluation', 'translation-evaluation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/translation_evaluation_pipeline.py", "imports": ["numpy", "enum", "torch", "os", "typing"], "module": "modelscope.pipelines.nlp.translation_evaluation_pipeline"}, "('PIPELINES', 'text-generation', 'glm130b-text-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/glm130b_text_generation_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.glm130b_text_generation_pipeline"}, "('PIPELINES', 'faq-question-answering', 'faq-question-answering')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/faq_question_answering_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.faq_question_answering_pipeline"}, "('PIPELINES', 'document-grounded-dialog-generate', 'document-grounded-dialog-generate')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/document_grounded_dialog_generate_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.document_grounded_dialog_generate_pipeline"}, "('PIPELINES', 'translation', 'automatic-post-editing')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/automatic_post_editing_pipeline.py", "imports": ["tensorflow", "sacremoses", "numpy", "jieba", "sentencepiece", "os", "typing", "html"], "module": "modelscope.pipelines.nlp.automatic_post_editing_pipeline"}, "('PIPELINES', 'named-entity-recognition', 'named-entity-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/named_entity_recognition_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.named_entity_recognition_pipeline"}, "('PIPELINES', 'named-entity-recognition', 'named-entity-recognition-thai')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/named_entity_recognition_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.named_entity_recognition_pipeline"}, "('PIPELINES', 'named-entity-recognition', 'named-entity-recognition-viet')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/named_entity_recognition_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.named_entity_recognition_pipeline"}, "('PIPELINES', 'translation', 'interactive-translation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/interactive_translation_pipeline.py", "imports": ["tensorflow", "sacremoses", "numpy", "jieba", "subword_nmt", "os", "typing"], "module": "modelscope.pipelines.nlp.interactive_translation_pipeline"}, "('PIPELINES', 'text-summarization', 'text-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/summarization_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.summarization_pipeline"}, "('PIPELINES', 'document-grounded-dialog-retrieval', 'document-grounded-dialog-retrieval')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/document_grounded_dialog_retrieval_pipeline.py", "imports": ["numpy", "json", 
"faiss", "os", "typing"], "module": "modelscope.pipelines.nlp.document_grounded_dialog_retrieval_pipeline"}, "('PIPELINES', 'text-classification', 'domain-classification')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/fasttext_text_classification_pipeline.py", "imports": ["numpy", "fasttext", "sentencepiece", "os", "typing"], "module": "modelscope.pipelines.nlp.fasttext_text_classification_pipeline"}, "('PIPELINES', 'word-alignment', 'word-alignment')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/word_alignment_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.nlp.word_alignment_pipeline"}, "('PIPELINES', 'feature-extraction', 'feature-extraction')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/feature_extraction_pipeline.py", "imports": ["torch", "os", "typing"], "module": "modelscope.pipelines.nlp.feature_extraction_pipeline"}, "('PIPELINES', 'text-ranking', 'text-ranking')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_ranking_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.nlp.text_ranking_pipeline"}, "('PIPELINES', 'fid-dialogue', 'fid-dialogue')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/fid_dialogue_pipeline.py", "imports": ["torch", "re", "typing"], "module": "modelscope.pipelines.nlp.fid_dialogue_pipeline"}, "('PIPELINES', 'text-classification', 'sentiment-analysis')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.text_classification_pipeline"}, "('PIPELINES', 'nli', 'nli')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.text_classification_pipeline"}, "('PIPELINES', 'sentence-similarity', 'sentence-similarity')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.text_classification_pipeline"}, "('PIPELINES', 'text-classification', 'text-classification')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.text_classification_pipeline"}, "('PIPELINES', 'text-classification', 'sentiment-classification')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.text_classification_pipeline"}, "('PIPELINES', 'text-classification', 'sentence-similarity')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.text_classification_pipeline"}, "('PIPELINES', 'sentiment-classification', 'sentiment-classification')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.text_classification_pipeline"}, "('PIPELINES', 'code-generation', 'codegeex-code-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/codegeex_code_generation_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.codegeex_code_generation_pipeline"}, "('PIPELINES', 'sentence-similarity', 'translation-quality-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/translation_quality_estimation_pipeline.py", "imports": ["transformers", "torch", "io", "os", "typing"], "module": "modelscope.pipelines.nlp.translation_quality_estimation_pipeline"}, 
"('PIPELINES', 'fill-mask', 'fill-mask')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/fill_mask_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.nlp.fill_mask_pipeline"}, "('PIPELINES', 'fill-mask', 'fill-mask-ponet')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/fill_mask_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.nlp.fill_mask_pipeline"}, "('PIPELINES', 'text-generation', 'plug-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/distributed_plug_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.distributed_plug_pipeline"}, "('PIPELINES', 'table-question-answering', 'conversational-text-to-sql')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/conversational_text_to_sql_pipeline.py", "imports": ["torch", "typing", "text2sql_lgesql"], "module": "modelscope.pipelines.nlp.conversational_text_to_sql_pipeline"}, "('PIPELINES', 'text-generation', 'gpt3-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/distributed_gpt3_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.distributed_gpt3_pipeline"}, "('PIPELINES', 'information-extraction', 'relation-extraction')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/information_extraction_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.information_extraction_pipeline"}, "('PIPELINES', 'relation-extraction', 'relation-extraction')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/information_extraction_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.information_extraction_pipeline"}, "('PIPELINES', 'table-question-answering', 'table-question-answering-pipeline')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/table_question_answering_pipeline.py", "imports": ["transformers", "json", "torch", "os", "typing"], "module": "modelscope.pipelines.nlp.table_question_answering_pipeline"}, "('PIPELINES', 'text-classification', 'user-satisfaction-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/user_satisfaction_estimation_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.user_satisfaction_estimation_pipeline"}, "('PIPELINES', 'task-oriented-conversation', 'dialog-modeling')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/dialog_modeling_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.dialog_modeling_pipeline"}, "('PIPELINES', 'competency-aware-translation', 'canmt-translation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/canmt_translation_pipeline.py", "imports": ["torch", "os", "sacremoses", "typing"], "module": "modelscope.pipelines.nlp.canmt_translation_pipeline"}, "('PIPELINES', 'word-segmentation', 'word-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/word_segmentation_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.word_segmentation_pipeline"}, "('PIPELINES', 'word-segmentation', 'multilingual-word-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/word_segmentation_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.word_segmentation_pipeline"}, "('PIPELINES', 'word-segmentation', 'word-segmentation-thai')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/word_segmentation_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.word_segmentation_pipeline"}, "('PIPELINES', 'document-segmentation', 'document-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/document_segmentation_pipeline.py", 
"imports": ["datasets", "numpy", "torch", "re", "typing"], "module": "modelscope.pipelines.nlp.document_segmentation_pipeline"}, "('PIPELINES', 'text-generation', 'gpt-moe-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/distributed_gpt_moe_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.distributed_gpt_moe_pipeline"}, "('PIPELINES', 'extractive-summarization', 'extractive-summarization')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/extractive_summarization_pipeline.py", "imports": ["datasets", "numpy", "torch", "re", "typing"], "module": "modelscope.pipelines.nlp.extractive_summarization_pipeline"}, "('PIPELINES', 'text-error-correction', 'text-error-correction')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_error_correction_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.text_error_correction_pipeline"}, "('PIPELINES', 'task-oriented-conversation', 'dialog-state-tracking')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/dialog_state_tracking_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.dialog_state_tracking_pipeline"}, "('PIPELINES', 'text-summarization', 'mglm-text-summarization')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/mglm_text_summarization_pipeline.py", "imports": ["os", "typing"], "module": "modelscope.pipelines.nlp.mglm_text_summarization_pipeline"}, "('PIPELINES', 'translation', 'csanmt-translation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/translation_pipeline.py", "imports": ["tensorflow", "sacremoses", "numpy", "jieba", "subword_nmt", "os", "typing"], "module": "modelscope.pipelines.nlp.translation_pipeline"}, "('PIPELINES', 'siamese-uie', 'siamese-uie')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/siamese_uie_pipeline.py", "imports": ["json", "torch", "logging", "scipy", "tqdm", "math", "copy", "time", "pathlib", "os", "typing"], "module": "modelscope.pipelines.nlp.siamese_uie_pipeline"}, "('PIPELINES', 'task-oriented-conversation', 'dialog-intent-prediction')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/dialog_intent_prediction_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.dialog_intent_prediction_pipeline"}, "('PIPELINES', 'sentence-embedding', 'sentence-embedding')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/sentence_embedding_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.sentence_embedding_pipeline"}, "('PIPELINES', 'document-grounded-dialog-rerank', 'document-grounded-dialog-rerank')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/document_grounded_dialog_rerank_pipeline.py", "imports": ["ujson", "transformers", "random", "numpy", "torch", "re", "sys", "collections", "time", "os", "typing", "pprint"], "module": "modelscope.pipelines.nlp.document_grounded_dialog_rerank_pipeline"}, "('PIPELINES', 'zero-shot-classification', 'zero-shot-classification')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/zero_shot_classification_pipeline.py", "imports": ["torch", "scipy", "typing"], "module": "modelscope.pipelines.nlp.zero_shot_classification_pipeline"}, "('PIPELINES', 'text-generation', 'text-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_generation_pipeline.py", "imports": ["torch", "os", "typing"], "module": "modelscope.pipelines.nlp.text_generation_pipeline"}, "('PIPELINES', 'text2text-generation', 'translation_en_to_de')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_generation_pipeline.py", "imports": ["torch", "os", "typing"], "module": 
"modelscope.pipelines.nlp.text_generation_pipeline"}, "('PIPELINES', 'text2text-generation', 'translation_en_to_ro')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_generation_pipeline.py", "imports": ["torch", "os", "typing"], "module": "modelscope.pipelines.nlp.text_generation_pipeline"}, "('PIPELINES', 'text2text-generation', 'translation_en_to_fr')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_generation_pipeline.py", "imports": ["torch", "os", "typing"], "module": "modelscope.pipelines.nlp.text_generation_pipeline"}, "('PIPELINES', 'text2text-generation', 'text2text-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_generation_pipeline.py", "imports": ["torch", "os", "typing"], "module": "modelscope.pipelines.nlp.text_generation_pipeline"}, "('PIPELINES', 'text-classification', 'language_identification')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/language_identification_pipline.py", "imports": ["tensorflow", "numpy", "re", "os", "typing"], "module": "modelscope.pipelines.nlp.language_identification_pipline"}, "('PIPELINES', 'token-classification', 'token-classification')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/token_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.token_classification_pipeline"}, "('PIPELINES', 'token-classification', 'part-of-speech')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/token_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.token_classification_pipeline"}, "('PIPELINES', 'token-classification', 'word-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/token_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.token_classification_pipeline"}, "('PIPELINES', 'token-classification', 'named-entity-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/token_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.token_classification_pipeline"}, "('PIPELINES', 'part-of-speech', 'part-of-speech')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/token_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.token_classification_pipeline"}, "('PIPELINES', 'code-translation', 'codegeex-code-translation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/codegeex_code_translation_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.codegeex_code_translation_pipeline"}, "('PIPELINES', 'bad-image-detecting', 'bad-image-detecting')": {"filepath": "TEMPLATE_PATH/pipelines/cv/bad_image_detecting_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.bad_image_detecting_pipeline"}, "('PIPELINES', 'image-portrait-stylization', 'unet-person-image-cartoon')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_cartoon_pipeline.py", "imports": ["tensorflow", "numpy", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.image_cartoon_pipeline"}, "('PIPELINES', 'image-to-image-generation', 'image-to-image-generation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_to_image_generate_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.image_to_image_generate_pipeline"}, "('PIPELINES', 'facial-expression-recognition', 'vgg19-facial-expression-recognition-fer')": {"filepath": "TEMPLATE_PATH/pipelines/cv/facial_expression_recognition_pipeline.py", "imports": ["PIL", "numpy", 
"torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.facial_expression_recognition_pipeline"}, "('PIPELINES', 'face-detection', 'resnet50-face-detection-retinaface')": {"filepath": "TEMPLATE_PATH/pipelines/cv/retina_face_detection_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.retina_face_detection_pipeline"}, "('PIPELINES', 'image-style-transfer', 'AAMS-style-transfer')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_style_transfer_pipeline.py", "imports": ["cv2", "os", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_style_transfer_pipeline"}, "('PIPELINES', 'image-face-fusion', 'image-face-fusion')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_face_fusion_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.cv.image_face_fusion_pipeline"}, "('PIPELINES', 'face-detection', 'manual-face-detection-ulfd')": {"filepath": "TEMPLATE_PATH/pipelines/cv/ulfd_face_detection_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.ulfd_face_detection_pipeline"}, "('PIPELINES', 'pedestrian-attribute-recognition', 'resnet50_pedestrian-attribute-recognition_image')": {"filepath": "TEMPLATE_PATH/pipelines/cv/pedestrian_attribute_recognition_pipeline.py", "imports": ["PIL", "numpy", "cv2", "json", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.pedestrian_attribute_recognition_pipeline"}, "('PIPELINES', 'image-denoising', 'nafnet-image-denoise')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_denoise_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_denoise_pipeline"}, "('PIPELINES', 'video-text-retrieval', 'vop-video-text-retrieval-se')": {"filepath": "TEMPLATE_PATH/pipelines/cv/vop_retrieval_se_pipeline.py", "imports": ["numpy", "torch", "gzip", "os", "typing"], "module": "modelscope.pipelines.cv.vop_retrieval_se_pipeline"}, "('PIPELINES', 'portrait-matting', 'unet-image-matting')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_matting_pipeline.py", "imports": ["tensorflow", "numpy", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.image_matting_pipeline"}, "('PIPELINES', 'universal-matting', 'unet-universal-matting')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_matting_pipeline.py", "imports": ["tensorflow", "numpy", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.image_matting_pipeline"}, "('PIPELINES', 'image-deblurring', 'nafnet-image-deblur')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_deblur_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_deblur_pipeline"}, "('PIPELINES', 'video-human-matting', 'video-human-matting')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_human_matting_pipeline.py", "imports": ["numpy", "cv2", "torch", "moviepy", "os", "typing"], "module": "modelscope.pipelines.cv.video_human_matting_pipeline"}, "('PIPELINES', 'live-category', 'live-category')": {"filepath": "TEMPLATE_PATH/pipelines/cv/live_category_pipeline.py", "imports": ["PIL", "numpy", "torch", "decord", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.live_category_pipeline"}, "('PIPELINES', 'image-classification', 'image-structured-model-probing')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_structured_model_probing_pipeline.py", "imports": ["mmcv", "numpy", "torch", "math", "torchvision", "os", "typing"], "module": 
"modelscope.pipelines.cv.image_structured_model_probing_pipeline"}, "('PIPELINES', 'face-quality-assessment', 'manual-face-quality-assessment-fqa')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_quality_assessment_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "module": "modelscope.pipelines.cv.face_quality_assessment_pipeline"}, "('PIPELINES', 'image-portrait-enhancement', 'gpen-image-portrait-enhancement')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_portrait_enhancement_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "scipy", "math", "typing"], "module": "modelscope.pipelines.cv.image_portrait_enhancement_pipeline"}, "('PIPELINES', 'image-color-enhancement', 'adaint-image-color-enhance')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_color_enhance_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_color_enhance_pipeline"}, "('PIPELINES', 'image-color-enhancement', 'deeplpf-image-color-enhance')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_color_enhance_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_color_enhance_pipeline"}, "('PIPELINES', 'image-color-enhancement', 'csrnet-image-color-enhance')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_color_enhance_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_color_enhance_pipeline"}, "('PIPELINES', 'vision-efficient-tuning', 'vision-efficient-tuning')": {"filepath": "TEMPLATE_PATH/pipelines/cv/vision_efficient_tuning_pipeline.py", "imports": ["torch", "torchvision", "typing", "numpy"], "module": "modelscope.pipelines.cv.vision_efficient_tuning_pipeline"}, "('PIPELINES', 'video-object-segmentation', 'video-object-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_object_segmentation_pipeline.py", "imports": ["PIL", "numpy", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.video_object_segmentation_pipeline"}, "('PIPELINES', 'face-detection', 'resnet-face-detection-scrfd10gkps')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_detection_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.face_detection_pipeline"}, "('PIPELINES', 'body-3d-keypoints', 'canonical_body-3d-keypoints_video')": {"filepath": "TEMPLATE_PATH/pipelines/cv/body_3d_keypoints_pipeline.py", "imports": ["numpy", "cv2", "torch", "tempfile", "matplotlib", "datetime", "mpl_toolkits", "os", "typing"], "module": "modelscope.pipelines.cv.body_3d_keypoints_pipeline"}, "('PIPELINES', 'image-paintbyexample', 'stablediffusion-paintbyexample')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_paintbyexample_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "einops", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_paintbyexample_pipeline"}, "('PIPELINES', 'face-recognition', 'ir-face-recognition-rts')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_recognition_ood_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.face_recognition_ood_pipeline"}, "('PIPELINES', 'image-classification', 'image-classification')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'image-classification', 'vit-base_image-classification_ImageNet-labels')": 
{"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'image-classification', 'vit-base_image-classification_Dailylife-labels')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'image-classification', 'nextvit-small_image-classification_Dailylife-labels')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'image-classification', 'convnext-base_image-classification_garbage')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'image-classification', 'common-image-classification')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'image-classification', 'easyrobust-classification')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'image-classification', 'bnext-small_image-classification_ImageNet-labels')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'card-detection', 'resnet-card-detection-scrfd34gkps')": {"filepath": "TEMPLATE_PATH/pipelines/cv/card_detection_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.card_detection_pipeline"}, "('PIPELINES', 'table-recognition', 'dla34-table-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/cv/table_recognition_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "module": "modelscope.pipelines.cv.table_recognition_pipeline"}, "('PIPELINES', 'image-to-image-translation', 'image-to-image-translation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_to_image_translation_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "sys", "io", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.image_to_image_translation_pipeline"}, "('PIPELINES', 'face-attribute-recognition', 'resnet34-face-attribute-recognition-fairface')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_attribute_recognition_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.face_attribute_recognition_pipeline"}, "('PIPELINES', 'image-debanding', 'rrdb-image-debanding')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_debanding_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_debanding_pipeline"}, "('PIPELINES', 'video-instance-segmentation', 'video-instance-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_instance_segmentation_pipeline.py", "imports": ["mmcv", "numpy", "cv2", "torch", "tqdm", "os", "typing"], "module": "modelscope.pipelines.cv.video_instance_segmentation_pipeline"}, "('PIPELINES', 'image-classification', 'tinynas-classification')": 
{"filepath": "TEMPLATE_PATH/pipelines/cv/tinynas_classification_pipeline.py", "imports": ["torch", "math", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.tinynas_classification_pipeline"}, "('PIPELINES', 'human-reconstruction', 'human-reconstruction')": {"filepath": "TEMPLATE_PATH/pipelines/cv/human_reconstruction_pipeline.py", "imports": ["trimesh", "shutil", "numpy", "torch", "os", "typing"], "module": "modelscope.pipelines.cv.human_reconstruction_pipeline"}, "('PIPELINES', 'video-multi-object-tracking', 'video-multi-object-tracking')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_multi_object_tracking_pipeline.py", "imports": ["torch", "os", "typing"], "module": "modelscope.pipelines.cv.video_multi_object_tracking_pipeline"}, "('PIPELINES', 'controllable-image-generation', 'controllable-image-generation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/controllable_image_generation_pipeline.py", "imports": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "glob", "os", "typing"], "module": "modelscope.pipelines.cv.controllable_image_generation_pipeline"}, "('PIPELINES', 'image-fewshot-detection', 'image-fewshot-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_defrcn_fewshot_pipeline.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_defrcn_fewshot_pipeline"}, "('PIPELINES', 'semantic-segmentation', 'ddpm-image-semantic-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/ddpm_semantic_segmentation_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.ddpm_semantic_segmentation_pipeline"}, "('PIPELINES', 'image-classification', 'resnet50-image-classification-cc')": {"filepath": "TEMPLATE_PATH/pipelines/cv/content_check_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.content_check_pipeline"}, "('PIPELINES', 'video-text-retrieval', 'vop-video-text-retrieval')": {"filepath": "TEMPLATE_PATH/pipelines/cv/vop_retrieval_pipeline.py", "imports": ["random", "numpy", "torch", "tqdm", "math", "collections", "gzip", "os", "typing", "pickle"], "module": "modelscope.pipelines.cv.vop_retrieval_pipeline"}, "('PIPELINES', 'object-detection-3d', 'object-detection-3d-depe')": {"filepath": "TEMPLATE_PATH/pipelines/cv/object_detection_3d_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "tempfile", "os", "typing"], "module": "modelscope.pipelines.cv.object_detection_3d_pipeline"}, "('PIPELINES', 'lineless-table-recognition', 'lore-lineless-table-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/cv/lineless_table_recognition_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "module": "modelscope.pipelines.cv.lineless_table_recognition_pipeline"}, "('PIPELINES', 'video-embedding', 'cmdssl-r2p1d_video_embedding')": {"filepath": "TEMPLATE_PATH/pipelines/cv/cmdssl_video_embedding_pipeline.py", "imports": ["PIL", "numpy", "torch", "decord", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.cmdssl_video_embedding_pipeline"}, "('PIPELINES', 'domain-specific-object-detection', 'tinynas-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/tinynas_detection_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.tinynas_detection_pipeline"}, "('PIPELINES', 'image-object-detection', 'tinynas-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/tinynas_detection_pipeline.py", "imports": ["typing"], "module": 
"modelscope.pipelines.cv.tinynas_detection_pipeline"}, "('PIPELINES', 'video-deinterlace', 'video-deinterlace')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_deinterlace_pipeline.py", "imports": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.video_deinterlace_pipeline"}, "('PIPELINES', 'open-vocabulary-detection', 'open-vocabulary-detection-vild')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_open_vocabulary_detection_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.image_open_vocabulary_detection_pipeline"}, "('PIPELINES', 'language-guided-video-summarization', 'clip-it-video-summarization')": {"filepath": "TEMPLATE_PATH/pipelines/cv/language_guided_video_summarization_pipeline.py", "imports": ["PIL", "shutil", "random", "numpy", "cv2", "torch", "tempfile", "clip", "os", "typing"], "module": "modelscope.pipelines.cv.language_guided_video_summarization_pipeline"}, "('PIPELINES', 'body-2d-keypoints', 'hrnetv2w32_body-2d-keypoints_image')": {"filepath": "TEMPLATE_PATH/pipelines/cv/body_2d_keypoints_pipeline.py", "imports": ["PIL", "numpy", "cv2", "json", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.body_2d_keypoints_pipeline"}, "('PIPELINES', 'face-human-hand-detection', 'face-human-hand-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_human_hand_detection_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.cv.face_human_hand_detection_pipeline"}, "('PIPELINES', 'video-embedding', 'hicossl-s3dg-video_embedding')": {"filepath": "TEMPLATE_PATH/pipelines/cv/hicossl_video_embedding_pipeline.py", "imports": ["torch", "os", "typing", "math"], "module": "modelscope.pipelines.cv.hicossl_video_embedding_pipeline"}, "('PIPELINES', 'face-recognition', 'ir101-face-recognition-cfglint')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_recognition_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.face_recognition_pipeline"}, "('PIPELINES', 'image-body-reshaping', 'flow-based-body-reshaping')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_body_reshaping_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.image_body_reshaping_pipeline"}, "('PIPELINES', 'image-inpainting', 'fft-inpainting')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_inpainting_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "typing"], "module": "modelscope.pipelines.cv.image_inpainting_pipeline"}, "('PIPELINES', 'face-recognition', 'manual-face-recognition-frfm')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_recognition_onnx_fm_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "module": "modelscope.pipelines.cv.face_recognition_onnx_fm_pipeline"}, "('PIPELINES', 'image-driving-perception', 'yolopv2_image-driving-percetion_bdd100k')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_driving_perception_pipeline.py", "imports": ["cv2", "os", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_driving_perception_pipeline"}, "('PIPELINES', 'video-stabilization', 'video-stabilization')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_stabilization_pipeline.py", "imports": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "glob", "os", "typing"], "module": "modelscope.pipelines.cv.video_stabilization_pipeline"}, "('PIPELINES', 'indoor-layout-estimation', 'indoor-layout-estimation')": 
{"filepath": "TEMPLATE_PATH/pipelines/cv/indoor_layout_estimation_pipeline.py", "imports": ["cv2", "typing", "numpy"], "module": "modelscope.pipelines.cv.indoor_layout_estimation_pipeline"}, "('PIPELINES', 'image-colorization', 'ddcolor-image-colorization')": {"filepath": "TEMPLATE_PATH/pipelines/cv/ddcolor_image_colorization_pipeline.py", "imports": ["numpy", "cv2", "torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.ddcolor_image_colorization_pipeline"}, "('PIPELINES', 'face-emotion', 'face-emotion')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_emotion_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.cv.face_emotion_pipeline"}, "('PIPELINES', 'face-detection', 'manual-face-detection-mtcnn')": {"filepath": "TEMPLATE_PATH/pipelines/cv/mtcnn_face_detection_pipeline.py", "imports": ["torch", "os", "typing"], "module": "modelscope.pipelines.cv.mtcnn_face_detection_pipeline"}, "('PIPELINES', 'nerf-recon-acc', 'nerf-recon-acc')": {"filepath": "TEMPLATE_PATH/pipelines/cv/nerf_recon_acc_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.nerf_recon_acc_pipeline"}, "('PIPELINES', 'image-depth-estimation', 'image-bts-depth-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_bts_depth_estimation_pipeline.py", "imports": ["albumentations", "numpy", "torch", "cv2", "typing"], "module": "modelscope.pipelines.cv.image_bts_depth_estimation_pipeline"}, "('PIPELINES', 'face-2d-keypoints', 'manual-facial-landmark-confidence-flcm')": {"filepath": "TEMPLATE_PATH/pipelines/cv/facial_landmark_confidence_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.facial_landmark_confidence_pipeline"}, "('PIPELINES', 'face-reconstruction', 'resnet50-face-reconstruction')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_reconstruction_pipeline.py", "imports": ["PIL", "tensorflow", "shutil", "numpy", "cv2", "torch", "scipy", "io", "face_alignment", "os", "typing"], "module": "modelscope.pipelines.cv.face_reconstruction_pipeline"}, "('PIPELINES', 'face-detection', 'resnet101-face-detection-cvpr22papermogface')": {"filepath": "TEMPLATE_PATH/pipelines/cv/mog_face_detection_pipeline.py", "imports": ["os", "typing", "numpy"], "module": "modelscope.pipelines.cv.mog_face_detection_pipeline"}, "('PIPELINES', 'skin-retouching', 'unet-skin-retouching')": {"filepath": "TEMPLATE_PATH/pipelines/cv/skin_retouching_pipeline.py", "imports": ["PIL", "tensorflow", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.skin_retouching_pipeline"}, "('PIPELINES', 'image-segmentation', 'vision-middleware-multi-task')": {"filepath": "TEMPLATE_PATH/pipelines/cv/vision_middleware_pipeline.py", "imports": ["mmcv", "numpy", "torch", "math", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.vision_middleware_pipeline"}, "('PIPELINES', 'face-liveness', 'manual-face-liveness-flir')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_liveness_ir_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "module": "modelscope.pipelines.cv.face_liveness_ir_pipeline"}, "('PIPELINES', 'human-detection', 'resnet18-human-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_detection_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.cv.image_detection_pipeline"}, "('PIPELINES', 'image-object-detection', 'vit-object-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_detection_pipeline.py", "imports": 
["typing", "numpy"], "module": "modelscope.pipelines.cv.image_detection_pipeline"}, "('PIPELINES', 'image-object-detection', 'abnormal-object-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_detection_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.cv.image_detection_pipeline"}, "('PIPELINES', 'video-object-detection', 'cspnet_realtime-video-object-detection_streamyolo')": {"filepath": "TEMPLATE_PATH/pipelines/cv/realtime_video_object_detection_pipeline.py", "imports": ["PIL", "numpy", "cv2", "json", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.realtime_video_object_detection_pipeline"}, "('PIPELINES', 'video-panoptic-segmentation', 'video-panoptic-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_panoptic_segmentation_pipeline.py", "imports": ["mmcv", "numpy", "cv2", "torch", "tqdm", "os", "typing"], "module": "modelscope.pipelines.cv.video_panoptic_segmentation_pipeline"}, "('PIPELINES', 'action-detection', 'ResNetC3D-action-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/action_detection_pipeline.py", "imports": ["os", "typing", "math"], "module": "modelscope.pipelines.cv.action_detection_pipeline"}, "('PIPELINES', 'product-segmentation', 'product-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/product_segmentation_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.cv.product_segmentation_pipeline"}, "('PIPELINES', 'image-object-detection', 'tbs-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/tbs_detection_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "colorsys", "os", "typing"], "module": "modelscope.pipelines.cv.tbs_detection_pipeline"}, "('PIPELINES', 'image-matching', 'image-matching')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_matching_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "typing"], "module": "modelscope.pipelines.cv.image_matching_pipeline"}, "('PIPELINES', 'video-category', 'video-category')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_category_pipeline.py", "imports": ["PIL", "numpy", "json", "torch", "decord", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.video_category_pipeline"}, "('PIPELINES', 'hand-static', 'hand-static')": {"filepath": "TEMPLATE_PATH/pipelines/cv/hand_static_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.cv.hand_static_pipeline"}, "('PIPELINES', 'animal-recognition', 'resnet101-animal-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/cv/animal_recognition_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.animal_recognition_pipeline"}, "('PIPELINES', 'pointcloud-sceneflow-estimation', 'pointcloud-sceneflow-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/pointcloud_sceneflow_estimation_pipeline.py", "imports": ["torch", "typing", "plyfile", "numpy"], "module": "modelscope.pipelines.cv.pointcloud_sceneflow_estimation_pipeline"}, "('PIPELINES', 'image-segmentation', 'cascade-mask-rcnn-swin-image-instance-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_instance_segmentation_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.image_instance_segmentation_pipeline"}, "('PIPELINES', 'video-frame-interpolation', 'video-frame-interpolation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_frame_interpolation_pipeline.py", "imports": ["subprocess", "numpy", "cv2", "torch", 
"tempfile", "math", "glob", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.video_frame_interpolation_pipeline"}, "('PIPELINES', 'image-quality-assessment-mos', 'image-quality-assessment-mos')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_quality_assessment_mos_pipeline.py", "imports": ["numpy", "cv2", "torch", "tempfile", "math", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_quality_assessment_mos_pipeline"}, "('PIPELINES', 'video-summarization', 'googlenet_pgl_video_summarization')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_summarization_pipeline.py", "imports": ["numpy", "cv2", "torch", "tqdm", "os", "typing"], "module": "modelscope.pipelines.cv.video_summarization_pipeline"}, "('PIPELINES', 'panorama-depth-estimation', 'panorama-depth-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/panorama_depth_estimation_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "typing"], "module": "modelscope.pipelines.cv.panorama_depth_estimation_pipeline"}, "('PIPELINES', 'image-segmentation', 'fast-instance-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/fast_instance_segmentation_pipeline.py", "imports": ["torch", "torchvision", "typing", "numpy"], "module": "modelscope.pipelines.cv.fast_instance_segmentation_pipeline"}, "('PIPELINES', 'image-object-detection', 'vidt')": {"filepath": "TEMPLATE_PATH/pipelines/cv/vidt_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.vidt_pipeline"}, "('PIPELINES', 'image-skychange', 'image-skychange')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_skychange_pipeline.py", "imports": ["PIL", "pdb", "numpy", "cv2", "time", "typing"], "module": "modelscope.pipelines.cv.image_skychange_pipeline"}, "('PIPELINES', 'image-quality-assessment-mos', 'image-quality-assessment-man')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_quality_assessment_man_pipeline.py", "imports": ["numpy", "cv2", "torch", "tempfile", "math", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_quality_assessment_man_pipeline"}, "('PIPELINES', 'image-demoireing', 'uhdm-image-demoireing')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_restoration_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.image_restoration_pipeline"}, "('PIPELINES', 'video-inpainting', 'video-inpainting')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_inpainting_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.video_inpainting_pipeline"}, "('PIPELINES', 'face-image-generation', 'gan-face-image-generation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_image_generation_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.face_image_generation_pipeline"}, "('PIPELINES', 'video-super-resolution', 'realbasicvsr-video-super-resolution')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_super_resolution_pipeline.py", "imports": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.video_super_resolution_pipeline"}, "('PIPELINES', 'referring-video-object-segmentation', 'referring-video-object-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/referring_video_object_segmentation_pipeline.py", "imports": ["PIL", "numpy", "torch", "einops", "tqdm", "tempfile", "moviepy", "torchvision", "typing"], "module": "modelscope.pipelines.cv.referring_video_object_segmentation_pipeline"}, "('PIPELINES', 'virtual-try-on', 
'virtual-try-on')": {"filepath": "TEMPLATE_PATH/pipelines/cv/virtual_try_on_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.virtual_try_on_pipeline"}, "('PIPELINES', 'ocr-recognition', 'convnextTiny-ocr-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/cv/ocr_recognition_pipeline.py", "imports": [], "module": "modelscope.pipelines.cv.ocr_recognition_pipeline"}, "('PIPELINES', 'ocr-detection', 'resnet18-ocr-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/ocr_detection_pipeline.py", "imports": ["tensorflow", "tf_slim", "numpy", "cv2", "torch", "math", "os", "typing"], "module": "modelscope.pipelines.cv.ocr_detection_pipeline"}, "('PIPELINES', 'movie-scene-segmentation', 'resnet50-bert-movie-scene-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/movie_scene_segmentation_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.cv.movie_scene_segmentation_pipeline"}, "('PIPELINES', 'image-segmentation', 'maskdino-swin-image-instance-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/maskdino_instance_segmentation_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.maskdino_instance_segmentation_pipeline"}, "('PIPELINES', 'video-colorization', 'video-colorization')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_colorization_pipeline.py", "imports": ["PIL", "subprocess", "numpy", "cv2", "torch", "tempfile", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.video_colorization_pipeline"}, "('PIPELINES', 'image-segmentation', 'm2fp-image-human-parsing')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_human_parsing_pipeline.py", "imports": ["torch", "torchvision", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_human_parsing_pipeline"}, "('PIPELINES', 'face-liveness', 'manual-face-liveness-flxc')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_liveness_xc_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "module": "modelscope.pipelines.cv.face_liveness_xc_pipeline"}, "('PIPELINES', 'crowd-counting', 'hrnet-crowd-counting')": {"filepath": "TEMPLATE_PATH/pipelines/cv/crowd_counting_pipeline.py", "imports": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "module": "modelscope.pipelines.cv.crowd_counting_pipeline"}, "('PIPELINES', 'video-depth-estimation', 'video-depth-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_depth_estimation_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.video_depth_estimation_pipeline"}, "('PIPELINES', 'image-colorization', 'unet-image-colorization')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_colorization_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_colorization_pipeline"}, "('PIPELINES', 'face-recognition', 'ir50-face-recognition-arcface')": {"filepath": "TEMPLATE_PATH/pipelines/cv/arc_face_recognition_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.arc_face_recognition_pipeline"}, "('PIPELINES', 'image-quality-assessment-degradation', 'image-quality-assessment-degradation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_quality_assessment_degradation_pipeline.py", "imports": ["numpy", "cv2", "torch", "tempfile", "math", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_quality_assessment_degradation_pipeline"}, "('PIPELINES', 
'image-inpainting', 'image-inpainting-sdv2')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_inpainting_sdv2_pipeline.py", "imports": ["numpy", "cv2", "torch", "tempfile", "sys", "math", "diffusers", "os", "typing"], "module": "modelscope.pipelines.cv.image_inpainting_sdv2_pipeline"}, "('PIPELINES', 'image-super-resolution', 'rrdb-image-super-resolution')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_super_resolution_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "typing"], "module": "modelscope.pipelines.cv.image_super_resolution_pipeline"}, "('PIPELINES', 'semantic-segmentation', 'u2net-salient-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_salient_detection_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.image_salient_detection_pipeline"}, "('PIPELINES', 'semantic-segmentation', 'res2net-salient-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_salient_detection_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.image_salient_detection_pipeline"}, "('PIPELINES', 'semantic-segmentation', 'res2net-camouflaged-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_salient_detection_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.image_salient_detection_pipeline"}, "('PIPELINES', 'video-single-object-tracking', 'procontext-vitb-video-single-object-tracking')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_single_object_tracking_pipeline.py", "imports": ["cv2", "os", "typing"], "module": "modelscope.pipelines.cv.video_single_object_tracking_pipeline"}, "('PIPELINES', 'video-single-object-tracking', 'ostrack-vitb-video-single-object-tracking')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_single_object_tracking_pipeline.py", "imports": ["cv2", "os", "typing"], "module": "modelscope.pipelines.cv.video_single_object_tracking_pipeline"}, "('PIPELINES', 'face-recognition', 'manual-face-recognition-frir')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_recognition_onnx_ir_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "module": "modelscope.pipelines.cv.face_recognition_onnx_ir_pipeline"}, "('PIPELINES', 'product-retrieval-embedding', 'resnet50-product-retrieval-embedding')": {"filepath": "TEMPLATE_PATH/pipelines/cv/product_retrieval_embedding_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.product_retrieval_embedding_pipeline"}, "('PIPELINES', 'face-recognition', 'resnet-face-recognition-facemask')": {"filepath": "TEMPLATE_PATH/pipelines/cv/mask_face_recognition_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "collections", "os", "typing"], "module": "modelscope.pipelines.cv.mask_face_recognition_pipeline"}, "('PIPELINES', 'image-super-resolution', 'mobile-image-super-resolution')": {"filepath": "TEMPLATE_PATH/pipelines/cv/mobile_image_super_resolution_pipeline.py", "imports": ["skimage", "numpy", "torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.mobile_image_super_resolution_pipeline"}, "('PIPELINES', 'license-plate-detection', 'resnet18-license-plate-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/license_plate_detection_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "module": "modelscope.pipelines.cv.license_plate_detection_pipeline"}, "('PIPELINES', 'image-segmentation', 'image-semantic-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_semantic_segmentation_pipeline.py", 
"imports": ["PIL", "numpy", "torch", "cv2", "typing"], "module": "modelscope.pipelines.cv.image_semantic_segmentation_pipeline"}, "('PIPELINES', 'text-driven-segmentation', 'text-driven-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/text_driven_segmentation_pipleline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.text_driven_segmentation_pipleline"}, "('PIPELINES', 'motion-generation', 'mdm-motion-generation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/motion_generation_pipeline.py", "imports": ["numpy", "torch", "tempfile", "os", "typing"], "module": "modelscope.pipelines.cv.motion_generation_pipeline"}, "('PIPELINES', 'image-multi-view-depth-estimation', 'image-multi-view-depth-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_mvs_depth_estimation_pipeline.py", "imports": ["os", "typing", "tempfile", "shutil"], "module": "modelscope.pipelines.cv.image_mvs_depth_estimation_pipeline"}, "('PIPELINES', 'image-depth-estimation', 'image-depth-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_depth_estimation_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "typing"], "module": "modelscope.pipelines.cv.image_depth_estimation_pipeline"}, "('PIPELINES', 'action-recognition', 'TAdaConv_action-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/cv/action_recognition_pipeline.py", "imports": ["torch", "os", "typing", "math"], "module": "modelscope.pipelines.cv.action_recognition_pipeline"}, "('PIPELINES', 'action-recognition', 'patchshift-action-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/cv/action_recognition_pipeline.py", "imports": ["torch", "os", "typing", "math"], "module": "modelscope.pipelines.cv.action_recognition_pipeline"}, "('PIPELINES', 'image-reid-person', 'passvitb-image-reid-person')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_reid_person_pipeline.py", "imports": ["PIL", "torch", "math", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.image_reid_person_pipeline"}, "('PIPELINES', 'general-recognition', 'resnet101-general-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/cv/general_recognition_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.general_recognition_pipeline"}, "('PIPELINES', 'shop-segmentation', 'shop-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/shop_segmentation_pipleline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.shop_segmentation_pipleline"}, "('PREPROCESSORS', 'audio', 'wav-to-lists')": {"filepath": "TEMPLATE_PATH/preprocessors/kws.py", "imports": ["os", "typing", "yaml"], "module": "modelscope.preprocessors.kws"}, "('PREPROCESSORS', 'multi-modal', 'diffusion-image-generation-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, "('PREPROCESSORS', 'multi-modal', 'ofa-tasks-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, "('PREPROCESSORS', 'multi-modal', 'clip-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, 
"('PREPROCESSORS', 'multi-modal', 'mplug-tasks-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, "('PREPROCESSORS', 'multi-modal', 'vldoc-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, "('PREPROCESSORS', 'multi-modal', 'hitea-tasks-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, "('PREPROCESSORS', 'multi-modal', 'mplug-owl-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, "('PREPROCESSORS', 'multi-modal', 'image-captioning-clip-interrogator-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, "('PREPROCESSORS', 'science', 'unifold-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/science/uni_fold.py", "imports": ["unittest", "hashlib", "ipdb", "random", "numpy", "torch", "json", "tarfile", "pathlib", "os", "typing", "requests", "logging", "re", "tqdm", "time", "gzip", "pickle"], "module": "modelscope.preprocessors.science.uni_fold"}, "('PREPROCESSORS', 'text-to-speech', 'kantts-data-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/tts.py", "imports": ["os", "kantts", "typing"], "module": "modelscope.preprocessors.tts"}, "('PREPROCESSORS', 'audio', 'wav-to-scp')": {"filepath": "TEMPLATE_PATH/preprocessors/asr.py", "imports": ["os", "typing"], "module": "modelscope.preprocessors.asr"}, "('PREPROCESSORS', 'default', 'Compose')": {"filepath": "TEMPLATE_PATH/preprocessors/common.py", "imports": ["numpy", "torch", "collections", "time", "typing"], "module": "modelscope.preprocessors.common"}, "('PREPROCESSORS', 'default', 'ToTensor')": {"filepath": "TEMPLATE_PATH/preprocessors/common.py", "imports": ["numpy", "torch", "collections", "time", "typing"], "module": "modelscope.preprocessors.common"}, "('PREPROCESSORS', 'default', 'Filter')": {"filepath": "TEMPLATE_PATH/preprocessors/common.py", "imports": ["numpy", "torch", "collections", "time", "typing"], "module": "modelscope.preprocessors.common"}, "('PREPROCESSORS', 'default', 'ToNumpy')": {"filepath": "TEMPLATE_PATH/preprocessors/common.py", "imports": ["numpy", "torch", "collections", "time", "typing"], "module": "modelscope.preprocessors.common"}, "('PREPROCESSORS', 'default', 'Rename')": {"filepath": "TEMPLATE_PATH/preprocessors/common.py", "imports": ["numpy", "torch", "collections", "time", "typing"], "module": "modelscope.preprocessors.common"}, "('PREPROCESSORS', 'default', 'Identity')": {"filepath": "TEMPLATE_PATH/preprocessors/common.py", "imports": ["numpy", "torch", "collections", "time", "typing"], "module": "modelscope.preprocessors.common"}, "('PREPROCESSORS', 'nlp', 'word-segment-text-to-label-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/token_classification_preprocessor.py", "imports": 
["torch", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.token_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'ner-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/token_classification_preprocessor.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.token_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'token-cls-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/token_classification_preprocessor.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.token_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'sequence-labeling-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/token_classification_preprocessor.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.token_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'siamese-uie-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/siamese_uie_preprocessor.py", "imports": ["typing", "transformers"], "module": "modelscope.preprocessors.nlp.siamese_uie_preprocessor"}, "('PREPROCESSORS', 'nlp', 're-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/relation_extraction_preprocessor.py", "imports": ["typing", "transformers"], "module": "modelscope.preprocessors.nlp.relation_extraction_preprocessor"}, "('PREPROCESSORS', 'nlp', 'viet-ner-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/token_classification_viet_preprocessor.py", "imports": ["torch", "typing"], "module": "modelscope.preprocessors.nlp.token_classification_viet_preprocessor"}, "('PREPROCESSORS', 'nlp', 'translation-evaluation-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/translation_evaluation_preprocessor.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.preprocessors.nlp.translation_evaluation_preprocessor"}, "('PREPROCESSORS', 'nlp', 'nli-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_classification_preprocessor.py", "imports": ["typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'sen-sim-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_classification_preprocessor.py", "imports": ["typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'bert-seq-cls-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_classification_preprocessor.py", "imports": ["typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'sen-cls-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_classification_preprocessor.py", "imports": ["typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'document-grounded-dialog-retrieval')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/document_grounded_dialog_retrieval_preprocessor.py", "imports": ["torch", "os", "typing", "transformers"], "module": "modelscope.preprocessors.nlp.document_grounded_dialog_retrieval_preprocessor"}, "('PREPROCESSORS', 'nlp', 'zero-shot-cls-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/zero_shot_classification_preprocessor.py", "imports": ["typing"], "module": "modelscope.preprocessors.nlp.zero_shot_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'canmt-translation')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/canmt_translation.py", "imports": ["sacremoses", 
"jieba", "torch", "subword_nmt", "os", "typing"], "module": "modelscope.preprocessors.nlp.canmt_translation"}, "('PREPROCESSORS', 'nlp', 'fill-mask')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/fill_mask_preprocessor.py", "imports": ["numpy", "torch", "abc", "re", "os", "typing"], "module": "modelscope.preprocessors.nlp.fill_mask_preprocessor"}, "('PREPROCESSORS', 'nlp', 'fill-mask-ponet')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/fill_mask_preprocessor.py", "imports": ["numpy", "torch", "abc", "re", "os", "typing"], "module": "modelscope.preprocessors.nlp.fill_mask_preprocessor"}, "('PREPROCESSORS', 'nlp', 'word-alignment')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/word_alignment_preprocessor.py", "imports": ["itertools", "numpy", "torch", "os", "typing"], "module": "modelscope.preprocessors.nlp.word_alignment_preprocessor"}, "('PREPROCESSORS', 'nlp', 'conversational-text-to-sql')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/space_T_en/conversational_text_to_sql_preprocessor.py", "imports": ["json", "torch", "text2sql_lgesql", "os", "typing"], "module": "modelscope.preprocessors.nlp.space_T_en.conversational_text_to_sql_preprocessor"}, "('PREPROCESSORS', 'nlp', 'document-grounded-dialog-generate')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/document_grounded_dialog_generate_preprocessor.py", "imports": ["torch", "os", "typing", "transformers"], "module": "modelscope.preprocessors.nlp.document_grounded_dialog_generate_preprocessor"}, "('PREPROCESSORS', 'nlp', 'text-error-correction')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_error_correction.py", "imports": ["torch", "os", "typing", "transformers"], "module": "modelscope.preprocessors.nlp.text_error_correction"}, "('PREPROCESSORS', 'nlp', 'text-ranking')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_ranking_preprocessor.py", "imports": ["typing", "transformers"], "module": "modelscope.preprocessors.nlp.text_ranking_preprocessor"}, "('PREPROCESSORS', 'nlp', 'Tokenize')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/bert_seq_cls_tokenizer.py", "imports": ["typing", "transformers"], "module": "modelscope.preprocessors.nlp.bert_seq_cls_tokenizer"}, "('PREPROCESSORS', 'nlp', 'document-segmentation')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/document_segmentation_preprocessor.py", "imports": ["typing"], "module": "modelscope.preprocessors.nlp.document_segmentation_preprocessor"}, "('PREPROCESSORS', 'nlp', 'sentence-embedding')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/sentence_embedding_preprocessor.py", "imports": ["typing"], "module": "modelscope.preprocessors.nlp.sentence_embedding_preprocessor"}, "('PREPROCESSORS', 'nlp', 'mglm-summarization')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/mglm_summarization_preprocessor.py", "imports": ["os", "re", "typing"], "module": "modelscope.preprocessors.nlp.mglm_summarization_preprocessor"}, "('PREPROCESSORS', 'nlp', 'thai-ner-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/token_classification_thai_preprocessor.py", "imports": ["typing"], "module": "modelscope.preprocessors.nlp.token_classification_thai_preprocessor"}, "('PREPROCESSORS', 'nlp', 'thai-wseg-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/token_classification_thai_preprocessor.py", "imports": ["typing"], "module": "modelscope.preprocessors.nlp.token_classification_thai_preprocessor"}, "('PREPROCESSORS', 'nlp', 'mgeo-ranking')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/mgeo_ranking_preprocessor.py", "imports": ["torch", "typing", "transformers"], 
"module": "modelscope.preprocessors.nlp.mgeo_ranking_preprocessor"}, "('PREPROCESSORS', 'nlp', 'dialog-intent-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/space/dialog_intent_prediction_preprocessor.py", "imports": ["json", "os", "typing"], "module": "modelscope.preprocessors.nlp.space.dialog_intent_prediction_preprocessor"}, "('PREPROCESSORS', 'nlp', 'dialog-state-tracking-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/space/dialog_state_tracking_preprocessor.py", "imports": ["typing"], "module": "modelscope.preprocessors.nlp.space.dialog_state_tracking_preprocessor"}, "('PREPROCESSORS', 'nlp', 'dialog-modeling-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/space/dialog_modeling_preprocessor.py", "imports": ["os", "typing"], "module": "modelscope.preprocessors.nlp.space.dialog_modeling_preprocessor"}, "('PREPROCESSORS', 'nlp', 'dialog-use-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/dialog_classification_use_preprocessor.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.preprocessors.nlp.dialog_classification_use_preprocessor"}, "('PREPROCESSORS', 'nlp', 'text-gen-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_generation_preprocessor.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_generation_preprocessor"}, "('PREPROCESSORS', 'nlp', 'text-gen-jieba-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_generation_preprocessor.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_generation_preprocessor"}, "('PREPROCESSORS', 'nlp', 'sentence-piece')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_generation_preprocessor.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_generation_preprocessor"}, "('PREPROCESSORS', 'nlp', 'text2text-gen-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_generation_preprocessor.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_generation_preprocessor"}, "('PREPROCESSORS', 'nlp', 'table-question-answering-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/space_T_cn/table_question_answering_preprocessor.py", "imports": ["torch", "os", "typing", "transformers"], "module": "modelscope.preprocessors.nlp.space_T_cn.table_question_answering_preprocessor"}, "('PREPROCESSORS', 'nlp', 'document-grounded-dialog-rerank')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/document_grounded_dialog_rerank_preprocessor.py", "imports": ["transformers", "torch", "copy", "os", "typing"], "module": "modelscope.preprocessors.nlp.document_grounded_dialog_rerank_preprocessor"}, "('PREPROCESSORS', 'nlp', 'feature-extraction')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/feature_extraction_preprocessor.py", "imports": ["typing", "numpy"], "module": "modelscope.preprocessors.nlp.feature_extraction_preprocessor"}, "('PREPROCESSORS', 'nlp', 'faq-question-answering-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/faq_question_answering_preprocessor.py", "imports": ["torch", "typing"], "module": "modelscope.preprocessors.nlp.faq_question_answering_preprocessor"}, "('PREPROCESSORS', 'audio', 'LinearAECAndFbank')": {"filepath": "TEMPLATE_PATH/preprocessors/audio.py", "imports": ["numpy", "torch", "scipy", "io", "os", "typing"], "module": "modelscope.preprocessors.audio"}, "('PREPROCESSORS', 'cv', 'RandomCrop')": {"filepath": 
"TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'RandomResizedCrop')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'Resize')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'CenterCrop')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'RandomHorizontalFlip')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'Normalize')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'ImageToTensor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'image-classification-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'bad-image-detecting-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/bad_image_detecting_preprocessor.py", "imports": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "module": "modelscope.preprocessors.cv.bad_image_detecting_preprocessor"}, "('PREPROCESSORS', 'cv', 'image-classification-mmcv-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/mmcls_preprocessor.py", "imports": ["os", "typing", "numpy"], "module": "modelscope.preprocessors.cv.mmcls_preprocessor"}, "('PREPROCESSORS', 'cv', 'controllable-image-generation-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/controllable_image_generation.py", "imports": ["PIL", "numpy", "cv2", "torch", "math", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.controllable_image_generation"}, "('PREPROCESSORS', 'cv', 'image-quality_assessment-mos-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_quality_assessment_mos.py", "imports": ["numpy", "cv2", "math", "torchvision", "typing"], "module": "modelscope.preprocessors.cv.image_quality_assessment_mos"}, "('PREPROCESSORS', 'cv', 'image-demoire-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_restoration_preprocessor.py", "imports": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "module": 
"modelscope.preprocessors.cv.image_restoration_preprocessor"}, "('PREPROCESSORS', 'cv', 'image-quality_assessment-man-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_quality_assessment_man.py", "imports": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "module": "modelscope.preprocessors.cv.image_quality_assessment_man"}, "('PREPROCESSORS', 'cv', 'movie-scene-segmentation-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/video.py", "imports": ["urllib", "numpy", "random", "torch", "decord", "tempfile", "math", "torchvision", "os", "uuid"], "module": "modelscope.preprocessors.video"}, "('PREPROCESSORS', 'cv', 'load-image')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'object-detection-tinynas-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'image-color-enhance-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'image-denoise-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'image-deblur-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'image-portrait-enhancement-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'image-instance-segmentation-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'video-summarization-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'image-classification-bypass-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PARALLEL', 'default', 'DistributedDataParallel')": {"filepath": "TEMPLATE_PATH/trainers/parallel/builder.py", "imports": ["torch"], "module": "modelscope.trainers.parallel.builder"}, "('OPTIMIZERS', 'default', 'ChildTuningAdamW')": {"filepath": "TEMPLATE_PATH/trainers/optimizer/child_tuning_adamw_optimizer.py", "imports": ["numpy", "torch", "types", "math", "typing"], "module": "modelscope.trainers.optimizer.child_tuning_adamw_optimizer"}, "('LR_SCHEDULER', 'default', 'ConstantWarmup')": {"filepath": "TEMPLATE_PATH/trainers/lrscheduler/warmup/warmup.py", "imports": [], "module": "modelscope.trainers.lrscheduler.warmup.warmup"}, "('LR_SCHEDULER', 'default', 'LinearWarmup')": {"filepath": "TEMPLATE_PATH/trainers/lrscheduler/warmup/warmup.py", "imports": [], "module": "modelscope.trainers.lrscheduler.warmup.warmup"}, "('LR_SCHEDULER', 'default', 'ExponentialWarmup')": {"filepath": "TEMPLATE_PATH/trainers/lrscheduler/warmup/warmup.py", "imports": [], "module": 
"modelscope.trainers.lrscheduler.warmup.warmup"}, "('TRAINERS', 'default', 'nlp-base-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp_trainer.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.trainers.nlp_trainer"}, "('TRAINERS', 'default', 'nlp-veco-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp_trainer.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.trainers.nlp_trainer"}, "('TRAINERS', 'default', 'speech_kws_fsmn_char_ctc_nearfield')": {"filepath": "TEMPLATE_PATH/trainers/audio/kws_nearfield_trainer.py", "imports": ["torch", "re", "tensorboardX", "copy", "datetime", "yaml", "os", "typing"], "module": "modelscope.trainers.audio.kws_nearfield_trainer"}, "('TRAINERS', 'default', 'speech_dfsmn_kws_char_farfield')": {"filepath": "TEMPLATE_PATH/trainers/audio/kws_farfield_trainer.py", "imports": ["numpy", "torch", "math", "datetime", "glob", "os", "typing", "pickle"], "module": "modelscope.trainers.audio.kws_farfield_trainer"}, "('TRAINERS', 'default', 'speech-separation')": {"filepath": "TEMPLATE_PATH/trainers/audio/separation_trainer.py", "imports": ["numpy", "torch", "torchaudio", "tqdm", "csv", "os", "speechbrain", "typing"], "module": "modelscope.trainers.audio.separation_trainer"}, "('TRAINERS', 'default', 'speech-asr-trainer')": {"filepath": "TEMPLATE_PATH/trainers/audio/asr_trainer.py", "imports": ["shutil", "json", "typing", "tempfile", "os", "funasr"], "module": "modelscope.trainers.audio.asr_trainer"}, "('TRAINERS', 'default', 'speech-kantts-trainer')": {"filepath": "TEMPLATE_PATH/trainers/audio/tts_trainer.py", "imports": ["shutil", "json", "tempfile", "os", "typing", "zipfile"], "module": "modelscope.trainers.audio.tts_trainer"}, "('TRAINERS', 'default', 'speech_frcrn_ans_cirm_16k')": {"filepath": "TEMPLATE_PATH/trainers/audio/ans_trainer.py", "imports": [], "module": "modelscope.trainers.audio.ans_trainer"}, "('HOOKS', 'default', 'CheckpointHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/checkpoint/checkpoint_hook.py", "imports": ["random", "numpy", "torch", "time", "os", "typing"], "module": "modelscope.trainers.hooks.checkpoint.checkpoint_hook"}, "('HOOKS', 'default', 'BestCkptSaverHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/checkpoint/checkpoint_hook.py", "imports": ["random", "numpy", "torch", "time", "os", "typing"], "module": "modelscope.trainers.hooks.checkpoint.checkpoint_hook"}, "('HOOKS', 'default', 'LoadCheckpointHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/checkpoint/load_checkpoint_hook.py", "imports": ["random", "numpy", "torch", "packaging", "typing"], "module": "modelscope.trainers.hooks.checkpoint.load_checkpoint_hook"}, "('HOOKS', 'default', 'TextLoggerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/logger/text_logger_hook.py", "imports": ["json", "torch", "collections", "datetime", "os"], "module": "modelscope.trainers.hooks.logger.text_logger_hook"}, "('HOOKS', 'default', 'TensorboardHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/logger/tensorboard_hook.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.trainers.hooks.logger.tensorboard_hook"}, "('HOOKS', 'default', 'ApexAMPOptimizerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/optimizer/apex_optimizer_hook.py", "imports": ["torch", "logging", "packaging"], "module": "modelscope.trainers.hooks.optimizer.apex_optimizer_hook"}, "('HOOKS', 'default', 'TorchAMPOptimizerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/optimizer/torch_optimizer_hook.py", "imports": ["logging"], "module": 
"modelscope.trainers.hooks.optimizer.torch_optimizer_hook"}, "('HOOKS', 'default', 'OptimizerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/optimizer/base.py", "imports": ["torch", "logging"], "module": "modelscope.trainers.hooks.optimizer.base"}, "('HOOKS', 'default', 'NoneOptimizerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/optimizer/base.py", "imports": ["torch", "logging"], "module": "modelscope.trainers.hooks.optimizer.base"}, "('HOOKS', 'default', 'MegatronHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/distributed/megatron_hook.py", "imports": ["torch", "os", "shutil", "megatron_util"], "module": "modelscope.trainers.hooks.distributed.megatron_hook"}, "('HOOKS', 'default', 'DeepspeedHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/distributed/deepspeed_hook.py", "imports": ["shutil", "torch", "megatron_util", "deepspeed", "os"], "module": "modelscope.trainers.hooks.distributed.deepspeed_hook"}, "('HOOKS', 'default', 'DDPHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/distributed/ddp_hook.py", "imports": [], "module": "modelscope.trainers.hooks.distributed.ddp_hook"}, "('HOOKS', 'default', 'LrSchedulerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/lr_scheduler_hook.py", "imports": [], "module": "modelscope.trainers.hooks.lr_scheduler_hook"}, "('HOOKS', 'default', 'PlateauLrSchedulerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/lr_scheduler_hook.py", "imports": [], "module": "modelscope.trainers.hooks.lr_scheduler_hook"}, "('HOOKS', 'default', 'NoneLrSchedulerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/lr_scheduler_hook.py", "imports": [], "module": "modelscope.trainers.hooks.lr_scheduler_hook"}, "('HOOKS', 'default', 'EarlyStopHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/early_stop_hook.py", "imports": ["numpy"], "module": "modelscope.trainers.hooks.early_stop_hook"}, "('HOOKS', 'default', 'ClipClampLogitScaleHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/clip_clamp_logit_scale_hook.py", "imports": ["torch"], "module": "modelscope.trainers.hooks.clip_clamp_logit_scale_hook"}, "('HOOKS', 'default', 'SparsityHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/compression/sparsity_hook.py", "imports": ["os"], "module": "modelscope.trainers.hooks.compression.sparsity_hook"}, "('HOOKS', 'default', 'IterTimerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/iter_timer_hook.py", "imports": ["time"], "module": "modelscope.trainers.hooks.iter_timer_hook"}, "('HOOKS', 'default', 'EvaluationHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/evaluation_hook.py", "imports": ["typing", "collections"], "module": "modelscope.trainers.hooks.evaluation_hook"}, "('TRAINERS', 'default', 'clip-multi-modal-embedding')": {"filepath": "TEMPLATE_PATH/trainers/multi_modal/clip/clip_trainer.py", "imports": ["torch", "os", "typing", "math"], "module": "modelscope.trainers.multi_modal.clip.clip_trainer"}, "('TRAINERS', 'default', 'efficient-diffusion-tuning')": {"filepath": "TEMPLATE_PATH/trainers/multi_modal/efficient_diffusion_tuning/efficient_diffusion_tuning_trainer.py", "imports": ["torch", "typing"], "module": "modelscope.trainers.multi_modal.efficient_diffusion_tuning.efficient_diffusion_tuning_trainer"}, "('TRAINERS', 'default', 'mplug')": {"filepath": "TEMPLATE_PATH/trainers/multi_modal/mplug/mplug_trainer.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.trainers.multi_modal.mplug.mplug_trainer"}, "('TRAINERS', 'default', 'image-classification-team')": {"filepath": 
"TEMPLATE_PATH/trainers/multi_modal/team/team_trainer.py", "imports": ["numpy", "torch", "collections", "sklearn", "os", "typing"], "module": "modelscope.trainers.multi_modal.team.team_trainer"}, "('TRAINERS', 'default', 'mgeo-ranking-trainer')": {"filepath": "TEMPLATE_PATH/trainers/multi_modal/mgeo_ranking_trainer.py", "imports": ["torch", "dataclasses", "typing"], "module": "modelscope.trainers.multi_modal.mgeo_ranking_trainer"}, "('TRAINERS', 'default', 'ofa')": {"filepath": "TEMPLATE_PATH/trainers/multi_modal/ofa/ofa_trainer.py", "imports": ["shutil", "json", "torch", "functools", "tempfile", "math", "os", "typing"], "module": "modelscope.trainers.multi_modal.ofa.ofa_trainer"}, "('TRAINERS', 'default', 'nlp-gpt-moe-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/gpt_moe_trainer.py", "imports": ["torch", "collections", "megatron_util", "os", "typing"], "module": "modelscope.trainers.nlp.gpt_moe_trainer"}, "('TRAINERS', 'default', 'nlp-plug-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/plug_trainer.py", "imports": ["torch", "megatron_util", "deepspeed", "os", "typing"], "module": "modelscope.trainers.nlp.plug_trainer"}, "('TRAINERS', 'default', 'text-generation-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/text_generation_trainer.py", "imports": ["torch", "collections"], "module": "modelscope.trainers.nlp.text_generation_trainer"}, "('TRAINERS', 'default', 'document-grounded-dialog-rerank-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/document_grounded_dialog_rerank_trainer.py", "imports": ["transformers", "numpy", "random", "torch", "time", "os", "typing"], "module": "modelscope.trainers.nlp.document_grounded_dialog_rerank_trainer"}, "('TRAINERS', 'default', 'csanmt-translation')": {"filepath": "TEMPLATE_PATH/trainers/nlp/csanmt_translation_trainer.py", "imports": ["os", "tensorflow", "typing", "time"], "module": "modelscope.trainers.nlp.csanmt_translation_trainer"}, "('TRAINERS', 'default', 'translation-evaluation-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/translation_evaluation_trainer.py", "imports": ["transformers", "random", "torch", "tqdm", "math", "pandas", "os", "typing"], "module": "modelscope.trainers.nlp.translation_evaluation_trainer"}, "('TRAINERS', 'default', 'faq-question-answering-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/faq_question_answering_trainer.py", "imports": ["distutils", "contextlib", "numpy", "torch", "functools", "collections", "dataclasses", "typing"], "module": "modelscope.trainers.nlp.faq_question_answering_trainer"}, "('TRAINERS', 'default', 'table-question-answering-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/table_question_answering_trainer.py", "imports": ["numpy", "json", "torch", "tqdm", "time", "os", "typing"], "module": "modelscope.trainers.nlp.table_question_answering_trainer"}, "('TRAINERS', 'default', 'bert-sentiment-analysis')": {"filepath": "TEMPLATE_PATH/trainers/nlp/sequence_classification_trainer.py", "imports": ["time", "typing", "numpy"], "module": "modelscope.trainers.nlp.sequence_classification_trainer"}, "('TRAINERS', 'default', 'nlp-sentence-embedding-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/sentence_embedding_trainer.py", "imports": ["transformers", "numpy", "torch", "tqdm", "time", "dataclasses", "typing"], "module": "modelscope.trainers.nlp.sentence_embedding_trainer"}, "('TRAINERS', 'default', 'nlp-gpt3-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/gpt3_trainer.py", "imports": ["torch", "os", "copy", "typing"], "module": 
"modelscope.trainers.nlp.gpt3_trainer"}, "('TRAINERS', 'default', 'nlp-text-ranking-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/text_ranking_trainer.py", "imports": ["numpy", "torch", "tqdm", "time", "dataclasses", "typing"], "module": "modelscope.trainers.nlp.text_ranking_trainer"}, "('TRAINERS', 'default', 'siamese-uie-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/siamese_uie_trainer.py", "imports": ["random", "numpy", "json", "torch", "collections", "math", "time", "os", "typing"], "module": "modelscope.trainers.nlp.siamese_uie_trainer"}, "('TRAINERS', 'default', 'dialog-intent-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/space/dialog_intent_trainer.py", "imports": ["os", "typing", "numpy"], "module": "modelscope.trainers.nlp.space.dialog_intent_trainer"}, "('TRAINERS', 'default', 'dialog-modeling-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/space/dialog_modeling_trainer.py", "imports": ["os", "time", "typing", "numpy"], "module": "modelscope.trainers.nlp.space.dialog_modeling_trainer"}, "('TRAINERS', 'default', 'document-grounded-dialog-retrieval-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/document_grounded_dialog_retrieval_trainer.py", "imports": ["transformers", "numpy", "json", "torch", "tqdm", "faiss", "os"], "module": "modelscope.trainers.nlp.document_grounded_dialog_retrieval_trainer"}, "('TRAINERS', 'default', 'document-grounded-dialog-generate-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/document_grounded_dialog_generate_trainer.py", "imports": ["string", "transformers", "json", "torch", "rouge", "re", "tqdm", "collections", "os", "sacrebleu"], "module": "modelscope.trainers.nlp.document_grounded_dialog_generate_trainer"}, "('TRAINERS', 'default', 'ocr-recognition')": {"filepath": "TEMPLATE_PATH/trainers/cv/ocr_recognition_trainer.py", "imports": ["torch", "time", "collections"], "module": "modelscope.trainers.cv.ocr_recognition_trainer"}, "('TRAINERS', 'default', 'image-instance-segmentation')": {"filepath": "TEMPLATE_PATH/trainers/cv/image_instance_segmentation_trainer.py", "imports": [], "module": "modelscope.trainers.cv.image_instance_segmentation_trainer"}, "('TRAINERS', 'default', 'referring-video-object-segmentation')": {"filepath": "TEMPLATE_PATH/trainers/cv/referring_video_object_segmentation_trainer.py", "imports": ["torch", "os"], "module": "modelscope.trainers.cv.referring_video_object_segmentation_trainer"}, "('TRAINERS', 'default', 'vision-efficient-tuning')": {"filepath": "TEMPLATE_PATH/trainers/cv/vision_efficient_tuning_trainer.py", "imports": ["torch", "typing"], "module": "modelscope.trainers.cv.vision_efficient_tuning_trainer"}, "('TRAINERS', 'default', 'movie-scene-segmentation')": {"filepath": "TEMPLATE_PATH/trainers/cv/movie_scene_segmentation_trainer.py", "imports": [], "module": "modelscope.trainers.cv.movie_scene_segmentation_trainer"}, "('TRAINERS', 'default', 'nerf-recon-acc')": {"filepath": "TEMPLATE_PATH/trainers/cv/nerf_recon_acc_trainer.py", "imports": ["random", "numpy", "cv2", "torch", "tqdm", "time", "datetime", "glob", "os", "typing"], "module": "modelscope.trainers.cv.nerf_recon_acc_trainer"}, "('TRAINERS', 'default', 'tinynas-damoyolo')": {"filepath": "TEMPLATE_PATH/trainers/cv/image_detection_damoyolo_trainer.py", "imports": ["torch", "math", "datetime", "time", "os", "easydict", "typing"], "module": "modelscope.trainers.cv.image_detection_damoyolo_trainer"}, "('TRAINERS', 'default', 'image-classification')": {"filepath": "TEMPLATE_PATH/trainers/cv/image_classifition_trainer.py", "imports": 
["numpy", "torch", "copy", "time", "os", "typing"], "module": "modelscope.trainers.cv.image_classifition_trainer"}, "('TRAINERS', 'default', 'cartoon-translation')": {"filepath": "TEMPLATE_PATH/trainers/cv/cartoon_translation_trainer.py", "imports": ["tensorflow", "numpy", "tqdm", "packaging", "os", "typing"], "module": "modelscope.trainers.cv.cartoon_translation_trainer"}, "('TRAINERS', 'default', 'ocr-detection-db')": {"filepath": "TEMPLATE_PATH/trainers/cv/ocr_detection_db_trainer.py", "imports": ["numpy", "torch", "tqdm", "math", "copy", "datetime", "time", "os", "easydict", "typing"], "module": "modelscope.trainers.cv.ocr_detection_db_trainer"}, "('TRAINERS', 'default', 'card-detection-scrfd')": {"filepath": "TEMPLATE_PATH/trainers/cv/card_detection_scrfd_trainer.py", "imports": [], "module": "modelscope.trainers.cv.card_detection_scrfd_trainer"}, "('TRAINERS', 'default', 'face-detection-scrfd')": {"filepath": "TEMPLATE_PATH/trainers/cv/face_detection_scrfd_trainer.py", "imports": ["copy", "time", "typing", "os"], "module": "modelscope.trainers.cv.face_detection_scrfd_trainer"}, "('TRAINERS', 'default', 'image-inpainting')": {"filepath": "TEMPLATE_PATH/trainers/cv/image_inpainting_trainer.py", "imports": ["torch", "time", "collections"], "module": "modelscope.trainers.cv.image_inpainting_trainer"}, "('TRAINERS', 'default', 'image-portrait-enhancement')": {"filepath": "TEMPLATE_PATH/trainers/cv/image_portrait_enhancement_trainer.py", "imports": ["torch", "collections"], "module": "modelscope.trainers.cv.image_portrait_enhancement_trainer"}, "('TRAINERS', 'default', 'action-detection')": {"filepath": "TEMPLATE_PATH/trainers/cv/action_detection_trainer.py", "imports": ["torch", "fvcore", "os", "typing", "detectron2"], "module": "modelscope.trainers.cv.action_detection_trainer"}, "('TRAINERS', 'default', 'image-fewshot-detection')": {"filepath": "TEMPLATE_PATH/trainers/cv/image_defrcn_fewshot_detection_trainer.py", "imports": ["torch", "collections", "os", "typing", "detectron2"], "module": "modelscope.trainers.cv.image_defrcn_fewshot_detection_trainer"}, "('TRAINERS', 'default', 'trainer')": {"filepath": "TEMPLATE_PATH/trainers/trainer.py", "imports": ["distutils", "json", "torch", "functools", "collections", "copy", "inspect", "os", "typing"], "module": "modelscope.trainers.trainer"}, "('TRAINERS', 'default', 'dummy')": {"filepath": "TEMPLATE_PATH/trainers/base.py", "imports": ["os", "abc", "typing", "time"], "module": "modelscope.trainers.base"}, "('CUSTOM_DATASETS', 'image-quality-assessment-degradation', 'image-quality-assessment-degradation')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_quality_assessment_degradation/image_quality_assessment_degradation_dataset.py", "imports": ["torchvision"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.image_quality_assessment_degradation.image_quality_assessment_degradation_dataset"}, "('CUSTOM_DATASETS', 'image-portrait-enhancement', 'PairedDataset')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_portrait_enhancement/image_portrait_enhancement_dataset.py", "imports": ["cv2", "numpy"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.image_portrait_enhancement.image_portrait_enhancement_dataset"}, "('CUSTOM_DATASETS', 'nli', 'veco')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/veco_dataset.py", "imports": ["datasets", "typing", "numpy"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.veco_dataset"}, "('CUSTOM_DATASETS', 
'image-segmentation', 'cascade_mask_rcnn_swin')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_instance_segmentation_coco_dataset.py", "imports": ["os", "numpy", "pycocotools"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.image_instance_segmentation_coco_dataset"}, "('CUSTOM_DATASETS', 'ocr-recognition', 'OCRRecognition')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_recognition_dataset.py", "imports": ["PIL", "numpy", "cv2", "json", "torch", "six", "lmdb", "os"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_recognition_dataset"}, "('CUSTOM_DATASETS', 'bad-image-detecting', 'bad-image-detecting')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/bad_image_detecting/bad_image_detecting_dataset.py", "imports": [], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.bad_image_detecting.bad_image_detecting_dataset"}, "('CUSTOM_DATASETS', 'image-inpainting', 'FFTInpainting')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_inpainting/image_inpainting_dataset.py", "imports": ["albumentations", "numpy", "enum", "cv2", "os", "glob"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.image_inpainting.image_inpainting_dataset"}, "('CUSTOM_DATASETS', 'language-guided-video-summarization', 'clip-it-language-guided-video-summarization')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/language_guided_video_summarization_dataset.py", "imports": ["numpy", "json", "torch", "h5py", "os"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.language_guided_video_summarization_dataset"}, "('CUSTOM_DATASETS', 'movie-scene-segmentation', 'resnet50-bert')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py", "imports": ["random", "json", "torch", "copy", "torchvision", "os"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.movie_scene_segmentation.movie_scene_segmentation_dataset"}, "('CUSTOM_DATASETS', 'text-ranking', 'bert')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/text_ranking_dataset.py", "imports": ["torch", "typing", "random"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.text_ranking_dataset"}, "('CUSTOM_DATASETS', 'sentence-embedding', 'bert')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/text_ranking_dataset.py", "imports": ["torch", "typing", "random"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.text_ranking_dataset"}, "('CUSTOM_DATASETS', 'image-denoising', 'SiddDataset')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/sidd_image_denoising/sidd_image_denoising_dataset.py", "imports": ["cv2", "numpy"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.sidd_image_denoising.sidd_image_denoising_dataset"}, "('CUSTOM_DATASETS', 'image-deblurring', 'RedsDataset')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/reds_image_deblurring_dataset.py", "imports": ["cv2", "numpy"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.reds_image_deblurring_dataset"}, "('CUSTOM_DATASETS', 'video-frame-interpolation', 'video-frame-interpolation')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_frame_interpolation/video_frame_interpolation_dataset.py", "imports": ["cv2", "torch", "numpy"], "module": 
"modelscope.msdatasets.dataset_cls.custom_datasets.video_frame_interpolation.video_frame_interpolation_dataset"}, "('CUSTOM_DATASETS', 'image-quality-assessment-mos', 'image-quality-assessment-mos')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_quality_assmessment_mos/image_quality_assessment_mos_dataset.py", "imports": [], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.image_quality_assmessment_mos.image_quality_assessment_mos_dataset"}, "('CUSTOM_DATASETS', 'text-ranking', 'mgeo')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/mgeo_ranking_dataset.py", "imports": ["json", "torch", "typing", "random"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.mgeo_ranking_dataset"}, "('CUSTOM_DATASETS', 'video-stabilization', 'video-stabilization')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_stabilization/video_stabilization_dataset.py", "imports": [], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.video_stabilization.video_stabilization_dataset"}, "('CUSTOM_DATASETS', 'image-deblurring', 'GoproDataset')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/gopro_image_deblurring_dataset.py", "imports": ["cv2", "numpy"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.gopro_image_deblurring_dataset"}, "('CUSTOM_DATASETS', 'referring-video-object-segmentation', 'swinT-referring-video-object-segmentation')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py", "imports": ["numpy", "pycocotools", "json", "torch", "tqdm", "h5py", "glob", "torchvision", "pandas", "os"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.referring_video_object_segmentation.referring_video_object_segmentation_dataset"}, "('CUSTOM_DATASETS', 'image-colorization', 'ddcolor')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_colorization/image_colorization_dataset.py", "imports": ["cv2", "torch", "numpy"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.image_colorization.image_colorization_dataset"}, "('CUSTOM_DATASETS', 'video-super-resolution', 'real-basicvsr')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_super_resolution/video_super_resolution_dataset.py", "imports": ["cv2", "torch", "collections", "numpy"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.video_super_resolution.video_super_resolution_dataset"}, "('EXPORTERS', 'acoustic-noise-suppression', 'speech_dfsmn_ans')": {"filepath": "TEMPLATE_PATH/exporters/audio/ans_dfsmn_exporter.py", "imports": ["torch", "os"], "module": "modelscope.exporters.audio.ans_dfsmn_exporter"}, "('EXPORTERS', 'translation', 'csanmt-translation')": {"filepath": "TEMPLATE_PATH/exporters/nlp/csanmt_for_translation_exporter.py", "imports": ["os", "typing", "tensorflow"], "module": "modelscope.exporters.nlp.csanmt_for_translation_exporter"}, "('EXPORTERS', 'transformer-crf', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/exporters/nlp/model_for_token_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.model_for_token_classification_exporter"}, "('EXPORTERS', 'token-classification', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/exporters/nlp/model_for_token_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": 
"modelscope.exporters.nlp.model_for_token_classification_exporter"}, "('EXPORTERS', 'named-entity-recognition', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/exporters/nlp/model_for_token_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.model_for_token_classification_exporter"}, "('EXPORTERS', 'part-of-speech', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/exporters/nlp/model_for_token_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.model_for_token_classification_exporter"}, "('EXPORTERS', 'word-segmentation', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/exporters/nlp/model_for_token_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.model_for_token_classification_exporter"}, "('EXPORTERS', 'text-classification', 'bert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'text-classification', 'structbert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'sentence-similarity', 'bert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'sentiment-classification', 'bert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'nli', 'bert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'sentence-similarity', 'structbert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'sentiment-classification', 'structbert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'nli', 'structbert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'zero-shot-classification', 'bert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_zero_shot_classification_exporter.py", "imports": ["typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_zero_shot_classification_exporter"}, "('EXPORTERS', 'zero-shot-classification', 'structbert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_zero_shot_classification_exporter.py", "imports": ["typing", "collections"], "module": 
"modelscope.exporters.nlp.sbert_for_zero_shot_classification_exporter"}, "('EXPORTERS', 'image-object-detection', 'tinynas-damoyolo')": {"filepath": "TEMPLATE_PATH/exporters/cv/object_detection_damoyolo_exporter.py", "imports": ["numpy", "torch", "functools", "onnx", "os", "typing"], "module": "modelscope.exporters.cv.object_detection_damoyolo_exporter"}, "('EXPORTERS', 'face-detection', 'scrfd')": {"filepath": "TEMPLATE_PATH/exporters/cv/face_detection_scrfd_exporter.py", "imports": ["numpy", "torch", "functools", "onnx", "os", "typing"], "module": "modelscope.exporters.cv.face_detection_scrfd_exporter"}, "('EXPORTERS', 'default', 'cartoon-translation')": {"filepath": "TEMPLATE_PATH/exporters/cv/cartoon_translation_exporter.py", "imports": ["os", "tensorflow", "typing", "packaging"], "module": "modelscope.exporters.cv.cartoon_translation_exporter"}}, "requirements": {"modelscope.models.science.unifold.config": ["copy", "typing", "ml_collections"], "modelscope.models.science.unifold.msa.tools.hmmsearch": ["os", "subprocess", "absl", "typing"], "modelscope.models.science.unifold.msa.tools.hhblits": ["subprocess", "absl", "glob", "os", "typing"], "modelscope.models.science.unifold.msa.tools.kalign": ["os", "subprocess", "absl", "typing"], "modelscope.models.science.unifold.msa.tools.utils": ["time", "contextlib", "shutil", "absl", "typing", "tempfile"], "modelscope.models.science.unifold.msa.tools.hmmbuild": ["os", "subprocess", "absl", "re"], "modelscope.models.science.unifold.msa.tools.jackhmmer": ["urllib", "subprocess", "absl", "glob", "os", "concurrent", "typing"], "modelscope.models.science.unifold.msa.tools.hhsearch": ["subprocess", "absl", "glob", "os", "typing"], "modelscope.models.science.unifold.msa.mmcif": ["io", "dataclasses", "absl", "functools", "typing", "collections", "Bio"], "modelscope.models.science.unifold.msa.msa_identifiers": ["dataclasses", "re", "typing"], "modelscope.models.science.unifold.msa.parsers": ["string", "itertools", "dataclasses", "re", "typing", "collections"], "modelscope.models.science.unifold.msa.templates": ["numpy", "abc", "absl", "functools", "re", "datetime", "glob", "os", "dataclasses", "typing"], "modelscope.models.science.unifold.msa.utils": ["json", "os", "absl", "typing"], "modelscope.models.science.unifold.msa.pipeline": ["os", "absl", "typing", "numpy"], "modelscope.models.science.unifold.model": ["torch", "os", "typing", "argparse"], "modelscope.models.science.unifold.dataset": ["numpy", "json", "torch", "logging", "unicore", "copy", "ml_collections", "os", "typing"], "modelscope.models.science.unifold.modules.confidence": ["torch", "typing"], "modelscope.models.science.unifold.modules.alphafold": ["torch", "unicore"], "modelscope.models.science.unifold.modules.evoformer": ["torch", "functools", "typing", "unicore"], "modelscope.models.science.unifold.modules.auxillary_heads": ["torch", "typing", "unicore"], "modelscope.models.science.unifold.modules.attentions": ["torch", "functools", "typing", "unicore"], "modelscope.models.science.unifold.modules.embedders": ["torch", "typing", "unicore"], "modelscope.models.science.unifold.modules.structure_module": ["torch", "typing", "math", "unicore"], "modelscope.models.science.unifold.modules.common": ["torch", "functools", "typing", "unicore"], "modelscope.models.science.unifold.modules.frame": ["torch", "__future__", "typing", "numpy"], "modelscope.models.science.unifold.modules.template": ["torch", "functools", "math", "unicore", "typing"], 
"modelscope.models.science.unifold.modules.triangle_multiplication": ["torch", "functools", "typing", "unicore"], "modelscope.models.science.unifold.modules.featurization": ["torch", "typing", "unicore"], "modelscope.models.science.unifold.data.process_multimer": ["typing", "collections", "numpy"], "modelscope.models.science.unifold.data.protein": ["numpy", "Bio", "io", "dataclasses", "typing"], "modelscope.models.science.unifold.data.residue_constants": ["numpy", "os", "functools", "typing", "collections", "unicore"], "modelscope.models.science.unifold.data.utils": ["numpy", "json", "functools", "scipy", "copy", "gzip", "typing", "pickle"], "modelscope.models.science.unifold.data.process": ["torch", "typing", "numpy"], "modelscope.models.science.unifold.data.msa_pairing": ["numpy", "scipy", "collections", "pandas", "typing"], "modelscope.models.science.unifold.data.data_ops": ["itertools", "numpy", "torch", "functools", "operator", "unicore", "typing"], "modelscope.models.builder": [], "modelscope.models.audio.ans.layers.activations": ["torch"], "modelscope.models.audio.ans.layers.layer_base": ["six", "abc", "torch", "numpy"], "modelscope.models.audio.ans.layers.affine_transform": ["torch"], "modelscope.models.audio.ans.layers.uni_deep_fsmn": ["torch", "numpy"], "modelscope.models.audio.ans.unet": ["torch"], "modelscope.models.audio.ans.conv_stft": ["torch", "scipy", "numpy"], "modelscope.models.audio.ans.denoise_net": ["torch"], "modelscope.models.audio.ans.complex_nn": ["torch"], "modelscope.models.audio.ans.se_module_complex": ["torch"], "modelscope.models.audio.ans.frcrn": ["torch", "os", "typing"], "modelscope.models.audio.sv.DTDNN_layers": ["torch"], "modelscope.models.audio.sv.ecapa_tdnn": ["torch", "torchaudio", "math", "os", "typing"], "modelscope.models.audio.sv.ERes2Net": ["torch", "torchaudio", "math", "os", "typing"], "modelscope.models.audio.sv.pooling_layers": ["torch"], "modelscope.models.audio.sv.DTDNN": ["torch", "torchaudio", "collections", "os", "typing"], "modelscope.models.audio.sv.fusion": ["torch"], "modelscope.models.audio.sv.generic_speaker_verification": ["os", "typing"], "modelscope.models.audio.sv.speaker_change_locator": ["numpy", "torch", "torchaudio", "collections", "os", "typing"], "modelscope.models.audio.sv.rdino": ["torch", "torchaudio", "math", "os", "typing"], "modelscope.models.audio.itn.generic_inverse_text_processing": ["os", "typing"], "modelscope.models.audio.aec.layers.activations": ["torch"], "modelscope.models.audio.aec.layers.layer_base": ["torch", "abc", "re", "numpy"], "modelscope.models.audio.aec.layers.deep_fsmn": ["torch", "numpy"], "modelscope.models.audio.aec.layers.affine_transform": ["torch", "numpy"], "modelscope.models.audio.aec.layers.uni_deep_fsmn": ["torch", "numpy"], "modelscope.models.audio.aec.network.se_net": ["torch"], "modelscope.models.audio.aec.network.loss": ["torch"], "modelscope.models.audio.aec.network.modulation_loss": ["torch", "torchaudio", "math"], "modelscope.models.audio.asr.wenet_automatic_speech_recognition": ["json", "os", "wenetruntime", "typing"], "modelscope.models.audio.asr.generic_automatic_speech_recognition": ["os", "typing"], "modelscope.models.audio.punc.generic_punctuation": ["os", "typing"], "modelscope.models.audio.tts.voice": ["numpy", "json", "torch", "kantts", "collections", "time", "yaml", "os", "threading", "pickle"], "modelscope.models.audio.tts.sambert_hifi": ["shutil", "numpy", "json", "__future__", "wave", "matplotlib", "datetime", "yaml", "os", "zipfile"], 
"modelscope.models.audio.separation.mossformer": ["torch", "os", "copy", "typing"], "modelscope.models.audio.separation.mossformer_conv_module": ["torch"], "modelscope.models.audio.separation.mossformer_block": ["torch"], "modelscope.models.audio.separation.layer_norm": ["torch", "__future__"], "modelscope.models.audio.kws.farfield.fsmn": ["torch", "numpy"], "modelscope.models.audio.kws.farfield.fsmn_sele_v2": ["torch"], "modelscope.models.audio.kws.farfield.fsmn_sele_v3": ["torch"], "modelscope.models.audio.kws.farfield.model_def": ["math", "struct", "enum"], "modelscope.models.audio.kws.farfield.model": ["os", "typing", "tempfile"], "modelscope.models.audio.kws.generic_key_word_spotting": ["os", "typing"], "modelscope.models.audio.kws.nearfield.fsmn": ["torch", "typing", "numpy"], "modelscope.models.audio.kws.nearfield.model": ["torch", "tempfile", "sys", "os", "typing"], "modelscope.models.audio.kws.nearfield.cmvn": ["torch", "re", "numpy"], "modelscope.models.multi_modal.ofa_for_all_tasks": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "modelscope.models.multi_modal.clip.configuration_bert": ["__future__", "logging"], "modelscope.models.multi_modal.clip.bert_tokenizer": ["six", "unicodedata", "__future__", "re", "os", "collections"], "modelscope.models.multi_modal.clip.model": ["numpy", "json", "torch", "collections", "os", "typing"], "modelscope.models.multi_modal.clip.modeling_bert": ["json", "torch", "logging", "__future__", "sys", "math", "io", "os"], "modelscope.models.multi_modal.mplug_for_all_tasks": ["os", "typing"], "modelscope.models.multi_modal.multi_stage_diffusion.decoder": ["torch", "math"], "modelscope.models.multi_modal.multi_stage_diffusion.prior": ["torch", "math"], "modelscope.models.multi_modal.multi_stage_diffusion.upsampler": ["torch", "math"], "modelscope.models.multi_modal.multi_stage_diffusion.model": ["PIL", "numpy", "json", "torch", "math", "os", "typing"], "modelscope.models.multi_modal.multi_stage_diffusion.tokenizer": ["transformers", "gzip", "torch", "regex", "functools", "ftfy", "html"], "modelscope.models.multi_modal.multi_stage_diffusion.xglm": ["torch", "math"], "modelscope.models.multi_modal.multi_stage_diffusion.gaussian_diffusion": ["torch", "math"], "modelscope.models.multi_modal.multi_stage_diffusion.clip": ["torch", "math"], "modelscope.models.multi_modal.diffusion.structbert": ["copy", "six", "numpy", "json", "torch", "__future__", "math"], "modelscope.models.multi_modal.diffusion.diffusion": ["torch", "math"], "modelscope.models.multi_modal.diffusion.unet_generator": ["torch", "math"], "modelscope.models.multi_modal.diffusion.model": ["numpy", "json", "torch", "os", "typing"], "modelscope.models.multi_modal.diffusion.tokenizer": ["unicodedata", "six", "__future__", "collections"], "modelscope.models.multi_modal.diffusion.unet_upsampler_256": ["torch", "functools", "math"], "modelscope.models.multi_modal.diffusion.unet_upsampler_1024": ["torch", "math"], "modelscope.models.multi_modal.efficient_diffusion_tuning.efficient_stable_diffusion": ["transformers", "torch", "functools", "diffusers", "os", "typing"], "modelscope.models.multi_modal.gemm.gemm_base": ["numpy", "json", "torch", "collections", "os", "typing"], "modelscope.models.multi_modal.gemm.gemm_model": ["PIL", "numpy", "json", "torch", "torchvision", "os", "typing"], "modelscope.models.multi_modal.gemm.tokenizer": ["gzip", "torch", "os", "regex", "functools", "ftfy", "html"], "modelscope.models.multi_modal.mmr.dataloaders.rawvideo_util": ["PIL", "numpy", 
"torch", "cv2", "torchvision"], "modelscope.models.multi_modal.mmr.models.module_clip": ["urllib", "hashlib", "torch", "tqdm", "collections", "warnings", "os", "typing"], "modelscope.models.multi_modal.mmr.models.clip_for_mm_video_embedding": ["urllib", "PIL", "random", "numpy", "json", "torch", "decord", "tempfile", "os", "typing", "uuid"], "modelscope.models.multi_modal.mmr.models.module_cross": ["json", "torch", "logging", "collections", "__future__"], "modelscope.models.multi_modal.mmr.models.until_module": ["torch", "logging", "math", "numpy"], "modelscope.models.multi_modal.mmr.models.tokenization_clip": ["gzip", "os", "regex", "functools", "ftfy", "html"], "modelscope.models.multi_modal.mmr.models.modeling": ["torch", "types", "collections", "platform", "os"], "modelscope.models.multi_modal.mmr.models.dynamic_inverted_softmax": ["numpy"], "modelscope.models.multi_modal.mplug.predictor": ["torch", "__future__"], "modelscope.models.multi_modal.mplug.clip.clip": ["torch", "typing", "collections"], "modelscope.models.multi_modal.mplug.modeling_mplug": ["transformers", "torch", "math", "os", "typing"], "modelscope.models.multi_modal.mplug.mvit": ["timm", "numpy", "torch", "functools", "fairscale", "collections"], "modelscope.models.multi_modal.mplug.configuration_mplug": ["os", "typing", "transformers", "yaml"], "modelscope.models.multi_modal.team.team_model": ["PIL", "numpy", "torch", "cv2", "tokenizers", "torchvision", "typing"], "modelscope.models.multi_modal.team.utils": ["transformers", "numpy", "torch", "typing", "collections"], "modelscope.models.multi_modal.guided_diffusion.respace": ["torch", "numpy"], "modelscope.models.multi_modal.guided_diffusion.unet": ["transformers", "numpy", "torch", "abc", "math"], "modelscope.models.multi_modal.guided_diffusion.gaussian_diffusion": ["torch", "math", "numpy", "enum"], "modelscope.models.multi_modal.guided_diffusion.script": [], "modelscope.models.multi_modal.vldoc.tokenization": ["os", "transformers"], "modelscope.models.multi_modal.vldoc.model": ["json", "torch", "logging", "re", "math", "sys", "copy", "torchvision", "os"], "modelscope.models.multi_modal.vldoc.conv_fpn_trans": ["timm", "random", "torch", "collections", "apex"], "modelscope.models.multi_modal.vldoc.transformer_local": ["copy", "torch"], "modelscope.models.multi_modal.vldoc.modeling_layout_roberta": ["transformers", "packaging", "torch", "os", "math"], "modelscope.models.multi_modal.vldoc.processing": ["PIL", "timm", "numpy", "cv2", "torch", "collections", "torchvision", "typing"], "modelscope.models.multi_modal.vldoc.convnext": ["torch", "os", "timm"], "modelscope.models.multi_modal.soonet.model": ["torch", "os"], "modelscope.models.multi_modal.soonet.tokenizer": ["gzip", "torch", "regex", "functools", "ftfy", "html"], "modelscope.models.multi_modal.soonet.utils": ["copy", "decord", "numpy", "tqdm"], "modelscope.models.multi_modal.soonet.blocks": ["torch", "math"], "modelscope.models.multi_modal.soonet.swin_transformer": ["torch", "numpy"], "modelscope.models.multi_modal.soonet.clip": ["warnings", "numpy", "torch", "typing", "collections"], "modelscope.models.multi_modal.mgeo.text_ranking": ["torch"], "modelscope.models.multi_modal.mgeo.backbone": ["dataclasses", "transformers", "random", "torch", "math", "warnings", "os", "typing"], "modelscope.models.multi_modal.mgeo.text_classification": ["torch"], "modelscope.models.multi_modal.mgeo.token_classification": ["torch"], "modelscope.models.multi_modal.mplug_owl.configuration_mplug_owl": ["copy", "os", "typing", 
"transformers"], "modelscope.models.multi_modal.mplug_owl.modeling_mplug_owl": ["dataclasses", "transformers", "random", "torch", "logging", "math", "copy", "io", "os", "typing"], "modelscope.models.multi_modal.ofa_for_text_to_image_synthesis_model": ["PIL", "pkg_resources", "numpy", "json", "torch", "taming", "torchvision", "os", "typing"], "modelscope.models.multi_modal.video_synthesis.diffusion": ["torch"], "modelscope.models.multi_modal.video_synthesis.text_to_video_synthesis_model": ["open_clip", "torch", "einops", "os", "typing"], "modelscope.models.multi_modal.video_synthesis.autoencoder": ["torch", "numpy"], "modelscope.models.multi_modal.video_synthesis.unet_sd": ["torch", "einops", "math"], "modelscope.models.multi_modal.clip_interrogator.model": ["PIL", "hashlib", "numpy", "open_clip", "torch", "dataclasses", "os", "typing", "requests", "transformers", "safetensors", "tqdm", "math", "time", "torchvision"], "modelscope.models.multi_modal.rleg.model": ["json", "os", "torch"], "modelscope.models.multi_modal.rleg.rleg": ["torch", "torchvision", "typing"], "modelscope.models.multi_modal.dpm_solver_pytorch": ["torch", "math"], "modelscope.models.multi_modal.ofa.modeling_ofa": ["transformers", "random", "torch", "math", "packaging", "apex", "dataclasses", "typing"], "modelscope.models.multi_modal.ofa.utils.utils": ["torch", "typing"], "modelscope.models.multi_modal.ofa.utils.constant": [], "modelscope.models.multi_modal.ofa.vit": ["torch", "collections", "fairseq"], "modelscope.models.multi_modal.ofa.modeling_mmspeech": ["transformers", "numpy", "torch", "math", "fairseq", "packaging", "apex", "dataclasses", "typing"], "modelscope.models.multi_modal.ofa.resnet": ["torch"], "modelscope.models.multi_modal.ofa.tokenization_ofa": ["os", "typing", "transformers", "collections"], "modelscope.models.multi_modal.ofa.generate.multihead_attention": ["torch", "typing", "math", "fairseq"], "modelscope.models.multi_modal.ofa.generate.ngram_repeat_block": ["warnings", "torch", "typing", "math", "fairseq"], "modelscope.models.multi_modal.ofa.generate.sequence_generator": ["torch", "math", "typing", "sys"], "modelscope.models.multi_modal.ofa.generate.incremental_decoding_utils": ["torch", "typing", "uuid"], "modelscope.models.multi_modal.ofa.generate.utils": ["amp_C", "itertools", "torch_xla", "torch", "collections"], "modelscope.models.multi_modal.ofa.generate.search": ["torch", "typing", "math"], "modelscope.models.multi_modal.ofa.generate.token_generation_constraints": ["torch", "typing", "collections"], "modelscope.models.multi_modal.ofa.tokenization_ofa_fast": ["json", "typing", "transformers", "tokenizers"], "modelscope.models.multi_modal.ofa.configuration_mmspeech": ["warnings", "transformers"], "modelscope.models.multi_modal.ofa.configuration_ofa": ["warnings", "transformers"], "modelscope.models.nlp.unite.configuration": ["enum"], "modelscope.models.nlp.unite.translation_evaluation": ["transformers", "numpy", "torch", "math", "warnings", "packaging", "dataclasses", "typing"], "modelscope.models.nlp.palm_v2.configuration": ["transformers"], "modelscope.models.nlp.palm_v2.dureader_eval": ["zipfile", "numpy", "json", "rouge", "re", "sys", "math", "collections", "copy", "argparse"], "modelscope.models.nlp.palm_v2.text_generation": ["dataclasses", "subprocess", "codecs", "transformers", "numpy", "json", "torch", "math", "copy", "os", "typing"], "modelscope.models.nlp.structbert.configuration": ["transformers"], "modelscope.models.nlp.structbert.fill_mask": ["torch", "transformers"], 
"modelscope.models.nlp.structbert.backbone": ["transformers", "torch", "math", "packaging", "dataclasses", "typing"], "modelscope.models.nlp.structbert.faq_question_answering": ["torch", "math", "collections", "os", "typing"], "modelscope.models.nlp.structbert.adv_utils": ["torch"], "modelscope.models.nlp.structbert.text_classification": ["torch"], "modelscope.models.nlp.structbert.token_classification": ["torch"], "modelscope.models.nlp.hf_transformers.backbone": ["transformers"], "modelscope.models.nlp.task_models.fill_mask": ["torch", "typing", "numpy"], "modelscope.models.nlp.task_models.text_ranking": ["typing", "numpy"], "modelscope.models.nlp.task_models.feature_extraction": ["typing", "numpy"], "modelscope.models.nlp.task_models.text_classification": ["typing", "numpy"], "modelscope.models.nlp.task_models.task_model": ["torch", "abc", "re", "collections", "os", "typing"], "modelscope.models.nlp.task_models.text_generation": ["torch", "typing", "transformers", "numpy"], "modelscope.models.nlp.task_models.information_extraction": ["typing", "numpy"], "modelscope.models.nlp.task_models.token_classification": ["torch", "typing"], "modelscope.models.nlp.veco.configuration": ["transformers"], "modelscope.models.nlp.veco.fill_mask": ["transformers"], "modelscope.models.nlp.veco.backbone": ["transformers"], "modelscope.models.nlp.veco.text_classification": ["transformers"], "modelscope.models.nlp.veco.token_classification": ["torch", "transformers"], "modelscope.models.nlp.glm_130b.initialize": ["torch", "time", "argparse", "SwissArmyTransformer"], "modelscope.models.nlp.glm_130b.quantization.functional": ["torch"], "modelscope.models.nlp.glm_130b.quantization.layers": ["torch", "SwissArmyTransformer"], "modelscope.models.nlp.glm_130b.text_generation": ["random", "stat", "torch", "SwissArmyTransformer", "re", "functools", "sys", "copy", "time", "os", "typing"], "modelscope.models.nlp.glm_130b.generation.strategies": ["torch", "numpy", "SwissArmyTransformer"], "modelscope.models.nlp.mglm.tasks.superglue.pvp": ["string", "tasks", "random", "numpy", "abc", "utils", "math", "collections", "copy", "typing"], "modelscope.models.nlp.mglm.tasks.superglue.dataset": ["random", "numpy", "json", "abc", "torch", "collections", "os", "typing", "re", "tqdm", "utils", "csv", "copy", "glob", "pandas", "data_utils"], "modelscope.models.nlp.mglm.tasks.superglue.evaluate": ["string", "tasks", "__future__", "functools", "typing", "re", "collections"], "modelscope.models.nlp.mglm.tasks.superglue.finetune": ["tasks", "collections", "finetune_glm"], "modelscope.models.nlp.mglm.tasks.data_utils": ["numpy", "json", "torch", "re", "copy", "megatron_util", "typing", "pickle"], "modelscope.models.nlp.mglm.tasks.seq2seq.dataset": ["tasks", "random", "numpy", "json", "torch", "tqdm", "utils", "os", "data_utils"], "modelscope.models.nlp.mglm.tasks.seq2seq.evaluate": ["string", "rouge_score", "datetime", "random", "megatron_util", "torch", "generation_utils"], "modelscope.models.nlp.mglm.tasks.seq2seq.finetune": ["tasks", "pretrain_glm", "megatron_util", "torch", "functools", "collections", "finetune_glm"], "modelscope.models.nlp.mglm.tasks.language_model.detokenizer": ["re"], "modelscope.models.nlp.mglm.tasks.language_model.dataset": ["tasks", "itertools", "numpy", "json", "torch", "utils", "math", "bisect"], "modelscope.models.nlp.mglm.tasks.language_model.finetune": ["tasks", "pretrain_glm", "megatron_util", "torch", "functools", "math", "finetune_glm"], "modelscope.models.nlp.mglm.tasks.eval_utils": ["tasks", 
"random", "torch", "utils", "collections", "finetune_glm", "datetime", "time", "sklearn", "megatron_util", "os", "typing"], "modelscope.models.nlp.mglm.blocklm_utils": ["copy", "numpy", "random", "torch", "megatron_util", "scipy", "math"], "modelscope.models.nlp.mglm.train_utils": ["torch", "apex", "deepspeed", "megatron_util"], "modelscope.models.nlp.mglm.test.test_block": ["numpy", "argparse", "blocklm_utils", "random"], "modelscope.models.nlp.mglm.test.test_rel_shift": ["torch", "learning_rates", "numpy", "matplotlib"], "modelscope.models.nlp.mglm.arguments": ["json", "torch", "deepspeed", "os", "argparse"], "modelscope.models.nlp.mglm.data_utils.tokenization_gpt2": ["json", "logging", "__future__", "sys", "functools", "io", "os", "regex"], "modelscope.models.nlp.mglm.data_utils.lazy_loader": ["time", "itertools", "mmap", "numpy", "torch", "os", "pickle"], "modelscope.models.nlp.mglm.data_utils.wordpiece": ["logging", "collections", "io", "unicodedata", "__future__", "os"], "modelscope.models.nlp.mglm.data_utils.datasets": ["random", "numpy", "json", "torch", "operator", "nltk", "bisect", "os", "itertools", "tqdm", "math", "csv", "time", "pandas"], "modelscope.models.nlp.mglm.data_utils.tokenization": ["itertools", "random", "torch", "collections", "csv", "sentencepiece", "nltk", "os", "regex"], "modelscope.models.nlp.mglm.data_utils.extraction": ["os", "glob", "json", "nltk"], "modelscope.models.nlp.mglm.data_utils.file_utils": ["urllib", "hashlib", "json", "botocore", "sys", "io", "pathlib", "os", "requests", "shutil", "logging", "functools", "tempfile", "tqdm", "boto3", "__future__"], "modelscope.models.nlp.mglm.data_utils.sp_tokenizer": ["os"], "modelscope.models.nlp.mglm.data_utils.corpora": ["multiprocessing", "random", "json", "torch", "tqdm", "collections", "queue", "os"], "modelscope.models.nlp.mglm.data_utils.samplers": ["numpy", "torch", "math", "os", "sys"], "modelscope.models.nlp.mglm.mglm_for_text_summarization": ["random", "numpy", "torch", "megatron_util", "os", "typing"], "modelscope.models.nlp.mglm.process_grid": ["os", "json", "glob", "statistics", "sys"], "modelscope.models.nlp.mglm.generation_utils": ["torch", "abc", "typing", "collections"], "modelscope.models.nlp.mglm.utils": ["subprocess", "random", "numpy", "json", "torch", "time", "megatron_util", "os"], "modelscope.models.nlp.mglm.configure_data": ["itertools", "random", "numpy", "torch", "copy", "bisect", "megatron_util", "os"], "modelscope.models.nlp.mglm.model.distributed": ["torch", "megatron_util"], "modelscope.models.nlp.mglm.model.transformer": ["apex", "deepspeed", "megatron_util", "torch", "math"], "modelscope.models.nlp.mglm.model.modeling_bert": ["shutil", "json", "torch", "logging", "__future__", "tempfile", "math", "copy", "apex", "megatron_util", "tarfile", "os", "data_utils"], "modelscope.models.nlp.mglm.model.prompt": ["torch", "random"], "modelscope.models.nlp.mglm.model.modeling_glm": ["torch", "megatron_util"], "modelscope.models.nlp.mglm.model.downstream": ["torch"], "modelscope.models.nlp.mglm.run_test": ["sys", "test"], "modelscope.models.nlp.plug_mental.configuration": ["transformers"], "modelscope.models.nlp.plug_mental.backbone": ["transformers", "torch", "math", "packaging", "dataclasses", "typing"], "modelscope.models.nlp.plug_mental.adv_utils": ["torch"], "modelscope.models.nlp.plug_mental.text_classification": ["torch"], "modelscope.models.nlp.gpt_moe.configuration": ["torch", "transformers"], "modelscope.models.nlp.gpt_moe.backbone": ["transformers", "torch", "typing", "math", 
"os", "addict"], "modelscope.models.nlp.gpt_moe.tokenizer": ["tokenizers"], "modelscope.models.nlp.gpt_moe.distributed_gpt_moe": ["torch", "transformers", "math", "megatron_util"], "modelscope.models.nlp.gpt_moe.text_generation": ["typing", "transformers"], "modelscope.models.nlp.gpt_moe.moe.sharded_moe": ["tutel", "torch", "scipy", "math", "apex", "megatron_util", "typing"], "modelscope.models.nlp.gpt_moe.moe.utils": ["torch", "typing"], "modelscope.models.nlp.gpt_moe.moe.layer": ["torch", "typing", "megatron_util"], "modelscope.models.nlp.gpt_moe.moe.experts": ["copy", "torch"], "modelscope.models.nlp.gpt_moe.moe.mappings": ["torch", "megatron_util"], "modelscope.models.nlp.gpt_moe.checkpointing": ["torch", "os", "megatron_util"], "modelscope.models.nlp.csanmt.translation": ["tensorflow", "typing", "math", "collections"], "modelscope.models.nlp.T5.text2text_generation": ["transformers", "torch", "copy", "warnings", "typing"], "modelscope.models.nlp.T5.configuration": ["typing", "transformers"], "modelscope.models.nlp.T5.backbone": ["transformers", "torch", "math", "copy", "warnings", "os", "typing"], "modelscope.models.nlp.heads.text_classification_head": ["torch", "typing"], "modelscope.models.nlp.heads.infromation_extraction_head": ["torch"], "modelscope.models.nlp.heads.token_classification_head": ["torch", "typing"], "modelscope.models.nlp.heads.text_generation_head": ["torch", "typing"], "modelscope.models.nlp.heads.crf_head": ["torch", "typing", "transformers"], "modelscope.models.nlp.heads.torch_pretrain_head": ["torch", "typing", "transformers"], "modelscope.models.nlp.heads.fill_mask_head": ["torch", "typing", "transformers"], "modelscope.models.nlp.heads.text_ranking_head": ["torch", "typing"], "modelscope.models.nlp.bloom.backbone": ["transformers"], "modelscope.models.nlp.xlm_roberta.configuration": ["typing", "transformers", "collections"], "modelscope.models.nlp.xlm_roberta.backbone": ["torch", "transformers", "math", "packaging"], "modelscope.models.nlp.peer.configuration": ["transformers"], "modelscope.models.nlp.peer.sas_utils": ["numpy", "nltk", "torch", "random"], "modelscope.models.nlp.peer.backbone": ["transformers", "torch", "math", "dataclasses", "typing"], "modelscope.models.nlp.peer.text_classification": ["copy", "torch"], "modelscope.models.nlp.fid_T5.text_generation": ["torch", "os", "io", "transformers"], "modelscope.models.nlp.space_T_en.text_to_sql": ["torch", "os", "typing", "text2sql_lgesql"], "modelscope.models.nlp.canmt.sequence_generator": ["numpy", "torch", "math", "typing", "sys", "fairseq"], "modelscope.models.nlp.canmt.canmt_translation": ["numpy", "torch", "math", "os", "typing"], "modelscope.models.nlp.canmt.canmt_model": ["numpy", "torch", "typing", "math", "fairseq"], "modelscope.models.nlp.bart.text_error_correction": ["torch", "os", "typing"], "modelscope.models.nlp.use.transformer": ["torch", "math"], "modelscope.models.nlp.use.user_satisfaction_estimation": ["transformers", "numpy", "torch", "os", "typing"], "modelscope.models.nlp.gpt_neo.backbone": ["transformers"], "modelscope.models.nlp.bert.configuration": ["typing", "transformers", "collections"], "modelscope.models.nlp.bert.siamese_uie": ["torch", "copy"], "modelscope.models.nlp.bert.fill_mask": [], "modelscope.models.nlp.bert.word_alignment": ["torch"], "modelscope.models.nlp.bert.text_ranking": [], "modelscope.models.nlp.bert.backbone": ["torch", "transformers", "math", "packaging"], "modelscope.models.nlp.bert.text_classification": [], 
"modelscope.models.nlp.bert.sentence_embedding": ["torch"], "modelscope.models.nlp.bert.document_segmentation": ["torch", "typing"], "modelscope.models.nlp.bert.token_classification": [], "modelscope.models.nlp.dgds.backbone": ["torch", "__future__", "os", "transformers"], "modelscope.models.nlp.dgds.document_grounded_dialog_rerank": ["torch", "os", "typing"], "modelscope.models.nlp.dgds.document_grounded_dialog_generate": ["torch", "os", "typing"], "modelscope.models.nlp.dgds.document_grounded_dialog_retrieval": ["torch", "os", "typing"], "modelscope.models.nlp.gpt3.configuration": ["torch", "transformers"], "modelscope.models.nlp.gpt3.backbone": ["transformers", "torch", "typing", "math", "os", "addict"], "modelscope.models.nlp.gpt3.tokenizer": ["typing", "tokenizers"], "modelscope.models.nlp.gpt3.distributed_gpt3": ["transformers", "torch", "math", "collections", "megatron_util", "os", "typing"], "modelscope.models.nlp.gpt3.text_generation": ["torch", "typing", "transformers", "collections"], "modelscope.models.nlp.deberta_v2.configuration": ["transformers"], "modelscope.models.nlp.deberta_v2.fill_mask": ["torch", "typing", "transformers"], "modelscope.models.nlp.deberta_v2.backbone": ["torch", "typing", "transformers", "collections"], "modelscope.models.nlp.deberta_v2.tokenization": ["transformers", "unicodedata", "sentencepiece", "typing", "os"], "modelscope.models.nlp.deberta_v2.tokenization_fast": ["os", "typing", "transformers", "shutil"], "modelscope.models.nlp.codegeex.codegeex_for_code_translation": ["torch", "copy", "typing"], "modelscope.models.nlp.codegeex.tokenizer": ["torch", "typing", "transformers"], "modelscope.models.nlp.codegeex.codegeex_for_code_generation": ["torch", "copy", "typing"], "modelscope.models.nlp.codegeex.inference": ["torch", "typing"], "modelscope.models.nlp.codegeex.codegeex": ["torch", "math"], "modelscope.models.nlp.space.configuration": [], "modelscope.models.nlp.space.dialog_modeling": ["os", "typing"], "modelscope.models.nlp.space.dialog_state_tracking": ["torch", "typing", "transformers"], "modelscope.models.nlp.space.model.intent_unified_transformer": ["torch"], "modelscope.models.nlp.space.model.tokenization_space": ["transformers"], "modelscope.models.nlp.space.model.unified_transformer": ["torch", "numpy"], "modelscope.models.nlp.space.model.model_base": ["torch", "os"], "modelscope.models.nlp.space.model.generator": ["torch", "math", "numpy"], "modelscope.models.nlp.space.model.gen_unified_transformer": ["torch"], "modelscope.models.nlp.space.dialog_intent_prediction": ["os", "typing"], "modelscope.models.nlp.space.modules.transformer_block": ["torch"], "modelscope.models.nlp.space.modules.functions": ["torch", "numpy"], "modelscope.models.nlp.space.modules.multihead_attention": ["torch"], "modelscope.models.nlp.space.modules.feedforward": ["torch"], "modelscope.models.nlp.space.modules.embedder": ["torch"], "modelscope.models.nlp.fid_plug.configuration": ["transformers"], "modelscope.models.nlp.fid_plug.backbone": ["dataclasses", "transformers", "numpy", "torch", "math", "copy", "os", "typing"], "modelscope.models.nlp.fid_plug.text_generation": ["torch", "os", "io", "transformers"], "modelscope.models.nlp.gpt2.backbone": ["transformers"], "modelscope.models.nlp.plug.distributed_plug": ["torch", "typing", "megatron_util"], "modelscope.models.nlp.plug.configuration": ["copy", "json", "transformers"], "modelscope.models.nlp.plug.backbone": ["torch", "logging", "math", "megatron_util", "__future__"], "modelscope.models.nlp.plug.AnnealingLR": 
["torch", "math"], "modelscope.models.nlp.plug.generator": ["torch"], "modelscope.models.nlp.megatron_bert.configuration": ["typing", "transformers", "collections"], "modelscope.models.nlp.megatron_bert.fill_mask": ["torch", "transformers"], "modelscope.models.nlp.megatron_bert.backbone": ["torch", "transformers", "math"], "modelscope.models.nlp.space_T_cn.configuration": ["copy", "__future__", "logging", "json"], "modelscope.models.nlp.space_T_cn.backbone": ["shutil", "numpy", "torch", "__future__", "tempfile", "math", "copy", "tarfile", "os"], "modelscope.models.nlp.space_T_cn.table_question_answering": ["transformers", "numpy", "torch", "os", "typing"], "modelscope.models.nlp.ponet.configuration": ["transformers"], "modelscope.models.nlp.ponet.fill_mask": ["torch", "transformers"], "modelscope.models.nlp.ponet.backbone": ["distutils", "transformers", "torch", "math", "packaging"], "modelscope.models.nlp.ponet.tokenization": ["typing", "transformers"], "modelscope.models.nlp.ponet.document_segmentation": ["torch", "typing"], "modelscope.models.nlp.llama.configuration": ["transformers"], "modelscope.models.nlp.llama.convert_llama_weights_to_hf": ["shutil", "gc", "json", "torch", "math", "os", "argparse"], "modelscope.models.nlp.llama.backbone": ["torch", "typing", "transformers", "math"], "modelscope.models.nlp.llama.tokenization": ["transformers", "shutil", "sentencepiece", "os", "typing"], "modelscope.models.nlp.llama.tokenization_fast": ["os", "typing", "transformers", "shutil"], "modelscope.models.nlp.llama.text_generation": ["torch", "typing"], "modelscope.models.nlp.lstm.backbone": ["torch"], "modelscope.models.nlp.lstm.token_classification": [], "modelscope.models.cv.image_deblur.nafnet_for_image_deblur": ["torch", "os", "typing"], "modelscope.models.cv.vision_middleware.backbone": ["numpy", "torch", "math", "collections", "os", "typing"], "modelscope.models.cv.vision_middleware.model": ["json", "torch", "typing", "os"], "modelscope.models.cv.vision_middleware.head": ["torch", "abc", "mmcv", "numpy"], "modelscope.models.cv.vision_middleware.vim": ["torch", "einops", "math"], "modelscope.models.cv.image_quality_assessment_man.swin": ["warnings", "itertools", "torch", "einops", "math", "collections"], "modelscope.models.cv.image_quality_assessment_man.maniqa": ["timm", "torch", "einops"], "modelscope.models.cv.image_quality_assessment_man.image_quality_assessment_man": ["torch", "os", "typing"], "modelscope.models.cv.product_retrieval_embedding.item_detection": ["cv2", "numpy"], "modelscope.models.cv.product_retrieval_embedding.item_model": ["torch", "os", "typing", "numpy"], "modelscope.models.cv.product_retrieval_embedding.item_embedding": ["cv2", "torch", "numpy"], "modelscope.models.cv.body_2d_keypoints.w48": [], "modelscope.models.cv.body_2d_keypoints.hrnet_v2": ["torch", "os", "numpy"], "modelscope.models.cv.body_2d_keypoints.hrnet_basic_modules": ["torch"], "modelscope.models.cv.indoor_layout_estimation.panovit": ["torch", "os", "yacs", "numpy"], "modelscope.models.cv.indoor_layout_estimation.networks.modality.layout": ["numpy", "torch", "scipy", "math", "shapely"], "modelscope.models.cv.indoor_layout_estimation.networks.misc.panostretch": ["functools", "scipy", "numpy"], "modelscope.models.cv.indoor_layout_estimation.networks.misc.fourier": ["PIL", "scipy", "numpy"], "modelscope.models.cv.indoor_layout_estimation.networks.misc.post_proc": ["scipy", "sklearn", "numpy"], "modelscope.models.cv.indoor_layout_estimation.networks.panovit": ["torch", "numpy"], 
"modelscope.models.cv.indoor_layout_estimation.networks.utils": ["torch", "numpy"], "modelscope.models.cv.indoor_layout_estimation.networks.backbone.vit_horizon_pry_image": ["timm", "torch", "numpy"], "modelscope.models.cv.indoor_layout_estimation.networks.backbone.resnet_DA": ["torch", "torchvision"], "modelscope.models.cv.salient_detection.salient_model": ["PIL", "torch", "cv2", "torchvision", "os"], "modelscope.models.cv.salient_detection.models.senet": ["torch"], "modelscope.models.cv.salient_detection.models.utils": ["torch"], "modelscope.models.cv.salient_detection.models.modules": ["torch"], "modelscope.models.cv.salient_detection.models.u2net": ["torch"], "modelscope.models.cv.salient_detection.models.backbone.Res2Net_v1b": ["torch", "math"], "modelscope.models.cv.image_quality_assessment_degradation.degradation_model": ["time", "torchvision", "json", "numpy", "cv2", "torch", "collections"], "modelscope.models.cv.image_quality_assessment_degradation.image_quality_assessment_degradation": ["torch", "os", "typing"], "modelscope.models.cv.image_portrait_enhancement.losses.model_irse": ["torch"], "modelscope.models.cv.image_portrait_enhancement.losses.losses": ["torch"], "modelscope.models.cv.image_portrait_enhancement.losses.helpers": ["torch", "collections"], "modelscope.models.cv.image_portrait_enhancement.retinaface.detection": ["cv2", "torch", "os", "numpy"], "modelscope.models.cv.image_portrait_enhancement.retinaface.models.retinaface": ["torch", "torchvision", "collections"], "modelscope.models.cv.image_portrait_enhancement.retinaface.models.net": ["torch", "time", "torchvision"], "modelscope.models.cv.image_portrait_enhancement.retinaface.utils": ["torch", "itertools", "math", "numpy"], "modelscope.models.cv.image_portrait_enhancement.gpen": ["itertools", "random", "torch", "functools", "operator", "math"], "modelscope.models.cv.image_portrait_enhancement.image_portrait_enhancement": ["torch", "os", "typing", "math"], "modelscope.models.cv.image_portrait_enhancement.align_faces": ["cv2", "skimage", "numpy"], "modelscope.models.cv.image_portrait_enhancement.eqface.fqa": ["cv2", "torch", "os", "numpy"], "modelscope.models.cv.image_portrait_enhancement.eqface.model_resnet": ["torch"], "modelscope.models.cv.abnormal_object_detection.mmdet_ms.roi_head.mask_scoring_roi_head": ["torch", "mmdet"], "modelscope.models.cv.abnormal_object_detection.mmdet_ms.roi_head.roi_extractors.single_level_roi_extractor": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.abnormal_object_detection.mmdet_model": ["torch", "os", "numpy"], "modelscope.models.cv.image_probing_model.backbone": ["PIL", "numpy", "torch", "functools", "operator", "sys", "math", "collections", "torchvision"], "modelscope.models.cv.image_probing_model.model": ["json", "torch", "typing", "os"], "modelscope.models.cv.image_probing_model.utils": ["torch", "re"], "modelscope.models.cv.tinynas_classfication.super_res_kxkx": ["torch", "uuid"], "modelscope.models.cv.tinynas_classfication.super_res_k1kxk1": ["torch", "uuid"], "modelscope.models.cv.tinynas_classfication.model_zoo": [], "modelscope.models.cv.tinynas_classfication.super_blocks": ["torch", "uuid"], "modelscope.models.cv.tinynas_classfication.basic_blocks": ["torch", "uuid", "numpy"], "modelscope.models.cv.tinynas_classfication.master_net": ["torch"], "modelscope.models.cv.tinynas_classfication.plain_net_utils": ["torch"], "modelscope.models.cv.tinynas_classfication.super_res_idwexkx": ["torch", "uuid"], "modelscope.models.cv.tinynas_classfication.global_utils": [], 
"modelscope.models.cv.image_to_image_translation.model_translation": ["torch", "math"], "modelscope.models.cv.image_to_image_translation.models.autoencoder": ["torch", "math"], "modelscope.models.cv.image_to_image_translation.models.clip": ["torch", "math"], "modelscope.models.cv.image_to_image_translation.ops.metrics": ["torch", "scipy", "numpy"], "modelscope.models.cv.image_to_image_translation.ops.diffusion": ["torch", "math"], "modelscope.models.cv.image_to_image_translation.ops.apps": ["PIL", "numpy", "torch", "artist", "torchvision", "os"], "modelscope.models.cv.image_to_image_translation.ops.svd": ["torch"], "modelscope.models.cv.image_to_image_translation.ops.random_mask": ["cv2", "numpy"], "modelscope.models.cv.image_to_image_translation.ops.degradation": ["random", "numpy", "cv2", "torch", "scipy", "os", "math"], "modelscope.models.cv.image_to_image_translation.ops.random_color": ["colorsys", "random"], "modelscope.models.cv.image_to_image_translation.ops.utils": ["PIL", "hashlib", "multiprocessing", "base64", "numpy", "cv2", "json", "torch", "math", "io", "binascii", "os", "zipfile"], "modelscope.models.cv.image_to_image_translation.ops.losses": ["torch", "math"], "modelscope.models.cv.image_to_image_translation.data.transforms": ["torchvision", "PIL", "math", "random"], "modelscope.models.cv.video_human_matting.models.decoder": ["torch", "typing"], "modelscope.models.cv.video_human_matting.models.effv2": ["torch"], "modelscope.models.cv.video_human_matting.models.lraspp": ["torch"], "modelscope.models.cv.video_human_matting.models.matting": ["torch", "typing"], "modelscope.models.cv.video_human_matting.models.deep_guided_filter": ["torch"], "modelscope.models.cv.video_human_matting.model": ["numpy", "torch", "torchvision", "os", "typing"], "modelscope.models.cv.language_guided_video_summarization.transformer.models": ["torch", "numpy"], "modelscope.models.cv.language_guided_video_summarization.transformer.modules": ["torch"], "modelscope.models.cv.language_guided_video_summarization.transformer.sub_layers": ["torch", "numpy"], "modelscope.models.cv.language_guided_video_summarization.transformer.layers": ["torch"], "modelscope.models.cv.language_guided_video_summarization.summarizer": ["numpy", "videofeatures_clipit", "torch", "bmt_clipit", "os", "typing", "argparse"], "modelscope.models.cv.facial_landmark_confidence.flc.facial_landmark_confidence": ["PIL", "numpy", "torch", "cv2", "os"], "modelscope.models.cv.facial_landmark_confidence.flc.manual_landmark_net": ["torch", "math"], "modelscope.models.cv.image_to_image_generation.models.autoencoder": ["torch", "math"], "modelscope.models.cv.image_to_image_generation.models.clip": ["torch", "math"], "modelscope.models.cv.image_to_image_generation.model": ["torch", "math"], "modelscope.models.cv.image_to_image_generation.ops.diffusion": ["torch", "math"], "modelscope.models.cv.image_to_image_generation.ops.losses": ["torch", "math"], "modelscope.models.cv.image_to_image_generation.data.transforms": ["torchvision", "PIL", "math", "random"], "modelscope.models.cv.image_body_reshaping.person_info": ["torch", "cv2", "copy", "numpy"], "modelscope.models.cv.image_body_reshaping.model": ["torch"], "modelscope.models.cv.image_body_reshaping.slim_utils": ["random", "numpy", "cv2", "torch", "os", "math", "numba"], "modelscope.models.cv.image_body_reshaping.pose_estimator.body": ["numpy", "cv2", "torch", "scipy", "math"], "modelscope.models.cv.image_body_reshaping.pose_estimator.util": ["numpy"], 
"modelscope.models.cv.image_body_reshaping.pose_estimator.model": ["torch", "collections"], "modelscope.models.cv.image_body_reshaping.image_body_reshaping": ["numpy", "cv2", "torch", "os", "typing"], "modelscope.models.cv.image_human_parsing.m2fp_net": ["torch", "os", "typing"], "modelscope.models.cv.image_human_parsing.m2fp.m2fp_decoder": ["torch"], "modelscope.models.cv.image_human_parsing.m2fp.m2fp_encoder": ["torch", "typing", "numpy"], "modelscope.models.cv.image_human_parsing.parsing_utils": ["copy", "torch", "PIL", "numpy"], "modelscope.models.cv.image_human_parsing.backbone.deeplab_resnet": ["torch", "numpy"], "modelscope.models.cv.image_skychange.ptsemseg.hrnet_super_and_ocr": ["torch", "__future__", "numpy"], "modelscope.models.cv.image_skychange.ptsemseg.BlockModules": ["torch"], "modelscope.models.cv.image_skychange.ptsemseg.unet": ["torch"], "modelscope.models.cv.image_skychange.ptsemseg.hrnet_backnone": ["torch", "os", "logging", "numpy"], "modelscope.models.cv.image_skychange.skychange": ["numbers", "PIL", "pdb", "numpy", "cv2", "json", "torch", "collections", "torchvision", "os"], "modelscope.models.cv.image_skychange.preprocessor": ["numbers", "pdb", "numpy", "cv2", "json", "torch", "torchvision", "typing"], "modelscope.models.cv.image_skychange.skychange_model": ["pdb", "cv2", "torch", "json", "math", "collections", "time", "os", "typing"], "modelscope.models.cv.video_object_segmentation.aggregate": ["torch"], "modelscope.models.cv.video_object_segmentation.inference_memory_bank": ["torch", "math"], "modelscope.models.cv.video_object_segmentation.inference_core": ["torch"], "modelscope.models.cv.video_object_segmentation.model": ["torch", "os", "typing"], "modelscope.models.cv.video_object_segmentation.eval_network": ["torch"], "modelscope.models.cv.video_object_segmentation.mod_resnet": ["torch", "math", "collections"], "modelscope.models.cv.video_object_segmentation.network": ["torch", "math"], "modelscope.models.cv.video_object_segmentation.modules": ["torch", "torchvision"], "modelscope.models.cv.video_object_segmentation.cbam": ["torch"], "modelscope.models.cv.face_reconstruction.models.nv_diffrast": ["nvdiffrast", "numpy", "torch", "warnings", "typing"], "modelscope.models.cv.face_reconstruction.models.renderer": ["torch", "imageio", "skimage", "numpy"], "modelscope.models.cv.face_reconstruction.models.unet": ["torch", "warnings"], "modelscope.models.cv.face_reconstruction.models.bfm": ["torch", "os", "scipy", "numpy"], "modelscope.models.cv.face_reconstruction.models.opt": [], "modelscope.models.cv.face_reconstruction.models.networks": ["torch", "os", "typing", "kornia"], "modelscope.models.cv.face_reconstruction.models.de_retouching_module": ["torch"], "modelscope.models.cv.face_reconstruction.models.losses": ["torch", "numpy", "kornia"], "modelscope.models.cv.face_reconstruction.models.pix2pix.pix2pix_options": [], "modelscope.models.cv.face_reconstruction.models.pix2pix.pix2pix_model": ["torch"], "modelscope.models.cv.face_reconstruction.models.pix2pix.networks": ["torch", "functools"], "modelscope.models.cv.face_reconstruction.models.facelandmark.nets.large_eyeball_net": ["torch"], "modelscope.models.cv.face_reconstruction.models.facelandmark.nets.large_base_lmks_net": ["torch"], "modelscope.models.cv.face_reconstruction.models.facelandmark.large_base_lmks_infer": ["torch", "numpy"], "modelscope.models.cv.face_reconstruction.models.facerecon_model": ["numpy", "cv2", "torch", "collections", "os"], "modelscope.models.cv.face_reconstruction.utils": ["PIL", 
"numpy", "array", "cv2", "torch", "scipy", "math", "numba", "os", "argparse"], "modelscope.models.cv.facial_expression_recognition.fer.transforms": ["numbers", "PIL", "numpy", "torch", "types"], "modelscope.models.cv.facial_expression_recognition.fer.vgg": ["torch"], "modelscope.models.cv.facial_expression_recognition.fer.facial_expression_recognition": ["PIL", "numpy", "torch", "cv2", "os"], "modelscope.models.cv.face_recognition.align_face": ["cv2", "skimage", "numpy"], "modelscope.models.cv.face_recognition.torchkit.rts_backbone": ["torch", "os", "math", "collections"], "modelscope.models.cv.face_recognition.torchkit.backbone.facemask_backbone": ["torch", "collections"], "modelscope.models.cv.face_recognition.torchkit.backbone.model_irse": ["torch", "collections"], "modelscope.models.cv.face_recognition.torchkit.backbone.model_resnet": ["torch"], "modelscope.models.cv.face_recognition.torchkit.backbone.common": ["torch"], "modelscope.models.cv.face_recognition.torchkit.backbone.arcface_backbone": ["torch"], "modelscope.models.cv.face_generation.stylegan2": ["random", "torch", "functools", "operator", "math"], "modelscope.models.cv.face_generation.op.fused_act": ["torch", "os"], "modelscope.models.cv.face_generation.op.upfirdn2d": ["torch", "os", "collections"], "modelscope.models.cv.face_generation.op.conv2d_gradfix": ["torch", "warnings", "contextlib"], "modelscope.models.cv.shop_segmentation.head_fpn": ["timm", "torch", "mmcv", "numpy"], "modelscope.models.cv.shop_segmentation.models": ["torch", "timm", "math", "collections"], "modelscope.models.cv.shop_segmentation.common": ["torch", "warnings"], "modelscope.models.cv.shop_segmentation.utils": ["torch", "functools", "ftfy", "gzip", "os", "regex", "typing", "html"], "modelscope.models.cv.shop_segmentation.shop_seg_base": ["torch"], "modelscope.models.cv.shop_segmentation.neck_fpn": ["torch", "mmcv", "timm"], "modelscope.models.cv.shop_segmentation.shop_seg_model": ["PIL", "numpy", "torch", "json", "os", "typing"], "modelscope.models.cv.image_instance_segmentation.maskdino.ms_deform_attn": ["warnings", "mmcv", "torch", "__future__", "math"], "modelscope.models.cv.image_instance_segmentation.maskdino.position_encoding": ["torch", "math"], "modelscope.models.cv.image_instance_segmentation.maskdino.dino_decoder": ["torch", "typing"], "modelscope.models.cv.image_instance_segmentation.maskdino.maskdino_encoder": ["torch", "typing", "numpy"], "modelscope.models.cv.image_instance_segmentation.maskdino.utils": ["copy", "torch", "math"], "modelscope.models.cv.image_instance_segmentation.maskdino.maskdino_decoder": ["torch"], "modelscope.models.cv.image_instance_segmentation.maskdino_swin": ["torch", "os"], "modelscope.models.cv.image_instance_segmentation.datasets.transforms": ["os", "numpy"], "modelscope.models.cv.image_instance_segmentation.fastinst.fastinst_encoder": ["torch", "logging", "typing"], "modelscope.models.cv.image_instance_segmentation.fastinst.fastinst_decoder": ["torch", "math"], "modelscope.models.cv.image_instance_segmentation.cascade_mask_rcnn_swin": ["torch", "os", "collections"], "modelscope.models.cv.image_instance_segmentation.fastinst_model": ["torch", "os", "typing"], "modelscope.models.cv.image_instance_segmentation.model": ["torch", "os", "typing"], "modelscope.models.cv.image_instance_segmentation.postprocess_utils": ["itertools", "numpy", "pycocotools", "cv2", "torch"], "modelscope.models.cv.image_instance_segmentation.backbones.resnet": ["torch"], 
"modelscope.models.cv.image_instance_segmentation.backbones.swin_transformer": ["torch", "timm", "numpy"], "modelscope.models.cv.image_instance_segmentation.maskdino_model": ["torch", "os", "typing"], "modelscope.models.cv.action_detection.modules.resnet": ["torch", "detectron2"], "modelscope.models.cv.action_detection.modules.action_detection_pytorch": ["torch", "fvcore", "logging", "typing", "detectron2"], "modelscope.models.cv.action_detection.action_detection_onnx": ["urllib", "subprocess", "shutil", "numpy", "cv2", "tempfile", "onnxruntime", "os", "uuid"], "modelscope.models.cv.vop_retrieval.backbone": ["urllib", "hashlib", "numpy", "torch", "tqdm", "collections", "warnings", "os", "typing"], "modelscope.models.cv.vop_retrieval.basic_utils": ["PIL", "ujson", "shutil", "random", "numpy", "cv2", "torch", "collections", "torchvision", "os", "pickle", "zipfile"], "modelscope.models.cv.vop_retrieval.model": ["torch", "os"], "modelscope.models.cv.vop_retrieval.tokenization_clip": ["gzip", "torch", "os", "regex", "functools", "ftfy", "html"], "modelscope.models.cv.vop_retrieval.model_se": ["torch", "os"], "modelscope.models.cv.video_instance_segmentation.track.kernel_update_head": ["torch", "mmcv", "mmdet", "numpy"], "modelscope.models.cv.video_instance_segmentation.track.mask_hungarian_assigner": ["torch", "scipy", "mmdet", "numpy"], "modelscope.models.cv.video_instance_segmentation.video_knet": ["torch", "mmdet"], "modelscope.models.cv.video_instance_segmentation.head.kernel_updator": ["torch", "mmcv"], "modelscope.models.cv.video_instance_segmentation.head.kernel_update_head": ["torch", "mmcv", "mmdet", "numpy"], "modelscope.models.cv.video_instance_segmentation.head.kernel_frame_iter_head": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.video_instance_segmentation.head.kernel_head": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.video_instance_segmentation.head.kernel_iter_head": ["torch", "mmdet"], "modelscope.models.cv.video_instance_segmentation.utils": ["torch", "mmdet", "numpy"], "modelscope.models.cv.video_instance_segmentation.neck.msdeformattn_decoder": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.super_resolution.ecb": ["torch"], "modelscope.models.cv.super_resolution.ecbsr_model": ["torch", "os", "typing"], "modelscope.models.cv.super_resolution.rrdbnet_arch": ["torch"], "modelscope.models.cv.super_resolution.arch_util": ["torchvision", "warnings", "itertools", "torch", "math", "collections"], "modelscope.models.cv.ocr_detection.preprocessor": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "modelscope.models.cv.ocr_detection.model": ["torch", "os", "typing", "numpy"], "modelscope.models.cv.ocr_detection.utils": ["cv2", "pyclipper", "shapely", "numpy"], "modelscope.models.cv.ocr_detection.modules.dbnet": ["torch", "math", "os", "sys", "collections"], "modelscope.models.cv.ocr_detection.modules.seg_detector_loss": ["torch", "sys"], "modelscope.models.cv.panorama_depth_estimation.networks.util": ["cv2", "scipy", "numpy"], "modelscope.models.cv.panorama_depth_estimation.networks.mobilenet": ["torch"], "modelscope.models.cv.panorama_depth_estimation.networks.equi": ["torch", "__future__", "collections", "numpy"], "modelscope.models.cv.panorama_depth_estimation.networks.resnet": ["torch"], "modelscope.models.cv.panorama_depth_estimation.networks.unifuse": ["torch", "__future__", "collections", "numpy"], "modelscope.models.cv.panorama_depth_estimation.networks.layers": ["torch", "numpy"], "modelscope.models.cv.panorama_depth_estimation.unifuse_model": ["torch", 
"os", "torchvision", "numpy"], "modelscope.models.cv.stream_yolo.utils.format": ["math"], "modelscope.models.cv.stream_yolo.utils.boxes": ["torch", "torchvision"], "modelscope.models.cv.stream_yolo.models.tal_head": ["torch"], "modelscope.models.cv.stream_yolo.models.dfp_pafpn": ["torch"], "modelscope.models.cv.stream_yolo.models.streamyolo": ["torch"], "modelscope.models.cv.stream_yolo.models.network_blocks": ["torch"], "modelscope.models.cv.stream_yolo.models.darknet": ["torch"], "modelscope.models.cv.stream_yolo.realtime_video_detector": ["numpy", "cv2", "torch", "logging", "json", "tqdm", "time", "os", "argparse"], "modelscope.models.cv.stream_yolo.exp.build": ["os", "sys"], "modelscope.models.cv.stream_yolo.exp.base_exp": ["torch", "abc"], "modelscope.models.cv.stream_yolo.exp.default.streamyolo": ["torch", "os", "sys"], "modelscope.models.cv.stream_yolo.exp.yolox_base": ["torch", "os", "random"], "modelscope.models.cv.stream_yolo.data.data_augment": ["cv2", "math", "random", "numpy"], "modelscope.models.cv.virual_tryon.sdafnet": ["torch", "random", "numpy"], "modelscope.models.cv.bad_image_detecting.bad_image_detecting": ["numpy", "torch", "torchvision", "os", "typing"], "modelscope.models.cv.human_reconstruction.Reconstruction": ["PIL", "skimage", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "modelscope.models.cv.human_reconstruction.models.Surface_head": ["torch"], "modelscope.models.cv.human_reconstruction.models.Res_backbone": ["torch", "numpy"], "modelscope.models.cv.human_reconstruction.models.Embedding": ["torch"], "modelscope.models.cv.human_reconstruction.models.PixToMesh": ["torch"], "modelscope.models.cv.human_reconstruction.models.networks": ["torch", "functools", "numpy"], "modelscope.models.cv.human_reconstruction.models.human_segmenter": ["cv2", "tensorflow", "numpy"], "modelscope.models.cv.human_reconstruction.models.geometry": ["torch"], "modelscope.models.cv.human_reconstruction.models.detectors": ["torch", "numpy"], "modelscope.models.cv.human_reconstruction.utils": ["mcubes", "os", "torch", "numpy"], "modelscope.models.cv.image_driving_perception.preprocessor": ["cv2", "torch", "typing", "numpy"], "modelscope.models.cv.image_driving_perception.utils": ["torch", "time", "torchvision", "numpy"], "modelscope.models.cv.image_driving_perception.image_driving_percetion_model": ["numpy", "cv2", "torch", "os", "typing"], "modelscope.models.cv.video_streaming_perception.longshortnet.longshortnet": ["numpy", "cv2", "torch", "logging", "json", "tqdm", "time", "os", "argparse"], "modelscope.models.cv.video_streaming_perception.longshortnet.models.longshort_backbone_neck": ["torch"], "modelscope.models.cv.video_streaming_perception.longshortnet.models.longshort": ["torch"], "modelscope.models.cv.video_streaming_perception.longshortnet.models.dfp_pafpn_short": ["torch", "collections"], "modelscope.models.cv.video_streaming_perception.longshortnet.models.dfp_pafpn_long": ["torch", "collections"], "modelscope.models.cv.video_streaming_perception.longshortnet.exp.longshortnet_base": [], "modelscope.models.cv.image_paintbyexample.model": ["torch", "paint_ldm", "omegaconf", "os", "typing"], "modelscope.models.cv.image_inpainting.refinement": ["numpy", "cv2", "torch", "tqdm", "kornia"], "modelscope.models.cv.image_inpainting.model": ["torch", "os", "typing"], "modelscope.models.cv.image_inpainting.default": ["torch", "bisect"], "modelscope.models.cv.image_inpainting.modules.ade20k.resnet": ["torch", "os", "math"], 
"modelscope.models.cv.image_inpainting.modules.ade20k.base": ["torch", "os"], "modelscope.models.cv.image_inpainting.modules.adversarial": ["torch", "typing"], "modelscope.models.cv.image_inpainting.modules.perceptual": ["torch", "torchvision"], "modelscope.models.cv.image_inpainting.modules.inception": ["torch", "torchvision"], "modelscope.models.cv.image_inpainting.modules.ffc": ["torch", "numpy", "kornia"], "modelscope.models.cv.image_inpainting.modules.pix2pixhd": ["numpy", "torch", "logging", "functools", "collections"], "modelscope.models.cv.image_inpainting.modules.feature_matching": ["torch", "typing"], "modelscope.models.cv.image_inpainting.base": ["torch", "typing"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.core.bbox.coders.nms_free_coder": ["torch", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.core.bbox.util": ["torch", "mmdet3d", "numpy"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.core.bbox.match_costs.match_cost": ["torch", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.core.bbox.assigners.hungarian_assigner_3d": ["torch", "scipy", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.nuscenes_dataset": ["mmdet3d", "mmdet", "numpy"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.pipelines.loading": ["mmcv", "mmdet", "numpy"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.pipelines.transform_3d": ["PIL", "copy", "mmcv", "numpy", "torch", "mmdet3d", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.dense_heads.petrv2_dednhead": ["mmcv", "numpy", "torch", "math", "copy", "mmdet3d", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.dense_heads.depth_net": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.necks.cp_fpn": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.petr_transformer": ["copy", "warnings", "mmcv", "torch", "mmdet", "typing", "math"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.positional_encoding": ["torch", "mmcv", "math"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.backbones.vovnet": ["torch", "mmdet", "mmcv", "collections"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.detectors.petr3d": ["mmcv", "numpy", "torch", "mmdet3d", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.result_vis": ["numpy", "cv2", "json", "pyquaternion", "os", "mmdet3d", "pickle", "argparse"], "modelscope.models.cv.object_detection_3d.depe.depe_detect": ["torch", "os", "typing", "numpy"], "modelscope.models.cv.image_quality_assessment_mos.image_quality_assessment_mos": ["torch", "os", "typing"], "modelscope.models.cv.image_quality_assessment_mos.heads.simple_head": ["torch"], "modelscope.models.cv.image_quality_assessment_mos.backbones.resnet": ["torch", "os"], "modelscope.models.cv.image_quality_assessment_mos.censeo_ivqa_model": ["torch"], "modelscope.models.cv.image_debanding.rrdb.rrdb_image_debanding": ["torch", "os", "typing"], "modelscope.models.cv.image_restoration.demoire_models.nets": ["torch"], "modelscope.models.cv.image_restoration.image_restoration_model": ["cv2", "torch", "os", "numpy"], "modelscope.models.cv.cartoon.model_tf": ["tensorflow", "typing"], "modelscope.models.cv.cartoon.facelib.facer": ["cv2", "time", "numpy"], 
"modelscope.models.cv.cartoon.facelib.config": ["os", "easydict", "numpy"], "modelscope.models.cv.cartoon.facelib.LK.lk": ["numpy"], "modelscope.models.cv.cartoon.facelib.face_detector": ["cv2", "tensorflow", "numpy", "time"], "modelscope.models.cv.cartoon.facelib.face_landmark": ["cv2", "tensorflow", "numpy"], "modelscope.models.cv.cartoon.loss": ["tensorflow", "joblib", "skimage", "numpy", "scipy", "os"], "modelscope.models.cv.cartoon.utils": ["tensorflow", "random", "numpy", "cv2", "os"], "modelscope.models.cv.cartoon.mtcnn_pytorch.src.align_trans": ["cv2", "numpy"], "modelscope.models.cv.cartoon.mtcnn_pytorch.src.matlab_cp2tform": ["numpy"], "modelscope.models.cv.cartoon.network": ["tensorflow"], "modelscope.models.cv.vision_efficient_tuning.vision_efficient_tuning": ["torch", "os", "collections"], "modelscope.models.cv.vision_efficient_tuning.timm_vision_transformer": ["itertools", "torch", "logging", "functools", "math", "collections"], "modelscope.models.cv.vision_efficient_tuning.backbone": ["torch", "functools"], "modelscope.models.cv.vision_efficient_tuning.timm_weight_init": ["torch", "warnings", "math"], "modelscope.models.cv.vision_efficient_tuning.timm_helpers": ["torch", "typing", "itertools", "math"], "modelscope.models.cv.vision_efficient_tuning.petl": ["torch", "torchvision", "math", "collections"], "modelscope.models.cv.vision_efficient_tuning.model": ["torch", "typing"], "modelscope.models.cv.vision_efficient_tuning.head": ["torch"], "modelscope.models.cv.movie_scene_segmentation.utils.save_op": ["subprocess", "numpy", "cv2", "os", "tqdm"], "modelscope.models.cv.movie_scene_segmentation.utils.shot_encoder": ["torch", "typing"], "modelscope.models.cv.movie_scene_segmentation.utils.trn": ["torch", "transformers"], "modelscope.models.cv.movie_scene_segmentation.utils.head": ["torch"], "modelscope.models.cv.movie_scene_segmentation.model": ["PIL", "numpy", "torch", "einops", "tqdm", "math", "shotdetect_scenedetect_lgss", "torchvision", "os", "typing"], "modelscope.models.cv.movie_scene_segmentation.get_model": [], "modelscope.models.cv.video_summarization.pgl_sum": ["torch", "math"], "modelscope.models.cv.video_summarization.base_model": ["cv2", "torch", "numpy"], "modelscope.models.cv.video_summarization.summarizer": ["torch", "os", "typing", "numpy"], "modelscope.models.cv.video_summarization.kts.cpd_auto": ["numpy"], "modelscope.models.cv.video_summarization.kts.cpd_nonlin": ["numpy"], "modelscope.models.cv.table_recognition.lineless_table_process": ["cv2", "torch", "shapely", "numpy"], "modelscope.models.cv.table_recognition.model_lore": ["numpy", "torch", "math", "copy", "os", "typing"], "modelscope.models.cv.table_recognition.modules.lore_processor": ["copy", "numpy", "torch", "os", "math"], "modelscope.models.cv.table_recognition.modules.lore_detector": ["copy", "numpy", "torch", "os", "math"], "modelscope.models.cv.image_matching.quadtree_attention_model": ["numpy", "cv2", "torch", "pathlib", "os"], "modelscope.models.cv.image_matching.config.default": ["yacs"], "modelscope.models.cv.image_matching.utils.misc": ["yacs"], "modelscope.models.cv.image_matching.loftr_quadtree.loftr": ["torch", "einops"], "modelscope.models.cv.image_matching.loftr_quadtree.utils.position_encoding": ["torch", "math"], "modelscope.models.cv.image_matching.loftr_quadtree.utils.coarse_matching": ["torch", "einops"], "modelscope.models.cv.image_matching.loftr_quadtree.utils.fine_matching": ["torch", "math", "kornia"], 
"modelscope.models.cv.image_matching.loftr_quadtree.loftr_module.quadtree_attention": ["torch", "timm"], "modelscope.models.cv.image_matching.loftr_quadtree.loftr_module.fine_preprocess": ["torch", "einops"], "modelscope.models.cv.image_matching.loftr_quadtree.loftr_module.transformer": ["timm", "torch", "einops", "math", "copy"], "modelscope.models.cv.image_matching.loftr_quadtree.loftr_module.linear_attention": ["torch"], "modelscope.models.cv.image_matching.loftr_quadtree.backbone.resnet_fpn": ["torch"], "modelscope.models.cv.tinynas_detection.detector": ["torch", "os", "torchvision", "pickle"], "modelscope.models.cv.tinynas_detection.tinynas_detector": [], "modelscope.models.cv.tinynas_detection.damo.apis.detector_evaluater": ["torch", "os"], "modelscope.models.cv.tinynas_detection.damo.apis.detector_inference": ["torch", "os", "tqdm"], "modelscope.models.cv.tinynas_detection.damo.structures.boxlist_ops": ["torch"], "modelscope.models.cv.tinynas_detection.damo.structures.bounding_box": ["torch"], "modelscope.models.cv.tinynas_detection.damo.structures.image_list": ["torch", "__future__"], "modelscope.models.cv.tinynas_detection.damo.utils.model_utils": ["copy", "time", "torch", "thop", "math"], "modelscope.models.cv.tinynas_detection.damo.utils.boxes": ["torch", "torchvision", "numpy"], "modelscope.models.cv.tinynas_detection.damo.utils.scheduler": ["math"], "modelscope.models.cv.tinynas_detection.damo.augmentations.box_level_augs.box_level_augs": ["random", "numpy"], "modelscope.models.cv.tinynas_detection.damo.augmentations.box_level_augs.gaussian_maps": ["torch", "math"], "modelscope.models.cv.tinynas_detection.damo.augmentations.box_level_augs.color_augs": ["torch", "random"], "modelscope.models.cv.tinynas_detection.damo.augmentations.box_level_augs.geometric_augs": ["torch", "torchvision", "copy", "random"], "modelscope.models.cv.tinynas_detection.damo.augmentations.scale_aware_aug": ["copy"], "modelscope.models.cv.tinynas_detection.damo.detectors.detector": ["torch"], "modelscope.models.cv.tinynas_detection.damo.base_models.losses.distill_loss": ["torch"], "modelscope.models.cv.tinynas_detection.damo.base_models.losses.gfocal_loss": ["torch", "functools"], "modelscope.models.cv.tinynas_detection.damo.base_models.core.ota_assigner": ["torch", "warnings"], "modelscope.models.cv.tinynas_detection.damo.base_models.core.base_ops": ["torch", "math"], "modelscope.models.cv.tinynas_detection.damo.base_models.core.repvgg_block": ["torch", "numpy"], "modelscope.models.cv.tinynas_detection.damo.base_models.core.neck_ops": ["torch", "numpy"], "modelscope.models.cv.tinynas_detection.damo.base_models.core.ops": ["torch", "numpy"], "modelscope.models.cv.tinynas_detection.damo.base_models.core.utils": ["torch", "functools"], "modelscope.models.cv.tinynas_detection.damo.base_models.core.weight_init": ["torch", "numpy"], "modelscope.models.cv.tinynas_detection.damo.base_models.necks.giraffe_config": ["collections", "networkx"], "modelscope.models.cv.tinynas_detection.damo.base_models.necks.giraffe_fpn": ["timm", "numpy", "torch", "functools", "math", "collections", "typing"], "modelscope.models.cv.tinynas_detection.damo.base_models.necks.giraffe_fpn_btn": ["torch"], "modelscope.models.cv.tinynas_detection.damo.base_models.heads.gfocal_v2_tiny": ["torch", "functools", "numpy"], "modelscope.models.cv.tinynas_detection.damo.base_models.heads.zero_head": ["torch"], "modelscope.models.cv.tinynas_detection.damo.base_models.backbones.tinynas_csp": ["torch"], 
"modelscope.models.cv.tinynas_detection.damo.base_models.backbones.tinynas_res": ["torch"], "modelscope.models.cv.tinynas_detection.damo.base_models.backbones.darknet": ["torch"], "modelscope.models.cv.tinynas_detection.utils": ["shutil", "importlib", "os", "easydict", "tempfile", "sys"], "modelscope.models.cv.tinynas_detection.tinynas_damoyolo": [], "modelscope.models.cv.video_multi_object_tracking.utils.visualization": ["cv2", "numpy"], "modelscope.models.cv.video_multi_object_tracking.utils.utils": ["cv2", "torch", "numpy"], "modelscope.models.cv.video_multi_object_tracking.utils.kalman_filter": ["scipy", "numpy"], "modelscope.models.cv.video_multi_object_tracking.utils.image": ["cv2", "numpy"], "modelscope.models.cv.video_multi_object_tracking.tracker.matching": ["scipy", "lap", "numpy"], "modelscope.models.cv.video_multi_object_tracking.tracker.multitracker": ["torch", "collections", "numpy"], "modelscope.models.cv.video_multi_object_tracking.tracker.basetrack": ["collections", "numpy"], "modelscope.models.cv.video_multi_object_tracking.models.model": ["torch"], "modelscope.models.cv.video_multi_object_tracking.models.common": ["torch"], "modelscope.models.cv.video_multi_object_tracking.models.yolo": ["torch", "copy", "math"], "modelscope.models.cv.video_multi_object_tracking.models.decode": ["torch"], "modelscope.models.cv.nerf_recon_acc.nerf_recon_acc": ["numpy", "cv2", "torch", "tqdm", "time", "os", "glob"], "modelscope.models.cv.nerf_recon_acc.network.nerf": ["torch", "nerfacc", "numpy", "tinycudann"], "modelscope.models.cv.nerf_recon_acc.network.utils": ["mcubes", "numpy", "gc", "torch", "collections", "tinycudann"], "modelscope.models.cv.nerf_recon_acc.network.segmenter": ["tensorflow", "numpy"], "modelscope.models.cv.nerf_recon_acc.nerf_preprocess": ["subprocess", "tensorflow", "numpy", "cv2", "glob", "os", "typing"], "modelscope.models.cv.nerf_recon_acc.dataloader.nerf_dataset": ["PIL", "numpy", "json", "torch", "math", "torchvision", "os"], "modelscope.models.cv.nerf_recon_acc.dataloader.read_write_model": ["struct", "numpy", "os", "collections", "argparse"], "modelscope.models.cv.video_deinterlace.UNet_for_video_deinterlace": ["torch", "os", "copy", "typing"], "modelscope.models.cv.video_deinterlace.deinterlace_arch": ["torch"], "modelscope.models.cv.video_deinterlace.models.deep_fourier_upsampling": ["torch", "numpy"], "modelscope.models.cv.video_deinterlace.models.fre": ["torch"], "modelscope.models.cv.video_deinterlace.models.utils": ["torch"], "modelscope.models.cv.video_deinterlace.models.archs": ["torch", "numpy"], "modelscope.models.cv.video_deinterlace.models.enh": ["torch"], "modelscope.models.cv.cmdssl_video_embedding.resnet3d": ["torch"], "modelscope.models.cv.cmdssl_video_embedding.resnet2p1d": ["torch"], "modelscope.models.cv.cmdssl_video_embedding.c3d": ["torch"], "modelscope.models.cv.image_depth_estimation_bts.depth_estimation_bts_model": ["torch", "os"], "modelscope.models.cv.image_depth_estimation_bts.networks.decoder": ["torch"], "modelscope.models.cv.image_depth_estimation_bts.networks.bts_model": ["torch"], "modelscope.models.cv.image_depth_estimation_bts.networks.encoder": ["torch", "torchvision"], "modelscope.models.cv.image_depth_estimation_bts.networks.utils": ["torch", "math"], "modelscope.models.cv.motion_generation.model": [], "modelscope.models.cv.motion_generation.modules.rotation2xyz": ["torch"], "modelscope.models.cv.motion_generation.modules.respace": ["torch", "numpy"], "modelscope.models.cv.motion_generation.modules.smpl": ["contextlib", 
"numpy", "torch", "os", "smplx"], "modelscope.models.cv.motion_generation.modules.mdm": ["torch", "numpy", "clip"], "modelscope.models.cv.motion_generation.modules.gaussian_diffusion": ["copy", "numpy", "enum", "torch", "math"], "modelscope.models.cv.motion_generation.modules.cfg_sampler": ["torch", "copy"], "modelscope.models.cv.image_defrcn_fewshot.utils.requirements_check": ["importlib_metadata", "sys", "collections", "packaging", "importlib"], "modelscope.models.cv.image_defrcn_fewshot.utils.voc_register": ["numpy", "os", "fvcore", "xml", "detectron2"], "modelscope.models.cv.image_defrcn_fewshot.utils.configuration_mapper": ["detectron2"], "modelscope.models.cv.image_defrcn_fewshot.utils.model_surgery_op": ["torch", "os", "argparse"], "modelscope.models.cv.image_defrcn_fewshot.utils.coco_register": ["io", "contextlib", "pycocotools", "os", "fvcore", "detectron2"], "modelscope.models.cv.image_defrcn_fewshot.utils.register_data": [], "modelscope.models.cv.image_defrcn_fewshot.models.fast_rcnn": ["torch", "fvcore", "detectron2", "numpy"], "modelscope.models.cv.image_defrcn_fewshot.models.defrcn": ["torch", "os", "typing", "detectron2"], "modelscope.models.cv.image_defrcn_fewshot.models.resnet": ["torch", "torchvision"], "modelscope.models.cv.image_defrcn_fewshot.models.calibration_layer": ["cv2", "torch", "sklearn", "detectron2"], "modelscope.models.cv.image_defrcn_fewshot.models.gdl": ["torch"], "modelscope.models.cv.image_defrcn_fewshot.models.roi_heads": ["torch", "detectron2"], "modelscope.models.cv.image_defrcn_fewshot.defrcn_for_fewshot": ["torch", "os", "typing"], "modelscope.models.cv.image_defrcn_fewshot.evaluation.coco_evaluation": ["contextlib", "itertools", "numpy", "pycocotools", "json", "torch", "logging", "fvcore", "tabulate", "collections", "copy", "io", "os", "detectron2"], "modelscope.models.cv.image_defrcn_fewshot.evaluation.pascal_voc_evaluation": ["detectron2", "numpy", "os", "tempfile", "collections"], "modelscope.models.cv.image_defrcn_fewshot.evaluation.evaluator": ["torch", "logging", "datetime", "time", "detectron2"], "modelscope.models.cv.ocr_recognition.preprocessor": ["PIL", "numpy", "torch", "cv2", "os"], "modelscope.models.cv.ocr_recognition.model": ["torch", "os"], "modelscope.models.cv.ocr_recognition.modules.convnextvit": ["torch"], "modelscope.models.cv.ocr_recognition.modules.crnn": ["torch"], "modelscope.models.cv.ocr_recognition.modules.vitstr": ["torch", "logging", "functools", "copy", "__future__"], "modelscope.models.cv.ocr_recognition.modules.timm_tinyc": ["copy", "itertools", "torch", "logging", "functools", "math", "collections"], "modelscope.models.cv.ocr_recognition.modules.convnext": ["torch"], "modelscope.models.cv.video_panoptic_segmentation.track.quasi_dense_embed_tracker": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.video_panoptic_segmentation.head.mask": ["numpy", "pycocotools", "cv2", "torch", "__future__"], "modelscope.models.cv.video_panoptic_segmentation.head.kernel_updator": ["torch", "mmcv"], "modelscope.models.cv.video_panoptic_segmentation.head.kernel_update_head": ["torch", "mmcv", "mmdet", "numpy"], "modelscope.models.cv.video_panoptic_segmentation.head.semantic_fpn_wrapper": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.video_panoptic_segmentation.head.kernel_head": ["torch", "mmcv"], "modelscope.models.cv.video_panoptic_segmentation.head.kernel_iter_head": ["torch", "mmdet"], "modelscope.models.cv.video_panoptic_segmentation.head.track_heads": ["torch", "mmcv", "numpy"], 
"modelscope.models.cv.video_panoptic_segmentation.neck.fpn": ["torch", "mmcv"], "modelscope.models.cv.video_panoptic_segmentation.video_k_net": ["torch", "mmcv", "mmdet", "numpy"], "modelscope.models.cv.video_panoptic_segmentation.backbone.swin_checkpoint": ["pkgutil", "torchvision", "importlib", "torch", "os", "collections"], "modelscope.models.cv.video_panoptic_segmentation.backbone.swin_transformer": ["timm", "torch", "mmdet", "numpy"], "modelscope.models.cv.video_panoptic_segmentation.visualizer": ["cv2", "hashlib", "numpy"], "modelscope.models.cv.open_vocabulary_detection_vild.vild": ["tensorflow", "numpy", "torch", "scipy", "clip", "os", "typing"], "modelscope.models.cv.image_reid_person.pass_model": ["torch", "os", "enum"], "modelscope.models.cv.image_reid_person.transreid_model": ["torch", "functools", "itertools", "collections"], "modelscope.models.cv.image_face_fusion.facelib.align_trans": ["cv2", "numpy"], "modelscope.models.cv.image_face_fusion.facelib.matlab_cp2tform": ["numpy"], "modelscope.models.cv.image_face_fusion.network.aad_layer": ["torch"], "modelscope.models.cv.image_face_fusion.network.dense_motion": ["torch"], "modelscope.models.cv.image_face_fusion.network.model_irse": ["torch", "collections"], "modelscope.models.cv.image_face_fusion.network.bfm": ["torch", "os", "scipy", "numpy"], "modelscope.models.cv.image_face_fusion.network.ops": ["torch"], "modelscope.models.cv.image_face_fusion.network.aei_flow_net": ["torch"], "modelscope.models.cv.image_face_fusion.network.facerecon_model": ["torch", "os", "typing"], "modelscope.models.cv.image_face_fusion.image_face_fusion": ["PIL", "numpy", "torch", "cv2", "collections", "torchvision", "os", "typing"], "modelscope.models.cv.image_face_fusion.facegan.gan_wrap": ["PIL", "numpy", "torch", "cv2", "torchvision", "os"], "modelscope.models.cv.image_face_fusion.facegan.op.fused_act": ["torch"], "modelscope.models.cv.image_face_fusion.facegan.op.upfirdn2d": ["torch", "collections"], "modelscope.models.cv.image_face_fusion.facegan.op.conv2d_gradfix": ["torch", "warnings", "contextlib"], "modelscope.models.cv.image_face_fusion.facegan.model": ["torch", "math", "random"], "modelscope.models.cv.product_segmentation.net": ["torch"], "modelscope.models.cv.product_segmentation.seg_infer": ["PIL", "torch", "cv2", "numpy"], "modelscope.models.cv.controllable_image_generation.controlnet": ["PIL", "random", "numpy", "cv2", "torch", "einops", "tempfile", "sys", "math", "control_ldm", "os", "typing"], "modelscope.models.cv.controllable_image_generation.annotator.openpose.body": ["numpy", "cv2", "torch", "scipy", "math", "matplotlib", "time", "torchvision"], "modelscope.models.cv.controllable_image_generation.annotator.openpose.util": ["cv2", "math", "numpy", "matplotlib"], "modelscope.models.cv.controllable_image_generation.annotator.openpose.model": ["torch", "collections"], "modelscope.models.cv.controllable_image_generation.annotator.openpose.hand": ["skimage", "numpy", "cv2", "json", "torch", "scipy", "math", "matplotlib", "time"], "modelscope.models.cv.controllable_image_generation.annotator.annotator": ["mmcv", "mmseg", "numpy", "cv2", "torch", "einops", "os"], "modelscope.models.cv.controllable_image_generation.annotator.midas.api": ["cv2", "torch", "torchvision", "os"], "modelscope.models.cv.controllable_image_generation.annotator.midas.midas.midas_net_custom": ["torch"], "modelscope.models.cv.controllable_image_generation.annotator.midas.midas.transforms": ["cv2", "math", "numpy"], 
"modelscope.models.cv.controllable_image_generation.annotator.midas.midas.midas_net": ["torch"], "modelscope.models.cv.controllable_image_generation.annotator.midas.midas.dpt_depth": ["torch"], "modelscope.models.cv.controllable_image_generation.annotator.midas.midas.base_model": ["torch"], "modelscope.models.cv.controllable_image_generation.annotator.midas.midas.vit": ["timm", "torch", "types", "math"], "modelscope.models.cv.controllable_image_generation.annotator.midas.midas.blocks": ["torch"], "modelscope.models.cv.controllable_image_generation.annotator.midas.utils": ["numpy", "cv2", "torch", "re", "sys"], "modelscope.models.cv.controllable_image_generation.annotator.mlsd.utils": ["cv2", "os", "torch", "numpy"], "modelscope.models.cv.controllable_image_generation.annotator.mlsd.mbv2_mlsd_large": ["torch", "os", "sys"], "modelscope.models.cv.video_inpainting.inpainting": ["PIL", "time", "torchvision", "numpy", "cv2", "torch", "os"], "modelscope.models.cv.video_inpainting.inpainting_model": ["torch", "torchvision", "math", "numpy"], "modelscope.models.cv.image_mvs_depth_estimation.casmvs_model": ["numpy", "cv2", "torch", "os", "easydict"], "modelscope.models.cv.image_mvs_depth_estimation.colmap2mvsnet": ["multiprocessing", "struct", "shutil", "numpy", "cv2", "__future__", "functools", "collections", "os"], "modelscope.models.cv.image_mvs_depth_estimation.cas_mvsnet": ["torch"], "modelscope.models.cv.image_mvs_depth_estimation.utils": ["torch", "numpy", "torchvision", "random"], "modelscope.models.cv.image_mvs_depth_estimation.depth_filter": ["PIL", "numpy", "cv2", "plyfile", "os"], "modelscope.models.cv.image_mvs_depth_estimation.module": ["torch"], "modelscope.models.cv.image_mvs_depth_estimation.general_eval_dataset": ["PIL", "numpy", "cv2", "torch", "re", "os", "sys"], "modelscope.models.cv.image_binary_quant_classification.binary_quant_model": ["torch", "os", "collections"], "modelscope.models.cv.image_binary_quant_classification.bnext": ["torch", "numpy"], "modelscope.models.cv.skin_retouching.detection_model.detection_unet_in": ["torch"], "modelscope.models.cv.skin_retouching.detection_model.detection_module": ["torch"], "modelscope.models.cv.skin_retouching.retinaface.net": ["torch", "typing"], "modelscope.models.cv.skin_retouching.retinaface.prior_box": ["torch", "itertools", "math"], "modelscope.models.cv.skin_retouching.retinaface.box_utils": ["torch", "typing", "numpy"], "modelscope.models.cv.skin_retouching.retinaface.utils": ["pathlib", "numpy", "cv2", "torch", "re", "typing"], "modelscope.models.cv.skin_retouching.retinaface.network": ["torch", "torchvision", "typing"], "modelscope.models.cv.skin_retouching.retinaface.predict_single": ["albumentations", "numpy", "torch", "torchvision", "typing"], "modelscope.models.cv.skin_retouching.unet_deploy": ["torch", "warnings"], "modelscope.models.cv.skin_retouching.weights_init": ["torch"], "modelscope.models.cv.skin_retouching.utils": ["time", "numpy", "cv2", "torch", "einops", "typing"], "modelscope.models.cv.skin_retouching.inpainting_model.gconv": ["torch"], "modelscope.models.cv.skin_retouching.inpainting_model.inpainting_unet": ["torch"], "modelscope.models.cv.body_3d_keypoints.hdformer.directed_graph": ["typing", "sys", "numpy"], "modelscope.models.cv.body_3d_keypoints.hdformer.hdformer_detector": ["torch", "os", "typing", "numpy"], "modelscope.models.cv.body_3d_keypoints.hdformer.skeleton": ["numpy"], "modelscope.models.cv.body_3d_keypoints.hdformer.backbone": ["torch"], 
"modelscope.models.cv.body_3d_keypoints.hdformer.hdformer": ["torch"], "modelscope.models.cv.body_3d_keypoints.hdformer.block": ["torch", "einops", "math"], "modelscope.models.cv.body_3d_keypoints.cannonical_pose.canonical_pose_modules": ["torch"], "modelscope.models.cv.body_3d_keypoints.cannonical_pose.body_3d_pose": ["numpy", "torch", "logging", "os", "typing"], "modelscope.models.cv.action_recognition.models": ["torch"], "modelscope.models.cv.action_recognition.s3dg": ["torch"], "modelscope.models.cv.action_recognition.tada_convnext": ["torch", "math"], "modelscope.models.cv.action_recognition.temporal_patch_shift_transformer": ["timm", "numpy", "torch", "abc", "einops", "functools", "operator", "torchvision"], "modelscope.models.cv.video_frame_interpolation.interp_model.flow_reversal": ["torch"], "modelscope.models.cv.video_frame_interpolation.interp_model.UNet": ["torch"], "modelscope.models.cv.video_frame_interpolation.interp_model.IFNet_swin": ["torch", "timm", "numpy"], "modelscope.models.cv.video_frame_interpolation.interp_model.refinenet_arch": ["torch", "numpy"], "modelscope.models.cv.video_frame_interpolation.interp_model.transformer_layers": ["timm", "torch", "math", "functools", "sys"], "modelscope.models.cv.video_frame_interpolation.utils.utils": ["torch", "scipy", "numpy"], "modelscope.models.cv.video_frame_interpolation.utils.scene_change_detection": ["torch", "numpy"], "modelscope.models.cv.video_frame_interpolation.VFINet_for_video_frame_interpolation": ["torch", "os", "copy", "typing"], "modelscope.models.cv.video_frame_interpolation.VFINet_arch": ["torch"], "modelscope.models.cv.video_frame_interpolation.flow_model.update": ["torch"], "modelscope.models.cv.video_frame_interpolation.flow_model.corr": ["torch"], "modelscope.models.cv.video_frame_interpolation.flow_model.extractor": ["torch"], "modelscope.models.cv.video_frame_interpolation.flow_model.raft": ["torch", "numpy"], "modelscope.models.cv.object_detection.mmdet_ms.dense_heads.rpn_head": ["torch", "copy", "mmcv", "mmdet"], "modelscope.models.cv.object_detection.mmdet_ms.dense_heads.anchor_head": ["mmdet"], "modelscope.models.cv.object_detection.mmdet_ms.necks.fpn": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.object_detection.mmdet_ms.utils.convModule_norm": ["mmcv"], "modelscope.models.cv.object_detection.mmdet_ms.utils.checkpoint": ["mmcv", "torch", "tempfile", "collections", "time", "pkgutil", "io", "warnings", "importlib", "torchvision", "os"], "modelscope.models.cv.object_detection.mmdet_ms.backbones.vit": ["timm", "torch", "functools", "math", "mmdet"], "modelscope.models.cv.object_detection.mmdet_ms.roi_heads.bbox_heads.convfc_bbox_head": ["torch", "mmdet"], "modelscope.models.cv.object_detection.mmdet_ms.roi_heads.mask_heads.fcn_mask_head": ["mmcv", "numpy", "torch", "warnings", "mmdet"], "modelscope.models.cv.object_detection.mmdet_model": ["torch", "os", "numpy"], "modelscope.models.cv.pedestrian_attribute_recognition.model": ["torch", "os", "torchvision", "numpy"], "modelscope.models.cv.pointcloud_sceneflow_estimation.sf_rcp": ["torch"], "modelscope.models.cv.pointcloud_sceneflow_estimation.rcp_model": ["torch", "os", "numpy"], "modelscope.models.cv.pointcloud_sceneflow_estimation.common": ["torch", "math"], "modelscope.models.cv.pointcloud_sceneflow_estimation.pointnet2_utils": ["torch", "typing", "pointnet2_cuda"], "modelscope.models.cv.animal_recognition.splat": ["torch"], "modelscope.models.cv.animal_recognition.resnet": ["torch", "math"], 
"modelscope.models.cv.video_stabilization.utils.image_utils": ["torch", "skimage"], "modelscope.models.cv.video_stabilization.utils.RAFTUtils": ["torch", "scipy", "numpy"], "modelscope.models.cv.video_stabilization.utils.math_utils": ["torch", "numpy"], "modelscope.models.cv.video_stabilization.utils.ProjectionUtils": ["cv2", "torch", "math", "numpy"], "modelscope.models.cv.video_stabilization.utils.WarpUtils": ["torch", "tqdm", "numpy"], "modelscope.models.cv.video_stabilization.utils.MedianFilter": ["cv2", "torch", "math", "numpy"], "modelscope.models.cv.video_stabilization.utils.IterativeSmooth": ["torch", "os", "math", "numpy"], "modelscope.models.cv.video_stabilization.DUTRAFTStabilizer": ["numpy", "cv2", "torch", "tempfile", "sys", "math", "os", "typing"], "modelscope.models.cv.video_stabilization.DUT.config": ["__future__", "easydict"], "modelscope.models.cv.video_stabilization.DUT.rf_det_so": ["torch"], "modelscope.models.cv.video_stabilization.DUT.Smoother": ["torch", "math", "numpy"], "modelscope.models.cv.video_stabilization.DUT.DUT_raft": ["cv2", "torch", "sys", "numpy"], "modelscope.models.cv.video_stabilization.DUT.MotionPro": ["numpy", "cv2", "torch", "math", "os"], "modelscope.models.cv.video_stabilization.DUT.RAFT.update": ["torch"], "modelscope.models.cv.video_stabilization.DUT.RAFT.corr": ["torch", "alt_cuda_corr"], "modelscope.models.cv.video_stabilization.DUT.RAFT.extractor": ["torch"], "modelscope.models.cv.video_stabilization.DUT.RAFT.raft": ["torch", "numpy"], "modelscope.models.cv.video_stabilization.DUT.rf_det_module": ["torch"], "modelscope.models.cv.video_depth_estimation.dro_model": ["numpy", "cv2", "torch", "tqdm", "os", "glob"], "modelscope.models.cv.video_depth_estimation.utils.misc": ["termcolor"], "modelscope.models.cv.video_depth_estimation.utils.config": ["torch", "datetime", "yacs", "os"], "modelscope.models.cv.video_depth_estimation.utils.horovod": ["horovod"], "modelscope.models.cv.video_depth_estimation.utils.image_gt": ["PIL", "torch", "cv2", "functools"], "modelscope.models.cv.video_depth_estimation.utils.types": ["torch", "yacs", "numpy"], "modelscope.models.cv.video_depth_estimation.utils.depth": ["torch", "torchvision", "numpy", "matplotlib"], "modelscope.models.cv.video_depth_estimation.utils.load": ["torch", "logging", "collections", "inspect", "warnings", "importlib", "os"], "modelscope.models.cv.video_depth_estimation.utils.image": ["PIL", "numpy", "torch", "cv2", "functools", "os"], "modelscope.models.cv.video_depth_estimation.utils.augmentations": ["PIL", "random", "numpy", "cv2", "torchvision"], "modelscope.models.cv.video_depth_estimation.models.model_utils": [], "modelscope.models.cv.video_depth_estimation.models.sfm_model_mf": ["torch", "random"], "modelscope.models.cv.video_depth_estimation.models.model_checkpoint": ["torch", "os", "re", "numpy"], "modelscope.models.cv.video_depth_estimation.models.model_wrapper": ["random", "numpy", "torch", "collections", "importlib"], "modelscope.models.cv.video_depth_estimation.models.sup_model_mf": [], "modelscope.models.cv.video_depth_estimation.networks.layers.resnet.pose_decoder": ["torch", "__future__", "collections"], "modelscope.models.cv.video_depth_estimation.networks.layers.resnet.resnet_encoder": ["torch", "__future__", "torchvision", "numpy"], "modelscope.models.cv.video_depth_estimation.networks.layers.resnet.layers": ["torch", "__future__"], "modelscope.models.cv.video_depth_estimation.networks.layers.resnet.depth_decoder": ["torch", "__future__", "collections", "numpy"], 
"modelscope.models.cv.video_depth_estimation.networks.optim.update": ["torch"], "modelscope.models.cv.video_depth_estimation.networks.optim.extractor": ["torch", "torchvision"], "modelscope.models.cv.video_depth_estimation.networks.depth_pose.depth_pose_net": ["torch", "functools"], "modelscope.models.cv.video_depth_estimation.configs.default_config": ["os", "yacs"], "modelscope.models.cv.video_depth_estimation.geometry.pose_utils": ["torch", "numpy"], "modelscope.models.cv.video_depth_estimation.geometry.camera_utils": ["torch"], "modelscope.models.cv.video_depth_estimation.geometry.camera": ["torch", "functools"], "modelscope.models.cv.video_depth_estimation.geometry.pose": ["torch"], "modelscope.models.cv.vidt.backbone": ["timm", "numpy", "torch", "os", "math"], "modelscope.models.cv.vidt.model": ["torch", "os"], "modelscope.models.cv.vidt.head": ["copy", "torch", "math"], "modelscope.models.cv.vidt.fpn_fusion": ["torch"], "modelscope.models.cv.vidt.deformable_transformer": ["timm", "copy", "warnings", "torch", "math"], "modelscope.models.cv.face_human_hand_detection.shufflenetv2": ["torch"], "modelscope.models.cv.face_human_hand_detection.one_stage_detector": ["torch"], "modelscope.models.cv.face_human_hand_detection.nanodet_plus_head": ["numpy", "cv2", "torch", "math", "torchvision"], "modelscope.models.cv.face_human_hand_detection.det_infer": ["cv2", "torch", "numpy"], "modelscope.models.cv.face_human_hand_detection.ghost_pan": ["torch", "math"], "modelscope.models.cv.face_human_hand_detection.utils": ["torch"], "modelscope.models.cv.referring_video_object_segmentation.utils.misc": ["torch", "torchvision", "typing", "pickle"], "modelscope.models.cv.referring_video_object_segmentation.utils.mttr": ["torch", "einops"], "modelscope.models.cv.referring_video_object_segmentation.utils.multimodal_transformer": ["transformers", "torch", "einops", "copy", "os", "typing"], "modelscope.models.cv.referring_video_object_segmentation.utils.matcher": ["torch", "scipy"], "modelscope.models.cv.referring_video_object_segmentation.utils.backbone": ["torch", "torchvision", "einops"], "modelscope.models.cv.referring_video_object_segmentation.utils.position_encoding_2d": ["torch", "math"], "modelscope.models.cv.referring_video_object_segmentation.utils.postprocessing": ["torch", "einops", "numpy", "pycocotools"], "modelscope.models.cv.referring_video_object_segmentation.utils.criterion": ["torch"], "modelscope.models.cv.referring_video_object_segmentation.utils.swin_transformer": ["timm", "numpy", "torch", "einops", "functools", "operator"], "modelscope.models.cv.referring_video_object_segmentation.utils.segmentation": ["torch", "typing"], "modelscope.models.cv.referring_video_object_segmentation.model": ["torch", "os", "typing"], "modelscope.models.cv.hand_static.networks": ["torch", "os", "torchvision"], "modelscope.models.cv.hand_static.hand_model": ["PIL", "numpy", "torch", "cv2", "sys", "torchvision", "os"], "modelscope.models.cv.image_depth_estimation.newcrfs_model": ["torch", "os", "numpy"], "modelscope.models.cv.image_depth_estimation.networks.uper_crf_head": ["torch", "mmcv"], "modelscope.models.cv.image_depth_estimation.networks.newcrf_layers": ["torch", "timm", "numpy"], "modelscope.models.cv.image_depth_estimation.networks.newcrf_depth": ["torch"], "modelscope.models.cv.image_depth_estimation.networks.newcrf_utils": ["pkgutil", "warnings", "torchvision", "importlib", "torch", "os", "collections"], "modelscope.models.cv.image_depth_estimation.networks.swin_transformer": ["torch", "timm", 
"numpy"], "modelscope.models.cv.image_colorization.unet.unet": ["torch", "numpy"], "modelscope.models.cv.image_colorization.unet.utils": ["torch", "functools", "enum"], "modelscope.models.cv.image_colorization.ddcolor.ddcolor_for_image_colorization": ["numpy", "torch", "copy", "os", "typing"], "modelscope.models.cv.image_colorization.ddcolor.ddcolor": ["torch"], "modelscope.models.cv.image_colorization.ddcolor.utils.vgg": ["torch", "os", "torchvision", "collections"], "modelscope.models.cv.image_colorization.ddcolor.utils.unet": ["torch", "collections", "enum"], "modelscope.models.cv.image_colorization.ddcolor.utils.transformer_utils": ["torch", "typing"], "modelscope.models.cv.image_colorization.ddcolor.utils.position_encoding": ["torch", "math"], "modelscope.models.cv.image_colorization.ddcolor.utils.convnext": ["torch", "timm"], "modelscope.models.cv.image_colorization.ddcolor.loss": ["torch"], "modelscope.models.cv.face_detection.retinaface.detection": ["cv2", "torch", "numpy"], "modelscope.models.cv.face_detection.retinaface.models.retinaface": ["torch", "torchvision", "collections"], "modelscope.models.cv.face_detection.retinaface.models.net": ["torch", "time", "torchvision"], "modelscope.models.cv.face_detection.retinaface.utils": ["torch", "itertools", "math", "numpy"], "modelscope.models.cv.face_detection.mtcnn.models.detector": ["PIL", "torch", "os", "numpy"], "modelscope.models.cv.face_detection.mtcnn.models.get_nets": ["torch", "collections", "numpy"], "modelscope.models.cv.face_detection.mtcnn.models.box_utils": ["PIL", "numpy"], "modelscope.models.cv.face_detection.mtcnn.models.first_stage": ["PIL", "torch", "math", "numpy"], "modelscope.models.cv.face_detection.ulfd_slim.detection": ["cv2", "torch", "os", "numpy"], "modelscope.models.cv.face_detection.ulfd_slim.vision.ssd.predictor": ["torch"], "modelscope.models.cv.face_detection.ulfd_slim.vision.ssd.data_preprocessing": [], "modelscope.models.cv.face_detection.ulfd_slim.vision.ssd.ssd": ["torch", "typing", "collections", "numpy"], "modelscope.models.cv.face_detection.ulfd_slim.vision.ssd.mb_tiny_fd": ["torch"], "modelscope.models.cv.face_detection.ulfd_slim.vision.ssd.fd_config": ["numpy"], "modelscope.models.cv.face_detection.ulfd_slim.vision.transforms": ["cv2", "torch", "types", "numpy"], "modelscope.models.cv.face_detection.ulfd_slim.vision.box_utils": ["torch", "math"], "modelscope.models.cv.face_detection.ulfd_slim.vision.mb_tiny": ["torch"], "modelscope.models.cv.face_detection.peppa_pig_face.facer": ["cv2", "numpy"], "modelscope.models.cv.face_detection.peppa_pig_face.LK.lk": ["numpy"], "modelscope.models.cv.face_detection.peppa_pig_face.face_detector": ["cv2", "tensorflow", "numpy"], "modelscope.models.cv.face_detection.peppa_pig_face.face_landmark": ["cv2", "tensorflow", "numpy"], "modelscope.models.cv.face_detection.scrfd.scrfd_detect": ["numpy", "torch", "copy", "os", "typing"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.core.post_processing.bbox_nms": ["torch"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.core.bbox.transforms": ["torch", "numpy"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.retinaface": ["mmdet", "numpy"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.auto_augment": ["copy", "mmcv", "numpy", "cv2", "mmdet"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.transforms": ["mmcv", "mmdet", "numpy"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.loading": ["os", "mmdet", 
"numpy", "pycocotools"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.formating": ["torch", "mmcv", "mmdet", "numpy"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.dense_heads.scrfd_head": ["torch", "mmcv", "mmdet", "numpy"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.backbones.master_net": ["torch", "mmdet"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.backbones.mobilenet": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.backbones.resnet": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors.scrfd": ["torch", "mmdet"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors.single_stage": ["torch", "mmdet"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors.base": ["mmcv", "numpy", "torch", "abc", "mmdet", "collections"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors.tinymog": ["torch", "mmdet"], "modelscope.models.cv.face_detection.scrfd.tinymog_detect": ["torch", "os", "copy", "typing"], "modelscope.models.cv.face_detection.scrfd.preprocessor": ["PIL", "typing", "numpy"], "modelscope.models.cv.face_detection.scrfd.damofd_detect": ["torch", "os", "copy", "typing"], "modelscope.models.cv.face_detection.mogface.models.mogprednet": ["torch", "math"], "modelscope.models.cv.face_detection.mogface.models.resnet": ["torch"], "modelscope.models.cv.face_detection.mogface.models.utils": ["torch", "itertools", "math", "numpy"], "modelscope.models.cv.face_detection.mogface.models.detectors": ["cv2", "torch", "os", "numpy"], "modelscope.models.cv.face_detection.mogface.models.mogface": ["torch"], "modelscope.models.cv.robust_image_classification.easyrobust_model": ["torch", "os"], "modelscope.models.cv.image_semantic_segmentation.ddpm_segmentation_model": ["torch", "os", "typing", "ddpm_guided_diffusion"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.utils.data_process_func": ["mmcv", "mmdet"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.utils.builder": ["mmcv"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.utils.seg_func": ["torch", "warnings"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.backbone.beit_adapter": ["timm", "torch", "logging", "math", "mmdet"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.backbone.adapter_modules": ["timm", "torch", "logging", "functools", "mmdet"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.backbone.base.beit": ["timm", "mmcv", "torch", "mmdet", "functools", "math"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.segmentors.encoder_decoder_mask2former": ["torch", "mmdet"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.segmentors.base_segmentor": ["warnings", "mmcv", "numpy", "torch", "abc", "collections"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.decode_heads.mask2former_head_from_mmseg": ["torch", "copy", "mmcv", "mmdet"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.decode_heads.base_decode_head": ["torch", "abc", "mmcv", "mmdet"], "modelscope.models.cv.image_semantic_segmentation.semantic_seg_model": ["torch", "os", "numpy"], "modelscope.models.cv.image_semantic_segmentation.pan_merge.base_panoptic_fusion_head": ["abc", "mmcv", "mmdet"], 
"modelscope.models.cv.image_semantic_segmentation.pan_merge.maskformer_semantic_head": ["torch", "mmdet"], "modelscope.models.cv.image_semantic_segmentation.ddpm_seg.data_util": [], "modelscope.models.cv.image_semantic_segmentation.ddpm_seg.utils": ["torch", "numpy", "PIL", "random"], "modelscope.models.cv.image_semantic_segmentation.ddpm_seg.feature_extractors": ["torch", "typing"], "modelscope.models.cv.image_semantic_segmentation.ddpm_seg.pixel_classifier": ["PIL", "numpy", "torch", "collections", "os"], "modelscope.models.cv.video_single_object_tracking.config.ostrack": ["easydict"], "modelscope.models.cv.video_single_object_tracking.utils.utils": ["numpy", "cv2", "torch", "typing", "math"], "modelscope.models.cv.video_single_object_tracking.tracker.procontext": ["torch", "copy"], "modelscope.models.cv.video_single_object_tracking.tracker.ostrack": ["torch"], "modelscope.models.cv.video_single_object_tracking.models.layers.attn_blocks": ["torch", "math", "timm"], "modelscope.models.cv.video_single_object_tracking.models.layers.head": ["torch"], "modelscope.models.cv.video_single_object_tracking.models.layers.patch_embed": ["torch", "timm"], "modelscope.models.cv.video_single_object_tracking.models.layers.attn": ["torch"], "modelscope.models.cv.video_single_object_tracking.models.ostrack.base_backbone": ["torch", "timm"], "modelscope.models.cv.video_single_object_tracking.models.ostrack.ostrack": ["torch"], "modelscope.models.cv.video_single_object_tracking.models.ostrack.utils": ["torch"], "modelscope.models.cv.video_single_object_tracking.models.ostrack.vit_ce": ["timm", "torch", "functools"], "modelscope.models.cv.video_single_object_tracking.models.procontext.procontext": ["torch"], "modelscope.models.cv.video_single_object_tracking.models.procontext.utils": ["torch"], "modelscope.models.cv.video_single_object_tracking.models.procontext.vit_ce": ["timm", "torch", "functools"], "modelscope.models.cv.text_driven_segmentation.lseg_net": ["torch", "numpy"], "modelscope.models.cv.text_driven_segmentation.lseg_blocks": ["torch"], "modelscope.models.cv.text_driven_segmentation.lseg_model": ["PIL", "numpy", "torch", "json", "os", "typing"], "modelscope.models.cv.text_driven_segmentation.model": ["torch", "typing", "collections", "numpy"], "modelscope.models.cv.text_driven_segmentation.lseg_vit": ["timm", "torch", "types", "math"], "modelscope.models.cv.text_driven_segmentation.clip": ["urllib", "hashlib", "PIL", "pkg_resources", "torch", "tqdm", "warnings", "torchvision", "os", "typing"], "modelscope.models.cv.text_driven_segmentation.simple_tokenizer": ["gzip", "os", "regex", "functools", "ftfy", "html"], "modelscope.models.cv.text_driven_segmentation.lseg_base": ["torch"], "modelscope.models.cv.crowd_counting.hrnet_aspp_relu": ["numpy", "torch", "logging", "functools", "os"], "modelscope.models.cv.crowd_counting.cc_model": ["torch", "os", "typing"], "modelscope.models.cv.image_panoptic_segmentation.panseg_model": ["torch", "os"], "modelscope.models.cv.face_emotion.emotion_model": ["torch", "os", "sys"], "modelscope.models.cv.face_emotion.emotion_infer": ["PIL", "torch", "torchvision"], "modelscope.models.cv.face_emotion.face_alignment.face_align": ["PIL", "numpy", "cv2", "sys", "os"], "modelscope.models.cv.face_emotion.face_alignment.face": ["cv2", "os", "numpy", "tensorflow"], "modelscope.models.cv.face_emotion.efficient.model": ["torch"], "modelscope.models.cv.face_emotion.efficient.utils": ["functools", "torch", "re", "math", "collections"], 
"modelscope.models.cv.video_super_resolution.real_basicvsr_net": ["torch"], "modelscope.models.cv.video_super_resolution.msrresnet_lite_model": ["torch", "os", "functools", "typing"], "modelscope.models.cv.video_super_resolution.common": ["torch"], "modelscope.models.cv.video_super_resolution.real_basicvsr_for_video_super_resolution": ["torch", "os", "typing"], "modelscope.models.cv.video_super_resolution.basicvsr_net": ["torch"], "modelscope.models.cv.face_attribute_recognition.fair_face.face_attribute_recognition": ["PIL", "numpy", "torch", "cv2", "torchvision", "os"], "modelscope.models.cv.image_denoise.nafnet.NAFNet_arch": ["torch", "numpy"], "modelscope.models.cv.image_denoise.nafnet.arch_util": ["torch"], "modelscope.models.cv.image_denoise.nafnet_for_image_denoise": ["torch", "os", "typing"], "modelscope.models.cv.image_classification.mmcls_model": ["os"], "modelscope.models.cv.image_classification.utils": ["itertools", "numpy", "torch", "os", "mmcls", "math", "collections"], "modelscope.models.cv.image_classification.backbones.beit_v2": ["itertools", "mmcv", "torch", "einops", "functools", "mmcls", "math", "collections", "warnings", "os", "typing"], "modelscope.models.cv.image_classification.backbones.nextvit": ["itertools", "mmcv", "torch", "einops", "functools", "mmcls", "math", "collections", "warnings", "os", "typing"], "modelscope.models.cv.image_classification.resnet50_cc": ["torch", "math", "collections", "torchvision", "os"], "modelscope.models.cv.image_color_enhance.csrnet": ["torch", "functools", "math"], "modelscope.models.cv.image_color_enhance.deeplpf.deeplpfnet": ["torch", "math", "matplotlib"], "modelscope.models.cv.image_color_enhance.deeplpf.deeplpf_image_color_enhance": ["torch", "os", "typing"], "modelscope.models.cv.image_color_enhance.image_color_enhance": ["torch", "os", "typing"], "modelscope.models.cv.image_color_enhance.adaint.adaint": ["numbers", "torch", "torchvision", "os", "typing"], "modelscope.models.base.base_torch_head": ["torch", "typing"], "modelscope.models.base.base_model": ["os", "abc", "typing"], "modelscope.models.base.base_torch_model": ["torch", "functools", "copy", "packaging", "os", "typing"], "modelscope.models.base.base_head": ["abc", "typing"], "modelscope.metrics.image_quality_assessment_degradation_metric": ["numpy", "cv2", "torch", "scipy", "tempfile", "sys", "collections", "tqdm", "os", "typing"], "modelscope.metrics.prediction_saving_wrapper": ["typing", "sklearn", "numpy"], "modelscope.metrics.video_stabilization_metric": ["numpy", "cv2", "tqdm", "tempfile", "sys", "os", "typing"], "modelscope.metrics.ppl_metric": ["torch", "typing", "math", "numpy"], "modelscope.metrics.inbatch_recall_metric": ["torch", "typing", "numpy"], "modelscope.metrics.loss_metric": ["typing", "sklearn", "numpy"], "modelscope.metrics.ocr_recognition_metric": ["torch", "edit_distance", "typing", "numpy"], "modelscope.metrics.map_metric": ["typing", "numpy"], "modelscope.metrics.image_colorization_metric": ["numpy", "cv2", "torch", "scipy", "torchvision", "typing"], "modelscope.metrics.sequence_classification_metric": ["typing", "sklearn", "numpy"], "modelscope.metrics.audio_noise_metric": ["typing"], "modelscope.metrics.translation_evaluation_metric": ["pandas", "typing", "importlib"], "modelscope.metrics.video_frame_interpolation_metric": ["numpy", "torch", "lpips", "math", "typing"], "modelscope.metrics.image_inpainting_metric": ["torch", "scipy", "typing", "numpy"], "modelscope.metrics.image_denoise_metric": ["cv2", "torch", "typing", "numpy"], 
"modelscope.metrics.referring_video_object_segmentation_metric": ["numpy", "pycocotools", "torch", "tqdm", "typing"], "modelscope.metrics.token_classification_metric": ["typing", "numpy", "importlib"], "modelscope.metrics.video_summarization_metric": ["typing", "numpy"], "modelscope.metrics.builder": ["typing"], "modelscope.metrics.image_quality_assessment_mos_metric": ["numpy", "cv2", "torch", "scipy", "tempfile", "sys", "tqdm", "os", "typing"], "modelscope.metrics.ned_metric": ["typing", "numpy"], "modelscope.metrics.text_ranking_metric": ["typing", "numpy"], "modelscope.metrics.movie_scene_segmentation_metric": ["typing", "numpy"], "modelscope.metrics.accuracy_metric": ["typing", "numpy"], "modelscope.metrics.image_instance_segmentation_metric": ["numpy", "pycocotools", "tempfile", "collections", "os", "typing"], "modelscope.metrics.video_super_resolution_metric.metric_util": ["numpy"], "modelscope.metrics.video_super_resolution_metric.video_super_resolution_metric": ["typing", "numpy"], "modelscope.metrics.video_super_resolution_metric.niqe": ["cv2", "scipy", "math", "numpy"], "modelscope.metrics.video_super_resolution_metric.matlab_functions": ["torch", "math", "numpy"], "modelscope.metrics.ciderD.ciderD": ["__future__"], "modelscope.metrics.ciderD.ciderD_scorer": ["pdb", "numpy", "__future__", "math", "collections", "copy", "six", "os"], "modelscope.metrics.action_detection_evaluator": ["numpy", "logging", "scipy", "pandas", "collections", "copy", "os", "detectron2"], "modelscope.metrics.image_color_enhance_metric": ["cv2", "typing", "numpy"], "modelscope.metrics.image_portrait_enhancement_metric": ["cv2", "typing", "numpy"], "modelscope.metrics.bleu_metric": ["typing", "itertools", "sacrebleu"], "modelscope.metrics.text_generation_metric": ["nltk", "rouge", "typing"], "modelscope.metrics.base": ["abc", "typing"], "modelscope.pipelines.util": ["os", "typing"], "modelscope.pipelines.science.protein_structure_pipeline": ["numpy", "json", "torch", "unicore", "time", "os", "typing"], "modelscope.pipelines.builder": ["os", "typing"], "modelscope.pipelines.pipeline_template": ["typing", "numpy"], "modelscope.pipelines.audio.timestamp_pipeline": ["json", "typing", "yaml", "os", "funasr"], "modelscope.pipelines.audio.kws_farfield_pipeline": ["numpy", "wave", "soundfile", "io", "typing"], "modelscope.pipelines.audio.speaker_verification_pipeline": ["os", "typing", "shutil", "yaml"], "modelscope.pipelines.audio.inverse_text_processing_pipeline": ["os", "typing", "shutil", "yaml"], "modelscope.pipelines.audio.separation_pipeline": ["numpy", "torch", "soundfile", "io", "typing"], "modelscope.pipelines.audio.voice_activity_detection_pipeline": ["json", "typing", "yaml", "os", "funasr"], "modelscope.pipelines.audio.text_to_speech_pipeline": ["typing", "numpy"], "modelscope.pipelines.audio.kws_kwsbp_pipeline": ["json", "os", "typing"], "modelscope.pipelines.audio.linear_aec_pipeline": ["numpy", "torch", "scipy", "yaml", "importlib", "os", "typing"], "modelscope.pipelines.audio.ans_pipeline": ["numpy", "torch", "librosa", "soundfile", "io", "typing"], "modelscope.pipelines.audio.speaker_verification_eres2net_pipeline": ["torch", "io", "typing", "soundfile"], "modelscope.pipelines.audio.lm_infer_pipeline": ["os", "typing"], "modelscope.pipelines.audio.ans_dfsmn_pipeline": ["numpy", "torch", "sys", "collections", "librosa", "soundfile", "io", "os", "typing"], "modelscope.pipelines.audio.asr_inference_pipeline": ["json", "os", "typing", "yaml"], 
"modelscope.pipelines.audio.speaker_diarization_pipeline": ["shutil", "numpy", "json", "yaml", "os", "typing"], "modelscope.pipelines.audio.speaker_verification_rdino_pipeline": ["torch", "io", "typing", "soundfile"], "modelscope.pipelines.audio.punctuation_processing_pipeline": ["os", "typing", "shutil", "yaml"], "modelscope.pipelines.audio.speaker_verification_light_pipeline": ["torch", "io", "typing", "soundfile"], "modelscope.pipelines.audio.speaker_change_locating_pipeline": ["numpy", "torch", "soundfile", "io", "typing"], "modelscope.pipelines.audio.asr_wenet_inference_pipeline": ["typing"], "modelscope.pipelines.multi_modal.asr_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.image_captioning_pipeline": ["torch", "typing", "numpy"], "modelscope.pipelines.multi_modal.text_to_video_synthesis_pipeline": ["cv2", "torch", "einops", "tempfile", "os", "typing"], "modelscope.pipelines.multi_modal.mgeo_ranking_pipeline": ["torch", "typing", "numpy"], "modelscope.pipelines.multi_modal.generative_multi_modal_embedding_pipeline": ["typing"], "modelscope.pipelines.multi_modal.multimodal_dialogue_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.text_to_image_synthesis_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.text2sql_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.visual_entailment_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.disco_guided_diffusion_pipeline.disco_guided_diffusion": ["PIL", "gc", "numpy", "cv2", "json", "torch", "math", "clip", "importlib", "torchvision", "os"], "modelscope.pipelines.multi_modal.disco_guided_diffusion_pipeline.utils": ["fractions", "warnings", "numpy", "torch", "math"], "modelscope.pipelines.multi_modal.visual_question_answering_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.video_question_answering_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.video_captioning_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.video_multi_modal_embedding_pipeline": ["typing"], "modelscope.pipelines.multi_modal.efficient_diffusion_tuning_pipeline": ["PIL", "numpy", "torch", "cv2", "torchvision", "typing"], "modelscope.pipelines.multi_modal.team_multi_modal_similarity_pipeline": ["typing"], "modelscope.pipelines.multi_modal.diffusers_wrapped.diffusers_pipeline": ["os", "typing"], "modelscope.pipelines.multi_modal.diffusers_wrapped.stable_diffusion.stable_diffusion_pipeline": ["PIL", "numpy", "torch", "cv2", "diffusers", "typing"], "modelscope.pipelines.multi_modal.diffusers_wrapped.stable_diffusion.chinese_stable_diffusion_pipeline": ["PIL", "transformers", "numpy", "cv2", "torch", "diffusers", "typing"], "modelscope.pipelines.multi_modal.multi_modal_embedding_pipeline": ["typing"], "modelscope.pipelines.multi_modal.ocr_recognition_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.document_vl_embedding_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.image_text_retrieval_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.gridvlp_pipeline": ["PIL", "transformers", "numpy", "json", "torch", "time", "os", "traceback", "typing"], "modelscope.pipelines.multi_modal.visual_grounding_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.soonet_video_temporal_grounding_pipeline": ["numpy", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.multi_modal.sudoku_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.translation_evaluation_pipeline": ["numpy", "enum", 
"torch", "os", "typing"], "modelscope.pipelines.nlp.glm130b_text_generation_pipeline": ["typing"], "modelscope.pipelines.nlp.faq_question_answering_pipeline": ["typing"], "modelscope.pipelines.nlp.document_grounded_dialog_generate_pipeline": ["typing"], "modelscope.pipelines.nlp.automatic_post_editing_pipeline": ["tensorflow", "sacremoses", "numpy", "jieba", "sentencepiece", "os", "typing", "html"], "modelscope.pipelines.nlp.named_entity_recognition_pipeline": ["typing"], "modelscope.pipelines.nlp.interactive_translation_pipeline": ["tensorflow", "sacremoses", "numpy", "jieba", "subword_nmt", "os", "typing"], "modelscope.pipelines.nlp.summarization_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.document_grounded_dialog_retrieval_pipeline": ["numpy", "json", "faiss", "os", "typing"], "modelscope.pipelines.nlp.fasttext_text_classification_pipeline": ["numpy", "fasttext", "sentencepiece", "os", "typing"], "modelscope.pipelines.nlp.word_alignment_pipeline": ["typing", "numpy"], "modelscope.pipelines.nlp.feature_extraction_pipeline": ["torch", "os", "typing"], "modelscope.pipelines.nlp.text_ranking_pipeline": ["typing", "numpy"], "modelscope.pipelines.nlp.fid_dialogue_pipeline": ["torch", "re", "typing"], "modelscope.pipelines.nlp.text_classification_pipeline": ["torch", "typing", "numpy"], "modelscope.pipelines.nlp.codegeex_code_generation_pipeline": ["typing"], "modelscope.pipelines.nlp.translation_quality_estimation_pipeline": ["transformers", "torch", "io", "os", "typing"], "modelscope.pipelines.nlp.fill_mask_pipeline": ["typing", "numpy"], "modelscope.pipelines.nlp.distributed_plug_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.conversational_text_to_sql_pipeline": ["torch", "typing", "text2sql_lgesql"], "modelscope.pipelines.nlp.distributed_gpt3_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.information_extraction_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.table_question_answering_pipeline": ["transformers", "json", "torch", "os", "typing"], "modelscope.pipelines.nlp.user_satisfaction_estimation_pipeline": ["torch", "typing", "numpy"], "modelscope.pipelines.nlp.dialog_modeling_pipeline": ["typing"], "modelscope.pipelines.nlp.canmt_translation_pipeline": ["torch", "os", "sacremoses", "typing"], "modelscope.pipelines.nlp.word_segmentation_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.document_segmentation_pipeline": ["datasets", "numpy", "torch", "re", "typing"], "modelscope.pipelines.nlp.distributed_gpt_moe_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.extractive_summarization_pipeline": ["datasets", "numpy", "torch", "re", "typing"], "modelscope.pipelines.nlp.text_error_correction_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.dialog_state_tracking_pipeline": ["typing"], "modelscope.pipelines.nlp.mglm_text_summarization_pipeline": ["os", "typing"], "modelscope.pipelines.nlp.translation_pipeline": ["tensorflow", "sacremoses", "numpy", "jieba", "subword_nmt", "os", "typing"], "modelscope.pipelines.nlp.siamese_uie_pipeline": ["json", "torch", "logging", "scipy", "tqdm", "math", "copy", "time", "pathlib", "os", "typing"], "modelscope.pipelines.nlp.dialog_intent_prediction_pipeline": ["typing"], "modelscope.pipelines.nlp.sentence_embedding_pipeline": ["torch", "typing", "numpy"], "modelscope.pipelines.nlp.document_grounded_dialog_rerank_pipeline": ["ujson", "transformers", "random", "numpy", "torch", "re", "sys", "collections", "time", "os", "typing", "pprint"], 
"modelscope.pipelines.nlp.zero_shot_classification_pipeline": ["torch", "scipy", "typing"], "modelscope.pipelines.nlp.text_generation_pipeline": ["torch", "os", "typing"], "modelscope.pipelines.nlp.language_identification_pipline": ["tensorflow", "numpy", "re", "os", "typing"], "modelscope.pipelines.nlp.token_classification_pipeline": ["torch", "typing", "numpy"], "modelscope.pipelines.nlp.codegeex_code_translation_pipeline": ["typing"], "modelscope.pipelines.cv.bad_image_detecting_pipeline": ["torch", "typing", "numpy"], "modelscope.pipelines.cv.image_cartoon_pipeline": ["tensorflow", "numpy", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_to_image_generate_pipeline": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.facial_expression_recognition_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.retina_face_detection_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_style_transfer_pipeline": ["cv2", "os", "typing", "numpy"], "modelscope.pipelines.cv.image_face_fusion_pipeline": ["typing", "numpy"], "modelscope.pipelines.cv.ulfd_face_detection_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.pedestrian_attribute_recognition_pipeline": ["PIL", "numpy", "cv2", "json", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.image_denoise_pipeline": ["torch", "torchvision", "typing"], "modelscope.pipelines.cv.vop_retrieval_se_pipeline": ["numpy", "torch", "gzip", "os", "typing"], "modelscope.pipelines.cv.image_matting_pipeline": ["tensorflow", "numpy", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_deblur_pipeline": ["torch", "torchvision", "typing"], "modelscope.pipelines.cv.video_human_matting_pipeline": ["numpy", "cv2", "torch", "moviepy", "os", "typing"], "modelscope.pipelines.cv.live_category_pipeline": ["PIL", "numpy", "torch", "decord", "torchvision", "os", "typing"], "modelscope.pipelines.cv.image_structured_model_probing_pipeline": ["mmcv", "numpy", "torch", "math", "torchvision", "os", "typing"], "modelscope.pipelines.cv.face_quality_assessment_pipeline": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "modelscope.pipelines.cv.face_processing_base_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_portrait_enhancement_pipeline": ["PIL", "numpy", "cv2", "torch", "scipy", "math", "typing"], "modelscope.pipelines.cv.image_color_enhance_pipeline": ["torch", "torchvision", "typing"], "modelscope.pipelines.cv.vision_efficient_tuning_pipeline": ["torch", "torchvision", "typing", "numpy"], "modelscope.pipelines.cv.tbs_detection_utils.utils": ["PIL", "numpy", "torch", "__future__", "colorsys", "pandas", "matplotlib", "torchvision", "os"], "modelscope.pipelines.cv.video_object_segmentation_pipeline": ["PIL", "numpy", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.face_detection_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.body_3d_keypoints_pipeline": ["numpy", "cv2", "torch", "tempfile", "matplotlib", "datetime", "mpl_toolkits", "os", "typing"], "modelscope.pipelines.cv.image_paintbyexample_pipeline": ["PIL", "numpy", "cv2", "torch", "einops", "torchvision", "typing"], "modelscope.pipelines.cv.face_recognition_ood_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_classification_pipeline": ["torch", "typing", "numpy"], 
"modelscope.pipelines.cv.card_detection_pipeline": ["typing"], "modelscope.pipelines.cv.table_recognition_pipeline": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "modelscope.pipelines.cv.image_to_image_translation_pipeline": ["PIL", "numpy", "cv2", "torch", "sys", "io", "torchvision", "os", "typing"], "modelscope.pipelines.cv.face_attribute_recognition_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_debanding_pipeline": ["torch", "torchvision", "typing"], "modelscope.pipelines.cv.video_instance_segmentation_pipeline": ["mmcv", "numpy", "cv2", "torch", "tqdm", "os", "typing"], "modelscope.pipelines.cv.tinynas_classification_pipeline": ["torch", "math", "torchvision", "os", "typing"], "modelscope.pipelines.cv.human_reconstruction_pipeline": ["trimesh", "shutil", "numpy", "torch", "os", "typing"], "modelscope.pipelines.cv.video_multi_object_tracking_pipeline": ["torch", "os", "typing"], "modelscope.pipelines.cv.controllable_image_generation_pipeline": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "glob", "os", "typing"], "modelscope.pipelines.cv.image_defrcn_fewshot_pipeline": ["torch", "os", "typing", "numpy"], "modelscope.pipelines.cv.ddpm_semantic_segmentation_pipeline": ["torch", "torchvision", "typing"], "modelscope.pipelines.cv.content_check_pipeline": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.vop_retrieval_pipeline": ["random", "numpy", "torch", "tqdm", "math", "collections", "gzip", "os", "typing", "pickle"], "modelscope.pipelines.cv.object_detection_3d_pipeline": ["PIL", "numpy", "cv2", "torch", "tempfile", "os", "typing"], "modelscope.pipelines.cv.lineless_table_recognition_pipeline": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "modelscope.pipelines.cv.cmdssl_video_embedding_pipeline": ["PIL", "numpy", "torch", "decord", "torchvision", "os", "typing"], "modelscope.pipelines.cv.tinynas_detection_pipeline": ["typing"], "modelscope.pipelines.cv.video_deinterlace_pipeline": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "torchvision", "os", "typing"], "modelscope.pipelines.cv.image_open_vocabulary_detection_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.language_guided_video_summarization_pipeline": ["PIL", "shutil", "random", "numpy", "cv2", "torch", "tempfile", "clip", "os", "typing"], "modelscope.pipelines.cv.body_2d_keypoints_pipeline": ["PIL", "numpy", "cv2", "json", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.face_human_hand_detection_pipeline": ["typing", "numpy"], "modelscope.pipelines.cv.hicossl_video_embedding_pipeline": ["torch", "os", "typing", "math"], "modelscope.pipelines.cv.face_recognition_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_body_reshaping_pipeline": ["typing"], "modelscope.pipelines.cv.image_inpainting_pipeline": ["PIL", "numpy", "torch", "cv2", "typing"], "modelscope.pipelines.cv.face_recognition_onnx_fm_pipeline": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "modelscope.pipelines.cv.image_driving_perception_pipeline": ["cv2", "os", "typing", "numpy"], "modelscope.pipelines.cv.video_stabilization_pipeline": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "glob", "os", "typing"], "modelscope.pipelines.cv.indoor_layout_estimation_pipeline": ["cv2", "typing", "numpy"], "modelscope.pipelines.cv.ddcolor_image_colorization_pipeline": ["numpy", "cv2", "torch", "torchvision", "typing"], 
"modelscope.pipelines.cv.face_emotion_pipeline": ["typing", "numpy"], "modelscope.pipelines.cv.mtcnn_face_detection_pipeline": ["torch", "os", "typing"], "modelscope.pipelines.cv.nerf_recon_acc_pipeline": ["typing"], "modelscope.pipelines.cv.image_bts_depth_estimation_pipeline": ["albumentations", "numpy", "torch", "cv2", "typing"], "modelscope.pipelines.cv.facial_landmark_confidence_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.face_reconstruction_pipeline": ["PIL", "tensorflow", "shutil", "numpy", "cv2", "torch", "scipy", "io", "face_alignment", "os", "typing"], "modelscope.pipelines.cv.mog_face_detection_pipeline": ["os", "typing", "numpy"], "modelscope.pipelines.cv.skin_retouching_pipeline": ["PIL", "tensorflow", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.vision_middleware_pipeline": ["mmcv", "numpy", "torch", "math", "torchvision", "os", "typing"], "modelscope.pipelines.cv.face_liveness_ir_pipeline": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "modelscope.pipelines.cv.image_detection_pipeline": ["typing", "numpy"], "modelscope.pipelines.cv.realtime_video_object_detection_pipeline": ["PIL", "numpy", "cv2", "json", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.video_panoptic_segmentation_pipeline": ["mmcv", "numpy", "cv2", "torch", "tqdm", "os", "typing"], "modelscope.pipelines.cv.action_detection_pipeline": ["os", "typing", "math"], "modelscope.pipelines.cv.product_segmentation_pipeline": ["typing", "numpy"], "modelscope.pipelines.cv.tbs_detection_pipeline": ["PIL", "numpy", "cv2", "torch", "colorsys", "os", "typing"], "modelscope.pipelines.cv.image_matching_pipeline": ["PIL", "numpy", "torch", "cv2", "typing"], "modelscope.pipelines.cv.video_category_pipeline": ["PIL", "numpy", "json", "torch", "decord", "torchvision", "os", "typing"], "modelscope.pipelines.cv.hand_static_pipeline": ["typing", "numpy"], "modelscope.pipelines.cv.animal_recognition_pipeline": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.pointcloud_sceneflow_estimation_pipeline": ["torch", "typing", "plyfile", "numpy"], "modelscope.pipelines.cv.image_instance_segmentation_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.video_frame_interpolation_pipeline": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "glob", "torchvision", "os", "typing"], "modelscope.pipelines.cv.image_quality_assessment_mos_pipeline": ["numpy", "cv2", "torch", "tempfile", "math", "torchvision", "typing"], "modelscope.pipelines.cv.video_summarization_pipeline": ["numpy", "cv2", "torch", "tqdm", "os", "typing"], "modelscope.pipelines.cv.panorama_depth_estimation_pipeline": ["PIL", "numpy", "torch", "cv2", "typing"], "modelscope.pipelines.cv.fast_instance_segmentation_pipeline": ["torch", "torchvision", "typing", "numpy"], "modelscope.pipelines.cv.vidt_pipeline": ["torch", "torchvision", "typing"], "modelscope.pipelines.cv.image_skychange_pipeline": ["PIL", "pdb", "numpy", "cv2", "time", "typing"], "modelscope.pipelines.cv.image_quality_assessment_man_pipeline": ["numpy", "cv2", "torch", "tempfile", "math", "torchvision", "typing"], "modelscope.pipelines.cv.image_restoration_pipeline": ["typing"], "modelscope.pipelines.cv.video_inpainting_pipeline": ["typing"], "modelscope.pipelines.cv.face_image_generation_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.video_super_resolution_pipeline": ["subprocess", 
"numpy", "cv2", "torch", "tempfile", "math", "torchvision", "os", "typing"], "modelscope.pipelines.cv.referring_video_object_segmentation_pipeline": ["PIL", "numpy", "torch", "einops", "tqdm", "tempfile", "moviepy", "torchvision", "typing"], "modelscope.pipelines.cv.virtual_try_on_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.ocr_recognition_pipeline": [], "modelscope.pipelines.cv.ocr_detection_pipeline": ["tensorflow", "tf_slim", "numpy", "cv2", "torch", "math", "os", "typing"], "modelscope.pipelines.cv.movie_scene_segmentation_pipeline": ["torch", "typing"], "modelscope.pipelines.cv.maskdino_instance_segmentation_pipeline": ["torch", "torchvision", "typing"], "modelscope.pipelines.cv.video_colorization_pipeline": ["PIL", "subprocess", "numpy", "cv2", "torch", "tempfile", "torchvision", "os", "typing"], "modelscope.pipelines.cv.image_human_parsing_pipeline": ["torch", "torchvision", "typing", "numpy"], "modelscope.pipelines.cv.face_liveness_xc_pipeline": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "modelscope.pipelines.cv.crowd_counting_pipeline": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "modelscope.pipelines.cv.video_depth_estimation_pipeline": ["typing"], "modelscope.pipelines.cv.image_colorization_pipeline": ["PIL", "numpy", "torch", "cv2", "torchvision", "typing"], "modelscope.pipelines.cv.arc_face_recognition_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_quality_assessment_degradation_pipeline": ["numpy", "cv2", "torch", "tempfile", "math", "torchvision", "typing"], "modelscope.pipelines.cv.ocr_utils.model_convnext_transformer": ["torch"], "modelscope.pipelines.cv.ocr_utils.model_resnet18_half": ["torch", "os"], "modelscope.pipelines.cv.ocr_utils.resnet18_v1": ["tensorflow", "tf_slim"], "modelscope.pipelines.cv.ocr_utils.model_dla34": ["torch", "os", "math", "numpy"], "modelscope.pipelines.cv.ocr_utils.ocr_modules.vitstr": ["torch", "logging", "functools", "copy", "__future__"], "modelscope.pipelines.cv.ocr_utils.ocr_modules.timm_tinyc": ["copy", "itertools", "torch", "logging", "functools", "math", "collections"], "modelscope.pipelines.cv.ocr_utils.ocr_modules.convnext": ["torch"], "modelscope.pipelines.cv.ocr_utils.table_process": ["copy", "numpy", "random", "cv2", "torch", "math"], "modelscope.pipelines.cv.ocr_utils.resnet_utils": ["tensorflow", "collections", "tf_slim"], "modelscope.pipelines.cv.ocr_utils.ops": ["tensorflow", "shutil", "numpy", "cv2", "absl", "sys", "math", "os", "uuid"], "modelscope.pipelines.cv.ocr_utils.utils": ["cv2", "pyclipper", "shapely", "numpy"], "modelscope.pipelines.cv.ocr_utils.model_vlpt": ["torch", "os", "sys", "math"], "modelscope.pipelines.cv.ocr_utils.model_resnet_mutex_v4_linewithchar": ["tensorflow", "tf_slim"], "modelscope.pipelines.cv.image_inpainting_sdv2_pipeline": ["numpy", "cv2", "torch", "tempfile", "sys", "math", "diffusers", "os", "typing"], "modelscope.pipelines.cv.image_super_resolution_pipeline": ["PIL", "numpy", "torch", "cv2", "typing"], "modelscope.pipelines.cv.image_salient_detection_pipeline": ["typing"], "modelscope.pipelines.cv.video_single_object_tracking_pipeline": ["cv2", "os", "typing"], "modelscope.pipelines.cv.face_recognition_onnx_ir_pipeline": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "modelscope.pipelines.cv.product_retrieval_embedding_pipeline": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.mask_face_recognition_pipeline": 
["PIL", "numpy", "torch", "cv2", "collections", "os", "typing"], "modelscope.pipelines.cv.mobile_image_super_resolution_pipeline": ["skimage", "numpy", "torch", "torchvision", "typing"], "modelscope.pipelines.cv.license_plate_detection_pipeline": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "modelscope.pipelines.cv.image_semantic_segmentation_pipeline": ["PIL", "numpy", "torch", "cv2", "typing"], "modelscope.pipelines.cv.text_driven_segmentation_pipleline": ["typing"], "modelscope.pipelines.cv.motion_generation_pipeline": ["numpy", "torch", "tempfile", "os", "typing"], "modelscope.pipelines.cv.image_mvs_depth_estimation_pipeline": ["os", "typing", "tempfile", "shutil"], "modelscope.pipelines.cv.image_depth_estimation_pipeline": ["PIL", "numpy", "torch", "cv2", "typing"], "modelscope.pipelines.cv.action_recognition_pipeline": ["torch", "os", "typing", "math"], "modelscope.pipelines.cv.image_reid_person_pipeline": ["PIL", "torch", "math", "torchvision", "os", "typing"], "modelscope.pipelines.cv.general_recognition_pipeline": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.shop_segmentation_pipleline": ["typing"], "modelscope.pipelines.base": ["multiprocessing", "numpy", "random", "torch", "abc", "functools", "packaging", "os", "threading", "typing"], "modelscope.preprocessors.kws": ["os", "typing", "yaml"], "modelscope.preprocessors.multi_modal": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "modelscope.preprocessors.science.uni_fold": ["unittest", "hashlib", "ipdb", "random", "numpy", "torch", "json", "tarfile", "pathlib", "os", "typing", "requests", "logging", "re", "tqdm", "time", "gzip", "pickle"], "modelscope.preprocessors.tts": ["os", "kantts", "typing"], "modelscope.preprocessors.asr": ["os", "typing"], "modelscope.preprocessors.builder": [], "modelscope.preprocessors.movie_scene_segmentation.transforms": ["numbers", "PIL", "random", "numpy", "torch", "torchvision", "os", "typing"], "modelscope.preprocessors.common": ["numpy", "torch", "collections", "time", "typing"], "modelscope.preprocessors.nlp.token_classification_preprocessor": ["torch", "typing", "numpy"], "modelscope.preprocessors.nlp.siamese_uie_preprocessor": ["typing", "transformers"], "modelscope.preprocessors.nlp.relation_extraction_preprocessor": ["typing", "transformers"], "modelscope.preprocessors.nlp.token_classification_viet_preprocessor": ["torch", "typing"], "modelscope.preprocessors.nlp.translation_evaluation_preprocessor": ["torch", "typing", "transformers"], "modelscope.preprocessors.nlp.text_classification_preprocessor": ["typing", "numpy"], "modelscope.preprocessors.nlp.document_grounded_dialog_retrieval_preprocessor": ["torch", "os", "typing", "transformers"], "modelscope.preprocessors.nlp.zero_shot_classification_preprocessor": ["typing"], "modelscope.preprocessors.nlp.canmt_translation": ["sacremoses", "jieba", "torch", "subword_nmt", "os", "typing"], "modelscope.preprocessors.nlp.fill_mask_preprocessor": ["numpy", "torch", "abc", "re", "os", "typing"], "modelscope.preprocessors.nlp.word_alignment_preprocessor": ["itertools", "numpy", "torch", "os", "typing"], "modelscope.preprocessors.nlp.space_T_en.fields.preprocess_dataset": ["text2sql_lgesql"], "modelscope.preprocessors.nlp.space_T_en.fields.parse": [], "modelscope.preprocessors.nlp.space_T_en.fields.common_utils": ["itertools", "numpy", "nltk", "sqlite3", "text2sql_lgesql", "os"], "modelscope.preprocessors.nlp.space_T_en.fields.process_dataset": 
["os", "sys", "pickle", "text2sql_lgesql"], "modelscope.preprocessors.nlp.space_T_en.conversational_text_to_sql_preprocessor": ["json", "torch", "text2sql_lgesql", "os", "typing"], "modelscope.preprocessors.nlp.document_grounded_dialog_generate_preprocessor": ["torch", "os", "typing", "transformers"], "modelscope.preprocessors.nlp.text_error_correction": ["torch", "os", "typing", "transformers"], "modelscope.preprocessors.nlp.text_ranking_preprocessor": ["typing", "transformers"], "modelscope.preprocessors.nlp.transformers_tokenizer": ["json", "os", "transformers", "collections"], "modelscope.preprocessors.nlp.bert_seq_cls_tokenizer": ["typing", "transformers"], "modelscope.preprocessors.nlp.text_clean": ["re", "codecs", "sys"], "modelscope.preprocessors.nlp.utils": ["transformers", "numpy", "json", "collections", "os", "typing"], "modelscope.preprocessors.nlp.document_segmentation_preprocessor": ["typing"], "modelscope.preprocessors.nlp.sentence_embedding_preprocessor": ["typing"], "modelscope.preprocessors.nlp.mglm_summarization_preprocessor": ["os", "re", "typing"], "modelscope.preprocessors.nlp.token_classification_thai_preprocessor": ["typing"], "modelscope.preprocessors.nlp.mgeo_ranking_preprocessor": ["torch", "typing", "transformers"], "modelscope.preprocessors.nlp.space.dialog_intent_prediction_preprocessor": ["json", "os", "typing"], "modelscope.preprocessors.nlp.space.lazy_dataset": ["json"], "modelscope.preprocessors.nlp.space.dialog_state_tracking_preprocessor": ["typing"], "modelscope.preprocessors.nlp.space.preprocess": ["os", "glob"], "modelscope.preprocessors.nlp.space.data_loader": ["os", "math", "numpy"], "modelscope.preprocessors.nlp.space.batch": [], "modelscope.preprocessors.nlp.space.dialog_modeling_preprocessor": ["os", "typing"], "modelscope.preprocessors.nlp.space.tokenizer": ["json", "logging", "__future__", "functools", "sys", "collections", "unicodedata", "os", "regex"], "modelscope.preprocessors.nlp.space.dst_processors": ["six", "numpy", "json", "logging", "re", "tqdm"], "modelscope.preprocessors.nlp.space.args": ["json", "argparse"], "modelscope.preprocessors.nlp.space.fields.gen_field": ["itertools", "random", "numpy", "json", "collections", "asyncio", "os"], "modelscope.preprocessors.nlp.space.fields.intent_field": ["multiprocessing", "itertools", "random", "numpy", "json", "re", "tqdm", "collections", "time", "glob", "os"], "modelscope.preprocessors.nlp.space.sampler": ["numpy"], "modelscope.preprocessors.nlp.space.tensorlistdataset": ["torch"], "modelscope.preprocessors.nlp.dialog_classification_use_preprocessor": ["torch", "typing", "transformers"], "modelscope.preprocessors.nlp.text_generation_preprocessor": ["torch", "os", "typing", "numpy"], "modelscope.preprocessors.nlp.space_T_cn.table_question_answering_preprocessor": ["torch", "os", "typing", "transformers"], "modelscope.preprocessors.nlp.space_T_cn.fields.database": ["json", "sqlite3", "tqdm"], "modelscope.preprocessors.nlp.space_T_cn.fields.schema_link": ["re"], "modelscope.preprocessors.nlp.space_T_cn.fields.struct": [], "modelscope.preprocessors.nlp.document_grounded_dialog_rerank_preprocessor": ["transformers", "torch", "copy", "os", "typing"], "modelscope.preprocessors.nlp.feature_extraction_preprocessor": ["typing", "numpy"], "modelscope.preprocessors.nlp.faq_question_answering_preprocessor": ["torch", "typing"], "modelscope.preprocessors.audio": ["numpy", "torch", "scipy", "io", "os", "typing"], "modelscope.preprocessors.cv.image_classification_preprocessor": ["PIL", "numpy", "cv2", 
"torch", "torchvision", "os", "typing"], "modelscope.preprocessors.cv.util": ["os", "sys", "shutil", "collections"], "modelscope.preprocessors.cv.timer": ["time"], "modelscope.preprocessors.cv.bad_image_detecting_preprocessor": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "modelscope.preprocessors.cv.mmcls_preprocessor": ["os", "typing", "numpy"], "modelscope.preprocessors.cv.controllable_image_generation": ["PIL", "numpy", "cv2", "torch", "math", "torchvision", "os", "typing"], "modelscope.preprocessors.cv.image_quality_assessment_mos": ["numpy", "cv2", "math", "torchvision", "typing"], "modelscope.preprocessors.cv.image_restoration_preprocessor": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "modelscope.preprocessors.cv.cv2_transforms": ["numbers", "random", "numpy", "cv2", "torch", "math", "collections"], "modelscope.preprocessors.cv.video_super_resolution": ["cv2", "os", "collections"], "modelscope.preprocessors.cv.image_quality_assessment_man": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "modelscope.preprocessors.cv.action_detection_mapper": ["copy", "numpy", "random", "torch", "decord", "scipy", "detectron2"], "modelscope.preprocessors.cv.video_stabilization": ["cv2", "torch", "numpy"], "modelscope.preprocessors.video": ["urllib", "numpy", "random", "torch", "decord", "tempfile", "math", "torchvision", "os", "uuid"], "modelscope.preprocessors.image": ["PIL", "numpy", "cv2", "io", "typing"], "modelscope.preprocessors.base": ["os", "abc", "typing"], "modelscope.preprocessors.ofa.visual_question_answering": ["PIL", "torch", "torchvision", "typing"], "modelscope.preprocessors.ofa.image_classification": ["PIL", "timm", "torch", "functools", "torchvision", "typing"], "modelscope.preprocessors.ofa.utils.transforms": ["PIL", "torchvision", "numpy", "random", "torch"], "modelscope.preprocessors.ofa.utils.bridge_content_encoder": ["sqlite3", "difflib", "rapidfuzz", "functools", "typing"], "modelscope.preprocessors.ofa.utils.collate": ["torch", "typing", "numpy"], "modelscope.preprocessors.ofa.utils.get_tables": ["traceback", "sqlite3", "sys"], "modelscope.preprocessors.ofa.utils.text2phone": [], "modelscope.preprocessors.ofa.utils.audio_helper": ["torch", "typing", "numpy"], "modelscope.preprocessors.ofa.utils.random_help": ["torch", "torch_xla"], "modelscope.preprocessors.ofa.utils.vision_helper": ["cv2", "numpy"], "modelscope.preprocessors.ofa.utils.constant": [], "modelscope.preprocessors.ofa.asr": ["random", "torch", "librosa", "fairseq", "soundfile", "pathlib", "os", "typing"], "modelscope.preprocessors.ofa.text2sql": ["random", "torch", "re", "os", "typing"], "modelscope.preprocessors.ofa.text_classification": ["torch", "typing"], "modelscope.preprocessors.ofa.image_captioning": ["torch", "torchvision", "typing"], "modelscope.preprocessors.ofa.ocr_recognition": ["torch", "unicodedata2", "torchvision", "typing", "zhconv"], "modelscope.preprocessors.ofa.visual_entailment": ["PIL", "torch", "torchvision", "typing"], "modelscope.preprocessors.ofa.visual_grounding": ["PIL", "numpy", "torch", "torchvision", "typing"], "modelscope.preprocessors.ofa.summarization": ["torch", "typing"], "modelscope.preprocessors.ofa.text_to_image_synthesis": ["torch", "typing"], "modelscope.preprocessors.ofa.sudoku": ["torch", "typing", "numpy"], "modelscope.preprocessors.ofa.base": ["PIL", "string", "numpy", "json", "torch", "torchaudio", "re", "io", "os"], "modelscope.trainers.parallel.builder": ["torch"], "modelscope.trainers.parallel.utils": [], 
"modelscope.trainers.optimizer.builder": ["torch", "inspect", "typing"], "modelscope.trainers.optimizer.child_tuning_adamw_optimizer": ["numpy", "torch", "types", "math", "typing"], "modelscope.trainers.lrscheduler.builder": ["torch", "inspect", "packaging"], "modelscope.trainers.lrscheduler.warmup.warmup": [], "modelscope.trainers.lrscheduler.warmup.base": ["torch"], "modelscope.trainers.nlp_trainer": ["torch", "os", "typing", "numpy"], "modelscope.trainers.utils.inference": ["shutil", "torch", "logging", "tqdm", "collections", "os", "pickle"], "modelscope.trainers.utils.log_buffer": ["collections", "numpy"], "modelscope.trainers.training_args": ["json", "re", "addict", "copy", "dataclasses", "typing"], "modelscope.trainers.builder": [], "modelscope.trainers.audio.kws_nearfield_trainer": ["torch", "re", "tensorboardX", "copy", "datetime", "yaml", "os", "typing"], "modelscope.trainers.audio.kws_utils.model_utils": ["shutil", "numpy", "torch", "re", "glob", "yaml", "os"], "modelscope.trainers.audio.kws_utils.runtime_utils": ["codecs", "shutil", "json", "re", "sys", "collections", "os", "stat"], "modelscope.trainers.audio.kws_utils.det_utils": ["kaldiio", "numpy", "json", "torch", "matplotlib", "os", "glob", "threading"], "modelscope.trainers.audio.kws_utils.batch_utils": ["numpy", "torch", "sys", "math", "collections", "datetime", "os", "typing"], "modelscope.trainers.audio.kws_utils.file_utils": ["re"], "modelscope.trainers.audio.kws_farfield_trainer": ["numpy", "torch", "math", "datetime", "glob", "os", "typing", "pickle"], "modelscope.trainers.audio.separation_trainer": ["numpy", "torch", "torchaudio", "tqdm", "csv", "os", "speechbrain", "typing"], "modelscope.trainers.audio.asr_trainer": ["shutil", "json", "typing", "tempfile", "os", "funasr"], "modelscope.trainers.audio.tts_trainer": ["shutil", "json", "tempfile", "os", "typing", "zipfile"], "modelscope.trainers.audio.ans_trainer": [], "modelscope.trainers.hooks.checkpoint.checkpoint_hook": ["random", "numpy", "torch", "time", "os", "typing"], "modelscope.trainers.hooks.checkpoint.checkpoint_processor": ["os", "re", "shutil"], "modelscope.trainers.hooks.checkpoint.load_checkpoint_hook": ["random", "numpy", "torch", "packaging", "typing"], "modelscope.trainers.hooks.logger.text_logger_hook": ["json", "torch", "collections", "datetime", "os"], "modelscope.trainers.hooks.logger.tensorboard_hook": ["torch", "os", "numpy"], "modelscope.trainers.hooks.logger.base": ["numbers", "torch", "abc", "numpy"], "modelscope.trainers.hooks.optimizer.apex_optimizer_hook": ["torch", "logging", "packaging"], "modelscope.trainers.hooks.optimizer.torch_optimizer_hook": ["logging"], "modelscope.trainers.hooks.optimizer.base": ["torch", "logging"], "modelscope.trainers.hooks.distributed.megatron_hook": ["torch", "os", "shutil", "megatron_util"], "modelscope.trainers.hooks.distributed.deepspeed_hook": ["shutil", "torch", "megatron_util", "deepspeed", "os"], "modelscope.trainers.hooks.distributed.ddp_hook": [], "modelscope.trainers.hooks.lr_scheduler_hook": [], "modelscope.trainers.hooks.early_stop_hook": ["numpy"], "modelscope.trainers.hooks.hook": ["functools"], "modelscope.trainers.hooks.priority": ["typing", "enum"], "modelscope.trainers.hooks.builder": [], "modelscope.trainers.hooks.clip_clamp_logit_scale_hook": ["torch"], "modelscope.trainers.hooks.compression.sparsity_hook": ["os"], "modelscope.trainers.hooks.compression.utils": ["torch"], "modelscope.trainers.hooks.iter_timer_hook": ["time"], "modelscope.trainers.hooks.evaluation_hook": ["typing", 
"collections"], "modelscope.trainers.multi_modal.clip.clip_trainer": ["torch", "os", "typing", "math"], "modelscope.trainers.multi_modal.clip.clip_trainer_utils": ["torch", "functools", "math", "inspect", "os"], "modelscope.trainers.multi_modal.efficient_diffusion_tuning.efficient_diffusion_tuning_trainer": ["torch", "typing"], "modelscope.trainers.multi_modal.mplug.mplug_trainer": ["torch", "typing", "collections"], "modelscope.trainers.multi_modal.team.team_trainer": ["numpy", "torch", "collections", "sklearn", "os", "typing"], "modelscope.trainers.multi_modal.team.team_trainer_utils": ["torch", "torchvision", "PIL"], "modelscope.trainers.multi_modal.mgeo_ranking_trainer": ["torch", "dataclasses", "typing"], "modelscope.trainers.multi_modal.ofa.ofa_trainer": ["shutil", "json", "torch", "functools", "tempfile", "math", "os", "typing"], "modelscope.trainers.multi_modal.ofa.ofa_trainer_utils": ["transformers", "shutil", "numpy", "torch", "os", "math"], "modelscope.trainers.default_config": ["typing"], "modelscope.trainers.nlp.gpt_moe_trainer": ["torch", "collections", "megatron_util", "os", "typing"], "modelscope.trainers.nlp.plug_trainer": ["torch", "megatron_util", "deepspeed", "os", "typing"], "modelscope.trainers.nlp.text_generation_trainer": ["torch", "collections"], "modelscope.trainers.nlp.document_grounded_dialog_rerank_trainer": ["transformers", "numpy", "random", "torch", "time", "os", "typing"], "modelscope.trainers.nlp.csanmt_translation_trainer": ["os", "tensorflow", "typing", "time"], "modelscope.trainers.nlp.translation_evaluation_trainer": ["transformers", "random", "torch", "tqdm", "math", "pandas", "os", "typing"], "modelscope.trainers.nlp.faq_question_answering_trainer": ["distutils", "contextlib", "numpy", "torch", "functools", "collections", "dataclasses", "typing"], "modelscope.trainers.nlp.table_question_answering_trainer": ["numpy", "json", "torch", "tqdm", "time", "os", "typing"], "modelscope.trainers.nlp.sequence_classification_trainer": ["time", "typing", "numpy"], "modelscope.trainers.nlp.sentence_embedding_trainer": ["transformers", "numpy", "torch", "tqdm", "time", "dataclasses", "typing"], "modelscope.trainers.nlp.gpt3_trainer": ["torch", "os", "copy", "typing"], "modelscope.trainers.nlp.text_ranking_trainer": ["numpy", "torch", "tqdm", "time", "dataclasses", "typing"], "modelscope.trainers.nlp.siamese_uie_trainer": ["random", "numpy", "json", "torch", "collections", "math", "time", "os", "typing"], "modelscope.trainers.nlp.space.metrics.metrics_tracker": ["math", "collections"], "modelscope.trainers.nlp.space.dialog_intent_trainer": ["os", "typing", "numpy"], "modelscope.trainers.nlp.space.eval": ["numpy", "json", "math", "collections", "nltk", "sklearn"], "modelscope.trainers.nlp.space.trainer.intent_trainer": ["transformers", "numpy", "json", "torch", "tqdm", "collections", "time", "os"], "modelscope.trainers.nlp.space.trainer.gen_trainer": ["transformers", "numpy", "json", "torch", "tqdm", "collections", "time", "os"], "modelscope.trainers.nlp.space.dialog_modeling_trainer": ["os", "time", "typing", "numpy"], "modelscope.trainers.nlp.document_grounded_dialog_retrieval_trainer": ["transformers", "numpy", "json", "torch", "tqdm", "faiss", "os"], "modelscope.trainers.nlp.document_grounded_dialog_generate_trainer": ["string", "transformers", "json", "torch", "rouge", "re", "tqdm", "collections", "os", "sacrebleu"], "modelscope.trainers.cli_argument_parser": ["dataclasses", "typing", "argparse"], "modelscope.trainers.cv.ocr_recognition_trainer": ["torch", 
"time", "collections"], "modelscope.trainers.cv.image_instance_segmentation_trainer": [], "modelscope.trainers.cv.referring_video_object_segmentation_trainer": ["torch", "os"], "modelscope.trainers.cv.vision_efficient_tuning_trainer": ["torch", "typing"], "modelscope.trainers.cv.movie_scene_segmentation_trainer": [], "modelscope.trainers.cv.nerf_recon_acc_trainer": ["random", "numpy", "cv2", "torch", "tqdm", "time", "datetime", "glob", "os", "typing"], "modelscope.trainers.cv.image_detection_damoyolo_trainer": ["torch", "math", "datetime", "time", "os", "easydict", "typing"], "modelscope.trainers.cv.image_classifition_trainer": ["numpy", "torch", "copy", "time", "os", "typing"], "modelscope.trainers.cv.cartoon_translation_trainer": ["tensorflow", "numpy", "tqdm", "packaging", "os", "typing"], "modelscope.trainers.cv.ocr_detection_db_trainer": ["numpy", "torch", "tqdm", "math", "copy", "datetime", "time", "os", "easydict", "typing"], "modelscope.trainers.cv.card_detection_scrfd_trainer": [], "modelscope.trainers.cv.face_detection_scrfd_trainer": ["copy", "time", "typing", "os"], "modelscope.trainers.cv.image_inpainting_trainer": ["torch", "time", "collections"], "modelscope.trainers.cv.image_portrait_enhancement_trainer": ["torch", "collections"], "modelscope.trainers.cv.action_detection_trainer": ["torch", "fvcore", "os", "typing", "detectron2"], "modelscope.trainers.cv.image_defrcn_fewshot_detection_trainer": ["torch", "collections", "os", "typing", "detectron2"], "modelscope.trainers.trainer": ["distutils", "json", "torch", "functools", "collections", "copy", "inspect", "os", "typing"], "modelscope.trainers.base": ["os", "abc", "typing", "time"], "modelscope.msdatasets.ms_dataset": ["datasets", "numpy", "warnings", "os", "typing"], "modelscope.msdatasets.context.dataset_context_config": ["typing"], "modelscope.msdatasets.auth.auth_config": ["http", "typing"], "modelscope.msdatasets.meta.data_meta_config": [], "modelscope.msdatasets.meta.data_meta_manager": ["datasets", "shutil", "json", "collections", "os"], "modelscope.msdatasets.utils.oss_utils": ["multiprocessing", "datasets", "__future__", "oss2", "os"], "modelscope.msdatasets.utils.maxcompute_utils": ["pandas", "math"], "modelscope.msdatasets.utils.dataset_utils": ["os", "typing", "collections"], "modelscope.msdatasets.utils.delete_utils": [], "modelscope.msdatasets.utils.upload_utils": ["os", "tqdm", "multiprocessing"], "modelscope.msdatasets.task_datasets.video_summarization_dataset": [], "modelscope.msdatasets.task_datasets.sidd_image_denoising": [], "modelscope.msdatasets.task_datasets.torch_base_dataset": [], "modelscope.msdatasets.task_datasets.reds_image_deblurring_dataset": [], "modelscope.msdatasets.task_datasets.gopro_image_deblurring_dataset": [], "modelscope.msdatasets.data_files.data_files_manager": ["os", "datasets", "typing"], "modelscope.msdatasets.audio.asr_dataset": [], "modelscope.msdatasets.download.download_config": ["datasets", "typing"], "modelscope.msdatasets.download.download_manager": ["datasets"], "modelscope.msdatasets.download.dataset_builder": ["datasets", "pandas", "pyarrow", "os", "typing"], "modelscope.msdatasets.dataset_cls.dataset": ["copy", "pandas", "datasets", "os"], "modelscope.msdatasets.dataset_cls.custom_datasets.image_quality_assessment_degradation.image_quality_assessment_degradation_dataset": ["torchvision"], "modelscope.msdatasets.dataset_cls.custom_datasets.image_portrait_enhancement.data_utils": ["cv2", "torch"], 
"modelscope.msdatasets.dataset_cls.custom_datasets.image_portrait_enhancement.image_portrait_enhancement_dataset": ["cv2", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.veco_dataset": ["datasets", "typing", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.image_instance_segmentation_coco_dataset": ["os", "numpy", "pycocotools"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_recognition_dataset": ["PIL", "numpy", "cv2", "json", "torch", "six", "lmdb", "os"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.image_dataset": ["numpy", "cv2", "torch", "logging", "functools", "math", "bisect", "os", "glob"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.processes.make_border_map": ["cv2", "pyclipper", "shapely", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.processes.normalize_image": ["torch", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.processes.make_icdar_data": ["cv2", "torch", "collections", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.processes.make_seg_detection_data": ["cv2", "pyclipper", "shapely", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.processes.data_process": [], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.processes.augment_data": ["cv2", "imgaug", "math", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.processes.random_crop_data": ["cv2", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.data_loader": ["numpy", "torch", "math", "imgaug", "bisect"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.measures.quad_measurer": ["numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.measures.iou_evaluator": ["numpy", "shapely", "collections"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.augmenter": ["imgaug"], "modelscope.msdatasets.dataset_cls.custom_datasets.bad_image_detecting.bad_image_detecting_dataset": [], "modelscope.msdatasets.dataset_cls.custom_datasets.video_summarization_dataset": ["numpy", "json", "torch", "h5py", "os"], "modelscope.msdatasets.dataset_cls.custom_datasets.image_inpainting.image_inpainting_dataset": ["albumentations", "numpy", "enum", "cv2", "os", "glob"], "modelscope.msdatasets.dataset_cls.custom_datasets.image_inpainting.aug": ["albumentations", "imgaug"], "modelscope.msdatasets.dataset_cls.custom_datasets.language_guided_video_summarization_dataset": ["numpy", "json", "torch", "h5py", "os"], "modelscope.msdatasets.dataset_cls.custom_datasets.builder": [], "modelscope.msdatasets.dataset_cls.custom_datasets.movie_scene_segmentation.movie_scene_segmentation_dataset": ["random", "json", "torch", "copy", "torchvision", "os"], "modelscope.msdatasets.dataset_cls.custom_datasets.movie_scene_segmentation.sampler": ["random", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.text_ranking_dataset": ["torch", "typing", "random"], "modelscope.msdatasets.dataset_cls.custom_datasets.audio.kws_nearfield_dataset": ["torch", "random"], "modelscope.msdatasets.dataset_cls.custom_datasets.audio.kws_farfield_dataset": ["numpy", "torch", "math", "queue", "os", "threading"], "modelscope.msdatasets.dataset_cls.custom_datasets.audio.kws_nearfield_processor": ["kaldiio", "numpy", "random", "json", "torch", "torchaudio"], "modelscope.msdatasets.dataset_cls.custom_datasets.audio.asr_dataset": ["os"], 
"modelscope.msdatasets.dataset_cls.custom_datasets.sidd_image_denoising.transforms": ["random"], "modelscope.msdatasets.dataset_cls.custom_datasets.sidd_image_denoising.sidd_image_denoising_dataset": ["cv2", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.sidd_image_denoising.data_utils": ["cv2", "torch"], "modelscope.msdatasets.dataset_cls.custom_datasets.reds_image_deblurring_dataset": ["cv2", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.video_frame_interpolation.data_utils": ["cv2", "torch"], "modelscope.msdatasets.dataset_cls.custom_datasets.video_frame_interpolation.video_frame_interpolation_dataset": ["cv2", "torch", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.image_quality_assmessment_mos.image_quality_assessment_mos_dataset": [], "modelscope.msdatasets.dataset_cls.custom_datasets.mgeo_ranking_dataset": ["json", "torch", "typing", "random"], "modelscope.msdatasets.dataset_cls.custom_datasets.video_stabilization.video_stabilization_dataset": [], "modelscope.msdatasets.dataset_cls.custom_datasets.gopro_image_deblurring_dataset": ["cv2", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.referring_video_object_segmentation.transformers": ["PIL", "torch", "torchvision", "random"], "modelscope.msdatasets.dataset_cls.custom_datasets.referring_video_object_segmentation.referring_video_object_segmentation_dataset": ["numpy", "pycocotools", "json", "torch", "tqdm", "h5py", "glob", "torchvision", "pandas", "os"], "modelscope.msdatasets.dataset_cls.custom_datasets.image_colorization.image_colorization_dataset": ["cv2", "torch", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.build": ["torch", "copy", "bisect", "math"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.datasets.coco": ["cv2", "torch", "torchvision", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.datasets.mosaic_wrapper": ["random", "numpy", "cv2", "torch", "math"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.transforms.build": [], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.transforms.transforms": ["torchvision", "numpy", "random", "cv2", "torch"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.collate_batch": [], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.samplers.grouped_batch_sampler": ["torch", "itertools"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.samplers.distributed": ["torch", "math"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.samplers.iteration_based_batch_sampler": ["torch"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.evaluation.coco.coco_eval": ["torch", "os", "tempfile", "collections"], "modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base": ["os"], "modelscope.msdatasets.dataset_cls.custom_datasets.torch_custom_dataset": ["torch", "typing"], "modelscope.msdatasets.dataset_cls.custom_datasets.video_super_resolution.video_super_resolution_dataset": ["cv2", "torch", "collections", "numpy"], "modelscope.msdatasets.data_loader.data_loader_manager": ["os", "abc", "datasets", "enum"], "modelscope.msdatasets.data_loader.data_loader": ["os", "abc", "datasets", "typing"], "modelscope.exporters.torch_model_exporter": ["itertools", "contextlib", "torch", "os", "typing"], "modelscope.exporters.builder": [], "modelscope.exporters.audio.ans_dfsmn_exporter": ["torch", "os"], "modelscope.exporters.nlp.csanmt_for_translation_exporter": ["os", "typing", "tensorflow"], 
"modelscope.exporters.nlp.model_for_token_classification_exporter": ["torch", "typing", "collections"], "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter": ["torch", "typing", "collections"], "modelscope.exporters.nlp.sbert_for_zero_shot_classification_exporter": ["typing", "collections"], "modelscope.exporters.cv.object_detection_damoyolo_exporter": ["numpy", "torch", "functools", "onnx", "os", "typing"], "modelscope.exporters.cv.face_detection_scrfd_exporter": ["numpy", "torch", "functools", "onnx", "os", "typing"], "modelscope.exporters.cv.cartoon_translation_exporter": ["os", "tensorflow", "typing", "packaging"], "modelscope.exporters.tf_model_exporter": ["os", "tensorflow", "typing"], "modelscope.exporters.base": ["os", "abc", "typing"]}, "version": "1.6.0", "md5": "5e46ad1c70848d28c7aeafd9db9c3aac", "files_mtime": {"TEMPLATE_PATH/models/science/unifold/config.py": 1666778289.6766584, "TEMPLATE_PATH/models/science/unifold/msa/tools/hmmsearch.py": 1666778289.6888485, "TEMPLATE_PATH/models/science/unifold/msa/tools/hhblits.py": 1666778289.6881094, "TEMPLATE_PATH/models/science/unifold/msa/tools/kalign.py": 1666778289.689356, "TEMPLATE_PATH/models/science/unifold/msa/tools/utils.py": 1666778289.6895845, "TEMPLATE_PATH/models/science/unifold/msa/tools/hmmbuild.py": 1666778289.6885293, "TEMPLATE_PATH/models/science/unifold/msa/tools/jackhmmer.py": 1666778289.6891205, "TEMPLATE_PATH/models/science/unifold/msa/tools/hhsearch.py": 1666778289.6883202, "TEMPLATE_PATH/models/science/unifold/msa/mmcif.py": 1666778289.6854372, "TEMPLATE_PATH/models/science/unifold/msa/msa_identifiers.py": 1666778289.6857276, "TEMPLATE_PATH/models/science/unifold/msa/parsers.py": 1666778289.6860957, "TEMPLATE_PATH/models/science/unifold/msa/templates.py": 1684246001.5188344, "TEMPLATE_PATH/models/science/unifold/msa/utils.py": 1666778289.6898172, "TEMPLATE_PATH/models/science/unifold/msa/pipeline.py": 1669108798.6335008, "TEMPLATE_PATH/models/science/unifold/model.py": 1669108798.6326127, "TEMPLATE_PATH/models/science/unifold/dataset.py": 1669108798.63184, "TEMPLATE_PATH/models/science/unifold/modules/confidence.py": 1666778289.6826582, "TEMPLATE_PATH/models/science/unifold/modules/alphafold.py": 1666778289.6816177, "TEMPLATE_PATH/models/science/unifold/modules/evoformer.py": 1666778289.683275, "TEMPLATE_PATH/models/science/unifold/modules/auxillary_heads.py": 1666778289.682163, "TEMPLATE_PATH/models/science/unifold/modules/attentions.py": 1678345974.7664688, "TEMPLATE_PATH/models/science/unifold/modules/embedders.py": 1666778289.6829705, "TEMPLATE_PATH/models/science/unifold/modules/structure_module.py": 1669108798.6331663, "TEMPLATE_PATH/models/science/unifold/modules/common.py": 1666778289.6823854, "TEMPLATE_PATH/models/science/unifold/modules/frame.py": 1666778289.683827, "TEMPLATE_PATH/models/science/unifold/modules/template.py": 1666778289.6844184, "TEMPLATE_PATH/models/science/unifold/modules/triangle_multiplication.py": 1666778289.6846595, "TEMPLATE_PATH/models/science/unifold/modules/featurization.py": 1666778289.6835535, "TEMPLATE_PATH/models/science/unifold/data/process_multimer.py": 1666778289.6789792, "TEMPLATE_PATH/models/science/unifold/data/protein.py": 1666778289.6792727, "TEMPLATE_PATH/models/science/unifold/data/residue_constants.py": 1669108798.6314445, "TEMPLATE_PATH/models/science/unifold/data/utils.py": 1666778289.6802187, "TEMPLATE_PATH/models/science/unifold/data/process.py": 1666778289.6786027, "TEMPLATE_PATH/models/science/unifold/data/msa_pairing.py": 
1684246001.5181975, "TEMPLATE_PATH/models/science/unifold/data/data_ops.py": 1678345974.7659872, "TEMPLATE_PATH/models/builder.py": 1678695526.2830884, "TEMPLATE_PATH/models/audio/ans/layers/activations.py": 1678695526.2749481, "TEMPLATE_PATH/models/audio/ans/layers/layer_base.py": 1678695526.2758405, "TEMPLATE_PATH/models/audio/ans/layers/affine_transform.py": 1678695526.2755635, "TEMPLATE_PATH/models/audio/ans/layers/uni_deep_fsmn.py": 1678695526.2761767, "TEMPLATE_PATH/models/audio/ans/unet.py": 1666757257.138642, "TEMPLATE_PATH/models/audio/ans/conv_stft.py": 1684246001.4631696, "TEMPLATE_PATH/models/audio/ans/denoise_net.py": 1678695526.2738411, "TEMPLATE_PATH/models/audio/ans/complex_nn.py": 1678695526.273521, "TEMPLATE_PATH/models/audio/ans/se_module_complex.py": 1666757257.1383736, "TEMPLATE_PATH/models/audio/ans/frcrn.py": 1678695526.2743342, "TEMPLATE_PATH/models/audio/sv/DTDNN_layers.py": 1683889954.4686143, "TEMPLATE_PATH/models/audio/sv/ecapa_tdnn.py": 1678345974.1721325, "TEMPLATE_PATH/models/audio/sv/ERes2Net.py": 1684247769.663397, "TEMPLATE_PATH/models/audio/sv/pooling_layers.py": 1684247769.6642458, "TEMPLATE_PATH/models/audio/sv/DTDNN.py": 1684246001.4648209, "TEMPLATE_PATH/models/audio/sv/fusion.py": 1684247769.6637704, "TEMPLATE_PATH/models/audio/sv/generic_speaker_verification.py": 1678695526.2780309, "TEMPLATE_PATH/models/audio/sv/speaker_change_locator.py": 1684246001.4659781, "TEMPLATE_PATH/models/audio/sv/rdino.py": 1684246001.4655278, "TEMPLATE_PATH/models/audio/itn/generic_inverse_text_processing.py": 1678345974.1680963, "TEMPLATE_PATH/models/audio/aec/layers/activations.py": 1666757257.1350431, "TEMPLATE_PATH/models/audio/aec/layers/layer_base.py": 1666757257.1358142, "TEMPLATE_PATH/models/audio/aec/layers/deep_fsmn.py": 1666757257.1355417, "TEMPLATE_PATH/models/audio/aec/layers/affine_transform.py": 1666757257.1352675, "TEMPLATE_PATH/models/audio/aec/layers/uni_deep_fsmn.py": 1666757257.1360576, "TEMPLATE_PATH/models/audio/aec/network/se_net.py": 1666757257.1370454, "TEMPLATE_PATH/models/audio/aec/network/loss.py": 1666757257.1365721, "TEMPLATE_PATH/models/audio/aec/network/modulation_loss.py": 1666757257.136794, "TEMPLATE_PATH/models/audio/asr/wenet_automatic_speech_recognition.py": 1678345974.1674347, "TEMPLATE_PATH/models/audio/asr/generic_automatic_speech_recognition.py": 1684246001.463599, "TEMPLATE_PATH/models/audio/punc/generic_punctuation.py": 1678345974.1698205, "TEMPLATE_PATH/models/audio/tts/voice.py": 1684246001.466414, "TEMPLATE_PATH/models/audio/tts/sambert_hifi.py": 1678695526.2786689, "TEMPLATE_PATH/models/audio/separation/mossformer.py": 1678345974.1705601, "TEMPLATE_PATH/models/audio/separation/mossformer_conv_module.py": 1678345974.1713047, "TEMPLATE_PATH/models/audio/separation/mossformer_block.py": 1678345974.1711044, "TEMPLATE_PATH/models/audio/separation/layer_norm.py": 1678345974.1702788, "TEMPLATE_PATH/models/audio/kws/farfield/fsmn.py": 1666757257.1401393, "TEMPLATE_PATH/models/audio/kws/farfield/fsmn_sele_v2.py": 1683889954.4661622, "TEMPLATE_PATH/models/audio/kws/farfield/fsmn_sele_v3.py": 1684246001.4639575, "TEMPLATE_PATH/models/audio/kws/farfield/model_def.py": 1666757257.140835, "TEMPLATE_PATH/models/audio/kws/farfield/model.py": 1684246001.4643233, "TEMPLATE_PATH/models/audio/kws/generic_key_word_spotting.py": 1666757257.1410184, "TEMPLATE_PATH/models/audio/kws/nearfield/fsmn.py": 1683889954.4674246, "TEMPLATE_PATH/models/audio/kws/nearfield/model.py": 1683889954.4677804, "TEMPLATE_PATH/models/audio/kws/nearfield/cmvn.py": 
1678345974.1689863, "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py": 1678345974.6520555, "TEMPLATE_PATH/models/multi_modal/clip/configuration_bert.py": 1666757257.302656, "TEMPLATE_PATH/models/multi_modal/clip/bert_tokenizer.py": 1669108798.597482, "TEMPLATE_PATH/models/multi_modal/clip/model.py": 1678345974.6126437, "TEMPLATE_PATH/models/multi_modal/clip/modeling_bert.py": 1678345974.6139398, "TEMPLATE_PATH/models/multi_modal/mplug_for_all_tasks.py": 1678345974.6332867, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/decoder.py": 1666757257.3277674, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/prior.py": 1666757257.3294334, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/upsampler.py": 1666757257.3308744, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/model.py": 1678345974.6361222, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/tokenizer.py": 1678695526.5035207, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/xglm.py": 1678695526.5041978, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/gaussian_diffusion.py": 1678695526.502133, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/clip.py": 1678695526.5006785, "TEMPLATE_PATH/models/multi_modal/diffusion/structbert.py": 1678345974.617392, "TEMPLATE_PATH/models/multi_modal/diffusion/diffusion.py": 1681714768.8946908, "TEMPLATE_PATH/models/multi_modal/diffusion/unet_generator.py": 1678695526.4923015, "TEMPLATE_PATH/models/multi_modal/diffusion/model.py": 1678345974.6162271, "TEMPLATE_PATH/models/multi_modal/diffusion/tokenizer.py": 1678345974.6183596, "TEMPLATE_PATH/models/multi_modal/diffusion/unet_upsampler_256.py": 1678695526.494478, "TEMPLATE_PATH/models/multi_modal/diffusion/unet_upsampler_1024.py": 1678695526.4934785, "TEMPLATE_PATH/models/multi_modal/efficient_diffusion_tuning/efficient_stable_diffusion.py": 1683889954.5002546, "TEMPLATE_PATH/models/multi_modal/gemm/gemm_base.py": 1669108798.5997014, "TEMPLATE_PATH/models/multi_modal/gemm/gemm_model.py": 1666757257.312342, "TEMPLATE_PATH/models/multi_modal/gemm/tokenizer.py": 1666757257.3128963, "TEMPLATE_PATH/models/multi_modal/mmr/dataloaders/rawvideo_util.py": 1666757257.3151526, "TEMPLATE_PATH/models/multi_modal/mmr/models/module_clip.py": 1666757257.3185143, "TEMPLATE_PATH/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py": 1684246001.5114832, "TEMPLATE_PATH/models/multi_modal/mmr/models/module_cross.py": 1666757257.319208, "TEMPLATE_PATH/models/multi_modal/mmr/models/until_module.py": 1666757257.3205154, "TEMPLATE_PATH/models/multi_modal/mmr/models/tokenization_clip.py": 1666757257.3197618, "TEMPLATE_PATH/models/multi_modal/mmr/models/modeling.py": 1666757257.3177187, "TEMPLATE_PATH/models/multi_modal/mmr/models/dynamic_inverted_softmax.py": 1666757257.3171651, "TEMPLATE_PATH/models/multi_modal/mplug/predictor.py": 1666757257.3251338, "TEMPLATE_PATH/models/multi_modal/mplug/clip/clip.py": 1666757257.322875, "TEMPLATE_PATH/models/multi_modal/mplug/modeling_mplug.py": 1678345974.631151, "TEMPLATE_PATH/models/multi_modal/mplug/mvit.py": 1678345974.632255, "TEMPLATE_PATH/models/multi_modal/mplug/configuration_mplug.py": 1678345974.629229, "TEMPLATE_PATH/models/multi_modal/team/team_model.py": 1666757257.344364, "TEMPLATE_PATH/models/multi_modal/team/utils.py": 1666757257.3448434, "TEMPLATE_PATH/models/multi_modal/guided_diffusion/respace.py": 1681714768.8972325, "TEMPLATE_PATH/models/multi_modal/guided_diffusion/unet.py": 1681714768.8985677, 
"TEMPLATE_PATH/models/multi_modal/guided_diffusion/gaussian_diffusion.py": 1681714768.896804, "TEMPLATE_PATH/models/multi_modal/guided_diffusion/script.py": 1681714768.8976767, "TEMPLATE_PATH/models/multi_modal/vldoc/tokenization.py": 1678345974.6881084, "TEMPLATE_PATH/models/multi_modal/vldoc/model.py": 1678345974.6853945, "TEMPLATE_PATH/models/multi_modal/vldoc/conv_fpn_trans.py": 1678345974.6839283, "TEMPLATE_PATH/models/multi_modal/vldoc/transformer_local.py": 1678345974.6888812, "TEMPLATE_PATH/models/multi_modal/vldoc/modeling_layout_roberta.py": 1678345974.6863377, "TEMPLATE_PATH/models/multi_modal/vldoc/processing.py": 1678345974.6873274, "TEMPLATE_PATH/models/multi_modal/vldoc/convnext.py": 1678345974.6846218, "TEMPLATE_PATH/models/multi_modal/soonet/model.py": 1681714768.901114, "TEMPLATE_PATH/models/multi_modal/soonet/tokenizer.py": 1681714768.9022171, "TEMPLATE_PATH/models/multi_modal/soonet/utils.py": 1681714768.9025855, "TEMPLATE_PATH/models/multi_modal/soonet/blocks.py": 1681714768.9001248, "TEMPLATE_PATH/models/multi_modal/soonet/swin_transformer.py": 1681714768.90172, "TEMPLATE_PATH/models/multi_modal/soonet/clip.py": 1681714768.90064, "TEMPLATE_PATH/models/multi_modal/mgeo/text_ranking.py": 1678345974.626834, "TEMPLATE_PATH/models/multi_modal/mgeo/backbone.py": 1678345974.6254547, "TEMPLATE_PATH/models/multi_modal/mgeo/text_classification.py": 1678345974.6262727, "TEMPLATE_PATH/models/multi_modal/mgeo/token_classification.py": 1678345974.6274276, "TEMPLATE_PATH/models/multi_modal/mplug_owl/configuration_mplug_owl.py": 1684246001.513214, "TEMPLATE_PATH/models/multi_modal/mplug_owl/modeling_mplug_owl.py": 1684246001.5142066, "TEMPLATE_PATH/models/multi_modal/ofa_for_text_to_image_synthesis_model.py": 1678345974.6531072, "TEMPLATE_PATH/models/multi_modal/video_synthesis/diffusion.py": 1681714768.9037023, "TEMPLATE_PATH/models/multi_modal/video_synthesis/text_to_video_synthesis_model.py": 1678695526.5113559, "TEMPLATE_PATH/models/multi_modal/video_synthesis/autoencoder.py": 1678695526.510036, "TEMPLATE_PATH/models/multi_modal/video_synthesis/unet_sd.py": 1678695526.5119526, "TEMPLATE_PATH/models/multi_modal/clip_interrogator/model.py": 1684246001.5105355, "TEMPLATE_PATH/models/multi_modal/rleg/model.py": 1678695526.5053334, "TEMPLATE_PATH/models/multi_modal/rleg/rleg.py": 1678695526.5057476, "TEMPLATE_PATH/models/multi_modal/dpm_solver_pytorch.py": 1678695526.4955242, "TEMPLATE_PATH/models/multi_modal/ofa/modeling_ofa.py": 1678345974.6454003, "TEMPLATE_PATH/models/multi_modal/ofa/utils/utils.py": 1678345974.6500447, "TEMPLATE_PATH/models/multi_modal/ofa/utils/constant.py": 1678345974.649251, "TEMPLATE_PATH/models/multi_modal/ofa/vit.py": 1678345974.6508958, "TEMPLATE_PATH/models/multi_modal/ofa/modeling_mmspeech.py": 1678345974.6442, "TEMPLATE_PATH/models/multi_modal/ofa/resnet.py": 1678345974.6463652, "TEMPLATE_PATH/models/multi_modal/ofa/tokenization_ofa.py": 1678345974.6473439, "TEMPLATE_PATH/models/multi_modal/ofa/generate/multihead_attention.py": 1666757257.335432, "TEMPLATE_PATH/models/multi_modal/ofa/generate/ngram_repeat_block.py": 1666757257.335963, "TEMPLATE_PATH/models/multi_modal/ofa/generate/sequence_generator.py": 1678345974.642128, "TEMPLATE_PATH/models/multi_modal/ofa/generate/incremental_decoding_utils.py": 1666757257.3349085, "TEMPLATE_PATH/models/multi_modal/ofa/generate/utils.py": 1678345974.6431253, "TEMPLATE_PATH/models/multi_modal/ofa/generate/search.py": 1678345974.6410236, "TEMPLATE_PATH/models/multi_modal/ofa/generate/token_generation_constraints.py": 
1666757257.3377285, "TEMPLATE_PATH/models/multi_modal/ofa/tokenization_ofa_fast.py": 1678345974.6482744, "TEMPLATE_PATH/models/multi_modal/ofa/configuration_mmspeech.py": 1678345974.6392608, "TEMPLATE_PATH/models/multi_modal/ofa/configuration_ofa.py": 1678345974.640075, "TEMPLATE_PATH/models/nlp/unite/configuration.py": 1684246001.5170493, "TEMPLATE_PATH/models/nlp/unite/translation_evaluation.py": 1684246001.5173905, "TEMPLATE_PATH/models/nlp/palm_v2/configuration.py": 1678345974.7403622, "TEMPLATE_PATH/models/nlp/palm_v2/dureader_eval.py": 1666757257.3743646, "TEMPLATE_PATH/models/nlp/palm_v2/text_generation.py": 1681714768.9220717, "TEMPLATE_PATH/models/nlp/structbert/configuration.py": 1678345974.7552435, "TEMPLATE_PATH/models/nlp/structbert/fill_mask.py": 1678345974.7563565, "TEMPLATE_PATH/models/nlp/structbert/backbone.py": 1678345974.7548847, "TEMPLATE_PATH/models/nlp/structbert/faq_question_answering.py": 1678345974.7559564, "TEMPLATE_PATH/models/nlp/structbert/adv_utils.py": 1678695526.531147, "TEMPLATE_PATH/models/nlp/structbert/text_classification.py": 1678345974.7566974, "TEMPLATE_PATH/models/nlp/structbert/token_classification.py": 1678345974.7570403, "TEMPLATE_PATH/models/nlp/hf_transformers/backbone.py": 1678695526.5259144, "TEMPLATE_PATH/models/nlp/task_models/fill_mask.py": 1678345974.7579868, "TEMPLATE_PATH/models/nlp/task_models/text_ranking.py": 1678345974.7599752, "TEMPLATE_PATH/models/nlp/task_models/feature_extraction.py": 1678345974.7576537, "TEMPLATE_PATH/models/nlp/task_models/text_classification.py": 1678345974.7593715, "TEMPLATE_PATH/models/nlp/task_models/task_model.py": 1683889954.520566, "TEMPLATE_PATH/models/nlp/task_models/text_generation.py": 1683889954.521497, "TEMPLATE_PATH/models/nlp/task_models/information_extraction.py": 1678345974.758443, "TEMPLATE_PATH/models/nlp/task_models/token_classification.py": 1678345974.7602658, "TEMPLATE_PATH/models/nlp/veco/configuration.py": 1678345974.76297, "TEMPLATE_PATH/models/nlp/veco/fill_mask.py": 1678345974.7633657, "TEMPLATE_PATH/models/nlp/veco/backbone.py": 1678345974.762673, "TEMPLATE_PATH/models/nlp/veco/text_classification.py": 1678345974.7637107, "TEMPLATE_PATH/models/nlp/veco/token_classification.py": 1678345974.7641091, "TEMPLATE_PATH/models/nlp/glm_130b/initialize.py": 1683889954.5107641, "TEMPLATE_PATH/models/nlp/glm_130b/quantization/functional.py": 1683889954.512782, "TEMPLATE_PATH/models/nlp/glm_130b/quantization/layers.py": 1683889954.5129745, "TEMPLATE_PATH/models/nlp/glm_130b/text_generation.py": 1683889954.5132122, "TEMPLATE_PATH/models/nlp/glm_130b/generation/strategies.py": 1683889954.5105143, "TEMPLATE_PATH/models/nlp/mglm/tasks/superglue/pvp.py": 1678345974.739084, "TEMPLATE_PATH/models/nlp/mglm/tasks/superglue/dataset.py": 1669108798.6253061, "TEMPLATE_PATH/models/nlp/mglm/tasks/superglue/evaluate.py": 1669108798.6255116, "TEMPLATE_PATH/models/nlp/mglm/tasks/superglue/finetune.py": 1669108798.6256893, "TEMPLATE_PATH/models/nlp/mglm/tasks/data_utils.py": 1678345974.737032, "TEMPLATE_PATH/models/nlp/mglm/tasks/seq2seq/dataset.py": 1669108798.6240597, "TEMPLATE_PATH/models/nlp/mglm/tasks/seq2seq/evaluate.py": 1678345974.738028, "TEMPLATE_PATH/models/nlp/mglm/tasks/seq2seq/finetune.py": 1678345974.7383432, "TEMPLATE_PATH/models/nlp/mglm/tasks/language_model/detokenizer.py": 1669108798.6234415, "TEMPLATE_PATH/models/nlp/mglm/tasks/language_model/dataset.py": 1669108798.6232784, "TEMPLATE_PATH/models/nlp/mglm/tasks/language_model/finetune.py": 1678345974.7376661, 
"TEMPLATE_PATH/models/nlp/mglm/tasks/eval_utils.py": 1678345974.7373278, "TEMPLATE_PATH/models/nlp/mglm/blocklm_utils.py": 1684246001.5148673, "TEMPLATE_PATH/models/nlp/mglm/train_utils.py": 1678345974.7394702, "TEMPLATE_PATH/models/nlp/mglm/test/test_block.py": 1684246001.515939, "TEMPLATE_PATH/models/nlp/mglm/test/test_rel_shift.py": 1684246001.5163302, "TEMPLATE_PATH/models/nlp/mglm/arguments.py": 1669108798.609255, "TEMPLATE_PATH/models/nlp/mglm/data_utils/tokenization_gpt2.py": 1669108798.6142075, "TEMPLATE_PATH/models/nlp/mglm/data_utils/lazy_loader.py": 1678345974.7336698, "TEMPLATE_PATH/models/nlp/mglm/data_utils/wordpiece.py": 1678345974.7343767, "TEMPLATE_PATH/models/nlp/mglm/data_utils/datasets.py": 1684246001.515576, "TEMPLATE_PATH/models/nlp/mglm/data_utils/tokenization.py": 1669108798.613975, "TEMPLATE_PATH/models/nlp/mglm/data_utils/extraction.py": 1678345974.733312, "TEMPLATE_PATH/models/nlp/mglm/data_utils/file_utils.py": 1669108798.6124434, "TEMPLATE_PATH/models/nlp/mglm/data_utils/sp_tokenizer.py": 1669108798.6136456, "TEMPLATE_PATH/models/nlp/mglm/data_utils/corpora.py": 1669108798.6114604, "TEMPLATE_PATH/models/nlp/mglm/data_utils/samplers.py": 1669108798.6129339, "TEMPLATE_PATH/models/nlp/mglm/mglm_for_text_summarization.py": 1678345974.7347617, "TEMPLATE_PATH/models/nlp/mglm/process_grid.py": 1669108798.6219385, "TEMPLATE_PATH/models/nlp/mglm/generation_utils.py": 1669108798.6156476, "TEMPLATE_PATH/models/nlp/mglm/utils.py": 1678345974.739819, "TEMPLATE_PATH/models/nlp/mglm/configure_data.py": 1678345974.7326682, "TEMPLATE_PATH/models/nlp/mglm/model/distributed.py": 1678345974.735162, "TEMPLATE_PATH/models/nlp/mglm/model/transformer.py": 1678345974.7363741, "TEMPLATE_PATH/models/nlp/mglm/model/modeling_bert.py": 1678345974.735637, "TEMPLATE_PATH/models/nlp/mglm/model/prompt.py": 1669108798.617736, "TEMPLATE_PATH/models/nlp/mglm/model/modeling_glm.py": 1678345974.7359483, "TEMPLATE_PATH/models/nlp/mglm/model/downstream.py": 1669108798.6170213, "TEMPLATE_PATH/models/nlp/mglm/run_test.py": 1669108798.6222408, "TEMPLATE_PATH/models/nlp/plug_mental/configuration.py": 1678345974.7478015, "TEMPLATE_PATH/models/nlp/plug_mental/backbone.py": 1678345974.7475746, "TEMPLATE_PATH/models/nlp/plug_mental/adv_utils.py": 1678345974.7465599, "TEMPLATE_PATH/models/nlp/plug_mental/text_classification.py": 1678345974.747977, "TEMPLATE_PATH/models/nlp/gpt_moe/configuration.py": 1678345974.7217705, "TEMPLATE_PATH/models/nlp/gpt_moe/backbone.py": 1678345974.7208388, "TEMPLATE_PATH/models/nlp/gpt_moe/tokenizer.py": 1678345974.725533, "TEMPLATE_PATH/models/nlp/gpt_moe/distributed_gpt_moe.py": 1678695526.523395, "TEMPLATE_PATH/models/nlp/gpt_moe/text_generation.py": 1678345974.7252653, "TEMPLATE_PATH/models/nlp/gpt_moe/moe/sharded_moe.py": 1678345974.7245455, "TEMPLATE_PATH/models/nlp/gpt_moe/moe/utils.py": 1678345974.7249217, "TEMPLATE_PATH/models/nlp/gpt_moe/moe/layer.py": 1678345974.7238333, "TEMPLATE_PATH/models/nlp/gpt_moe/moe/experts.py": 1678345974.7235267, "TEMPLATE_PATH/models/nlp/gpt_moe/moe/mappings.py": 1678345974.7241268, "TEMPLATE_PATH/models/nlp/gpt_moe/checkpointing.py": 1678695526.5199594, "TEMPLATE_PATH/models/nlp/csanmt/translation.py": 1678345974.710362, "TEMPLATE_PATH/models/nlp/T5/text2text_generation.py": 1678345974.6919267, "TEMPLATE_PATH/models/nlp/T5/configuration.py": 1678345974.6909628, "TEMPLATE_PATH/models/nlp/T5/backbone.py": 1683889954.5021315, "TEMPLATE_PATH/models/nlp/heads/text_classification_head.py": 1678345974.727904, 
"TEMPLATE_PATH/models/nlp/heads/infromation_extraction_head.py": 1678345974.7273557, "TEMPLATE_PATH/models/nlp/heads/token_classification_head.py": 1678345974.728869, "TEMPLATE_PATH/models/nlp/heads/text_generation_head.py": 1678345974.7283216, "TEMPLATE_PATH/models/nlp/heads/crf_head.py": 1678695526.5250702, "TEMPLATE_PATH/models/nlp/heads/torch_pretrain_head.py": 1666757257.3713884, "TEMPLATE_PATH/models/nlp/heads/fill_mask_head.py": 1683889954.5144427, "TEMPLATE_PATH/models/nlp/heads/text_ranking_head.py": 1678345974.7285597, "TEMPLATE_PATH/models/nlp/bloom/backbone.py": 1669108798.6061795, "TEMPLATE_PATH/models/nlp/xlm_roberta/configuration.py": 1678345974.7653904, "TEMPLATE_PATH/models/nlp/xlm_roberta/backbone.py": 1678345974.7651584, "TEMPLATE_PATH/models/nlp/peer/configuration.py": 1678695526.529261, "TEMPLATE_PATH/models/nlp/peer/sas_utils.py": 1678695526.5296216, "TEMPLATE_PATH/models/nlp/peer/backbone.py": 1678695526.5284507, "TEMPLATE_PATH/models/nlp/peer/text_classification.py": 1678695526.5302649, "TEMPLATE_PATH/models/nlp/fid_T5/text_generation.py": 1683889954.5068686, "TEMPLATE_PATH/models/nlp/space_T_en/text_to_sql.py": 1666757257.3954694, "TEMPLATE_PATH/models/nlp/canmt/sequence_generator.py": 1683889954.5052524, "TEMPLATE_PATH/models/nlp/canmt/canmt_translation.py": 1683889954.5044076, "TEMPLATE_PATH/models/nlp/canmt/canmt_model.py": 1683889954.5040576, "TEMPLATE_PATH/models/nlp/bart/text_error_correction.py": 1678345974.693962, "TEMPLATE_PATH/models/nlp/use/transformer.py": 1678345974.7618728, "TEMPLATE_PATH/models/nlp/use/user_satisfaction_estimation.py": 1678345974.7620804, "TEMPLATE_PATH/models/nlp/gpt_neo/backbone.py": 1666757257.3668969, "TEMPLATE_PATH/models/nlp/bert/configuration.py": 1678345974.6969304, "TEMPLATE_PATH/models/nlp/bert/siamese_uie.py": 1678695526.5135634, "TEMPLATE_PATH/models/nlp/bert/fill_mask.py": 1678345974.6990001, "TEMPLATE_PATH/models/nlp/bert/word_alignment.py": 1678695526.5139036, "TEMPLATE_PATH/models/nlp/bert/text_ranking.py": 1678345974.703262, "TEMPLATE_PATH/models/nlp/bert/backbone.py": 1678345974.6959348, "TEMPLATE_PATH/models/nlp/bert/text_classification.py": 1678345974.7023563, "TEMPLATE_PATH/models/nlp/bert/sentence_embedding.py": 1678345974.7002544, "TEMPLATE_PATH/models/nlp/bert/document_segmentation.py": 1678345974.6980228, "TEMPLATE_PATH/models/nlp/bert/token_classification.py": 1678345974.7041605, "TEMPLATE_PATH/models/nlp/dgds/backbone.py": 1683889954.5060863, "TEMPLATE_PATH/models/nlp/dgds/document_grounded_dialog_rerank.py": 1678345974.7150524, "TEMPLATE_PATH/models/nlp/dgds/document_grounded_dialog_generate.py": 1678345974.71487, "TEMPLATE_PATH/models/nlp/dgds/document_grounded_dialog_retrieval.py": 1678345974.715238, "TEMPLATE_PATH/models/nlp/gpt3/configuration.py": 1678695526.5156965, "TEMPLATE_PATH/models/nlp/gpt3/backbone.py": 1681714768.9173203, "TEMPLATE_PATH/models/nlp/gpt3/tokenizer.py": 1678695526.5179377, "TEMPLATE_PATH/models/nlp/gpt3/distributed_gpt3.py": 1683889954.51408, "TEMPLATE_PATH/models/nlp/gpt3/text_generation.py": 1681714768.9190643, "TEMPLATE_PATH/models/nlp/deberta_v2/configuration.py": 1678345974.7123609, "TEMPLATE_PATH/models/nlp/deberta_v2/fill_mask.py": 1678345974.7131743, "TEMPLATE_PATH/models/nlp/deberta_v2/backbone.py": 1678345974.7115374, "TEMPLATE_PATH/models/nlp/deberta_v2/tokenization.py": 1666757257.361269, "TEMPLATE_PATH/models/nlp/deberta_v2/tokenization_fast.py": 1678345974.7137625, "TEMPLATE_PATH/models/nlp/codegeex/codegeex_for_code_translation.py": 1678345974.707734, 
"TEMPLATE_PATH/models/nlp/codegeex/tokenizer.py": 1678345974.7089796, "TEMPLATE_PATH/models/nlp/codegeex/codegeex_for_code_generation.py": 1678345974.7071722, "TEMPLATE_PATH/models/nlp/codegeex/inference.py": 1678345974.7083764, "TEMPLATE_PATH/models/nlp/codegeex/codegeex.py": 1678345974.706545, "TEMPLATE_PATH/models/nlp/space/configuration.py": 1678345974.7504852, "TEMPLATE_PATH/models/nlp/space/dialog_modeling.py": 1678345974.7508473, "TEMPLATE_PATH/models/nlp/space/dialog_state_tracking.py": 1666757257.3844292, "TEMPLATE_PATH/models/nlp/space/model/intent_unified_transformer.py": 1666757257.386785, "TEMPLATE_PATH/models/nlp/space/model/tokenization_space.py": 1678345974.7516365, "TEMPLATE_PATH/models/nlp/space/model/unified_transformer.py": 1678345974.7521238, "TEMPLATE_PATH/models/nlp/space/model/model_base.py": 1678345974.7511904, "TEMPLATE_PATH/models/nlp/space/model/generator.py": 1666757257.3862689, "TEMPLATE_PATH/models/nlp/space/model/gen_unified_transformer.py": 1666757257.3857656, "TEMPLATE_PATH/models/nlp/space/dialog_intent_prediction.py": 1666757257.3833244, "TEMPLATE_PATH/models/nlp/space/modules/transformer_block.py": 1666757257.391351, "TEMPLATE_PATH/models/nlp/space/modules/functions.py": 1666757257.3904216, "TEMPLATE_PATH/models/nlp/space/modules/multihead_attention.py": 1666757257.3908985, "TEMPLATE_PATH/models/nlp/space/modules/feedforward.py": 1666757257.3899465, "TEMPLATE_PATH/models/nlp/space/modules/embedder.py": 1666757257.3894768, "TEMPLATE_PATH/models/nlp/fid_plug/configuration.py": 1683889954.50833, "TEMPLATE_PATH/models/nlp/fid_plug/backbone.py": 1683889954.507869, "TEMPLATE_PATH/models/nlp/fid_plug/text_generation.py": 1683889954.5088115, "TEMPLATE_PATH/models/nlp/gpt2/backbone.py": 1678345974.7169101, "TEMPLATE_PATH/models/nlp/plug/distributed_plug.py": 1678345974.7456992, "TEMPLATE_PATH/models/nlp/plug/configuration.py": 1678345974.7445607, "TEMPLATE_PATH/models/nlp/plug/backbone.py": 1678345974.7441673, "TEMPLATE_PATH/models/nlp/plug/AnnealingLR.py": 1678345974.7434573, "TEMPLATE_PATH/models/nlp/plug/generator.py": 1678345974.7459483, "TEMPLATE_PATH/models/nlp/megatron_bert/configuration.py": 1678345974.7317162, "TEMPLATE_PATH/models/nlp/megatron_bert/fill_mask.py": 1678345974.7319267, "TEMPLATE_PATH/models/nlp/megatron_bert/backbone.py": 1678345974.731479, "TEMPLATE_PATH/models/nlp/space_T_cn/configuration.py": 1666757257.3935158, "TEMPLATE_PATH/models/nlp/space_T_cn/backbone.py": 1678345974.752695, "TEMPLATE_PATH/models/nlp/space_T_cn/table_question_answering.py": 1678345974.7536259, "TEMPLATE_PATH/models/nlp/ponet/configuration.py": 1678345974.7491364, "TEMPLATE_PATH/models/nlp/ponet/fill_mask.py": 1678345974.7497096, "TEMPLATE_PATH/models/nlp/ponet/backbone.py": 1678345974.7488022, "TEMPLATE_PATH/models/nlp/ponet/tokenization.py": 1678345974.7501063, "TEMPLATE_PATH/models/nlp/ponet/document_segmentation.py": 1678345974.749312, "TEMPLATE_PATH/models/nlp/llama/configuration.py": 1683889954.5161562, "TEMPLATE_PATH/models/nlp/llama/convert_llama_weights_to_hf.py": 1683889954.5163944, "TEMPLATE_PATH/models/nlp/llama/backbone.py": 1683889954.5156515, "TEMPLATE_PATH/models/nlp/llama/tokenization.py": 1683889954.517054, "TEMPLATE_PATH/models/nlp/llama/tokenization_fast.py": 1683889954.5174031, "TEMPLATE_PATH/models/nlp/llama/text_generation.py": 1683889954.5166035, "TEMPLATE_PATH/models/nlp/lstm/backbone.py": 1678345974.7302816, "TEMPLATE_PATH/models/nlp/lstm/token_classification.py": 1678345974.7304647, 
"TEMPLATE_PATH/models/cv/image_deblur/nafnet_for_image_deblur.py": 1678345974.289103, "TEMPLATE_PATH/models/cv/vision_middleware/backbone.py": 1678345974.6052146, "TEMPLATE_PATH/models/cv/vision_middleware/model.py": 1678345974.6064956, "TEMPLATE_PATH/models/cv/vision_middleware/head.py": 1678345974.605873, "TEMPLATE_PATH/models/cv/vision_middleware/vim.py": 1678345974.607082, "TEMPLATE_PATH/models/cv/image_quality_assessment_man/swin.py": 1678695526.3478003, "TEMPLATE_PATH/models/cv/image_quality_assessment_man/maniqa.py": 1678695526.3473833, "TEMPLATE_PATH/models/cv/image_quality_assessment_man/image_quality_assessment_man.py": 1678695526.3470078, "TEMPLATE_PATH/models/cv/product_retrieval_embedding/item_detection.py": 1666757257.2308764, "TEMPLATE_PATH/models/cv/product_retrieval_embedding/item_model.py": 1666757257.231389, "TEMPLATE_PATH/models/cv/product_retrieval_embedding/item_embedding.py": 1666757257.2311432, "TEMPLATE_PATH/models/cv/body_2d_keypoints/w48.py": 1666757257.1529067, "TEMPLATE_PATH/models/cv/body_2d_keypoints/hrnet_v2.py": 1684246001.4672918, "TEMPLATE_PATH/models/cv/body_2d_keypoints/hrnet_basic_modules.py": 1666757257.1524448, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/panovit.py": 1678345974.3350315, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/modality/layout.py": 1678345974.3345408, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/misc/panostretch.py": 1678345974.3337135, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/misc/fourier.py": 1678345974.3334966, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/misc/post_proc.py": 1678345974.3339539, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/panovit.py": 1678345974.3347096, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/utils.py": 1678345974.3348787, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/backbone/vit_horizon_pry_image.py": 1678345974.3330128, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/backbone/resnet_DA.py": 1678345974.332792, "TEMPLATE_PATH/models/cv/salient_detection/salient_model.py": 1678345974.3975854, "TEMPLATE_PATH/models/cv/salient_detection/models/senet.py": 1678345974.3712454, "TEMPLATE_PATH/models/cv/salient_detection/models/utils.py": 1678695526.4316845, "TEMPLATE_PATH/models/cv/salient_detection/models/modules.py": 1678345974.3710551, "TEMPLATE_PATH/models/cv/salient_detection/models/u2net.py": 1666757257.241171, "TEMPLATE_PATH/models/cv/salient_detection/models/backbone/Res2Net_v1b.py": 1678695526.4297223, "TEMPLATE_PATH/models/cv/image_quality_assessment_degradation/degradation_model.py": 1678345974.321454, "TEMPLATE_PATH/models/cv/image_quality_assessment_degradation/image_quality_assessment_degradation.py": 1678345974.3216996, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/losses/model_irse.py": 1666757257.2036955, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/losses/losses.py": 1666757257.203465, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/losses/helpers.py": 1666757257.203164, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/retinaface/detection.py": 1673508904.826248, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/retinaface/models/retinaface.py": 1666757257.2049234, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/retinaface/models/net.py": 1666757257.2047052, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/retinaface/utils.py": 1666757257.2051783, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/gpen.py": 1666757257.2019858, 
"TEMPLATE_PATH/models/cv/image_portrait_enhancement/image_portrait_enhancement.py": 1678345974.3197925, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/align_faces.py": 1666757257.2006574, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/eqface/fqa.py": 1666757257.201287, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/eqface/model_resnet.py": 1666757257.2015626, "TEMPLATE_PATH/models/cv/abnormal_object_detection/mmdet_ms/roi_head/mask_scoring_roi_head.py": 1678695526.2852845, "TEMPLATE_PATH/models/cv/abnormal_object_detection/mmdet_ms/roi_head/roi_extractors/single_level_roi_extractor.py": 1678695526.2864377, "TEMPLATE_PATH/models/cv/abnormal_object_detection/mmdet_model.py": 1678345974.1796575, "TEMPLATE_PATH/models/cv/image_probing_model/backbone.py": 1678345974.3205512, "TEMPLATE_PATH/models/cv/image_probing_model/model.py": 1678345974.320754, "TEMPLATE_PATH/models/cv/image_probing_model/utils.py": 1678345974.320998, "TEMPLATE_PATH/models/cv/tinynas_classfication/super_res_kxkx.py": 1666757257.268841, "TEMPLATE_PATH/models/cv/tinynas_classfication/super_res_k1kxk1.py": 1666757257.2682607, "TEMPLATE_PATH/models/cv/tinynas_classfication/model_zoo.py": 1666757257.265972, "TEMPLATE_PATH/models/cv/tinynas_classfication/super_blocks.py": 1666757257.267099, "TEMPLATE_PATH/models/cv/tinynas_classfication/basic_blocks.py": 1666757257.2643123, "TEMPLATE_PATH/models/cv/tinynas_classfication/master_net.py": 1666757257.2654593, "TEMPLATE_PATH/models/cv/tinynas_classfication/plain_net_utils.py": 1669108798.596394, "TEMPLATE_PATH/models/cv/tinynas_classfication/super_res_idwexkx.py": 1666757257.267718, "TEMPLATE_PATH/models/cv/tinynas_classfication/global_utils.py": 1666757257.264953, "TEMPLATE_PATH/models/cv/image_to_image_translation/model_translation.py": 1666757257.2173638, "TEMPLATE_PATH/models/cv/image_to_image_translation/models/autoencoder.py": 1666757257.2180924, "TEMPLATE_PATH/models/cv/image_to_image_translation/models/clip.py": 1678695526.3520553, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/metrics.py": 1666757257.2199914, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/diffusion.py": 1678695526.3534672, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/apps.py": 1666757257.2189667, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/svd.py": 1666757257.2207708, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/random_mask.py": 1666757257.2205741, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/degradation.py": 1666757257.2193289, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/random_color.py": 1666757257.2203503, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/utils.py": 1666757257.2211437, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/losses.py": 1666757257.2197845, "TEMPLATE_PATH/models/cv/image_to_image_translation/data/transforms.py": 1666757257.217096, "TEMPLATE_PATH/models/cv/video_human_matting/models/decoder.py": 1678345974.4896257, "TEMPLATE_PATH/models/cv/video_human_matting/models/effv2.py": 1678345974.4909832, "TEMPLATE_PATH/models/cv/video_human_matting/models/lraspp.py": 1678345974.4915452, "TEMPLATE_PATH/models/cv/video_human_matting/models/matting.py": 1678345974.4921389, "TEMPLATE_PATH/models/cv/video_human_matting/models/deep_guided_filter.py": 1678345974.4903216, "TEMPLATE_PATH/models/cv/video_human_matting/model.py": 1678345974.488256, "TEMPLATE_PATH/models/cv/language_guided_video_summarization/transformer/models.py": 1673508904.8344479, 
"TEMPLATE_PATH/models/cv/language_guided_video_summarization/transformer/modules.py": 1673508904.8346016, "TEMPLATE_PATH/models/cv/language_guided_video_summarization/transformer/sub_layers.py": 1673508904.8347619, "TEMPLATE_PATH/models/cv/language_guided_video_summarization/transformer/layers.py": 1673508904.8342712, "TEMPLATE_PATH/models/cv/language_guided_video_summarization/summarizer.py": 1678345974.3353753, "TEMPLATE_PATH/models/cv/facial_landmark_confidence/flc/facial_landmark_confidence.py": 1678345974.2760296, "TEMPLATE_PATH/models/cv/facial_landmark_confidence/flc/manual_landmark_net.py": 1678345974.276284, "TEMPLATE_PATH/models/cv/image_to_image_generation/models/autoencoder.py": 1666757257.2149377, "TEMPLATE_PATH/models/cv/image_to_image_generation/models/clip.py": 1678695526.3506653, "TEMPLATE_PATH/models/cv/image_to_image_generation/model.py": 1666757257.2143965, "TEMPLATE_PATH/models/cv/image_to_image_generation/ops/diffusion.py": 1666757257.215869, "TEMPLATE_PATH/models/cv/image_to_image_generation/ops/losses.py": 1666757257.2161045, "TEMPLATE_PATH/models/cv/image_to_image_generation/data/transforms.py": 1666757257.2141688, "TEMPLATE_PATH/models/cv/image_body_reshaping/person_info.py": 1666757257.1893692, "TEMPLATE_PATH/models/cv/image_body_reshaping/model.py": 1666757257.1891172, "TEMPLATE_PATH/models/cv/image_body_reshaping/slim_utils.py": 1666757257.1903415, "TEMPLATE_PATH/models/cv/image_body_reshaping/pose_estimator/body.py": 1666757257.1897807, "TEMPLATE_PATH/models/cv/image_body_reshaping/pose_estimator/util.py": 1666757257.1901324, "TEMPLATE_PATH/models/cv/image_body_reshaping/pose_estimator/model.py": 1666757257.1899562, "TEMPLATE_PATH/models/cv/image_body_reshaping/image_body_reshaping.py": 1666757257.188921, "TEMPLATE_PATH/models/cv/image_human_parsing/m2fp_net.py": 1678345974.3071952, "TEMPLATE_PATH/models/cv/image_human_parsing/m2fp/m2fp_decoder.py": 1678345974.3068166, "TEMPLATE_PATH/models/cv/image_human_parsing/m2fp/m2fp_encoder.py": 1678345974.3069928, "TEMPLATE_PATH/models/cv/image_human_parsing/parsing_utils.py": 1678345974.307423, "TEMPLATE_PATH/models/cv/image_human_parsing/backbone/deeplab_resnet.py": 1678345974.3061016, "TEMPLATE_PATH/models/cv/image_skychange/ptsemseg/hrnet_super_and_ocr.py": 1684246001.4751763, "TEMPLATE_PATH/models/cv/image_skychange/ptsemseg/BlockModules.py": 1678345974.3284485, "TEMPLATE_PATH/models/cv/image_skychange/ptsemseg/unet.py": 1678345974.3294759, "TEMPLATE_PATH/models/cv/image_skychange/ptsemseg/hrnet_backnone.py": 1684246001.4747965, "TEMPLATE_PATH/models/cv/image_skychange/skychange.py": 1678345974.330122, "TEMPLATE_PATH/models/cv/image_skychange/preprocessor.py": 1678345974.3279777, "TEMPLATE_PATH/models/cv/image_skychange/skychange_model.py": 1678345974.3304164, "TEMPLATE_PATH/models/cv/video_object_segmentation/aggregate.py": 1678345974.5072932, "TEMPLATE_PATH/models/cv/video_object_segmentation/inference_memory_bank.py": 1678345974.5102427, "TEMPLATE_PATH/models/cv/video_object_segmentation/inference_core.py": 1678345974.5094788, "TEMPLATE_PATH/models/cv/video_object_segmentation/model.py": 1678345974.51162, "TEMPLATE_PATH/models/cv/video_object_segmentation/eval_network.py": 1678345974.5086596, "TEMPLATE_PATH/models/cv/video_object_segmentation/mod_resnet.py": 1678345974.5108964, "TEMPLATE_PATH/models/cv/video_object_segmentation/network.py": 1678345974.5134938, "TEMPLATE_PATH/models/cv/video_object_segmentation/modules.py": 1678345974.5123272, "TEMPLATE_PATH/models/cv/video_object_segmentation/cbam.py": 
1678345974.5079415, "TEMPLATE_PATH/models/cv/face_reconstruction/models/nv_diffrast.py": 1681714768.8716514, "TEMPLATE_PATH/models/cv/face_reconstruction/models/renderer.py": 1681714768.8736632, "TEMPLATE_PATH/models/cv/face_reconstruction/models/unet.py": 1681714768.873916, "TEMPLATE_PATH/models/cv/face_reconstruction/models/bfm.py": 1681714768.8695195, "TEMPLATE_PATH/models/cv/face_reconstruction/models/opt.py": 1681714768.8720403, "TEMPLATE_PATH/models/cv/face_reconstruction/models/networks.py": 1678345974.2734904, "TEMPLATE_PATH/models/cv/face_reconstruction/models/de_retouching_module.py": 1681714768.8699348, "TEMPLATE_PATH/models/cv/face_reconstruction/models/losses.py": 1681714768.8712077, "TEMPLATE_PATH/models/cv/face_reconstruction/models/pix2pix/pix2pix_options.py": 1681714768.8733847, "TEMPLATE_PATH/models/cv/face_reconstruction/models/pix2pix/pix2pix_model.py": 1681714768.873153, "TEMPLATE_PATH/models/cv/face_reconstruction/models/pix2pix/networks.py": 1681714768.8728101, "TEMPLATE_PATH/models/cv/face_reconstruction/models/facelandmark/nets/large_eyeball_net.py": 1678345974.2724826, "TEMPLATE_PATH/models/cv/face_reconstruction/models/facelandmark/nets/large_base_lmks_net.py": 1678345974.2721982, "TEMPLATE_PATH/models/cv/face_reconstruction/models/facelandmark/large_base_lmks_infer.py": 1678345974.2711725, "TEMPLATE_PATH/models/cv/face_reconstruction/models/facerecon_model.py": 1681714768.870774, "TEMPLATE_PATH/models/cv/face_reconstruction/utils.py": 1681714768.8743782, "TEMPLATE_PATH/models/cv/facial_expression_recognition/fer/transforms.py": 1666757257.186491, "TEMPLATE_PATH/models/cv/facial_expression_recognition/fer/vgg.py": 1666757257.1866848, "TEMPLATE_PATH/models/cv/facial_expression_recognition/fer/facial_expression_recognition.py": 1673508904.8041663, "TEMPLATE_PATH/models/cv/face_recognition/align_face.py": 1678695526.3292472, "TEMPLATE_PATH/models/cv/face_recognition/torchkit/rts_backbone.py": 1678345974.2696226, "TEMPLATE_PATH/models/cv/face_recognition/torchkit/backbone/facemask_backbone.py": 1678345974.2694073, "TEMPLATE_PATH/models/cv/face_recognition/torchkit/backbone/model_irse.py": 1666757257.1852279, "TEMPLATE_PATH/models/cv/face_recognition/torchkit/backbone/model_resnet.py": 1666757257.1854684, "TEMPLATE_PATH/models/cv/face_recognition/torchkit/backbone/common.py": 1666757257.1850357, "TEMPLATE_PATH/models/cv/face_recognition/torchkit/backbone/arcface_backbone.py": 1678345974.269119, "TEMPLATE_PATH/models/cv/face_generation/stylegan2.py": 1666757257.1819198, "TEMPLATE_PATH/models/cv/face_generation/op/fused_act.py": 1666757257.181432, "TEMPLATE_PATH/models/cv/face_generation/op/upfirdn2d.py": 1666757257.18165, "TEMPLATE_PATH/models/cv/face_generation/op/conv2d_gradfix.py": 1666757257.1812036, "TEMPLATE_PATH/models/cv/shop_segmentation/head_fpn.py": 1666757257.242077, "TEMPLATE_PATH/models/cv/shop_segmentation/models.py": 1666757257.2425845, "TEMPLATE_PATH/models/cv/shop_segmentation/common.py": 1666757257.241814, "TEMPLATE_PATH/models/cv/shop_segmentation/utils.py": 1666757257.2446902, "TEMPLATE_PATH/models/cv/shop_segmentation/shop_seg_base.py": 1666757257.243692, "TEMPLATE_PATH/models/cv/shop_segmentation/neck_fpn.py": 1666757257.2431688, "TEMPLATE_PATH/models/cv/shop_segmentation/shop_seg_model.py": 1666757257.2441843, "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino/ms_deform_attn.py": 1678345974.3103385, "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino/position_encoding.py": 1678345974.310542, 
"TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino/dino_decoder.py": 1678345974.3097205, "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino/maskdino_encoder.py": 1678345974.3101413, "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino/utils.py": 1678345974.3107386, "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino/maskdino_decoder.py": 1678345974.3099248, "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino_swin.py": 1678345974.3111138, "TEMPLATE_PATH/models/cv/image_instance_segmentation/datasets/transforms.py": 1666757257.1984863, "TEMPLATE_PATH/models/cv/image_instance_segmentation/fastinst/fastinst_encoder.py": 1684246001.4722662, "TEMPLATE_PATH/models/cv/image_instance_segmentation/fastinst/fastinst_decoder.py": 1684246001.4720163, "TEMPLATE_PATH/models/cv/image_instance_segmentation/cascade_mask_rcnn_swin.py": 1678345974.3089857, "TEMPLATE_PATH/models/cv/image_instance_segmentation/fastinst_model.py": 1684246001.472576, "TEMPLATE_PATH/models/cv/image_instance_segmentation/model.py": 1666757257.198652, "TEMPLATE_PATH/models/cv/image_instance_segmentation/postprocess_utils.py": 1684246001.4729404, "TEMPLATE_PATH/models/cv/image_instance_segmentation/backbones/resnet.py": 1684246001.4712603, "TEMPLATE_PATH/models/cv/image_instance_segmentation/backbones/swin_transformer.py": 1678345974.3086588, "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino_model.py": 1678345974.3109038, "TEMPLATE_PATH/models/cv/action_detection/modules/resnet.py": 1678695526.2903874, "TEMPLATE_PATH/models/cv/action_detection/modules/action_detection_pytorch.py": 1678695526.288069, "TEMPLATE_PATH/models/cv/action_detection/action_detection_onnx.py": 1678345974.1868067, "TEMPLATE_PATH/models/cv/vop_retrieval/backbone.py": 1678695526.4892921, "TEMPLATE_PATH/models/cv/vop_retrieval/basic_utils.py": 1678345974.6089652, "TEMPLATE_PATH/models/cv/vop_retrieval/model.py": 1678345974.6095595, "TEMPLATE_PATH/models/cv/vop_retrieval/tokenization_clip.py": 1678695526.4906054, "TEMPLATE_PATH/models/cv/vop_retrieval/model_se.py": 1678695526.489979, "TEMPLATE_PATH/models/cv/video_instance_segmentation/track/kernel_update_head.py": 1681714768.8891828, "TEMPLATE_PATH/models/cv/video_instance_segmentation/track/mask_hungarian_assigner.py": 1681714768.8895793, "TEMPLATE_PATH/models/cv/video_instance_segmentation/video_knet.py": 1681714768.8901427, "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_updator.py": 1681714768.8875823, "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_update_head.py": 1681714768.887322, "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_frame_iter_head.py": 1681714768.8861332, "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_head.py": 1681714768.8865519, "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_iter_head.py": 1681714768.88694, "TEMPLATE_PATH/models/cv/video_instance_segmentation/utils.py": 1681714768.8898368, "TEMPLATE_PATH/models/cv/video_instance_segmentation/neck/msdeformattn_decoder.py": 1681714768.888164, "TEMPLATE_PATH/models/cv/super_resolution/ecb.py": 1678345974.436123, "TEMPLATE_PATH/models/cv/super_resolution/ecbsr_model.py": 1678345974.4364467, "TEMPLATE_PATH/models/cv/super_resolution/rrdbnet_arch.py": 1666757257.2570488, "TEMPLATE_PATH/models/cv/super_resolution/arch_util.py": 1666757257.2563787, "TEMPLATE_PATH/models/cv/ocr_detection/preprocessor.py": 1684246001.5023808, "TEMPLATE_PATH/models/cv/ocr_detection/model.py": 1678695526.3969364, 
"TEMPLATE_PATH/models/cv/ocr_detection/utils.py": 1678695526.3993652, "TEMPLATE_PATH/models/cv/ocr_detection/modules/dbnet.py": 1678695526.3981876, "TEMPLATE_PATH/models/cv/ocr_detection/modules/seg_detector_loss.py": 1678695526.3986294, "TEMPLATE_PATH/models/cv/panorama_depth_estimation/networks/util.py": 1678345974.3670025, "TEMPLATE_PATH/models/cv/panorama_depth_estimation/networks/mobilenet.py": 1678345974.3609436, "TEMPLATE_PATH/models/cv/panorama_depth_estimation/networks/equi.py": 1678345974.360484, "TEMPLATE_PATH/models/cv/panorama_depth_estimation/networks/resnet.py": 1678345974.3611743, "TEMPLATE_PATH/models/cv/panorama_depth_estimation/networks/unifuse.py": 1678345974.3667643, "TEMPLATE_PATH/models/cv/panorama_depth_estimation/networks/layers.py": 1684246001.5053837, "TEMPLATE_PATH/models/cv/panorama_depth_estimation/unifuse_model.py": 1678345974.3672006, "TEMPLATE_PATH/models/cv/stream_yolo/utils/format.py": 1678345974.4351218, "TEMPLATE_PATH/models/cv/stream_yolo/utils/boxes.py": 1678345974.4347887, "TEMPLATE_PATH/models/cv/stream_yolo/models/tal_head.py": 1678345974.4333599, "TEMPLATE_PATH/models/cv/stream_yolo/models/dfp_pafpn.py": 1678345974.4322102, "TEMPLATE_PATH/models/cv/stream_yolo/models/streamyolo.py": 1678345974.4329953, "TEMPLATE_PATH/models/cv/stream_yolo/models/network_blocks.py": 1678345974.4326873, "TEMPLATE_PATH/models/cv/stream_yolo/models/darknet.py": 1678345974.4318306, "TEMPLATE_PATH/models/cv/stream_yolo/realtime_video_detector.py": 1678345974.433779, "TEMPLATE_PATH/models/cv/stream_yolo/exp/build.py": 1678345974.4007049, "TEMPLATE_PATH/models/cv/stream_yolo/exp/base_exp.py": 1678345974.4003, "TEMPLATE_PATH/models/cv/stream_yolo/exp/default/streamyolo.py": 1678345974.4303985, "TEMPLATE_PATH/models/cv/stream_yolo/exp/yolox_base.py": 1678345974.4308836, "TEMPLATE_PATH/models/cv/stream_yolo/data/data_augment.py": 1678345974.3993874, "TEMPLATE_PATH/models/cv/virual_tryon/sdafnet.py": 1666757257.299963, "TEMPLATE_PATH/models/cv/bad_image_detecting/bad_image_detecting.py": 1678695526.2924836, "TEMPLATE_PATH/models/cv/human_reconstruction/Reconstruction.py": 1681714768.874672, "TEMPLATE_PATH/models/cv/human_reconstruction/models/Surface_head.py": 1681714768.8764422, "TEMPLATE_PATH/models/cv/human_reconstruction/models/Res_backbone.py": 1681714768.87622, "TEMPLATE_PATH/models/cv/human_reconstruction/models/Embedding.py": 1681714768.8756416, "TEMPLATE_PATH/models/cv/human_reconstruction/models/PixToMesh.py": 1681714768.875951, "TEMPLATE_PATH/models/cv/human_reconstruction/models/networks.py": 1681714768.877559, "TEMPLATE_PATH/models/cv/human_reconstruction/models/human_segmenter.py": 1684246001.4695294, "TEMPLATE_PATH/models/cv/human_reconstruction/models/geometry.py": 1681714768.8770833, "TEMPLATE_PATH/models/cv/human_reconstruction/models/detectors.py": 1681714768.876841, "TEMPLATE_PATH/models/cv/human_reconstruction/utils.py": 1684246001.4699862, "TEMPLATE_PATH/models/cv/image_driving_perception/preprocessor.py": 1678695526.3451977, "TEMPLATE_PATH/models/cv/image_driving_perception/utils.py": 1678695526.3456447, "TEMPLATE_PATH/models/cv/image_driving_perception/image_driving_percetion_model.py": 1678695526.3447573, "TEMPLATE_PATH/models/cv/video_streaming_perception/longshortnet/longshortnet.py": 1678695526.4507868, "TEMPLATE_PATH/models/cv/video_streaming_perception/longshortnet/models/longshort_backbone_neck.py": 1678695526.4542763, "TEMPLATE_PATH/models/cv/video_streaming_perception/longshortnet/models/longshort.py": 1678695526.4533079, 
"TEMPLATE_PATH/models/cv/video_streaming_perception/longshortnet/models/dfp_pafpn_short.py": 1678695526.45279, "TEMPLATE_PATH/models/cv/video_streaming_perception/longshortnet/models/dfp_pafpn_long.py": 1678695526.452267, "TEMPLATE_PATH/models/cv/video_streaming_perception/longshortnet/exp/longshortnet_base.py": 1678695526.4501612, "TEMPLATE_PATH/models/cv/image_paintbyexample/model.py": 1678345974.3189397, "TEMPLATE_PATH/models/cv/image_inpainting/refinement.py": 1666757257.1970024, "TEMPLATE_PATH/models/cv/image_inpainting/model.py": 1666757257.1947935, "TEMPLATE_PATH/models/cv/image_inpainting/default.py": 1666757257.1946204, "TEMPLATE_PATH/models/cv/image_inpainting/modules/ade20k/resnet.py": 1666757257.1956348, "TEMPLATE_PATH/models/cv/image_inpainting/modules/ade20k/base.py": 1666757257.1954472, "TEMPLATE_PATH/models/cv/image_inpainting/modules/adversarial.py": 1666757257.1958177, "TEMPLATE_PATH/models/cv/image_inpainting/modules/perceptual.py": 1666757257.196634, "TEMPLATE_PATH/models/cv/image_inpainting/modules/inception.py": 1666757257.1964645, "TEMPLATE_PATH/models/cv/image_inpainting/modules/ffc.py": 1666757257.1962402, "TEMPLATE_PATH/models/cv/image_inpainting/modules/pix2pixhd.py": 1666757257.1967993, "TEMPLATE_PATH/models/cv/image_inpainting/modules/feature_matching.py": 1666757257.196007, "TEMPLATE_PATH/models/cv/image_inpainting/base.py": 1666757257.1944175, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py": 1678695526.3792994, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/util.py": 1678695526.381083, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/match_costs/match_cost.py": 1678695526.3804727, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py": 1678695526.37819, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/nuscenes_dataset.py": 1678695526.3820806, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/loading.py": 1678695526.383117, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/transform_3d.py": 1678695526.3837686, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/dense_heads/petrv2_dednhead.py": 1678695526.3906348, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/dense_heads/depth_net.py": 1678695526.3890011, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/necks/cp_fpn.py": 1678695526.3925595, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/petr_transformer.py": 1678695526.393985, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/positional_encoding.py": 1678695526.3945107, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/backbones/vovnet.py": 1678695526.3856297, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/detectors/petr3d.py": 1678695526.3916428, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/result_vis.py": 1684246001.4759786, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/depe_detect.py": 1678345974.347357, "TEMPLATE_PATH/models/cv/image_quality_assessment_mos/image_quality_assessment_mos.py": 1678345974.3244548, "TEMPLATE_PATH/models/cv/image_quality_assessment_mos/heads/simple_head.py": 1678345974.3242753, "TEMPLATE_PATH/models/cv/image_quality_assessment_mos/backbones/resnet.py": 1678345974.3235202, 
"TEMPLATE_PATH/models/cv/image_quality_assessment_mos/censeo_ivqa_model.py": 1678345974.3237197, "TEMPLATE_PATH/models/cv/image_debanding/rrdb/rrdb_image_debanding.py": 1678345974.2884033, "TEMPLATE_PATH/models/cv/image_restoration/demoire_models/nets.py": 1678345974.3251178, "TEMPLATE_PATH/models/cv/image_restoration/image_restoration_model.py": 1678345974.325296, "TEMPLATE_PATH/models/cv/cartoon/model_tf.py": 1678695526.2941835, "TEMPLATE_PATH/models/cv/cartoon/facelib/facer.py": 1683889954.472153, "TEMPLATE_PATH/models/cv/cartoon/facelib/config.py": 1666757257.1560297, "TEMPLATE_PATH/models/cv/cartoon/facelib/LK/lk.py": 1666757257.1556334, "TEMPLATE_PATH/models/cv/cartoon/facelib/face_detector.py": 1666757257.1562476, "TEMPLATE_PATH/models/cv/cartoon/facelib/face_landmark.py": 1684246001.4677038, "TEMPLATE_PATH/models/cv/cartoon/loss.py": 1678695526.2937913, "TEMPLATE_PATH/models/cv/cartoon/utils.py": 1678695526.295007, "TEMPLATE_PATH/models/cv/cartoon/mtcnn_pytorch/src/align_trans.py": 1666757257.1581075, "TEMPLATE_PATH/models/cv/cartoon/mtcnn_pytorch/src/matlab_cp2tform.py": 1666757257.158323, "TEMPLATE_PATH/models/cv/cartoon/network.py": 1678695526.2945373, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/vision_efficient_tuning.py": 1678695526.463801, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/timm_vision_transformer.py": 1678345974.6026883, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/backbone.py": 1678695526.4603705, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/timm_weight_init.py": 1678345974.6033437, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/timm_helpers.py": 1678345974.601856, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/petl.py": 1678695526.4632218, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/model.py": 1678695526.4610043, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/head.py": 1678345974.5989482, "TEMPLATE_PATH/models/cv/movie_scene_segmentation/utils/save_op.py": 1684121077.52684, "TEMPLATE_PATH/models/cv/movie_scene_segmentation/utils/shot_encoder.py": 1666757257.2231948, "TEMPLATE_PATH/models/cv/movie_scene_segmentation/utils/trn.py": 1666757257.2234836, "TEMPLATE_PATH/models/cv/movie_scene_segmentation/utils/head.py": 1678695526.3568585, "TEMPLATE_PATH/models/cv/movie_scene_segmentation/model.py": 1684121077.525873, "TEMPLATE_PATH/models/cv/movie_scene_segmentation/get_model.py": 1666757257.2217636, "TEMPLATE_PATH/models/cv/video_summarization/pgl_sum.py": 1666757257.297918, "TEMPLATE_PATH/models/cv/video_summarization/base_model.py": 1666757257.2957783, "TEMPLATE_PATH/models/cv/video_summarization/summarizer.py": 1678345974.5919068, "TEMPLATE_PATH/models/cv/video_summarization/kts/cpd_auto.py": 1666757257.2969224, "TEMPLATE_PATH/models/cv/video_summarization/kts/cpd_nonlin.py": 1666757257.2974133, "TEMPLATE_PATH/models/cv/table_recognition/lineless_table_process.py": 1678695526.4324372, "TEMPLATE_PATH/models/cv/table_recognition/model_lore.py": 1678695526.4331207, "TEMPLATE_PATH/models/cv/table_recognition/modules/lore_processor.py": 1678695526.434709, "TEMPLATE_PATH/models/cv/table_recognition/modules/lore_detector.py": 1678695526.4340818, "TEMPLATE_PATH/models/cv/image_matching/quadtree_attention_model.py": 1678345974.3155432, "TEMPLATE_PATH/models/cv/image_matching/config/default.py": 1678345974.3125448, "TEMPLATE_PATH/models/cv/image_matching/utils/misc.py": 1678345974.315888, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/loftr.py": 1678345974.3134868, 
"TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/utils/position_encoding.py": 1678345974.3152256, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/utils/coarse_matching.py": 1678345974.3148923, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/utils/fine_matching.py": 1678345974.3150685, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/loftr_module/quadtree_attention.py": 1678345974.3143134, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/loftr_module/fine_preprocess.py": 1678345974.3139389, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/loftr_module/transformer.py": 1678345974.3145041, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/loftr_module/linear_attention.py": 1678345974.3141232, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/backbone/resnet_fpn.py": 1678345974.3133032, "TEMPLATE_PATH/models/cv/tinynas_detection/detector.py": 1678695526.4374578, "TEMPLATE_PATH/models/cv/tinynas_detection/tinynas_detector.py": 1678345974.460455, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/apis/detector_evaluater.py": 1681714768.8838654, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/apis/detector_inference.py": 1681714768.8841915, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/structures/boxlist_ops.py": 1678345974.4569457, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/structures/bounding_box.py": 1678345974.4566479, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/structures/image_list.py": 1678345974.4573236, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/utils/model_utils.py": 1678345974.4585514, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/utils/boxes.py": 1678345974.4581728, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/utils/scheduler.py": 1678345974.4589145, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/augmentations/box_level_augs/box_level_augs.py": 1678345974.4441965, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/augmentations/box_level_augs/gaussian_maps.py": 1678345974.4447448, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/augmentations/box_level_augs/color_augs.py": 1678345974.4444985, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/augmentations/box_level_augs/geometric_augs.py": 1683889954.4839153, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/augmentations/scale_aware_aug.py": 1678345974.4453552, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/detectors/detector.py": 1678345974.4558744, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/losses/distill_loss.py": 1678345974.4532282, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/losses/gfocal_loss.py": 1678345974.4535718, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/core/ota_assigner.py": 1678345974.4496946, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/core/base_ops.py": 1678345974.4481623, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/core/repvgg_block.py": 1678345974.4501693, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/core/neck_ops.py": 1678345974.4485013, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/core/ops.py": 1678345974.4491763, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/core/utils.py": 1678345974.450523, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/core/weight_init.py": 1678345974.4508731, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/necks/giraffe_config.py": 1678345974.4543374, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/necks/giraffe_fpn.py": 1678345974.4547024, 
"TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/necks/giraffe_fpn_btn.py": 1678345974.4552062, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/heads/gfocal_v2_tiny.py": 1678345974.4517708, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/heads/zero_head.py": 1678345974.45238, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/backbones/tinynas_csp.py": 1678345974.4469912, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/backbones/tinynas_res.py": 1678345974.447312, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/backbones/darknet.py": 1678345974.4466953, "TEMPLATE_PATH/models/cv/tinynas_detection/utils.py": 1678345974.4609265, "TEMPLATE_PATH/models/cv/tinynas_detection/tinynas_damoyolo.py": 1678345974.4599845, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/utils/visualization.py": 1678345974.5058522, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/utils/utils.py": 1678345974.5049293, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/utils/kalman_filter.py": 1678345974.5041819, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/utils/image.py": 1678345974.503496, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/tracker/matching.py": 1684246001.5073156, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/tracker/multitracker.py": 1684246001.507944, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/tracker/basetrack.py": 1678345974.501055, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/models/model.py": 1678345974.4990714, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/models/common.py": 1678345974.497754, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/models/yolo.py": 1678345974.4996593, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/models/decode.py": 1678345974.4984286, "TEMPLATE_PATH/models/cv/nerf_recon_acc/nerf_recon_acc.py": 1678695526.3603125, "TEMPLATE_PATH/models/cv/nerf_recon_acc/network/nerf.py": 1678345974.340633, "TEMPLATE_PATH/models/cv/nerf_recon_acc/network/utils.py": 1678345974.340971, "TEMPLATE_PATH/models/cv/nerf_recon_acc/network/segmenter.py": 1684246001.4756348, "TEMPLATE_PATH/models/cv/nerf_recon_acc/nerf_preprocess.py": 1678695526.359052, "TEMPLATE_PATH/models/cv/nerf_recon_acc/dataloader/nerf_dataset.py": 1678345974.3388634, "TEMPLATE_PATH/models/cv/nerf_recon_acc/dataloader/read_write_model.py": 1678345974.3391125, "TEMPLATE_PATH/models/cv/video_deinterlace/UNet_for_video_deinterlace.py": 1678345974.461295, "TEMPLATE_PATH/models/cv/video_deinterlace/deinterlace_arch.py": 1678345974.4619946, "TEMPLATE_PATH/models/cv/video_deinterlace/models/deep_fourier_upsampling.py": 1678345974.4627986, "TEMPLATE_PATH/models/cv/video_deinterlace/models/fre.py": 1678345974.4634838, "TEMPLATE_PATH/models/cv/video_deinterlace/models/utils.py": 1678345974.464179, "TEMPLATE_PATH/models/cv/video_deinterlace/models/archs.py": 1678345974.4625406, "TEMPLATE_PATH/models/cv/video_deinterlace/models/enh.py": 1678345974.4631467, "TEMPLATE_PATH/models/cv/cmdssl_video_embedding/resnet3d.py": 1666757257.1593952, "TEMPLATE_PATH/models/cv/cmdssl_video_embedding/resnet2p1d.py": 1666757257.1591942, "TEMPLATE_PATH/models/cv/cmdssl_video_embedding/c3d.py": 1666757257.1590006, "TEMPLATE_PATH/models/cv/image_depth_estimation_bts/depth_estimation_bts_model.py": 1678695526.3416724, "TEMPLATE_PATH/models/cv/image_depth_estimation_bts/networks/decoder.py": 1678695526.3428533, "TEMPLATE_PATH/models/cv/image_depth_estimation_bts/networks/bts_model.py": 1678695526.3423235, 
"TEMPLATE_PATH/models/cv/image_depth_estimation_bts/networks/encoder.py": 1678695526.3433921, "TEMPLATE_PATH/models/cv/image_depth_estimation_bts/networks/utils.py": 1678695526.343739, "TEMPLATE_PATH/models/cv/motion_generation/model.py": 1678345974.3359873, "TEMPLATE_PATH/models/cv/motion_generation/modules/rotation2xyz.py": 1678345974.3373065, "TEMPLATE_PATH/models/cv/motion_generation/modules/respace.py": 1678345974.3370926, "TEMPLATE_PATH/models/cv/motion_generation/modules/smpl.py": 1678345974.3374798, "TEMPLATE_PATH/models/cv/motion_generation/modules/mdm.py": 1678345974.33691, "TEMPLATE_PATH/models/cv/motion_generation/modules/gaussian_diffusion.py": 1678345974.3366945, "TEMPLATE_PATH/models/cv/motion_generation/modules/cfg_sampler.py": 1678345974.3364377, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/utils/requirements_check.py": 1678345974.2937963, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/utils/voc_register.py": 1684246001.4703872, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/utils/configuration_mapper.py": 1678345974.2932599, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/utils/model_surgery_op.py": 1678345974.293452, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/utils/coco_register.py": 1678345974.2930408, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/utils/register_data.py": 1678345974.293619, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/models/fast_rcnn.py": 1678345974.2918143, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/models/defrcn.py": 1678345974.2915351, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/models/resnet.py": 1678345974.2922988, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/models/calibration_layer.py": 1678345974.2913256, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/models/gdl.py": 1678345974.2920313, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/models/roi_heads.py": 1678345974.2925265, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/defrcn_for_fewshot.py": 1678345974.289662, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/evaluation/coco_evaluation.py": 1678345974.2904465, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/evaluation/pascal_voc_evaluation.py": 1678345974.2908285, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/evaluation/evaluator.py": 1678345974.2906368, "TEMPLATE_PATH/models/cv/ocr_recognition/preprocessor.py": 1681714768.8827155, "TEMPLATE_PATH/models/cv/ocr_recognition/model.py": 1684246001.5035024, "TEMPLATE_PATH/models/cv/ocr_recognition/modules/convnextvit.py": 1681714768.881167, "TEMPLATE_PATH/models/cv/ocr_recognition/modules/crnn.py": 1681714768.8814888, "TEMPLATE_PATH/models/cv/ocr_recognition/modules/vitstr.py": 1681714768.8823054, "TEMPLATE_PATH/models/cv/ocr_recognition/modules/timm_tinyc.py": 1678345974.3579545, "TEMPLATE_PATH/models/cv/ocr_recognition/modules/convnext.py": 1678345974.3574538, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/track/quasi_dense_embed_tracker.py": 1678345974.5580919, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/mask.py": 1678345974.5271971, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/kernel_updator.py": 1678345974.5264003, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/kernel_update_head.py": 1678345974.5256743, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/semantic_fpn_wrapper.py": 1681714768.8905349, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/kernel_head.py": 1678345974.519709, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/kernel_iter_head.py": 1678345974.5233328, 
"TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/track_heads.py": 1678345974.5286357, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/neck/fpn.py": 1678345974.5311077, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/video_k_net.py": 1678345974.5597517, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/backbone/swin_checkpoint.py": 1678345974.516286, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/backbone/swin_transformer.py": 1678345974.517262, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/visualizer.py": 1678345974.5604084, "TEMPLATE_PATH/models/cv/open_vocabulary_detection_vild/vild.py": 1684246001.5045104, "TEMPLATE_PATH/models/cv/image_reid_person/pass_model.py": 1666757257.2059593, "TEMPLATE_PATH/models/cv/image_reid_person/transreid_model.py": 1666757257.2062182, "TEMPLATE_PATH/models/cv/image_face_fusion/facelib/align_trans.py": 1678345974.3027532, "TEMPLATE_PATH/models/cv/image_face_fusion/facelib/matlab_cp2tform.py": 1678345974.3029947, "TEMPLATE_PATH/models/cv/image_face_fusion/network/aad_layer.py": 1678345974.3037808, "TEMPLATE_PATH/models/cv/image_face_fusion/network/dense_motion.py": 1678345974.3045554, "TEMPLATE_PATH/models/cv/image_face_fusion/network/model_irse.py": 1678345974.3051307, "TEMPLATE_PATH/models/cv/image_face_fusion/network/bfm.py": 1678345974.3042998, "TEMPLATE_PATH/models/cv/image_face_fusion/network/ops.py": 1678345974.3053207, "TEMPLATE_PATH/models/cv/image_face_fusion/network/aei_flow_net.py": 1678345974.3040216, "TEMPLATE_PATH/models/cv/image_face_fusion/network/facerecon_model.py": 1678345974.3048775, "TEMPLATE_PATH/models/cv/image_face_fusion/image_face_fusion.py": 1678345974.3033106, "TEMPLATE_PATH/models/cv/image_face_fusion/facegan/gan_wrap.py": 1678345974.3008904, "TEMPLATE_PATH/models/cv/image_face_fusion/facegan/op/fused_act.py": 1678345974.3021884, "TEMPLATE_PATH/models/cv/image_face_fusion/facegan/op/upfirdn2d.py": 1678345974.3023663, "TEMPLATE_PATH/models/cv/image_face_fusion/facegan/op/conv2d_gradfix.py": 1678345974.3019848, "TEMPLATE_PATH/models/cv/image_face_fusion/facegan/model.py": 1678345974.3014028, "TEMPLATE_PATH/models/cv/product_segmentation/net.py": 1678695526.4043183, "TEMPLATE_PATH/models/cv/product_segmentation/seg_infer.py": 1666778289.670906, "TEMPLATE_PATH/models/cv/controllable_image_generation/controlnet.py": 1678695526.3069751, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/openpose/body.py": 1678695526.3047397, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/openpose/util.py": 1678695526.3063028, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/openpose/model.py": 1678695526.3059084, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/openpose/hand.py": 1678695526.305337, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/annotator.py": 1678695526.296671, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/api.py": 1678695526.2974072, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/midas/midas_net_custom.py": 1678695526.299506, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/midas/transforms.py": 1678695526.2998872, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/midas/midas_net.py": 1678695526.2992017, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/midas/dpt_depth.py": 1678695526.298864, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/midas/base_model.py": 1678695526.2981143, 
"TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/midas/vit.py": 1678695526.300227, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/midas/blocks.py": 1678695526.298546, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/utils.py": 1678695526.3005583, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/mlsd/utils.py": 1678695526.3033025, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/mlsd/mbv2_mlsd_large.py": 1678695526.3022327, "TEMPLATE_PATH/models/cv/video_inpainting/inpainting.py": 1678695526.438486, "TEMPLATE_PATH/models/cv/video_inpainting/inpainting_model.py": 1678695526.438962, "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/casmvs_model.py": 1678345974.3164253, "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/colmap2mvsnet.py": 1684246001.4733398, "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/cas_mvsnet.py": 1678345974.3162477, "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/utils.py": 1678345974.317991, "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/depth_filter.py": 1684246001.4736886, "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/module.py": 1678345974.31774, "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/general_eval_dataset.py": 1678345974.3174586, "TEMPLATE_PATH/models/cv/image_binary_quant_classification/binary_quant_model.py": 1678345974.2778409, "TEMPLATE_PATH/models/cv/image_binary_quant_classification/bnext.py": 1678345974.2784865, "TEMPLATE_PATH/models/cv/skin_retouching/detection_model/detection_unet_in.py": 1666757257.24693, "TEMPLATE_PATH/models/cv/skin_retouching/detection_model/detection_module.py": 1666757257.2464738, "TEMPLATE_PATH/models/cv/skin_retouching/retinaface/net.py": 1666757257.2504349, "TEMPLATE_PATH/models/cv/skin_retouching/retinaface/prior_box.py": 1666757257.2523744, "TEMPLATE_PATH/models/cv/skin_retouching/retinaface/box_utils.py": 1666757257.249882, "TEMPLATE_PATH/models/cv/skin_retouching/retinaface/utils.py": 1666757257.2532027, "TEMPLATE_PATH/models/cv/skin_retouching/retinaface/network.py": 1666757257.2511058, "TEMPLATE_PATH/models/cv/skin_retouching/retinaface/predict_single.py": 1666757257.251693, "TEMPLATE_PATH/models/cv/skin_retouching/unet_deploy.py": 1666757257.2537475, "TEMPLATE_PATH/models/cv/skin_retouching/weights_init.py": 1666757257.2549121, "TEMPLATE_PATH/models/cv/skin_retouching/utils.py": 1666757257.2543528, "TEMPLATE_PATH/models/cv/skin_retouching/inpainting_model/gconv.py": 1666757257.2480178, "TEMPLATE_PATH/models/cv/skin_retouching/inpainting_model/inpainting_unet.py": 1666757257.248478, "TEMPLATE_PATH/models/cv/body_3d_keypoints/hdformer/directed_graph.py": 1683889954.471591, "TEMPLATE_PATH/models/cv/body_3d_keypoints/hdformer/hdformer_detector.py": 1678345974.191, "TEMPLATE_PATH/models/cv/body_3d_keypoints/hdformer/skeleton.py": 1678345974.1912234, "TEMPLATE_PATH/models/cv/body_3d_keypoints/hdformer/backbone.py": 1678345974.190157, "TEMPLATE_PATH/models/cv/body_3d_keypoints/hdformer/hdformer.py": 1678345974.19077, "TEMPLATE_PATH/models/cv/body_3d_keypoints/hdformer/block.py": 1678345974.1903841, "TEMPLATE_PATH/models/cv/body_3d_keypoints/cannonical_pose/canonical_pose_modules.py": 1678345974.189541, "TEMPLATE_PATH/models/cv/body_3d_keypoints/cannonical_pose/body_3d_pose.py": 1683889954.4713006, "TEMPLATE_PATH/models/cv/action_recognition/models.py": 1666757257.1497922, "TEMPLATE_PATH/models/cv/action_recognition/s3dg.py": 1666757257.1501145, 
"TEMPLATE_PATH/models/cv/action_recognition/tada_convnext.py": 1666757257.1504557, "TEMPLATE_PATH/models/cv/action_recognition/temporal_patch_shift_transformer.py": 1683889954.4706383, "TEMPLATE_PATH/models/cv/video_frame_interpolation/interp_model/flow_reversal.py": 1678345974.4836097, "TEMPLATE_PATH/models/cv/video_frame_interpolation/interp_model/UNet.py": 1678345974.4825158, "TEMPLATE_PATH/models/cv/video_frame_interpolation/interp_model/IFNet_swin.py": 1678345974.481858, "TEMPLATE_PATH/models/cv/video_frame_interpolation/interp_model/refinenet_arch.py": 1678345974.4842384, "TEMPLATE_PATH/models/cv/video_frame_interpolation/interp_model/transformer_layers.py": 1678345974.4849417, "TEMPLATE_PATH/models/cv/video_frame_interpolation/utils/utils.py": 1678345974.4869297, "TEMPLATE_PATH/models/cv/video_frame_interpolation/utils/scene_change_detection.py": 1684246001.5067093, "TEMPLATE_PATH/models/cv/video_frame_interpolation/VFINet_for_video_frame_interpolation.py": 1678345974.4789994, "TEMPLATE_PATH/models/cv/video_frame_interpolation/VFINet_arch.py": 1678345974.4787207, "TEMPLATE_PATH/models/cv/video_frame_interpolation/flow_model/update.py": 1678345974.4809961, "TEMPLATE_PATH/models/cv/video_frame_interpolation/flow_model/corr.py": 1678345974.4801412, "TEMPLATE_PATH/models/cv/video_frame_interpolation/flow_model/extractor.py": 1678345974.480411, "TEMPLATE_PATH/models/cv/video_frame_interpolation/flow_model/raft.py": 1678345974.4806812, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/dense_heads/rpn_head.py": 1678695526.3648705, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/dense_heads/anchor_head.py": 1678695526.3625498, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/necks/fpn.py": 1678695526.3662295, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/utils/convModule_norm.py": 1678695526.3763406, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/utils/checkpoint.py": 1678695526.375762, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/backbones/vit.py": 1666757257.2256925, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py": 1678695526.3729281, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/fcn_mask_head.py": 1678695526.3743196, "TEMPLATE_PATH/models/cv/object_detection/mmdet_model.py": 1666757257.2243414, "TEMPLATE_PATH/models/cv/pedestrian_attribute_recognition/model.py": 1683889954.4834628, "TEMPLATE_PATH/models/cv/pointcloud_sceneflow_estimation/sf_rcp.py": 1678345974.3682785, "TEMPLATE_PATH/models/cv/pointcloud_sceneflow_estimation/rcp_model.py": 1678345974.3680415, "TEMPLATE_PATH/models/cv/pointcloud_sceneflow_estimation/common.py": 1678345974.367603, "TEMPLATE_PATH/models/cv/pointcloud_sceneflow_estimation/pointnet2_utils.py": 1678345974.36784, "TEMPLATE_PATH/models/cv/animal_recognition/splat.py": 1666757257.151845, "TEMPLATE_PATH/models/cv/animal_recognition/resnet.py": 1666757257.1516247, "TEMPLATE_PATH/models/cv/video_stabilization/utils/image_utils.py": 1678345974.5841804, "TEMPLATE_PATH/models/cv/video_stabilization/utils/RAFTUtils.py": 1678345974.5826185, "TEMPLATE_PATH/models/cv/video_stabilization/utils/math_utils.py": 1678345974.5846765, "TEMPLATE_PATH/models/cv/video_stabilization/utils/ProjectionUtils.py": 1678345974.5819445, "TEMPLATE_PATH/models/cv/video_stabilization/utils/WarpUtils.py": 1678345974.5831873, "TEMPLATE_PATH/models/cv/video_stabilization/utils/MedianFilter.py": 1678345974.5813267, "TEMPLATE_PATH/models/cv/video_stabilization/utils/IterativeSmooth.py": 1678345974.5807827, 
"TEMPLATE_PATH/models/cv/video_stabilization/DUTRAFTStabilizer.py": 1678345974.5794287, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/config.py": 1678345974.5730486, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/rf_det_so.py": 1678345974.5783482, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/Smoother.py": 1678345974.572002, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/DUT_raft.py": 1678345974.5681, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/MotionPro.py": 1678345974.568633, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/RAFT/update.py": 1678345974.5714862, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/RAFT/corr.py": 1678345974.5699692, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/RAFT/extractor.py": 1678345974.5704808, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/RAFT/raft.py": 1678345974.5709665, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/rf_det_module.py": 1678345974.573552, "TEMPLATE_PATH/models/cv/video_depth_estimation/dro_model.py": 1678345974.4664078, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/misc.py": 1678345974.4781265, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/config.py": 1678345974.4763255, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/horovod.py": 1678345974.4769518, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/image_gt.py": 1678345974.477618, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/types.py": 1678345974.4784274, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/depth.py": 1684246001.5061839, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/load.py": 1678345974.4778461, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/image.py": 1678345974.4773502, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/augmentations.py": 1678345974.4760456, "TEMPLATE_PATH/models/cv/video_depth_estimation/models/model_utils.py": 1678345974.4694006, "TEMPLATE_PATH/models/cv/video_depth_estimation/models/sfm_model_mf.py": 1678345974.4700265, "TEMPLATE_PATH/models/cv/video_depth_estimation/models/model_checkpoint.py": 1678345974.4691477, "TEMPLATE_PATH/models/cv/video_depth_estimation/models/model_wrapper.py": 1678345974.469756, "TEMPLATE_PATH/models/cv/video_depth_estimation/models/sup_model_mf.py": 1678345974.4702911, "TEMPLATE_PATH/models/cv/video_depth_estimation/networks/layers/resnet/pose_decoder.py": 1678345974.4731765, "TEMPLATE_PATH/models/cv/video_depth_estimation/networks/layers/resnet/resnet_encoder.py": 1678345974.473565, "TEMPLATE_PATH/models/cv/video_depth_estimation/networks/layers/resnet/layers.py": 1678345974.4729247, "TEMPLATE_PATH/models/cv/video_depth_estimation/networks/layers/resnet/depth_decoder.py": 1678345974.4725597, "TEMPLATE_PATH/models/cv/video_depth_estimation/networks/optim/update.py": 1678345974.4748883, "TEMPLATE_PATH/models/cv/video_depth_estimation/networks/optim/extractor.py": 1678345974.4745526, "TEMPLATE_PATH/models/cv/video_depth_estimation/networks/depth_pose/depth_pose_net.py": 1678345974.4713385, "TEMPLATE_PATH/models/cv/video_depth_estimation/configs/default_config.py": 1678345974.4657435, "TEMPLATE_PATH/models/cv/video_depth_estimation/geometry/pose_utils.py": 1678345974.4684362, "TEMPLATE_PATH/models/cv/video_depth_estimation/geometry/camera_utils.py": 1678345974.467637, "TEMPLATE_PATH/models/cv/video_depth_estimation/geometry/camera.py": 1678345974.4673057, "TEMPLATE_PATH/models/cv/video_depth_estimation/geometry/pose.py": 1678345974.4680007, "TEMPLATE_PATH/models/cv/vidt/backbone.py": 1681714768.8921459, "TEMPLATE_PATH/models/cv/vidt/model.py": 
1681714768.8937347, "TEMPLATE_PATH/models/cv/vidt/head.py": 1681714768.8931575, "TEMPLATE_PATH/models/cv/vidt/fpn_fusion.py": 1681714768.8928485, "TEMPLATE_PATH/models/cv/vidt/deformable_transformer.py": 1681714768.8925443, "TEMPLATE_PATH/models/cv/face_human_hand_detection/shufflenetv2.py": 1678695526.3277714, "TEMPLATE_PATH/models/cv/face_human_hand_detection/one_stage_detector.py": 1678695526.3271508, "TEMPLATE_PATH/models/cv/face_human_hand_detection/nanodet_plus_head.py": 1678695526.326374, "TEMPLATE_PATH/models/cv/face_human_hand_detection/det_infer.py": 1666778289.6696548, "TEMPLATE_PATH/models/cv/face_human_hand_detection/ghost_pan.py": 1678695526.3257587, "TEMPLATE_PATH/models/cv/face_human_hand_detection/utils.py": 1678695526.328504, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/misc.py": 1666757257.2392309, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/mttr.py": 1673508904.8399704, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/multimodal_transformer.py": 1673508904.8402708, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/matcher.py": 1669108798.5943944, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/backbone.py": 1666757257.2390404, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/position_encoding_2d.py": 1666757257.239805, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/postprocessing.py": 1666757257.239986, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/criterion.py": 1669108798.5941, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/swin_transformer.py": 1669108798.5960565, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/segmentation.py": 1666757257.240216, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/model.py": 1673508904.8396866, "TEMPLATE_PATH/models/cv/hand_static/networks.py": 1678695526.3304467, "TEMPLATE_PATH/models/cv/hand_static/hand_model.py": 1666778289.6702523, "TEMPLATE_PATH/models/cv/image_depth_estimation/newcrfs_model.py": 1678345974.297565, "TEMPLATE_PATH/models/cv/image_depth_estimation/networks/uper_crf_head.py": 1678345974.2973852, "TEMPLATE_PATH/models/cv/image_depth_estimation/networks/newcrf_layers.py": 1678345974.2962215, "TEMPLATE_PATH/models/cv/image_depth_estimation/networks/newcrf_depth.py": 1678345974.2958264, "TEMPLATE_PATH/models/cv/image_depth_estimation/networks/newcrf_utils.py": 1678345974.2965019, "TEMPLATE_PATH/models/cv/image_depth_estimation/networks/swin_transformer.py": 1678345974.2970595, "TEMPLATE_PATH/models/cv/image_colorization/unet/unet.py": 1678345974.287222, "TEMPLATE_PATH/models/cv/image_colorization/unet/utils.py": 1678345974.287506, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/ddcolor_for_image_colorization.py": 1681714768.8788333, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/ddcolor.py": 1678345974.284877, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/utils/vgg.py": 1681714768.8797908, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/utils/unet.py": 1678345974.2865462, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/utils/transformer_utils.py": 1678345974.2863536, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/utils/position_encoding.py": 1678345974.2861621, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/utils/convnext.py": 1678345974.285967, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/loss.py": 1681714768.879511, 
"TEMPLATE_PATH/models/cv/face_detection/retinaface/detection.py": 1673508904.7995956, "TEMPLATE_PATH/models/cv/face_detection/retinaface/models/retinaface.py": 1666757257.1662319, "TEMPLATE_PATH/models/cv/face_detection/retinaface/models/net.py": 1666757257.1660082, "TEMPLATE_PATH/models/cv/face_detection/retinaface/utils.py": 1666757257.166439, "TEMPLATE_PATH/models/cv/face_detection/mtcnn/models/detector.py": 1673508904.7983325, "TEMPLATE_PATH/models/cv/face_detection/mtcnn/models/get_nets.py": 1666757257.1649437, "TEMPLATE_PATH/models/cv/face_detection/mtcnn/models/box_utils.py": 1666757257.1642718, "TEMPLATE_PATH/models/cv/face_detection/mtcnn/models/first_stage.py": 1666757257.1647036, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/detection.py": 1673508904.8020747, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/ssd/predictor.py": 1678345974.263985, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/ssd/data_preprocessing.py": 1666757257.1760805, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/ssd/ssd.py": 1666757257.1772814, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/ssd/mb_tiny_fd.py": 1666757257.1766155, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/ssd/fd_config.py": 1666757257.1763618, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/transforms.py": 1666757257.1775296, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/box_utils.py": 1666757257.1750665, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/mb_tiny.py": 1666757257.1753407, "TEMPLATE_PATH/models/cv/face_detection/peppa_pig_face/facer.py": 1678345974.2030537, "TEMPLATE_PATH/models/cv/face_detection/peppa_pig_face/LK/lk.py": 1678345974.202424, "TEMPLATE_PATH/models/cv/face_detection/peppa_pig_face/face_detector.py": 1678345974.2027018, "TEMPLATE_PATH/models/cv/face_detection/peppa_pig_face/face_landmark.py": 1684246001.4691453, "TEMPLATE_PATH/models/cv/face_detection/scrfd/scrfd_detect.py": 1678345974.262096, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/core/post_processing/bbox_nms.py": 1678695526.308986, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/core/bbox/transforms.py": 1678695526.3082, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/retinaface.py": 1678695526.3129826, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/auto_augment.py": 1678695526.3106256, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/transforms.py": 1678695526.312393, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/loading.py": 1678695526.3117406, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/formating.py": 1678695526.311245, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/dense_heads/scrfd_head.py": 1678695526.316167, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/master_net.py": 1683889954.473481, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/mobilenet.py": 1678695526.3142238, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/resnet.py": 1678695526.3147054, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/scrfd.py": 1678695526.31966, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/single_stage.py": 1678695526.3215094, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/base.py": 1678695526.3180547, 
"TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/tinymog.py": 1678695526.3232183, "TEMPLATE_PATH/models/cv/face_detection/scrfd/tinymog_detect.py": 1678345974.263417, "TEMPLATE_PATH/models/cv/face_detection/scrfd/preprocessor.py": 1678345974.2381473, "TEMPLATE_PATH/models/cv/face_detection/scrfd/damofd_detect.py": 1683889954.4730425, "TEMPLATE_PATH/models/cv/face_detection/mogface/models/mogprednet.py": 1666757257.1628885, "TEMPLATE_PATH/models/cv/face_detection/mogface/models/resnet.py": 1666757257.1630945, "TEMPLATE_PATH/models/cv/face_detection/mogface/models/utils.py": 1666757257.1633208, "TEMPLATE_PATH/models/cv/face_detection/mogface/models/detectors.py": 1673508904.7980537, "TEMPLATE_PATH/models/cv/face_detection/mogface/models/mogface.py": 1666757257.162678, "TEMPLATE_PATH/models/cv/robust_image_classification/easyrobust_model.py": 1678345974.369159, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/ddpm_segmentation_model.py": 1678695526.3499827, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/utils/data_process_func.py": 1666757257.213139, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/utils/builder.py": 1666757257.212899, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/utils/seg_func.py": 1678345974.3271163, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/beit_adapter.py": 1666757257.2102795, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/adapter_modules.py": 1666757257.2093182, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/base/beit.py": 1666757257.210009, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/encoder_decoder_mask2former.py": 1666757257.2121763, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/base_segmentor.py": 1666757257.2117958, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/mask2former_head_from_mmseg.py": 1666757257.2112045, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/base_decode_head.py": 1666757257.2109008, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/semantic_seg_model.py": 1684246001.4743931, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/pan_merge/base_panoptic_fusion_head.py": 1666757257.2071388, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/pan_merge/maskformer_semantic_head.py": 1666757257.2074032, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/ddpm_seg/data_util.py": 1678695526.3485208, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/ddpm_seg/utils.py": 1678345974.3266795, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/ddpm_seg/feature_extractors.py": 1678695526.3490577, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/ddpm_seg/pixel_classifier.py": 1678695526.3495007, "TEMPLATE_PATH/models/cv/video_single_object_tracking/config/ostrack.py": 1666757257.2861888, "TEMPLATE_PATH/models/cv/video_single_object_tracking/utils/utils.py": 1666757257.294515, "TEMPLATE_PATH/models/cv/video_single_object_tracking/tracker/procontext.py": 1678695526.4486487, "TEMPLATE_PATH/models/cv/video_single_object_tracking/tracker/ostrack.py": 1666757257.2933815, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/layers/attn_blocks.py": 1678695526.4463873, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/layers/head.py": 1678695526.446728, 
"TEMPLATE_PATH/models/cv/video_single_object_tracking/models/layers/patch_embed.py": 1666757257.2896674, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/layers/attn.py": 1666757257.2881665, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/ostrack/base_backbone.py": 1666757257.290771, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/ostrack/ostrack.py": 1678695526.44714, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/ostrack/utils.py": 1666757257.291744, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/ostrack/vit_ce.py": 1666757257.292233, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/procontext/procontext.py": 1678695526.4476662, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/procontext/utils.py": 1678695526.447871, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/procontext/vit_ce.py": 1678695526.4480832, "TEMPLATE_PATH/models/cv/text_driven_segmentation/lseg_net.py": 1666757257.2610106, "TEMPLATE_PATH/models/cv/text_driven_segmentation/lseg_blocks.py": 1666757257.2598405, "TEMPLATE_PATH/models/cv/text_driven_segmentation/lseg_model.py": 1666757257.2603793, "TEMPLATE_PATH/models/cv/text_driven_segmentation/model.py": 1666757257.2622228, "TEMPLATE_PATH/models/cv/text_driven_segmentation/lseg_vit.py": 1666757257.2616358, "TEMPLATE_PATH/models/cv/text_driven_segmentation/clip.py": 1666757257.258649, "TEMPLATE_PATH/models/cv/text_driven_segmentation/simple_tokenizer.py": 1666757257.2628324, "TEMPLATE_PATH/models/cv/text_driven_segmentation/lseg_base.py": 1666757257.2592633, "TEMPLATE_PATH/models/cv/crowd_counting/hrnet_aspp_relu.py": 1684246001.4687607, "TEMPLATE_PATH/models/cv/crowd_counting/cc_model.py": 1666757257.1599863, "TEMPLATE_PATH/models/cv/image_panoptic_segmentation/panseg_model.py": 1666757257.1997347, "TEMPLATE_PATH/models/cv/face_emotion/emotion_model.py": 1666757257.1793659, "TEMPLATE_PATH/models/cv/face_emotion/emotion_infer.py": 1678345974.266066, "TEMPLATE_PATH/models/cv/face_emotion/face_alignment/face_align.py": 1666757257.1801705, "TEMPLATE_PATH/models/cv/face_emotion/face_alignment/face.py": 1666757257.1799781, "TEMPLATE_PATH/models/cv/face_emotion/efficient/model.py": 1678695526.3245037, "TEMPLATE_PATH/models/cv/face_emotion/efficient/utils.py": 1678695526.3249586, "TEMPLATE_PATH/models/cv/video_super_resolution/real_basicvsr_net.py": 1678345974.5962114, "TEMPLATE_PATH/models/cv/video_super_resolution/msrresnet_lite_model.py": 1678345974.5949175, "TEMPLATE_PATH/models/cv/video_super_resolution/common.py": 1678345974.5942144, "TEMPLATE_PATH/models/cv/video_super_resolution/real_basicvsr_for_video_super_resolution.py": 1678345974.5955362, "TEMPLATE_PATH/models/cv/video_super_resolution/basicvsr_net.py": 1678345974.5935404, "TEMPLATE_PATH/models/cv/face_attribute_recognition/fair_face/face_attribute_recognition.py": 1678345974.201238, "TEMPLATE_PATH/models/cv/image_denoise/nafnet/NAFNet_arch.py": 1666757257.1934595, "TEMPLATE_PATH/models/cv/image_denoise/nafnet/arch_util.py": 1666757257.1938126, "TEMPLATE_PATH/models/cv/image_denoise/nafnet_for_image_denoise.py": 1678345974.2943788, "TEMPLATE_PATH/models/cv/image_classification/mmcls_model.py": 1678345974.2806082, "TEMPLATE_PATH/models/cv/image_classification/utils.py": 1678345974.2812235, "TEMPLATE_PATH/models/cv/image_classification/backbones/beit_v2.py": 1678345974.279836, "TEMPLATE_PATH/models/cv/image_classification/backbones/nextvit.py": 1678345974.2801979, 
"TEMPLATE_PATH/models/cv/image_classification/resnet50_cc.py": 1678345974.2809508, "TEMPLATE_PATH/models/cv/image_color_enhance/csrnet.py": 1666757257.1914177, "TEMPLATE_PATH/models/cv/image_color_enhance/deeplpf/deeplpfnet.py": 1678345974.2835712, "TEMPLATE_PATH/models/cv/image_color_enhance/deeplpf/deeplpf_image_color_enhance.py": 1678345974.2831166, "TEMPLATE_PATH/models/cv/image_color_enhance/image_color_enhance.py": 1678345974.283899, "TEMPLATE_PATH/models/cv/image_color_enhance/adaint/adaint.py": 1678345974.2824776, "TEMPLATE_PATH/models/base/base_torch_head.py": 1678345974.176039, "TEMPLATE_PATH/models/base/base_model.py": 1681714768.8640296, "TEMPLATE_PATH/models/base/base_torch_model.py": 1681714768.8644474, "TEMPLATE_PATH/models/base/base_head.py": 1678695526.2807148, "TEMPLATE_PATH/metrics/image_quality_assessment_degradation_metric.py": 1678345974.1558983, "TEMPLATE_PATH/metrics/prediction_saving_wrapper.py": 1678345974.1587963, "TEMPLATE_PATH/metrics/video_stabilization_metric.py": 1678345974.161685, "TEMPLATE_PATH/metrics/ppl_metric.py": 1678345974.158113, "TEMPLATE_PATH/metrics/inbatch_recall_metric.py": 1678345974.1564841, "TEMPLATE_PATH/metrics/loss_metric.py": 1678345974.1567907, "TEMPLATE_PATH/metrics/ocr_recognition_metric.py": 1681714768.859318, "TEMPLATE_PATH/metrics/map_metric.py": 1678695526.2701354, "TEMPLATE_PATH/metrics/image_colorization_metric.py": 1681714768.8586574, "TEMPLATE_PATH/metrics/sequence_classification_metric.py": 1678345974.159533, "TEMPLATE_PATH/metrics/audio_noise_metric.py": 1678345974.1516218, "TEMPLATE_PATH/metrics/translation_evaluation_metric.py": 1684246001.462436, "TEMPLATE_PATH/metrics/video_frame_interpolation_metric.py": 1678345974.1614027, "TEMPLATE_PATH/metrics/image_inpainting_metric.py": 1678345974.1546395, "TEMPLATE_PATH/metrics/image_denoise_metric.py": 1678345974.1542509, "TEMPLATE_PATH/metrics/referring_video_object_segmentation_metric.py": 1678345974.1591957, "TEMPLATE_PATH/metrics/token_classification_metric.py": 1678345974.1608303, "TEMPLATE_PATH/metrics/video_summarization_metric.py": 1678345974.1620147, "TEMPLATE_PATH/metrics/builder.py": 1684246001.462111, "TEMPLATE_PATH/metrics/image_quality_assessment_mos_metric.py": 1678345974.1561337, "TEMPLATE_PATH/metrics/ned_metric.py": 1678345974.1578484, "TEMPLATE_PATH/metrics/text_ranking_metric.py": 1678345974.1604652, "TEMPLATE_PATH/metrics/movie_scene_segmentation_metric.py": 1678345974.1574643, "TEMPLATE_PATH/metrics/accuracy_metric.py": 1678345974.151063, "TEMPLATE_PATH/metrics/image_instance_segmentation_metric.py": 1678345974.1552966, "TEMPLATE_PATH/metrics/video_super_resolution_metric/metric_util.py": 1678345974.1631625, "TEMPLATE_PATH/metrics/video_super_resolution_metric/video_super_resolution_metric.py": 1678345974.163586, "TEMPLATE_PATH/metrics/video_super_resolution_metric/niqe.py": 1678695526.272421, "TEMPLATE_PATH/metrics/video_super_resolution_metric/matlab_functions.py": 1678695526.2713144, "TEMPLATE_PATH/metrics/ciderD/ciderD.py": 1666757257.1302783, "TEMPLATE_PATH/metrics/ciderD/ciderD_scorer.py": 1678695526.2677228, "TEMPLATE_PATH/metrics/action_detection_evaluator.py": 1678695526.2662494, "TEMPLATE_PATH/metrics/image_color_enhance_metric.py": 1678345974.153906, "TEMPLATE_PATH/metrics/image_portrait_enhancement_metric.py": 1678345974.1556726, "TEMPLATE_PATH/metrics/bleu_metric.py": 1678345974.1524482, "TEMPLATE_PATH/metrics/text_generation_metric.py": 1678345974.1598558, "TEMPLATE_PATH/metrics/base.py": 1678345974.152117, "TEMPLATE_PATH/pipelines/util.py": 
1678345974.9337575, "TEMPLATE_PATH/pipelines/science/protein_structure_pipeline.py": 1678345974.9334872, "TEMPLATE_PATH/pipelines/builder.py": 1681714768.9746857, "TEMPLATE_PATH/pipelines/pipeline_template.py": 1684246001.5603435, "TEMPLATE_PATH/pipelines/audio/timestamp_pipeline.py": 1684246001.539448, "TEMPLATE_PATH/pipelines/audio/kws_farfield_pipeline.py": 1678695526.592166, "TEMPLATE_PATH/pipelines/audio/speaker_verification_pipeline.py": 1684246001.538074, "TEMPLATE_PATH/pipelines/audio/inverse_text_processing_pipeline.py": 1678345974.833208, "TEMPLATE_PATH/pipelines/audio/separation_pipeline.py": 1678345974.835587, "TEMPLATE_PATH/pipelines/audio/voice_activity_detection_pipeline.py": 1684246001.5403378, "TEMPLATE_PATH/pipelines/audio/text_to_speech_pipeline.py": 1678345974.837081, "TEMPLATE_PATH/pipelines/audio/kws_kwsbp_pipeline.py": 1678345974.8338838, "TEMPLATE_PATH/pipelines/audio/linear_aec_pipeline.py": 1678345974.8341885, "TEMPLATE_PATH/pipelines/audio/ans_pipeline.py": 1678695526.5817752, "TEMPLATE_PATH/pipelines/audio/speaker_verification_eres2net_pipeline.py": 1684247769.6647675, "TEMPLATE_PATH/pipelines/audio/lm_infer_pipeline.py": 1684246001.5343251, "TEMPLATE_PATH/pipelines/audio/ans_dfsmn_pipeline.py": 1678695526.5813322, "TEMPLATE_PATH/pipelines/audio/asr_inference_pipeline.py": 1684246001.5326667, "TEMPLATE_PATH/pipelines/audio/speaker_diarization_pipeline.py": 1684246001.537162, "TEMPLATE_PATH/pipelines/audio/speaker_verification_rdino_pipeline.py": 1684246001.5384402, "TEMPLATE_PATH/pipelines/audio/punctuation_processing_pipeline.py": 1684246001.5355213, "TEMPLATE_PATH/pipelines/audio/speaker_verification_light_pipeline.py": 1678345974.8364737, "TEMPLATE_PATH/pipelines/audio/speaker_change_locating_pipeline.py": 1684246001.5362113, "TEMPLATE_PATH/pipelines/audio/asr_wenet_inference_pipeline.py": 1678345974.8329349, "TEMPLATE_PATH/pipelines/multi_modal/asr_pipeline.py": 1678345974.9102848, "TEMPLATE_PATH/pipelines/multi_modal/image_captioning_pipeline.py": 1684246001.5449712, "TEMPLATE_PATH/pipelines/multi_modal/text_to_video_synthesis_pipeline.py": 1684246001.54651, "TEMPLATE_PATH/pipelines/multi_modal/mgeo_ranking_pipeline.py": 1678345974.913822, "TEMPLATE_PATH/pipelines/multi_modal/generative_multi_modal_embedding_pipeline.py": 1666757257.5339417, "TEMPLATE_PATH/pipelines/multi_modal/multimodal_dialogue_pipeline.py": 1684246001.5457838, "TEMPLATE_PATH/pipelines/multi_modal/text_to_image_synthesis_pipeline.py": 1683891255.6625693, "TEMPLATE_PATH/pipelines/multi_modal/text2sql_pipeline.py": 1678345974.9150336, "TEMPLATE_PATH/pipelines/multi_modal/visual_entailment_pipeline.py": 1678345974.916273, "TEMPLATE_PATH/pipelines/multi_modal/disco_guided_diffusion_pipeline/disco_guided_diffusion.py": 1681714768.987968, "TEMPLATE_PATH/pipelines/multi_modal/disco_guided_diffusion_pipeline/utils.py": 1681714768.988303, "TEMPLATE_PATH/pipelines/multi_modal/visual_question_answering_pipeline.py": 1678345974.916901, "TEMPLATE_PATH/pipelines/multi_modal/video_question_answering_pipeline.py": 1678345974.9160104, "TEMPLATE_PATH/pipelines/multi_modal/video_captioning_pipeline.py": 1678345974.915723, "TEMPLATE_PATH/pipelines/multi_modal/video_multi_modal_embedding_pipeline.py": 1666757257.5376425, "TEMPLATE_PATH/pipelines/multi_modal/efficient_diffusion_tuning_pipeline.py": 1683889954.550607, "TEMPLATE_PATH/pipelines/multi_modal/team_multi_modal_similarity_pipeline.py": 1666757257.5365796, "TEMPLATE_PATH/pipelines/multi_modal/diffusers_wrapped/diffusers_pipeline.py": 
1684121077.5650501, "TEMPLATE_PATH/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/stable_diffusion_pipeline.py": 1684121077.566238, "TEMPLATE_PATH/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/chinese_stable_diffusion_pipeline.py": 1684121077.5659308, "TEMPLATE_PATH/pipelines/multi_modal/multi_modal_embedding_pipeline.py": 1678345974.9142408, "TEMPLATE_PATH/pipelines/multi_modal/ocr_recognition_pipeline.py": 1678345974.914581, "TEMPLATE_PATH/pipelines/multi_modal/document_vl_embedding_pipeline.py": 1678345974.9124298, "TEMPLATE_PATH/pipelines/multi_modal/image_text_retrieval_pipeline.py": 1678345974.913492, "TEMPLATE_PATH/pipelines/multi_modal/gridvlp_pipeline.py": 1678345974.9127157, "TEMPLATE_PATH/pipelines/multi_modal/visual_grounding_pipeline.py": 1678345974.9165354, "TEMPLATE_PATH/pipelines/multi_modal/soonet_video_temporal_grounding_pipeline.py": 1681714768.9888954, "TEMPLATE_PATH/pipelines/multi_modal/sudoku_pipeline.py": 1678345974.9148157, "TEMPLATE_PATH/pipelines/nlp/translation_evaluation_pipeline.py": 1684246001.5580392, "TEMPLATE_PATH/pipelines/nlp/glm130b_text_generation_pipeline.py": 1683889954.5535533, "TEMPLATE_PATH/pipelines/nlp/faq_question_answering_pipeline.py": 1678345974.9225557, "TEMPLATE_PATH/pipelines/nlp/document_grounded_dialog_generate_pipeline.py": 1684246001.5480056, "TEMPLATE_PATH/pipelines/nlp/automatic_post_editing_pipeline.py": 1666757257.5406618, "TEMPLATE_PATH/pipelines/nlp/named_entity_recognition_pipeline.py": 1684246001.5530941, "TEMPLATE_PATH/pipelines/nlp/interactive_translation_pipeline.py": 1678345974.9250765, "TEMPLATE_PATH/pipelines/nlp/summarization_pipeline.py": 1678345974.9273708, "TEMPLATE_PATH/pipelines/nlp/document_grounded_dialog_retrieval_pipeline.py": 1684246001.5499012, "TEMPLATE_PATH/pipelines/nlp/fasttext_text_classification_pipeline.py": 1678345974.9229462, "TEMPLATE_PATH/pipelines/nlp/word_alignment_pipeline.py": 1678695526.6479418, "TEMPLATE_PATH/pipelines/nlp/feature_extraction_pipeline.py": 1684246001.5515616, "TEMPLATE_PATH/pipelines/nlp/text_ranking_pipeline.py": 1684246001.5570047, "TEMPLATE_PATH/pipelines/nlp/fid_dialogue_pipeline.py": 1684246001.552004, "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py": 1684246001.5557013, "TEMPLATE_PATH/pipelines/nlp/codegeex_code_generation_pipeline.py": 1678345974.9174054, "TEMPLATE_PATH/pipelines/nlp/translation_quality_estimation_pipeline.py": 1678345974.931919, "TEMPLATE_PATH/pipelines/nlp/fill_mask_pipeline.py": 1684246001.5525877, "TEMPLATE_PATH/pipelines/nlp/distributed_plug_pipeline.py": 1678345974.91977, "TEMPLATE_PATH/pipelines/nlp/conversational_text_to_sql_pipeline.py": 1678345974.9181793, "TEMPLATE_PATH/pipelines/nlp/distributed_gpt3_pipeline.py": 1681714768.9907482, "TEMPLATE_PATH/pipelines/nlp/information_extraction_pipeline.py": 1678345974.9244976, "TEMPLATE_PATH/pipelines/nlp/table_question_answering_pipeline.py": 1684246001.555219, "TEMPLATE_PATH/pipelines/nlp/user_satisfaction_estimation_pipeline.py": 1684246001.5591247, "TEMPLATE_PATH/pipelines/nlp/dialog_modeling_pipeline.py": 1678345974.9186983, "TEMPLATE_PATH/pipelines/nlp/canmt_translation_pipeline.py": 1683889954.5525997, "TEMPLATE_PATH/pipelines/nlp/word_segmentation_pipeline.py": 1678345974.9326284, "TEMPLATE_PATH/pipelines/nlp/document_segmentation_pipeline.py": 1684246001.5505779, "TEMPLATE_PATH/pipelines/nlp/distributed_gpt_moe_pipeline.py": 1678345974.9194465, "TEMPLATE_PATH/pipelines/nlp/extractive_summarization_pipeline.py": 1684246001.5509684, 
"TEMPLATE_PATH/pipelines/nlp/text_error_correction_pipeline.py": 1678695526.6476424, "TEMPLATE_PATH/pipelines/nlp/dialog_state_tracking_pipeline.py": 1684246001.5474644, "TEMPLATE_PATH/pipelines/nlp/mglm_text_summarization_pipeline.py": 1678695526.6446507, "TEMPLATE_PATH/pipelines/nlp/translation_pipeline.py": 1678345974.9313443, "TEMPLATE_PATH/pipelines/nlp/siamese_uie_pipeline.py": 1684246001.5545502, "TEMPLATE_PATH/pipelines/nlp/dialog_intent_prediction_pipeline.py": 1684246001.5471377, "TEMPLATE_PATH/pipelines/nlp/sentence_embedding_pipeline.py": 1684246001.5536666, "TEMPLATE_PATH/pipelines/nlp/document_grounded_dialog_rerank_pipeline.py": 1684246001.5493042, "TEMPLATE_PATH/pipelines/nlp/zero_shot_classification_pipeline.py": 1684246001.559703, "TEMPLATE_PATH/pipelines/nlp/text_generation_pipeline.py": 1684246001.5563328, "TEMPLATE_PATH/pipelines/nlp/language_identification_pipline.py": 1678695526.6442416, "TEMPLATE_PATH/pipelines/nlp/token_classification_pipeline.py": 1684246001.5576875, "TEMPLATE_PATH/pipelines/nlp/codegeex_code_translation_pipeline.py": 1678345974.9175882, "TEMPLATE_PATH/pipelines/cv/bad_image_detecting_pipeline.py": 1678345974.84121, "TEMPLATE_PATH/pipelines/cv/image_cartoon_pipeline.py": 1666757257.498452, "TEMPLATE_PATH/pipelines/cv/image_to_image_generate_pipeline.py": 1666757257.5083926, "TEMPLATE_PATH/pipelines/cv/facial_expression_recognition_pipeline.py": 1683889954.5460215, "TEMPLATE_PATH/pipelines/cv/retina_face_detection_pipeline.py": 1666757257.525595, "TEMPLATE_PATH/pipelines/cv/image_style_transfer_pipeline.py": 1684246001.5433357, "TEMPLATE_PATH/pipelines/cv/image_face_fusion_pipeline.py": 1678345974.8556206, "TEMPLATE_PATH/pipelines/cv/ulfd_face_detection_pipeline.py": 1666757257.5294175, "TEMPLATE_PATH/pipelines/cv/pedestrian_attribute_recognition_pipeline.py": 1683889954.5479586, "TEMPLATE_PATH/pipelines/cv/image_denoise_pipeline.py": 1678345974.8544917, "TEMPLATE_PATH/pipelines/cv/vop_retrieval_se_pipeline.py": 1678695526.6399703, "TEMPLATE_PATH/pipelines/cv/image_matting_pipeline.py": 1684246001.542891, "TEMPLATE_PATH/pipelines/cv/image_deblur_pipeline.py": 1678345974.8534613, "TEMPLATE_PATH/pipelines/cv/video_human_matting_pipeline.py": 1678345974.9057999, "TEMPLATE_PATH/pipelines/cv/live_category_pipeline.py": 1666757257.5096319, "TEMPLATE_PATH/pipelines/cv/image_structured_model_probing_pipeline.py": 1678345974.890545, "TEMPLATE_PATH/pipelines/cv/face_quality_assessment_pipeline.py": 1683889954.5438397, "TEMPLATE_PATH/pipelines/cv/face_processing_base_pipeline.py": 1683889954.5433195, "TEMPLATE_PATH/pipelines/cv/image_portrait_enhancement_pipeline.py": 1678345974.8593307, "TEMPLATE_PATH/pipelines/cv/image_color_enhance_pipeline.py": 1678345974.852745, "TEMPLATE_PATH/pipelines/cv/vision_efficient_tuning_pipeline.py": 1678695526.6396506, "TEMPLATE_PATH/pipelines/cv/tbs_detection_utils/utils.py": 1681714768.9856553, "TEMPLATE_PATH/pipelines/cv/video_object_segmentation_pipeline.py": 1678345974.9070742, "TEMPLATE_PATH/pipelines/cv/face_detection_pipeline.py": 1678345974.8464031, "TEMPLATE_PATH/pipelines/cv/body_3d_keypoints_pipeline.py": 1678345974.8419, "TEMPLATE_PATH/pipelines/cv/image_paintbyexample_pipeline.py": 1678695526.6043956, "TEMPLATE_PATH/pipelines/cv/face_recognition_ood_pipeline.py": 1683889954.5451627, "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py": 1678345974.8523827, "TEMPLATE_PATH/pipelines/cv/card_detection_pipeline.py": 1678345974.8422294, "TEMPLATE_PATH/pipelines/cv/table_recognition_pipeline.py": 
1678345974.9024644, "TEMPLATE_PATH/pipelines/cv/image_to_image_translation_pipeline.py": 1666757257.5090609, "TEMPLATE_PATH/pipelines/cv/face_attribute_recognition_pipeline.py": 1683889954.5421839, "TEMPLATE_PATH/pipelines/cv/image_debanding_pipeline.py": 1678345974.8531418, "TEMPLATE_PATH/pipelines/cv/video_instance_segmentation_pipeline.py": 1681714768.9862943, "TEMPLATE_PATH/pipelines/cv/tinynas_classification_pipeline.py": 1669108798.6442235, "TEMPLATE_PATH/pipelines/cv/human_reconstruction_pipeline.py": 1681714768.9829588, "TEMPLATE_PATH/pipelines/cv/video_multi_object_tracking_pipeline.py": 1678345974.906372, "TEMPLATE_PATH/pipelines/cv/controllable_image_generation_pipeline.py": 1678695526.600098, "TEMPLATE_PATH/pipelines/cv/image_defrcn_fewshot_pipeline.py": 1678345974.8541288, "TEMPLATE_PATH/pipelines/cv/ddpm_semantic_segmentation_pipeline.py": 1678345974.8439617, "TEMPLATE_PATH/pipelines/cv/content_check_pipeline.py": 1678345974.8424017, "TEMPLATE_PATH/pipelines/cv/vop_retrieval_pipeline.py": 1678345974.9095361, "TEMPLATE_PATH/pipelines/cv/object_detection_3d_pipeline.py": 1678695526.6065028, "TEMPLATE_PATH/pipelines/cv/lineless_table_recognition_pipeline.py": 1678695526.6051717, "TEMPLATE_PATH/pipelines/cv/cmdssl_video_embedding_pipeline.py": 1666757257.487139, "TEMPLATE_PATH/pipelines/cv/tinynas_detection_pipeline.py": 1678345974.9044118, "TEMPLATE_PATH/pipelines/cv/video_deinterlace_pipeline.py": 1678695526.6368866, "TEMPLATE_PATH/pipelines/cv/image_open_vocabulary_detection_pipeline.py": 1678345974.8585114, "TEMPLATE_PATH/pipelines/cv/language_guided_video_summarization_pipeline.py": 1678345974.891131, "TEMPLATE_PATH/pipelines/cv/body_2d_keypoints_pipeline.py": 1666757257.4853406, "TEMPLATE_PATH/pipelines/cv/face_human_hand_detection_pipeline.py": 1666778289.6917272, "TEMPLATE_PATH/pipelines/cv/hicossl_video_embedding_pipeline.py": 1666757257.4973748, "TEMPLATE_PATH/pipelines/cv/face_recognition_pipeline.py": 1678345974.8498085, "TEMPLATE_PATH/pipelines/cv/image_body_reshaping_pipeline.py": 1666757257.497916, "TEMPLATE_PATH/pipelines/cv/image_inpainting_pipeline.py": 1666757257.5020847, "TEMPLATE_PATH/pipelines/cv/face_recognition_onnx_fm_pipeline.py": 1683889954.5441782, "TEMPLATE_PATH/pipelines/cv/image_driving_perception_pipeline.py": 1678695526.6034508, "TEMPLATE_PATH/pipelines/cv/video_stabilization_pipeline.py": 1678345974.9080534, "TEMPLATE_PATH/pipelines/cv/indoor_layout_estimation_pipeline.py": 1678345974.8907528, "TEMPLATE_PATH/pipelines/cv/ddcolor_image_colorization_pipeline.py": 1678345974.8437521, "TEMPLATE_PATH/pipelines/cv/face_emotion_pipeline.py": 1666778289.691363, "TEMPLATE_PATH/pipelines/cv/mtcnn_face_detection_pipeline.py": 1666757257.5116644, "TEMPLATE_PATH/pipelines/cv/nerf_recon_acc_pipeline.py": 1678695526.6060696, "TEMPLATE_PATH/pipelines/cv/image_bts_depth_estimation_pipeline.py": 1681714768.983773, "TEMPLATE_PATH/pipelines/cv/facial_landmark_confidence_pipeline.py": 1683889954.5463324, "TEMPLATE_PATH/pipelines/cv/face_reconstruction_pipeline.py": 1684246001.5418012, "TEMPLATE_PATH/pipelines/cv/mog_face_detection_pipeline.py": 1666757257.5102239, "TEMPLATE_PATH/pipelines/cv/skin_retouching_pipeline.py": 1684246001.5436969, "TEMPLATE_PATH/pipelines/cv/vision_middleware_pipeline.py": 1678345974.9092615, "TEMPLATE_PATH/pipelines/cv/face_liveness_ir_pipeline.py": 1683889954.542443, "TEMPLATE_PATH/pipelines/cv/image_detection_pipeline.py": 1678345974.8551383, "TEMPLATE_PATH/pipelines/cv/realtime_video_object_detection_pipeline.py": 1678695526.631697, 
"TEMPLATE_PATH/pipelines/cv/video_panoptic_segmentation_pipeline.py": 1678345974.9074109, "TEMPLATE_PATH/pipelines/cv/action_detection_pipeline.py": 1678345974.8401477, "TEMPLATE_PATH/pipelines/cv/product_segmentation_pipeline.py": 1666778289.692797, "TEMPLATE_PATH/pipelines/cv/tbs_detection_pipeline.py": 1684246001.544016, "TEMPLATE_PATH/pipelines/cv/image_matching_pipeline.py": 1678345974.857486, "TEMPLATE_PATH/pipelines/cv/video_category_pipeline.py": 1669108798.6445787, "TEMPLATE_PATH/pipelines/cv/hand_static_pipeline.py": 1666778289.6920865, "TEMPLATE_PATH/pipelines/cv/animal_recognition_pipeline.py": 1678345974.840479, "TEMPLATE_PATH/pipelines/cv/pointcloud_sceneflow_estimation_pipeline.py": 1678345974.9002383, "TEMPLATE_PATH/pipelines/cv/image_instance_segmentation_pipeline.py": 1678345974.8571947, "TEMPLATE_PATH/pipelines/cv/video_frame_interpolation_pipeline.py": 1678345974.9055316, "TEMPLATE_PATH/pipelines/cv/image_quality_assessment_mos_pipeline.py": 1678345974.8893554, "TEMPLATE_PATH/pipelines/cv/video_summarization_pipeline.py": 1666757257.5319543, "TEMPLATE_PATH/pipelines/cv/panorama_depth_estimation_pipeline.py": 1678345974.8999748, "TEMPLATE_PATH/pipelines/cv/fast_instance_segmentation_pipeline.py": 1684246001.5421734, "TEMPLATE_PATH/pipelines/cv/vidt_pipeline.py": 1681714768.9865973, "TEMPLATE_PATH/pipelines/cv/image_skychange_pipeline.py": 1678345974.8903258, "TEMPLATE_PATH/pipelines/cv/image_quality_assessment_man_pipeline.py": 1678695526.6047776, "TEMPLATE_PATH/pipelines/cv/image_restoration_pipeline.py": 1678345974.8897073, "TEMPLATE_PATH/pipelines/cv/video_inpainting_pipeline.py": 1666757257.530707, "TEMPLATE_PATH/pipelines/cv/face_image_generation_pipeline.py": 1666757257.4936557, "TEMPLATE_PATH/pipelines/cv/video_super_resolution_pipeline.py": 1678695526.6385, "TEMPLATE_PATH/pipelines/cv/referring_video_object_segmentation_pipeline.py": 1678695526.633713, "TEMPLATE_PATH/pipelines/cv/virtual_try_on_pipeline.py": 1678345974.9087186, "TEMPLATE_PATH/pipelines/cv/ocr_recognition_pipeline.py": 1681714768.98453, "TEMPLATE_PATH/pipelines/cv/ocr_detection_pipeline.py": 1678695526.607303, "TEMPLATE_PATH/pipelines/cv/movie_scene_segmentation_pipeline.py": 1684121077.5641353, "TEMPLATE_PATH/pipelines/cv/maskdino_instance_segmentation_pipeline.py": 1678345974.892828, "TEMPLATE_PATH/pipelines/cv/video_colorization_pipeline.py": 1678345974.904686, "TEMPLATE_PATH/pipelines/cv/image_human_parsing_pipeline.py": 1678345974.8562174, "TEMPLATE_PATH/pipelines/cv/face_liveness_xc_pipeline.py": 1683889954.5426972, "TEMPLATE_PATH/pipelines/cv/crowd_counting_pipeline.py": 1666757257.4877608, "TEMPLATE_PATH/pipelines/cv/video_depth_estimation_pipeline.py": 1678345974.9052026, "TEMPLATE_PATH/pipelines/cv/image_colorization_pipeline.py": 1666757257.5002234, "TEMPLATE_PATH/pipelines/cv/arc_face_recognition_pipeline.py": 1683889954.5418775, "TEMPLATE_PATH/pipelines/cv/image_quality_assessment_degradation_pipeline.py": 1678345974.8601525, "TEMPLATE_PATH/pipelines/cv/ocr_utils/model_convnext_transformer.py": 1666757257.5147195, "TEMPLATE_PATH/pipelines/cv/ocr_utils/model_resnet18_half.py": 1678345974.8975644, "TEMPLATE_PATH/pipelines/cv/ocr_utils/resnet18_v1.py": 1666757257.5203307, "TEMPLATE_PATH/pipelines/cv/ocr_utils/model_dla34.py": 1678345974.897257, "TEMPLATE_PATH/pipelines/cv/ocr_utils/ocr_modules/vitstr.py": 1666757257.5185978, "TEMPLATE_PATH/pipelines/cv/ocr_utils/ocr_modules/timm_tinyc.py": 1666757257.517882, "TEMPLATE_PATH/pipelines/cv/ocr_utils/ocr_modules/convnext.py": 
1666757257.5171049, "TEMPLATE_PATH/pipelines/cv/ocr_utils/table_process.py": 1678345974.8992608, "TEMPLATE_PATH/pipelines/cv/ocr_utils/resnet_utils.py": 1666757257.520979, "TEMPLATE_PATH/pipelines/cv/ocr_utils/ops.py": 1678345974.898596, "TEMPLATE_PATH/pipelines/cv/ocr_utils/utils.py": 1678345974.8997033, "TEMPLATE_PATH/pipelines/cv/ocr_utils/model_vlpt.py": 1678345974.8979936, "TEMPLATE_PATH/pipelines/cv/ocr_utils/model_resnet_mutex_v4_linewithchar.py": 1666757257.5153735, "TEMPLATE_PATH/pipelines/cv/image_inpainting_sdv2_pipeline.py": 1678345974.8565032, "TEMPLATE_PATH/pipelines/cv/image_super_resolution_pipeline.py": 1666757257.5076354, "TEMPLATE_PATH/pipelines/cv/image_salient_detection_pipeline.py": 1678345974.890104, "TEMPLATE_PATH/pipelines/cv/video_single_object_tracking_pipeline.py": 1678695526.637953, "TEMPLATE_PATH/pipelines/cv/face_recognition_onnx_ir_pipeline.py": 1683889954.5448508, "TEMPLATE_PATH/pipelines/cv/product_retrieval_embedding_pipeline.py": 1666757257.5221663, "TEMPLATE_PATH/pipelines/cv/mask_face_recognition_pipeline.py": 1678345974.8923888, "TEMPLATE_PATH/pipelines/cv/mobile_image_super_resolution_pipeline.py": 1678345974.893134, "TEMPLATE_PATH/pipelines/cv/license_plate_detection_pipeline.py": 1678345974.8913991, "TEMPLATE_PATH/pipelines/cv/image_semantic_segmentation_pipeline.py": 1666757257.5062222, "TEMPLATE_PATH/pipelines/cv/text_driven_segmentation_pipleline.py": 1666757257.5275502, "TEMPLATE_PATH/pipelines/cv/motion_generation_pipeline.py": 1678345974.8933938, "TEMPLATE_PATH/pipelines/cv/image_mvs_depth_estimation_pipeline.py": 1678345974.8579566, "TEMPLATE_PATH/pipelines/cv/image_depth_estimation_pipeline.py": 1678345974.854762, "TEMPLATE_PATH/pipelines/cv/action_recognition_pipeline.py": 1666757257.4842403, "TEMPLATE_PATH/pipelines/cv/image_reid_person_pipeline.py": 1666757257.5051024, "TEMPLATE_PATH/pipelines/cv/general_recognition_pipeline.py": 1678345974.851133, "TEMPLATE_PATH/pipelines/cv/shop_segmentation_pipleline.py": 1666757257.5262067, "TEMPLATE_PATH/pipelines/base.py": 1684246001.5408666, "TEMPLATE_PATH/preprocessors/kws.py": 1669108798.6509876, "TEMPLATE_PATH/preprocessors/multi_modal.py": 1684246001.5620222, "TEMPLATE_PATH/preprocessors/science/uni_fold.py": 1678345974.9713385, "TEMPLATE_PATH/preprocessors/tts.py": 1678695526.6618354, "TEMPLATE_PATH/preprocessors/asr.py": 1684246001.5611215, "TEMPLATE_PATH/preprocessors/builder.py": 1666757257.563367, "TEMPLATE_PATH/preprocessors/movie_scene_segmentation/transforms.py": 1678695526.6574507, "TEMPLATE_PATH/preprocessors/common.py": 1678695526.6514163, "TEMPLATE_PATH/preprocessors/nlp/token_classification_preprocessor.py": 1684246001.562926, "TEMPLATE_PATH/preprocessors/nlp/siamese_uie_preprocessor.py": 1678695526.6586974, "TEMPLATE_PATH/preprocessors/nlp/relation_extraction_preprocessor.py": 1678345974.9533129, "TEMPLATE_PATH/preprocessors/nlp/token_classification_viet_preprocessor.py": 1678345974.962513, "TEMPLATE_PATH/preprocessors/nlp/translation_evaluation_preprocessor.py": 1684246001.5633366, "TEMPLATE_PATH/preprocessors/nlp/text_classification_preprocessor.py": 1678345974.957994, "TEMPLATE_PATH/preprocessors/nlp/document_grounded_dialog_retrieval_preprocessor.py": 1678345974.9482706, "TEMPLATE_PATH/preprocessors/nlp/zero_shot_classification_preprocessor.py": 1678345974.9639843, "TEMPLATE_PATH/preprocessors/nlp/canmt_translation.py": 1683889954.5585697, "TEMPLATE_PATH/preprocessors/nlp/fill_mask_preprocessor.py": 1678345974.952453, 
"TEMPLATE_PATH/preprocessors/nlp/word_alignment_preprocessor.py": 1678695526.6615062, "TEMPLATE_PATH/preprocessors/nlp/space_T_en/fields/preprocess_dataset.py": 1666757257.5921733, "TEMPLATE_PATH/preprocessors/nlp/space_T_en/fields/parse.py": 1666757257.5916936, "TEMPLATE_PATH/preprocessors/nlp/space_T_en/fields/common_utils.py": 1666757257.5911734, "TEMPLATE_PATH/preprocessors/nlp/space_T_en/fields/process_dataset.py": 1666757257.5927129, "TEMPLATE_PATH/preprocessors/nlp/space_T_en/conversational_text_to_sql_preprocessor.py": 1669108798.6572416, "TEMPLATE_PATH/preprocessors/nlp/document_grounded_dialog_generate_preprocessor.py": 1678345974.946995, "TEMPLATE_PATH/preprocessors/nlp/text_error_correction.py": 1678695526.6594443, "TEMPLATE_PATH/preprocessors/nlp/text_ranking_preprocessor.py": 1678345974.9603443, "TEMPLATE_PATH/preprocessors/nlp/transformers_tokenizer.py": 1683889954.5599382, "TEMPLATE_PATH/preprocessors/nlp/bert_seq_cls_tokenizer.py": 1666757257.5692148, "TEMPLATE_PATH/preprocessors/nlp/text_clean.py": 1683889954.5587656, "TEMPLATE_PATH/preprocessors/nlp/utils.py": 1678345974.9635713, "TEMPLATE_PATH/preprocessors/nlp/document_segmentation_preprocessor.py": 1678345974.94956, "TEMPLATE_PATH/preprocessors/nlp/sentence_embedding_preprocessor.py": 1678345974.9542353, "TEMPLATE_PATH/preprocessors/nlp/mglm_summarization_preprocessor.py": 1669108798.653473, "TEMPLATE_PATH/preprocessors/nlp/token_classification_thai_preprocessor.py": 1678345974.9620914, "TEMPLATE_PATH/preprocessors/nlp/mgeo_ranking_preprocessor.py": 1678345974.9527726, "TEMPLATE_PATH/preprocessors/nlp/space/dialog_intent_prediction_preprocessor.py": 1669108798.6542664, "TEMPLATE_PATH/preprocessors/nlp/space/lazy_dataset.py": 1666757257.582808, "TEMPLATE_PATH/preprocessors/nlp/space/dialog_state_tracking_preprocessor.py": 1666757257.579098, "TEMPLATE_PATH/preprocessors/nlp/space/preprocess.py": 1666757257.583339, "TEMPLATE_PATH/preprocessors/nlp/space/data_loader.py": 1666757257.5771036, "TEMPLATE_PATH/preprocessors/nlp/space/batch.py": 1678345974.9554622, "TEMPLATE_PATH/preprocessors/nlp/space/dialog_modeling_preprocessor.py": 1666757257.5782604, "TEMPLATE_PATH/preprocessors/nlp/space/tokenizer.py": 1678345974.9572399, "TEMPLATE_PATH/preprocessors/nlp/space/dst_processors.py": 1669108798.6548202, "TEMPLATE_PATH/preprocessors/nlp/space/args.py": 1666757257.5759423, "TEMPLATE_PATH/preprocessors/nlp/space/fields/gen_field.py": 1678345974.95627, "TEMPLATE_PATH/preprocessors/nlp/space/fields/intent_field.py": 1666757257.5822835, "TEMPLATE_PATH/preprocessors/nlp/space/sampler.py": 1666757257.5839186, "TEMPLATE_PATH/preprocessors/nlp/space/tensorlistdataset.py": 1666757257.5844374, "TEMPLATE_PATH/preprocessors/nlp/dialog_classification_use_preprocessor.py": 1678345974.9462962, "TEMPLATE_PATH/preprocessors/nlp/text_generation_preprocessor.py": 1681714768.996753, "TEMPLATE_PATH/preprocessors/nlp/space_T_cn/table_question_answering_preprocessor.py": 1666757257.5885906, "TEMPLATE_PATH/preprocessors/nlp/space_T_cn/fields/database.py": 1669108798.6561291, "TEMPLATE_PATH/preprocessors/nlp/space_T_cn/fields/schema_link.py": 1666757257.587568, "TEMPLATE_PATH/preprocessors/nlp/space_T_cn/fields/struct.py": 1678345974.9575932, "TEMPLATE_PATH/preprocessors/nlp/document_grounded_dialog_rerank_preprocessor.py": 1678345974.9476607, "TEMPLATE_PATH/preprocessors/nlp/feature_extraction_preprocessor.py": 1678345974.9510474, "TEMPLATE_PATH/preprocessors/nlp/faq_question_answering_preprocessor.py": 1678345974.950317, 
"TEMPLATE_PATH/preprocessors/audio.py": 1678345974.9349864, "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py": 1678695526.6556287, "TEMPLATE_PATH/preprocessors/cv/util.py": 1678345974.9410372, "TEMPLATE_PATH/preprocessors/cv/timer.py": 1678345974.940564, "TEMPLATE_PATH/preprocessors/cv/bad_image_detecting_preprocessor.py": 1678345974.9370506, "TEMPLATE_PATH/preprocessors/cv/mmcls_preprocessor.py": 1678345974.9400585, "TEMPLATE_PATH/preprocessors/cv/controllable_image_generation.py": 1678695526.6533623, "TEMPLATE_PATH/preprocessors/cv/image_quality_assessment_mos.py": 1678345974.939592, "TEMPLATE_PATH/preprocessors/cv/image_restoration_preprocessor.py": 1678345974.939753, "TEMPLATE_PATH/preprocessors/cv/cv2_transforms.py": 1678695526.6544669, "TEMPLATE_PATH/preprocessors/cv/video_super_resolution.py": 1678345974.941883, "TEMPLATE_PATH/preprocessors/cv/image_quality_assessment_man.py": 1678695526.6563368, "TEMPLATE_PATH/preprocessors/cv/action_detection_mapper.py": 1678695526.6529279, "TEMPLATE_PATH/preprocessors/cv/video_stabilization.py": 1678345974.9413676, "TEMPLATE_PATH/preprocessors/video.py": 1678345974.971809, "TEMPLATE_PATH/preprocessors/image.py": 1678695526.6569033, "TEMPLATE_PATH/preprocessors/base.py": 1683889954.557044, "TEMPLATE_PATH/preprocessors/ofa/visual_question_answering.py": 1678345974.97092, "TEMPLATE_PATH/preprocessors/ofa/image_classification.py": 1678345974.9656992, "TEMPLATE_PATH/preprocessors/ofa/utils/transforms.py": 1666757257.6044796, "TEMPLATE_PATH/preprocessors/ofa/utils/bridge_content_encoder.py": 1678345974.9681842, "TEMPLATE_PATH/preprocessors/ofa/utils/collate.py": 1678345974.968443, "TEMPLATE_PATH/preprocessors/ofa/utils/get_tables.py": 1678345974.9691057, "TEMPLATE_PATH/preprocessors/ofa/utils/text2phone.py": 1678345974.9699347, "TEMPLATE_PATH/preprocessors/ofa/utils/audio_helper.py": 1678345974.967928, "TEMPLATE_PATH/preprocessors/ofa/utils/random_help.py": 1678345974.969357, "TEMPLATE_PATH/preprocessors/ofa/utils/vision_helper.py": 1666757257.6050525, "TEMPLATE_PATH/preprocessors/ofa/utils/constant.py": 1678345974.9688995, "TEMPLATE_PATH/preprocessors/ofa/asr.py": 1678345974.964469, "TEMPLATE_PATH/preprocessors/ofa/text2sql.py": 1678345974.9668753, "TEMPLATE_PATH/preprocessors/ofa/text_classification.py": 1678345974.9671476, "TEMPLATE_PATH/preprocessors/ofa/image_captioning.py": 1678345974.9651005, "TEMPLATE_PATH/preprocessors/ofa/ocr_recognition.py": 1678345974.9659903, "TEMPLATE_PATH/preprocessors/ofa/visual_entailment.py": 1678345974.9702795, "TEMPLATE_PATH/preprocessors/ofa/visual_grounding.py": 1678345974.970591, "TEMPLATE_PATH/preprocessors/ofa/summarization.py": 1678345974.96643, "TEMPLATE_PATH/preprocessors/ofa/text_to_image_synthesis.py": 1678345974.967411, "TEMPLATE_PATH/preprocessors/ofa/sudoku.py": 1678345974.966176, "TEMPLATE_PATH/preprocessors/ofa/base.py": 1678345974.9648006, "TEMPLATE_PATH/trainers/parallel/builder.py": 1666757257.6517034, "TEMPLATE_PATH/trainers/parallel/utils.py": 1666757257.652214, "TEMPLATE_PATH/trainers/optimizer/builder.py": 1678345975.0087109, "TEMPLATE_PATH/trainers/optimizer/child_tuning_adamw_optimizer.py": 1678345975.0091202, "TEMPLATE_PATH/trainers/lrscheduler/builder.py": 1681714769.013421, "TEMPLATE_PATH/trainers/lrscheduler/warmup/warmup.py": 1666757257.6361334, "TEMPLATE_PATH/trainers/lrscheduler/warmup/base.py": 1666757257.635629, "TEMPLATE_PATH/trainers/nlp_trainer.py": 1681714769.015515, "TEMPLATE_PATH/trainers/utils/inference.py": 1681714769.069791, 
"TEMPLATE_PATH/trainers/utils/log_buffer.py": 1666757257.6546545, "TEMPLATE_PATH/trainers/training_args.py": 1684246001.5734115, "TEMPLATE_PATH/trainers/builder.py": 1683889954.5614784, "TEMPLATE_PATH/trainers/audio/kws_nearfield_trainer.py": 1683889954.5606887, "TEMPLATE_PATH/trainers/audio/kws_utils/model_utils.py": 1678345974.9777398, "TEMPLATE_PATH/trainers/audio/kws_utils/runtime_utils.py": 1678345974.9782813, "TEMPLATE_PATH/trainers/audio/kws_utils/det_utils.py": 1681714769.0015252, "TEMPLATE_PATH/trainers/audio/kws_utils/batch_utils.py": 1683889954.5611897, "TEMPLATE_PATH/trainers/audio/kws_utils/file_utils.py": 1681714769.0070894, "TEMPLATE_PATH/trainers/audio/kws_farfield_trainer.py": 1681714768.9980917, "TEMPLATE_PATH/trainers/audio/separation_trainer.py": 1678345974.9789073, "TEMPLATE_PATH/trainers/audio/asr_trainer.py": 1678345974.974677, "TEMPLATE_PATH/trainers/audio/tts_trainer.py": 1678695526.6909325, "TEMPLATE_PATH/trainers/audio/ans_trainer.py": 1666757257.6118267, "TEMPLATE_PATH/trainers/hooks/checkpoint/checkpoint_hook.py": 1684246001.5663064, "TEMPLATE_PATH/trainers/hooks/checkpoint/checkpoint_processor.py": 1684246001.5666258, "TEMPLATE_PATH/trainers/hooks/checkpoint/load_checkpoint_hook.py": 1684246001.5668476, "TEMPLATE_PATH/trainers/hooks/logger/text_logger_hook.py": 1683889954.5632632, "TEMPLATE_PATH/trainers/hooks/logger/tensorboard_hook.py": 1678695526.7020135, "TEMPLATE_PATH/trainers/hooks/logger/base.py": 1666757257.6279666, "TEMPLATE_PATH/trainers/hooks/optimizer/apex_optimizer_hook.py": 1684246001.5697649, "TEMPLATE_PATH/trainers/hooks/optimizer/torch_optimizer_hook.py": 1684246001.5703554, "TEMPLATE_PATH/trainers/hooks/optimizer/base.py": 1684246001.5700371, "TEMPLATE_PATH/trainers/hooks/distributed/megatron_hook.py": 1684246001.5681868, "TEMPLATE_PATH/trainers/hooks/distributed/deepspeed_hook.py": 1684246001.5679266, "TEMPLATE_PATH/trainers/hooks/distributed/ddp_hook.py": 1684246001.567703, "TEMPLATE_PATH/trainers/hooks/lr_scheduler_hook.py": 1684246001.569458, "TEMPLATE_PATH/trainers/hooks/early_stop_hook.py": 1684246001.5685089, "TEMPLATE_PATH/trainers/hooks/hook.py": 1684246001.5691583, "TEMPLATE_PATH/trainers/hooks/priority.py": 1666757257.6328363, "TEMPLATE_PATH/trainers/hooks/builder.py": 1666757257.6225636, "TEMPLATE_PATH/trainers/hooks/clip_clamp_logit_scale_hook.py": 1669108798.683138, "TEMPLATE_PATH/trainers/hooks/compression/sparsity_hook.py": 1684246001.567191, "TEMPLATE_PATH/trainers/hooks/compression/utils.py": 1678345974.9935489, "TEMPLATE_PATH/trainers/hooks/iter_timer_hook.py": 1666757257.6266162, "TEMPLATE_PATH/trainers/hooks/evaluation_hook.py": 1684246001.5688426, "TEMPLATE_PATH/trainers/multi_modal/clip/clip_trainer.py": 1684246001.571492, "TEMPLATE_PATH/trainers/multi_modal/clip/clip_trainer_utils.py": 1669108798.6861904, "TEMPLATE_PATH/trainers/multi_modal/efficient_diffusion_tuning/efficient_diffusion_tuning_trainer.py": 1683889954.5644114, "TEMPLATE_PATH/trainers/multi_modal/mplug/mplug_trainer.py": 1678345975.001256, "TEMPLATE_PATH/trainers/multi_modal/team/team_trainer.py": 1678345975.0027127, "TEMPLATE_PATH/trainers/multi_modal/team/team_trainer_utils.py": 1669108798.690418, "TEMPLATE_PATH/trainers/multi_modal/mgeo_ranking_trainer.py": 1678345975.0009506, "TEMPLATE_PATH/trainers/multi_modal/ofa/ofa_trainer.py": 1678345975.0016596, "TEMPLATE_PATH/trainers/multi_modal/ofa/ofa_trainer_utils.py": 1678345975.002343, "TEMPLATE_PATH/trainers/default_config.py": 1684246001.5650253, "TEMPLATE_PATH/trainers/nlp/gpt_moe_trainer.py": 
1678345975.0055368, "TEMPLATE_PATH/trainers/nlp/plug_trainer.py": 1678695526.7082524, "TEMPLATE_PATH/trainers/nlp/text_generation_trainer.py": 1681714769.0152323, "TEMPLATE_PATH/trainers/nlp/document_grounded_dialog_rerank_trainer.py": 1678345975.004579, "TEMPLATE_PATH/trainers/nlp/csanmt_translation_trainer.py": 1678345975.0036387, "TEMPLATE_PATH/trainers/nlp/translation_evaluation_trainer.py": 1684246001.5721004, "TEMPLATE_PATH/trainers/nlp/faq_question_answering_trainer.py": 1678345975.0051053, "TEMPLATE_PATH/trainers/nlp/table_question_answering_trainer.py": 1678345975.0076075, "TEMPLATE_PATH/trainers/nlp/sequence_classification_trainer.py": 1678345975.0066545, "TEMPLATE_PATH/trainers/nlp/sentence_embedding_trainer.py": 1678695526.7085762, "TEMPLATE_PATH/trainers/nlp/gpt3_trainer.py": 1681714769.014518, "TEMPLATE_PATH/trainers/nlp/text_ranking_trainer.py": 1666757257.6478848, "TEMPLATE_PATH/trainers/nlp/siamese_uie_trainer.py": 1681714769.014841, "TEMPLATE_PATH/trainers/nlp/space/metrics/metrics_tracker.py": 1666757257.645518, "TEMPLATE_PATH/trainers/nlp/space/dialog_intent_trainer.py": 1666757257.6433034, "TEMPLATE_PATH/trainers/nlp/space/eval.py": 1669108798.6920927, "TEMPLATE_PATH/trainers/nlp/space/trainer/intent_trainer.py": 1666757257.6473625, "TEMPLATE_PATH/trainers/nlp/space/trainer/gen_trainer.py": 1666757257.6467648, "TEMPLATE_PATH/trainers/nlp/space/dialog_modeling_trainer.py": 1666757257.64378, "TEMPLATE_PATH/trainers/nlp/document_grounded_dialog_retrieval_trainer.py": 1678345975.004814, "TEMPLATE_PATH/trainers/nlp/document_grounded_dialog_generate_trainer.py": 1678345975.0042856, "TEMPLATE_PATH/trainers/cli_argument_parser.py": 1684246001.5641959, "TEMPLATE_PATH/trainers/cv/ocr_recognition_trainer.py": 1681714769.0097866, "TEMPLATE_PATH/trainers/cv/image_instance_segmentation_trainer.py": 1666757257.6163688, "TEMPLATE_PATH/trainers/cv/referring_video_object_segmentation_trainer.py": 1678695526.6955135, "TEMPLATE_PATH/trainers/cv/vision_efficient_tuning_trainer.py": 1678695526.696203, "TEMPLATE_PATH/trainers/cv/movie_scene_segmentation_trainer.py": 1666757257.617418, "TEMPLATE_PATH/trainers/cv/nerf_recon_acc_trainer.py": 1678695526.694501, "TEMPLATE_PATH/trainers/cv/image_detection_damoyolo_trainer.py": 1681714769.008786, "TEMPLATE_PATH/trainers/cv/image_classifition_trainer.py": 1684246001.5647185, "TEMPLATE_PATH/trainers/cv/cartoon_translation_trainer.py": 1678695526.6927238, "TEMPLATE_PATH/trainers/cv/ocr_detection_db_trainer.py": 1681714769.0095627, "TEMPLATE_PATH/trainers/cv/card_detection_scrfd_trainer.py": 1666757257.6147146, "TEMPLATE_PATH/trainers/cv/face_detection_scrfd_trainer.py": 1666757257.6152842, "TEMPLATE_PATH/trainers/cv/image_inpainting_trainer.py": 1666757257.6158333, "TEMPLATE_PATH/trainers/cv/image_portrait_enhancement_trainer.py": 1666757257.6168902, "TEMPLATE_PATH/trainers/cv/action_detection_trainer.py": 1678695526.6920478, "TEMPLATE_PATH/trainers/cv/image_defrcn_fewshot_detection_trainer.py": 1678345974.9814935, "TEMPLATE_PATH/trainers/trainer.py": 1684246001.5725896, "TEMPLATE_PATH/trainers/base.py": 1681714769.0076036, "TEMPLATE_PATH/msdatasets/ms_dataset.py": 1684317777.8670049, "TEMPLATE_PATH/msdatasets/context/dataset_context_config.py": 1684246001.5200734, "TEMPLATE_PATH/msdatasets/auth/auth_config.py": 1684121077.5437593, "TEMPLATE_PATH/msdatasets/meta/data_meta_config.py": 1681714768.9622037, "TEMPLATE_PATH/msdatasets/meta/data_meta_manager.py": 1684246001.5257208, "TEMPLATE_PATH/msdatasets/utils/oss_utils.py": 1678345974.7962904, 
"TEMPLATE_PATH/msdatasets/utils/maxcompute_utils.py": 1684246001.5273504, "TEMPLATE_PATH/msdatasets/utils/dataset_utils.py": 1681789723.9124653, "TEMPLATE_PATH/msdatasets/utils/delete_utils.py": 1669108798.6375175, "TEMPLATE_PATH/msdatasets/utils/upload_utils.py": 1678345974.7967587, "TEMPLATE_PATH/msdatasets/task_datasets/video_summarization_dataset.py": 1681714768.9676905, "TEMPLATE_PATH/msdatasets/task_datasets/sidd_image_denoising.py": 1681714768.966313, "TEMPLATE_PATH/msdatasets/task_datasets/torch_base_dataset.py": 1681714768.9668994, "TEMPLATE_PATH/msdatasets/task_datasets/reds_image_deblurring_dataset.py": 1681714768.9657562, "TEMPLATE_PATH/msdatasets/task_datasets/gopro_image_deblurring_dataset.py": 1681714768.9650407, "TEMPLATE_PATH/msdatasets/data_files/data_files_manager.py": 1681714541.6009839, "TEMPLATE_PATH/msdatasets/audio/asr_dataset.py": 1681714768.9251826, "TEMPLATE_PATH/msdatasets/download/download_config.py": 1678345974.7719202, "TEMPLATE_PATH/msdatasets/download/download_manager.py": 1678345974.772169, "TEMPLATE_PATH/msdatasets/download/dataset_builder.py": 1684246001.5244808, "TEMPLATE_PATH/msdatasets/dataset_cls/dataset.py": 1684246001.5233805, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_quality_assessment_degradation/image_quality_assessment_degradation_dataset.py": 1681714769.3278096, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_portrait_enhancement/data_utils.py": 1681714769.329037, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_portrait_enhancement/image_portrait_enhancement_dataset.py": 1681714769.3286672, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/veco_dataset.py": 1681714769.3091025, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_instance_segmentation_coco_dataset.py": 1681714769.3300066, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_recognition_dataset.py": 1684246001.5225265, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/image_dataset.py": 1681714769.3208869, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/make_border_map.py": 1681714769.316306, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/normalize_image.py": 1681714769.3145473, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/make_icdar_data.py": 1681714769.3156052, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/make_seg_detection_data.py": 1681714769.3150744, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/data_process.py": 1681714769.3170214, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/augment_data.py": 1681714769.3176525, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/random_crop_data.py": 1681714769.3139958, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/data_loader.py": 1681714769.3214602, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/measures/quad_measurer.py": 1681714769.319075, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/measures/iou_evaluator.py": 1681714769.3196485, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/augmenter.py": 1681714769.3220074, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/bad_image_detecting/bad_image_detecting_dataset.py": 1681714769.3407733, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_summarization_dataset.py": 1681714768.9606102, 
"TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_inpainting/image_inpainting_dataset.py": 1681714769.3304625, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_inpainting/aug.py": 1681714769.33086, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/language_guided_video_summarization_dataset.py": 1681714769.3258283, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/builder.py": 1681714769.3403647, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py": 1681714769.324932, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/movie_scene_segmentation/sampler.py": 1681714769.32447, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/text_ranking_dataset.py": 1681714769.3096716, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/audio/kws_nearfield_dataset.py": 1681714769.3418102, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/audio/kws_farfield_dataset.py": 1681714769.3421595, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/audio/kws_nearfield_processor.py": 1681714769.3414555, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/audio/asr_dataset.py": 1681714768.928494, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/sidd_image_denoising/transforms.py": 1681714769.3102627, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/sidd_image_denoising/sidd_image_denoising_dataset.py": 1681714769.3109276, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/sidd_image_denoising/data_utils.py": 1681714769.3115368, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/reds_image_deblurring_dataset.py": 1681714768.9551075, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_frame_interpolation/data_utils.py": 1681714769.3079662, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_frame_interpolation/video_frame_interpolation_dataset.py": 1681714769.3073726, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_quality_assmessment_mos/image_quality_assessment_mos_dataset.py": 1681714769.3270853, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/mgeo_ranking_dataset.py": 1681714769.32538, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_stabilization/video_stabilization_dataset.py": 1681714769.3063674, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/gopro_image_deblurring_dataset.py": 1681714768.9389687, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/referring_video_object_segmentation/transformers.py": 1681714769.3127193, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py": 1681714769.3133628, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_colorization/image_colorization_dataset.py": 1681714768.9425967, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/build.py": 1681714769.3396943, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/datasets/coco.py": 1681714769.3387377, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/datasets/mosaic_wrapper.py": 1681714769.338384, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/transforms/build.py": 1681714769.3349338, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/transforms/transforms.py": 1681714769.334567, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/collate_batch.py": 1681714769.3393688, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/samplers/grouped_batch_sampler.py": 1681714769.3360593, 
"TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/samplers/distributed.py": 1681714769.336487, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/samplers/iteration_based_batch_sampler.py": 1681714769.3356428, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/evaluation/coco/coco_eval.py": 1681714769.3372462, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/easycv_base.py": 1681714769.3340495, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/torch_custom_dataset.py": 1681714768.9585028, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_super_resolution/video_super_resolution_dataset.py": 1681714769.305487, "TEMPLATE_PATH/msdatasets/data_loader/data_loader_manager.py": 1684121077.5480983, "TEMPLATE_PATH/msdatasets/data_loader/data_loader.py": 1684246001.5212934, "TEMPLATE_PATH/exporters/torch_model_exporter.py": 1678695526.18393, "TEMPLATE_PATH/exporters/builder.py": 1666757257.1189609, "TEMPLATE_PATH/exporters/audio/ans_dfsmn_exporter.py": 1684246001.4573822, "TEMPLATE_PATH/exporters/nlp/csanmt_for_translation_exporter.py": 1681714768.8512428, "TEMPLATE_PATH/exporters/nlp/model_for_token_classification_exporter.py": 1683889954.460512, "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py": 1678345974.1415546, "TEMPLATE_PATH/exporters/nlp/sbert_for_zero_shot_classification_exporter.py": 1678345974.1418796, "TEMPLATE_PATH/exporters/cv/object_detection_damoyolo_exporter.py": 1678695526.1809118, "TEMPLATE_PATH/exporters/cv/face_detection_scrfd_exporter.py": 1678695526.1807334, "TEMPLATE_PATH/exporters/cv/cartoon_translation_exporter.py": 1678695526.1803331, "TEMPLATE_PATH/exporters/tf_model_exporter.py": 1678695526.1826663, "TEMPLATE_PATH/exporters/base.py": 1678345974.1376836}, "modelscope_path": "TEMPLATE_PATH"} \ No newline at end of file diff --git a/modelscope/utils/ast_utils.py b/modelscope/utils/ast_utils.py index 374ada20..5cee374d 100644 --- a/modelscope/utils/ast_utils.py +++ b/modelscope/utils/ast_utils.py @@ -1,7 +1,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import ast -import contextlib import hashlib import os import os.path as osp @@ -9,12 +8,11 @@ import time import traceback from functools import reduce from pathlib import Path -from typing import Generator, Union +from typing import Union import gast import json -from modelscope import __version__ from modelscope.fileio.file import LocalStorage from modelscope.metainfo import (CustomDatasets, Heads, Hooks, LR_Schedulers, Metrics, Models, Optimizers, Pipelines, @@ -574,6 +572,7 @@ file_scanner = FilesAstScanning() def _save_index(index, file_path, file_list=None, with_template=False): # convert tuple key to str key index[INDEX_KEY] = {str(k): v for k, v in index[INDEX_KEY].items()} + from modelscope.version import __version__ index[VERSION_KEY] = __version__ index[MD5_KEY], index[FILES_MTIME_KEY] = file_scanner.files_mtime_md5( file_list=file_list) @@ -682,6 +681,7 @@ def load_index( if not force_rebuild and os.path.exists(file_path): wrapped_index = _load_index(file_path) md5, files_mtime = file_scanner.files_mtime_md5(file_list=file_list) + from modelscope.version import __version__ if (wrapped_index[VERSION_KEY] == __version__): index = wrapped_index if (wrapped_index[MD5_KEY] != md5): diff --git a/modelscope/utils/audio/audio_utils.py b/modelscope/utils/audio/audio_utils.py index 3336af06..562769b8 100644 --- a/modelscope/utils/audio/audio_utils.py +++ b/modelscope/utils/audio/audio_utils.py @@ -11,6 +11,7 @@ from urllib.parse import urlparse import numpy as np from modelscope.fileio.file import HTTPStorage +from modelscope.hub.utils.utils import get_cache_dir from modelscope.utils.hub import snapshot_download from modelscope.utils.logger import get_logger @@ -323,34 +324,31 @@ def generate_sd_scp_from_url(urls: Union[tuple, list]): def update_local_model(model_config, model_path, extra_args): + if 'update_model' in extra_args and not extra_args['update_model']: + return + model_revision = None if 'update_model' in extra_args: if extra_args['update_model'] == 'latest': model_revision = None else: model_revision = extra_args['update_model'] - if model_config.__contains__('model'): - model_name = model_config['model'] - if isinstance(model_path, str) and os.path.exists(model_path): - try: - logger.info( - 'Download the model to local path {0} ...'.format( - model_path)) - src_path = snapshot_download( - model_name, revision=model_revision) - # cp to model_path - if src_path == model_path: - logger.warning('src_path is the same with model_path') - return - for filename in os.listdir(src_path): - src_file = os.path.join(src_path, filename) - dst_file = os.path.join(model_path, filename) - if os.path.isfile(src_file): - shutil.copy2(src_file, model_path) - elif os.path.isdir(src_file): - if os.path.exists(dst_file): - shutil.rmtree(dst_file) - shutil.copytree(src_file, dst_file) - except Exception as e: - logger.warning(str(e)) - else: - logger.warning('Can not find model name in configuration') + if model_config.__contains__('model'): + model_name = model_config['model'] + dst_dir_root = get_cache_dir() + if isinstance(model_path, str) and os.path.exists( + model_path) and not model_path.startswith(dst_dir_root): + try: + dst = os.path.join(dst_dir_root, '.cache/' + model_name) + dst_dir = os.path.dirname(dst) + os.makedirs(dst_dir, exist_ok=True) + if not os.path.exists(dst): + os.symlink(os.path.abspath(model_path), dst) + + snapshot_download( + model_name, + cache_dir=dst_dir_root, + revision=model_revision) + except Exception as e: + logger.warning(str(e)) + else: + 
logger.warning('Can not find model name in configuration') diff --git a/modelscope/utils/checkpoint.py b/modelscope/utils/checkpoint.py index 64681db4..bbde6034 100644 --- a/modelscope/utils/checkpoint.py +++ b/modelscope/utils/checkpoint.py @@ -5,7 +5,6 @@ import os import re import time from collections import OrderedDict -from functools import partial from shutil import copytree, ignore_patterns, rmtree from typing import Callable, Dict, Optional, Union @@ -15,7 +14,6 @@ from torch import nn from torch.optim import Optimizer from torch.optim.lr_scheduler import _LRScheduler -from modelscope import __version__ from modelscope.fileio import File, LocalStorage from modelscope.utils.config import Config, JSONIteratorEncoder from modelscope.utils.constant import ConfigFields, ModelFile @@ -76,6 +74,7 @@ def save_checkpoint(model: torch.nn.Module, elif not isinstance(meta, dict): raise TypeError( f'meta must be a dict or None, but got {type(meta)}') + from modelscope import __version__ meta.update(modelscope=__version__, time=time.asctime()) if isinstance(model, torch.nn.parallel.DistributedDataParallel): diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index 2382825a..1f44fc01 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -247,6 +247,7 @@ class MultiModalTasks(object): video_temporal_grounding = 'video-temporal-grounding' text_to_video_synthesis = 'text-to-video-synthesis' efficient_diffusion_tuning = 'efficient-diffusion-tuning' + multimodal_dialogue = 'multimodal-dialogue' class ScienceTasks(object): @@ -277,6 +278,7 @@ class Tasks(CVTasks, NLPTasks, AudioTasks, MultiModalTasks, ScienceTasks): This should be used to register models, pipelines, trainers. """ reverse_field_index = {} + task_template = 'task-template' @staticmethod def find_field_by_task(task_name): @@ -327,6 +329,7 @@ class Hubs(enum.Enum): """ modelscope = 'modelscope' huggingface = 'huggingface' + virgo = 'virgo' class DownloadMode(enum.Enum): @@ -539,3 +542,37 @@ class DistributedParallelType(object): class DatasetTensorflowConfig: BATCH_SIZE = 'batch_size' DEFAULT_BATCH_SIZE_VALUE = 5 + + +class VirgoDatasetConfig: + + default_virgo_namespace = 'default_namespace' + + default_dataset_version = '1' + + env_virgo_endpoint = 'VIRGO_ENDPOINT' + + # Columns for meta request + meta_content = 'metaContent' + sampling_type = 'samplingType' + + # Columns for meta content + col_id = 'id' + col_meta_info = 'meta_info' + col_analysis_result = 'analysis_result' + col_external_info = 'external_info' + col_cache_file = 'cache_file' + + +DEFAULT_MAXCOMPUTE_ENDPOINT = 'http://service-corp.odps.aliyun-inc.com/api' + + +class MaxComputeEnvs: + + ACCESS_ID = 'ODPS_ACCESS_ID' + + ACCESS_SECRET_KEY = 'ODPS_ACCESS_SECRET_KEY' + + PROJECT_NAME = 'ODPS_PROJECT_NAME' + + ENDPOINT = 'ODPS_ENDPOINT' diff --git a/modelscope/utils/demo_utils.py b/modelscope/utils/demo_utils.py deleted file mode 100644 index 99e61d45..00000000 --- a/modelscope/utils/demo_utils.py +++ /dev/null @@ -1,275 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
- -import io - -import json - -from modelscope.outputs import OutputKeys -from modelscope.pipelines import pipeline -from modelscope.utils.constant import Tasks, TasksIODescriptions -from modelscope.utils.service_utils import NumpyEncoder - -TASKS_INPUT_TEMPLATES = { - # vision tasks - Tasks.image_portrait_stylization: TasksIODescriptions.image_to_image, - Tasks.portrait_matting: TasksIODescriptions.image_to_image, - Tasks.skin_retouching: TasksIODescriptions.image_to_image, - Tasks.image_captioning: TasksIODescriptions.image_to_text, - Tasks.image_denoising: TasksIODescriptions.image_to_image, - Tasks.image_portrait_enhancement: TasksIODescriptions.image_to_image, - Tasks.image_super_resolution: TasksIODescriptions.image_to_image, - Tasks.image_colorization: TasksIODescriptions.image_to_image, - Tasks.image_color_enhancement: TasksIODescriptions.image_to_image, - Tasks.face_image_generation: TasksIODescriptions.seed_to_image, - Tasks.image_style_transfer: TasksIODescriptions.images_to_image, - Tasks.image_segmentation: TasksIODescriptions.image_to_text, - Tasks.image_object_detection: TasksIODescriptions.image_to_text, - - # not tested - Tasks.image_classification: TasksIODescriptions.image_to_text, - Tasks.ocr_detection: TasksIODescriptions.image_to_text, - Tasks.ocr_recognition: TasksIODescriptions.image_to_text, - Tasks.body_2d_keypoints: TasksIODescriptions.image_to_text, - Tasks.vision_efficient_tuning: TasksIODescriptions.image_to_text, - - # nlp tasks - Tasks.text_classification: TasksIODescriptions.text_to_text, - Tasks.text_generation: TasksIODescriptions.text_to_text, - Tasks.word_segmentation: TasksIODescriptions.text_to_text, - Tasks.text_error_correction: TasksIODescriptions.text_to_text, - Tasks.named_entity_recognition: TasksIODescriptions.text_to_text, - Tasks.sentiment_classification: TasksIODescriptions.text_to_text, - - # audio tasks - Tasks.text_to_speech: TasksIODescriptions.text_to_speech, - Tasks.auto_speech_recognition: TasksIODescriptions.speech_to_text, - Tasks.keyword_spotting: TasksIODescriptions.speech_to_text, - Tasks.acoustic_noise_suppression: TasksIODescriptions.speech_to_speech, - Tasks.acoustic_echo_cancellation: TasksIODescriptions.speeches_to_speech, - - # multi-modal - Tasks.visual_grounding: TasksIODescriptions.visual_grounding, - Tasks.visual_question_answering: - TasksIODescriptions.visual_question_answering, - Tasks.visual_entailment: TasksIODescriptions.visual_entailment, - Tasks.generative_multi_modal_embedding: - TasksIODescriptions.generative_multi_modal_embedding, - - # new tasks - Tasks.virtual_try_on: TasksIODescriptions.images_to_image, - - # TODO(lingcai.wl): support more tasks and implement corresponding example -} - -INPUT_EXAMPLES = { - # Must align with task schema defined in the Widget section of model card= - # cv - TasksIODescriptions.image_to_image: { - 'inputs': [ - 'https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/image_cartoon.png' - ], - 'urlPaths': { - 'outUrls': [{ - 'outputKey': OutputKeys.OUTPUT_IMG, - 'fileType': 'png' - }] - } - }, - TasksIODescriptions.images_to_image: { - 'inputs': [ - 'https://modelscope.oss-cn-beijing.aliyuncs.com/demo/image-style-transfer/style_transfer_content.jpg', - 'https://modelscope.oss-cn-beijing.aliyuncs.com/demo/image-style-transfer/style_transfer_style.jpg' - ], - 'urlPaths': { - 'outUrls': [{ - 'outputKey': OutputKeys.OUTPUT_IMG, - 'fileType': 'png' - }] - } - }, - TasksIODescriptions.image_to_text: { - 'inputs': [ - 
'https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/image_cartoon.png' - ], - 'urlPaths': {} - }, - # nlp - TasksIODescriptions.text_to_text: { - 'inputs': ['test'], - 'urlPaths': {} - }, - - # audio - TasksIODescriptions.speech_to_text: { - 'inputs': [ - 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example.wav' - ], - 'urlPaths': {} - }, - TasksIODescriptions.text_to_speech: { - 'inputs': ['北京今天天气怎么样'], - 'urlPaths': { - 'outUrls': [{ - 'outputKey': OutputKeys.OUTPUT_PCM, - 'fileType': 'pcm' - }] - } - }, - TasksIODescriptions.speeches_to_speech: { - 'inputs': [ - 'http://225252-file.oss-cn-hangzhou-zmf.aliyuncs.com/maas_demo/nearend_mic.wav', - 'http://225252-file.oss-cn-hangzhou-zmf.aliyuncs.com/maas_demo/nearend_speech.wav' - ], - 'urlPaths': { - 'outUrls': [{ - 'outputKey': OutputKeys.OUTPUT_PCM, - 'fileType': 'pcm' - }] - } - }, - TasksIODescriptions.speech_to_speech: { - 'inputs': [ - 'http://225252-file.oss-cn-hangzhou-zmf.aliyuncs.com/maas_demo/speech_with_noise.wav' - ], - 'urlPaths': { - 'outUrls': [{ - 'outputKey': OutputKeys.OUTPUT_PCM, - 'fileType': 'pcm' - }] - } - }, - - # multi modal - TasksIODescriptions.visual_grounding: { - 'task': - Tasks.visual_grounding, - 'inputs': [ - 'http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/visual-grounding/visual_grounding.png', - 'a blue turtle-like pokemon with round head' - ], - 'urlPaths': { - 'inUrls': [{ - 'name': 'image' - }, { - 'name': 'text' - }] - } - }, - TasksIODescriptions.visual_question_answering: { - 'task': - Tasks.visual_question_answering, - 'inputs': [ - 'http://225252-file.oss-cn-hangzhou-zmf.aliyuncs.com/maas_demo/visual_question_answering.png', - 'what is grown on the plant?' - ], - 'urlPaths': { - 'inUrls': [{ - 'name': 'image' - }, { - 'name': 'text' - }], - 'outUrls': [{ - 'outputKey': 'text' - }] - } - }, - TasksIODescriptions.visual_entailment: { - 'task': - Tasks.visual_entailment, - 'inputs': [ - 'http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/visual-entailment/visual_entailment.jpg', - 'there are two birds.', 'test' - ], - 'urlPaths': { - 'inUrls': [{ - 'name': 'image' - }, { - 'name': 'text' - }], - 'outUrls': [{}] - } - }, - TasksIODescriptions.generative_multi_modal_embedding: { - 'task': - Tasks.generative_multi_modal_embedding, - 'inputs': [ - 'http://clip-multimodal.oss-cn-beijing.aliyuncs.com/lingchen/demo/dogs.jpg', - 'dogs playing in the grass' - ], - 'urlPaths': { - 'inUrls': [{ - 'name': 'image' - }, { - 'name': 'text' - }], - 'outUrls': [{}] - } - }, -} - - -class DemoCompatibilityCheck(object): - - def compatibility_check(self): - if self.task not in TASKS_INPUT_TEMPLATES: - print('task is not supported in demo service so far') - return False - if TASKS_INPUT_TEMPLATES[self.task] not in INPUT_EXAMPLES: - print('no example input for this task') - return False - - print('testing demo: ', self.task, self.model_id) - test_pipline = pipeline(self.task, self.model_id) - req = INPUT_EXAMPLES[TASKS_INPUT_TEMPLATES[self.task]] - inputs = preprocess(req) - params = req.get('parameters', {}) - # modelscope inference - if params != {}: - output = test_pipline(inputs, **params) - else: - output = test_pipline(inputs) - json.dumps(output, cls=NumpyEncoder) - result = postprocess(req, output) - print(result) - return True - - -def preprocess(req): - in_urls = req.get('urlPaths').get('inUrls') - if len(req['inputs']) == 1: - inputs = req['inputs'][0] - else: - inputs = tuple(req['inputs']) - if in_urls is None or len(in_urls) == 0: - return inputs - - 
inputs_dict = {} - for i, in_url in enumerate(in_urls): - input_name = in_url.get('name') - if input_name is None or input_name == '': - return inputs - inputs_dict[input_name] = req['inputs'][i] - return inputs_dict - - -def postprocess(req, resp): - out_urls = req.get('urlPaths').get('outUrls') - if out_urls is None or len(out_urls) == 0: - return resp - new_resp = resp - if isinstance(resp, str): - new_resp = json.loads(resp) - for out_url in out_urls: - output_key = out_url['outputKey'] - file_type = out_url['fileType'] - new_resp.get(output_key) - if file_type == 'png' or file_type == 'jpg': - content = new_resp.get(output_key) - import cv2 - _, img_encode = cv2.imencode('.' + file_type, content) - img_bytes = img_encode.tobytes() - return type(img_bytes) - else: - out_mem_file = io.BytesIO() - out_mem_file.write(new_resp.get(output_key)) - return type(out_mem_file) diff --git a/modelscope/utils/input_output.py b/modelscope/utils/input_output.py new file mode 100644 index 00000000..b2c9cd5b --- /dev/null +++ b/modelscope/utils/input_output.py @@ -0,0 +1,756 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import ast +import base64 +import importlib +import inspect +from io import BytesIO +from typing import Any +from urllib.parse import urlparse + +import numpy as np + +from modelscope.hub.api import HubApi +from modelscope.hub.errors import NotExistError +from modelscope.hub.file_download import model_file_download +from modelscope.outputs.outputs import (TASK_OUTPUTS, OutputKeys, OutputTypes, + OutputTypeSchema) +from modelscope.pipeline_inputs import (INPUT_TYPE, INPUT_TYPE_SCHEMA, + TASK_INPUTS, InputType) +from modelscope.pipelines import pipeline +from modelscope.pipelines.base import Pipeline +from modelscope.utils.config import Config +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +logger = get_logger() +"""Support webservice integration pipeline。 + +This module provides a support library when webservice uses pipeline, +converts webservice input into pipeline input, and converts pipeline +output into webservice output, which automatically encodes and +decodes relevant fields. + +Example: + # create pipeine instance and pipeline information, save it to app + pipeline_instance = create_pipeline('damo/cv_gpen_image-portrait-enhancement', 'v1.0.0') + pipeline_info = get_pipeline_information_by_pipeline(pipeline_instance) + app.state.pipeline = pipeline_instance + app.state.pipeline_info = pipeline_info + + # for service schema request. + pipeline_info = request.app.state.pipeline_info + return pipeline_info.schema + + # for service call request. + def inference(request: Request): + pipeline_service = request.app.state.pipeline + pipeline_info = request.app.state.pipeline_info + request_json = await request.json() + result = call_pipeline_with_json(pipeline_info, + pipeline_service, + request_json) + # convert output to json, if binary field, we need encoded. + output = pipeline_output_to_service_base64_output(pipeline_info.task_name, result) + return output +Todo: + * Support more service input type, such as form. 
+ +""" + + +def create_pipeline(model_id: str, revision: str): + model_configuration_file = model_file_download( + model_id=model_id, + file_path=ModelFile.CONFIGURATION, + revision=revision) + cfg = Config.from_file(model_configuration_file) + return pipeline(task=cfg.task, model=model_id, model_revision=revision) + + +def get_class_user_attributes(cls): + attributes = inspect.getmembers(cls, lambda a: not (inspect.isroutine(a))) + user_attributes = [ + a for a in attributes + if (not (a[0].startswith('__') and a[0].endswith('__'))) + ] + return user_attributes + + +def get_input_type(task_inputs: Any): + """Get task input schema. + + Args: + task_name (str): The task name. + """ + if isinstance(task_inputs, str): # no input key + input_type = INPUT_TYPE[task_inputs] + return input_type + elif isinstance(task_inputs, tuple) or isinstance(task_inputs, list): + for item in task_inputs: + if isinstance(item, + dict): # for list, server only support dict format. + return get_input_type(item) + else: + continue + elif isinstance(task_inputs, dict): + input_info = {} # key input key, value input type + for k, v in task_inputs.items(): + input_info[k] = get_input_type(v) + return input_info + else: + raise ValueError(f'invalid input_type definition {task_inputs}') + + +def get_input_schema(task_name: str, input_type: type): + """Get task input schema. + + Args: + task_name (str): The task name. + input_type (type): The input type + """ + if input_type is None: + task_inputs = TASK_INPUTS[task_name] + if isinstance(task_inputs, + str): # only one input field, key is task_inputs + return { + 'type': 'object', + 'properties': { + task_inputs: INPUT_TYPE_SCHEMA[task_inputs] + } + } + else: + task_inputs = input_type + + if isinstance(task_inputs, str): # no input key + return INPUT_TYPE_SCHEMA[task_inputs] + elif input_type is None and isinstance(task_inputs, list): + for item in task_inputs: + # for list, server only support dict format. + if isinstance(item, dict): + return get_input_schema(None, item) + elif isinstance(task_inputs, tuple) or isinstance(task_inputs, list): + input_schema = {'type': 'array', 'items': {}} + for item in task_inputs: + if isinstance(item, dict): + item_schema = get_input_schema(None, item) + input_schema['items']['type'] = item_schema + return input_schema + else: + input_schema['items'] = INPUT_TYPE_SCHEMA[item] + return input_schema + + elif isinstance(task_inputs, dict): + input_schema = { + 'type': 'object', + 'properties': {} + } # key input key, value input type + for k, v in task_inputs.items(): + input_schema['properties'][k] = get_input_schema(None, v) + return input_schema + else: + raise ValueError(f'invalid input_type definition {task_inputs}') + + +def get_output_schema(task_name: str): + """Get task output schema. + + Args: + task_name (str): The task name. + """ + task_outputs = TASK_OUTPUTS[task_name] + output_schema = {'type': 'object', 'properties': {}} + if not isinstance(task_outputs, list): + raise ValueError('TASK_OUTPUTS for %s is not list.' 
% task_name) + else: + for output_key in task_outputs: + output_schema['properties'][output_key] = OutputTypeSchema[ + output_key] + return output_schema + + +def get_input_info(task_name: str): + task_inputs = TASK_INPUTS[task_name] + if isinstance(task_inputs, str): # no input key default input key input + input_type = INPUT_TYPE[task_inputs] + return input_type + elif isinstance(task_inputs, tuple): + return task_inputs + elif isinstance(task_inputs, list): + for item in task_inputs: + if isinstance(item, + dict): # for list, server only support dict format. + return {'input': get_input_type(item)} + else: + continue + elif isinstance(task_inputs, dict): + input_info = {} # key input key, value input type + for k, v in task_inputs.items(): + input_info[k] = get_input_type(v) + return {'input': input_info} + else: + raise ValueError(f'invalid input_type definition {task_inputs}') + + +def get_output_info(task_name: str): + output_keys = TASK_OUTPUTS[task_name] + output_type = {} + if not isinstance(output_keys, list): + raise ValueError('TASK_OUTPUTS for %s is not list.' % task_name) + else: + for output_key in output_keys: + output_type[output_key] = OutputTypes[output_key] + return output_type + + +def get_task_io_info(task_name: str): + """Get task input output schema. + + Args: + task_name (str): The task name. + """ + tasks = get_class_user_attributes(Tasks) + task_exist = False + for key, value in tasks: + if key == task_name or value == task_name: + task_exist = True + break + if not task_exist: + return None, None + + task_inputs = get_input_info(task_name) + task_outputs = get_output_info(task_name) + + return task_inputs, task_outputs + + +def process_arg_type_annotation(arg, default_value): + if arg.annotation is not None: + if isinstance(arg.annotation, ast.Subscript): + return arg.arg, arg.annotation.value.id + elif isinstance(arg.annotation, ast.Name): + return arg.arg, arg.annotation.id + elif isinstance(arg.annotation, ast.Attribute): + return arg.arg, arg.annotation.attr + else: + raise Exception('Invalid annotation: %s' % arg.annotation) + else: + if default_value is not None: + return arg.arg, type(default_value).__name__ + # Irregular, assuming no type hint no default value type is object + logger.warning('arg: %s has no data type annotation, use default!' % + (arg.arg)) + return arg.arg, 'object' + + +def process_args(args): + arguments = [] + # name, type, has_default, default + n_args = len(args.args) + n_args_default = len(args.defaults) + # no default + for arg in args.args[0:n_args - n_args_default]: + if arg.arg == 'self': + continue + else: + arg_name, arg_type = process_arg_type_annotation(arg, None) + arguments.append((arg_name, arg_type, False, None)) + + # process defaults arg. + for arg, dft in zip(args.args[n_args - n_args_default:], args.defaults): + # compatible with python3.7 ast.Num no value. 
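+        # Python 3.8+ parses literal defaults as ast.Constant (with .value); older versions use ast.Num, which exposes the number via .n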
+ value = dft.value if hasattr(dft, 'value') else dft.n + arg_name, arg_type = process_arg_type_annotation(arg, value) + arguments.append((arg_name, arg_type, True, value)) + + # kwargs + n_kwargs = len(args.kwonlyargs) + n_kwargs_default = len(args.kw_defaults) + for kwarg in args.kwonlyargs[0:n_kwargs - n_kwargs_default]: + arg_name, arg_type = process_arg_type_annotation(kwarg) + arguments.append((arg_name, arg_type, False, None)) + + for kwarg, dft in zip(args.kwonlyargs[n_kwargs - n_kwargs_default:], + args.kw_defaults): + arg_name, arg_type = process_arg_type_annotation(kwarg) + arguments.append((arg_name, arg_type, True, dft.value)) + return arguments + + +class PipelineClassAnalyzer(ast.NodeVisitor): + """Analysis pipeline class define get inputs and parameters. + """ + + def __init__(self) -> None: + super().__init__() + self.parameters = [] + self.has_call = False + self.preprocess_parameters = [] + self.has_preprocess = False + self.has_postprocess = False + self.has_forward = False + self.forward_parameters = [] + self.postprocess_parameters = [] + self.lineno = 0 + self.end_lineno = 0 + + def visit_FunctionDef(self, node: ast.FunctionDef) -> Any: + if node.name == '__call__': + self.parameters = process_args(node.args) + self.has_call = True + if node.name == 'preprocess': + self.preprocess_parameters = process_args(node.args) + self.has_preprocess = True + elif node.name == 'postprocess': + self.postprocess_parameters = process_args(node.args) + self.has_postprocess = True + elif node.name == 'forward': + self.forward_parameters = process_args(node.args) + self.has_forward = True + + def get_input_parameters(self): + if self.has_call: + # custom define __call__ inputs and parameter are control by the + # custom __call__, all parameter is input. + return self.parameters, None + parameters = [] + if self.has_preprocess: + parameters.extend(self.preprocess_parameters[1:]) + if self.has_forward: + parameters.extend(self.forward_parameters[1:]) + if self.has_postprocess: + parameters.extend(self.postprocess_parameters[1:]) + + if len(parameters) > 0: + return None, parameters + else: + return None, [] + + +class AnalysisSourceFileRegisterModules(ast.NodeVisitor): + """Get register_module call of the python source file. + + + Args: + ast (NodeVisitor): The ast node. + + Examples: + >>> with open(source_file_path, "rb") as f: + >>> src = f.read() + >>> analyzer = AnalysisSourceFileRegisterModules(source_file_path) + >>> analyzer.visit(ast.parse(src, filename=source_file_path)) + """ + + def __init__(self, source_file_path, class_name) -> None: + super().__init__() + self.source_file_path = source_file_path + self.class_name = class_name + self.class_define = None + + def visit_ClassDef(self, node: ast.ClassDef): + if node.name == self.class_name: + self.class_define = node + + +def get_pipeline_input_parameters( + source_file_path: str, + class_name: str, +): + """Get pipeline input and parameter + + Args: + source_file_path (str): The pipeline source code path + class_name (str): The pipeline class name + """ + with open(source_file_path, 'rb') as f: + src = f.read() + analyzer = AnalysisSourceFileRegisterModules(source_file_path, + class_name) + analyzer.visit( + ast.parse( + src, + filename=source_file_path, + # python3.7 no type_comments parameter . + # type_comments=True + )) + clz = PipelineClassAnalyzer() + clz.visit(analyzer.class_define) + input, pipeline_parameters = clz.get_input_parameters() + # remove the first input parameter, the input is defined by task. 
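+    # (when there is no custom __call__, get_input_parameters strips it via the [1:] slices above)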
+ return input, pipeline_parameters + + +meta_type_schema_map = { + # For parameters, current only support types. + 'str': 'string', + 'int': 'integer', + 'float': 'number', + 'bool': 'boolean', + 'Dict': 'object', + 'dict': 'object', + 'list': 'array', + 'List': 'array', + 'Union': 'object', + 'Input': 'object', + 'object': 'object', +} + + +def generate_pipeline_parameters_schema(parameters): + parameters_schema = {'type': 'object', 'properties': {}} + if len(parameters) == 0: + return {} + for param in parameters: + name, param_type, has_default, default_value = param + # 'max_length': ('int', True, 1024) + prop = {'type': meta_type_schema_map[param_type]} + if has_default: + prop['default'] = default_value + parameters_schema['properties'][name] = prop + return parameters_schema + + +def get_pipeline_information_by_pipeline(pipeline: Pipeline, ): + """Get pipeline input output schema. + + Args: + pipeline (Pipeline): The pipeline object. + """ + task_name = pipeline.group_key + pipeline_class = pipeline.__class__.__name__ + spec = importlib.util.find_spec(pipeline.__module__) + pipeline_file_path = spec.origin + info = PipelineInfomation(task_name, pipeline_class, pipeline_file_path) + return info + + +class PipelineInfomation(): + """Analyze pipeline information, task_name, schema. + """ + + def __init__(self, task_name: str, class_name, source_path): + self._task_name = task_name + self._class_name = class_name + self._source_path = source_path + self._is_custom_call_method = False + self._analyze() + + def _analyze(self): + input, parameters = get_pipeline_input_parameters( + self._source_path, self._class_name) + if input is not None: # custom pipeline __call__ asr_inferrnce_pipeline + self._is_custom_call_method = True + self._input_schema = generate_pipeline_parameters_schema(input) + self._input_schema[ + 'description'] = 'For binary input such as image audio video, only url is supported.' + self._parameters_schema = {} + self._output_schema = { + 'type': 'object', + } + if self._task_name in TASK_OUTPUTS: + self._output_schema = get_output_schema(self._task_name) + else: + # use base pipeline __call__ + if self._task_name in TASK_INPUTS and self._task_name in TASK_OUTPUTS: + # delete the first default input which is defined by task. 
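+                # `parameters` was already stripped of the task-defined input by get_pipeline_input_parameters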
+ self._parameters_schema = generate_pipeline_parameters_schema( + parameters) + self._input_schema = get_input_schema(self._task_name, None) + self._output_schema = get_output_schema(self._task_name) + else: + logger.warning( + 'Task: %s input is defined: %s, output is defined: %s which is not completed' + % (self._task_name, self._task_name + in TASK_INPUTS, self._task_name in TASK_OUTPUTS)) + self._input_schema = None + self._output_schema = None + if self._task_name in TASK_INPUTS: + self._input_schema = get_input_schema( + self._task_name, None) + if self._task_name in TASK_OUTPUTS: + self._output_schema = get_output_schema(self._task_name) + self._parameters_schema = generate_pipeline_parameters_schema( + parameters) + + @property + def task_name(self): + return self._task_name + + @property + def is_custom_call(self): + return self._is_custom_call_method + + @property + def input_schema(self): + return self._input_schema + + @property + def output_schema(self): + return self._output_schema + + @property + def parameters_schema(self): + return self._parameters_schema + + @property + def schema(self): + return { + 'input': self._input_schema if self._input_schema else + self._parameters_schema, # all parameter is input + 'parameters': + self._parameters_schema if self._input_schema else {}, + 'output': self._output_schema if self._output_schema else { + 'type': 'object', + }, + } + + +def is_url(url: str): + """Check the input url is valid url. + + Args: + url (str): The url + + Returns: + bool: If is url return True, otherwise False. + """ + url_parsed = urlparse(url) + if url_parsed.scheme in ('http', 'https', 'oss'): + return True + else: + return False + + +def decode_base64_to_image(content): + if content.startswith('http') or content.startswith('oss'): + return content + + from PIL import Image + image_file_content = base64.b64decode(content) + return Image.open(BytesIO(image_file_content)) + + +def decode_base64_to_audio(content): + if content.startswith('http') or content.startswith('oss'): + return content + + file_content = base64.b64decode(content) + return file_content + + +def decode_base64_to_video(content): + if content.startswith('http') or content.startswith('oss'): + return content + + file_content = base64.b64decode(content) + return file_content + + +def return_origin(content): + return content + + +def decode_box(content): + pass + + +def service_multipart_input_to_pipeline_input(body): + """Convert multipart data to pipeline input. + + Args: + body (dict): The multipart data body + """ + pass + + +def pipeline_output_to_service_multipart_output(output): + """Convert multipart data to service multipart output. + + Args: + output (dict): Multipart body. + """ + pass + + +base64_decoder_map = { + InputType.IMAGE: decode_base64_to_image, + InputType.TEXT: return_origin, + InputType.AUDIO: decode_base64_to_audio, + InputType.VIDEO: decode_base64_to_video, + InputType.BOX: decode_box, + InputType.DICT: return_origin, + InputType.LIST: return_origin, + InputType.NUMBER: return_origin, +} + + +def call_pipeline_with_json(pipeline_info: PipelineInfomation, + pipeline: Pipeline, body: str): + """Call pipeline with json input. + + Args: + pipeline_info (PipelineInfomation): The pipeline information object. + pipeline (Pipeline): The pipeline object. 
+ body (Dict): The input object, include input and parameters + """ + if pipeline_info.is_custom_call: + pipeline_inputs = body['input'] + result = pipeline(**pipeline_inputs) + else: + pipeline_inputs, parameters = service_base64_input_to_pipeline_input( + pipeline_info.task_name, body) + result = pipeline(pipeline_inputs, **parameters) + + return result + + +def service_base64_input_to_pipeline_input(task_name, body): + """Convert service base64 input to pipeline input and parameters + + Args: + task_name (str): The task name. + body (Dict): The input object, include input and parameters + """ + if 'input' not in body: + raise ValueError('No input data!') + service_input = body['input'] + if 'parameters' in body: + parameters = body['parameters'] + else: + parameters = {} + pipeline_input = {} + + task_input_info = TASK_INPUTS[task_name] + if isinstance(task_input_info, str): # no input key default + return base64_decoder_map[task_input_info](list( + service_input.values())[0]), parameters + elif isinstance(task_input_info, tuple): + pipeline_input = tuple(service_input) + return pipeline_input, parameters + elif isinstance(task_input_info, dict): + for key, value in service_input.items( + ): # task input has no nesting field. + # get input filed type + input_type = task_input_info[key] + # TODO recursion for list, dict if need. + if not isinstance(input_type, str): + pipeline_input[key] = value + continue + if input_type not in INPUT_TYPE: + raise ValueError('Invalid input field: %s' % input_type) + pipeline_input[key] = base64_decoder_map[input_type](value) + return pipeline_input, parameters + elif isinstance(task_input_info, + list): # one of input format, we use dict. + for item in task_input_info: + if isinstance(item, dict): + for key, value in service_input.items( + ): # task input has no nesting field. + # get input filed type + input_type = item[key] + if input_type not in INPUT_TYPE: + raise ValueError('Invalid input field: %s' + % input_type) + pipeline_input[key] = base64_decoder_map[input_type](value) + return pipeline_input, parameters + else: + raise IndexError('Task %s input invalid: %s' % + (task_name, task_input_info)) + + +def encode_numpy_image_to_base64(image): + from PIL import Image + with BytesIO() as output_bytes: + pil_image = Image.fromarray(image.astype(np.uint8)) + pil_image.save(output_bytes, 'PNG') + bytes_data = output_bytes.getvalue() + base64_str = str(base64.b64encode(bytes_data), 'utf-8') + return base64_str + + +def encode_video_to_base64(video): + return str(base64.b64encode(video), 'utf-8') + + +def encode_pcm_to_base64(pcm): + return str(base64.b64encode(pcm), 'utf-8') + + +def encode_wav_to_base64(wav): + return str(base64.b64encode(wav), 'utf-8') + + +def encode_bytes_to_base64(bts): + return str(base64.b64encode(bts), 'utf-8') + + +base64_encoder_map = { + 'image': encode_numpy_image_to_base64, + 'video': encode_video_to_base64, + 'pcm': encode_pcm_to_base64, + 'wav': encode_wav_to_base64, + 'bytes': encode_bytes_to_base64, +} + +# convert numpy etc type to python type. 
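+# e.g. np.int64 -> int, so that pipeline outputs remain JSON serializable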
+type_to_python_type = { + np.int64: int, +} + + +def _convert_to_python_type(inputs): + if isinstance(inputs, (list, tuple)): + res = [] + for item in inputs: + res.append(_convert_to_python_type(item)) + return res + elif isinstance(inputs, dict): + res = {} + for k, v in inputs.items(): + if type(v) in type_to_python_type: + res[k] = type_to_python_type[type(v)](v) + else: + res[k] = _convert_to_python_type(v) + return res + else: + return inputs + + +def pipeline_output_to_service_base64_output(task_name, pipeline_output): + """Convert pipeline output to service output, + convert binary fields to base64 encoding。 + + Args: + task_name (str): The output task name. + pipeline_output (object): The pipeline output. + """ + json_serializable_output = {} + task_outputs = [] + if task_name in TASK_OUTPUTS: + task_outputs = TASK_OUTPUTS[task_name] + for key, value in pipeline_output.items(): + if key not in task_outputs: + continue # skip the output not defined. + if key in [ + OutputKeys.OUTPUT_IMG, OutputKeys.OUTPUT_IMGS, + OutputKeys.OUTPUT_VIDEO, OutputKeys.OUTPUT_PCM, + OutputKeys.OUTPUT_PCM_LIST, OutputKeys.OUTPUT_WAV + ]: + if isinstance(value, list): + items = [] + if key == OutputKeys.OUTPUT_IMGS: + output_item_type = OutputKeys.OUTPUT_IMG + else: + output_item_type = OutputKeys.OUTPUT_PCM + for item in value: + items.append(base64_encoder_map[output_item_type](item)) + json_serializable_output[key] = items + else: + json_serializable_output[key] = base64_encoder_map[ + OutputTypes[key]]( + value) + elif OutputTypes[key] in [np.ndarray]: + json_serializable_output[key] = value.tolist() + else: + json_serializable_output[key] = value + + return _convert_to_python_type(json_serializable_output) diff --git a/modelscope/utils/megatron_utils.py b/modelscope/utils/megatron_utils.py index 922cb53d..53b5aacb 100644 --- a/modelscope/utils/megatron_utils.py +++ b/modelscope/utils/megatron_utils.py @@ -96,15 +96,16 @@ def convert_megatron_checkpoint( log_master( f'origin_num_partitions: {origin_num_partitions}, target_num_partitions: {target_num_partitions}' ) - os.makedirs(target_dir, exist_ok=True) if origin_num_partitions < target_num_partitions: + os.makedirs(target_dir, exist_ok=True) state_dict = _split_checkpoint( model, checkpoint_dir, target_num_partitions // origin_num_partitions) _save_converted_checkpoint(state_dict, target_dir) log_master('Split checkpoints succeeded.') elif origin_num_partitions > target_num_partitions: + os.makedirs(target_dir, exist_ok=True) state_dict = _merge_checkpoint( model, checkpoint_dir, origin_num_partitions // target_num_partitions) diff --git a/modelscope/utils/plugins.py b/modelscope/utils/plugins.py index a83ca03c..9d238e7d 100644 --- a/modelscope/utils/plugins.py +++ b/modelscope/utils/plugins.py @@ -263,12 +263,11 @@ def import_module_and_submodules(package_name: str, def install_module_from_requirements(requirement_path, ): - """ + """ install module from requirements Args: requirement_path: The path of requirement file - Returns: - + No returns, raise error if failed """ install_list = [] @@ -292,6 +291,15 @@ def install_module_from_requirements(requirement_path, ): def import_module_from_file(module_name, file_path): + """ install module by name with file path + + Args: + module_name: the module name need to be import + file_path: the related file path that matched with the module name + + Returns: return the module class + + """ spec = importlib.util.spec_from_file_location(module_name, file_path) module = importlib.util.module_from_spec(spec) 
spec.loader.exec_module(module) @@ -299,6 +307,14 @@ def import_module_from_file(module_name, file_path): def import_module_from_model_dir(model_dir): + """ import all the necessary module from a model dir + + Args: + model_dir: model file location + + No returns, raise error if failed + + """ from pathlib import Path file_scanner = FilesAstScanning() file_scanner.traversal_files(model_dir) @@ -317,6 +333,14 @@ def import_module_from_model_dir(model_dir): def install_requirements_by_names(plugins: List[str]): + """ install the requirements by names + + Args: + plugins: name of plugins (pai-easyscv, transformers) + + No returns, raise error if failed + + """ plugins_manager = PluginsManager() uninstalled_plugins = [] for plugin in plugins: @@ -333,6 +357,14 @@ def install_requirements_by_names(plugins: List[str]): def install_requirements_by_files(requirements: List[str]): + """ install the requriements by files + + Args: + requirements: a list of files including requirements info (requirements.txt) + + No returns, raise error if failed + + """ for requirement in requirements: install_module_from_requirements(requirement) @@ -343,7 +375,8 @@ def register_plugins_repo(plugins: List[str]) -> None: install_requirements_by_names(plugins) modules = [] for plugin in plugins: - modules.extend(get_modules_from_package(plugin)) + module_name, module_version, _ = get_modules_from_package(plugin) + modules.extend(module_name) import_plugins(modules) @@ -362,12 +395,15 @@ DEFAULT_INDEX = 'https://pypi.org/simple/' def get_modules_from_package(package): - """ to get the modules from a installed package + """ to get the modules from an installed package Args: package: The distribution name or package name Returns: + import_names: The modules that in the package distribution + import_version: The version of those modules, should be same and identical + package_name: The package name, if installed by whl file, the package is unknown, should be passed """ from zipfile import ZipFile @@ -378,8 +414,6 @@ def get_modules_from_package(package): from urllib.parse import urlparse from urllib import request as urllib2 from pip._internal.utils.packaging import get_requirement - req = get_requirement(package) - package = req.name def urlretrieve(url, filename, data=None, auth=None): if auth is not None: @@ -591,24 +625,58 @@ def get_modules_from_package(package): return result def discover_import_names(whl_file): + import re logger.debug('finding import names') zipfile = ZipFile(file=whl_file) namelist = zipfile.namelist() [top_level_fname ] = [x for x in namelist if x.endswith('top_level.txt')] + [metadata_fname + ] = [x for x in namelist if x.endswith('.dist-info/METADATA')] all_names = zipfile.read(top_level_fname).decode( 'utf-8').strip().splitlines() + metadata = zipfile.read(metadata_fname).decode('utf-8') public_names = [n for n in all_names if not n.startswith('_')] - return public_names + + version_pattern = re.compile(r'^Version: (?P.+)$', + re.MULTILINE) + name_pattern = re.compile(r'^Name: (?P.+)$', re.MULTILINE) + + version_match = version_pattern.search(metadata) + name_match = name_pattern.search(metadata) + + module_version = version_match.group('version') + module_name = name_match.group('name') + + return public_names, module_version, module_name tmpdir = mkdtemp() - data = get(package, tmpdir=tmpdir) - import_names = discover_import_names(data['path']) + if package.endswith('.whl'): + """if user using .whl file then parse the whl to get the module name""" + if not os.path.isfile(package): + 
file_name = os.path.basename(package) + file_path = os.path.join(tmpdir, file_name) + whl_file, _ = _download_dist(package, file_path, None, None) + else: + whl_file = package + else: + """if user using package name then generate whl file and parse the file to get the module name by + the discover_import_names method + """ + req = get_requirement(package) + package = req.name + data = get(package, tmpdir=tmpdir) + whl_file = data['path'] + import_names, import_version, package_name = discover_import_names( + whl_file) shutil.rmtree(tmpdir) - return import_names + return import_names, import_version, package_name class PluginsManager(object): + """ + plugins manager class + """ def __init__(self, cache_dir=MODELSCOPE_FILE_DIR, @@ -633,12 +701,26 @@ class PluginsManager(object): package: the package name need to be installed Returns: + if_installed: True if installed + version: the version of installed or None if not installed """ if package.split('.')[-1] == 'whl': - return False, '' + # install from whl should test package name instead of module name + _, module_version, package_name = get_modules_from_package(package) + local_installed, version = PluginsManager._check_plugin_installed( + package_name) + if local_installed and module_version != version: + return False, version + elif not local_installed: + return False, version + return True, module_version + else: + return PluginsManager._check_plugin_installed(package) + @staticmethod + def _check_plugin_installed(package, verified_version=None): from pip._internal.utils.packaging import get_requirement, specifiers req = get_requirement(package) @@ -656,11 +738,15 @@ class PluginsManager(object): if not installed_valid_version: installed = False break + except KeyError: version = '' installed = False - return installed, version + if installed and verified_version is not None and verified_version != version: + return False, verified_version + else: + return installed, version @staticmethod def pip_command( @@ -675,6 +761,9 @@ class PluginsManager(object): such as ['-r', 'requirements'] Returns: + status_code: The pip command status code, 0 if success, else is failed + options: parsed option from system args by pip command + args: the unknown args that could be parsed by pip command """ from pip._internal.commands import create_command @@ -702,6 +791,7 @@ class PluginsManager(object): Args: install_args (list): List of arguments passed to `pip install`. index_url (str, optional): The pypi index url. 
+ force_update: If force update on or off """ if len(install_args) == 0: @@ -730,6 +820,16 @@ class PluginsManager(object): return status_code, install_args def parse_args_info(self, args: List[str], options): + """ + parse arguments input info + Args: + args: the list of args from pip command output + options: the options that parsed from system args by pip command method + + Returns: + installed_package: generate installed package info in order to store in the file + the info includes: name, url and desc of the package + """ installed_package = [] # the case of install with requirements @@ -781,6 +881,15 @@ class PluginsManager(object): def uninstall_plugins(self, uninstall_args: Union[str, List], is_yes=False): + """ + uninstall plugins + Args: + uninstall_args: args used to uninstall by pip command + is_yes: force yes without verified + + Returns: status code, and uninstall args + + """ if is_yes is not None: uninstall_args += ['-y'] @@ -862,6 +971,7 @@ class PluginsManager(object): show_all: show installed and official supported if True, else only those installed Returns: + local_plugins_info: show the list of plugins info """ local_plugins_info = self._get_plugins_from_file() @@ -901,6 +1011,7 @@ class PluginsManager(object): override: Override the file by the list if True, else only update. Returns: + local_plugins_info_json: the json version of updated plugins info """ local_plugins_info = self._get_plugins_from_file() @@ -921,12 +1032,12 @@ class PluginsManager(object): self, package_names: Union[str, list], ): - """ - + """remove the plugins from file Args: package_names: package name Returns: + local_plugins_info_json: the json version of updated plugins info """ local_plugins_info = self._get_plugins_from_file() @@ -1012,4 +1123,5 @@ class EnvsManager(object): if __name__ == '__main__': install_requirements_by_files(['adaseq']) - import_name = get_modules_from_package('pai-easycv') + import_name, import_version, package_name = get_modules_from_package( + 'pai-easycv') diff --git a/modelscope/utils/regress_test_utils.py b/modelscope/utils/regress_test_utils.py index 0f10c1ce..e03b3a7c 100644 --- a/modelscope/utils/regress_test_utils.py +++ b/modelscope/utils/regress_test_utils.py @@ -483,9 +483,9 @@ def numpify_tensor_nested(tensors, reduction=None, clip_value=10000): t = np.where(t > clip_value, clip_value, t) t = np.where(t < -clip_value, -clip_value, t) if reduction == 'sum': - return t.sum(dtype=np.float) + return t.sum(dtype=float) elif reduction == 'mean': - return t.mean(dtype=np.float) + return t.mean(dtype=float) return t return tensors diff --git a/modelscope/utils/service_utils.py b/modelscope/utils/service_utils.py index 6e7c0fc1..8f7ca42d 100644 --- a/modelscope/utils/service_utils.py +++ b/modelscope/utils/service_utils.py @@ -8,6 +8,7 @@ import requests from modelscope.outputs import TASK_OUTPUTS, OutputKeys from modelscope.pipeline_inputs import TASK_INPUTS, InputType +from modelscope.utils.url_utils import valid_url # service data decoder func decodes data from network and convert it to pipeline's input @@ -82,12 +83,16 @@ def get_mimetype(filename): def decode_base64_to_binary(encoding): + if valid_url(encoding): + return encoding, '' extension = get_extension(encoding) data = encoding.split(',')[1] return base64.b64decode(data), extension def decode_base64_to_image(encoding): + if valid_url(encoding): + return encoding from PIL import Image content = encoding.split(';')[1] image_encoded = content.split(',')[1] @@ -151,6 +156,7 @@ def service_data_decoder(task, 
data): return input_data elif isinstance(input_type, dict): input_data = {} + data = json.loads(data) for key, val in input_type.items(): if val == InputType.IMAGE: input_data[key] = decode_base64_to_image(data[key]) @@ -158,6 +164,8 @@ def service_data_decoder(task, data): input_data[key] = decode_base64_to_binary(data[key])[0] elif val == InputType.TEXT: input_data[key] = data[key] + else: + return data return input_data diff --git a/modelscope/utils/test_utils.py b/modelscope/utils/test_utils.py index b4ce7299..03d293ec 100644 --- a/modelscope/utils/test_utils.py +++ b/modelscope/utils/test_utils.py @@ -150,7 +150,7 @@ def compare_arguments_nested(print_content, if arg1 is None: return True - elif isinstance(arg1, (int, str, bool, np.bool, np.integer, np.str)): + elif isinstance(arg1, (int, str, bool, np.bool_, np.integer, np.str_)): if arg1 != arg2: if print_content is not None: print(f'{print_content}, arg1:{arg1}, arg2:{arg2}') @@ -201,10 +201,8 @@ def compare_arguments_nested(print_content, return False return True elif isinstance(arg1, np.ndarray): - arg1 = np.where(np.equal(arg1, None), np.NaN, - arg1).astype(dtype=np.float) - arg2 = np.where(np.equal(arg2, None), np.NaN, - arg2).astype(dtype=np.float) + arg1 = np.where(np.equal(arg1, None), np.NaN, arg1).astype(dtype=float) + arg2 = np.where(np.equal(arg2, None), np.NaN, arg2).astype(dtype=float) if not all( np.isclose(arg1, arg2, rtol=rtol, atol=atol, equal_nan=True).flatten()): diff --git a/modelscope/utils/url_utils.py b/modelscope/utils/url_utils.py new file mode 100644 index 00000000..59cc2efd --- /dev/null +++ b/modelscope/utils/url_utils.py @@ -0,0 +1,36 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from urllib.parse import urlparse + +import pandas as pd + +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +def valid_url(url) -> bool: + try: + result = urlparse(url) + return all([result.scheme, result.netloc]) + except ValueError as e: + logger.warning(e) + return False + + +def fetch_csv_with_url(csv_url: str) -> pd.DataFrame: + """Fetch the csv content from url. + + Args: + csv_url (str): The input url of csv data. + + Returns: + A pandas DataFrame object which contains the csv content. + """ + try: + df = pd.read_csv(csv_url) + except Exception as e: + logger.error(f'Failed to fetch csv from url: {csv_url}') + raise e + + return df diff --git a/modelscope/version.py b/modelscope/version.py index 81c35379..cf9bbe98 100644 --- a/modelscope/version.py +++ b/modelscope/version.py @@ -1,5 +1,5 @@ # Make sure to modify __release_datetime__ to release time when making official release. 
-__version__ = '1.5.0' +__version__ = '1.6.0' # default release datetime for branches under active development is set # to be a time far-far-away-into-the-future -__release_datetime__ = '2099-10-13 08:56:12' +__release_datetime__ = '2023-05-18 23:59:00' diff --git a/requirements/audio/audio_asr.txt b/requirements/audio/audio_asr.txt index 1ecbe421..7725a0dd 100644 --- a/requirements/audio/audio_asr.txt +++ b/requirements/audio/audio_asr.txt @@ -1,2 +1,2 @@ easyasr>=0.0.2 -funasr>=0.4.0 +funasr>=0.5.0 diff --git a/requirements/audio/audio_signal.txt b/requirements/audio/audio_signal.txt index 61e688f3..16a18e67 100644 --- a/requirements/audio/audio_signal.txt +++ b/requirements/audio/audio_signal.txt @@ -1,11 +1,11 @@ hyperpyyaml -librosa<=0.9.2 +librosa==0.9.2 MinDAEC mir_eval>=0.7 numpy rotary_embedding_torch>=0.1.5 scipy SoundFile>0.10 -speechbrain>=0.5.7 +speechbrain>=0.5.12 torchaudio tqdm diff --git a/requirements/audio/audio_tts.txt b/requirements/audio/audio_tts.txt index b1a85faf..81a5c6f4 100644 --- a/requirements/audio/audio_tts.txt +++ b/requirements/audio/audio_tts.txt @@ -3,7 +3,7 @@ greenlet>=1.1.2 inflect jedi>=0.18.1 kantts -librosa<=0.9.2 +librosa==0.9.2 lxml matplotlib msgpack>=1.0.4 diff --git a/requirements/cv.txt b/requirements/cv.txt index 7d09a60b..0cec3659 100644 --- a/requirements/cv.txt +++ b/requirements/cv.txt @@ -6,7 +6,7 @@ chumpy clip>=1.0 control_ldm ddpm_guided_diffusion -diffusers +diffusers>=0.13.1,<0.15.0 easydict easyrobust edit_distance @@ -25,7 +25,7 @@ lmdb lpips ml_collections mmcls>=0.21.0 -mmdet>=2.25.0 +mmdet>=2.25.0,<=2.28.2 # mmdet3d-1.0.0rc6 remove networkx and numba version restriction mmdet3d==1.0.0a1 mmsegmentation<=0.30.0 @@ -39,7 +39,6 @@ onnxruntime>=1.10 onnxsim open-clip-torch>=2.7.0 opencv-python -pai-easycv>=0.8,<0.10.0 paint_ldm pandas panopticapi @@ -51,7 +50,7 @@ regex scikit-image>=0.19.3 scikit-learn>=0.20.1 shapely -shotdetect_scenedetect_lgss +shotdetect_scenedetect_lgss>=0.0.4 smplx tensorflow-estimator>=1.15.1 tf_slim diff --git a/requirements/framework.txt b/requirements/framework.txt index e15e95eb..e763ae63 100644 --- a/requirements/framework.txt +++ b/requirements/framework.txt @@ -4,9 +4,11 @@ datasets>=2.7.0,<=2.8.0 einops filelock>=3.3.0 gast>=0.2.2 -mmdet<=2.28.2 -numpy<1.24.0 +# for python3.7 python3.8 compatible +numpy<=1.22.0 oss2 +# for datasets compatible +pandas<=1.5.3 Pillow>=6.2.0 # pyarrow 9.0.0 introduced event_loop core dump pyarrow>=6.0.0,!=9.0.0 @@ -14,7 +16,7 @@ python-dateutil>=2.1 pyyaml requests scipy -setuptools==59.8.0 +setuptools simplejson>=3.3.0 sortedcontainers>=1.5.9 tqdm>=4.64.0 diff --git a/requirements/multi-modal.txt b/requirements/multi-modal.txt index 3d75f9b6..9d2c3448 100644 --- a/requirements/multi-modal.txt +++ b/requirements/multi-modal.txt @@ -1,7 +1,7 @@ accelerate diffusers>=0.13.1,<0.15.0 ftfy>=6.0.3 -librosa<=0.9.2 +librosa==0.9.2 opencv-python pycocoevalcap>=1.2 pycocotools>=2.0.4 @@ -12,13 +12,14 @@ rapidfuzz # which introduced compatability issues that are being investigated rouge_score<=0.0.4 sacrebleu +safetensors # scikit-video soundfile taming-transformers-rom1504 timm tokenizers torchvision -transformers>=4.12.0 +transformers>=4.27.1 # triton==2.0.0.dev20221120 unicodedata2 zhconv diff --git a/setup.py b/setup.py index 9affe028..98b12888 100644 --- a/setup.py +++ b/setup.py @@ -197,11 +197,12 @@ if __name__ == '__main__': setup( name='modelscope', version=get_version(), - description='', + description= + 'ModelScope: bring the notion of Model-as-a-Service to life.', 
long_description=readme(), long_description_content_type='text/markdown', - author='Alibaba ModelScope team', - author_email='modelscope@list.alibaba-inc.com', + author='ModelScope team', + author_email='contact@modelscope.cn', keywords='python,nlp,science,cv,speech,multi-modal', url='https://github.com/modelscope/modelscope', packages=find_packages(exclude=('configs', 'demo')), diff --git a/tests/cli/test_download_cmd.py b/tests/cli/test_download_cmd.py index 53cfdadd..6059fa12 100644 --- a/tests/cli/test_download_cmd.py +++ b/tests/cli/test_download_cmd.py @@ -17,7 +17,6 @@ DEFAULT_GIT_PATH = 'git' download_model_file_name = 'test.bin' -@unittest.skip('temporarily skip') class DownloadCMDTest(unittest.TestCase): def setUp(self): diff --git a/tests/export/test_export_speech_signal_process.py b/tests/export/test_export_speech_signal_process.py new file mode 100644 index 00000000..d3f6fe14 --- /dev/null +++ b/tests/export/test_export_speech_signal_process.py @@ -0,0 +1,83 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os +import pickle +import shutil +import tempfile +import unittest + +import torch + +from modelscope.exporters import Exporter +from modelscope.models import Model +from modelscope.utils.logger import get_logger +from modelscope.utils.regress_test_utils import (compare_arguments_nested, + numpify_tensor_nested) +from modelscope.utils.test_utils import test_level + +INPUT_PKL = 'data/test/audios/input.pkl' + +INPUT_NAME = 'input' +OUTPUT_NAME = 'output' + +logger = get_logger() + + +class ExportSpeechSignalProcessTest(unittest.TestCase): + + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_export_ans_dfsmn(self): + model_id = 'damo/speech_dfsmn_ans_psm_48k_causal' + model = Model.from_pretrained(model_id) + onnx_info = Exporter.from_model(model).export_onnx( + output_dir=self.tmp_dir) + + with open(os.path.join(os.getcwd(), INPUT_PKL), 'rb') as f: + fbank_input = pickle.load(f).cpu() + self.assertTrue( + self._validate_onnx_model(fbank_input, model, onnx_info['model']), + 'export onnx failed because of validation error.') + + @staticmethod + def _validate_onnx_model(dummy_inputs, model, output): + try: + import onnx + import onnxruntime as ort + except ImportError: + logger.warning( + 'Cannot validate the exported onnx file, because ' + 'the installation of onnx or onnxruntime cannot be found') + return + onnx_model = onnx.load(output) + onnx.checker.check_model(onnx_model) + ort_session = ort.InferenceSession(output) + with torch.no_grad(): + model.eval() + outputs_origin = model.forward(dummy_inputs) + outputs_origin = numpify_tensor_nested(outputs_origin) + + input_feed = {INPUT_NAME: dummy_inputs.numpy()} + outputs = ort_session.run( + None, + input_feed, + ) + outputs = numpify_tensor_nested(outputs[0]) + + print(outputs) + print(outputs_origin) + return compare_arguments_nested('Onnx model output match failed', + outputs, outputs_origin) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/hub/test_hub_operation.py b/tests/hub/test_hub_operation.py index 82752869..a22aaa64 100644 --- a/tests/hub/test_hub_operation.py +++ b/tests/hub/test_hub_operation.py @@ -22,7 +22,6 @@ DEFAULT_GIT_PATH = 'git' 
download_model_file_name = 'test.bin' -@unittest.skip('temporarily skip') class HubOperationTest(unittest.TestCase): def setUp(self): diff --git a/tests/hub/test_hub_private_files.py b/tests/hub/test_hub_private_files.py index b79c11cd..a343808f 100644 --- a/tests/hub/test_hub_private_files.py +++ b/tests/hub/test_hub_private_files.py @@ -21,7 +21,6 @@ from modelscope.utils.test_utils import (TEST_ACCESS_TOKEN1, download_model_file_name = 'test.bin' -@unittest.skip('temporarily skip') class HubPrivateFileDownloadTest(unittest.TestCase): def setUp(self): diff --git a/tests/hub/test_hub_private_repository.py b/tests/hub/test_hub_private_repository.py index bd2984cf..1b7c41cd 100644 --- a/tests/hub/test_hub_private_repository.py +++ b/tests/hub/test_hub_private_repository.py @@ -17,7 +17,6 @@ from modelscope.utils.test_utils import (TEST_ACCESS_TOKEN1, DEFAULT_GIT_PATH = 'git' -@unittest.skip('temporarily skip') class HubPrivateRepositoryTest(unittest.TestCase): def setUp(self): diff --git a/tests/hub/test_hub_repository.py b/tests/hub/test_hub_repository.py index a006d7c2..7631f5db 100644 --- a/tests/hub/test_hub_repository.py +++ b/tests/hub/test_hub_repository.py @@ -26,7 +26,6 @@ DEFAULT_GIT_PATH = 'git' download_model_file_name = 'test.bin' -@unittest.skip('temporarily skip') class HubRepositoryTest(unittest.TestCase): def setUp(self): @@ -81,6 +80,20 @@ class HubRepositoryTest(unittest.TestCase): assert lfs_file1 in lfs_files assert lfs_file2 in lfs_files + def test_add_lfs_file_type(self): + repo = Repository(self.model_dir, clone_from=self.model_id) + assert os.path.exists(os.path.join(self.model_dir, ModelFile.README)) + os.chdir(self.model_dir) + lfs_file = 'test.safetensors' + os.system("echo 'safttensor'>%s" + % os.path.join(self.model_dir, lfs_file)) + repo.add_lfs_type('*.safetensors') + repo.push('test') + # check lfs files. 
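+        # after push, files matching *.safetensors (here test.safetensors) should be listed as LFS objects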
+ git_wrapper = GitCommandWrapper() + lfs_files = git_wrapper.list_lfs_files(self.model_dir) + assert lfs_file in lfs_files + if __name__ == '__main__': unittest.main() diff --git a/tests/hub/test_hub_retry.py b/tests/hub/test_hub_retry.py index 7f47f119..e294cb68 100644 --- a/tests/hub/test_hub_retry.py +++ b/tests/hub/test_hub_retry.py @@ -12,7 +12,6 @@ from modelscope.hub.api import HubApi from modelscope.hub.file_download import http_get_file -@unittest.skip('temporarily skip') class HubOperationTest(unittest.TestCase): def setUp(self): diff --git a/tests/hub/test_hub_revision.py b/tests/hub/test_hub_revision.py index e97422ad..00d5d53d 100644 --- a/tests/hub/test_hub_revision.py +++ b/tests/hub/test_hub_revision.py @@ -23,7 +23,6 @@ download_model_file_name = 'test.bin' download_model_file_name2 = 'test2.bin' -@unittest.skip('temporarily skip') class HubRevisionTest(unittest.TestCase): def setUp(self): diff --git a/tests/hub/test_hub_revision_release_mode.py b/tests/hub/test_hub_revision_release_mode.py index 49a83371..3b8416db 100644 --- a/tests/hub/test_hub_revision_release_mode.py +++ b/tests/hub/test_hub_revision_release_mode.py @@ -26,7 +26,6 @@ download_model_file_name = 'test.bin' download_model_file_name2 = 'test2.bin' -@unittest.skip('temporarily skip') class HubRevisionTest(unittest.TestCase): def setUp(self): diff --git a/tests/hub/test_hub_upload.py b/tests/hub/test_hub_upload.py index 8e439aeb..2a66cb8b 100644 --- a/tests/hub/test_hub_upload.py +++ b/tests/hub/test_hub_upload.py @@ -19,7 +19,6 @@ from modelscope.utils.test_utils import (TEST_ACCESS_TOKEN1, TEST_MODEL_ORG, logger = get_logger() -@unittest.skip('temporarily skip') class HubUploadTest(unittest.TestCase): def setUp(self): @@ -38,6 +37,7 @@ class HubUploadTest(unittest.TestCase): os.mkdir(self.finetune_path) os.system("echo '{}'>%s" % os.path.join(self.finetune_path, ModelFile.CONFIGURATION)) + os.environ['MODELSCOPE_TRAIN_ID'] = 'test-id' def tearDown(self): logger.info('TearDown') diff --git a/tests/metrics/test_translation_evaluation_metrics.py b/tests/metrics/test_translation_evaluation_metrics.py new file mode 100644 index 00000000..801f742b --- /dev/null +++ b/tests/metrics/test_translation_evaluation_metrics.py @@ -0,0 +1,30 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+ +import unittest + +from modelscope.metrics.translation_evaluation_metric import \ + TranslationEvaluationMetric +from modelscope.models.nlp.unite.configuration import InputFormat +from modelscope.utils.test_utils import test_level + + +class TestTranslationEvaluationMetrics(unittest.TestCase): + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_value(self): + metric = TranslationEvaluationMetric(gap_threshold=25.0) + + outputs = {'score': [0.25, 0.22, 0.30, 0.78, 1.11, 0.95, 1.00, 0.86]} + inputs = { + 'lp': ['zh-en'] * 8, + 'segment_id': [0, 0, 0, 1, 1, 2, 2, 2], + 'raw_score': [94.0, 60.0, 25.0, 59.5, 90.0, 100.0, 80.0, 60.0], + 'input_format': [InputFormat.SRC_REF] * 8, + } + metric.add(outputs, inputs) + result = metric.evaluate() + print(result) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/msdatasets/test_ms_dataset.py b/tests/msdatasets/test_ms_dataset.py index 8ded9a46..ddb84b45 100644 --- a/tests/msdatasets/test_ms_dataset.py +++ b/tests/msdatasets/test_ms_dataset.py @@ -195,18 +195,7 @@ class MsDatasetTest(unittest.TestCase): ) print(next(iter(tf_dataset))) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_streaming_load_coco(self): - small_coco_for_test = MsDataset.load( - dataset_name='EasyCV/small_coco_for_test', - split='train', - use_streaming=True, - download_mode=DownloadMode.FORCE_REDOWNLOAD) - dataset_sample_dict = next(iter(small_coco_for_test)) - print(dataset_sample_dict) - assert dataset_sample_dict.values() - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_streaming_load_uni_fold(self): """Test case for loading large scale datasets.""" dataset = MsDataset.load( @@ -269,7 +258,7 @@ class MsDatasetTest(unittest.TestCase): def test_to_custom_dataset_movie_scene_toydata(self): from modelscope.msdatasets.dataset_cls.custom_datasets.movie_scene_segmentation import \ MovieSceneSegmentationDataset - from modelscope.msdatasets.dataset_cls.dataset import ExternalDataset + from modelscope.msdatasets.dataset_cls import ExternalDataset model_id = 'damo/cv_resnet50-bert_video-scene-segmentation_movienet' cache_path = snapshot_download(model_id) diff --git a/tests/msdatasets/test_virgo_dataset.py b/tests/msdatasets/test_virgo_dataset.py new file mode 100644 index 00000000..96f7f25b --- /dev/null +++ b/tests/msdatasets/test_virgo_dataset.py @@ -0,0 +1,96 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os +import unittest + +from modelscope.hub.api import HubApi +from modelscope.msdatasets import MsDataset +from modelscope.msdatasets.dataset_cls.dataset import VirgoDataset +from modelscope.utils.constant import DownloadMode, Hubs, VirgoDatasetConfig +from modelscope.utils.logger import get_logger + +logger = get_logger() + +# Please use your own access token for buc account. +YOUR_ACCESS_TOKEN = 'your_access_token' +# Please use your own virgo dataset id and ensure you have access to it. 
+VIRGO_DATASET_ID = 'your_virgo_dataset_id' + + +class TestVirgoDataset(unittest.TestCase): + + def setUp(self): + self.api = HubApi() + self.api.login(YOUR_ACCESS_TOKEN) + + @unittest.skip('to be used for local test only') + def test_download_virgo_dataset_meta(self): + ds = MsDataset.load(dataset_name=VIRGO_DATASET_ID, hub=Hubs.virgo) + ds_one = next(iter(ds)) + logger.info(ds_one) + + self.assertTrue(ds_one) + self.assertIsInstance(ds, VirgoDataset) + self.assertIn(VirgoDatasetConfig.col_id, ds_one) + self.assertIn(VirgoDatasetConfig.col_meta_info, ds_one) + self.assertIn(VirgoDatasetConfig.col_analysis_result, ds_one) + self.assertIn(VirgoDatasetConfig.col_external_info, ds_one) + + @unittest.skip('to be used for local test only') + def test_download_virgo_dataset_files(self): + ds = MsDataset.load( + dataset_name=VIRGO_DATASET_ID, + hub=Hubs.virgo, + download_virgo_files=True) + + ds_one = next(iter(ds)) + logger.info(ds_one) + + self.assertTrue(ds_one) + self.assertIsInstance(ds, VirgoDataset) + self.assertTrue(ds.download_virgo_files) + self.assertIn(VirgoDatasetConfig.col_cache_file, ds_one) + cache_file_path = ds_one[VirgoDatasetConfig.col_cache_file] + self.assertTrue(os.path.exists(cache_file_path)) + + @unittest.skip('to be used for local test only') + def test_force_download_virgo_dataset_files(self): + ds = MsDataset.load( + dataset_name=VIRGO_DATASET_ID, + hub=Hubs.virgo, + download_mode=DownloadMode.FORCE_REDOWNLOAD, + download_virgo_files=True) + + ds_one = next(iter(ds)) + logger.info(ds_one) + + self.assertTrue(ds_one) + self.assertIsInstance(ds, VirgoDataset) + self.assertTrue(ds.download_virgo_files) + self.assertIn(VirgoDatasetConfig.col_cache_file, ds_one) + cache_file_path = ds_one[VirgoDatasetConfig.col_cache_file] + self.assertTrue(os.path.exists(cache_file_path)) + + @unittest.skip('to be used for local test only') + def test_download_virgo_dataset_odps(self): + # Note: the samplingType must be 1, which means to get the dataset from MaxCompute(ODPS). + import pandas as pd + + ds = MsDataset.load( + dataset_name=VIRGO_DATASET_ID, + hub=Hubs.virgo, + odps_batch_size=100, + odps_limit=2000, + odps_drop_last=True) + + ds_one = next(iter(ds)) + logger.info(ds_one) + + self.assertTrue(ds_one) + self.assertIsInstance(ds, VirgoDataset) + self.assertTrue(ds_one, pd.DataFrame) + logger.info(f'The shape of sample: {ds_one.shape}') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/easycv_pipelines/test_panoptic_segmentation_pipeline.py b/tests/pipelines/easycv_pipelines/test_panoptic_segmentation_pipeline.py deleted file mode 100644 index 49e01251..00000000 --- a/tests/pipelines/easycv_pipelines/test_panoptic_segmentation_pipeline.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import unittest - -import cv2 - -from modelscope.outputs import OutputKeys -from modelscope.pipelines import pipeline -from modelscope.utils.constant import Tasks -from modelscope.utils.cv.image_utils import panoptic_seg_masks_to_image -from modelscope.utils.demo_utils import DemoCompatibilityCheck -from modelscope.utils.test_utils import test_level - - -class EasyCVPanopticSegmentationPipelineTest(unittest.TestCase, - DemoCompatibilityCheck): - img_path = 'data/test/images/image_semantic_segmentation.jpg' - - def setUp(self) -> None: - self.task = Tasks.image_segmentation - self.model_id = 'damo/cv_r50_panoptic-segmentation_cocopan' - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_r50(self): - segmentor = pipeline(task=self.task, model=self.model_id) - outputs = segmentor(self.img_path) - draw_img = panoptic_seg_masks_to_image(outputs[OutputKeys.MASKS]) - cv2.imwrite('result.jpg', draw_img) - print('print ' + self.model_id + ' success') - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/pipelines/easycv_pipelines/test_segmentation_pipeline.py b/tests/pipelines/easycv_pipelines/test_segmentation_pipeline.py deleted file mode 100644 index 5f6dac4b..00000000 --- a/tests/pipelines/easycv_pipelines/test_segmentation_pipeline.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import unittest -from distutils.version import LooseVersion - -import cv2 -import easycv -import numpy as np -from PIL import Image - -from modelscope.outputs import OutputKeys -from modelscope.pipelines import pipeline -from modelscope.utils.constant import Tasks -from modelscope.utils.cv.image_utils import semantic_seg_masks_to_image -from modelscope.utils.demo_utils import DemoCompatibilityCheck -from modelscope.utils.test_utils import test_level - - -class EasyCVSegmentationPipelineTest(unittest.TestCase, - DemoCompatibilityCheck): - img_path = 'data/test/images/image_segmentation.jpg' - - def setUp(self) -> None: - self.task = Tasks.image_segmentation - self.model_id = 'damo/cv_segformer-b0_image_semantic-segmentation_coco-stuff164k' - - def _internal_test_(self, model_id): - semantic_seg = pipeline(task=Tasks.image_segmentation, model=model_id) - outputs = semantic_seg(self.img_path) - - draw_img = semantic_seg_masks_to_image(outputs[OutputKeys.MASKS]) - cv2.imwrite('result.jpg', draw_img) - print('test ' + model_id + ' DONE') - - def _internal_test_batch_(self, model_id, num_samples=2, batch_size=2): - # TODO: support in the future - img = np.asarray(Image.open(self.img_path)) - num_samples = num_samples - batch_size = batch_size - semantic_seg = pipeline( - task=Tasks.image_segmentation, - model=model_id, - batch_size=batch_size) - outputs = semantic_seg([self.img_path] * num_samples) - - self.assertEqual(semantic_seg.predict_op.batch_size, batch_size) - self.assertEqual(len(outputs), num_samples) - - for output in outputs: - self.assertListEqual( - list(img.shape)[:2], list(output['seg_pred'].shape)) - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_segformer_b0(self): - model_id = 'damo/cv_segformer-b0_image_semantic-segmentation_coco-stuff164k' - self._internal_test_(model_id) - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_segformer_b1(self): - model_id = 
'damo/cv_segformer-b1_image_semantic-segmentation_coco-stuff164k' - self._internal_test_(model_id) - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_segformer_b2(self): - model_id = 'damo/cv_segformer-b2_image_semantic-segmentation_coco-stuff164k' - self._internal_test_(model_id) - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_segformer_b3(self): - model_id = 'damo/cv_segformer-b3_image_semantic-segmentation_coco-stuff164k' - self._internal_test_(model_id) - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_segformer_b4(self): - model_id = 'damo/cv_segformer-b4_image_semantic-segmentation_coco-stuff164k' - self._internal_test_(model_id) - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_segformer_b5(self): - model_id = 'damo/cv_segformer-b5_image_semantic-segmentation_coco-stuff164k' - self._internal_test_(model_id) - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/pipelines/plugin_remote_pipelines/test_plugin_model.py b/tests/pipelines/plugin_remote_pipelines/test_plugin_model.py index 43d840ea..71b9e64f 100644 --- a/tests/pipelines/plugin_remote_pipelines/test_plugin_model.py +++ b/tests/pipelines/plugin_remote_pipelines/test_plugin_model.py @@ -3,12 +3,11 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.plugins import PluginsManager from modelscope.utils.test_utils import test_level -class PluginModelTest(unittest.TestCase, DemoCompatibilityCheck): +class PluginModelTest(unittest.TestCase): def setUp(self): self.package = 'adaseq' diff --git a/tests/pipelines/test_abnormal_object_detection.py b/tests/pipelines/test_abnormal_object_detection.py index fbce51c6..c6264069 100644 --- a/tests/pipelines/test_abnormal_object_detection.py +++ b/tests/pipelines/test_abnormal_object_detection.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class ObjectDetectionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_object_detection @@ -20,10 +19,6 @@ class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): result = object_detect(input_location) print(result) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_action_detection.py b/tests/pipelines/test_action_detection.py index ae7e60b1..d724c81a 100644 --- a/tests/pipelines/test_action_detection.py +++ b/tests/pipelines/test_action_detection.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ActionDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class ActionDetectionTest(unittest.TestCase): def setUp(self) -> None: self.task = 
Tasks.action_detection @@ -20,10 +19,6 @@ class ActionDetectionTest(unittest.TestCase, DemoCompatibilityCheck): 'data/test/videos/action_detection_test_video.mp4') print('action detection results:', result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_action_recognition.py b/tests/pipelines/test_action_recognition.py index 292eb238..9d0c6175 100644 --- a/tests/pipelines/test_action_recognition.py +++ b/tests/pipelines/test_action_recognition.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ActionRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class ActionRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.action_recognition @@ -37,10 +36,6 @@ class ActionRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): 'data/test/videos/action_recognition_test_video.mp4') print('pst recognition results:', result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_adaint_image_color_enhance.py b/tests/pipelines/test_adaint_image_color_enhance.py index e36a85ec..f0efef5f 100644 --- a/tests/pipelines/test_adaint_image_color_enhance.py +++ b/tests/pipelines/test_adaint_image_color_enhance.py @@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class AdaIntImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck): +class AdaIntImageColorEnhanceTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_adaint_image-color-enhance-models' @@ -40,11 +39,6 @@ class AdaIntImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck): self.pipeline_inference(img_color_enhance, 'data/test/images/image_color_enhance.png') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - @unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest only') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_addr_mgeo.py b/tests/pipelines/test_addr_mgeo.py index d630b857..e678d285 100644 --- a/tests/pipelines/test_addr_mgeo.py +++ b/tests/pipelines/test_addr_mgeo.py @@ -8,12 +8,11 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import TextClassificationPipeline from modelscope.preprocessors import TextClassificationTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool from modelscope.utils.test_utils import test_level -class MGeoTest(unittest.TestCase, DemoCompatibilityCheck): +class MGeoTest(unittest.TestCase): multi_modal_inputs = { 'source_sentence': ['杭州余杭东方未来学校附近世纪华联商场(金家渡北苑店)'], @@ -117,10 +116,6 @@ class MGeoTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = 
pipeline(task=task, model=model) print(pipeline_ins(input=inputs)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_addr_similarity.py b/tests/pipelines/test_addr_similarity.py index 8c1f93c9..ecc879eb 100644 --- a/tests/pipelines/test_addr_similarity.py +++ b/tests/pipelines/test_addr_similarity.py @@ -8,12 +8,11 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import TextClassificationPipeline from modelscope.preprocessors import TextClassificationTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool from modelscope.utils.test_utils import test_level -class AddrSimilarityTest(unittest.TestCase, DemoCompatibilityCheck): +class AddrSimilarityTest(unittest.TestCase): sentence1 = '阿里巴巴西溪园区' sentence2 = '文一西路阿里巴巴' @@ -37,10 +36,6 @@ class AddrSimilarityTest(unittest.TestCase, DemoCompatibilityCheck): task=Tasks.text_classification, model=self.model_id) print(pipeline_ins(input=(self.sentence1, self.sentence2))) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_animal_recognition.py b/tests/pipelines/test_animal_recognition.py index eb9f92e6..57937770 100644 --- a/tests/pipelines/test_animal_recognition.py +++ b/tests/pipelines/test_animal_recognition.py @@ -4,11 +4,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class AnimalRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class AnimalRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.animal_recognition @@ -21,10 +20,6 @@ class AnimalRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): result = animal_recognition('data/test/images/dogs.jpg') print(result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_arc_face_recognition.py b/tests/pipelines/test_arc_face_recognition.py index fa17dd91..481b9f33 100644 --- a/tests/pipelines/test_arc_face_recognition.py +++ b/tests/pipelines/test_arc_face_recognition.py @@ -6,11 +6,10 @@ import numpy as np from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class FaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class FaceRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.face_recognition @@ -31,10 +30,6 @@ class FaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): sim = np.dot(emb1[0], emb2[0]) print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git 
a/tests/pipelines/test_automatic_post_editing.py b/tests/pipelines/test_automatic_post_editing.py index da09851c..190ff788 100644 --- a/tests/pipelines/test_automatic_post_editing.py +++ b/tests/pipelines/test_automatic_post_editing.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class AutomaticPostEditingTest(unittest.TestCase, DemoCompatibilityCheck): +class AutomaticPostEditingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.translation @@ -21,10 +20,6 @@ class AutomaticPostEditingTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(self.task, model=self.model_id) print(pipeline_ins(input=inputs)) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_automatic_speech_recognition.py b/tests/pipelines/test_automatic_speech_recognition.py index dc624f29..6014438e 100644 --- a/tests/pipelines/test_automatic_speech_recognition.py +++ b/tests/pipelines/test_automatic_speech_recognition.py @@ -10,7 +10,6 @@ import soundfile from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import ColorCodes, Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import download_and_untar, test_level @@ -26,8 +25,7 @@ TFRECORD_TESTSETS_FILE = 'tfrecord.tar.gz' TFRECORD_TESTSETS_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/datasets/tfrecord.tar.gz' -class AutomaticSpeechRecognitionTest(unittest.TestCase, - DemoCompatibilityCheck): +class AutomaticSpeechRecognitionTest(unittest.TestCase): action_info = { 'test_run_with_wav_pytorch': { 'checking_item': OutputKeys.TEXT, @@ -457,10 +455,6 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase, logger.info(ColorCodes.MAGENTA + str(rec_result) + ColorCodes.END) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_bad_image_detecting.py b/tests/pipelines/test_bad_image_detecting.py index 728da8d1..05954f7b 100644 --- a/tests/pipelines/test_bad_image_detecting.py +++ b/tests/pipelines/test_bad_image_detecting.py @@ -7,11 +7,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import BadImageDetecingPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class BadImageDetectingTest(unittest.TestCase, DemoCompatibilityCheck): +class BadImageDetectingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.bad_image_detecting @@ -58,10 +57,6 @@ class BadImageDetectingTest(unittest.TestCase, DemoCompatibilityCheck): print('pipeline: the out_label is {}'.format(labels)) print('pipeline: the out_score is {}'.format(scores)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git 
a/tests/pipelines/test_body_2d_keypoints.py b/tests/pipelines/test_body_2d_keypoints.py index 5d90cbf0..25d8fa55 100644 --- a/tests/pipelines/test_body_2d_keypoints.py +++ b/tests/pipelines/test_body_2d_keypoints.py @@ -8,11 +8,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import draw_keypoints -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class Body2DKeypointsTest(unittest.TestCase, DemoCompatibilityCheck): +class Body2DKeypointsTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.body_2d_keypoints @@ -34,10 +33,6 @@ class Body2DKeypointsTest(unittest.TestCase, DemoCompatibilityCheck): body_2d_keypoints = pipeline(self.task, model=self.model_id) self.pipeline_inference(body_2d_keypoints, Image.open(self.test_image)) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_body_3d_keypoints.py b/tests/pipelines/test_body_3d_keypoints.py index 6f73a243..33228022 100644 --- a/tests/pipelines/test_body_3d_keypoints.py +++ b/tests/pipelines/test_body_3d_keypoints.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class Body3DKeypointsTest(unittest.TestCase, DemoCompatibilityCheck): +class Body3DKeypointsTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_canonical_body-3d-keypoints_video' @@ -41,10 +40,6 @@ class Body3DKeypointsTest(unittest.TestCase, DemoCompatibilityCheck): % (self.test_video)) self.pipeline_inference(body_3d_keypoints, pipeline_input=cap) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_body_3d_keypoints_hdformer.py b/tests/pipelines/test_body_3d_keypoints_hdformer.py index 2ebbc95b..e86f247f 100644 --- a/tests/pipelines/test_body_3d_keypoints_hdformer.py +++ b/tests/pipelines/test_body_3d_keypoints_hdformer.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class Body3DKeypointsHDFormerTest(unittest.TestCase, DemoCompatibilityCheck): +class Body3DKeypointsHDFormerTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_hdformer_body-3d-keypoints_video' @@ -41,10 +40,6 @@ class Body3DKeypointsHDFormerTest(unittest.TestCase, DemoCompatibilityCheck): % (self.test_video)) self.pipeline_inference(body_3d_keypoints, pipeline_input=cap) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_canmt_translation.py b/tests/pipelines/test_canmt_translation.py index e3bce5d9..31e57040 100644 --- 
a/tests/pipelines/test_canmt_translation.py +++ b/tests/pipelines/test_canmt_translation.py @@ -8,11 +8,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import CanmtTranslationPipeline from modelscope.preprocessors import CanmtTranslationPreprocessor, Preprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class CanmtTranslationTest(unittest.TestCase, DemoCompatibilityCheck): +class CanmtTranslationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.competency_aware_translation @@ -59,10 +58,6 @@ class CanmtTranslationTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(task=self.task) print(pipeline_ins(self.input)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_card_detection.py b/tests/pipelines/test_card_detection.py index d913f494..676fb786 100644 --- a/tests/pipelines/test_card_detection.py +++ b/tests/pipelines/test_card_detection.py @@ -8,11 +8,10 @@ from modelscope.msdatasets import MsDataset from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import draw_card_detection_result -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class CardDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class CardDetectionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.card_detection @@ -57,10 +56,6 @@ class CardDetectionTest(unittest.TestCase, DemoCompatibilityCheck): result = card_detection(img_path) self.show_result(img_path, result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_cartoon_stable_diffusion.py b/tests/pipelines/test_cartoon_stable_diffusion.py index 751c7ea8..6a91ed44 100644 --- a/tests/pipelines/test_cartoon_stable_diffusion.py +++ b/tests/pipelines/test_cartoon_stable_diffusion.py @@ -6,11 +6,10 @@ import cv2 from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class CartoonStableDiffusionTest(unittest.TestCase, DemoCompatibilityCheck): +class CartoonStableDiffusionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_to_image_synthesis diff --git a/tests/pipelines/test_chinese_stable_diffusion.py b/tests/pipelines/test_chinese_stable_diffusion.py index bd6d74aa..05207ddb 100644 --- a/tests/pipelines/test_chinese_stable_diffusion.py +++ b/tests/pipelines/test_chinese_stable_diffusion.py @@ -6,11 +6,10 @@ import cv2 from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ChineseStableDiffusionTest(unittest.TestCase, DemoCompatibilityCheck): +class ChineseStableDiffusionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_to_image_synthesis diff --git a/tests/pipelines/test_clip_interrogator.py b/tests/pipelines/test_clip_interrogator.py new file mode 100644 index 
00000000..615aef3c --- /dev/null +++ b/tests/pipelines/test_clip_interrogator.py @@ -0,0 +1,34 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +from modelscope.models import Model +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.test_utils import test_level + + +class CLIPInterrogatorTest(unittest.TestCase): + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_run_with_image_captioning_with_model(self): + model = Model.from_pretrained('damo/cv_clip-interrogator') + pipeline_caption = pipeline( + task=Tasks.image_captioning, + model=model, + ) + image = 'data/test/images/image_mplug_vqa.jpg' + result = pipeline_caption(image) + print(result[OutputKeys.CAPTION]) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_image_captioning_with_name(self): + pipeline_caption = pipeline( + Tasks.image_captioning, model='damo/cv_clip-interrogator') + image = 'data/test/images/image_mplug_vqa.jpg' + result = pipeline_caption(image) + print(result[OutputKeys.CAPTION]) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_cmdssl_video_embedding.py b/tests/pipelines/test_cmdssl_video_embedding.py index 5807c075..9e176cf2 100644 --- a/tests/pipelines/test_cmdssl_video_embedding.py +++ b/tests/pipelines/test_cmdssl_video_embedding.py @@ -4,11 +4,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class CMDSSLVideoEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): +class CMDSSLVideoEmbeddingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_embedding @@ -22,10 +21,6 @@ class CMDSSLVideoEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): print(f'video embedding output: {result}.') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_content_check.py b/tests/pipelines/test_content_check.py index c68af257..39a791a0 100644 --- a/tests/pipelines/test_content_check.py +++ b/tests/pipelines/test_content_check.py @@ -4,11 +4,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ContentCheckTest(unittest.TestCase, DemoCompatibilityCheck): +class ContentCheckTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_classification @@ -20,10 +19,6 @@ class ContentCheckTest(unittest.TestCase, DemoCompatibilityCheck): result = content_check_func('data/test/images/content_check.jpg') print(result) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_controllable_image_generation.py b/tests/pipelines/test_controllable_image_generation.py index a5cff66c..7d6b03ce 100644 --- a/tests/pipelines/test_controllable_image_generation.py +++ b/tests/pipelines/test_controllable_image_generation.py @@ -10,12 +10,10 @@ from modelscope.outputs import OutputKeys from 
modelscope.pipelines import pipeline from modelscope.pipelines.cv import ControllableImageGenerationPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ControllableImageGenerationTest(unittest.TestCase, - DemoCompatibilityCheck): +class ControllableImageGenerationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.controllable_image_generation @@ -68,10 +66,6 @@ class ControllableImageGenerationTest(unittest.TestCase, print( 'pipeline: the output image path is {}'.format(output_image_path)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_conversational_text_to_sql.py b/tests/pipelines/test_conversational_text_to_sql.py index 17fffcaf..a7e15dcc 100644 --- a/tests/pipelines/test_conversational_text_to_sql.py +++ b/tests/pipelines/test_conversational_text_to_sql.py @@ -8,13 +8,12 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import ConversationalTextToSqlPipeline from modelscope.preprocessors import ConversationalTextToSqlPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.nlp.space_T_en.utils import \ text2sql_tracking_and_print_results from modelscope.utils.test_utils import test_level -class ConversationalTextToSql(unittest.TestCase, DemoCompatibilityCheck): +class ConversationalTextToSql(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.table_question_answering @@ -67,10 +66,6 @@ class ConversationalTextToSql(unittest.TestCase, DemoCompatibilityCheck): pipelines = [pipeline(task=self.task, model=self.model_id)] text2sql_tracking_and_print_results(self.test_case, pipelines) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_crowd_counting.py b/tests/pipelines/test_crowd_counting.py index 4e15cfca..be14f29e 100644 --- a/tests/pipelines/test_crowd_counting.py +++ b/tests/pipelines/test_crowd_counting.py @@ -8,14 +8,13 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import numpy_to_cv2img -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import test_level logger = get_logger() -class CrowdCountingTest(unittest.TestCase, DemoCompatibilityCheck): +class CrowdCountingTest(unittest.TestCase): def setUp(self) -> None: self.input_location = 'data/test/images/crowd_counting.jpg' @@ -56,10 +55,6 @@ class CrowdCountingTest(unittest.TestCase, DemoCompatibilityCheck): else: raise ValueError('process error') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_csanmt_translation.py b/tests/pipelines/test_csanmt_translation.py index d989a6c4..03545fc5 100644 --- a/tests/pipelines/test_csanmt_translation.py +++ b/tests/pipelines/test_csanmt_translation.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import 
pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TranslationTest(unittest.TestCase, DemoCompatibilityCheck): +class TranslationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.translation @@ -90,10 +89,6 @@ class TranslationTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(self.task) print(pipeline_ins(input=inputs)) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_damo_face_detection.py b/tests/pipelines/test_damo_face_detection.py index 8bd1e009..44578c3e 100644 --- a/tests/pipelines/test_damo_face_detection.py +++ b/tests/pipelines/test_damo_face_detection.py @@ -7,11 +7,10 @@ import cv2 from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import draw_face_detection_result -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class FaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class FaceDetectionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.face_detection @@ -36,10 +35,6 @@ class FaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck): result = face_detection(img_path) self.show_result(img_path, result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_ddcolor_image_colorization.py b/tests/pipelines/test_ddcolor_image_colorization.py index e1876329..5d752452 100644 --- a/tests/pipelines/test_ddcolor_image_colorization.py +++ b/tests/pipelines/test_ddcolor_image_colorization.py @@ -11,11 +11,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.pipelines.cv import DDColorImageColorizationPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class DDColorImageColorizationTest(unittest.TestCase, DemoCompatibilityCheck): +class DDColorImageColorizationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_colorization @@ -52,10 +51,6 @@ class DDColorImageColorizationTest(unittest.TestCase, DemoCompatibilityCheck): image_colorization = pipeline(Tasks.image_colorization) self.pipeline_inference(image_colorization, self.test_image) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_ddpm_semantic_segmentation.py b/tests/pipelines/test_ddpm_semantic_segmentation.py index a5303098..ad35e069 100644 --- a/tests/pipelines/test_ddpm_semantic_segmentation.py +++ b/tests/pipelines/test_ddpm_semantic_segmentation.py @@ -5,12 +5,10 @@ import torch from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class DDPMImageSemanticSegmentationTest(unittest.TestCase, - DemoCompatibilityCheck): +class 
DDPMImageSemanticSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_segmentation @@ -28,10 +26,6 @@ class DDPMImageSemanticSegmentationTest(unittest.TestCase, else: raise ValueError('process error') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_deeplpf_image_color_enhance.py b/tests/pipelines/test_deeplpf_image_color_enhance.py index 08b1a357..87b709fe 100644 --- a/tests/pipelines/test_deeplpf_image_color_enhance.py +++ b/tests/pipelines/test_deeplpf_image_color_enhance.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class DeepLPFImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck): +class DeepLPFImageColorEnhanceTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_deeplpfnet_image-color-enhance-models' @@ -37,10 +36,6 @@ class DeepLPFImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck): self.pipeline_inference(img_color_enhance, 'data/test/images/image_color_enhance.png') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_dialog_intent_prediction.py b/tests/pipelines/test_dialog_intent_prediction.py index 2ee46388..5a1729b8 100644 --- a/tests/pipelines/test_dialog_intent_prediction.py +++ b/tests/pipelines/test_dialog_intent_prediction.py @@ -8,11 +8,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import DialogIntentPredictionPipeline from modelscope.preprocessors import DialogIntentPredictionPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class DialogIntentPredictionTest(unittest.TestCase, DemoCompatibilityCheck): +class DialogIntentPredictionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.task_oriented_conversation @@ -68,10 +67,6 @@ class DialogIntentPredictionTest(unittest.TestCase, DemoCompatibilityCheck): for my_pipeline, item in list(zip(pipelines, self.test_case)): print(my_pipeline(item)) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_dialog_modeling.py b/tests/pipelines/test_dialog_modeling.py index 6b6259ce..202951a5 100644 --- a/tests/pipelines/test_dialog_modeling.py +++ b/tests/pipelines/test_dialog_modeling.py @@ -10,11 +10,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import DialogModelingPipeline from modelscope.preprocessors import DialogModelingPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class DialogModelingTest(unittest.TestCase, DemoCompatibilityCheck): +class DialogModelingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.task_oriented_conversation @@ -148,10 +147,6 @@ 
class DialogModelingTest(unittest.TestCase, DemoCompatibilityCheck): pipelines = [pipeline(task=self.task)] self.generate_and_print_dialog_response(pipelines) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_dialog_state_tracking.py b/tests/pipelines/test_dialog_state_tracking.py index 6cdd5ee7..e7f72b83 100644 --- a/tests/pipelines/test_dialog_state_tracking.py +++ b/tests/pipelines/test_dialog_state_tracking.py @@ -8,13 +8,12 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import DialogStateTrackingPipeline from modelscope.preprocessors import DialogStateTrackingPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.nlp.space.utils_dst import \ tracking_and_print_dialog_states from modelscope.utils.test_utils import test_level -class DialogStateTrackingTest(unittest.TestCase, DemoCompatibilityCheck): +class DialogStateTrackingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.task_oriented_conversation @@ -119,10 +118,6 @@ class DialogStateTrackingTest(unittest.TestCase, DemoCompatibilityCheck): pipelines = [pipeline(task=self.task, model=self.model_id)] tracking_and_print_dialog_states(self.test_case, pipelines) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_diffusers_stable_diffusion.py b/tests/pipelines/test_diffusers_stable_diffusion.py index 98c4862a..eef677fc 100644 --- a/tests/pipelines/test_diffusers_stable_diffusion.py +++ b/tests/pipelines/test_diffusers_stable_diffusion.py @@ -6,11 +6,10 @@ import cv2 from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class DiffusersStableDiffusionTest(unittest.TestCase, DemoCompatibilityCheck): +class DiffusersStableDiffusionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_to_image_synthesis diff --git a/tests/pipelines/test_disco_guided_diffusion.py b/tests/pipelines/test_disco_guided_diffusion.py index d7be7292..f3fd668b 100644 --- a/tests/pipelines/test_disco_guided_diffusion.py +++ b/tests/pipelines/test_disco_guided_diffusion.py @@ -5,11 +5,10 @@ import cv2 from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class DiscoGuidedDiffusionTest(unittest.TestCase, DemoCompatibilityCheck): +class DiscoGuidedDiffusionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_to_image_synthesis diff --git a/tests/pipelines/test_document_grounded_dialog_generate.py b/tests/pipelines/test_document_grounded_dialog_generate.py index da23fe19..b08a07fa 100644 --- a/tests/pipelines/test_document_grounded_dialog_generate.py +++ b/tests/pipelines/test_document_grounded_dialog_generate.py @@ -9,12 +9,10 @@ from modelscope.pipelines import pipeline from modelscope.preprocessors.nlp import \ DocumentGroundedDialogGeneratePreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from 
modelscope.utils.test_utils import test_level -class DocumentGroundedDialogGenerateTest(unittest.TestCase, - DemoCompatibilityCheck): +class DocumentGroundedDialogGenerateTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.document_grounded_dialog_generate diff --git a/tests/pipelines/test_document_grounded_dialog_retrieval.py b/tests/pipelines/test_document_grounded_dialog_retrieval.py index 6bcca369..48a63087 100644 --- a/tests/pipelines/test_document_grounded_dialog_retrieval.py +++ b/tests/pipelines/test_document_grounded_dialog_retrieval.py @@ -9,12 +9,10 @@ from modelscope.pipelines import pipeline from modelscope.preprocessors.nlp import \ DocumentGroundedDialogRetrievalPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class DocumentGroundedDialogRetrievalTest(unittest.TestCase, - DemoCompatibilityCheck): +class DocumentGroundedDialogRetrievalTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.document_grounded_dialog_retrieval diff --git a/tests/pipelines/test_document_segmentation.py b/tests/pipelines/test_document_segmentation.py index 41c490d2..09ce5756 100644 --- a/tests/pipelines/test_document_segmentation.py +++ b/tests/pipelines/test_document_segmentation.py @@ -6,14 +6,13 @@ from typing import Any, Dict from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import test_level logger = get_logger() -class DocumentSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): +class DocumentSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.document_segmentation @@ -64,10 +63,6 @@ class DocumentSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): for document in documents_list: print(document) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_document_vl_embedding.py b/tests/pipelines/test_document_vl_embedding.py index f8d2d5a3..349547d1 100644 --- a/tests/pipelines/test_document_vl_embedding.py +++ b/tests/pipelines/test_document_vl_embedding.py @@ -10,11 +10,10 @@ from modelscope.models import Model from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class DocumentVLEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): +class DocumentVLEmbeddingTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/multi-modal_convnext-roberta-base_vldoc-embedding' @@ -51,10 +50,6 @@ class DocumentVLEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): vldoc_doc_VL_emb_pipeline = pipeline(self.task) self.pipeline_inference(vldoc_doc_VL_emb_pipeline) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_domain_classification.py b/tests/pipelines/test_domain_classification.py index 8e5bfa7f..006daa65 100644 --- 
a/tests/pipelines/test_domain_classification.py +++ b/tests/pipelines/test_domain_classification.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class DomainClassificationTest(unittest.TestCase, DemoCompatibilityCheck): +class DomainClassificationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_classification @@ -36,10 +35,6 @@ class DomainClassificationTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(self.task, model=model_id) print(pipeline_ins(input=inputs)) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_efficient_diffusion_tuning.py b/tests/pipelines/test_efficient_diffusion_tuning.py index 9dc5e412..e33b2bf2 100644 --- a/tests/pipelines/test_efficient_diffusion_tuning.py +++ b/tests/pipelines/test_efficient_diffusion_tuning.py @@ -5,11 +5,10 @@ from modelscope.models import Model from modelscope.models.multi_modal import EfficientStableDiffusion from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class EfficientDiffusionTuningTest(unittest.TestCase, DemoCompatibilityCheck): +class EfficientDiffusionTuningTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.efficient_diffusion_tuning @@ -28,13 +27,9 @@ class EfficientDiffusionTuningTest(unittest.TestCase, DemoCompatibilityCheck): model = Model.from_pretrained(model_id) self.assertTrue(model.__class__ == EfficientStableDiffusion) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_efficient_diffusion_tuning_lora_demo_compatibility(self): - self.model_id = 'damo/multi-modal_efficient-diffusion-tuning-lora' - self.compatibility_check() - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_efficient_diffusion_tuning_control_lora_run_pipeline(self): + # TODO: to be fixed in the future model_id = 'damo/multi-modal_efficient-diffusion-tuning-control-lora' inputs = { 'prompt': @@ -53,11 +48,6 @@ class EfficientDiffusionTuningTest(unittest.TestCase, DemoCompatibilityCheck): model = Model.from_pretrained(model_id) self.assertTrue(model.__class__ == EfficientStableDiffusion) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_efficient_diffusion_tuning_control_lora_demo_compatibility(self): - self.model_id = 'damo/multi-modal_efficient-diffusion-tuning-control-lora' - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_extractive_summarization.py b/tests/pipelines/test_extractive_summarization.py index 26ac508c..a7f12d14 100644 --- a/tests/pipelines/test_extractive_summarization.py +++ b/tests/pipelines/test_extractive_summarization.py @@ -6,14 +6,13 @@ from typing import Any, Dict from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from 
modelscope.utils.test_utils import test_level logger = get_logger() -class ExtractiveSummarizationTest(unittest.TestCase, DemoCompatibilityCheck): +class ExtractiveSummarizationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.extractive_summarization @@ -46,10 +45,6 @@ class ExtractiveSummarizationTest(unittest.TestCase, DemoCompatibilityCheck): model_id=self.ponet_topic_model_id, documents=self.sentences) print(result[OutputKeys.TEXT]) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_face_2d_keypoints.py b/tests/pipelines/test_face_2d_keypoints.py index 7ccc8a59..875a0e11 100644 --- a/tests/pipelines/test_face_2d_keypoints.py +++ b/tests/pipelines/test_face_2d_keypoints.py @@ -10,7 +10,7 @@ from modelscope.utils.test_utils import test_level class EasyCVFace2DKeypointsPipelineTest(unittest.TestCase): - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip('skip easycv related cases') def test_face_2d_keypoints(self): img_path = 'data/test/images/face_detection.png' model_id = 'damo/cv_mobilenet_face-2d-keypoints_alignment' diff --git a/tests/pipelines/test_face_detection.py b/tests/pipelines/test_face_detection.py index 7e35cdbb..86e0f702 100644 --- a/tests/pipelines/test_face_detection.py +++ b/tests/pipelines/test_face_detection.py @@ -8,11 +8,10 @@ from modelscope.msdatasets import MsDataset from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import draw_face_detection_result -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class FaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class FaceDetectionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.face_detection @@ -42,10 +41,6 @@ class FaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck): result = face_detection(img_path) self.show_result(img_path, result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_face_image_generation.py b/tests/pipelines/test_face_image_generation.py index 21d8e835..fbd7e3b5 100644 --- a/tests/pipelines/test_face_image_generation.py +++ b/tests/pipelines/test_face_image_generation.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class FaceGenerationTest(unittest.TestCase, DemoCompatibilityCheck): +class FaceGenerationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.face_image_generation @@ -39,10 +38,6 @@ class FaceGenerationTest(unittest.TestCase, DemoCompatibilityCheck): face_generation = pipeline(self.task) self.pipeline_inference(face_generation, seed) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_face_recognition.py b/tests/pipelines/test_face_recognition.py index d3451f5d..7b84590c 
100644 --- a/tests/pipelines/test_face_recognition.py +++ b/tests/pipelines/test_face_recognition.py @@ -6,11 +6,10 @@ import numpy as np from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class FaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class FaceRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.face_recognition @@ -28,10 +27,6 @@ class FaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): sim = np.dot(emb1[0], emb2[0]) print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_face_recognition_onnx_fm.py b/tests/pipelines/test_face_recognition_onnx_fm.py index 8478b3bf..b60bec93 100644 --- a/tests/pipelines/test_face_recognition_onnx_fm.py +++ b/tests/pipelines/test_face_recognition_onnx_fm.py @@ -6,11 +6,10 @@ import numpy as np from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class FmFaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class FmFaceRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.face_recognition @@ -31,10 +30,6 @@ class FmFaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): sim = np.dot(emb1[0], emb2[0]) print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_face_recognition_onnx_ir.py b/tests/pipelines/test_face_recognition_onnx_ir.py index c45042be..a7cf008c 100644 --- a/tests/pipelines/test_face_recognition_onnx_ir.py +++ b/tests/pipelines/test_face_recognition_onnx_ir.py @@ -6,11 +6,10 @@ import numpy as np from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class IrFaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class IrFaceRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.face_recognition @@ -31,10 +30,6 @@ class IrFaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): sim = np.dot(emb1[0], emb2[0]) print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_face_recognition_onnx_transface.py b/tests/pipelines/test_face_recognition_onnx_transface.py new file mode 100644 index 00000000..183257f0 --- /dev/null +++ b/tests/pipelines/test_face_recognition_onnx_transface.py @@ -0,0 +1,35 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import unittest + +import numpy as np + +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.test_utils import test_level + + +class TransFaceRecognitionTest(unittest.TestCase): + + def setUp(self) -> None: + self.task = Tasks.face_recognition + self.model_id = 'damo/cv_vit_face-recognition' + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_face_compare(self): + img1 = 'data/test/images/face_recognition_1.png' + img2 = 'data/test/images/face_recognition_2.png' + + face_recognition = pipeline( + Tasks.face_recognition, model=self.model_id) + emb1 = face_recognition(img1)[OutputKeys.IMG_EMBEDDING] + emb2 = face_recognition(img2)[OutputKeys.IMG_EMBEDDING] + if emb1 is None or emb2 is None: + print('No Detected Face.') + else: + sim = np.dot(emb1[0], emb2[0]) + print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_face_recognition_ood.py b/tests/pipelines/test_face_recognition_ood.py index 8a6fb444..68cf1f0b 100644 --- a/tests/pipelines/test_face_recognition_ood.py +++ b/tests/pipelines/test_face_recognition_ood.py @@ -6,11 +6,10 @@ import numpy as np from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class FaceRecognitionOodTest(unittest.TestCase, DemoCompatibilityCheck): +class FaceRecognitionOodTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.face_recognition @@ -39,10 +38,6 @@ class FaceRecognitionOodTest(unittest.TestCase, DemoCompatibilityCheck): print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}') print(f'OOD score: img1:{score1:.3f} img2:{score2:.3f}') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_face_reconstruction.py b/tests/pipelines/test_face_reconstruction.py index b35482fb..06950487 100644 --- a/tests/pipelines/test_face_reconstruction.py +++ b/tests/pipelines/test_face_reconstruction.py @@ -14,13 +14,12 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level sys.path.append('.') -class FaceReconstructionTest(unittest.TestCase, DemoCompatibilityCheck): +class FaceReconstructionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.face_reconstruction @@ -60,7 +59,7 @@ class FaceReconstructionTest(unittest.TestCase, DemoCompatibilityCheck): Tasks.face_reconstruction, model=model_dir) self.pipeline_inference(face_reconstruction, self.test_image) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_modelhub(self): face_reconstruction = pipeline( Tasks.face_reconstruction, @@ -68,10 +67,6 @@ class FaceReconstructionTest(unittest.TestCase, DemoCompatibilityCheck): model_revision='v2.0.0-HRN') self.pipeline_inference(face_reconstruction, self.test_image) - @unittest.skip('demo compatibility test is only enabled 
on a needed-basis')
-    def test_demo_compatibility(self):
-        self.compatibility_check()
-
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/pipelines/test_faq_question_answering.py b/tests/pipelines/test_faq_question_answering.py
index 31680095..89f95162 100644
--- a/tests/pipelines/test_faq_question_answering.py
+++ b/tests/pipelines/test_faq_question_answering.py
@@ -12,11 +12,10 @@ from modelscope.pipelines.nlp import FaqQuestionAnsweringPipeline
 from modelscope.preprocessors import \
     FaqQuestionAnsweringTransformersPreprocessor
 from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
 from modelscope.utils.test_utils import test_level
 
 
-class FaqQuestionAnsweringTest(unittest.TestCase, DemoCompatibilityCheck):
+class FaqQuestionAnsweringTest(unittest.TestCase):
 
     def setUp(self) -> None:
         self.task = Tasks.faq_question_answering
@@ -103,10 +102,6 @@ class FaqQuestionAnsweringTest(unittest.TestCase, DemoCompatibilityCheck):
             ['今天星期六', '明天星期几明天星期几'])
         print(np.shape(sentence_vec))
 
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
-    def test_demo_compatibility(self):
-        self.compatibility_check()
-
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/pipelines/test_fast_instance_segmentation.py b/tests/pipelines/test_fast_instance_segmentation.py
new file mode 100644
index 00000000..d5789150
--- /dev/null
+++ b/tests/pipelines/test_fast_instance_segmentation.py
@@ -0,0 +1,34 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest
+
+from modelscope.models import Model
+from modelscope.outputs import OutputKeys
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+from modelscope.utils.test_utils import test_level
+
+
+class FastInstanceSegmentationTest(unittest.TestCase):
+
+    def setUp(self) -> None:
+        self.task = Tasks.image_segmentation
+        self.model_id = 'damo/cv_resnet50_fast-instance-segmentation_coco'
+
+    image = 'data/test/images/image_instance_segmentation.jpg'
+
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    def test_run_with_model_name(self):
+        pipeline_parsing = pipeline(
+            task=Tasks.image_segmentation, model=self.model_id)
+        print(pipeline_parsing(input=self.image)[OutputKeys.LABELS])
+
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    def test_run_with_model_from_modelhub(self):
+        model = Model.from_pretrained(self.model_id)
+        pipeline_parsing = pipeline(
+            task=Tasks.image_segmentation, model=model, preprocessor=None)
+        print(pipeline_parsing(input=self.image)[OutputKeys.LABELS])
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/pipelines/test_feature_extraction.py b/tests/pipelines/test_feature_extraction.py
index da6be1c0..8e365eab 100644
--- a/tests/pipelines/test_feature_extraction.py
+++ b/tests/pipelines/test_feature_extraction.py
@@ -11,12 +11,10 @@ from modelscope.pipelines import pipeline
 from modelscope.pipelines.nlp import FeatureExtractionPipeline
 from modelscope.preprocessors import FillMaskTransformersPreprocessor
 from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
 from modelscope.utils.test_utils import test_level
 
 
-class FeatureExtractionTaskModelTest(unittest.TestCase,
-                                     DemoCompatibilityCheck):
+class FeatureExtractionTaskModelTest(unittest.TestCase):
 
     def setUp(self) -> None:
         self.task = Tasks.feature_extraction
diff --git a/tests/pipelines/test_fid_dialogue.py
b/tests/pipelines/test_fid_dialogue.py index c69823ce..96d7c919 100644 --- a/tests/pipelines/test_fid_dialogue.py +++ b/tests/pipelines/test_fid_dialogue.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class FidDialogueTest(unittest.TestCase, DemoCompatibilityCheck): +class FidDialogueTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.fid_dialogue @@ -49,8 +48,7 @@ class FidDialogueTest(unittest.TestCase, DemoCompatibilityCheck): 'forward_params': forward_params } - # @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') - @unittest.skip('temporarily skip') + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_240m_pipeline(self): pipeline_ins = pipeline( task=self.task, @@ -59,8 +57,7 @@ class FidDialogueTest(unittest.TestCase, DemoCompatibilityCheck): result = pipeline_ins(self.input, **self.kwargs) print(result) - # @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') - @unittest.skip('temporarily skip') + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_3_7b_pipeline(self): pipeline_ins = pipeline( task=self.task, @@ -69,10 +66,6 @@ class FidDialogueTest(unittest.TestCase, DemoCompatibilityCheck): result = pipeline_ins(self.input, **self.kwargs) print(result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_fill_mask.py b/tests/pipelines/test_fill_mask.py index 0e427464..450ada15 100644 --- a/tests/pipelines/test_fill_mask.py +++ b/tests/pipelines/test_fill_mask.py @@ -10,12 +10,11 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import FillMaskPipeline from modelscope.preprocessors import FillMaskTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool from modelscope.utils.test_utils import test_level -class FillMaskTest(unittest.TestCase, DemoCompatibilityCheck): +class FillMaskTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.fill_mask @@ -134,8 +133,9 @@ class FillMaskTest(unittest.TestCase, DemoCompatibilityCheck): f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: ' f'{pipeline_ins(test_input)}\n') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_model_name(self): + # TODO: to be fixed in the future # veco pipeline_ins = pipeline(task=Tasks.fill_mask, model=self.model_id_veco) for language in ['zh', 'en']: @@ -176,10 +176,6 @@ class FillMaskTest(unittest.TestCase, DemoCompatibilityCheck): print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: ' f'{pipeline_ins(test_input)}\n') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_general_image_classification.py b/tests/pipelines/test_general_image_classification.py index 978c474a..df036fa1 100644 --- a/tests/pipelines/test_general_image_classification.py 
+++ b/tests/pipelines/test_general_image_classification.py @@ -4,13 +4,11 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.regress_test_utils import MsRegressTool from modelscope.utils.test_utils import test_level -class GeneralImageClassificationTest(unittest.TestCase, - DemoCompatibilityCheck): +class GeneralImageClassificationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_classification @@ -83,10 +81,6 @@ class GeneralImageClassificationTest(unittest.TestCase, result = general_image_classification('data/test/images/bird.JPEG') print(result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_general_recognition.py b/tests/pipelines/test_general_recognition.py index ba713bbe..873aaa02 100644 --- a/tests/pipelines/test_general_recognition.py +++ b/tests/pipelines/test_general_recognition.py @@ -4,11 +4,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class GeneralRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class GeneralRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.general_recognition @@ -22,10 +21,6 @@ class GeneralRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): result = general_recognition('data/test/images/dogs.jpg') print(result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_generative_multi_modal_embedding.py b/tests/pipelines/test_generative_multi_modal_embedding.py index 18b96f65..3a853725 100644 --- a/tests/pipelines/test_generative_multi_modal_embedding.py +++ b/tests/pipelines/test_generative_multi_modal_embedding.py @@ -5,11 +5,10 @@ import unittest from modelscope.models import Model from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class GEMMMultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): +class GEMMMultiModalEmbeddingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.generative_multi_modal_embedding @@ -68,10 +67,6 @@ class GEMMMultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): output = generative_multi_modal_embedding_pipeline(test_input) print(output) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_gridvlp_classification.py b/tests/pipelines/test_gridvlp_classification.py index 18c6c582..7479d0fa 100644 --- a/tests/pipelines/test_gridvlp_classification.py +++ b/tests/pipelines/test_gridvlp_classification.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines.multi_modal.gridvlp_pipeline import ( GridVlpClassificationPipeline, GridVlpEmbeddingPipeline) -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import 
test_level -class GridVlpClassificationTest(unittest.TestCase, DemoCompatibilityCheck): +class GridVlpClassificationTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'rgtjf1/multi-modal_gridvlp_classification_chinese-base-ecom-cate' @@ -62,10 +61,6 @@ class GridVlpClassificationTest(unittest.TestCase, DemoCompatibilityCheck): print(f'text: {self.text}\nimage: {self.image}\n' f'outputs shape: {outputs.shape}') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_hand_2d_keypoints.py b/tests/pipelines/test_hand_2d_keypoints.py index 43b569d0..a243a478 100644 --- a/tests/pipelines/test_hand_2d_keypoints.py +++ b/tests/pipelines/test_hand_2d_keypoints.py @@ -23,7 +23,7 @@ class Hand2DKeypointsPipelineTest(unittest.TestCase): self.assertEqual(results[OutputKeys.KEYPOINTS].shape[2], 3) self.assertEqual(results[OutputKeys.BOXES].shape[1], 4) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip('skip test in current test level: no pipeline implemented') def test_hand_2d_keypoints_with_default_model(self): img_path = 'data/test/images/hand_keypoints.jpg' diff --git a/tests/pipelines/test_hand_detection.py b/tests/pipelines/test_hand_detection.py index 8a6bbd5a..9ea192a1 100644 --- a/tests/pipelines/test_hand_detection.py +++ b/tests/pipelines/test_hand_detection.py @@ -3,17 +3,16 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class ObjectDetectionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.domain_specific_object_detection self.model_id = 'damo/cv_yolox-pai_hand-detection' - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip('skip test in current test level: no pipeline implemented') def test_hand_detection_pipeline(self): test_image = 'data/test/images/hand_detection.jpg' diff --git a/tests/pipelines/test_hicossl_video_embedding.py b/tests/pipelines/test_hicossl_video_embedding.py index 8a7de1fa..a367457f 100644 --- a/tests/pipelines/test_hicossl_video_embedding.py +++ b/tests/pipelines/test_hicossl_video_embedding.py @@ -4,11 +4,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class HICOSSLVideoEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): +class HICOSSLVideoEmbeddingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_embedding @@ -23,10 +22,6 @@ class HICOSSLVideoEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): print(f'video embedding output: {result}.') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_hitea_tasks.py b/tests/pipelines/test_hitea_tasks.py index 50efdfbd..60cd206d 100644 --- a/tests/pipelines/test_hitea_tasks.py +++ b/tests/pipelines/test_hitea_tasks.py @@ -5,11 +5,10 @@ from modelscope.models import Model from modelscope.outputs import OutputKeys from 
modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class HiTeATasksTest(unittest.TestCase, DemoCompatibilityCheck): +class HiTeATasksTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_video_captioning_with_model(self): @@ -55,10 +54,6 @@ class HiTeATasksTest(unittest.TestCase, DemoCompatibilityCheck): result = pipeline_vqa(input) print(result) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_human_wholebody_keypoint.py b/tests/pipelines/test_human_wholebody_keypoint.py index 7c5946cc..e0052f77 100644 --- a/tests/pipelines/test_human_wholebody_keypoint.py +++ b/tests/pipelines/test_human_wholebody_keypoint.py @@ -11,7 +11,7 @@ from modelscope.utils.test_utils import test_level class EasyCVFace2DKeypointsPipelineTest(unittest.TestCase): - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip('skip easycv related cases') def test_human_wholebody_keypoint(self): img_path = 'data/test/images/keypoints_detect/img_test_wholebody.jpg' model_id = 'damo/cv_hrnetw48_human-wholebody-keypoint_image' diff --git a/tests/pipelines/test_image_body_reshaping.py b/tests/pipelines/test_image_body_reshaping.py index e1955e94..5a0ec0e7 100644 --- a/tests/pipelines/test_image_body_reshaping.py +++ b/tests/pipelines/test_image_body_reshaping.py @@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageBodyReshapingTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageBodyReshapingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_body_reshaping @@ -49,10 +48,6 @@ class ImageBodyReshapingTest(unittest.TestCase, DemoCompatibilityCheck): image_body_reshaping = pipeline(Tasks.image_body_reshaping) self.pipeline_inference(image_body_reshaping, self.test_image) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_color_enhance.py b/tests/pipelines/test_image_color_enhance.py index 7c3ae8c0..5e222776 100644 --- a/tests/pipelines/test_image_color_enhance.py +++ b/tests/pipelines/test_image_color_enhance.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageColorEnhanceTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_csrnet_image-color-enhance-models' @@ -37,10 +36,6 @@ class ImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck): self.pipeline_inference(img_color_enhance, 'data/test/images/image_color_enhance.png') - @unittest.skipUnless(test_level() >= 0, 'skip test in 
current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_colorization.py b/tests/pipelines/test_image_colorization.py index 547fce89..15ea314a 100644 --- a/tests/pipelines/test_image_colorization.py +++ b/tests/pipelines/test_image_colorization.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageColorizationTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageColorizationTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_unet_image-colorization' @@ -37,10 +36,6 @@ class ImageColorizationTest(unittest.TestCase, DemoCompatibilityCheck): image_colorization = pipeline(Tasks.image_colorization) self.pipeline_inference(image_colorization, self.test_image) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_debanding.py b/tests/pipelines/test_image_debanding.py index 105d1f45..da784596 100644 --- a/tests/pipelines/test_image_debanding.py +++ b/tests/pipelines/test_image_debanding.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageColorEnhanceTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_rrdb_image-debanding' @@ -36,10 +35,6 @@ class ImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck): self.pipeline_inference(img_debanding, 'data/test/images/image_debanding.png') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_deblur.py b/tests/pipelines/test_image_deblur.py index fc9d0101..529ae96c 100644 --- a/tests/pipelines/test_image_deblur.py +++ b/tests/pipelines/test_image_deblur.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import ImageDeblurPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageDenoiseTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageDenoiseTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_deblurring @@ -56,10 +55,6 @@ class ImageDenoiseTest(unittest.TestCase, DemoCompatibilityCheck): h, w = deblur_img.shape[:2] print('pipeline: the shape of output_img is {}x{}'.format(h, w)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_defrcn_fewshot.py b/tests/pipelines/test_image_defrcn_fewshot.py index d2ecde13..1771d7b8 100644 --- 
a/tests/pipelines/test_image_defrcn_fewshot.py +++ b/tests/pipelines/test_image_defrcn_fewshot.py @@ -8,14 +8,13 @@ from modelscope.models import Model from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import test_level logger = get_logger() -class ImageDefrcnFewShotTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageDefrcnFewShotTest(unittest.TestCase): def setUp(self) -> None: logger.info('start install detectron2-0.3') @@ -58,10 +57,6 @@ class ImageDefrcnFewShotTest(unittest.TestCase, DemoCompatibilityCheck): self.task, model=cache_path, model_revision=self.revision) print(pipeline_defrcn(input=self.image)[OutputKeys.LABELS]) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_denoise.py b/tests/pipelines/test_image_denoise.py index d95dd343..891e703e 100644 --- a/tests/pipelines/test_image_denoise.py +++ b/tests/pipelines/test_image_denoise.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import ImageDenoisePipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageDenoiseTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageDenoiseTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_denoising @@ -56,10 +55,6 @@ class ImageDenoiseTest(unittest.TestCase, DemoCompatibilityCheck): h, w = denoise_img.shape[:2] print('pipeline: the shape of output_img is {}x{}'.format(h, w)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_depth_estimation.py b/tests/pipelines/test_image_depth_estimation.py index 6ec16a64..7f9b3bb9 100644 --- a/tests/pipelines/test_image_depth_estimation.py +++ b/tests/pipelines/test_image_depth_estimation.py @@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import depth_to_color -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageDepthEstimationTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageDepthEstimationTest(unittest.TestCase): def setUp(self) -> None: self.task = 'image-depth-estimation' diff --git a/tests/pipelines/test_image_depth_estimation_bts.py b/tests/pipelines/test_image_depth_estimation_bts.py index bda7a41f..e952da30 100644 --- a/tests/pipelines/test_image_depth_estimation_bts.py +++ b/tests/pipelines/test_image_depth_estimation_bts.py @@ -8,11 +8,10 @@ from modelscope.models import Model from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageDepthEstimationBtsTest(unittest.TestCase, DemoCompatibilityCheck): +class 
ImageDepthEstimationBtsTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_depth_estimation @@ -45,10 +44,6 @@ class ImageDepthEstimationBtsTest(unittest.TestCase, DemoCompatibilityCheck): cv2.imwrite('result_snapshot.jpg', depth_vis) print('Test run with snapshot ok.') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_driving_perception.py b/tests/pipelines/test_image_driving_perception.py index 2f28b7d3..a6ad902d 100644 --- a/tests/pipelines/test_image_driving_perception.py +++ b/tests/pipelines/test_image_driving_perception.py @@ -17,11 +17,10 @@ from modelscope.preprocessors.image import LoadImage from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import \ show_image_driving_perception_result -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageDrivingPerceptionTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageDrivingPerceptionTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_yolopv2_image-driving-perception_bdd100k' @@ -59,10 +58,6 @@ class ImageDrivingPerceptionTest(unittest.TestCase, DemoCompatibilityCheck): self.pipeline_inference(image_driving_perception_pipeline, self.img_path) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_face_fusion.py b/tests/pipelines/test_image_face_fusion.py index fde15edf..54d2c3a4 100644 --- a/tests/pipelines/test_image_face_fusion.py +++ b/tests/pipelines/test_image_face_fusion.py @@ -7,11 +7,10 @@ from modelscope.hub.snapshot_download import snapshot_download from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageFaceFusionTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageFaceFusionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_face_fusion @@ -50,10 +49,6 @@ class ImageFaceFusionTest(unittest.TestCase, DemoCompatibilityCheck): cv2.imwrite('result_facefusion.png', result[OutputKeys.OUTPUT_IMG]) print('facefusion.test_run_modelhub_default_model done') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_human_parsing.py b/tests/pipelines/test_image_human_parsing.py index 77d75862..f9263ea8 100644 --- a/tests/pipelines/test_image_human_parsing.py +++ b/tests/pipelines/test_image_human_parsing.py @@ -5,11 +5,10 @@ from modelscope.models import Model from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageHumanParsingTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageHumanParsingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_segmentation @@ -39,10 +38,6 @@ class ImageHumanParsingTest(unittest.TestCase, 
DemoCompatibilityCheck): task=Tasks.image_segmentation, model=model, preprocessor=None) print(pipeline_parsing(input=self.image_multiple)[OutputKeys.LABELS]) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_inpainting_sdv2.py b/tests/pipelines/test_image_inpainting_sdv2.py index 81002ce8..b21ac69d 100644 --- a/tests/pipelines/test_image_inpainting_sdv2.py +++ b/tests/pipelines/test_image_inpainting_sdv2.py @@ -10,11 +10,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import ImageInpaintingSDV2Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageInpaintingSDV2Test(unittest.TestCase, DemoCompatibilityCheck): +class ImageInpaintingSDV2Test(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_inpainting @@ -50,10 +49,6 @@ class ImageInpaintingSDV2Test(unittest.TestCase, DemoCompatibilityCheck): print( 'pipeline: the output image path is {}'.format(output_image_path)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_instance_segmentation.py b/tests/pipelines/test_image_instance_segmentation.py index 2ba0724a..c305a7c0 100644 --- a/tests/pipelines/test_image_instance_segmentation.py +++ b/tests/pipelines/test_image_instance_segmentation.py @@ -12,11 +12,10 @@ from modelscope.pipelines.cv import ImageInstanceSegmentationPipeline from modelscope.preprocessors import build_preprocessor from modelscope.utils.config import Config from modelscope.utils.constant import Fields, ModelFile, Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageInstanceSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageInstanceSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_segmentation @@ -61,10 +60,6 @@ class ImageInstanceSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): print(f'pipeline1:{pipeline1(input=self.image)[OutputKeys.LABELS]}') print(f'pipeline2: {pipeline2(input=self.image)[OutputKeys.LABELS]}') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_layout_estimation.py b/tests/pipelines/test_image_layout_estimation.py index b312e8c2..4c93fa30 100644 --- a/tests/pipelines/test_image_layout_estimation.py +++ b/tests/pipelines/test_image_layout_estimation.py @@ -7,11 +7,10 @@ import cv2 from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageLayoutEstimationTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageLayoutEstimationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.indoor_layout_estimation diff --git a/tests/pipelines/test_image_matching.py b/tests/pipelines/test_image_matching.py index 
55fd56df..6007ea31 100644 --- a/tests/pipelines/test_image_matching.py +++ b/tests/pipelines/test_image_matching.py @@ -11,11 +11,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import match_pair_visualization -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageMatchingTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageMatchingTest(unittest.TestCase): def setUp(self) -> None: self.task = 'image-matching' diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py index a3edb705..d6d87a0c 100644 --- a/tests/pipelines/test_image_matting.py +++ b/tests/pipelines/test_image_matting.py @@ -8,11 +8,10 @@ from modelscope.msdatasets import MsDataset from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import ModelFile, Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageMattingTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageMattingTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_unet_image-matting' @@ -61,10 +60,6 @@ class ImageMattingTest(unittest.TestCase, DemoCompatibilityCheck): f'Output written to dir: {osp.dirname(osp.abspath("result_0.png"))}' ) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_mvs_depth_estimation.py b/tests/pipelines/test_image_mvs_depth_estimation.py index a7e327e3..b158623b 100644 --- a/tests/pipelines/test_image_mvs_depth_estimation.py +++ b/tests/pipelines/test_image_mvs_depth_estimation.py @@ -6,11 +6,10 @@ from modelscope.hub.snapshot_download import snapshot_download from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageMVSDepthEstimationTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageMVSDepthEstimationTest(unittest.TestCase): def setUp(self) -> None: self.task = 'image-multi-view-depth-estimation' diff --git a/tests/pipelines/test_image_open_vocabulary_detection.py b/tests/pipelines/test_image_open_vocabulary_detection.py index 52dc1d11..923e1efe 100644 --- a/tests/pipelines/test_image_open_vocabulary_detection.py +++ b/tests/pipelines/test_image_open_vocabulary_detection.py @@ -10,15 +10,13 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import draw_box -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import test_level logger = get_logger() -class ImageOpenVocabularyDetectionTest(unittest.TestCase, - DemoCompatibilityCheck): +class ImageOpenVocabularyDetectionTest(unittest.TestCase): def setUp(self) -> None: os.system( @@ -74,10 +72,6 @@ class ImageOpenVocabularyDetectionTest(unittest.TestCase, cv2.imwrite('result_snapshot.jpg', image) print('Test run with snapshot ok.') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def 
test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_panoptic_segmentation.py b/tests/pipelines/test_image_panoptic_segmentation.py index 4f12e6af..38c66055 100644 --- a/tests/pipelines/test_image_panoptic_segmentation.py +++ b/tests/pipelines/test_image_panoptic_segmentation.py @@ -9,17 +9,16 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import panoptic_seg_masks_to_image -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImagePanopticSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): +class ImagePanopticSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_segmentation self.model_id = 'damo/cv_swinL_panoptic-segmentation_cocopan' - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip('skip test in current test level: no pipeline implemented') def test_image_panoptic_segmentation(self): input_location = 'data/test/images/image_panoptic_segmentation.jpg' pan_segmentor = pipeline(Tasks.image_segmentation, model=self.model_id) @@ -29,7 +28,7 @@ class ImagePanopticSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): cv2.imwrite('result.jpg', draw_img) print('print test_image_panoptic_segmentation return success') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip('skip test in current test level: no pipeline implemented') def test_image_panoptic_segmentation_from_PIL(self): input_location = 'data/test/images/image_panoptic_segmentation.jpg' pan_segmentor = pipeline(Tasks.image_segmentation, model=self.model_id) @@ -40,10 +39,6 @@ class ImagePanopticSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): cv2.imwrite('result.jpg', draw_img) print('print test_image_panoptic_segmentation from PIL return success') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_portrait_enhancement.py b/tests/pipelines/test_image_portrait_enhancement.py index f0814c07..43978fd2 100644 --- a/tests/pipelines/test_image_portrait_enhancement.py +++ b/tests/pipelines/test_image_portrait_enhancement.py @@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImagePortraitEnhancementTest(unittest.TestCase, DemoCompatibilityCheck): +class ImagePortraitEnhancementTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_portrait_enhancement @@ -46,10 +45,6 @@ class ImagePortraitEnhancementTest(unittest.TestCase, DemoCompatibilityCheck): face_enhancement = pipeline(Tasks.image_portrait_enhancement) self.pipeline_inference(face_enhancement, self.test_image) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_quality_assessment_degradation.py 
b/tests/pipelines/test_image_quality_assessment_degradation.py index cb0f24c7..1acf8163 100644 --- a/tests/pipelines/test_image_quality_assessment_degradation.py +++ b/tests/pipelines/test_image_quality_assessment_degradation.py @@ -9,14 +9,12 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import ImageQualityAssessmentDegradationPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level sys.path.insert(0, '.') -class ImageQualityAssessmentDegradationTest(unittest.TestCase, - DemoCompatibilityCheck): +class ImageQualityAssessmentDegradationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_quality_assessment_degradation @@ -54,10 +52,6 @@ class ImageQualityAssessmentDegradationTest(unittest.TestCase, out_path = pipeline_ins(input=self.test_img)[OutputKeys.SCORES] print('pipeline: the out_path is {}'.format(out_path)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_quality_assessment_man.py b/tests/pipelines/test_image_quality_assessment_man.py index 2668d45d..f36f8b3c 100644 --- a/tests/pipelines/test_image_quality_assessment_man.py +++ b/tests/pipelines/test_image_quality_assessment_man.py @@ -7,11 +7,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import ImageQualityAssessmentMANPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageQualityAssessmentMANTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageQualityAssessmentMANTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_quality_assessment_mos @@ -47,10 +46,6 @@ class ImageQualityAssessmentMANTest(unittest.TestCase, DemoCompatibilityCheck): out_path = pipeline_ins(input=self.test_img)[OutputKeys.SCORE] print('pipeline: the out_path is {}'.format(out_path)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_quality_assessment_mos.py b/tests/pipelines/test_image_quality_assessment_mos.py index 608be8f8..3ca26b0a 100644 --- a/tests/pipelines/test_image_quality_assessment_mos.py +++ b/tests/pipelines/test_image_quality_assessment_mos.py @@ -7,11 +7,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import ImageQualityAssessmentMosPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageQualityAssessmentMosTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageQualityAssessmentMosTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_quality_assessment_mos @@ -47,10 +46,6 @@ class ImageQualityAssessmentMosTest(unittest.TestCase, DemoCompatibilityCheck): out_path = pipeline_ins(input=self.test_img)[OutputKeys.SCORE] print('pipeline: the out_path is {}'.format(out_path)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def 
test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_reid_person.py b/tests/pipelines/test_image_reid_person.py index 310cdd66..e107d5ee 100644 --- a/tests/pipelines/test_image_reid_person.py +++ b/tests/pipelines/test_image_reid_person.py @@ -6,11 +6,10 @@ from PIL import Image from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageReidPersonTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageReidPersonTest(unittest.TestCase): def setUp(self) -> None: self.input_location = 'data/test/images/image_reid_person.jpg' @@ -50,10 +49,6 @@ class ImageReidPersonTest(unittest.TestCase, DemoCompatibilityCheck): ) print(f'The img embedding is: {result[OutputKeys.IMG_EMBEDDING]}') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_restoration.py b/tests/pipelines/test_image_restoration.py index baffa7d5..b9c600b2 100644 --- a/tests/pipelines/test_image_restoration.py +++ b/tests/pipelines/test_image_restoration.py @@ -4,11 +4,10 @@ import unittest from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageRestorationTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageRestorationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_demoireing @@ -24,10 +23,6 @@ class ImageRestorationTest(unittest.TestCase, DemoCompatibilityCheck): Image.fromarray(result[OutputKeys.OUTPUT_IMG]).save(input_location + '_demoire.jpg') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_semantic_segmentation.py b/tests/pipelines/test_image_semantic_segmentation.py index 2e8d7522..5bc89bd1 100644 --- a/tests/pipelines/test_image_semantic_segmentation.py +++ b/tests/pipelines/test_image_semantic_segmentation.py @@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import semantic_seg_masks_to_image -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageSemanticSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageSemanticSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = 'image-segmentation' @@ -54,10 +53,6 @@ class ImageSemanticSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): cv2.imwrite('result.jpg', draw_img) print('test_image_semantic_segmentation_vitadapter_from_PIL DONE') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_structured_model_probing.py b/tests/pipelines/test_image_structured_model_probing.py index 1befcf98..f4d46d92 
100644 --- a/tests/pipelines/test_image_structured_model_probing.py +++ b/tests/pipelines/test_image_structured_model_probing.py @@ -4,12 +4,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageStructuredModelProbingTest(unittest.TestCase, - DemoCompatibilityCheck): +class ImageStructuredModelProbingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_classification diff --git a/tests/pipelines/test_image_style_transfer.py b/tests/pipelines/test_image_style_transfer.py index 5f37f204..223ec757 100644 --- a/tests/pipelines/test_image_style_transfer.py +++ b/tests/pipelines/test_image_style_transfer.py @@ -7,11 +7,10 @@ from modelscope.hub.snapshot_download import snapshot_download from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageStyleTransferTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageStyleTransferTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_style_transfer @@ -53,10 +52,6 @@ class ImageStyleTransferTest(unittest.TestCase, DemoCompatibilityCheck): cv2.imwrite('result_styletransfer3.png', result[OutputKeys.OUTPUT_IMG]) print('style_transfer.test_run_modelhub_default_model done') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_super_resolution.py b/tests/pipelines/test_image_super_resolution.py index d5cbebe8..45066300 100644 --- a/tests/pipelines/test_image_super_resolution.py +++ b/tests/pipelines/test_image_super_resolution.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageSuperResolutionTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageSuperResolutionTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_rrdb_image-super-resolution' @@ -37,10 +36,6 @@ class ImageSuperResolutionTest(unittest.TestCase, DemoCompatibilityCheck): super_resolution = pipeline(Tasks.image_super_resolution) self.pipeline_inference(super_resolution, self.img) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_interactive_translation_pipeline.py b/tests/pipelines/test_interactive_translation_pipeline.py index b973250a..c240ba70 100644 --- a/tests/pipelines/test_interactive_translation_pipeline.py +++ b/tests/pipelines/test_interactive_translation_pipeline.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class InteractiveTranslationTest(unittest.TestCase, DemoCompatibilityCheck): +class InteractiveTranslationTest(unittest.TestCase): def 
setUp(self) -> None: self.task = Tasks.translation @@ -28,10 +27,6 @@ class InteractiveTranslationTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(self.task, model=model_id) print(pipeline_ins(inputs + '' + prefix)) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_inverse_text_processing.py b/tests/pipelines/test_inverse_text_processing.py index dc7fb1e0..a1d5a712 100644 --- a/tests/pipelines/test_inverse_text_processing.py +++ b/tests/pipelines/test_inverse_text_processing.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class InverseTextProcessingTest(unittest.TestCase, DemoCompatibilityCheck): +class InverseTextProcessingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.inverse_text_processing, @@ -61,10 +60,6 @@ class InverseTextProcessingTest(unittest.TestCase, DemoCompatibilityCheck): itn_result = itn_inference_pipline(text_in=lang_text_in) print(itn_result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_key_word_spotting.py b/tests/pipelines/test_key_word_spotting.py index 13f7a308..f22bc845 100644 --- a/tests/pipelines/test_key_word_spotting.py +++ b/tests/pipelines/test_key_word_spotting.py @@ -10,7 +10,6 @@ import soundfile from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import ColorCodes, Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import download_and_untar, test_level @@ -27,7 +26,7 @@ NEG_TESTSETS_FILE = 'neg_testsets.tar.gz' NEG_TESTSETS_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/KWS/neg_testsets.tar.gz' -class KeyWordSpottingTest(unittest.TestCase, DemoCompatibilityCheck): +class KeyWordSpottingTest(unittest.TestCase): action_info = { 'test_run_with_wav': { 'checking_item': [OutputKeys.KWS_LIST, 0, 'keyword'], @@ -344,10 +343,6 @@ class KeyWordSpottingTest(unittest.TestCase, DemoCompatibilityCheck): model_id=model_id, audio_in=wav_path, keywords=keywords) logger.info(ColorCodes.YELLOW + str(kws_result) + ColorCodes.END) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_key_word_spotting_farfield.py b/tests/pipelines/test_key_word_spotting_farfield.py index e736f48b..3193149c 100644 --- a/tests/pipelines/test_key_word_spotting_farfield.py +++ b/tests/pipelines/test_key_word_spotting_farfield.py @@ -19,6 +19,7 @@ class KWSFarfieldTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/speech_dfsmn_kws_char_farfield_16k_nihaomiya' + self.model_id_iot = 'damo/speech_dfsmn_kws_char_farfield_iot_16k_nihaomiya' if os.path.isfile(OUTPUT_WAV): os.remove(OUTPUT_WAV) @@ -29,6 +30,13 @@ class KWSFarfieldTest(unittest.TestCase): self.assertEqual(len(result['kws_list']), 5) print(result['kws_list'][-1]) + @unittest.skipUnless(test_level() >= 1, 
'skip test in current test level') + def test_normal_iot(self): + kws = pipeline(Tasks.keyword_spotting, model=self.model_id_iot) + result = kws(os.path.join(os.getcwd(), TEST_SPEECH_FILE)) + self.assertEqual(len(result['kws_list']), 5) + print(result['kws_list'][-1]) + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_output(self): kws = pipeline(Tasks.keyword_spotting, model=self.model_id) diff --git a/tests/pipelines/test_language_guided_video_summarization.py b/tests/pipelines/test_language_guided_video_summarization.py index 0f06d4f2..01d88b55 100755 --- a/tests/pipelines/test_language_guided_video_summarization.py +++ b/tests/pipelines/test_language_guided_video_summarization.py @@ -9,12 +9,10 @@ import torch from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class LanguageGuidedVideoSummarizationTest(unittest.TestCase, - DemoCompatibilityCheck): +class LanguageGuidedVideoSummarizationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.language_guided_video_summarization @@ -40,10 +38,6 @@ class LanguageGuidedVideoSummarizationTest(unittest.TestCase, print(f'video summarization output:\n {result}.') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_language_identification.py b/tests/pipelines/test_language_identification.py index ddd91e69..ccfa1a7d 100644 --- a/tests/pipelines/test_language_identification.py +++ b/tests/pipelines/test_language_identification.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class LanguageIdentificationTest(unittest.TestCase, DemoCompatibilityCheck): +class LanguageIdentificationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_classification @@ -22,11 +21,6 @@ class LanguageIdentificationTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(self.task, model=self.model_id) print(pipeline_ins(input=inputs)) - @unittest.skipUnless(test_level() >= 0, - 'skip test case in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_license_plate_detection.py b/tests/pipelines/test_license_plate_detection.py index 70cdb820..3c30618d 100644 --- a/tests/pipelines/test_license_plate_detection.py +++ b/tests/pipelines/test_license_plate_detection.py @@ -5,11 +5,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class LicensePlateDectionTest(unittest.TestCase, DemoCompatibilityCheck): +class LicensePlateDectionTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_resnet18_license-plate-detection_damo' @@ -32,10 +31,6 @@ class LicensePlateDectionTest(unittest.TestCase, DemoCompatibilityCheck): license_plate_detection = pipeline(Tasks.license_plate_detection) self.pipeline_inference(license_plate_detection, self.test_image) - 
@unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_lineless_table_recognition.py b/tests/pipelines/test_lineless_table_recognition.py index 53fde8a1..59e173f4 100644 --- a/tests/pipelines/test_lineless_table_recognition.py +++ b/tests/pipelines/test_lineless_table_recognition.py @@ -8,11 +8,10 @@ import numpy as np from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TableRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class TableRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_resnet-transformer_table-structure-recognition_lore' @@ -35,10 +34,6 @@ class TableRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): lineless_table_recognition = pipeline(Tasks.lineless_table_recognition) self.pipeline_inference(lineless_table_recognition, self.test_image) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_live_category.py b/tests/pipelines/test_live_category.py index 391ed283..88b94b69 100644 --- a/tests/pipelines/test_live_category.py +++ b/tests/pipelines/test_live_category.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class LiveCategoryTest(unittest.TestCase, DemoCompatibilityCheck): +class LiveCategoryTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.live_category @@ -21,10 +20,6 @@ class LiveCategoryTest(unittest.TestCase, DemoCompatibilityCheck): print(f'live category output: {result}.') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_mask_face_recognition.py b/tests/pipelines/test_mask_face_recognition.py index 550e80e4..2a7e8ede 100644 --- a/tests/pipelines/test_mask_face_recognition.py +++ b/tests/pipelines/test_mask_face_recognition.py @@ -6,11 +6,10 @@ import numpy as np from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class MaskFaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class MaskFaceRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.face_recognition @@ -28,10 +27,6 @@ class MaskFaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): sim = np.dot(emb1[0], emb2[0]) print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_maskdino_instance_segmentation.py b/tests/pipelines/test_maskdino_instance_segmentation.py index 14e0887d..88c46de1 100644 --- 
a/tests/pipelines/test_maskdino_instance_segmentation.py +++ b/tests/pipelines/test_maskdino_instance_segmentation.py @@ -8,12 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import MaskDINOInstanceSegmentationPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class MaskDINOInstanceSegmentationTest(unittest.TestCase, - DemoCompatibilityCheck): +class MaskDINOInstanceSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_segmentation @@ -45,10 +43,6 @@ class MaskDINOInstanceSegmentationTest(unittest.TestCase, print(f'pipeline1:{pipeline1(input=self.image)[OutputKeys.LABELS]}') print(f'pipeline2: {pipeline2(input=self.image)[OutputKeys.LABELS]}') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_mglm_text_summarization.py b/tests/pipelines/test_mglm_text_summarization.py index 47abc741..703e9bbe 100644 --- a/tests/pipelines/test_mglm_text_summarization.py +++ b/tests/pipelines/test_mglm_text_summarization.py @@ -6,11 +6,10 @@ from modelscope.models import Model from modelscope.pipelines import pipeline from modelscope.preprocessors import MGLMSummarizationPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class mGLMTest(unittest.TestCase, DemoCompatibilityCheck): +class mGLMTest(unittest.TestCase): def setUp(self) -> None: self.output_dir = 'unittest_output' diff --git a/tests/pipelines/test_mobile_image_super_resolution.py b/tests/pipelines/test_mobile_image_super_resolution.py index 2cc7adf0..a486d244 100644 --- a/tests/pipelines/test_mobile_image_super_resolution.py +++ b/tests/pipelines/test_mobile_image_super_resolution.py @@ -8,12 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class MobileImageSuperResolutionTest(unittest.TestCase, - DemoCompatibilityCheck): +class MobileImageSuperResolutionTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_ecbsr_image-super-resolution_mobile' @@ -38,10 +36,6 @@ class MobileImageSuperResolutionTest(unittest.TestCase, super_resolution = pipeline(Tasks.image_super_resolution) self.pipeline_inference(super_resolution, self.img) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_motion_generation.py b/tests/pipelines/test_motion_generation.py index 7938611c..43903eb8 100644 --- a/tests/pipelines/test_motion_generation.py +++ b/tests/pipelines/test_motion_generation.py @@ -4,11 +4,10 @@ import unittest from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class MDMMotionGenerationTest(unittest.TestCase, 
DemoCompatibilityCheck): +class MDMMotionGenerationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.motion_generation @@ -23,10 +22,6 @@ class MDMMotionGenerationTest(unittest.TestCase, DemoCompatibilityCheck): result[OutputKeys.KEYPOINTS].shape) print('motion generation video file:', result[OutputKeys.OUTPUT_VIDEO]) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_movie_scene_segmentation.py b/tests/pipelines/test_movie_scene_segmentation.py index 0ac8b716..c6498a6c 100644 --- a/tests/pipelines/test_movie_scene_segmentation.py +++ b/tests/pipelines/test_movie_scene_segmentation.py @@ -10,11 +10,10 @@ from modelscope.pipelines import pipeline from modelscope.trainers import build_trainer from modelscope.utils.config import Config, ConfigDict from modelscope.utils.constant import ModelFile, Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class MovieSceneSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): +class MovieSceneSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.movie_scene_segmentation @@ -123,10 +122,6 @@ class MovieSceneSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): else: raise ValueError('process error') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_mplug_owl_multimodal_dialogue.py b/tests/pipelines/test_mplug_owl_multimodal_dialogue.py new file mode 100644 index 00000000..57bce67e --- /dev/null +++ b/tests/pipelines/test_mplug_owl_multimodal_dialogue.py @@ -0,0 +1,100 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +from PIL import Image + +from modelscope.models import Model +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.test_utils import test_level + + +class MplugOwlMultimodalDialogueTest(unittest.TestCase): + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_run_with_multimodal_dialogue_with_model(self): + model = Model.from_pretrained( + 'damo/multi-modal_mplug_owl_multimodal-dialogue_7b') + pipeline_multimodal_dialogue = pipeline( + task=Tasks.multimodal_dialogue, + model=model, + ) + image = 'data/resource/portrait_input.png' + system_prompt_1 = 'The following is a conversation between a curious human and AI assistant.' + system_prompt_2 = "The assistant gives helpful, detailed, and polite answers to the user's questions." + messages = { + 'messages': [ + { + 'role': 'system', + 'content': system_prompt_1 + ' ' + system_prompt_2 + }, + { + 'role': 'user', + 'content': [{ + 'image': image + }] + }, + { + 'role': 'user', + 'content': 'Describe the facial expression of the man.' 
+ }, + ] + } + result = pipeline_multimodal_dialogue(messages) + print(result[OutputKeys.TEXT]) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_multimodal_dialogue_with_name(self): + pipeline_multimodal_dialogue = pipeline( + Tasks.multimodal_dialogue, + model='damo/multi-modal_mplug_owl_multimodal-dialogue_7b') + image = 'data/resource/portrait_input.png' + system_prompt_1 = 'The following is a conversation between a curious human and AI assistant.' + system_prompt_2 = "The assistant gives helpful, detailed, and polite answers to the user's questions." + messages = { + 'messages': [ + { + 'role': 'system', + 'content': system_prompt_1 + ' ' + system_prompt_2 + }, + { + 'role': 'user', + 'content': [{ + 'image': image + }] + }, + { + 'role': 'user', + 'content': 'Describe the facial expression of the man.' + }, + ] + } + result = pipeline_multimodal_dialogue(messages) + print(result[OutputKeys.TEXT]) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_multimodal_dialogue_with_text(self): + pipeline_multimodal_dialogue = pipeline( + Tasks.multimodal_dialogue, + model='damo/multi-modal_mplug_owl_multimodal-dialogue_7b') + system_prompt_1 = 'The following is a conversation between a curious human and AI assistant.' + system_prompt_2 = "The assistant gives helpful, detailed, and polite answers to the user's questions." + messages = { + 'messages': [ + { + 'role': 'system', + 'content': system_prompt_1 + ' ' + system_prompt_2 + }, + { + 'role': 'user', + 'content': 'Where is the capital of China?' + }, + ] + } + result = pipeline_multimodal_dialogue(messages) + print(result[OutputKeys.TEXT]) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_mplug_tasks.py b/tests/pipelines/test_mplug_tasks.py index 21439ce2..cff998b4 100644 --- a/tests/pipelines/test_mplug_tasks.py +++ b/tests/pipelines/test_mplug_tasks.py @@ -7,11 +7,10 @@ from modelscope.models import Model from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck): +class MplugTasksTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_image_captioning_with_model(self): @@ -95,10 +94,6 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck): result = pipeline_vqa(input) print(result) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_msrresnetlite_video_super_resolution.py b/tests/pipelines/test_msrresnetlite_video_super_resolution.py index d79e9702..d44cbd34 100644 --- a/tests/pipelines/test_msrresnetlite_video_super_resolution.py +++ b/tests/pipelines/test_msrresnetlite_video_super_resolution.py @@ -7,11 +7,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import VideoSuperResolutionPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class MSRResNetLiteVSRTest(unittest.TestCase, DemoCompatibilityCheck): +class 
MSRResNetLiteVSRTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_super_resolution @@ -50,10 +49,6 @@ class MSRResNetLiteVSRTest(unittest.TestCase, DemoCompatibilityCheck): input=self.test_video)[OutputKeys.OUTPUT_VIDEO] print('pipeline: the output video path is {}'.format(out_video_path)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_multi_modal_embedding.py b/tests/pipelines/test_multi_modal_embedding.py index 7eddc690..486adc94 100644 --- a/tests/pipelines/test_multi_modal_embedding.py +++ b/tests/pipelines/test_multi_modal_embedding.py @@ -8,11 +8,10 @@ from modelscope.models import Model from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class MultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): +class MultiModalEmbeddingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.multi_modal_embedding @@ -54,10 +53,6 @@ class MultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): print('l2-norm: {}'.format(torch.norm(text_embedding, dim=-1).item())) # should be 1.0 - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_multilingual_named_entity_recognition.py b/tests/pipelines/test_multilingual_named_entity_recognition.py index ec134023..a31adf1f 100644 --- a/tests/pipelines/test_multilingual_named_entity_recognition.py +++ b/tests/pipelines/test_multilingual_named_entity_recognition.py @@ -8,12 +8,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import NamedEntityRecognitionPipeline from modelscope.preprocessors import NERPreprocessorThai, NERPreprocessorViet from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class MultilingualNamedEntityRecognitionTest(unittest.TestCase, - DemoCompatibilityCheck): +class MultilingualNamedEntityRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.named_entity_recognition @@ -123,10 +121,6 @@ class MultilingualNamedEntityRecognitionTest(unittest.TestCase, self.viet_sentence[5:] ])) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_multilingual_word_segmentation.py b/tests/pipelines/test_multilingual_word_segmentation.py index f10e6d98..878af0d3 100644 --- a/tests/pipelines/test_multilingual_word_segmentation.py +++ b/tests/pipelines/test_multilingual_word_segmentation.py @@ -8,12 +8,11 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import WordSegmentationThaiPipeline from modelscope.preprocessors import WordSegmentationPreprocessorThai from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.regress_test_utils import MsRegressTool from modelscope.utils.test_utils import test_level -class WordSegmentationTest(unittest.TestCase, 
DemoCompatibilityCheck): +class WordSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.word_segmentation @@ -65,10 +64,6 @@ class WordSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins( input=[self.sentence, self.sentence[:10], self.sentence[6:]])) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_named_entity_recognition.py b/tests/pipelines/test_named_entity_recognition.py index 175e9261..8b7424f4 100644 --- a/tests/pipelines/test_named_entity_recognition.py +++ b/tests/pipelines/test_named_entity_recognition.py @@ -10,11 +10,10 @@ from modelscope.pipelines.nlp import NamedEntityRecognitionPipeline from modelscope.preprocessors import \ TokenClassificationTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class NamedEntityRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class NamedEntityRecognitionTest(unittest.TestCase): language_examples = { 'zh': '新华社北京二月十一日电(记者唐虹)', @@ -470,10 +469,6 @@ class NamedEntityRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): model_id) print(pipeline_ins(input=sentence)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_nerf_recon_acc.py b/tests/pipelines/test_nerf_recon_acc.py index 95d879fb..7ca0fa44 100644 --- a/tests/pipelines/test_nerf_recon_acc.py +++ b/tests/pipelines/test_nerf_recon_acc.py @@ -9,11 +9,10 @@ from modelscope.msdatasets import MsDataset from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import DownloadMode, Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class NeRFReconAccTest(unittest.TestCase, DemoCompatibilityCheck): +class NeRFReconAccTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_nerf-3d-reconstruction-accelerate_damo' @@ -63,11 +62,6 @@ class NeRFReconAccTest(unittest.TestCase, DemoCompatibilityCheck): dict(data_dir=self.data_dir, render_dir=self.render_dir)) print('facefusion.test_run_modelhub_default_model done') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - @unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest only') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_nli.py b/tests/pipelines/test_nli.py index a7d2a236..5bbe353b 100644 --- a/tests/pipelines/test_nli.py +++ b/tests/pipelines/test_nli.py @@ -7,12 +7,11 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import TextClassificationPipeline from modelscope.preprocessors import TextClassificationTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool from modelscope.utils.test_utils import test_level -class NLITest(unittest.TestCase, DemoCompatibilityCheck): +class NLITest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.nli @@ -78,10 +77,6 
@@ class NLITest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(task=Tasks.nli) print(pipeline_ins(input=(self.sentence1, self.sentence2))) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_object_detecion_3d.py b/tests/pipelines/test_object_detecion_3d.py index bb0eebda..69b75b39 100644 --- a/tests/pipelines/test_object_detecion_3d.py +++ b/tests/pipelines/test_object_detecion_3d.py @@ -10,11 +10,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ObjectDetection3DTest(unittest.TestCase, DemoCompatibilityCheck): +class ObjectDetection3DTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.object_detection_3d @@ -48,10 +47,6 @@ class ObjectDetection3DTest(unittest.TestCase, DemoCompatibilityCheck): detect = pipeline(self.task) self.pipeline_inference(detect, idx) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_object_detection.py b/tests/pipelines/test_object_detection.py index e4bf6b54..f06d954b 100644 --- a/tests/pipelines/test_object_detection.py +++ b/tests/pipelines/test_object_detection.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class ObjectDetectionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.human_detection @@ -43,12 +42,9 @@ class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): result = human_detect(input_location) print(result) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_image_object_detection_auto_pipeline(self): + # TODO: to be fixed in the future model_id = 'damo/cv_yolox_image-object-detection-auto' test_image = 'data/test/images/auto_demo.jpg' @@ -59,7 +55,7 @@ class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): image_object_detection_auto.show_result(test_image, result, 'auto_demo_ret.jpg') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip('skip test in current test level: no pipeline implemented') def test_image_object_detection_dino_pipeline(self): model_id = 'damo/cv_swinl_image-object-detection_dino' test_image = 'data/test/images/image_detection.jpg' diff --git a/tests/pipelines/test_ocr_detection.py b/tests/pipelines/test_ocr_detection.py index 243e274b..0ed2e59c 100644 --- a/tests/pipelines/test_ocr_detection.py +++ b/tests/pipelines/test_ocr_detection.py @@ -4,11 +4,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from 
modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class OCRDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class OCRDetectionTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_resnet18_ocr-detection-line-level_damo' @@ -43,10 +42,6 @@ class OCRDetectionTest(unittest.TestCase, DemoCompatibilityCheck): ocr_detection = pipeline(Tasks.ocr_detection) self.pipeline_inference(ocr_detection, self.test_image) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_ocr_recognition.py b/tests/pipelines/test_ocr_recognition.py index 145ae22a..94ee521f 100644 --- a/tests/pipelines/test_ocr_recognition.py +++ b/tests/pipelines/test_ocr_recognition.py @@ -6,14 +6,13 @@ import PIL from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class OCRRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class OCRRecognitionTest(unittest.TestCase): def setUp(self) -> None: - self.model_id = 'damo/cv_crnn_ocr-recognition-general_damo' + self.model_id = 'damo/cv_convnextTiny_ocr-recognition-general_damo' self.test_image = 'data/test/images/ocr_recognition.jpg' self.task = Tasks.ocr_recognition @@ -26,7 +25,47 @@ class OCRRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): ocr_recognition = pipeline( Tasks.ocr_recognition, model=self.model_id, - model_revision='v2.2.1') + model_revision='v2.3.0') + self.pipeline_inference(ocr_recognition, self.test_image) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub_handwritten(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model='damo/cv_convnextTiny_ocr-recognition-handwritten_damo', + model_revision='v2.3.0') + self.pipeline_inference(ocr_recognition, self.test_image) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub_scene(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model='damo/cv_convnextTiny_ocr-recognition-scene_damo', + model_revision='v2.3.0') + self.pipeline_inference(ocr_recognition, self.test_image) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub_document(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model='damo/cv_convnextTiny_ocr-recognition-document_damo', + model_revision='v2.3.0') + self.pipeline_inference(ocr_recognition, self.test_image) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub_licenseplate(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model='damo/cv_convnextTiny_ocr-recognition-licenseplate_damo', + model_revision='v2.3.0') + self.pipeline_inference(ocr_recognition, self.test_image) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub_crnn(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model='damo/cv_crnn_ocr-recognition-general_damo', + model_revision='v2.2.2') self.pipeline_inference(ocr_recognition, self.test_image) @unittest.skipUnless(test_level() >= 1, 
'skip test in current test level') @@ -34,7 +73,7 @@ class OCRRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): ocr_recognition = pipeline( Tasks.ocr_recognition, model=self.model_id, - model_revision='v2.2.1') + model_revision='v2.3.0') imagePIL = PIL.Image.open(self.test_image) self.pipeline_inference(ocr_recognition, imagePIL) @@ -44,9 +83,75 @@ class OCRRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): Tasks.ocr_recognition, model_revision='v2.3.0') self.pipeline_inference(ocr_recognition, self.test_image) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub_cpu(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model=self.model_id, + model_revision='v2.3.0', + device='cpu') + self.pipeline_inference(ocr_recognition, self.test_image) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub_handwritten_cpu(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model='damo/cv_convnextTiny_ocr-recognition-handwritten_damo', + model_revision='v2.3.0', + device='cpu') + self.pipeline_inference(ocr_recognition, self.test_image) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub_scene_cpu(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model='damo/cv_convnextTiny_ocr-recognition-scene_damo', + model_revision='v2.3.0', + device='cpu') + self.pipeline_inference(ocr_recognition, self.test_image) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub_document_cpu(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model='damo/cv_convnextTiny_ocr-recognition-document_damo', + model_revision='v2.3.0', + device='cpu') + self.pipeline_inference(ocr_recognition, self.test_image) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub_licenseplate_cpu(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model='damo/cv_convnextTiny_ocr-recognition-licenseplate_damo', + model_revision='v2.3.0', + device='cpu') + self.pipeline_inference(ocr_recognition, self.test_image) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub_crnn_cpu(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model='damo/cv_crnn_ocr-recognition-general_damo', + model_revision='v2.2.2', + device='cpu') + self.pipeline_inference(ocr_recognition, self.test_image) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_run_with_model_from_modelhub_PILinput_cpu(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model=self.model_id, + model_revision='v2.3.0', + device='cpu') + imagePIL = PIL.Image.open(self.test_image) + self.pipeline_inference(ocr_recognition, imagePIL) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_run_modelhub_default_model_cpu(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, model_revision='v2.3.0', device='cpu') + self.pipeline_inference(ocr_recognition, self.test_image) if __name__ == '__main__': diff --git a/tests/pipelines/test_ofa_tasks.py b/tests/pipelines/test_ofa_tasks.py index df1b5647..55c3ae65 
100644 --- a/tests/pipelines/test_ofa_tasks.py +++ b/tests/pipelines/test_ofa_tasks.py @@ -11,11 +11,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import created_boxed_image -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck): +class OfaTasksTest(unittest.TestCase): def setUp(self) -> None: self.output_dir = 'unittest_output' @@ -366,10 +365,6 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck): for r in result: print(r[OutputKeys.TEXT]) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_panorama_depth_estimation.py b/tests/pipelines/test_panorama_depth_estimation.py index 99e575e3..23552274 100644 --- a/tests/pipelines/test_panorama_depth_estimation.py +++ b/tests/pipelines/test_panorama_depth_estimation.py @@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import depth_to_color -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class PanoramaDepthEstimationTest(unittest.TestCase, DemoCompatibilityCheck): +class PanoramaDepthEstimationTest(unittest.TestCase): def setUp(self) -> None: self.task = 'panorama-depth-estimation' diff --git a/tests/pipelines/test_pedestrian_attribute_recognition.py b/tests/pipelines/test_pedestrian_attribute_recognition.py index c0ace43c..7d58ce12 100644 --- a/tests/pipelines/test_pedestrian_attribute_recognition.py +++ b/tests/pipelines/test_pedestrian_attribute_recognition.py @@ -8,12 +8,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import draw_pedestrian_attribute -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class PedestrianAttributeRecognitionTest(unittest.TestCase, - DemoCompatibilityCheck): +class PedestrianAttributeRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.pedestrian_attribute_recognition @@ -39,10 +37,6 @@ class PedestrianAttributeRecognitionTest(unittest.TestCase, self.pipeline_inference(pedestrian_attribute_recognition, Image.open(self.test_image)) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_person_image_cartoon.py b/tests/pipelines/test_person_image_cartoon.py index 1dfaf519..40d26c86 100644 --- a/tests/pipelines/test_person_image_cartoon.py +++ b/tests/pipelines/test_person_image_cartoon.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageCartoonTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageCartoonTest(unittest.TestCase): def setUp(self) -> 
None: self.model_id = 'damo/cv_unet_person-image-cartoon_compound-models' @@ -83,10 +82,6 @@ class ImageCartoonTest(unittest.TestCase, DemoCompatibilityCheck): img_cartoon = pipeline(Tasks.image_portrait_stylization) self.pipeline_inference(img_cartoon, self.test_image) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_pointcloud_sceneflow_estimation.py b/tests/pipelines/test_pointcloud_sceneflow_estimation.py index 34d87f09..4d4bf7f2 100644 --- a/tests/pipelines/test_pointcloud_sceneflow_estimation.py +++ b/tests/pipelines/test_pointcloud_sceneflow_estimation.py @@ -7,12 +7,10 @@ import numpy as np from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class PointCloudSceneFlowEstimationTest(unittest.TestCase, - DemoCompatibilityCheck): +class PointCloudSceneFlowEstimationTest(unittest.TestCase): def setUp(self) -> None: self.task = 'pointcloud-sceneflow-estimation' diff --git a/tests/pipelines/test_product_retrieval_embedding.py b/tests/pipelines/test_product_retrieval_embedding.py index 2483d53a..f194bb7b 100644 --- a/tests/pipelines/test_product_retrieval_embedding.py +++ b/tests/pipelines/test_product_retrieval_embedding.py @@ -8,11 +8,10 @@ from modelscope.models import Model from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ProductRetrievalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): +class ProductRetrievalEmbeddingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.product_retrieval_embedding @@ -41,10 +40,6 @@ class ProductRetrievalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): result = product_embed(self.img_input)[OutputKeys.IMG_EMBEDDING] print('abs sum value is: {}'.format(np.sum(np.abs(result)))) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_realtime_object_detection.py b/tests/pipelines/test_realtime_object_detection.py index 498c09d8..21450619 100644 --- a/tests/pipelines/test_realtime_object_detection.py +++ b/tests/pipelines/test_realtime_object_detection.py @@ -7,14 +7,13 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import realtime_object_detection_bbox_vis -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import test_level logger = get_logger() -class RealtimeObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class RealtimeObjectDetectionTest(unittest.TestCase): def setUp(self) -> None: self.easycv_small_model_id = 'damo/cv_cspnet_image-object-detection_yolox' @@ -22,7 +21,7 @@ class RealtimeObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): self.test_image = 'data/test/images/keypoints_detect/000000438862.jpg' self.task = Tasks.image_object_detection - 
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip('skip test in current test level: no pipeline implemented') def test_run_easycv_yolox(self): realtime_object_detection = pipeline( Tasks.image_object_detection, model=self.easycv_small_model_id) @@ -34,7 +33,7 @@ class RealtimeObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): else: raise ValueError('process error') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip('skip test in current test level: no pipeline implemented') def test_run_easycv_yolox_nano(self): realtime_object_detection = pipeline( Tasks.image_object_detection, model=self.easycv_nano_model_id) @@ -46,10 +45,6 @@ class RealtimeObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): else: raise ValueError('process error') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_realtime_video_object_detection.py b/tests/pipelines/test_realtime_video_object_detection.py index 716c9260..d42bda67 100644 --- a/tests/pipelines/test_realtime_video_object_detection.py +++ b/tests/pipelines/test_realtime_video_object_detection.py @@ -9,15 +9,13 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import show_video_object_detection_result -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import test_level logger = get_logger() -class RealtimeVideoObjectDetectionTest(unittest.TestCase, - DemoCompatibilityCheck): +class RealtimeVideoObjectDetectionTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_cspnet_video-object-detection_streamyolo' @@ -53,10 +51,6 @@ class RealtimeVideoObjectDetectionTest(unittest.TestCase, else: raise ValueError('process error') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_referring_video_object_segmentation.py b/tests/pipelines/test_referring_video_object_segmentation.py index 509e9317..2b7de41c 100644 --- a/tests/pipelines/test_referring_video_object_segmentation.py +++ b/tests/pipelines/test_referring_video_object_segmentation.py @@ -3,12 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ReferringVideoObjectSegmentationTest(unittest.TestCase, - DemoCompatibilityCheck): +class ReferringVideoObjectSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.referring_video_object_segmentation @@ -45,10 +43,6 @@ class ReferringVideoObjectSegmentationTest(unittest.TestCase, else: raise ValueError('process error') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_relation_extraction.py b/tests/pipelines/test_relation_extraction.py index 17ab61fc..44c0b9ad 100644 --- a/tests/pipelines/test_relation_extraction.py +++ 
b/tests/pipelines/test_relation_extraction.py @@ -8,11 +8,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import InformationExtractionPipeline from modelscope.preprocessors import RelationExtractionTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class RelationExtractionTest(unittest.TestCase, DemoCompatibilityCheck): +class RelationExtractionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.relation_extraction @@ -55,10 +54,6 @@ class RelationExtractionTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(task=Tasks.relation_extraction) print(pipeline_ins(input=self.sentence)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_salient_detection.py b/tests/pipelines/test_salient_detection.py index 3101213c..78ae94db 100644 --- a/tests/pipelines/test_salient_detection.py +++ b/tests/pipelines/test_salient_detection.py @@ -4,11 +4,10 @@ import unittest from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class SalientDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class SalientDetectionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.semantic_segmentation @@ -44,10 +43,6 @@ class SalientDetectionTest(unittest.TestCase, DemoCompatibilityCheck): cv2.imwrite(input_location + '_camouflag.jpg', result[OutputKeys.MASKS]) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_sentence_similarity.py b/tests/pipelines/test_sentence_similarity.py index 233bd3a1..e411158f 100644 --- a/tests/pipelines/test_sentence_similarity.py +++ b/tests/pipelines/test_sentence_similarity.py @@ -11,12 +11,11 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import TextClassificationPipeline from modelscope.preprocessors import TextClassificationTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool from modelscope.utils.test_utils import test_level -class SentenceSimilarityTest(unittest.TestCase, DemoCompatibilityCheck): +class SentenceSimilarityTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.sentence_similarity @@ -110,10 +109,6 @@ class SentenceSimilarityTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(task=Tasks.sentence_similarity) print(pipeline_ins(input=(self.sentence1, self.sentence2))) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_sentiment_classification.py b/tests/pipelines/test_sentiment_classification.py index 278f34a8..bb0311ff 100644 --- a/tests/pipelines/test_sentiment_classification.py +++ b/tests/pipelines/test_sentiment_classification.py @@ -9,12 +9,10 @@ 
from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import TextClassificationPipeline from modelscope.preprocessors import TextClassificationTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class SentimentClassificationTaskModelTest(unittest.TestCase, - DemoCompatibilityCheck): +class SentimentClassificationTaskModelTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_classification @@ -63,10 +61,6 @@ class SentimentClassificationTaskModelTest(unittest.TestCase, self.assertTrue( isinstance(pipeline_ins.model, ModelForTextClassification)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_siamese_uie.py b/tests/pipelines/test_siamese_uie.py index 30b38d2e..c5008573 100644 --- a/tests/pipelines/test_siamese_uie.py +++ b/tests/pipelines/test_siamese_uie.py @@ -10,12 +10,11 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import SiameseUiePipeline from modelscope.preprocessors import SiameseUiePreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool from modelscope.utils.test_utils import test_level -class ZeroShotClassificationTest(unittest.TestCase, DemoCompatibilityCheck): +class ZeroShotClassificationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.siamese_uie @@ -67,10 +66,6 @@ class ZeroShotClassificationTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(task=Tasks.siamese_uie, model_revision='v1.1') print(pipeline_ins(input=self.sentence, schema=self.schema)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_skin_retouching.py b/tests/pipelines/test_skin_retouching.py index db8d89ed..aa1e0c59 100644 --- a/tests/pipelines/test_skin_retouching.py +++ b/tests/pipelines/test_skin_retouching.py @@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class SkinRetouchingTest(unittest.TestCase, DemoCompatibilityCheck): +class SkinRetouchingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.skin_retouching @@ -41,10 +40,6 @@ class SkinRetouchingTest(unittest.TestCase, DemoCompatibilityCheck): skin_retouching = pipeline(Tasks.skin_retouching) self.pipeline_inference(skin_retouching, self.test_image) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_soonet_video_temporal_grounding.py b/tests/pipelines/test_soonet_video_temporal_grounding.py index 21f8027c..4fafeb31 100644 --- a/tests/pipelines/test_soonet_video_temporal_grounding.py +++ b/tests/pipelines/test_soonet_video_temporal_grounding.py @@ -5,12 +5,10 @@ from modelscope.models import Model 
from modelscope.models.multi_modal.soonet import SOONet from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class SOONetVideoTemporalGroundingTest(unittest.TestCase, - DemoCompatibilityCheck): +class SOONetVideoTemporalGroundingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_temporal_grounding diff --git a/tests/pipelines/test_speaker_verification.py b/tests/pipelines/test_speaker_verification.py index 83d8aff3..2b90c66e 100644 --- a/tests/pipelines/test_speaker_verification.py +++ b/tests/pipelines/test_speaker_verification.py @@ -1,13 +1,11 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -import os.path import unittest -from typing import Any, Dict, List +from typing import Any, Dict, List, Union from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import test_level @@ -16,17 +14,28 @@ logger = get_logger() SPEAKER1_A_EN_16K_WAV = 'data/test/audios/speaker1_a_en_16k.wav' SPEAKER1_B_EN_16K_WAV = 'data/test/audios/speaker1_b_en_16k.wav' SPEAKER2_A_EN_16K_WAV = 'data/test/audios/speaker2_a_en_16k.wav' +SCL_EXAMPLE_WAV = 'data/test/audios/scl_example1.wav' -class SpeakerVerificationTest(unittest.TestCase, DemoCompatibilityCheck): +class SpeakerVerificationTest(unittest.TestCase): ecapatdnn_voxceleb_16k_model_id = 'damo/speech_ecapa-tdnn_sv_en_voxceleb_16k' campplus_voxceleb_16k_model_id = 'damo/speech_campplus_sv_en_voxceleb_16k' + rdino_voxceleb_16k_model_id = 'damo/speech_rdino_ecapa_tdnn_sv_en_voxceleb_16k' + speaker_change_locating_cn_model_id = 'damo/speech_campplus-transformer_scl_zh-cn_16k-common' + eres2net_voxceleb_16k_model_id = 'damo/speech_eres2net_sv_en_voxceleb_16k' def setUp(self) -> None: self.task = Tasks.speaker_verification - def run_pipeline(self, model_id: str, audios: List[str]) -> Dict[str, Any]: - p = pipeline(task=self.task, model=model_id) + def run_pipeline(self, + model_id: str, + audios: Union[List[str], str], + task: str = None, + model_revision=None) -> Dict[str, Any]: + if task is not None: + self.task = task + p = pipeline( + task=self.task, model=model_id, model_revision=model_revision) result = p(audios) return result @@ -51,9 +60,36 @@ class SpeakerVerificationTest(unittest.TestCase, DemoCompatibilityCheck): print(result) self.assertTrue(OutputKeys.SCORE in result) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_speaker_verification_rdino_voxceleb_16k(self): + logger.info('Run speaker verification for rdino_voxceleb_16k model') + result = self.run_pipeline( + model_id=self.rdino_voxceleb_16k_model_id, + audios=[SPEAKER1_A_EN_16K_WAV, SPEAKER1_B_EN_16K_WAV], + model_revision='v1.0.1') + print(result) + self.assertTrue(OutputKeys.SCORE in result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_speaker_change_locating_cn_16k(self): + logger.info( + 'Run speaker change locating for campplus-transformer model') + result = self.run_pipeline( + model_id=self.speaker_change_locating_cn_model_id, + task=Tasks.speaker_diarization, + 
audios=SCL_EXAMPLE_WAV) + print(result) + self.assertTrue(OutputKeys.TEXT in result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_speaker_verification_eres2net_voxceleb_16k(self): + logger.info('Run speaker verification for eres2net_voxceleb_16k model') + result = self.run_pipeline( + model_id=self.eres2net_voxceleb_16k_model_id, + audios=[SPEAKER1_A_EN_16K_WAV, SPEAKER1_B_EN_16K_WAV], + model_revision='v1.0.2') + print(result) + self.assertTrue(OutputKeys.SCORE in result) if __name__ == '__main__': diff --git a/tests/pipelines/test_speech_separation.py b/tests/pipelines/test_speech_separation.py index 194f84a8..4edb3b43 100644 --- a/tests/pipelines/test_speech_separation.py +++ b/tests/pipelines/test_speech_separation.py @@ -8,13 +8,12 @@ import numpy from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level MIX_SPEECH_FILE = 'data/test/audios/mix_speech.wav' -class SpeechSeparationTest(unittest.TestCase, DemoCompatibilityCheck): +class SpeechSeparationTest(unittest.TestCase): def setUp(self) -> None: pass @@ -32,10 +31,6 @@ class SpeechSeparationTest(unittest.TestCase, DemoCompatibilityCheck): sf.write(save_file, numpy.frombuffer(signal, dtype=numpy.int16), 8000) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_speech_signal_process.py b/tests/pipelines/test_speech_signal_process.py index 2c26cee6..104bf88a 100644 --- a/tests/pipelines/test_speech_signal_process.py +++ b/tests/pipelines/test_speech_signal_process.py @@ -7,7 +7,6 @@ from modelscope.metainfo import Pipelines from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level NEAREND_MIC_FILE = 'data/test/audios/nearend_mic.wav' @@ -24,7 +23,7 @@ NOISE_SPEECH_URL = 'https://modelscope.oss-cn-beijing.aliyuncs.com/' \ 'test/audios/speech_with_noise.wav' -class SpeechSignalProcessTest(unittest.TestCase, DemoCompatibilityCheck): +class SpeechSignalProcessTest(unittest.TestCase): def setUp(self) -> None: pass @@ -150,10 +149,6 @@ class SpeechSignalProcessTest(unittest.TestCase, DemoCompatibilityCheck): w.write(pcm) audio = f.read(block_size) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_table_question_answering.py b/tests/pipelines/test_table_question_answering.py index 4d6eff24..d688ef23 100644 --- a/tests/pipelines/test_table_question_answering.py +++ b/tests/pipelines/test_table_question_answering.py @@ -40,7 +40,7 @@ def tableqa_tracking_and_print_results_with_history( print('question', question) print('sql text:', output_dict[OutputKeys.SQL_STRING]) print('sql query:', output_dict[OutputKeys.SQL_QUERY]) - print('query result:', output_dict[OutputKeys.QUERT_RESULT]) + print('query result:', output_dict[OutputKeys.QUERY_RESULT]) print('json dumps', json.dumps(output_dict, ensure_ascii=False)) print() historical_queries = output_dict[OutputKeys.HISTORY] @@ -66,7 +66,7 @@ def 
tableqa_tracking_and_print_results_without_history( print('question', question) print('sql text:', output_dict[OutputKeys.SQL_STRING]) print('sql query:', output_dict[OutputKeys.SQL_QUERY]) - print('query result:', output_dict[OutputKeys.QUERT_RESULT]) + print('query result:', output_dict[OutputKeys.QUERY_RESULT]) print('json dumps', json.dumps(output_dict, ensure_ascii=False)) print() @@ -99,7 +99,7 @@ def tableqa_tracking_and_print_results_with_tableid( print('question', question) print('sql text:', output_dict[OutputKeys.SQL_STRING]) print('sql query:', output_dict[OutputKeys.SQL_QUERY]) - print('query result:', output_dict[OutputKeys.QUERT_RESULT]) + print('query result:', output_dict[OutputKeys.QUERY_RESULT]) print('json dumps', json.dumps(output_dict, ensure_ascii=False)) print() historical_queries = output_dict[OutputKeys.HISTORY] @@ -135,7 +135,7 @@ class TableQuestionAnswering(unittest.TestCase): 'history_sql': None }) print(i, result[OutputKeys.OUTPUT][OutputKeys.SQL_QUERY], - result[OutputKeys.OUTPUT][OutputKeys.QUERT_RESULT], + result[OutputKeys.OUTPUT][OutputKeys.QUERY_RESULT], json.dumps(result)) procs = [] diff --git a/tests/pipelines/test_table_recognition.py b/tests/pipelines/test_table_recognition.py index 3c6ee74a..6b81fc62 100644 --- a/tests/pipelines/test_table_recognition.py +++ b/tests/pipelines/test_table_recognition.py @@ -5,11 +5,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TableRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class TableRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_dla34_table-structure-recognition_cycle-centernet' @@ -32,10 +31,6 @@ class TableRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): table_recognition = pipeline(Tasks.table_recognition) self.pipeline_inference(table_recognition, self.test_image) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_tbs_detection.py b/tests/pipelines/test_tbs_detection.py index ac0dd550..0d5a1283 100644 --- a/tests/pipelines/test_tbs_detection.py +++ b/tests/pipelines/test_tbs_detection.py @@ -2,11 +2,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class ObjectDetectionTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_with_model_name(self): diff --git a/tests/pipelines/test_text2text_generation.py b/tests/pipelines/test_text2text_generation.py index 40576a29..d439e033 100644 --- a/tests/pipelines/test_text2text_generation.py +++ b/tests/pipelines/test_text2text_generation.py @@ -8,11 +8,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import TextGenerationT5Pipeline from modelscope.preprocessors import TextGenerationT5Preprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class 
Text2TextGenerationTest(unittest.TestCase, DemoCompatibilityCheck): +class Text2TextGenerationTest(unittest.TestCase): def setUp(self) -> None: self.model_id_generate = 'damo/t5-cn-base-test' @@ -86,10 +85,6 @@ class Text2TextGenerationTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(task=Tasks.text2text_generation) print(pipeline_ins(self.input_generate)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_text_classification.py b/tests/pipelines/test_text_classification.py index d07ddbb8..128f86af 100644 --- a/tests/pipelines/test_text_classification.py +++ b/tests/pipelines/test_text_classification.py @@ -7,11 +7,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import TextClassificationPipeline from modelscope.preprocessors import TextClassificationTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class SequenceClassificationTest(unittest.TestCase, DemoCompatibilityCheck): +class SequenceClassificationTest(unittest.TestCase): sentence1 = 'i like this wonderful place' def setUp(self) -> None: @@ -91,10 +90,6 @@ class SequenceClassificationTest(unittest.TestCase, DemoCompatibilityCheck): result = text_classification(dataset) self.printDataset(result) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_text_driven_segmentation.py b/tests/pipelines/test_text_driven_segmentation.py index a67729ff..741787d9 100644 --- a/tests/pipelines/test_text_driven_segmentation.py +++ b/tests/pipelines/test_text_driven_segmentation.py @@ -23,10 +23,6 @@ class TextDrivenSegmentationTest(unittest.TestCase): # result[OutputKeys.MASKS] is segment map result,other keys are not used cv2.imwrite(input_location + '_lseg.jpg', result[OutputKeys.MASKS]) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.test_demo() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_text_error_correction.py b/tests/pipelines/test_text_error_correction.py index 171f3ab2..b4bf5be9 100644 --- a/tests/pipelines/test_text_error_correction.py +++ b/tests/pipelines/test_text_error_correction.py @@ -9,11 +9,10 @@ from modelscope.pipelines.nlp import TextErrorCorrectionPipeline from modelscope.preprocessors import (Preprocessor, TextErrorCorrectionPreprocessor) from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TextErrorCorrectionTest(unittest.TestCase, DemoCompatibilityCheck): +class TextErrorCorrectionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_error_correction @@ -81,10 +80,6 @@ class TextErrorCorrectionTest(unittest.TestCase, DemoCompatibilityCheck): task=Tasks.text_error_correction, model=self.law_model_id) print(pipeline_ins(self.input_law)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git 
a/tests/pipelines/test_text_generation.py b/tests/pipelines/test_text_generation.py index 998cbd18..378b1bbc 100644 --- a/tests/pipelines/test_text_generation.py +++ b/tests/pipelines/test_text_generation.py @@ -8,11 +8,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import TextGenerationPipeline from modelscope.preprocessors import TextGenerationTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TextGenerationTest(unittest.TestCase, DemoCompatibilityCheck): +class TextGenerationTest(unittest.TestCase): def setUp(self) -> None: self.palm_model_id_zh_base = 'damo/nlp_palm2.0_text-generation_chinese-base' @@ -261,10 +260,6 @@ class TextGenerationTest(unittest.TestCase, DemoCompatibilityCheck): model='damo/nlp_gpt2_text-generation_english-base') print(pipe('My name is Teven and I am')) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_text_to_image_synthesis.py b/tests/pipelines/test_text_to_image_synthesis.py index 5e28282b..63c38571 100644 --- a/tests/pipelines/test_text_to_image_synthesis.py +++ b/tests/pipelines/test_text_to_image_synthesis.py @@ -8,11 +8,10 @@ from modelscope.models import Model from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TextToImageSynthesisTest(unittest.TestCase, DemoCompatibilityCheck): +class TextToImageSynthesisTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_to_image_synthesis @@ -61,10 +60,6 @@ class TextToImageSynthesisTest(unittest.TestCase, DemoCompatibilityCheck): self.test_text)[OutputKeys.OUTPUT_IMGS][0] print(np.sum(np.abs(img))) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_text_to_speech.py b/tests/pipelines/test_text_to_speech.py index f746dfbe..528977ce 100644 --- a/tests/pipelines/test_text_to_speech.py +++ b/tests/pipelines/test_text_to_speech.py @@ -11,7 +11,6 @@ import torch from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import test_level @@ -20,8 +19,7 @@ import tensorflow as tf # isort:skip logger = get_logger() -class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase, - DemoCompatibilityCheck): +class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_to_speech @@ -109,10 +107,6 @@ class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase, with open(f'output_{self.test_model_name[i]}', 'wb') as f: f.write(wav) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_text_to_video_synthesis.py b/tests/pipelines/test_text_to_video_synthesis.py index 
6463c155..97ef6089 100644 --- a/tests/pipelines/test_text_to_video_synthesis.py +++ b/tests/pipelines/test_text_to_video_synthesis.py @@ -5,11 +5,10 @@ import unittest from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TextToVideoSynthesisTest(unittest.TestCase, DemoCompatibilityCheck): +class TextToVideoSynthesisTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_to_video_synthesis @@ -27,10 +26,6 @@ class TextToVideoSynthesisTest(unittest.TestCase, DemoCompatibilityCheck): self.test_text)[OutputKeys.OUTPUT_VIDEO] print(output_video_path) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_tinymog_face_detection.py b/tests/pipelines/test_tinymog_face_detection.py index e80fa482..48e74f44 100644 --- a/tests/pipelines/test_tinymog_face_detection.py +++ b/tests/pipelines/test_tinymog_face_detection.py @@ -8,11 +8,10 @@ from modelscope.msdatasets import MsDataset from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import draw_face_detection_result -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TinyMogFaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class TinyMogFaceDetectionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.face_detection @@ -48,10 +47,6 @@ class TinyMogFaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck): result = face_detection(self.img_path) self.show_result(self.img_path, result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_tinynas_classification.py b/tests/pipelines/test_tinynas_classification.py index ebc6b722..300bd2b1 100644 --- a/tests/pipelines/test_tinynas_classification.py +++ b/tests/pipelines/test_tinynas_classification.py @@ -4,11 +4,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TinyNASClassificationTest(unittest.TestCase, DemoCompatibilityCheck): +class TinyNASClassificationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_classification @@ -21,10 +20,6 @@ class TinyNASClassificationTest(unittest.TestCase, DemoCompatibilityCheck): result = tinynas_classification('data/test/images/image_wolf.jpeg') print(result) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_tinynas_detection.py b/tests/pipelines/test_tinynas_detection.py index f7c513ff..08c746ea 100644 --- a/tests/pipelines/test_tinynas_detection.py +++ b/tests/pipelines/test_tinynas_detection.py @@ -7,11 +7,10 @@ from PIL import Image from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils 
import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TinynasObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class TinynasObjectDetectionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_object_detection @@ -52,10 +51,6 @@ class TinynasObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): 'data/test/images/image_detection.jpg') print('damoyolo-t', result) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_image_object_detection_auto_pipeline(self): test_image = 'data/test/images/image_detection.jpg' diff --git a/tests/pipelines/test_traffic_sign_detection.py b/tests/pipelines/test_traffic_sign_detection.py index 5404649d..efedec14 100644 --- a/tests/pipelines/test_traffic_sign_detection.py +++ b/tests/pipelines/test_traffic_sign_detection.py @@ -7,20 +7,15 @@ from PIL import Image from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TrafficSignDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class TrafficSignDetectionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.domain_specific_object_detection self.model_id = 'damo/cv_tinynas_object-detection_damoyolo_traffic_sign' - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_traffic_sign_detection_damoyolo(self): tinynas_object_detection = pipeline( diff --git a/tests/pipelines/test_translation_evaluation.py b/tests/pipelines/test_translation_evaluation.py index 53524fdc..e936f41a 100644 --- a/tests/pipelines/test_translation_evaluation.py +++ b/tests/pipelines/test_translation_evaluation.py @@ -2,14 +2,13 @@ import unittest -from modelscope.models.nlp.unite.configuration_unite import EvaluationMode +from modelscope.models.nlp.unite.configuration import InputFormat from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TranslationEvaluationTest(unittest.TestCase, DemoCompatibilityCheck): +class TranslationEvaluationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.translation_evaluation @@ -18,7 +17,7 @@ class TranslationEvaluationTest(unittest.TestCase, DemoCompatibilityCheck): @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_with_model_name_for_unite_large(self): - input = { + input_dict = { 'hyp': [ 'This is a sentence.', 'This is another sentence.', @@ -34,27 +33,27 @@ class TranslationEvaluationTest(unittest.TestCase, DemoCompatibilityCheck): } pipeline_ins = pipeline(self.task, model=self.model_id_large) - print(pipeline_ins(input=input)) + print(pipeline_ins(input_dict)['score']) - pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.SRC) - print(pipeline_ins(input=input)) + pipeline_ins.change_input_format(input_format=InputFormat.SRC) + print(pipeline_ins(input_dict)['score']) - pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.REF) - print(pipeline_ins(input=input)) 
+ pipeline_ins.change_input_format(input_format=InputFormat.REF) + print(pipeline_ins(input_dict)['score']) pipeline_ins = pipeline( self.task, model=self.model_id_large, device='cpu') - print(pipeline_ins(input=input)) + print(pipeline_ins(input_dict)['score']) - pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.SRC) - print(pipeline_ins(input=input)) + pipeline_ins.change_input_format(input_format=InputFormat.SRC) + print(pipeline_ins(input_dict)['score']) - pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.REF) - print(pipeline_ins(input=input)) + pipeline_ins.change_input_format(input_format=InputFormat.REF) + print(pipeline_ins(input_dict)['score']) @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_with_model_name_for_unite_base(self): - input = { + input_dict = { 'hyp': [ 'This is a sentence.', 'This is another sentence.', @@ -70,23 +69,23 @@ class TranslationEvaluationTest(unittest.TestCase, DemoCompatibilityCheck): } pipeline_ins = pipeline(self.task, model=self.model_id_base) - print(pipeline_ins(input=input)) + print(pipeline_ins(input_dict)['score']) - pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.SRC) - print(pipeline_ins(input=input)) + pipeline_ins.change_input_format(input_format=InputFormat.SRC) + print(pipeline_ins(input_dict)['score']) - pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.REF) - print(pipeline_ins(input=input)) + pipeline_ins.change_input_format(input_format=InputFormat.REF) + print(pipeline_ins(input_dict)['score']) pipeline_ins = pipeline( self.task, model=self.model_id_base, device='cpu') - print(pipeline_ins(input=input)) + print(pipeline_ins(input_dict)['score']) - pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.SRC) - print(pipeline_ins(input=input)) + pipeline_ins.change_input_format(input_format=InputFormat.SRC) + print(pipeline_ins(input_dict)['score']) - pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.REF) - print(pipeline_ins(input=input)) + pipeline_ins.change_input_format(input_format=InputFormat.REF) + print(pipeline_ins(input_dict)['score']) if __name__ == '__main__': diff --git a/tests/pipelines/test_translation_quality_estimation.py b/tests/pipelines/test_translation_quality_estimation.py index 315fa72b..0890f31b 100644 --- a/tests/pipelines/test_translation_quality_estimation.py +++ b/tests/pipelines/test_translation_quality_estimation.py @@ -3,12 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TranslationQualityEstimationTest(unittest.TestCase, - DemoCompatibilityCheck): +class TranslationQualityEstimationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.sentence_similarity @@ -23,10 +21,6 @@ class TranslationQualityEstimationTest(unittest.TestCase, pipeline_ins = pipeline(self.task, model=self.model_id) print(pipeline_ins(input=inputs)) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_unifold.py b/tests/pipelines/test_unifold.py index cf67929d..98d2c1ce 100644 --- a/tests/pipelines/test_unifold.py +++ b/tests/pipelines/test_unifold.py @@ -4,11 +4,10 @@ import unittest from modelscope.hub.snapshot_download import snapshot_download from modelscope.pipelines import pipeline from 
modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class UnifoldProteinStructureTest(unittest.TestCase, DemoCompatibilityCheck): +class UnifoldProteinStructureTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.protein_structure diff --git a/tests/pipelines/test_universal_matting.py b/tests/pipelines/test_universal_matting.py index 5868cf36..1450d938 100644 --- a/tests/pipelines/test_universal_matting.py +++ b/tests/pipelines/test_universal_matting.py @@ -8,11 +8,10 @@ from modelscope.msdatasets import MsDataset from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import ModelFile, Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class UniversalMattingTest(unittest.TestCase, DemoCompatibilityCheck): +class UniversalMattingTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_unet_universal-matting' @@ -35,10 +34,6 @@ class UniversalMattingTest(unittest.TestCase, DemoCompatibilityCheck): cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG]) print(f'Output written to {osp.abspath("result.png")}') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_user_satisfaction_estimation.py b/tests/pipelines/test_user_satisfaction_estimation.py index 2bbfd5d7..2904ea30 100644 --- a/tests/pipelines/test_user_satisfaction_estimation.py +++ b/tests/pipelines/test_user_satisfaction_estimation.py @@ -6,12 +6,10 @@ from modelscope.models import Model from modelscope.pipelines import pipeline from modelscope.preprocessors import DialogueClassificationUsePreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class UserSatisfactionEstimationTest(unittest.TestCase, - DemoCompatibilityCheck): +class UserSatisfactionEstimationTest(unittest.TestCase): model_id = 'damo/nlp_user-satisfaction-estimation_chinese' input_dialogue = [('返修退换货咨询|||', '手机有质量问题怎么办|||稍等,我看下', '开不开机了|||', @@ -33,10 +31,6 @@ class UserSatisfactionEstimationTest(unittest.TestCase, task=Tasks.text_classification, model=self.model_id) print(pipeline_ins(input=self.input_dialogue)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - print(self.compatibility_check()) - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_video_category.py b/tests/pipelines/test_video_category.py index 660196b8..61ee72b0 100644 --- a/tests/pipelines/test_video_category.py +++ b/tests/pipelines/test_video_category.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VideoCategoryTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoCategoryTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_category @@ -21,10 +20,6 @@ class VideoCategoryTest(unittest.TestCase, DemoCompatibilityCheck): print(f'video category output: {result}.') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def 
test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_video_colorization.py b/tests/pipelines/test_video_colorization.py index c35577a4..fe6c0f87 100644 --- a/tests/pipelines/test_video_colorization.py +++ b/tests/pipelines/test_video_colorization.py @@ -11,11 +11,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.pipelines.cv import VideoColorizationPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VideoColorizationTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoColorizationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_colorization @@ -44,10 +43,6 @@ class VideoColorizationTest(unittest.TestCase, DemoCompatibilityCheck): video_colorization = pipeline(Tasks.video_colorization) self.pipeline_inference(video_colorization, self.test_video) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_video_deinterlace.py b/tests/pipelines/test_video_deinterlace.py index bcb36cc3..267d4664 100644 --- a/tests/pipelines/test_video_deinterlace.py +++ b/tests/pipelines/test_video_deinterlace.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import VideoDeinterlacePipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VideoDeinterlaceTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoDeinterlaceTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_deinterlace @@ -52,10 +51,6 @@ class VideoDeinterlaceTest(unittest.TestCase, DemoCompatibilityCheck): input=self.test_video)[OutputKeys.OUTPUT_VIDEO] print('pipeline: the output video path is {}'.format(out_video_path)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_video_depth_estimation.py b/tests/pipelines/test_video_depth_estimation.py index 30ca3b33..6a054b2b 100644 --- a/tests/pipelines/test_video_depth_estimation.py +++ b/tests/pipelines/test_video_depth_estimation.py @@ -5,11 +5,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import show_video_depth_estimation_result -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VideoDepthEstimationTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoDepthEstimationTest(unittest.TestCase): def setUp(self) -> None: self.task = 'video-depth-estimation' diff --git a/tests/pipelines/test_video_frame_interpolation.py b/tests/pipelines/test_video_frame_interpolation.py index c23aa46a..11a4f568 100644 --- a/tests/pipelines/test_video_frame_interpolation.py +++ b/tests/pipelines/test_video_frame_interpolation.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import 
VideoFrameInterpolationPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VideoFrameInterpolationTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoFrameInterpolationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_frame_interpolation @@ -58,10 +57,6 @@ class VideoFrameInterpolationTest(unittest.TestCase, DemoCompatibilityCheck): input=self.test_video)[OutputKeys.OUTPUT_VIDEO] print('pipeline: the output video path is {}'.format(out_video_path)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_video_instance_segmentation.py b/tests/pipelines/test_video_instance_segmentation.py index 0a76d260..465cf26f 100644 --- a/tests/pipelines/test_video_instance_segmentation.py +++ b/tests/pipelines/test_video_instance_segmentation.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VideoInstanceSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoInstanceSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_panoptic_segmentation @@ -33,10 +32,6 @@ class VideoInstanceSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): print(f'video instance segmentation output:\n {result}.') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_video_multi_modal_embedding.py b/tests/pipelines/test_video_multi_modal_embedding.py index afe5940d..fe87b089 100644 --- a/tests/pipelines/test_video_multi_modal_embedding.py +++ b/tests/pipelines/test_video_multi_modal_embedding.py @@ -4,14 +4,13 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import test_level logger = get_logger() -class VideoMultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoMultiModalEmbeddingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_multi_modal_embedding @@ -41,10 +40,6 @@ class VideoMultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): logger.info('video feature: {}'.format( output['video_embedding'][0][0][0])) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_video_multi_object_tracking.py b/tests/pipelines/test_video_multi_object_tracking.py index 97f1e705..f63fd8b1 100644 --- a/tests/pipelines/test_video_multi_object_tracking.py +++ b/tests/pipelines/test_video_multi_object_tracking.py @@ -4,11 +4,10 @@ import unittest from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class 
MultiObjectTracking(unittest.TestCase, DemoCompatibilityCheck): +class MultiObjectTracking(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_multi_object_tracking @@ -34,10 +33,6 @@ class MultiObjectTracking(unittest.TestCase, DemoCompatibilityCheck): in result) assert len(result[OutputKeys.LABELS]) == len(result[OutputKeys.BOXES]) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_video_object_segmentation.py b/tests/pipelines/test_video_object_segmentation.py index e4adeb26..6f0e7c2a 100644 --- a/tests/pipelines/test_video_object_segmentation.py +++ b/tests/pipelines/test_video_object_segmentation.py @@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import masks_visualization -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VideoObjectSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoObjectSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = 'video-object-segmentation' diff --git a/tests/pipelines/test_video_panoptic_segmentation.py b/tests/pipelines/test_video_panoptic_segmentation.py index ad038135..cc805812 100644 --- a/tests/pipelines/test_video_panoptic_segmentation.py +++ b/tests/pipelines/test_video_panoptic_segmentation.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VideoPanopticSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoPanopticSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_panoptic_segmentation @@ -32,10 +31,6 @@ class VideoPanopticSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): print(f'video summarization output:\n {result}.') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_video_single_object_tracking.py b/tests/pipelines/test_video_single_object_tracking.py index e75ccbb0..c8331649 100644 --- a/tests/pipelines/test_video_single_object_tracking.py +++ b/tests/pipelines/test_video_single_object_tracking.py @@ -5,11 +5,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import show_video_tracking_result -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class SingleObjectTracking(unittest.TestCase, DemoCompatibilityCheck): +class SingleObjectTracking(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_single_object_tracking @@ -46,10 +45,6 @@ class SingleObjectTracking(unittest.TestCase, DemoCompatibilityCheck): result = video_single_object_tracking((video_path, init_bbox)) print('result is : ', result[OutputKeys.BOXES]) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': 
unittest.main() diff --git a/tests/pipelines/test_video_stabilization.py b/tests/pipelines/test_video_stabilization.py index d102f3e1..26501c2d 100644 --- a/tests/pipelines/test_video_stabilization.py +++ b/tests/pipelines/test_video_stabilization.py @@ -7,11 +7,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import VideoStabilizationPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VideoStabilizationTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoStabilizationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_stabilization @@ -42,10 +41,6 @@ class VideoStabilizationTest(unittest.TestCase, DemoCompatibilityCheck): input=self.test_video)[OutputKeys.OUTPUT_VIDEO] print('pipeline: the output video path is {}'.format(out_video_path)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_video_summarization.py b/tests/pipelines/test_video_summarization.py index 1f965c53..dc6a3a80 100644 --- a/tests/pipelines/test_video_summarization.py +++ b/tests/pipelines/test_video_summarization.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VideoSummarizationTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoSummarizationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_summarization @@ -30,10 +29,6 @@ class VideoSummarizationTest(unittest.TestCase, DemoCompatibilityCheck): print(f'video summarization output:\n {result}.') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_video_super_resolution.py b/tests/pipelines/test_video_super_resolution.py index 0da18dd7..2e207887 100644 --- a/tests/pipelines/test_video_super_resolution.py +++ b/tests/pipelines/test_video_super_resolution.py @@ -7,11 +7,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import VideoSuperResolutionPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VideoSuperResolutionTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoSuperResolutionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_super_resolution @@ -50,10 +49,6 @@ class VideoSuperResolutionTest(unittest.TestCase, DemoCompatibilityCheck): input=self.test_video)[OutputKeys.OUTPUT_VIDEO] print('pipeline: the output video path is {}'.format(out_video_path)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_vidt_face.py b/tests/pipelines/test_vidt_face.py index 8640d128..e49d9de9 100644 --- a/tests/pipelines/test_vidt_face.py +++ b/tests/pipelines/test_vidt_face.py @@ -5,11 +5,10 @@ from modelscope.models 
import Model from modelscope.models.cv.vidt import VidtModel from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VidtTest(unittest.TestCase, DemoCompatibilityCheck): +class VidtTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_object_detection diff --git a/tests/pipelines/test_vidt_logo.py b/tests/pipelines/test_vidt_logo.py index 143eb205..fce6fe48 100644 --- a/tests/pipelines/test_vidt_logo.py +++ b/tests/pipelines/test_vidt_logo.py @@ -5,11 +5,10 @@ from modelscope.models import Model from modelscope.models.cv.vidt import VidtModel from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VidtTest(unittest.TestCase, DemoCompatibilityCheck): +class VidtTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_object_detection diff --git a/tests/pipelines/test_virtual_try_on.py b/tests/pipelines/test_virtual_try_on.py index 5c18dcc4..c8a55f79 100644 --- a/tests/pipelines/test_virtual_try_on.py +++ b/tests/pipelines/test_virtual_try_on.py @@ -8,11 +8,10 @@ from PIL import Image from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VirtualTryonTest(unittest.TestCase, DemoCompatibilityCheck): +class VirtualTryonTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.virtual_try_on @@ -36,10 +35,6 @@ class VirtualTryonTest(unittest.TestCase, DemoCompatibilityCheck): img = pipeline_virtual_tryon(self.input_imgs)[OutputKeys.OUTPUT_IMG] cv2.imwrite('demo.jpg', img[:, :, ::-1]) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_vision_efficient_tuning.py b/tests/pipelines/test_vision_efficient_tuning.py index c88ed478..acfbb235 100644 --- a/tests/pipelines/test_vision_efficient_tuning.py +++ b/tests/pipelines/test_vision_efficient_tuning.py @@ -6,11 +6,10 @@ from modelscope.models.cv.vision_efficient_tuning.model import \ VisionEfficientTuningModel from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck): +class VisionEfficientTuningTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.vision_efficient_tuning @@ -29,11 +28,6 @@ class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck): model = Model.from_pretrained(model_id) self.assertTrue(model.__class__ == VisionEfficientTuningModel) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_vision_efficient_tuning_adapter_demo_compatibility(self): - self.model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-adapter' - self.compatibility_check() - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_vision_efficient_tuning_lora_run_pipeline(self): model_id = 
'damo/cv_vitb16_classification_vision-efficient-tuning-lora' @@ -48,11 +42,6 @@ class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck): model = Model.from_pretrained(model_id) self.assertTrue(model.__class__ == VisionEfficientTuningModel) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_vision_efficient_tuning_lora_demo_compatibility(self): - self.model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-lora' - self.compatibility_check() - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_vision_efficient_tuning_prefix_run_pipeline(self): model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-prefix' @@ -67,11 +56,6 @@ class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck): model = Model.from_pretrained(model_id) self.assertTrue(model.__class__ == VisionEfficientTuningModel) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_vision_efficient_tuning_prefix_demo_compatibility(self): - self.model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-prefix' - self.compatibility_check() - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_vision_efficient_tuning_prompt_run_pipeline(self): model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-prompt' @@ -86,11 +70,6 @@ class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck): model = Model.from_pretrained(model_id) self.assertTrue(model.__class__ == VisionEfficientTuningModel) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_vision_efficient_tuning_prompt_demo_compatibility(self): - self.model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-prompt' - self.compatibility_check() - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_vision_efficient_tuning_bitfit_run_pipeline(self): model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-bitfit' @@ -105,11 +84,6 @@ class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck): model = Model.from_pretrained(model_id) self.assertTrue(model.__class__ == VisionEfficientTuningModel) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_vision_efficient_tuning_bitfit_demo_compatibility(self): - self.model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-bitfit' - self.compatibility_check() - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_vision_efficient_tuning_sidetuning_run_pipeline(self): model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-sidetuning' @@ -125,11 +99,6 @@ class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck): model = Model.from_pretrained(model_id) self.assertTrue(model.__class__ == VisionEfficientTuningModel) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_vision_efficient_tuning_sidetuning_demo_compatibility(self): - self.model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-sidetuning' - self.compatibility_check() - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_vision_efficient_tuning_utuning_run_pipeline(self): model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-utuning' @@ -144,11 +113,6 @@ class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck): model = Model.from_pretrained(model_id) 
self.assertTrue(model.__class__ == VisionEfficientTuningModel) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_vision_efficient_tuning_utuning_demo_compatibility(self): - self.model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-utuning' - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_vision_middleware.py b/tests/pipelines/test_vision_middleware.py index b3531154..e8c1218b 100644 --- a/tests/pipelines/test_vision_middleware.py +++ b/tests/pipelines/test_vision_middleware.py @@ -5,11 +5,10 @@ from modelscope.models import Model from modelscope.models.cv.vision_middleware import VisionMiddlewareModel from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VisionMiddlewareTest(unittest.TestCase, DemoCompatibilityCheck): +class VisionMiddlewareTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_segmentation diff --git a/tests/pipelines/test_vop_retrieval.py b/tests/pipelines/test_vop_retrieval.py index c9c356c5..78e7eecc 100644 --- a/tests/pipelines/test_vop_retrieval.py +++ b/tests/pipelines/test_vop_retrieval.py @@ -5,11 +5,10 @@ from modelscope.models import Model from modelscope.models.cv.vop_retrieval import VoP from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VopRetrievalTest(unittest.TestCase, DemoCompatibilityCheck): +class VopRetrievalTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.vop_retrieval diff --git a/tests/pipelines/test_vop_retrieval_sebias.py b/tests/pipelines/test_vop_retrieval_sebias.py index bea1bc45..a129f7f0 100644 --- a/tests/pipelines/test_vop_retrieval_sebias.py +++ b/tests/pipelines/test_vop_retrieval_sebias.py @@ -5,11 +5,10 @@ from modelscope.models import Model from modelscope.models.cv.vop_retrieval import VideoTextRetrievalModelSeries from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VopRetrievalTest(unittest.TestCase, DemoCompatibilityCheck): +class VopRetrievalTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.vop_retrieval diff --git a/tests/pipelines/test_vop_retrieval_separtial.py b/tests/pipelines/test_vop_retrieval_separtial.py index 942fbd3b..c5832aaa 100644 --- a/tests/pipelines/test_vop_retrieval_separtial.py +++ b/tests/pipelines/test_vop_retrieval_separtial.py @@ -5,11 +5,10 @@ from modelscope.models import Model from modelscope.models.cv.vop_retrieval import VideoTextRetrievalModelSeries from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VopRetrievalTest(unittest.TestCase, DemoCompatibilityCheck): +class VopRetrievalTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.vop_retrieval diff --git a/tests/pipelines/test_vop_retrieval_seproj.py b/tests/pipelines/test_vop_retrieval_seproj.py index a371ac36..2fceb2e7 100644 --- a/tests/pipelines/test_vop_retrieval_seproj.py +++ b/tests/pipelines/test_vop_retrieval_seproj.py @@ -5,11 +5,10 @@ from 
modelscope.models import Model from modelscope.models.cv.vop_retrieval import VideoTextRetrievalModelSeries from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VopRetrievalTest(unittest.TestCase, DemoCompatibilityCheck): +class VopRetrievalTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.vop_retrieval diff --git a/tests/pipelines/test_wenet_automatic_speech_recognition.py b/tests/pipelines/test_wenet_automatic_speech_recognition.py index 4adf8119..ac47cea7 100644 --- a/tests/pipelines/test_wenet_automatic_speech_recognition.py +++ b/tests/pipelines/test_wenet_automatic_speech_recognition.py @@ -10,7 +10,6 @@ import soundfile from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import ColorCodes, Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import download_and_untar, test_level @@ -20,8 +19,7 @@ WAV_FILE = 'data/test/audios/asr_example.wav' URL_FILE = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example.wav' -class WeNetAutomaticSpeechRecognitionTest(unittest.TestCase, - DemoCompatibilityCheck): +class WeNetAutomaticSpeechRecognitionTest(unittest.TestCase): action_info = { 'test_run_with_pcm': { 'checking_item': OutputKeys.TEXT, diff --git a/tests/pipelines/test_word_segmentation.py b/tests/pipelines/test_word_segmentation.py index f8bdaef7..f8c9e078 100644 --- a/tests/pipelines/test_word_segmentation.py +++ b/tests/pipelines/test_word_segmentation.py @@ -10,12 +10,11 @@ from modelscope.pipelines.nlp import WordSegmentationPipeline from modelscope.preprocessors import \ TokenClassificationTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool from modelscope.utils.test_utils import test_level -class WordSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): +class WordSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.word_segmentation @@ -164,10 +163,6 @@ class WordSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(task=Tasks.word_segmentation) print(pipeline_ins(input=self.sentence)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_zero_shot_classification.py b/tests/pipelines/test_zero_shot_classification.py index f9a52b42..89832d18 100644 --- a/tests/pipelines/test_zero_shot_classification.py +++ b/tests/pipelines/test_zero_shot_classification.py @@ -9,12 +9,11 @@ from modelscope.pipelines.nlp import ZeroShotClassificationPipeline from modelscope.preprocessors import \ ZeroShotClassificationTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool from modelscope.utils.test_utils import test_level -class ZeroShotClassificationTest(unittest.TestCase, DemoCompatibilityCheck): +class ZeroShotClassificationTest(unittest.TestCase): def setUp(self) -> None: self.task = 
Tasks.zero_shot_classification @@ -79,10 +78,6 @@ class ZeroShotClassificationTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(task=Tasks.zero_shot_classification) print(pipeline_ins(input=self.sentence, candidate_labels=self.labels)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/run_config.yaml b/tests/run_config.yaml index 773c6397..ba678468 100644 --- a/tests/run_config.yaml +++ b/tests/run_config.yaml @@ -21,6 +21,7 @@ isolated: # test cases that may require excessive anmount of GPU memory or run - test_image_instance_segmentation_trainer.py - test_image_portrait_enhancement_trainer.py - test_translation_trainer.py + - test_translation_evaluation_trainer.py - test_unifold.py - test_automatic_post_editing.py - test_mplug_tasks.py @@ -66,7 +67,7 @@ isolated: # test cases that may require excessive anmount of GPU memory or run envs: default: # default env, case not in other env will in default, pytorch. dependencies: # requirement packages,pip install before test case run. - - numpy>=1.20 + - numpy>=1.20,<=1.21.0 - protobuf<4,>=3.20.2 tensorflow1x: # cases excuted tensorflow1.x framework. requirements: # requirements files run before test case run. @@ -77,6 +78,7 @@ envs: - test_text_to_speech.py - test_csanmt_translation.py - test_translation_trainer.py + - test_translation_evaluation_trainer.py - test_ocr_detection.py - test_automatic_speech_recognition.py - test_image_matting.py @@ -85,3 +87,21 @@ envs: - test_image_style_transfer.py - test_image_portrait_stylization_trainer.py - test_language_identification.py + - test_language_guided_video_summarization_trainer.py + - test_motion_generation.py + - test_universal_matting.py + - test_dialog_modeling.py + - test_trainer.py + - test_abnormal_object_detection.py + - test_image_face_fusion.py + - test_ocr_detection_db_trainer.py + - test_language_guided_video_summarization.py + - test_interactive_translation_pipeline.py + - test_image_defrcn_fewshot_trainer.py + - test_automatic_post_editing.py + - test_human_reconstruction.py + - test_nerf_recon_acc_trainer.py + - test_nerf_recon_acc.py + - test_speech_signal_process.py + - test_tensorboard_hook.py + - test_efficient_diffusion_tuning_trainer.py diff --git a/tests/trainers/audio/test_kws_farfield_trainer.py b/tests/trainers/audio/test_kws_farfield_trainer.py index cc2b38f6..9bf65e04 100644 --- a/tests/trainers/audio/test_kws_farfield_trainer.py +++ b/tests/trainers/audio/test_kws_farfield_trainer.py @@ -23,6 +23,7 @@ class TestKwsFarfieldTrainer(unittest.TestCase): if not os.path.exists(self.tmp_dir): os.makedirs(self.tmp_dir) self.model_id = 'damo/speech_dfsmn_kws_char_farfield_16k_nihaomiya' + self.model_id_iot = 'damo/speech_dfsmn_kws_char_farfield_iot_16k_nihaomiya' train_pos_list = self.create_list('pos.list', POS_FILE) train_neg_list = self.create_list('neg.list', NEG_FILE) @@ -83,3 +84,23 @@ class TestKwsFarfieldTrainer(unittest.TestCase): f'work_dir:{self.tmp_dir}') self.assertIn('val_dataset.bin', results_files, f'work_dir:{self.tmp_dir}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_normal_iot(self): + kwargs = dict( + model=self.model_id_iot, + work_dir=self.tmp_dir, + workers=2, + max_epochs=2, + train_iters_per_epoch=2, + val_iters_per_epoch=1, + custom_conf=self.custom_conf) + + trainer = build_trainer( + Trainers.speech_dfsmn_kws_char_farfield, 
default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files, + f'work_dir:{self.tmp_dir}') + self.assertIn('val_dataset.bin', results_files, + f'work_dir:{self.tmp_dir}') diff --git a/tests/trainers/easycv/__init__.py b/tests/trainers/cli/__init__.py similarity index 100% rename from tests/trainers/easycv/__init__.py rename to tests/trainers/cli/__init__.py diff --git a/tests/trainers/cli/test_cli.py b/tests/trainers/cli/test_cli.py new file mode 100644 index 00000000..b9fb7539 --- /dev/null +++ b/tests/trainers/cli/test_cli.py @@ -0,0 +1,52 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +import json + +from modelscope import MsDataset, TrainingArgs, build_dataset_from_file +from modelscope.utils.test_utils import test_level + + +class TestCli(unittest.TestCase): + + def setUp(self) -> None: + content = [{ + 'dataset': { + 'dataset_name': 'clue', + 'subset_name': 'cmnli', + 'split': 'train', + }, + 'column_mapping': { + 'sentence1': 'sentence1', + 'sentence2': 'sentence2', + 'label': 'label', + }, + 'split': 0.8, + }, { + 'dataset': { + 'dataset_name': 'glue', + 'subset_name': 'mnli', + 'split': 'validation_matched', + }, + 'column_mapping': { + 'premise': 'sentence1', + 'hypothesis': 'sentence2', + 'label': 'label', + }, + 'split': 'val', + }] + with open('./dataset.json', 'w') as f: + json.dump(content, f) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_merge_dataset_from_file(self): + dataset = MsDataset.load('clue', subset_name='cmnli', split='train') + dataset2 = MsDataset.load( + 'glue', subset_name='mnli', split='validation_matched') + training_args = TrainingArgs(dataset_json_file='./dataset.json') + train, test = build_dataset_from_file(training_args.dataset_json_file) + self.assertEqual(len(train) + len(test), len(dataset) + len(dataset2)) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/easycv/test_easycv_trainer.py b/tests/trainers/easycv/test_easycv_trainer.py deleted file mode 100644 index 11f9a739..00000000 --- a/tests/trainers/easycv/test_easycv_trainer.py +++ /dev/null @@ -1,238 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import glob -import os -import shutil -import tempfile -import unittest - -import json -import torch - -from modelscope.metainfo import Models, Pipelines, Trainers -from modelscope.msdatasets import MsDataset -from modelscope.trainers import build_trainer -from modelscope.utils.config import Config -from modelscope.utils.constant import LogKeys, ModeKeys, Tasks -from modelscope.utils.logger import get_logger -from modelscope.utils.test_utils import DistributedTestCase, test_level -from modelscope.utils.torch_utils import is_master - - -def train_func(work_dir, dist=False, log_interval=3, imgs_per_gpu=4): - import easycv - config_path = os.path.join( - os.path.dirname(easycv.__file__), - 'configs/detection/yolox/yolox_s_8xb16_300e_coco.py') - - cfg = Config.from_file(config_path) - - cfg.log_config.update( - dict(hooks=[ - dict(type='TextLoggerHook'), - dict(type='TensorboardLoggerHook') - ])) # not support TensorboardLoggerHookV2 - - ms_cfg_file = os.path.join(work_dir, 'ms_yolox_s_8xb16_300e_coco.json') - from easycv.utils.ms_utils import to_ms_config - - if is_master(): - to_ms_config( - cfg, - dump=True, - task=Tasks.image_object_detection, - ms_model_name=Models.yolox, - pipeline_name=Pipelines.easycv_detection, - save_path=ms_cfg_file) - - trainer_name = Trainers.easycv - train_dataset = MsDataset.load( - dataset_name='small_coco_for_test', namespace='EasyCV', split='train') - eval_dataset = MsDataset.load( - dataset_name='small_coco_for_test', - namespace='EasyCV', - split='validation') - - cfg_options = { - 'train.max_epochs': - 2, - 'train.dataloader.batch_size_per_gpu': - imgs_per_gpu, - 'evaluation.dataloader.batch_size_per_gpu': - 2, - 'train.hooks': [ - { - 'type': 'CheckpointHook', - 'interval': 1 - }, - { - 'type': 'EvaluationHook', - 'interval': 1 - }, - { - 'type': 'TextLoggerHook', - 'ignore_rounding_keys': None, - 'interval': log_interval - }, - ] - } - kwargs = dict( - cfg_file=ms_cfg_file, - train_dataset=train_dataset, - eval_dataset=eval_dataset, - work_dir=work_dir, - cfg_options=cfg_options, - launcher='pytorch' if dist else None) - - trainer = build_trainer(trainer_name, kwargs) - trainer.train() - - -@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') -class EasyCVTrainerTestSingleGpu(unittest.TestCase): - - def setUp(self): - self.logger = get_logger() - self.logger.info(('Testing %s.%s' % - (type(self).__name__, self._testMethodName))) - self.tmp_dir = tempfile.TemporaryDirectory().name - if not os.path.exists(self.tmp_dir): - os.makedirs(self.tmp_dir) - - def tearDown(self): - super().tearDown() - shutil.rmtree(self.tmp_dir, ignore_errors=True) - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_single_gpu(self): - train_func(self.tmp_dir) - - results_files = os.listdir(self.tmp_dir) - json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) - self.assertEqual(len(json_files), 1) - - with open(json_files[0], 'r', encoding='utf-8') as f: - lines = [i.strip() for i in f.readlines()] - - self.assertDictContainsSubset( - { - LogKeys.MODE: ModeKeys.TRAIN, - LogKeys.EPOCH: 1, - LogKeys.ITER: 3, - LogKeys.LR: 0.00029 - }, json.loads(lines[0])) - self.assertDictContainsSubset( - { - LogKeys.MODE: ModeKeys.EVAL, - LogKeys.EPOCH: 1, - LogKeys.ITER: 10 - }, json.loads(lines[1])) - self.assertDictContainsSubset( - { - LogKeys.MODE: ModeKeys.TRAIN, - LogKeys.EPOCH: 2, - LogKeys.ITER: 3, - LogKeys.LR: 0.00205 - }, json.loads(lines[2])) - self.assertDictContainsSubset( - { - LogKeys.MODE: ModeKeys.EVAL, - 
LogKeys.EPOCH: 2, - LogKeys.ITER: 10 - }, json.loads(lines[3])) - self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) - self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) - for i in [0, 2]: - self.assertIn(LogKeys.DATA_LOAD_TIME, lines[i]) - self.assertIn(LogKeys.ITER_TIME, lines[i]) - self.assertIn(LogKeys.MEMORY, lines[i]) - self.assertIn('total_loss', lines[i]) - for i in [1, 3]: - self.assertIn( - 'CocoDetectionEvaluator_DetectionBoxes_Precision/mAP', - lines[i]) - self.assertIn('DetectionBoxes_Precision/mAP', lines[i]) - self.assertIn('DetectionBoxes_Precision/mAP@.50IOU', lines[i]) - self.assertIn('DetectionBoxes_Precision/mAP@.75IOU', lines[i]) - self.assertIn('DetectionBoxes_Precision/mAP (small)', lines[i]) - - -@unittest.skipIf(not torch.cuda.is_available() - or torch.cuda.device_count() <= 1, 'distributed unittest') -class EasyCVTrainerTestMultiGpus(DistributedTestCase): - - def setUp(self): - self.logger = get_logger() - self.logger.info(('Testing %s.%s' % - (type(self).__name__, self._testMethodName))) - self.tmp_dir = tempfile.TemporaryDirectory().name - if not os.path.exists(self.tmp_dir): - os.makedirs(self.tmp_dir) - - def tearDown(self): - super().tearDown() - shutil.rmtree(self.tmp_dir, ignore_errors=True) - - @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') - def test_multi_gpus(self): - self.start( - train_func, - num_gpus=2, - work_dir=self.tmp_dir, - dist=True, - log_interval=2, - imgs_per_gpu=5) - - results_files = os.listdir(self.tmp_dir) - json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) - self.assertEqual(len(json_files), 1) - - with open(json_files[0], 'r', encoding='utf-8') as f: - lines = [i.strip() for i in f.readlines()] - - self.assertDictContainsSubset( - { - LogKeys.MODE: ModeKeys.TRAIN, - LogKeys.EPOCH: 1, - LogKeys.ITER: 2, - LogKeys.LR: 0.0002 - }, json.loads(lines[0])) - self.assertDictContainsSubset( - { - LogKeys.MODE: ModeKeys.EVAL, - LogKeys.EPOCH: 1, - LogKeys.ITER: 5 - }, json.loads(lines[1])) - self.assertDictContainsSubset( - { - LogKeys.MODE: ModeKeys.TRAIN, - LogKeys.EPOCH: 2, - LogKeys.ITER: 2, - LogKeys.LR: 0.0018 - }, json.loads(lines[2])) - self.assertDictContainsSubset( - { - LogKeys.MODE: ModeKeys.EVAL, - LogKeys.EPOCH: 2, - LogKeys.ITER: 5 - }, json.loads(lines[3])) - - self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) - self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) - - for i in [0, 2]: - self.assertIn(LogKeys.DATA_LOAD_TIME, lines[i]) - self.assertIn(LogKeys.ITER_TIME, lines[i]) - self.assertIn(LogKeys.MEMORY, lines[i]) - self.assertIn('total_loss', lines[i]) - for i in [1, 3]: - self.assertIn( - 'CocoDetectionEvaluator_DetectionBoxes_Precision/mAP', - lines[i]) - self.assertIn('DetectionBoxes_Precision/mAP', lines[i]) - self.assertIn('DetectionBoxes_Precision/mAP@.50IOU', lines[i]) - self.assertIn('DetectionBoxes_Precision/mAP@.75IOU', lines[i]) - self.assertIn('DetectionBoxes_Precision/mAP (small)', lines[i]) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/trainers/easycv/test_easycv_trainer_detection_dino.py b/tests/trainers/easycv/test_easycv_trainer_detection_dino.py deleted file mode 100644 index 90d1f691..00000000 --- a/tests/trainers/easycv/test_easycv_trainer_detection_dino.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import glob -import os -import shutil -import tempfile -import unittest - -import torch - -from modelscope.metainfo import Trainers -from modelscope.msdatasets import MsDataset -from modelscope.trainers import build_trainer -from modelscope.utils.constant import LogKeys -from modelscope.utils.logger import get_logger -from modelscope.utils.test_utils import test_level - - -@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') -class EasyCVTrainerTestDetectionDino(unittest.TestCase): - model_id = 'damo/cv_swinl_image-object-detection_dino' - - def setUp(self): - self.logger = get_logger() - self.logger.info(('Testing %s.%s' % - (type(self).__name__, self._testMethodName))) - - def _train(self, tmp_dir): - cfg_options = {'train.max_epochs': 1} - - trainer_name = Trainers.easycv - - train_dataset = MsDataset.load( - dataset_name='small_coco_for_test', - namespace='EasyCV', - split='train') - eval_dataset = MsDataset.load( - dataset_name='small_coco_for_test', - namespace='EasyCV', - split='validation') - - kwargs = dict( - model=self.model_id, - train_dataset=train_dataset, - eval_dataset=eval_dataset, - work_dir=tmp_dir, - cfg_options=cfg_options) - - trainer = build_trainer(trainer_name, kwargs) - trainer.train() - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_trainer_single_gpu(self): - temp_file_dir = tempfile.TemporaryDirectory() - tmp_dir = temp_file_dir.name - if not os.path.exists(tmp_dir): - os.makedirs(tmp_dir) - - self._train(tmp_dir) - - results_files = os.listdir(tmp_dir) - json_files = glob.glob(os.path.join(tmp_dir, '*.log.json')) - self.assertEqual(len(json_files), 1) - self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) - - temp_file_dir.cleanup() - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py b/tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py deleted file mode 100644 index e4f0c57e..00000000 --- a/tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import glob -import os -import shutil -import tempfile -import unittest - -import torch - -from modelscope.metainfo import Trainers -from modelscope.msdatasets import MsDataset -from modelscope.trainers import build_trainer -from modelscope.utils.constant import DownloadMode, LogKeys, Tasks -from modelscope.utils.logger import get_logger -from modelscope.utils.test_utils import test_level - - -@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') -class EasyCVTrainerTestFace2DKeypoints(unittest.TestCase): - model_id = 'damo/cv_mobilenet_face-2d-keypoints_alignment' - - def setUp(self): - self.logger = get_logger() - self.logger.info(('Testing %s.%s' % - (type(self).__name__, self._testMethodName))) - - def _train(self, tmp_dir): - cfg_options = {'train.max_epochs': 2} - - trainer_name = Trainers.easycv - - train_dataset = MsDataset.load( - dataset_name='face_2d_keypoints_dataset', - namespace='modelscope', - split='train', - download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS) - eval_dataset = MsDataset.load( - dataset_name='face_2d_keypoints_dataset', - namespace='modelscope', - split='train', - download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS) - - kwargs = dict( - model=self.model_id, - train_dataset=train_dataset, - eval_dataset=eval_dataset, - work_dir=tmp_dir, - cfg_options=cfg_options) - - trainer = build_trainer(trainer_name, kwargs) - trainer.train() - - @unittest.skip( - 'skip since face_2d_keypoints_dataset is set to private for now') - def test_trainer_single_gpu(self): - temp_file_dir = tempfile.TemporaryDirectory() - tmp_dir = temp_file_dir.name - if not os.path.exists(tmp_dir): - os.makedirs(tmp_dir) - - self._train(tmp_dir) - - results_files = os.listdir(tmp_dir) - json_files = glob.glob(os.path.join(tmp_dir, '*.log.json')) - self.assertEqual(len(json_files), 1) - self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) - - temp_file_dir.cleanup() - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/trainers/easycv/test_easycv_trainer_hand_2d_keypoints.py b/tests/trainers/easycv/test_easycv_trainer_hand_2d_keypoints.py deleted file mode 100644 index 270ecbc4..00000000 --- a/tests/trainers/easycv/test_easycv_trainer_hand_2d_keypoints.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import glob -import os -import shutil -import tempfile -import unittest - -import torch - -from modelscope.metainfo import Trainers -from modelscope.msdatasets import MsDataset -from modelscope.trainers import build_trainer -from modelscope.utils.constant import DownloadMode, LogKeys, Tasks -from modelscope.utils.logger import get_logger -from modelscope.utils.test_utils import test_level - - -@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') -class EasyCVTrainerTestHand2dKeypoints(unittest.TestCase): - model_id = 'damo/cv_hrnetw18_hand-pose-keypoints_coco-wholebody' - - def setUp(self): - self.logger = get_logger() - self.logger.info(('Testing %s.%s' % - (type(self).__name__, self._testMethodName))) - self.tmp_dir = tempfile.TemporaryDirectory().name - if not os.path.exists(self.tmp_dir): - os.makedirs(self.tmp_dir) - - def tearDown(self): - super().tearDown() - shutil.rmtree(self.tmp_dir, ignore_errors=True) - - def _train(self): - cfg_options = {'train.max_epochs': 20} - - trainer_name = Trainers.easycv - - train_dataset = MsDataset.load( - dataset_name='cv_hand_2d_keypoints_coco_wholebody', - namespace='chenhyer', - split='subtrain', - download_mode=DownloadMode.FORCE_REDOWNLOAD) - eval_dataset = MsDataset.load( - dataset_name='cv_hand_2d_keypoints_coco_wholebody', - namespace='chenhyer', - split='subtrain', - download_mode=DownloadMode.FORCE_REDOWNLOAD) - - kwargs = dict( - model=self.model_id, - train_dataset=train_dataset, - eval_dataset=eval_dataset, - work_dir=self.tmp_dir, - cfg_options=cfg_options) - - trainer = build_trainer(trainer_name, kwargs) - trainer.train() - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_trainer_single_gpu(self): - self._train() - - results_files = os.listdir(self.tmp_dir) - json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) - self.assertEqual(len(json_files), 1) - self.assertIn(f'{LogKeys.EPOCH}_10.pth', results_files) - self.assertIn(f'{LogKeys.EPOCH}_20.pth', results_files) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/trainers/easycv/test_easycv_trainer_hand_detection.py b/tests/trainers/easycv/test_easycv_trainer_hand_detection.py deleted file mode 100644 index 60ea1319..00000000 --- a/tests/trainers/easycv/test_easycv_trainer_hand_detection.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import glob -import os -import shutil -import tempfile -import unittest - -import torch - -from modelscope.metainfo import Trainers -from modelscope.msdatasets import MsDataset -from modelscope.trainers import build_trainer -from modelscope.utils.constant import DownloadMode, LogKeys, Tasks -from modelscope.utils.logger import get_logger -from modelscope.utils.test_utils import test_level - - -class EasyCVTrainerTestHandDetection(unittest.TestCase): - model_id = 'damo/cv_yolox-pai_hand-detection' - - def setUp(self): - self.logger = get_logger() - self.logger.info(('Testing %s.%s' % - (type(self).__name__, self._testMethodName))) - - def _train(self, tmp_dir): - cfg_options = {'train.max_epochs': 2} - - trainer_name = Trainers.easycv - - train_dataset = MsDataset.load( - dataset_name='hand_detection_dataset', split='subtrain') - eval_dataset = MsDataset.load( - dataset_name='hand_detection_dataset', split='subtrain') - - kwargs = dict( - model=self.model_id, - train_dataset=train_dataset, - eval_dataset=eval_dataset, - work_dir=tmp_dir, - cfg_options=cfg_options) - - trainer = build_trainer(trainer_name, kwargs) - trainer.train() - - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') - def test_trainer_single_gpu(self): - temp_file_dir = tempfile.TemporaryDirectory() - tmp_dir = temp_file_dir.name - if not os.path.exists(tmp_dir): - os.makedirs(tmp_dir) - - self._train(tmp_dir) - - results_files = os.listdir(tmp_dir) - # json_files = glob.glob(os.path.join(tmp_dir, '*.log.json')) - self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) - - temp_file_dir.cleanup() - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/trainers/easycv/test_easycv_trainer_panoptic_mask2former.py b/tests/trainers/easycv/test_easycv_trainer_panoptic_mask2former.py deleted file mode 100644 index f6a6c41a..00000000 --- a/tests/trainers/easycv/test_easycv_trainer_panoptic_mask2former.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import glob -import os -import shutil -import tempfile -import unittest - -import torch -from mmcv.runner.hooks import HOOKS as MMCV_HOOKS - -from modelscope.metainfo import Trainers -from modelscope.msdatasets import MsDataset -from modelscope.trainers import build_trainer -from modelscope.utils.constant import LogKeys, Tasks -from modelscope.utils.logger import get_logger -from modelscope.utils.test_utils import test_level - - -@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') -class EasyCVTrainerTestPanopticMask2Former(unittest.TestCase): - - def setUp(self): - self.logger = get_logger() - self.logger.info(('Testing %s.%s' % - (type(self).__name__, self._testMethodName))) - self.tmp_dir = tempfile.TemporaryDirectory().name - if not os.path.exists(self.tmp_dir): - os.makedirs(self.tmp_dir) - - def tearDown(self): - super().tearDown() - shutil.rmtree(self.tmp_dir, ignore_errors=True) - - def _train(self): - cfg_options = {'train.max_epochs': 1} - - trainer_name = Trainers.easycv - - train_dataset = MsDataset.load( - dataset_name='COCO2017_panopic_subset', split='train') - eval_dataset = MsDataset.load( - dataset_name='COCO2017_panopic_subset', split='validation') - kwargs = dict( - model='damo/cv_r50_panoptic-segmentation_cocopan', - train_dataset=train_dataset, - eval_dataset=eval_dataset, - work_dir=self.tmp_dir, - cfg_options=cfg_options) - - trainer = build_trainer(trainer_name, kwargs) - - hook_name = 'YOLOXLrUpdaterHook' - mmcv_hook = MMCV_HOOKS._module_dict.pop(hook_name, None) - - trainer.train() - - MMCV_HOOKS._module_dict[hook_name] = mmcv_hook - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_single_gpu_mask2former_r50(self): - self._train() - - results_files = os.listdir(self.tmp_dir) - json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) - self.assertEqual(len(json_files), 1) - self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/trainers/easycv/test_easycv_trainer_realtime_object_detection.py b/tests/trainers/easycv/test_easycv_trainer_realtime_object_detection.py deleted file mode 100644 index 1171eed4..00000000 --- a/tests/trainers/easycv/test_easycv_trainer_realtime_object_detection.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import glob -import os -import shutil -import tempfile -import unittest - -import torch - -from modelscope.hub.snapshot_download import snapshot_download -from modelscope.metainfo import Trainers -from modelscope.msdatasets import MsDataset -from modelscope.trainers import build_trainer -from modelscope.utils.constant import DownloadMode, LogKeys, Tasks -from modelscope.utils.logger import get_logger -from modelscope.utils.test_utils import test_level - - -@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') -class EasyCVTrainerTestRealtimeObjectDetection(unittest.TestCase): - model_id = 'damo/cv_cspnet_image-object-detection_yolox' - - def setUp(self): - self.logger = get_logger() - self.logger.info(('Testing %s.%s' % - (type(self).__name__, self._testMethodName))) - - def _train(self, tmp_dir): - # cfg_options = {'train.max_epochs': 2} - self.cache_path = snapshot_download(self.model_id) - cfg_options = { - 'train.max_epochs': - 2, - 'train.dataloader.batch_size_per_gpu': - 4, - 'evaluation.dataloader.batch_size_per_gpu': - 2, - 'train.hooks': [ - { - 'type': 'CheckpointHook', - 'interval': 1 - }, - { - 'type': 'EvaluationHook', - 'interval': 1 - }, - { - 'type': 'TextLoggerHook', - 'ignore_rounding_keys': None, - 'interval': 2 - }, - ], - 'load_from': - os.path.join(self.cache_path, 'pytorch_model.bin') - } - - trainer_name = Trainers.easycv - - train_dataset = MsDataset.load( - dataset_name='small_coco_for_test', - namespace='EasyCV', - split='train') - eval_dataset = MsDataset.load( - dataset_name='small_coco_for_test', - namespace='EasyCV', - split='validation') - - kwargs = dict( - model=self.model_id, - # model_revision='v1.0.2', - train_dataset=train_dataset, - eval_dataset=eval_dataset, - work_dir=tmp_dir, - cfg_options=cfg_options) - - trainer = build_trainer(trainer_name, kwargs) - trainer.train() - - @unittest.skipUnless( - test_level() >= 0, - 'skip since face_2d_keypoints_dataset is set to private for now') - def test_trainer_single_gpu(self): - temp_file_dir = tempfile.TemporaryDirectory() - tmp_dir = temp_file_dir.name - if not os.path.exists(tmp_dir): - os.makedirs(tmp_dir) - - self._train(tmp_dir) - - results_files = os.listdir(tmp_dir) - json_files = glob.glob(os.path.join(tmp_dir, '*.log.json')) - self.assertEqual(len(json_files), 1) - self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) - - temp_file_dir.cleanup() - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/trainers/easycv/test_segformer.py b/tests/trainers/easycv/test_segformer.py deleted file mode 100644 index 90a66635..00000000 --- a/tests/trainers/easycv/test_segformer.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import glob -import os -import shutil -import tempfile -import unittest - -import torch - -from modelscope.metainfo import Trainers -from modelscope.msdatasets import MsDataset -from modelscope.trainers import build_trainer -from modelscope.utils.constant import LogKeys, Tasks -from modelscope.utils.logger import get_logger -from modelscope.utils.test_utils import test_level - - -@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') -class EasyCVTrainerTestSegformer(unittest.TestCase): - - def setUp(self): - self.logger = get_logger() - self.logger.info(('Testing %s.%s' % - (type(self).__name__, self._testMethodName))) - self.tmp_dir = tempfile.TemporaryDirectory().name - if not os.path.exists(self.tmp_dir): - os.makedirs(self.tmp_dir) - - def tearDown(self): - super().tearDown() - shutil.rmtree(self.tmp_dir, ignore_errors=True) - - def _train(self): - - cfg_options = { - 'train.max_epochs': 2, - 'model.decode_head.norm_cfg.type': 'BN' - } - - trainer_name = Trainers.easycv - train_dataset = MsDataset.load( - dataset_name='small_coco_stuff164k', - namespace='EasyCV', - split='train') - eval_dataset = MsDataset.load( - dataset_name='small_coco_stuff164k', - namespace='EasyCV', - split='validation') - kwargs = dict( - model= - 'damo/cv_segformer-b0_image_semantic-segmentation_coco-stuff164k', - train_dataset=train_dataset, - eval_dataset=eval_dataset, - work_dir=self.tmp_dir, - cfg_options=cfg_options) - - trainer = build_trainer(trainer_name, kwargs) - trainer.train() - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_single_gpu_segformer(self): - self._train() - - results_files = os.listdir(self.tmp_dir) - json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) - self.assertEqual(len(json_files), 1) - self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) - self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/trainers/hooks/test_lr_scheduler_hook.py b/tests/trainers/hooks/test_lr_scheduler_hook.py index cd28b055..432fb39a 100644 --- a/tests/trainers/hooks/test_lr_scheduler_hook.py +++ b/tests/trainers/hooks/test_lr_scheduler_hook.py @@ -105,6 +105,7 @@ class LrSchedulerHookTest(unittest.TestCase): train_dataloader = trainer._build_dataloader_with_dataset( trainer.train_dataset, **trainer.cfg.train.get('dataloader', {})) trainer.register_optimizers_hook() + trainer.register_processors() trainer._hooks = [ hook for hook in trainer._hooks if hook.__class__.__name__ not in ['CheckpointHook', 'TextLoggerHook', 'IterTimerHook'] @@ -177,6 +178,7 @@ class LrSchedulerHookTest(unittest.TestCase): train_dataloader = trainer._build_dataloader_with_dataset( trainer.train_dataset, **trainer.cfg.train.get('dataloader', {})) trainer.register_optimizers_hook() + trainer.register_processors() trainer._hooks = [ hook for hook in trainer._hooks if hook.__class__.__name__ not in ['CheckpointHook', 'TextLoggerHook', 'IterTimerHook'] @@ -365,6 +367,7 @@ class PlateauLrSchedulerHookTest(unittest.TestCase): trainer.train_dataloader = train_dataloader trainer.data_loader = train_dataloader trainer.register_optimizers_hook() + trainer.register_processors() trainer._hooks = [ hook for hook in trainer._hooks if hook.__class__.__name__ not in ['CheckpointHook', 'TextLoggerHook', 'IterTimerHook'] diff --git a/tests/trainers/hooks/test_optimizer_hook.py b/tests/trainers/hooks/test_optimizer_hook.py index b9899c36..ed0e202a 100644 --- a/tests/trainers/hooks/test_optimizer_hook.py +++ 
b/tests/trainers/hooks/test_optimizer_hook.py @@ -150,6 +150,7 @@ class TorchAMPOptimizerHookTest(unittest.TestCase): train_dataloader = trainer._build_dataloader_with_dataset( trainer.train_dataset, **trainer.cfg.train.get('dataloader', {})) trainer.register_optimizers_hook() + trainer.register_processors() trainer._hooks = [ hook for hook in trainer._hooks if hook.__class__.__name__ not in ['CheckpointHook', 'TextLoggerHook', 'IterTimerHook'] diff --git a/tests/trainers/model_trainer_map.py b/tests/trainers/model_trainer_map.py index 4057c331..4e9005f7 100644 --- a/tests/trainers/model_trainer_map.py +++ b/tests/trainers/model_trainer_map.py @@ -11,33 +11,18 @@ model_trainer_map = { ['tests/trainers/audio/test_separation_trainer.py'], 'speech_tts/speech_sambert-hifigan_tts_zh-cn_multisp_pretrain_16k': ['tests/trainers/audio/test_tts_trainer.py'], - 'damo/cv_mobilenet_face-2d-keypoints_alignment': - ['tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py'], - 'damo/cv_hrnetw18_hand-pose-keypoints_coco-wholebody': - ['tests/trainers/easycv/test_easycv_trainer_hand_2d_keypoints.py'], - 'damo/cv_yolox-pai_hand-detection': - ['tests/trainers/easycv/test_easycv_trainer_hand_detection.py'], - 'damo/cv_r50_panoptic-segmentation_cocopan': - ['tests/trainers/easycv/test_easycv_trainer_panoptic_mask2former.py'], - 'damo/cv_segformer-b0_image_semantic-segmentation_coco-stuff164k': - ['tests/trainers/easycv/test_segformer.py'], 'damo/cv_resnet_carddetection_scrfd34gkps': ['tests/trainers/test_card_detection_scrfd_trainer.py'], - 'damo/multi-modal_clip-vit-base-patch16_zh': [ - 'tests/trainers/test_clip_trainer.py' - ], - 'damo/nlp_space_pretrained-dialog-model': [ - 'tests/trainers/test_dialog_intent_trainer.py' - ], - 'damo/cv_resnet_facedetection_scrfd10gkps': [ - 'tests/trainers/test_face_detection_scrfd_trainer.py' - ], - 'damo/nlp_structbert_faq-question-answering_chinese-base': [ - 'tests/trainers/test_finetune_faq_question_answering.py' - ], - 'PAI/nlp_gpt3_text-generation_0.35B_MoE-64': [ - 'tests/trainers/test_finetune_gpt_moe.py' - ], + 'damo/multi-modal_clip-vit-base-patch16_zh': + ['tests/trainers/test_clip_trainer.py'], + 'damo/nlp_space_pretrained-dialog-model': + ['tests/trainers/test_dialog_intent_trainer.py'], + 'damo/cv_resnet_facedetection_scrfd10gkps': + ['tests/trainers/test_face_detection_scrfd_trainer.py'], + 'damo/nlp_structbert_faq-question-answering_chinese-base': + ['tests/trainers/test_finetune_faq_question_answering.py'], + 'PAI/nlp_gpt3_text-generation_0.35B_MoE-64': + ['tests/trainers/test_finetune_gpt_moe.py'], 'damo/nlp_gpt3_text-generation_1.3B': [ 'tests/trainers/test_finetune_gpt3.py' ], @@ -139,6 +124,12 @@ model_trainer_map = { 'damo/nlp_csanmt_translation_en2es': [ 'tests/trainers/test_translation_trainer.py' ], + 'damo/nlp_unite_mup_translation_evaluation_multilingual_base': [ + 'tests/trainers/test_translation_evaluation_trainer.py' + ], + 'damo/nlp_unite_mup_translation_evaluation_multilingual_large': [ + 'tests/trainers/test_translation_evaluation_trainer.py' + ], 'damo/cv_googlenet_pgl-video-summarization': [ 'tests/trainers/test_video_summarization_trainer.py' ], diff --git a/tests/trainers/test_trainer_with_nlp.py b/tests/trainers/test_trainer_with_nlp.py index ceb04e15..a736d4fa 100644 --- a/tests/trainers/test_trainer_with_nlp.py +++ b/tests/trainers/test_trainer_with_nlp.py @@ -9,6 +9,7 @@ import unittest import numpy as np import torch from packaging import version +from torch.utils.data import RandomSampler from 
modelscope.hub.snapshot_download import snapshot_download from modelscope.metainfo import Metrics @@ -204,12 +205,20 @@ class TestTrainerWithNlp(unittest.TestCase): cfg.preprocessor.val['label2id'] = {'0': 0, '1': 1} cfg.train.dataloader.batch_size_per_gpu = 2 cfg.train.hooks = [{ - 'type': 'BestCkptSaverHook', - 'interval': 1, - 'by_epoch': False, - 'metric_key': 'accuracy', - 'max_checkpoint_num': 4, - 'restore_best': True, + 'type': + 'BestCkptSaverHook', + 'interval': + 1, + 'by_epoch': + False, + 'output_dir': + os.path.join(self.tmp_dir, 'output_test_best'), + 'metric_key': + 'accuracy', + 'max_checkpoint_num': + 4, + 'restore_best': + True, }, { 'type': 'TextLoggerHook', 'interval': 1 @@ -270,7 +279,7 @@ class TestTrainerWithNlp(unittest.TestCase): os.path.join(self.tmp_dir, 'output', 'pytorch_model.bin'))) self.assertTrue( os.path.isfile( - os.path.join(self.tmp_dir, 'output_best', + os.path.join(self.tmp_dir, 'output_test_best', 'pytorch_model.bin'))) md51 = hashlib.md5( pathlib.Path( @@ -282,7 +291,7 @@ class TestTrainerWithNlp(unittest.TestCase): self.assertEqual(md51, md52) md51 = hashlib.md5( pathlib.Path( - os.path.join(self.tmp_dir, 'output_best', + os.path.join(self.tmp_dir, 'output_test_best', 'pytorch_model.bin')).read_bytes()).hexdigest() md52 = hashlib.md5( pathlib.Path( @@ -472,6 +481,34 @@ class TestTrainerWithNlp(unittest.TestCase): cache_path + '/pytorch_model.bin', saving_fn=saving_fn)) self.assertTrue(os.path.isfile(f'{tmp_dir}/predicts.txt')) + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_with_custom_sampler(self): + tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + + model_id = 'damo/nlp_structbert_sentence-similarity_chinese-tiny' + cache_path = snapshot_download(model_id) + model = SbertForSequenceClassification.from_pretrained(cache_path) + + class CustomSampler(RandomSampler): + + pass + + kwargs = dict( + cfg_file=os.path.join(cache_path, ModelFile.CONFIGURATION), + model=model, + train_dataset=self.dataset, + eval_dataset=self.dataset, + samplers=CustomSampler(self.dataset), + work_dir=self.tmp_dir) + + trainer = build_trainer(default_args=kwargs) + trainer.train() + self.assertTrue( + type(trainer.train_dataloader.sampler) == CustomSampler) + self.assertTrue(type(trainer.eval_dataloader.sampler) == CustomSampler) + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_trainer_with_prediction(self): tmp_dir = tempfile.TemporaryDirectory().name diff --git a/tests/trainers/test_training_args.py b/tests/trainers/test_training_args.py index 6e4d306e..e8f6d8a2 100644 --- a/tests/trainers/test_training_args.py +++ b/tests/trainers/test_training_args.py @@ -1,8 +1,8 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import unittest -from modelscope.trainers.default_config import DEFAULT_CONFIG -from modelscope.trainers.training_args import CliArgumentParser, TrainingArgs +from modelscope import TrainingArgs +from modelscope.trainers.cli_argument_parser import CliArgumentParser from modelscope.utils.test_utils import test_level @@ -29,14 +29,14 @@ class TrainingArgsTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_flatten_args(self): - cfg = DEFAULT_CONFIG + training_args = TrainingArgs() input_args = [ '--optimizer_params', 'weight_decay=0.8,eps=1e-6,correct_bias=False', '--lr_scheduler_params', 'initial_lr=3e-5,niter_decay=1' ] - training_args = TrainingArgs.from_cli(input_args) - cfg = training_args(cfg) + training_args = training_args.parse_cli(input_args) + cfg, _ = training_args.to_config() self.assertAlmostEqual(cfg.train.optimizer.weight_decay, 0.8) self.assertAlmostEqual(cfg.train.optimizer.eps, 1e-6) self.assertFalse(cfg.train.optimizer.correct_bias) diff --git a/tests/trainers/test_translation_evaluation_trainer.py b/tests/trainers/test_translation_evaluation_trainer.py new file mode 100644 index 00000000..139427da --- /dev/null +++ b/tests/trainers/test_translation_evaluation_trainer.py @@ -0,0 +1,30 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +from modelscope.metainfo import Trainers +from modelscope.trainers import build_trainer +from modelscope.utils.test_utils import test_level + + +class TranslationEvaluationTest(unittest.TestCase): + + def setUp(self) -> None: + self.name = Trainers.translation_evaluation_trainer + self.model_id_large = 'damo/nlp_unite_mup_translation_evaluation_multilingual_large' + self.model_id_base = 'damo/nlp_unite_mup_translation_evaluation_multilingual_base' + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_unite_mup_large(self) -> None: + default_args = {'model': self.model_id_large} + trainer = build_trainer(name=self.name, default_args=default_args) + trainer.train() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_unite_mup_base(self) -> None: + default_args = {'model': self.model_id_base} + trainer = build_trainer(name=self.name, default_args=default_args) + trainer.train() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/utils/test_input_output.py b/tests/utils/test_input_output.py new file mode 100644 index 00000000..53b75a39 --- /dev/null +++ b/tests/utils/test_input_output.py @@ -0,0 +1,142 @@ +import base64 +import unittest + +import json + +from modelscope.utils.constant import Tasks +from modelscope.utils.input_output import ( + PipelineInfomation, service_base64_input_to_pipeline_input) + + +def encode_image_to_base64(image): + base64_str = str(base64.b64encode(image), 'utf-8') + return base64_str + + +class PipelineInputOutputTest(unittest.TestCase): + + def test_template_pipeline_dict_input(self): + pipeline_info = PipelineInfomation( + Tasks.task_template, 'PipelineTemplate', + 'modelscope/pipelines/pipeline_template.py') + schema = pipeline_info.schema + expect_schema = { + 'input': { + 'type': 'object', + 'properties': { + 'image': { + 'type': 'string', + 'description': + 'Base64 encoded image file or url string.' + }, + 'text': { + 'type': 'string', + 'description': 'The input text.' 
+ } + } + }, + 'parameters': { + 'type': 'object', + 'properties': { + 'max_length': { + 'type': 'integer', + 'default': 1024 + }, + 'top_p': { + 'type': 'number', + 'default': 0.8 + }, + 'postprocess_param1': { + 'type': 'string', + 'default': None + } + } + }, + 'output': { + 'type': 'object', + 'properties': { + 'boxes': { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, + 'output_img': { + 'type': 'string', + 'description': 'The base64 encoded image.' + }, + 'text_embedding': { + 'type': 'array', + 'items': { + 'type': 'number' + } + } + } + } + } + assert expect_schema == schema + + def test_template_pipeline_list_input(self): + pipeline_info = PipelineInfomation( + Tasks.text_classification, 'LanguageIdentificationPipeline', + 'modelscope/pipelines/nlp/language_identification_pipline.py') + schema = pipeline_info.schema + expect_schema = { + 'input': { + 'type': 'object', + 'properties': { + 'text': { + 'type': 'string', + 'description': 'The input text.' + }, + 'text2': { + 'type': 'string', + 'description': 'The input text.' + } + } + }, + 'parameters': {}, + 'output': { + 'type': 'object', + 'properties': { + 'scores': { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, + 'labels': { + 'type': 'array', + 'items': { + 'type': 'string' + } + } + } + } + } + assert expect_schema == schema + + def test_input_output_encode_decode(self): + with open('data/test/images/image_captioning.png', 'rb') as f: + image = f.read() + text = 'hello schema.' + request_json = { + 'input': { + 'image': encode_image_to_base64(image), + 'text': text + }, + 'parameters': { + 'max_length': 10000, + 'top_p': 0.8 + } + } + pipeline_inputs, parameters = service_base64_input_to_pipeline_input( + Tasks.task_template, request_json) + assert 'image' in pipeline_inputs + assert pipeline_inputs['text'] == text + assert parameters['max_length'] == 10000 + assert parameters['top_p'] == 0.8 + + +if __name__ == '__main__': + unittest.main() diff --git a/tools/convert_megatron_ckpt.py b/tools/convert_megatron_ckpt.py new file mode 100644 index 00000000..f9b8f8f3 --- /dev/null +++ b/tools/convert_megatron_ckpt.py @@ -0,0 +1,31 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+ +import argparse +import os + +from modelscope.models import Model +from modelscope.utils.megatron_utils import convert_megatron_checkpoint + + +def unwrap_model(model): + for name in ('model', 'module', 'dist_model'): + while hasattr(model, name): + model = getattr(model, name) + return model + + +parser = argparse.ArgumentParser( + description='Split or merge your megatron_based checkpoint.') +parser.add_argument( + '--model_dir', type=str, required=True, help='Checkpoint to be converted.') +parser.add_argument( + '--target_dir', type=str, required=True, help='Target save path.') +args = parser.parse_args() + +model = Model.from_pretrained( + args.model_dir, + rank=int(os.getenv('RANK')), + megatron_cfg={'tensor_model_parallel_size': int(os.getenv('WORLD_SIZE'))}) +unwrapped_model = unwrap_model(model) + +convert_megatron_checkpoint(unwrapped_model, model.model_dir, args.target_dir) diff --git a/tools/convert_megatron_ckpt.sh b/tools/convert_megatron_ckpt.sh new file mode 100644 index 00000000..86e94877 --- /dev/null +++ b/tools/convert_megatron_ckpt.sh @@ -0,0 +1,7 @@ +TARGET_TENSOR_MODEL_PARALLEL_SIZE=1 +ORIGIN_MODEL='damo/nlp_gpt3_text-generation_1.3B' +TARGET_DIR='./target' + +torchrun --nproc_per_node $TARGET_TENSOR_MODEL_PARALLEL_SIZE tools/convert_megatron_ckpt.py \ + --model_dir $ORIGIN_MODEL \ + --target_dir $TARGET_DIR \