diff --git a/README.md b/README.md
index 68f03744..c9b071ab 100644
--- a/README.md
+++ b/README.md
@@ -108,9 +108,9 @@ Audio:
* [speech_charctc_kws_phone-xiaoyun](https://modelscope.cn/models/damo/speech_charctc_kws_phone-xiaoyun)
* [u2pp_conformer-asr-cn-16k-online](https://modelscope.cn/models/wenet/u2pp_conformer-asr-cn-16k-online)
-
+
* [speech_fsmn_vad_zh-cn-16k-common-pytorch](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary)
-
+
* [punc_ct-transformer_zh-cn-common-vocab272727-pytorch](https://modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/summary)
* [speech_frcrn_ans_cirm_16k](https://modelscope.cn/models/damo/speech_frcrn_ans_cirm_16k)
diff --git a/examples/pytorch/image_classification/finetune_image_classification.py b/examples/pytorch/image_classification/finetune_image_classification.py
index 4e96c2cd..e5bb9bdd 100644
--- a/examples/pytorch/image_classification/finetune_image_classification.py
+++ b/examples/pytorch/image_classification/finetune_image_classification.py
@@ -1,13 +1,12 @@
import os
from dataclasses import dataclass, field
+from modelscope import MsDataset, TrainingArgs
from modelscope.metainfo import Trainers
-from modelscope.msdatasets.ms_dataset import MsDataset
from modelscope.trainers.builder import build_trainer
-from modelscope.trainers.training_args import TrainingArgs
-@dataclass
+@dataclass(init=False)
class ImageClassificationTrainingArgs(TrainingArgs):
num_classes: int = field(
default=None,
@@ -46,26 +45,35 @@ def create_dataset(name, split):
dataset_name, namespace=namespace, subset_name='default', split=split)
-def train():
- args = ImageClassificationTrainingArgs.from_cli(
- model='damo/cv_vit-base_image-classification_ImageNet-labels',
- max_epochs=1,
- lr=1e-4,
- optimizer='AdamW',
- warmup_iters=1,
- topk=(1, ))
- if args.dataset_name is not None:
- train_dataset = create_dataset(args.dataset_name, split='train')
- val_dataset = create_dataset(args.dataset_name, split='validation')
+training_args = ImageClassificationTrainingArgs(
+ model='damo/cv_vit-base_image-classification_ImageNet-labels',
+ max_epochs=1,
+ lr=1e-4,
+ optimizer='AdamW',
+ warmup_iters=1,
+ topk=(1, )).parse_cli()
+config, args = training_args.to_config()
+
+
+def cfg_modify_fn(cfg):
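+    # Merge the CLI overrides into the model's own configuration.json when
+    # --use_model_config is set; otherwise use the config built from the CLI alone.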
+ if args.use_model_config:
+ cfg.merge_from_dict(config)
else:
- train_dataset = create_dataset(args.train_dataset_name, split='train')
- val_dataset = create_dataset(args.val_dataset_name, split='validation')
+ cfg = config
+ return cfg
+
+
+def train():
+ train_dataset = create_dataset(
+ training_args.train_dataset_name, split=training_args.train_split)
+ val_dataset = create_dataset(
+ training_args.val_dataset_name, split=training_args.val_split)
kwargs = dict(
model=args.model, # model id
train_dataset=train_dataset, # training dataset
eval_dataset=val_dataset, # validation dataset
- cfg_modify_fn=args # callback to modify configuration
+ cfg_modify_fn=cfg_modify_fn # callback to modify configuration
)
# in distributed training, specify pytorch launcher
diff --git a/examples/pytorch/image_classification/run_train.sh b/examples/pytorch/image_classification/run_train.sh
index 5a7b3a09..ad560424 100644
--- a/examples/pytorch/image_classification/run_train.sh
+++ b/examples/pytorch/image_classification/run_train.sh
@@ -2,4 +2,7 @@ PYTHONPATH=. python -m torch.distributed.launch --nproc_per_node=2 \
examples/pytorch/image_classification/finetune_image_classification.py \
--num_classes 2 \
--train_dataset_name 'tany0699/cats_and_dogs' \
- --val_dataset_name 'tany0699/cats_and_dogs'
+ --val_dataset_name 'tany0699/cats_and_dogs' \
+ --train_split train \
+ --val_split validation \
+ --use_model_config true \
diff --git a/examples/pytorch/multi_modal_embedding/finetune_multi_modal_embedding.py b/examples/pytorch/multi_modal_embedding/finetune_multi_modal_embedding.py
index cc7da842..7b4cfbb8 100644
--- a/examples/pytorch/multi_modal_embedding/finetune_multi_modal_embedding.py
+++ b/examples/pytorch/multi_modal_embedding/finetune_multi_modal_embedding.py
@@ -1,15 +1,13 @@
import os
from dataclasses import dataclass, field
-from functools import partial
+from modelscope import MsDataset, TrainingArgs
from modelscope.metainfo import Trainers
-from modelscope.msdatasets import MsDataset
from modelscope.trainers import build_trainer
-from modelscope.trainers.training_args import (TrainingArgs, get_flatten_value,
- set_flatten_value)
+from modelscope.trainers.training_args import set_flatten_value
-@dataclass
+@dataclass(init=False)
class MultiModalEmbeddingArguments(TrainingArgs):
trainer: str = field(
@@ -17,6 +15,12 @@ class MultiModalEmbeddingArguments(TrainingArgs):
'help': 'The trainer used',
})
+ work_dir: str = field(
+ default='./tmp',
+ metadata={
+ 'help': 'The working path for saving checkpoint',
+ })
+
use_fp16: bool = field(
default=None,
metadata={
@@ -35,7 +39,6 @@ class MultiModalEmbeddingArguments(TrainingArgs):
default=None,
metadata={
'cfg_node': 'train.optimizer_hparams',
- 'cfg_getter': partial(get_flatten_value, exclusions=['lr']),
'cfg_setter': set_flatten_value,
'help': 'The optimizer init params except `lr`',
})
@@ -51,7 +54,6 @@ class MultiModalEmbeddingArguments(TrainingArgs):
default=None,
metadata={
'cfg_node': 'dataset.column_map',
- 'cfg_getter': get_flatten_value,
'cfg_setter': set_flatten_value,
'help': 'The column map for dataset',
})
@@ -67,7 +69,6 @@ class MultiModalEmbeddingArguments(TrainingArgs):
default=None,
metadata={
'cfg_node': 'train.lr_scheduler_hook',
- 'cfg_getter': get_flatten_value,
'cfg_setter': set_flatten_value,
'help': 'The parameters for lr scheduler hook',
})
@@ -76,7 +77,6 @@ class MultiModalEmbeddingArguments(TrainingArgs):
default=None,
metadata={
'cfg_node': 'train.optimizer_hook',
- 'cfg_getter': get_flatten_value,
'cfg_setter': set_flatten_value,
'help': 'The parameters for optimizer hook',
})
@@ -92,23 +92,28 @@ class MultiModalEmbeddingArguments(TrainingArgs):
'help': 'The data parallel world size',
})
- def __call__(self, config):
- config = super().__call__(config)
- config.merge_from_dict({'pretrained_model.model_name': self.model})
- if self.clip_clamp:
- config.train.hooks.append({'type': 'ClipClampLogitScaleHook'})
- if self.world_size > 1:
- config.train.launcher = 'pytorch'
- return config
+
+config, args = MultiModalEmbeddingArguments().parse_cli().to_config()
+print(config, args)
-args = MultiModalEmbeddingArguments.from_cli(task='multi-modal-embedding')
-print(args)
+def cfg_modify_fn(cfg):
+ if args.use_model_config:
+ cfg.merge_from_dict(config)
+ else:
+ cfg = config
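+    # Point the config at the CLI-selected model; optionally clamp the CLIP logit
+    # scale and switch to the pytorch launcher for distributed (world_size > 1) runs.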
+ cfg.merge_from_dict({'pretrained_model.model_name': args.model})
+ if args.clip_clamp:
+ cfg.train.hooks.append({'type': 'ClipClampLogitScaleHook'})
+ if args.world_size > 1:
+ cfg.train.launcher = 'pytorch'
+ return cfg
+
train_dataset = MsDataset.load(
- args.dataset_name, namespace='modelscope', split='train')
+ args.train_dataset_name, namespace='modelscope', split='train')
eval_dataset = MsDataset.load(
- args.dataset_name, namespace='modelscope', split='validation')
+ args.train_dataset_name, namespace='modelscope', split='validation')
os.makedirs(args.work_dir, exist_ok=True)
kwargs = dict(
@@ -116,6 +121,6 @@ kwargs = dict(
train_dataset=train_dataset,
eval_dataset=eval_dataset,
work_dir=args.work_dir,
- cfg_modify_fn=args)
+ cfg_modify_fn=cfg_modify_fn)
trainer = build_trainer(name=args.trainer, default_args=kwargs)
trainer.train()
diff --git a/examples/pytorch/multi_modal_embedding/run_train.sh b/examples/pytorch/multi_modal_embedding/run_train.sh
index 89eef73e..3974405b 100644
--- a/examples/pytorch/multi_modal_embedding/run_train.sh
+++ b/examples/pytorch/multi_modal_embedding/run_train.sh
@@ -6,14 +6,16 @@ PYTHONPATH=. torchrun --nproc_per_node $DATA_PARALLEL_SIZE \
--trainer 'clip-multi-modal-embedding' \
--work_dir './workspace/ckpts/clip' \
--model 'damo/multi-modal_clip-vit-base-patch16_zh' \
- --dataset_name 'muge' \
+ --train_dataset_name 'muge' \
--dataset_column_map 'img=image,text=query' \
--max_epochs 1 \
--use_fp16 true \
--per_device_train_batch_size 180 \
+ --train_data_worker 0 \
--train_shuffle true \
--train_drop_last true \
--per_device_eval_batch_size 128 \
+ --eval_data_worker 0 \
--eval_shuffle true \
--eval_drop_last true \
--save_ckpt_best true \
@@ -33,3 +35,4 @@ PYTHONPATH=. torchrun --nproc_per_node $DATA_PARALLEL_SIZE \
--optimizer_hook 'type=TorchAMPOptimizerHook,cumulative_iters=1,loss_keys=loss' \
--clip_clamp true \
--world_size $DATA_PARALLEL_SIZE \
+ --use_model_config true \
diff --git a/examples/pytorch/stable_diffusion/finetune_stable_diffusion.py b/examples/pytorch/stable_diffusion/finetune_stable_diffusion.py
index bd05097d..28ba853c 100644
--- a/examples/pytorch/stable_diffusion/finetune_stable_diffusion.py
+++ b/examples/pytorch/stable_diffusion/finetune_stable_diffusion.py
@@ -4,30 +4,32 @@ from modelscope.msdatasets import MsDataset
from modelscope.trainers import EpochBasedTrainer, build_trainer
from modelscope.trainers.training_args import TrainingArgs
-
-@dataclass
-class StableDiffusionArguments(TrainingArgs):
-
- def __call__(self, config):
- config = super().__call__(config)
- config.train.lr_scheduler.T_max = self.max_epochs
- config.model.inference = False
- return config
-
-
-args = StableDiffusionArguments.from_cli(task='efficient-diffusion-tuning')
+training_args = TrainingArgs(task='efficient-diffusion-tuning').parse_cli()
+config, args = training_args.to_config()
print(args)
-dataset = MsDataset.load(args.dataset_name, namespace=args.namespace)
+dataset = MsDataset.load(
+ args.train_dataset_name, namespace=args.train_dataset_namespace)
train_dataset = dataset['train']
validation_dataset = dataset['validation']
+
+def cfg_modify_fn(cfg):
+ if args.use_model_config:
+ cfg.merge_from_dict(config)
+ else:
+ cfg = config
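+    # Tie the scheduler's T_max to the epoch count and take the model out of inference mode for training.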
+ cfg.train.lr_scheduler.T_max = training_args.max_epochs
+ cfg.model.inference = False
+ return cfg
+
+
kwargs = dict(
- model=args.model,
- work_dir=args.work_dir,
+ model=training_args.model,
+ work_dir=training_args.work_dir,
train_dataset=train_dataset,
eval_dataset=validation_dataset,
- cfg_modify_fn=args)
+ cfg_modify_fn=cfg_modify_fn)
trainer: EpochBasedTrainer = build_trainer(name='trainer', default_args=kwargs)
trainer.train()
diff --git a/examples/pytorch/stable_diffusion/run_train.sh b/examples/pytorch/stable_diffusion/run_train.sh
index c8bfa26c..0e551942 100644
--- a/examples/pytorch/stable_diffusion/run_train.sh
+++ b/examples/pytorch/stable_diffusion/run_train.sh
@@ -1,11 +1,12 @@
PYTHONPATH=. torchrun examples/pytorch/stable_diffusion/finetune_stable_diffusion.py \
--model 'damo/multi-modal_efficient-diffusion-tuning-lora' \
--work_dir './tmp/stable_diffusion_tuning' \
- --namespace 'damo' \
- --dataset_name 'buptwq/lora-stable-diffusion-finetune-dog' \
- --max_epochs 150 \
+ --train_dataset_namespace 'damo' \
+ --train_dataset_name 'controlnet_dataset_condition_fill50k' \
+ --max_epochs 1 \
--save_ckpt_strategy 'by_epoch' \
--logging_interval 100 \
--train.dataloader.workers_per_gpu 0 \
--evaluation.dataloader.workers_per_gpu 0 \
- --train.optimizer.lr 1e-4
+ --train.optimizer.lr 1e-5 \
+ --use_model_config true
diff --git a/examples/pytorch/text_classification/finetune_text_classification.py b/examples/pytorch/text_classification/finetune_text_classification.py
index 7747bc25..dfcb7b4d 100644
--- a/examples/pytorch/text_classification/finetune_text_classification.py
+++ b/examples/pytorch/text_classification/finetune_text_classification.py
@@ -1,26 +1,18 @@
import os
from dataclasses import dataclass, field
-from modelscope.msdatasets import MsDataset
-from modelscope.trainers import EpochBasedTrainer, build_trainer
-from modelscope.trainers.training_args import TrainingArgs
+from modelscope import (EpochBasedTrainer, MsDataset, TrainingArgs,
+ build_dataset_from_file)
+from modelscope.trainers import build_trainer
-def get_labels(cfg, metadata):
- label2id = cfg.safe_get(metadata['cfg_node'])
- if label2id is not None:
- return ','.join(label2id.keys())
-
-
-def set_labels(cfg, labels, metadata):
+def set_labels(labels):
if isinstance(labels, str):
labels = labels.split(',')
- cfg.merge_from_dict(
- {metadata['cfg_node']: {label: id
- for id, label in enumerate(labels)}})
+ return {label: id for id, label in enumerate(labels)}
-@dataclass
+@dataclass(init=False)
class TextClassificationArguments(TrainingArgs):
first_sequence: str = field(
@@ -49,7 +41,6 @@ class TextClassificationArguments(TrainingArgs):
metadata={
'help': 'The labels of the dataset',
'cfg_node': 'preprocessor.label2id',
- 'cfg_getter': get_labels,
'cfg_setter': set_labels,
})
@@ -60,30 +51,39 @@ class TextClassificationArguments(TrainingArgs):
'cfg_node': 'preprocessor.type'
})
- def __call__(self, config):
- config = super().__call__(config)
- config.model['num_labels'] = len(self.labels)
- if config.train.lr_scheduler.type == 'LinearLR':
- config.train.lr_scheduler['total_iters'] = \
- int(len(train_dataset) / self.per_device_train_batch_size) * self.max_epochs
- return config
+
+config, args = TextClassificationArguments().parse_cli().to_config()
+
+print(config, args)
-args = TextClassificationArguments.from_cli(
- task='text-classification', eval_metrics='seq-cls-metric')
+def cfg_modify_fn(cfg):
+ if args.use_model_config:
+ cfg.merge_from_dict(config)
+ else:
+ cfg = config
+ cfg.model['num_labels'] = len(cfg.preprocessor.label2id)
+ if cfg.train.lr_scheduler.type == 'LinearLR':
+ cfg.train.lr_scheduler['total_iters'] = \
+ int(len(train_dataset) / cfg.train.dataloader.batch_size_per_gpu) * cfg.train.max_epochs
+ return cfg
-print(args)
-dataset = MsDataset.load(args.dataset_name, subset_name=args.subset_name)
-train_dataset = dataset['train']
-validation_dataset = dataset['validation']
+if args.dataset_json_file is None:
+ dataset = MsDataset.load(
+ args.train_dataset_name, subset_name=args.train_subset_name)
+ train_dataset = dataset['train']
+ validation_dataset = dataset['validation']
+else:
+ train_dataset, validation_dataset = build_dataset_from_file(
+ args.dataset_json_file)
kwargs = dict(
model=args.model,
train_dataset=train_dataset,
eval_dataset=validation_dataset,
seed=args.seed,
- cfg_modify_fn=args)
+ cfg_modify_fn=cfg_modify_fn)
os.environ['LOCAL_RANK'] = str(args.local_rank)
trainer: EpochBasedTrainer = build_trainer(name='trainer', default_args=kwargs)
diff --git a/examples/pytorch/text_classification/run_train.sh b/examples/pytorch/text_classification/run_train.sh
index 93c23d0d..e91a9996 100644
--- a/examples/pytorch/text_classification/run_train.sh
+++ b/examples/pytorch/text_classification/run_train.sh
@@ -1,12 +1,16 @@
PYTHONPATH=. python examples/pytorch/text_classification/finetune_text_classification.py \
+ --task 'text-classification' \
--model 'damo/nlp_structbert_backbone_base_std' \
- --dataset_name 'clue' \
- --subset_name 'tnews' \
+ --train_dataset_name 'clue' \
+ --train_subset_name 'tnews' \
--first_sequence 'sentence' \
--preprocessor.label label \
--model.num_labels 15 \
--labels '0,1,2,3,4,5,6,7,8,9,10,11,12,13,14' \
--preprocessor 'sen-cls-tokenizer' \
+ --use_model_config True \
+ --max_epochs 1 \
--train.dataloader.workers_per_gpu 0 \
--evaluation.dataloader.workers_per_gpu 0 \
--train.optimizer.lr 1e-5 \
+ --eval_metrics 'seq-cls-metric' \
diff --git a/examples/pytorch/text_generation/finetune_text_generation.py b/examples/pytorch/text_generation/finetune_text_generation.py
index 7a140a0c..a89970e8 100644
--- a/examples/pytorch/text_generation/finetune_text_generation.py
+++ b/examples/pytorch/text_generation/finetune_text_generation.py
@@ -1,12 +1,11 @@
from dataclasses import dataclass, field
+from modelscope import EpochBasedTrainer, MsDataset, TrainingArgs
from modelscope.metainfo import Trainers
-from modelscope.msdatasets import MsDataset
-from modelscope.trainers import EpochBasedTrainer, build_trainer
-from modelscope.trainers.training_args import TrainingArgs
+from modelscope.trainers import build_trainer
-@dataclass
+@dataclass(init=False)
class TextGenerationArguments(TrainingArgs):
trainer: str = field(
@@ -67,30 +66,35 @@ class TextGenerationArguments(TrainingArgs):
'help': 'Whether to use MegatronHook',
})
- def __call__(self, config):
- config = super().__call__(config)
- if config.train.lr_scheduler.type == 'noam':
- config.train.lr_scheduler = {
- 'type': 'LambdaLR',
- 'lr_lambda': noam_lambda,
- 'options': {
- 'by_epoch': False
- }
- }
- if self.use_megatron:
- config.train.hooks.append({'type': 'MegatronHook'})
- return config
-
def noam_lambda(current_step: int):
current_step += 1
return min(current_step**(-0.5), current_step * 100**(-1.5))
-args = TextGenerationArguments.from_cli(task='text-generation')
-print(args)
+config, args = TextGenerationArguments().parse_cli().to_config()
+print(config, args)
-dataset = MsDataset.load(args.dataset_name)
+
+def cfg_modify_fn(cfg):
+ if args.use_model_config:
+ cfg.merge_from_dict(config)
+ else:
+ cfg = config
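+    # 'noam' is realized as a step-wise LambdaLR driven by noam_lambda
+    # (linear warmup, then inverse square-root decay).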
+ if cfg.train.lr_scheduler.type == 'noam':
+ cfg.train.lr_scheduler = {
+ 'type': 'LambdaLR',
+ 'lr_lambda': noam_lambda,
+ 'options': {
+ 'by_epoch': False
+ }
+ }
+ if args.use_megatron:
+ cfg.train.hooks.append({'type': 'MegatronHook'})
+ return cfg
+
+
+dataset = MsDataset.load(args.train_dataset_name)
train_dataset = dataset['train']
eval_dataset = dataset['validation' if 'validation' in dataset else 'test']
@@ -100,7 +104,7 @@ kwargs = dict(
eval_dataset=eval_dataset,
seed=args.seed,
work_dir=args.work_dir,
- cfg_modify_fn=args)
+ cfg_modify_fn=cfg_modify_fn)
trainer: EpochBasedTrainer = build_trainer(
name=args.trainer, default_args=kwargs)
diff --git a/examples/pytorch/text_generation/run_train_gpt3.sh b/examples/pytorch/text_generation/run_train_gpt3.sh
index a20a5bb2..fd37b42c 100644
--- a/examples/pytorch/text_generation/run_train_gpt3.sh
+++ b/examples/pytorch/text_generation/run_train_gpt3.sh
@@ -8,7 +8,7 @@ PYTHONPATH=. torchrun --nproc_per_node $WORLD_SIZE examples/pytorch/text_generat
--trainer 'nlp-gpt3-trainer' \
--work_dir './tmp' \
--model 'damo/nlp_gpt3_text-generation_1.3B' \
- --dataset_name 'chinese-poetry-collection' \
+ --train_dataset_name 'chinese-poetry-collection' \
--preprocessor 'text-gen-jieba-tokenizer' \
--src_txt 'text1' \
--tgt_txt 'text2' \
@@ -20,4 +20,5 @@ PYTHONPATH=. torchrun --nproc_per_node $WORLD_SIZE examples/pytorch/text_generat
--world_size $WORLD_SIZE \
--tensor_model_parallel_size $TENSOR_MODEL_PARALLEL_SIZE \
--use_megatron true \
- # --dataset_name 'DuReader_robust-QG' \ # input&output
+ --use_model_config true \
+ # --train_dataset_name 'DuReader_robust-QG' \ # input&output
diff --git a/examples/pytorch/text_generation/run_train_mt5.sh b/examples/pytorch/text_generation/run_train_mt5.sh
new file mode 100644
index 00000000..6d032d6e
--- /dev/null
+++ b/examples/pytorch/text_generation/run_train_mt5.sh
@@ -0,0 +1,13 @@
+PYTHONPATH=. torchrun examples/pytorch/text_generation/finetune_text_generation.py \
+ --trainer 'text-generation-trainer' \
+ --work_dir './tmp' \
+ --task 'text2text-generation' \
+ --model 'damo/nlp_mt5_zero-shot-augment_chinese-base' \
+ --train_dataset_name 'DuReader_robust-QG' \
+ --src_txt 'text1' \
+ --tgt_txt 'text2' \
+ --max_epochs 1 \
+ --use_model_config True \
+ --per_device_train_batch_size 8 \
+ --lr 1e-3 \
+ --lr_scheduler 'noam' \
diff --git a/examples/pytorch/text_generation/run_train_palm.sh b/examples/pytorch/text_generation/run_train_palm.sh
index ff88ce7d..68b9e89d 100644
--- a/examples/pytorch/text_generation/run_train_palm.sh
+++ b/examples/pytorch/text_generation/run_train_palm.sh
@@ -2,10 +2,11 @@ PYTHONPATH=. torchrun examples/pytorch/text_generation/finetune_text_generation.
--trainer 'text-generation-trainer' \
--work_dir './tmp' \
--model 'damo/nlp_palm2.0_pretrained_chinese-base' \
- --dataset_name 'DuReader_robust-QG' \
+ --train_dataset_name 'DuReader_robust-QG' \
--src_txt 'text1' \
--tgt_txt 'text2' \
- --max_epochs 15 \
+ --max_epochs 1 \
+ --use_model_config True \
--per_device_train_batch_size 8 \
--lr 1e-3 \
--lr_scheduler 'noam' \
diff --git a/examples/pytorch/token_classification/finetune_token_classification.py b/examples/pytorch/token_classification/finetune_token_classification.py
index cf51ed22..3f9de791 100644
--- a/examples/pytorch/token_classification/finetune_token_classification.py
+++ b/examples/pytorch/token_classification/finetune_token_classification.py
@@ -1,20 +1,22 @@
from dataclasses import dataclass, field
-from modelscope.metainfo import Trainers
-from modelscope.msdatasets import MsDataset
-from modelscope.trainers import build_trainer
-from modelscope.trainers.training_args import (TrainingArgs, get_flatten_value,
- set_flatten_value)
+from modelscope import (EpochBasedTrainer, MsDataset, TrainingArgs,
+ build_dataset_from_file)
-@dataclass
+@dataclass(init=False)
class TokenClassificationArguments(TrainingArgs):
-
trainer: str = field(
- default=Trainers.default, metadata={
+ default=None, metadata={
'help': 'The trainer used',
})
+ work_dir: str = field(
+ default='./tmp',
+ metadata={
+ 'help': 'The working path for saving checkpoint',
+ })
+
preprocessor: str = field(
default=None,
metadata={
@@ -29,60 +31,99 @@ class TokenClassificationArguments(TrainingArgs):
'cfg_node': 'preprocessor.padding'
})
- train_dataset_params: str = field(
+ mode: str = field(
+ default='inference',
+ metadata={
+            'help': 'The preprocessor mode',
+ 'cfg_node': 'preprocessor.mode'
+ })
+
+ first_sequence: str = field(
default=None,
metadata={
- 'cfg_node': 'dataset.train',
- 'cfg_getter': get_flatten_value,
- 'cfg_setter': set_flatten_value,
+ 'cfg_node': 'preprocessor.first_sequence',
'help': 'The parameters for train dataset',
})
- def __call__(self, config):
- config = super().__call__(config)
- if config.safe_get('dataset.train.label') == 'ner_tags':
- ner_tags_labels = train_dataset['ner_tags'] + eval_dataset[
- 'ner_tags']
- label_enumerate_values = self._get_label_list(ner_tags_labels)
- config.merge_from_dict(
- {'dataset.train.labels': label_enumerate_values})
- if config.train.lr_scheduler.type == 'LinearLR':
- config.train.lr_scheduler['total_iters'] = \
- int(len(train_dataset) / self.per_device_train_batch_size) * self.max_epochs
- return config
+ label: str = field(
+ default=None,
+ metadata={
+ 'cfg_node': 'preprocessor.label',
+            'help': 'The label key used by the preprocessor',
+ })
- # TODO: Future performance optimization in MsDataset
- @staticmethod
- def _get_label_list(labels):
- unique_labels = set()
- for label in labels:
- unique_labels = unique_labels | set(label)
- label_list = list(unique_labels)
- label_list.sort()
- return label_list
+ sequence_length: int = field(
+ default=128,
+ metadata={
+ 'cfg_node': 'preprocessor.sequence_length',
+            'help': 'The maximum sequence length used by the preprocessor',
+ })
-args = TokenClassificationArguments.from_cli(task='token-classification')
+training_args = TokenClassificationArguments().parse_cli()
+config, args = training_args.to_config()
print(args)
-# load dataset
-train_dataset = MsDataset.load(
- args.dataset_name,
- subset_name=args.subset_name,
- split='train',
- namespace='damo')['train']
-eval_dataset = MsDataset.load(
- args.dataset_name,
- subset_name=args.subset_name,
- split='validation',
- namespace='damo')['validation']
+
+def get_label_list(labels):
+ unique_labels = set()
+ for label in labels:
+ unique_labels = unique_labels | set(label)
+ label_list = list(unique_labels)
+ label_list.sort()
+ return label_list
+
+
+def cfg_modify_fn(cfg):
+ if args.use_model_config:
+ cfg.merge_from_dict(config)
+ else:
+ cfg = config
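+    # Build label2id from every tag observed in the train and validation splits.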
+ labels = train_dataset[training_args.label] + validation_dataset[
+ training_args.label]
+ label_enumerate_values = get_label_list(labels)
+ cfg.merge_from_dict({
+ 'preprocessor.label2id':
+ {label: id
+ for id, label in enumerate(label_enumerate_values)}
+ })
+ cfg.merge_from_dict({'model.num_labels': len(label_enumerate_values)})
+ cfg.merge_from_dict({'preprocessor.use_fast': True})
+ cfg.merge_from_dict({
+ 'evaluation.metrics': {
+ 'type': 'token-cls-metric',
+ 'label2id':
+ {label: id
+ for id, label in enumerate(label_enumerate_values)}
+ }
+ })
+ if cfg.train.lr_scheduler.type == 'LinearLR':
+ cfg.train.lr_scheduler['total_iters'] = \
+ int(len(train_dataset) / cfg.train.dataloader.batch_size_per_gpu) * cfg.train.max_epochs
+ return cfg
+
+
+if args.dataset_json_file is None:
+ train_dataset = MsDataset.load(
+ args.train_dataset_name,
+ subset_name=args.train_subset_name,
+ split='train',
+ namespace=args.train_dataset_namespace)['train']
+ validation_dataset = MsDataset.load(
+ args.train_dataset_name,
+ subset_name=args.train_subset_name,
+ split='validation',
+ namespace=args.train_dataset_namespace)['validation']
+else:
+ train_dataset, validation_dataset = build_dataset_from_file(
+ args.dataset_json_file)
kwargs = dict(
model=args.model,
train_dataset=train_dataset,
- eval_dataset=eval_dataset,
+ eval_dataset=validation_dataset,
work_dir=args.work_dir,
- cfg_modify_fn=args)
+ cfg_modify_fn=cfg_modify_fn)
-trainer = build_trainer(name=args.trainer, default_args=kwargs)
+trainer = EpochBasedTrainer(**kwargs)
trainer.train()
diff --git a/examples/pytorch/token_classification/run_train_mgeo.sh b/examples/pytorch/token_classification/run_train_mgeo.sh
index f80af84f..1e384ec5 100644
--- a/examples/pytorch/token_classification/run_train_mgeo.sh
+++ b/examples/pytorch/token_classification/run_train_mgeo.sh
@@ -1,15 +1,22 @@
-PYTHONPATH=. torchrun examples/pytorch/token_classification/finetune_token_classification.py \
+PYTHONPATH=. python examples/pytorch/token_classification/finetune_token_classification.py \
+ --task 'token-classification' \
--trainer 'nlp-base-trainer' \
--work_dir './tmp' \
--model 'damo/mgeo_backbone_chinese_base' \
- --dataset_name 'GeoGLUE' \
- --subset_name 'GeoETA' \
- --train_dataset_params 'first_sequence=tokens,label=ner_tags,sequence_length=128' \
+ --train_dataset_name 'GeoGLUE' \
+ --train_subset_name 'GeoETA' \
+ --train_dataset_namespace 'damo' \
+ --first_sequence 'tokens' \
+ --eval_strategy by_step \
+ --eval_interval 10 \
+ --label 'ner_tags' \
+ --sequence_length 128 \
--preprocessor 'token-cls-tokenizer' \
--preprocessor_padding 'max_length' \
--max_epochs 1 \
+ --mode 'inference' \
+ --use_model_config True \
--per_device_train_batch_size 32 \
+ --train_data_worker 0 \
+ --eval_data_worker 0 \
--lr 3e-5 \
- --save_ckpt_strategy 'by_epoch' \
- --logging_interval 100 \
- --eval_strategy 'by_epoch' \
diff --git a/examples/pytorch/token_classification/run_train_structbert.sh b/examples/pytorch/token_classification/run_train_structbert.sh
index 28967f60..a44c4519 100644
--- a/examples/pytorch/token_classification/run_train_structbert.sh
+++ b/examples/pytorch/token_classification/run_train_structbert.sh
@@ -1,16 +1,22 @@
-PYTHONPATH=. torchrun examples/pytorch/token_classification/finetune_token_classification.py \
+PYTHONPATH=. python examples/pytorch/token_classification/finetune_token_classification.py \
+ --task 'token-classification' \
--trainer 'nlp-base-trainer' \
--work_dir './tmp' \
--model 'damo/nlp_structbert_backbone_base_std' \
- --dataset_name 'GeoGLUE' \
- --subset_name 'GeoETA' \
- --train_dataset_params 'first_sequence=tokens,label=ner_tags,sequence_length=128' \
+ --train_dataset_name 'GeoGLUE' \
+ --train_subset_name 'GeoETA' \
+ --train_dataset_namespace 'damo' \
+ --first_sequence 'tokens' \
+ --eval_strategy by_step \
+ --eval_interval 20 \
+ --label 'ner_tags' \
+ --sequence_length 128 \
--preprocessor 'token-cls-tokenizer' \
--preprocessor_padding 'max_length' \
--max_epochs 2 \
+ --mode 'inference' \
+ --use_model_config True \
--per_device_train_batch_size 32 \
+ --train_data_worker 0 \
+ --eval_data_worker 0 \
--lr 3e-5 \
- --save_ckpt_strategy 'by_epoch' \
- --logging_interval 1 \
- --eval_strategy 'by_step' \
- --eval_interval 20 \
diff --git a/examples/pytorch/transformers/configuration.json b/examples/pytorch/transformers/configuration.json
deleted file mode 100644
index df6a73c8..00000000
--- a/examples/pytorch/transformers/configuration.json
+++ /dev/null
@@ -1 +0,0 @@
-{"framework":"pytorch","train":{"work_dir":"/tmp","max_epochs":10,"dataloader":{"batch_size_per_gpu":16,"workers_per_gpu":0},"optimizer":{"type":"SGD","lr":0.001},"lr_scheduler":{"type":"StepLR","step_size":2},"hooks":[{"type":"CheckpointHook","interval":1}]},"evaluation":{"dataloader":{"batch_size_per_gpu":16,"workers_per_gpu":0,"shuffle":false}}}
diff --git a/examples/pytorch/transformers/finetune_transformers_model.py b/examples/pytorch/transformers/finetune_transformers_model.py
index bbfb807a..5110f751 100644
--- a/examples/pytorch/transformers/finetune_transformers_model.py
+++ b/examples/pytorch/transformers/finetune_transformers_model.py
@@ -5,11 +5,11 @@ from datasets import load_dataset
from transformers import (BertForSequenceClassification, BertTokenizerFast,
default_data_collator)
+from modelscope import TrainingArgs
from modelscope.trainers import EpochBasedTrainer, build_trainer
-from modelscope.trainers.default_config import DEFAULT_CONFIG, TrainingArgs
-@dataclass
+@dataclass(init=False)
class TransformersArguments(TrainingArgs):
num_labels: int = field(
@@ -17,13 +17,27 @@ class TransformersArguments(TrainingArgs):
'help': 'The number of labels',
})
+ sentence: str = field(
+ default=None, metadata={
+ 'help': 'The sentence key',
+ })
-args = TransformersArguments.from_cli(
- task='text-classification', eval_metrics='seq-cls-metric')
+ label: str = field(
+ default=None, metadata={
+ 'help': 'The label key',
+ })
-print(args)
-dataset = load_dataset(args.dataset_name, args.subset_name)
+training_args = TransformersArguments(
+ task='text-classification', eval_metrics='seq-cls-metric').parse_cli()
+config, args = training_args.to_config()
+
+print(config, args)
+
+train_dataset = load_dataset(
+ args.train_dataset_name, args.train_subset_name, split=args.train_split)
+val_dataset = load_dataset(
+ args.val_dataset_name, args.val_subset_name, split=args.val_split)
model = BertForSequenceClassification.from_pretrained(
args.model, num_labels=args.num_labels)
@@ -31,26 +45,30 @@ tokenizer = BertTokenizerFast.from_pretrained(args.model)
def tokenize_sentence(row):
- return tokenizer(row['sentence'], padding='max_length', max_length=128)
+ return tokenizer(
+ row[training_args.sentence], padding='max_length', max_length=128)
# Extra columns, Rename columns
-dataset = dataset.map(tokenize_sentence).remove_columns(['sentence',
- 'idx']).rename_column(
- 'label', 'labels')
+train_dataset = train_dataset.map(tokenize_sentence)
+val_dataset = val_dataset.map(tokenize_sentence)
+if training_args.label != 'labels':
+ train_dataset = train_dataset.rename_columns(
+ {training_args.label: 'labels'})
+ val_dataset = val_dataset.rename_columns({training_args.label: 'labels'})
cfg_file = os.path.join(args.work_dir or './', 'configuration.json')
-DEFAULT_CONFIG.dump(cfg_file)
+config.dump(cfg_file)
kwargs = dict(
model=model,
cfg_file=cfg_file,
# data_collator
data_collator=default_data_collator,
- train_dataset=dataset['train'],
- eval_dataset=dataset['validation'],
- seed=args.seed,
- cfg_modify_fn=args)
+ train_dataset=train_dataset,
+ eval_dataset=val_dataset,
+ remove_unused_data=True,
+ seed=args.seed)
os.environ['LOCAL_RANK'] = str(args.local_rank)
trainer: EpochBasedTrainer = build_trainer(name='trainer', default_args=kwargs)
diff --git a/examples/pytorch/transformers/run_train.sh b/examples/pytorch/transformers/run_train.sh
index c76c4636..94e5ef75 100644
--- a/examples/pytorch/transformers/run_train.sh
+++ b/examples/pytorch/transformers/run_train.sh
@@ -1,5 +1,14 @@
PYTHONPATH=. python examples/pytorch/transformers/finetune_transformers_model.py \
--model bert-base-uncased \
--num_labels 15 \
- --dataset_name clue \
- --subset_name tnews
+ --train_dataset_name clue \
+ --train_subset_name tnews \
+ --train_split train \
+ --val_dataset_name clue \
+ --val_subset_name tnews \
+ --val_split validation \
+ --sentence sentence \
+ --label label \
+ --eval_strategy by_step \
+ --eval_interval 100
diff --git a/modelscope/__init__.py b/modelscope/__init__.py
index 81fdf505..f7553958 100644
--- a/modelscope/__init__.py
+++ b/modelscope/__init__.py
@@ -1,4 +1,79 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
-from .version import __release_datetime__, __version__
+from typing import TYPE_CHECKING
-__all__ = ['__version__', '__release_datetime__']
+from modelscope.utils.import_utils import LazyImportModule
+
+if TYPE_CHECKING:
+ from .version import __release_datetime__, __version__
+ from .trainers import EpochBasedTrainer, TrainingArgs, build_dataset_from_file
+ from .trainers import Hook, Priority
+ from .exporters import Exporter
+ from .exporters import TfModelExporter
+ from .exporters import TorchModelExporter
+ from .hub.api import HubApi
+ from .hub.snapshot_download import snapshot_download
+ from .hub.push_to_hub import push_to_hub, push_to_hub_async
+ from .hub.check_model import check_model_is_id, check_local_model_is_latest
+ from .metrics import AudioNoiseMetric, Metric, task_default_metrics, ImageColorEnhanceMetric, ImageDenoiseMetric, \
+ ImageInstanceSegmentationCOCOMetric, ImagePortraitEnhancementMetric, SequenceClassificationMetric, \
+ TextGenerationMetric, TokenClassificationMetric, VideoSummarizationMetric, MovieSceneSegmentationMetric, \
+ AccuracyMetric, BleuMetric, ImageInpaintingMetric, ReferringVideoObjectSegmentationMetric, \
+ VideoFrameInterpolationMetric, VideoStabilizationMetric, VideoSuperResolutionMetric, PplMetric, \
+ ImageQualityAssessmentDegradationMetric, ImageQualityAssessmentMosMetric, TextRankingMetric, \
+ LossMetric, ImageColorizationMetric, OCRRecognitionMetric
+ from .models import Model, TorchModel
+ from .preprocessors import Preprocessor
+ from .pipelines import Pipeline, pipeline
+ from .utils.hub import read_config, create_model_if_not_exist
+ from .utils.logger import get_logger
+ from .msdatasets import MsDataset
+
+else:
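+    # Map submodules to their public names; LazyImportModule imports them on first attribute access.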
+ _import_structure = {
+ 'version': ['__release_datetime__', '__version__'],
+ 'trainers': [
+ 'EpochBasedTrainer', 'TrainingArgs', 'Hook', 'Priority',
+ 'build_dataset_from_file'
+ ],
+ 'exporters': [
+ 'Exporter',
+ 'TfModelExporter',
+ 'TorchModelExporter',
+ ],
+ 'hub.api': ['HubApi'],
+ 'hub.snapshot_download': ['snapshot_download'],
+ 'hub.push_to_hub': ['push_to_hub', 'push_to_hub_async'],
+ 'hub.check_model':
+ ['check_model_is_id', 'check_local_model_is_latest'],
+ 'metrics': [
+ 'AudioNoiseMetric', 'Metric', 'task_default_metrics',
+ 'ImageColorEnhanceMetric', 'ImageDenoiseMetric',
+ 'ImageInstanceSegmentationCOCOMetric',
+ 'ImagePortraitEnhancementMetric', 'SequenceClassificationMetric',
+ 'TextGenerationMetric', 'TokenClassificationMetric',
+ 'VideoSummarizationMetric', 'MovieSceneSegmentationMetric',
+ 'AccuracyMetric', 'BleuMetric', 'ImageInpaintingMetric',
+ 'ReferringVideoObjectSegmentationMetric',
+ 'VideoFrameInterpolationMetric', 'VideoStabilizationMetric',
+ 'VideoSuperResolutionMetric', 'PplMetric',
+ 'ImageQualityAssessmentDegradationMetric',
+ 'ImageQualityAssessmentMosMetric', 'TextRankingMetric',
+ 'LossMetric', 'ImageColorizationMetric', 'OCRRecognitionMetric'
+ ],
+ 'models': ['Model', 'TorchModel'],
+ 'preprocessors': ['Preprocessor'],
+ 'pipelines': ['Pipeline', 'pipeline'],
+ 'utils.hub': ['read_config', 'create_model_if_not_exist'],
+ 'utils.logger': ['get_logger'],
+ 'msdatasets': ['MsDataset']
+ }
+
+ import sys
+
+ sys.modules[__name__] = LazyImportModule(
+ __name__,
+ globals()['__file__'],
+ _import_structure,
+ module_spec=__spec__,
+ extra_objects={},
+ )
diff --git a/modelscope/cli/template/template.tpl b/modelscope/cli/template/template.tpl
index 0c09a925..78fe339c 100644
--- a/modelscope/cli/template/template.tpl
+++ b/modelscope/cli/template/template.tpl
@@ -122,10 +122,11 @@ class ${pipeline_name}(Pipeline):
# Tips: usr_config_path is the temporary save configuration location, after upload modelscope hub, it is the model_id
usr_config_path = '${configuration_path}'
config = Config({
- 'framework': 'pytorch',
- 'task': '${task_name}',
- 'model': {'type': 'my-custom-model'},
- "pipeline": {"type": "my-custom-pipeline"}
+ "framework": 'pytorch',
+ "task": '${task_name}',
+ "model": {'type': 'my-custom-model'},
+ "pipeline": {"type": "my-custom-pipeline"},
+ "allow_remote": True
})
config.dump('${configuration_path}' + 'configuration.json')
diff --git a/modelscope/models/cv/human_wholebody_keypoint/__init__.py b/modelscope/exporters/audio/__init__.py
similarity index 75%
rename from modelscope/models/cv/human_wholebody_keypoint/__init__.py
rename to modelscope/exporters/audio/__init__.py
index 30e23457..883151cd 100644
--- a/modelscope/models/cv/human_wholebody_keypoint/__init__.py
+++ b/modelscope/exporters/audio/__init__.py
@@ -1,14 +1,14 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
+
from typing import TYPE_CHECKING
from modelscope.utils.import_utils import LazyImportModule
if TYPE_CHECKING:
- from .human_wholebody_keypoint import HumanWholeBodyKeypoint
-
+ from .ans_dfsmn_exporter import ANSDFSMNExporter
else:
_import_structure = {
- 'human_wholebody_keypoint': ['HumanWholeBodyKeypoint']
+ 'ans_dfsmn_exporter': ['ANSDFSMNExporter'],
}
import sys
diff --git a/modelscope/exporters/audio/ans_dfsmn_exporter.py b/modelscope/exporters/audio/ans_dfsmn_exporter.py
new file mode 100644
index 00000000..976f983f
--- /dev/null
+++ b/modelscope/exporters/audio/ans_dfsmn_exporter.py
@@ -0,0 +1,62 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+
+import torch
+
+from modelscope.exporters.builder import EXPORTERS
+from modelscope.exporters.torch_model_exporter import TorchModelExporter
+from modelscope.metainfo import Models
+from modelscope.utils.constant import ModelFile, Tasks
+
+INPUT_NAME = 'input'
+OUTPUT_NAME = 'output'
+
+
+@EXPORTERS.register_module(
+ Tasks.acoustic_noise_suppression, module_name=Models.speech_dfsmn_ans)
+class ANSDFSMNExporter(TorchModelExporter):
+
+ def export_onnx(self, output_dir: str, opset=9, **kwargs):
+ """Export the model as onnx format files.
+
+ Args:
+ output_dir: The output dir.
+ opset: The version of the ONNX operator set to use.
+ kwargs:
+            device: The device used for the forward pass.
+ Returns:
+ A dict containing the model key - model file path pairs.
+ """
+ model = self.model if 'model' not in kwargs else kwargs.pop('model')
+ device_name = 'cpu' if 'device' not in kwargs else kwargs.pop('device')
+ model_bin_file = os.path.join(model.model_dir,
+ ModelFile.TORCH_MODEL_BIN_FILE)
+ if os.path.exists(model_bin_file):
+ checkpoint = torch.load(model_bin_file, map_location='cpu')
+ model.load_state_dict(checkpoint)
+ onnx_file = os.path.join(output_dir, ModelFile.ONNX_MODEL_FILE)
+
+ with torch.no_grad():
+ model.eval()
+ device = torch.device(device_name)
+ model.to(device)
+ model_script = torch.jit.script(model)
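+            # Dummy fbank input used only for export; the batch and frame axes are declared dynamic below.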
+ fbank_input = torch.zeros((1, 3, 120), dtype=torch.float32)
+ torch.onnx.export(
+ model_script,
+ fbank_input,
+ onnx_file,
+ opset_version=opset,
+ input_names=[INPUT_NAME],
+ output_names=[OUTPUT_NAME],
+ dynamic_axes={
+ INPUT_NAME: {
+ 0: 'batch_size',
+ 1: 'number_of_frame'
+ },
+ OUTPUT_NAME: {
+ 0: 'batch_size',
+ 1: 'number_of_frame'
+ }
+ })
+ return {'model': onnx_file}
diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py
index ad8d0c5d..e3436aea 100644
--- a/modelscope/hub/api.py
+++ b/modelscope/hub/api.py
@@ -6,6 +6,7 @@ import functools
import os
import pickle
import platform
+import re
import shutil
import tempfile
import uuid
@@ -15,10 +16,10 @@ from http.cookiejar import CookieJar
from os.path import expanduser
from typing import Dict, List, Optional, Tuple, Union
+import requests
from requests import Session
from requests.adapters import HTTPAdapter, Retry
-from modelscope import __version__
from modelscope.hub.constants import (API_HTTP_CLIENT_TIMEOUT,
API_RESPONSE_FIELD_DATA,
API_RESPONSE_FIELD_EMAIL,
@@ -45,7 +46,7 @@ from modelscope.utils.constant import (DEFAULT_DATASET_REVISION,
MASTER_MODEL_BRANCH, DatasetFormations,
DatasetMetaFormats,
DatasetVisibilityMap, DownloadChannel,
- ModelFile)
+ ModelFile, VirgoDatasetConfig)
from modelscope.utils.logger import get_logger
from .utils.utils import (get_endpoint, get_release_datetime,
model_id_to_group_owner_name)
@@ -160,6 +161,7 @@ class HubApi:
'Visibility': visibility, # server check
'License': license,
'OriginalModelId': original_model_id,
+ 'TrainId': os.environ.get('MODELSCOPE_TRAIN_ID', ''),
}
r = self.session.post(
path, json=body, cookies=cookies, headers=self.headers)
@@ -236,8 +238,10 @@ class HubApi:
license: Optional[str] = Licenses.APACHE_V2,
chinese_name: Optional[str] = None,
commit_message: Optional[str] = 'upload model',
+ tag: Optional[str] = None,
revision: Optional[str] = DEFAULT_REPOSITORY_REVISION,
- original_model_id: Optional[str] = None):
+ original_model_id: Optional[str] = None,
+ ignore_file_pattern: Optional[Union[List[str], str]] = None):
"""Upload model from a given directory to given repository. A valid model directory
must contain a configuration.json file.
@@ -268,10 +272,13 @@ class HubApi:
chinese name of the new created model.
commit_message(`str`, *optional*, defaults to `None`):
commit message of the push request.
+ tag(`str`, *optional*, defaults to `None`):
+ The tag on this commit
revision (`str`, *optional*, default to DEFAULT_MODEL_REVISION):
which branch to push. If the branch is not exists, It will create a new
branch and push to it.
original_model_id (str, optional): The base model id which this model is trained from
+            ignore_file_pattern (`Union[List[str], str]`, optional): The file pattern(s) to ignore when uploading
Raises:
InvalidParameter: Parameter invalid.
@@ -292,6 +299,10 @@ class HubApi:
if cookies is None:
raise NotLoginException('Must login before upload!')
files_to_save = os.listdir(model_dir)
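+        # Normalize ignore_file_pattern to a list of regex patterns matched against file names below.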
+ if ignore_file_pattern is None:
+ ignore_file_pattern = []
+ if isinstance(ignore_file_pattern, str):
+ ignore_file_pattern = [ignore_file_pattern]
try:
self.get_model(model_id=model_id)
except Exception:
@@ -325,6 +336,8 @@ class HubApi:
shutil.rmtree(src, ignore_errors=True)
for f in files_to_save:
if f[0] != '.':
+ if any([re.search(pattern, f) is not None for pattern in ignore_file_pattern]):
+ continue
src = os.path.join(model_dir, f)
if os.path.isdir(src):
shutil.copytree(src, os.path.join(tmp_dir, f))
@@ -338,6 +351,8 @@ class HubApi:
commit_message=commit_message,
local_branch=revision,
remote_branch=revision)
+ if tag is not None:
+ repo.tag_and_push(tag, tag)
except Exception:
raise
finally:
@@ -581,6 +596,17 @@ class HubApi:
file_list = file_list['Files']
return file_list
+ @staticmethod
+ def dump_datatype_file(dataset_type: int, meta_cache_dir: str):
+ """
+        Dump the data_type as a local file so that the dataset formation can be determined without calling the datahub.
+        For more details, please refer to the class `modelscope.utils.constant.DatasetFormations`.
+ """
+ dataset_type_file_path = os.path.join(meta_cache_dir,
+ f'{str(dataset_type)}{DatasetFormations.formation_mark_ext.value}')
+ with open(dataset_type_file_path, 'w') as fp:
+ fp.write('*** Automatically-generated file, do not modify ***')
+
def get_dataset_meta_files_local_paths(self, dataset_name: str,
namespace: str,
revision: str,
@@ -591,10 +617,7 @@ class HubApi:
cookies = ModelScopeConfig.get_cookies()
# Dump the data_type as a local file
- dataset_type_file_path = os.path.join(meta_cache_dir,
- f'{str(dataset_type)}{DatasetFormations.formation_mark_ext.value}')
- with open(dataset_type_file_path, 'w') as fp:
- fp.write('*** Automatically-generated file, do not modify ***')
+ HubApi.dump_datatype_file(dataset_type=dataset_type, meta_cache_dir=meta_cache_dir)
for file_info in file_list:
file_path = file_info['Path']
@@ -661,7 +684,6 @@ class HubApi:
cookies = self._check_cookie(use_cookies=True)
else:
cookies = ModelScopeConfig.get_cookies()
- r = self.session.get(url=datahub_url, cookies=cookies, headers=self.headers)
r = self.session.get(
url=datahub_url, cookies=cookies, headers=self.headers)
@@ -669,6 +691,31 @@ class HubApi:
raise_on_error(resp)
return resp['Data']
+ def get_virgo_meta(self, dataset_id: str, version: int = 1) -> dict:
+ """
+ Get virgo dataset meta info.
+ """
+ virgo_endpoint = os.environ.get(VirgoDatasetConfig.env_virgo_endpoint, '')
+ if not virgo_endpoint:
+ raise RuntimeError(f'Virgo endpoint is not set in env: {VirgoDatasetConfig.env_virgo_endpoint}')
+
+ virgo_dataset_url = f'{virgo_endpoint}/data/set/download'
+ cookies = requests.utils.dict_from_cookiejar(ModelScopeConfig.get_cookies())
+
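+        # The Virgo download endpoint expects the dataset id and version wrapped in a 'data' payload.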
+ dataset_info = dict(
+ dataSetId=dataset_id,
+ dataSetVersion=version
+ )
+ data = dict(
+ data=dataset_info,
+ )
+ r = self.session.post(url=virgo_dataset_url, json=data, cookies=cookies, headers=self.headers, timeout=900)
+ resp = r.json()
+ if resp['code'] != 0:
+ raise RuntimeError(f'Failed to get virgo dataset: {resp}')
+
+ return resp['data']
+
def get_dataset_access_config_for_unzipped(self,
dataset_name: str,
namespace: str,
@@ -895,6 +942,7 @@ class ModelScopeConfig:
if MODELSCOPE_CLOUD_USERNAME in os.environ:
user_name = os.environ[MODELSCOPE_CLOUD_USERNAME]
+ from modelscope import __version__
ua = 'modelscope/%s; python/%s; session_id/%s; platform/%s; processor/%s; env/%s; user/%s' % (
__version__,
platform.python_version(),
diff --git a/modelscope/hub/errors.py b/modelscope/hub/errors.py
index be94d7fd..4bf2f935 100644
--- a/modelscope/hub/errors.py
+++ b/modelscope/hub/errors.py
@@ -2,6 +2,7 @@
from http import HTTPStatus
+import requests
from requests.exceptions import HTTPError
from modelscope.utils.logger import get_logger
@@ -57,13 +58,22 @@ def is_ok(rsp):
return rsp['Code'] == HTTPStatus.OK and rsp['Success']
+def _decode_response_error(response: requests.Response):
+ if 'application/json' in response.headers.get('content-type', ''):
+ message = response.json()
+ else:
+ message = response.content.decode('utf-8')
+ return message
+
+
def handle_http_post_error(response, url, request_body):
try:
response.raise_for_status()
except HTTPError as error:
logger.error('Request %s with body: %s exception' %
(url, request_body))
- logger.error('Response details: %s' % response.content)
+ message = _decode_response_error(response)
+ logger.error('Response details: %s' % message)
raise error
@@ -75,7 +85,8 @@ def handle_http_response(response, logger, cookies, model_id):
logger.error(
f'Authentication token does not exist, failed to access model {model_id} which may not exist or may be \
private. Please login first.')
- logger.error('Response details: %s' % response.content)
+ message = _decode_response_error(response)
+ logger.error('Response details: %s' % message)
raise error
diff --git a/modelscope/hub/file_download.py b/modelscope/hub/file_download.py
index 380d2432..6d3ad63d 100644
--- a/modelscope/hub/file_download.py
+++ b/modelscope/hub/file_download.py
@@ -12,7 +12,6 @@ import requests
from requests.adapters import Retry
from tqdm import tqdm
-from modelscope import __version__
from modelscope.hub.api import HubApi, ModelScopeConfig
from modelscope.hub.constants import (API_FILE_DOWNLOAD_CHUNK_SIZE,
API_FILE_DOWNLOAD_RETRY_TIMES,
diff --git a/modelscope/hub/git.py b/modelscope/hub/git.py
index 80887738..b0fae148 100644
--- a/modelscope/hub/git.py
+++ b/modelscope/hub/git.py
@@ -55,16 +55,10 @@ class GitCommandWrapper(metaclass=Singleton):
response.check_returncode()
return response
except subprocess.CalledProcessError as error:
- if response.returncode == 1:
- logger.info('Nothing to commit.')
- return response
- else:
- logger.error(
- 'There are error run git command, you may need to login first.'
- )
- raise GitError('stdout: %s, stderr: %s' %
- (response.stdout.decode('utf8'),
- error.stderr.decode('utf8')))
+            logger.error('Error running git command.')
+ raise GitError(
+ 'stdout: %s, stderr: %s' %
+ (response.stdout.decode('utf8'), error.stderr.decode('utf8')))
def config_auth_token(self, repo_dir, auth_token):
url = self.get_repo_remote_url(repo_dir)
@@ -199,8 +193,11 @@ class GitCommandWrapper(metaclass=Singleton):
else:
return ['/'.join(line.split('/')[1:]) for line in info[1:]]
- def pull(self, repo_dir: str):
- cmds = ['-C', repo_dir, 'pull']
+ def pull(self,
+ repo_dir: str,
+ remote: str = 'origin',
+ branch: str = 'master'):
+ cmds = ['-C', repo_dir, 'pull', remote, branch]
return self._run_git_command(*cmds)
def push(self,
diff --git a/modelscope/hub/push_to_hub.py b/modelscope/hub/push_to_hub.py
index ee7b240e..d117cc7f 100644
--- a/modelscope/hub/push_to_hub.py
+++ b/modelscope/hub/push_to_hub.py
@@ -4,8 +4,8 @@ import concurrent.futures
import os
from modelscope.hub.api import HubApi
-from modelscope.hub.constants import Licenses, ModelVisibility
-from modelscope.hub.errors import NotExistError
+from modelscope.hub.constants import ModelVisibility
+from modelscope.utils.constant import DEFAULT_REPOSITORY_REVISION
from modelscope.utils.logger import get_logger
logger = get_logger()
@@ -18,7 +18,10 @@ def _api_push_to_hub(repo_name,
token,
private=True,
commit_message='',
- source_repo=''):
+ tag=None,
+ source_repo='',
+ ignore_file_pattern=None,
+ revision=DEFAULT_REPOSITORY_REVISION):
try:
api = HubApi()
api.login(token)
@@ -29,7 +32,10 @@ def _api_push_to_hub(repo_name,
if not private else ModelVisibility.PRIVATE,
chinese_name=repo_name,
commit_message=commit_message,
- original_model_id=source_repo)
+ tag=tag,
+ original_model_id=source_repo,
+ ignore_file_pattern=ignore_file_pattern,
+ revision=revision)
commit_message = commit_message or 'No commit message'
logger.info(
f'Successfully upload the model to {repo_name} with message: {commit_message}'
@@ -48,7 +54,10 @@ def push_to_hub(repo_name,
private=True,
retry=3,
commit_message='',
- source_repo=''):
+ tag=None,
+ source_repo='',
+ ignore_file_pattern=None,
+ revision=DEFAULT_REPOSITORY_REVISION):
"""
Args:
repo_name: The repo name for the modelhub repo
@@ -57,13 +66,18 @@ def push_to_hub(repo_name,
private: If is a private repo, default True
retry: Retry times if something error in uploading, default 3
commit_message: The commit message
+ tag: The tag of this commit
source_repo: The source repo (model id) which this model comes from
-
+        ignore_file_pattern: The file pattern to be ignored when uploading.
+ revision: The branch to commit to
Returns:
The boolean value to represent whether the model is uploaded.
"""
if token is None:
token = os.environ.get('MODELSCOPE_API_TOKEN')
+ if ignore_file_pattern is None:
+ ignore_file_pattern = os.environ.get('UPLOAD_IGNORE_FILE_PATTERN')
+ assert repo_name is not None
assert token is not None, 'Either pass in a token or to set `MODELSCOPE_API_TOKEN` in the environment variables.'
assert os.path.isdir(output_dir)
assert 'configuration.json' in os.listdir(output_dir) or 'configuration.yaml' in os.listdir(output_dir) \
@@ -73,7 +87,8 @@ def push_to_hub(repo_name,
f'Uploading {output_dir} to {repo_name} with message {commit_message}')
for i in range(retry):
if _api_push_to_hub(repo_name, output_dir, token, private,
- commit_message, source_repo):
+ commit_message, tag, source_repo,
+ ignore_file_pattern, revision):
return True
return False
@@ -83,7 +98,10 @@ def push_to_hub_async(repo_name,
token=None,
private=True,
commit_message='',
- source_repo=''):
+ tag=None,
+ source_repo='',
+ ignore_file_pattern=None,
+ revision=DEFAULT_REPOSITORY_REVISION):
"""
Args:
repo_name: The repo name for the modelhub repo
@@ -91,13 +109,18 @@ def push_to_hub_async(repo_name,
token: The user api token, function will check the `MODELSCOPE_API_TOKEN` variable if this argument is None
private: If is a private repo, default True
commit_message: The commit message
+ tag: The tag of this commit
source_repo: The source repo (model id) which this model comes from
-
+        ignore_file_pattern: The file pattern to be ignored when uploading
+ revision: The branch to commit to
Returns:
A handler to check the result and the status
"""
if token is None:
token = os.environ.get('MODELSCOPE_API_TOKEN')
+ if ignore_file_pattern is None:
+ ignore_file_pattern = os.environ.get('UPLOAD_IGNORE_FILE_PATTERN')
+ assert repo_name is not None
assert token is not None, 'Either pass in a token or to set `MODELSCOPE_API_TOKEN` in the environment variables.'
assert os.path.isdir(output_dir)
assert 'configuration.json' in os.listdir(output_dir) or 'configuration.yaml' in os.listdir(output_dir) \
@@ -106,4 +129,5 @@ def push_to_hub_async(repo_name,
logger.info(
f'Uploading {output_dir} to {repo_name} with message {commit_message}')
return _executor.submit(_api_push_to_hub, repo_name, output_dir, token,
- private, commit_message, source_repo)
+ private, commit_message, tag, source_repo,
+ ignore_file_pattern, revision)
diff --git a/modelscope/hub/repository.py b/modelscope/hub/repository.py
index 1d107a3c..3fc6da2b 100644
--- a/modelscope/hub/repository.py
+++ b/modelscope/hub/repository.py
@@ -88,6 +88,26 @@ class Repository:
remote = None
return remote
+ def pull(self, remote: str = 'origin', branch: str = 'master'):
+ """Pull remote branch
+
+ Args:
+ remote (str, optional): The remote name. Defaults to 'origin'.
+ branch (str, optional): The remote branch. Defaults to 'master'.
+ """
+ self.git_wrapper.pull(self.model_dir, remote=remote, branch=branch)
+
+ def add_lfs_type(self, file_name_suffix: str):
+ """Add file suffix to lfs list.
+
+ Args:
+ file_name_suffix (str): The file name suffix.
+                e.g. '*.safetensors'
+ """
+ os.system(
+ "printf '%s filter=lfs diff=lfs merge=lfs -text\n'>>%s" %
+ (file_name_suffix, os.path.join(self.model_dir, '.gitattributes')))
+
def push(self,
commit_message: str,
local_branch: Optional[str] = DEFAULT_REPOSITORY_REVISION,
@@ -120,7 +140,6 @@ class Repository:
self.model_repo_name)
url = self.git_wrapper.get_repo_remote_url(self.model_dir)
- self.git_wrapper.pull(self.model_dir)
self.git_wrapper.add(self.model_dir, all_files=True)
self.git_wrapper.commit(self.model_dir, commit_message)
diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py
index 60ad6d85..c4057314 100644
--- a/modelscope/metainfo.py
+++ b/modelscope/metainfo.py
@@ -116,15 +116,9 @@ class Models(object):
bad_image_detecting = 'bad-image-detecting'
controllable_image_generation = 'controllable-image-generation'
longshortnet = 'longshortnet'
+ fastinst = 'fastinst'
pedestrian_attribute_recognition = 'pedestrian-attribute-recognition'
- # EasyCV models
- yolox = 'YOLOX'
- segformer = 'Segformer'
- hand_2d_keypoints = 'HRNet-Hand2D-Keypoints'
- image_object_detection_auto = 'image-object-detection-auto'
- dino = 'DINO'
-
# nlp models
bert = 'bert'
palm = 'palm-v2'
@@ -177,6 +171,7 @@ class Models(object):
speech_frcrn_ans_cirm_16k = 'speech_frcrn_ans_cirm_16k'
speech_dfsmn_ans = 'speech_dfsmn_ans'
speech_dfsmn_kws_char_farfield = 'speech_dfsmn_kws_char_farfield'
+ speech_dfsmn_kws_char_farfield_iot = 'speech_dfsmn_kws_char_farfield_iot'
speech_kws_fsmn_char_ctc_nearfield = 'speech_kws_fsmn_char_ctc_nearfield'
speech_mossformer_separation_temporal_8k = 'speech_mossformer_separation_temporal_8k'
kws_kwsbp = 'kws-kwsbp'
@@ -187,6 +182,9 @@ class Models(object):
generic_sv = 'generic-sv'
ecapa_tdnn_sv = 'ecapa-tdnn-sv'
campplus_sv = 'cam++-sv'
+ eres2net_sv = 'eres2net-sv'
+ scl_sd = 'scl-sd'
+ rdino_tdnn_sv = 'rdino_ecapa-tdnn-sv'
generic_lm = 'generic-lm'
# multi-modal models
@@ -205,6 +203,8 @@ class Models(object):
hitea = 'hitea'
soonet = 'soonet'
efficient_diffusion_tuning = 'efficient-diffusion-tuning'
+ mplug_owl = 'mplug-owl'
+ clip_interrogator = 'clip-interrogator'
# science models
unifold = 'unifold'
@@ -255,6 +255,7 @@ class Pipelines(object):
should use task name for this pipeline.
For pipeline which suuport only one model, we should use ${Model}-${Task} as its name.
"""
+ pipeline_template = 'pipeline-template'
# vision tasks
portrait_matting = 'unet-image-matting'
universal_matting = 'unet-universal-matting'
@@ -277,8 +278,6 @@ class Pipelines(object):
tbs_detection = 'tbs-detection'
object_detection = 'vit-object-detection'
abnormal_object_detection = 'abnormal-object-detection'
- easycv_detection = 'easycv-detection'
- easycv_segmentation = 'easycv-segmentation'
face_2d_keypoints = 'mobilenet_face-2d-keypoints_alignment'
salient_detection = 'u2net-salient-detection'
salient_boudary_detection = 'res2net-salient-detection'
@@ -347,7 +346,6 @@ class Pipelines(object):
video_single_object_tracking_procontext = 'procontext-vitb-video-single-object-tracking'
video_multi_object_tracking = 'video-multi-object-tracking'
image_panoptic_segmentation = 'image-panoptic-segmentation'
- image_panoptic_segmentation_easycv = 'image-panoptic-segmentation-easycv'
video_summarization = 'googlenet_pgl_video_summarization'
language_guided_video_summarization = 'clip-it-video-summarization'
image_semantic_segmentation = 'image-semantic-segmentation'
@@ -402,7 +400,7 @@ class Pipelines(object):
nerf_recon_acc = 'nerf-recon-acc'
bad_image_detecting = 'bad-image-detecting'
controllable_image_generation = 'controllable-image-generation'
-
+ fast_instance_segmentation = 'fast-instance-segmentation'
image_quality_assessment_mos = 'image-quality-assessment-mos'
image_quality_assessment_man = 'image-quality-assessment-man'
image_quality_assessment_degradation = 'image-quality-assessment-degradation'
@@ -485,6 +483,9 @@ class Pipelines(object):
speaker_diarization_inference = 'speaker-diarization-inference'
vad_inference = 'vad-inference'
speaker_verification = 'speaker-verification'
+ speaker_verification_rdino = 'speaker-verification-rdino'
+ speaker_verification_eres2net = 'speaker-verification-eres2net'
+ speaker_change_locating = 'speaker-change-locating'
lm_inference = 'language-score-prediction'
speech_timestamp_inference = 'speech-timestamp-inference'
@@ -514,6 +515,7 @@ class Pipelines(object):
gridvlp_multi_modal_embedding = 'gridvlp-multi-modal-embedding'
soonet_video_temporal_grounding = 'soonet-video-temporal-grounding'
efficient_diffusion_tuning = 'efficient-diffusion-tuning'
+ multimodal_dialogue = 'multimodal-dialogue'
# science tasks
protein_structure = 'unifold-protein-structure'
@@ -881,6 +883,7 @@ class NLPTrainers(object):
document_grounded_dialog_rerank_trainer = 'document-grounded-dialog-rerank-trainer'
document_grounded_dialog_retrieval_trainer = 'document-grounded-dialog-retrieval-trainer'
siamese_uie_trainer = 'siamese-uie-trainer'
+ translation_evaluation_trainer = 'translation-evaluation-trainer'
class MultiModalTrainers(object):
@@ -911,7 +914,6 @@ class Trainers(CVTrainers, NLPTrainers, MultiModalTrainers, AudioTrainers):
"""
default = 'trainer'
- easycv = 'easycv'
tinynas_damoyolo = 'tinynas-damoyolo'
@staticmethod
@@ -933,8 +935,6 @@ class Trainers(CVTrainers, NLPTrainers, MultiModalTrainers, AudioTrainers):
return Fields.multi_modal
elif attribute_or_value == Trainers.default:
return Trainers.default
- elif attribute_or_value == Trainers.easycv:
- return Trainers.easycv
else:
return 'unknown'
@@ -1034,6 +1034,8 @@ class Preprocessors(object):
vldoc_preprocessor = 'vldoc-preprocessor'
hitea_tasks_preprocessor = 'hitea-tasks-preprocessor'
diffusion_image_generation_preprocessor = 'diffusion-image-generation-preprocessor'
+ mplug_owl_preprocessor = 'mplug-owl-preprocessor'
+ image_captioning_clip_interrogator_preprocessor = 'image-captioning-clip-interrogator-preprocessor'
# science preprocessor
unifold_preprocessor = 'unifold-preprocessor'
@@ -1098,6 +1100,8 @@ class Metrics(object):
# metric for image-colorization task
image_colorization_metric = 'image-colorization-metric'
ocr_recognition_metric = 'ocr-recognition-metric'
+ # metric for translation evaluation
+ translation_evaluation_metric = 'translation-evaluation-metric'
class Optimizers(object):
@@ -1165,14 +1169,6 @@ class LR_Schedulers(object):
class CustomDatasets(object):
""" Names for different datasets.
"""
- ClsDataset = 'ClsDataset'
- Face2dKeypointsDataset = 'FaceKeypointDataset'
- HandCocoWholeBodyDataset = 'HandCocoWholeBodyDataset'
- HumanWholeBodyKeypointDataset = 'WholeBodyCocoTopDownDataset'
- SegDataset = 'SegDataset'
- DetDataset = 'DetDataset'
- DetImagesMixDataset = 'DetImagesMixDataset'
- PanopticDataset = 'PanopticDataset'
PairedDataset = 'PairedDataset'
SiddDataset = 'SiddDataset'
GoproDataset = 'GoproDataset'
diff --git a/modelscope/metrics/__init__.py b/modelscope/metrics/__init__.py
index 17767001..6f5dfbde 100644
--- a/modelscope/metrics/__init__.py
+++ b/modelscope/metrics/__init__.py
@@ -31,6 +31,7 @@ if TYPE_CHECKING:
from .loss_metric import LossMetric
from .image_colorization_metric import ImageColorizationMetric
from .ocr_recognition_metric import OCRRecognitionMetric
+ from .translation_evaluation_metric import TranslationEvaluationMetric
else:
_import_structure = {
'audio_noise_metric': ['AudioNoiseMetric'],
@@ -62,7 +63,8 @@ else:
'text_ranking_metric': ['TextRankingMetric'],
'loss_metric': ['LossMetric'],
'image_colorization_metric': ['ImageColorizationMetric'],
- 'ocr_recognition_metric': ['OCRRecognitionMetric']
+ 'ocr_recognition_metric': ['OCRRecognitionMetric'],
+ 'translation_evaluation_metric': ['TranslationEvaluationMetric']
}
import sys
diff --git a/modelscope/metrics/builder.py b/modelscope/metrics/builder.py
index 2bc756e6..43aaea14 100644
--- a/modelscope/metrics/builder.py
+++ b/modelscope/metrics/builder.py
@@ -42,6 +42,7 @@ class MetricKeys(object):
NDCG = 'ndcg'
AR = 'AR'
Colorfulness = 'colorfulness'
+ Kendall_Tau_Correlation = 'kendall_tau_correlation'
task_default_metrics = {
@@ -76,6 +77,7 @@ task_default_metrics = {
Tasks.bad_image_detecting: [Metrics.accuracy],
Tasks.ocr_recognition: [Metrics.ocr_recognition_metric],
Tasks.efficient_diffusion_tuning: [Metrics.loss_metric],
+ Tasks.translation_evaluation: [Metrics.translation_evaluation_metric]
}
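
With this mapping in place, a trainer that does not configure metrics explicitly falls back to the translation-evaluation metric for the task. A quick sanity check of the mapping, using only names that appear in this diff:

```python
# Check the default-metric mapping added above; all names come from this diff.
from modelscope.metainfo import Metrics
from modelscope.metrics.builder import task_default_metrics
from modelscope.utils.constant import Tasks

assert Metrics.translation_evaluation_metric in \
    task_default_metrics[Tasks.translation_evaluation]
```
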
diff --git a/modelscope/metrics/translation_evaluation_metric.py b/modelscope/metrics/translation_evaluation_metric.py
new file mode 100644
index 00000000..81705d3b
--- /dev/null
+++ b/modelscope/metrics/translation_evaluation_metric.py
@@ -0,0 +1,174 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import importlib
+from typing import Dict, List, Union
+
+from pandas import DataFrame
+
+from modelscope.metainfo import Metrics
+from modelscope.metrics.base import Metric
+from modelscope.metrics.builder import METRICS, MetricKeys
+from modelscope.models.nlp.unite.configuration import InputFormat
+from modelscope.utils.logger import get_logger
+from modelscope.utils.registry import default_group
+
+logger = get_logger()
+
+
+@METRICS.register_module(
+ group_key=default_group, module_name=Metrics.translation_evaluation_metric)
+class TranslationEvaluationMetric(Metric):
+ r"""The metric class for translation evaluation.
+
+ """
+
+ def __init__(self, gap_threshold: float = 25.0):
+ r"""Build a translation evaluation metric, following the designed
+ Kendall's tau correlation from WMT Metrics Shared Task competitions.
+
+ Args:
+ gap_threshold: The score gap denoting the available hypothesis pair.
+
+ Returns:
+ A metric for translation evaluation.
+ """
+ self.gap_threshold = gap_threshold
+
+ self.lp = list()
+ self.segment_id = list()
+ self.raw_score = list()
+ self.score = list()
+ self.input_format = list()
+
+ def clear(self) -> None:
+ r"""Clear all the stored variables.
+ """
+ self.lp.clear()
+ self.segment_id.clear()
+ self.raw_score.clear()
+ self.input_format.clear()
+
+ self.score.clear()
+
+ return
+
+ def add(self, outputs: Dict[str, List[float]],
+ inputs: Dict[str, List[Union[float, int]]]) -> None:
+ r"""Collect the related results for processing.
+
+ Args:
+            outputs: Dict containing 'score'.
+            inputs: Dict containing 'lp', 'segment_id', 'raw_score' and
+                'input_format'.
+
+ """
+
+ self.lp += inputs['lp']
+ self.segment_id += inputs['segment_id']
+ self.raw_score += inputs['raw_score']
+ self.input_format += inputs['input_format']
+
+ self.score += outputs['score']
+
+ return
+
+ def evaluate(self) -> Dict[str, Dict[str, float]]:
+ r"""Compute the Kendall's tau correlation.
+
+ Returns:
+ A dict denoting Kendall's tau correlation.
+
+ """
+
+ data = {
+ 'lp': self.lp,
+ 'segment_id': self.segment_id,
+ 'raw_score': self.raw_score,
+ 'input_format': self.input_format,
+ 'score': self.score
+ }
+ data = DataFrame(data=data)
+ correlation = dict()
+
+ for input_format in data.input_format.unique():
+ logger.info('Evaluation results for %s input format'
+ % input_format.value)
+ input_format_data = data[data.input_format == input_format]
+
+ temp_correlation = dict()
+
+ for lp in sorted(input_format_data.lp.unique()):
+ sub_data = input_format_data[input_format_data.lp == lp]
+ temp_correlation[input_format.value + '_'
+ + lp] = self.compute_kendall_tau(sub_data)
+ logger.info(
+ '\t%s: %f' %
+ (lp,
+ temp_correlation[input_format.value + '_' + lp] * 100))
+
+ avg_correlation = sum(
+ temp_correlation.values()) / len(temp_correlation)
+ correlation[input_format.value + '_avg'] = avg_correlation
+ logger.info('Average evaluation result for %s input format: %f' %
+ (input_format.value, avg_correlation))
+ logger.info('')
+ correlation.update(temp_correlation)
+
+ return correlation
+
+ def merge(self, other: 'TranslationEvaluationMetric') -> None:
+ r"""Merge the predictions from other TranslationEvaluationMetric objects.
+
+ Args:
+ other: Another TranslationEvaluationMetric object.
+
+ """
+
+ self.lp += other.lp
+        self.segment_id += other.segment_id
+ self.raw_score += other.raw_score
+ self.input_format += other.input_format
+
+ self.score += other.score
+
+ return
+
+ def compute_kendall_tau(self, csv_data: DataFrame) -> float:
+ r"""Compute kendall's tau correlation.
+
+ Args:
+ csv_data: The pandas dataframe.
+
+ Returns:
+            float: The Kendall's tau correlation.
+
+ """
+ concor = discor = 0
+
+ for segment_id in sorted(csv_data.segment_id.unique()):
+ group_csv_data = csv_data[csv_data.segment_id == segment_id]
+
+ examples = group_csv_data.to_dict('records')
+
+            for i in range(0, len(examples)):
+                for j in range(i + 1, len(examples)):
+                    # Compare hypotheses within the current segment group.
+                    if examples[i]['raw_score'] - examples[j][
+                            'raw_score'] >= self.gap_threshold:
+                        if examples[i]['score'] > examples[j]['score']:
+                            concor += 1
+                        elif examples[i]['score'] < examples[j]['score']:
+                            discor += 1
+                    elif examples[i]['raw_score'] - examples[j][
+                            'raw_score'] <= -self.gap_threshold:
+                        if examples[i]['score'] < examples[j]['score']:
+                            concor += 1
+                        elif examples[i]['score'] > examples[j]['score']:
+                            discor += 1
+
+ if concor + discor == 0:
+ logger.warning(
+                'No available hypothesis pairs were found during evaluation. '
+                'Marking the Kendall tau correlation as the lowest value (-1.0).'
+ )
+ return -1.0
+ else:
+ return (concor - discor) / (concor + discor)
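
A toy sketch of how this metric accumulates batches and reports per-language-pair correlations. `FakeFormat` is a stand-in for the real `InputFormat` enum from `modelscope.models.nlp.unite.configuration`, whose member names are not shown here; the printed keys follow the `<format>_<lp>` / `<format>_avg` scheme from `evaluate()`.

```python
# Toy run of TranslationEvaluationMetric with a single segment group.
from enum import Enum

from modelscope.metrics.translation_evaluation_metric import \
    TranslationEvaluationMetric


class FakeFormat(Enum):
    SRC = 'src'   # hypothetical member standing in for InputFormat


metric = TranslationEvaluationMetric(gap_threshold=25.0)
metric.add(
    outputs={'score': [0.9, 0.2, 0.6]},
    inputs={
        'lp': ['en-de'] * 3,
        'segment_id': [0, 0, 0],
        'raw_score': [95.0, 20.0, 60.0],   # human scores; gaps >= 25 form pairs
        'input_format': [FakeFormat.SRC] * 3,
    })
print(metric.evaluate())
# All three pairs are concordant here, so both 'src_en-de' and 'src_avg' are 1.0.
```
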
diff --git a/modelscope/models/audio/ans/conv_stft.py b/modelscope/models/audio/ans/conv_stft.py
index 4b393a4c..3d37f1aa 100644
--- a/modelscope/models/audio/ans/conv_stft.py
+++ b/modelscope/models/audio/ans/conv_stft.py
@@ -39,7 +39,7 @@ class ConvSTFT(nn.Module):
super(ConvSTFT, self).__init__()
if fft_len is None:
- self.fft_len = np.int(2**np.ceil(np.log2(win_len)))
+ self.fft_len = int(2**np.ceil(np.log2(win_len)))
else:
self.fft_len = fft_len
@@ -78,7 +78,7 @@ class ConviSTFT(nn.Module):
fix=True):
super(ConviSTFT, self).__init__()
if fft_len is None:
- self.fft_len = np.int(2**np.ceil(np.log2(win_len)))
+ self.fft_len = int(2**np.ceil(np.log2(win_len)))
else:
self.fft_len = fft_len
kernel, window = init_kernels(
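
The two hunks above replace `np.int`, which was removed in NumPy 1.24, with the builtin `int`; the computed FFT length is unchanged:

```python
# The padded FFT length is still the next power of two >= win_len.
import numpy as np

win_len = 400                                   # e.g. a 25 ms window at 16 kHz
fft_len = int(2 ** np.ceil(np.log2(win_len)))   # 512
```
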
diff --git a/modelscope/models/audio/asr/generic_automatic_speech_recognition.py b/modelscope/models/audio/asr/generic_automatic_speech_recognition.py
index 25de839e..8dd11982 100644
--- a/modelscope/models/audio/asr/generic_automatic_speech_recognition.py
+++ b/modelscope/models/audio/asr/generic_automatic_speech_recognition.py
@@ -45,27 +45,5 @@ class GenericAutomaticSpeechRecognition(Model):
def forward(self) -> Dict[str, Any]:
"""preload model and return the info of the model
"""
- if self.model_cfg['model_config']['type'] == Frameworks.tf:
- from easyasr import asr_inference_paraformer_tf
- if hasattr(asr_inference_paraformer_tf, 'preload'):
- model_workspace = self.model_cfg['model_workspace']
- model_path = os.path.join(model_workspace,
- self.model_cfg['am_model'])
- vocab_path = os.path.join(
- model_workspace,
- self.model_cfg['model_config']['vocab_file'])
- sampled_ids = 'seq2seq/sampled_ids'
- sampled_lengths = 'seq2seq/sampled_lengths'
- if 'sampled_ids' in self.model_cfg['model_config']:
- sampled_ids = self.model_cfg['model_config']['sampled_ids']
- if 'sampled_lengths' in self.model_cfg['model_config']:
- sampled_lengths = self.model_cfg['model_config'][
- 'sampled_lengths']
- asr_inference_paraformer_tf.preload(
- ngpu=1,
- asr_model_file=model_path,
- vocab_file=vocab_path,
- sampled_ids=sampled_ids,
- sampled_lengths=sampled_lengths)
return self.model_cfg
diff --git a/modelscope/models/audio/kws/farfield/fsmn_sele_v3.py b/modelscope/models/audio/kws/farfield/fsmn_sele_v3.py
new file mode 100644
index 00000000..d57354d0
--- /dev/null
+++ b/modelscope/models/audio/kws/farfield/fsmn_sele_v3.py
@@ -0,0 +1,233 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from .fsmn import AffineTransform, Fsmn, LinearTransform, RectifiedLinear
+from .model_def import HEADER_BLOCK_SIZE, ActivationType, LayerType, f32ToI32
+
+
+class DFSMNUnit(nn.Module):
+ """ one multi-channel deep fsmn unit
+ Args:
+ dimin: input dimension
+ dimexpand: feature expansion dimension
+ dimout: output dimension
+        lorder: left order
+ rorder: right order
+ """
+
+ def __init__(self,
+ dimin=64,
+ dimexpand=128,
+ dimout=64,
+ lorder=10,
+ rorder=1):
+ super(DFSMNUnit, self).__init__()
+
+ self.expand = AffineTransform(dimin, dimexpand)
+ self.shrink = LinearTransform(dimexpand, dimout)
+ self.fsmn = Fsmn(dimout, dimout, lorder, rorder, 1, 1)
+
+ self.debug = False
+ self.dataout = None
+
+ def forward(self, input):
+ """
+ Args:
+ input: [batch, time, feature]
+ """
+ out1 = F.relu(self.expand(input))
+ out2 = self.shrink(out1)
+ out3 = self.fsmn(out2)
+
+ # add skip connection for matched data
+ if input.shape[-1] == out3.shape[-1]:
+ out3 = input + out3
+ if self.debug:
+ self.dataout = out3
+ return out3
+
+ def print_model(self):
+ self.expand.printModel()
+ self.shrink.printModel()
+ self.fsmn.printModel()
+
+ def to_kaldi_nnet(self):
+ re_str = self.expand.toKaldiNNet()
+ relu = RectifiedLinear(self.expand.linear.out_features,
+ self.expand.linear.out_features)
+ re_str += relu.toKaldiNNet()
+        re_str += self.shrink.toKaldiNNet()
+ re_str += self.fsmn.toKaldiNNet()
+ return re_str
+
+
+class FSMNSeleNetV3(nn.Module):
+ """ Deep FSMN model with channel selection performs multi-channel kws.
+ Zhang, Shiliang, et al. "Deep-FSMN for large vocabulary continuous speech
+ recognition." 2018 IEEE International Conference on Acoustics, Speech and
+ Signal Processing (ICASSP). IEEE, 2018.
+
+ Args:
+ input_dim: input dimension
+ linear_dim: fsmn input dimension
+ proj_dim: fsmn projection dimension
+ lorder: fsmn left order
+ rorder: fsmn right order
+ num_syn: output dimension
+ fsmn_layers: no. of fsmn units
+ """
+
+ def __init__(self,
+ input_dim=120,
+ linear_dim=128,
+ proj_dim=64,
+ lorder=10,
+ rorder=1,
+ num_syn=5,
+ fsmn_layers=5):
+ super(FSMNSeleNetV3, self).__init__()
+
+ self.mem = []
+ # the first unit, mapping input dim to proj dim
+ unit = DFSMNUnit(input_dim, linear_dim, proj_dim, lorder, rorder)
+ self.mem.append(unit)
+ self.add_module('mem_{:d}'.format(0), unit)
+
+ # deep fsmn layers with skip connection
+ for i in range(1, fsmn_layers):
+ unit = DFSMNUnit(proj_dim, linear_dim, proj_dim, lorder, rorder)
+ self.mem.append(unit)
+ self.add_module('mem_{:d}'.format(i), unit)
+
+ self.expand2 = AffineTransform(proj_dim, linear_dim)
+ self.decision = AffineTransform(linear_dim, num_syn)
+
+ def forward(self, input):
+ # multi-channel temp space, [batch, time, channel, feature]
+ if torch.cuda.is_available():
+ x = torch.zeros(input.shape[0], input.shape[1], input.shape[2],
+ self.expand2.linear.out_features).cuda()
+ else:
+ x = torch.zeros(input.shape[0], input.shape[1], input.shape[2],
+ self.expand2.linear.out_features)
+
+ for n in range(input.shape[2]):
+ chin = input[:, :, n, :]
+
+ for unit in self.mem:
+ chout = unit(chin)
+ chin = chout
+
+ x[:, :, n, :] = F.relu(self.expand2(chout))
+
+ # perform max pooling
+ pool = nn.MaxPool2d((x.shape[2], 1), stride=(x.shape[2], 1))
+ y = pool(x)
+
+ # remove channel dimension
+ y = torch.squeeze(y, -2)
+ z = self.decision(y)
+
+ return z
+
+ def print_model(self):
+ for unit in self.mem:
+ unit.print_model()
+
+ self.expand2.printModel()
+ self.decision.printModel()
+
+ def print_header(self):
+ """ get DFSMN params
+ """
+ input_dim = self.mem[0].expand.linear.in_features
+ linear_dim = self.mem[0].expand.linear.out_features
+ proj_dim = self.mem[0].shrink.linear.out_features
+ lorder = self.mem[0].fsmn.conv_left.kernel_size[0]
+ rorder = 0
+ if self.mem[0].fsmn.conv_right is not None:
+ rorder = self.mem[0].fsmn.conv_right.kernel_size[0]
+
+ num_syn = self.decision.linear.out_features
+ fsmn_layers = len(self.mem)
+
+ # no. of output channels, 0.0 means the same as numins
+ numouts = 1.0
+
+ #
+ # write total header
+ #
+ header = [0.0] * HEADER_BLOCK_SIZE * 5
+ # numins
+ header[0] = 0.0
+ # numouts
+ header[1] = numouts
+ # dimins
+ header[2] = input_dim
+ # dimouts
+ header[3] = num_syn
+ # numlayers
+ header[4] = 4
+
+ #
+ # write each layer's header
+ #
+ hidx = 1
+
+ header[HEADER_BLOCK_SIZE * hidx + 0] = float(
+ LayerType.LAYER_DFSMN.value)
+ header[HEADER_BLOCK_SIZE * hidx + 1] = 0.0
+ header[HEADER_BLOCK_SIZE * hidx + 2] = input_dim
+ header[HEADER_BLOCK_SIZE * hidx + 3] = linear_dim
+ header[HEADER_BLOCK_SIZE * hidx + 4] = proj_dim
+ header[HEADER_BLOCK_SIZE * hidx + 5] = lorder
+ header[HEADER_BLOCK_SIZE * hidx + 6] = rorder
+ header[HEADER_BLOCK_SIZE * hidx + 7] = fsmn_layers
+ hidx += 1
+
+ header[HEADER_BLOCK_SIZE * hidx + 0] = float(
+ LayerType.LAYER_DENSE.value)
+ header[HEADER_BLOCK_SIZE * hidx + 1] = 0.0
+ header[HEADER_BLOCK_SIZE * hidx + 2] = proj_dim
+ header[HEADER_BLOCK_SIZE * hidx + 3] = linear_dim
+ header[HEADER_BLOCK_SIZE * hidx + 4] = 1.0
+ header[HEADER_BLOCK_SIZE * hidx + 5] = float(
+ ActivationType.ACTIVATION_RELU.value)
+ hidx += 1
+
+ header[HEADER_BLOCK_SIZE * hidx + 0] = float(
+ LayerType.LAYER_MAX_POOLING.value)
+ header[HEADER_BLOCK_SIZE * hidx + 1] = 0.0
+ header[HEADER_BLOCK_SIZE * hidx + 2] = linear_dim
+ hidx += 1
+
+ header[HEADER_BLOCK_SIZE * hidx + 0] = float(
+ LayerType.LAYER_DENSE.value)
+ header[HEADER_BLOCK_SIZE * hidx + 1] = numouts
+ header[HEADER_BLOCK_SIZE * hidx + 2] = linear_dim
+ header[HEADER_BLOCK_SIZE * hidx + 3] = num_syn
+ header[HEADER_BLOCK_SIZE * hidx + 4] = 1.0
+ header[HEADER_BLOCK_SIZE * hidx + 5] = float(
+ ActivationType.ACTIVATION_SOFTMAX.value)
+
+ for h in header:
+ print(f32ToI32(h))
+
+ def to_kaldi_nnet(self):
+ re_str = '\n'
+ for unit in self.mem:
+ re_str += unit.to_kaldi_nnet()
+        re_str += self.expand2.toKaldiNNet()
+ relu = RectifiedLinear(self.expand2.linear.out_features,
+ self.expand2.linear.out_features)
+ re_str += relu.toKaldiNNet()
+ re_str += self.decision.toKaldiNNet()
+ re_str += ' %d %d\n' % (self.decision.linear.out_features,
+ self.decision.linear.out_features)
+ re_str += '\n'
+ re_str += '\n'
+
+ return re_str
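
A short sketch of a forward pass through the new multi-channel model. The `[batch, time, channel, feature]` layout comes from the comments in `forward()`; the output time length assumes the FSMN layers preserve the time axis, and the device handling mirrors the CUDA check inside `forward()`.

```python
# Forward a dummy multi-channel feature batch through FSMNSeleNetV3.
import torch

from modelscope.models.audio.kws.farfield.fsmn_sele_v3 import FSMNSeleNetV3

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = FSMNSeleNetV3(input_dim=120, num_syn=5).to(device)   # class defaults otherwise
feats = torch.randn(2, 100, 3, 120, device=device)           # (batch, time, channel, feature)
logits = model(feats)                                        # channels are max-pooled away
print(logits.shape)                                          # expected: [2, 100, 5]
```
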
diff --git a/modelscope/models/audio/kws/farfield/model.py b/modelscope/models/audio/kws/farfield/model.py
index fff88805..670ac97c 100644
--- a/modelscope/models/audio/kws/farfield/model.py
+++ b/modelscope/models/audio/kws/farfield/model.py
@@ -11,6 +11,7 @@ from modelscope.models.builder import MODELS
from modelscope.utils.audio.audio_utils import update_conf
from modelscope.utils.constant import Tasks
from .fsmn_sele_v2 import FSMNSeleNetV2
+from .fsmn_sele_v3 import FSMNSeleNetV3
@MODELS.register_module(
@@ -18,6 +19,7 @@ from .fsmn_sele_v2 import FSMNSeleNetV2
class FSMNSeleNetV2Decorator(TorchModel):
r""" A decorator of FSMNSeleNetV2 for integrating into modelscope framework """
+ MODEL_CLASS = FSMNSeleNetV2
MODEL_TXT = 'model.txt'
SC_CONFIG = 'sound_connect.conf'
@@ -33,7 +35,7 @@ class FSMNSeleNetV2Decorator(TorchModel):
"""
super().__init__(model_dir, *args, **kwargs)
if training:
- self.model = FSMNSeleNetV2(*args, **kwargs)
+ self.model = self.MODEL_CLASS(*args, **kwargs)
else:
sc_config_file = os.path.join(model_dir, self.SC_CONFIG)
model_txt_file = os.path.join(model_dir, self.MODEL_TXT)
@@ -42,7 +44,7 @@ class FSMNSeleNetV2Decorator(TorchModel):
self._sc = None
if os.path.exists(model_txt_file):
- conf_dict = dict(mode=56542, kws_model=model_txt_file)
+ conf_dict = dict(kws_model=model_txt_file)
update_conf(sc_config_file, new_config_file, conf_dict)
import py_sound_connect
self._sc = py_sound_connect.SoundConnect(new_config_file)
@@ -50,8 +52,8 @@ class FSMNSeleNetV2Decorator(TorchModel):
self.size_out = self._sc.bytesPerBlockOut()
else:
raise Exception(
- f'Invalid model directory! Failed to load model file: {model_txt_file}.'
- )
+ f'Invalid model directory! Failed to load model file:'
+ f' {model_txt_file}.')
def __del__(self):
if hasattr(self, 'tmp_dir'):
@@ -73,3 +75,24 @@ class FSMNSeleNetV2Decorator(TorchModel):
'confidence': self._sc.kwsConfidence()
}
return result
+
+
+@MODELS.register_module(
+ Tasks.keyword_spotting,
+ module_name=Models.speech_dfsmn_kws_char_farfield_iot)
+class FSMNSeleNetV3Decorator(FSMNSeleNetV2Decorator):
+ r""" A decorator of FSMNSeleNetV3 for integrating into modelscope framework """
+
+ MODEL_CLASS = FSMNSeleNetV3
+
+ def __init__(self,
+ model_dir: str,
+ training: Optional[bool] = False,
+ *args,
+ **kwargs):
+ """initialize the dfsmn model from the `model_dir` path.
+
+ Args:
+ model_dir (str): the model path.
+ """
+ super().__init__(model_dir, training, *args, **kwargs)
diff --git a/modelscope/models/audio/sv/DTDNN.py b/modelscope/models/audio/sv/DTDNN.py
index d9e21ce8..d86d6799 100644
--- a/modelscope/models/audio/sv/DTDNN.py
+++ b/modelscope/models/audio/sv/DTDNN.py
@@ -76,11 +76,13 @@ class CAMPPlus(nn.Module):
bn_size=4,
init_channels=128,
config_str='batchnorm-relu',
- memory_efficient=True):
+ memory_efficient=True,
+ output_level='segment'):
super(CAMPPlus, self).__init__()
self.head = FCM(feat_dim=feat_dim)
channels = self.head.out_channels
+ self.output_level = output_level
self.xvector = nn.Sequential(
OrderedDict([
@@ -118,10 +120,14 @@ class CAMPPlus(nn.Module):
self.xvector.add_module('out_nonlinear',
get_nonlinear(config_str, channels))
- self.xvector.add_module('stats', StatsPool())
- self.xvector.add_module(
- 'dense',
- DenseLayer(channels * 2, embedding_size, config_str='batchnorm_'))
+ if self.output_level == 'segment':
+ self.xvector.add_module('stats', StatsPool())
+ self.xvector.add_module(
+ 'dense',
+ DenseLayer(
+ channels * 2, embedding_size, config_str='batchnorm_'))
+ else:
+ assert self.output_level == 'frame', '`output_level` should be set to \'segment\' or \'frame\'. '
for m in self.modules():
if isinstance(m, (nn.Conv1d, nn.Linear)):
@@ -133,6 +139,8 @@ class CAMPPlus(nn.Module):
x = x.permute(0, 2, 1) # (B,T,F) => (B,F,T)
x = self.head(x)
x = self.xvector(x)
+ if self.output_level == 'frame':
+ x = x.transpose(1, 2)
return x
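
A sketch contrasting the two `output_level` modes added above: `'segment'` keeps the pooled x-vector head, while `'frame'` skips pooling and returns frame-level features for downstream models such as the speaker change locator. Exact output sizes depend on the model's defaults, so the shape comments are indicative only.

```python
# Contrast of output_level='segment' vs 'frame'; eval() avoids batch-norm
# issues when pooling a single utterance.
import torch

from modelscope.models.audio.sv.DTDNN import CAMPPlus

feats = torch.randn(1, 200, 80)                        # (batch, frames, fbank dim)

segment_model = CAMPPlus(feat_dim=80).eval()           # default: pooled embedding
frame_model = CAMPPlus(feat_dim=80, output_level='frame').eval()

with torch.no_grad():
    print(segment_model(feats).shape)                  # (1, embedding_size)
    print(frame_model(feats).shape)                    # (1, T', channels), frame-level
```
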
diff --git a/modelscope/models/audio/sv/ERes2Net.py b/modelscope/models/audio/sv/ERes2Net.py
new file mode 100644
index 00000000..615be064
--- /dev/null
+++ b/modelscope/models/audio/sv/ERes2Net.py
@@ -0,0 +1,344 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+""" Res2Net implementation is adapted from https://github.com/wenet-e2e/wespeaker.
+    ERes2Net incorporates both local and global feature fusion techniques to improve performance. The local feature
+    fusion (LFF) fuses the features within a single residual block to extract the local signal.
+    The global feature fusion (GFF) takes acoustic features of different scales as input to aggregate the global signal.
+"""
+import math
+import os
+from typing import Any, Dict, Union
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torchaudio.compliance.kaldi as Kaldi
+
+import modelscope.models.audio.sv.pooling_layers as pooling_layers
+from modelscope.metainfo import Models
+from modelscope.models import MODELS, TorchModel
+from modelscope.models.audio.sv.fusion import AFF
+from modelscope.utils.constant import Tasks
+
+
+class ReLU(nn.Hardtanh):
+
+ def __init__(self, inplace=False):
+ super(ReLU, self).__init__(0, 20, inplace)
+
+ def __repr__(self):
+ inplace_str = 'inplace' if self.inplace else ''
+ return self.__class__.__name__ + ' (' \
+ + inplace_str + ')'
+
+
+def conv1x1(in_planes, out_planes, stride=1):
+ '1x1 convolution without padding'
+ return nn.Conv2d(
+ in_planes,
+ out_planes,
+ kernel_size=1,
+ stride=stride,
+ padding=0,
+ bias=False)
+
+
+def conv3x3(in_planes, out_planes, stride=1):
+ '3x3 convolution with padding'
+ return nn.Conv2d(
+ in_planes,
+ out_planes,
+ kernel_size=3,
+ stride=stride,
+ padding=1,
+ bias=False)
+
+
+class BasicBlockRes2Net(nn.Module):
+ expansion = 2
+
+ def __init__(self, in_planes, planes, stride=1, baseWidth=32, scale=2):
+ super(BasicBlockRes2Net, self).__init__()
+ width = int(math.floor(planes * (baseWidth / 64.0)))
+ self.conv1 = conv1x1(in_planes, width * scale, stride)
+ self.bn1 = nn.BatchNorm2d(width * scale)
+ self.nums = scale
+
+ convs = []
+ bns = []
+ for i in range(self.nums):
+ convs.append(conv3x3(width, width))
+ bns.append(nn.BatchNorm2d(width))
+ self.convs = nn.ModuleList(convs)
+ self.bns = nn.ModuleList(bns)
+ self.relu = ReLU(inplace=True)
+
+ self.conv3 = conv1x1(width * scale, planes * self.expansion)
+ self.bn3 = nn.BatchNorm2d(planes * self.expansion)
+ self.shortcut = nn.Sequential()
+ if stride != 1 or in_planes != self.expansion * planes:
+ self.shortcut = nn.Sequential(
+ nn.Conv2d(
+ in_planes,
+ self.expansion * planes,
+ kernel_size=1,
+ stride=stride,
+ bias=False), nn.BatchNorm2d(self.expansion * planes))
+ self.stride = stride
+ self.width = width
+ self.scale = scale
+
+ def forward(self, x):
+ residual = x
+
+ out = self.conv1(x)
+ out = self.bn1(out)
+ out = self.relu(out)
+ spx = torch.split(out, self.width, 1)
+ for i in range(self.nums):
+ if i == 0:
+ sp = spx[i]
+ else:
+ sp = sp + spx[i]
+ sp = self.convs[i](sp)
+ sp = self.relu(self.bns[i](sp))
+ if i == 0:
+ out = sp
+ else:
+ out = torch.cat((out, sp), 1)
+
+ out = self.conv3(out)
+ out = self.bn3(out)
+
+ residual = self.shortcut(x)
+ out += residual
+ out = self.relu(out)
+
+ return out
+
+
+class BasicBlockRes2Net_diff_AFF(nn.Module):
+ expansion = 2
+
+ def __init__(self, in_planes, planes, stride=1, baseWidth=32, scale=2):
+ super(BasicBlockRes2Net_diff_AFF, self).__init__()
+ width = int(math.floor(planes * (baseWidth / 64.0)))
+ self.conv1 = conv1x1(in_planes, width * scale, stride)
+ self.bn1 = nn.BatchNorm2d(width * scale)
+ self.nums = scale
+
+ convs = []
+ fuse_models = []
+ bns = []
+ for i in range(self.nums):
+ convs.append(conv3x3(width, width))
+ bns.append(nn.BatchNorm2d(width))
+ for j in range(self.nums - 1):
+ fuse_models.append(AFF(channels=width))
+
+ self.convs = nn.ModuleList(convs)
+ self.bns = nn.ModuleList(bns)
+ self.fuse_models = nn.ModuleList(fuse_models)
+ self.relu = ReLU(inplace=True)
+
+ self.conv3 = conv1x1(width * scale, planes * self.expansion)
+ self.bn3 = nn.BatchNorm2d(planes * self.expansion)
+ self.shortcut = nn.Sequential()
+ if stride != 1 or in_planes != self.expansion * planes:
+ self.shortcut = nn.Sequential(
+ nn.Conv2d(
+ in_planes,
+ self.expansion * planes,
+ kernel_size=1,
+ stride=stride,
+ bias=False), nn.BatchNorm2d(self.expansion * planes))
+ self.stride = stride
+ self.width = width
+ self.scale = scale
+
+ def forward(self, x):
+ residual = x
+
+ out = self.conv1(x)
+ out = self.bn1(out)
+ out = self.relu(out)
+ spx = torch.split(out, self.width, 1)
+ for i in range(self.nums):
+ if i == 0:
+ sp = spx[i]
+ else:
+ sp = self.fuse_models[i - 1](sp, spx[i])
+
+ sp = self.convs[i](sp)
+ sp = self.relu(self.bns[i](sp))
+ if i == 0:
+ out = sp
+ else:
+ out = torch.cat((out, sp), 1)
+
+ out = self.conv3(out)
+ out = self.bn3(out)
+
+ residual = self.shortcut(x)
+ out += residual
+ out = self.relu(out)
+
+ return out
+
+
+class ERes2Net(nn.Module):
+
+ def __init__(self,
+ block=BasicBlockRes2Net,
+ block_fuse=BasicBlockRes2Net_diff_AFF,
+ num_blocks=[3, 4, 6, 3],
+ m_channels=32,
+ feat_dim=80,
+ embed_dim=192,
+ pooling_func='TSTP',
+ two_emb_layer=False):
+ super(ERes2Net, self).__init__()
+ self.in_planes = m_channels
+ self.feat_dim = feat_dim
+ self.embed_dim = embed_dim
+ self.stats_dim = int(feat_dim / 8) * m_channels * 8
+ self.two_emb_layer = two_emb_layer
+
+ self.conv1 = nn.Conv2d(
+ 1, m_channels, kernel_size=3, stride=1, padding=1, bias=False)
+ self.bn1 = nn.BatchNorm2d(m_channels)
+ self.layer1 = self._make_layer(
+ block, m_channels, num_blocks[0], stride=1)
+ self.layer2 = self._make_layer(
+ block, m_channels * 2, num_blocks[1], stride=2)
+ self.layer3 = self._make_layer(
+ block_fuse, m_channels * 4, num_blocks[2], stride=2)
+ self.layer4 = self._make_layer(
+ block_fuse, m_channels * 8, num_blocks[3], stride=2)
+
+ # downsampling
+ self.layer1_downsample = nn.Conv2d(
+ m_channels * 2,
+ m_channels * 4,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False)
+ self.layer2_downsample = nn.Conv2d(
+ m_channels * 4,
+ m_channels * 8,
+ kernel_size=3,
+ padding=1,
+ stride=2,
+ bias=False)
+ self.layer3_downsample = nn.Conv2d(
+ m_channels * 8,
+ m_channels * 16,
+ kernel_size=3,
+ padding=1,
+ stride=2,
+ bias=False)
+
+ # bottom-up fusion
+ self.fuse_mode12 = AFF(channels=m_channels * 4)
+ self.fuse_mode123 = AFF(channels=m_channels * 8)
+ self.fuse_mode1234 = AFF(channels=m_channels * 16)
+
+ self.n_stats = 1 if pooling_func == 'TAP' or pooling_func == 'TSDP' else 2
+ self.pool = getattr(pooling_layers, pooling_func)(
+ in_dim=self.stats_dim * block.expansion)
+ self.seg_1 = nn.Linear(self.stats_dim * block.expansion * self.n_stats,
+ embed_dim)
+ if self.two_emb_layer:
+ self.seg_bn_1 = nn.BatchNorm1d(embed_dim, affine=False)
+ self.seg_2 = nn.Linear(embed_dim, embed_dim)
+ else:
+ self.seg_bn_1 = nn.Identity()
+ self.seg_2 = nn.Identity()
+
+ def _make_layer(self, block, planes, num_blocks, stride):
+ strides = [stride] + [1] * (num_blocks - 1)
+ layers = []
+ for stride in strides:
+ layers.append(block(self.in_planes, planes, stride))
+ self.in_planes = planes * block.expansion
+ return nn.Sequential(*layers)
+
+ def forward(self, x):
+ x = x.permute(0, 2, 1)
+
+ x = x.unsqueeze_(1)
+ out = F.relu(self.bn1(self.conv1(x)))
+ out1 = self.layer1(out)
+
+ # bottom-up fusion
+ out2 = self.layer2(out1)
+ out1_downsample = self.layer1_downsample(out1)
+ fuse_out12 = self.fuse_mode12(out2, out1_downsample)
+
+ out3 = self.layer3(out2)
+ fuse_out12_downsample = self.layer2_downsample(fuse_out12)
+ fuse_out123 = self.fuse_mode123(out3, fuse_out12_downsample)
+
+ out4 = self.layer4(out3)
+ fuse_out123_downsample = self.layer3_downsample(fuse_out123)
+ fuse_out1234 = self.fuse_mode1234(out4, fuse_out123_downsample)
+ stats = self.pool(fuse_out1234)
+
+ embed_a = self.seg_1(stats)
+ if self.two_emb_layer:
+ out = F.relu(embed_a)
+ out = self.seg_bn_1(out)
+ embed_b = self.seg_2(out)
+ return embed_b
+ else:
+ return embed_a
+
+
+@MODELS.register_module(
+ Tasks.speaker_verification, module_name=Models.eres2net_sv)
+class SpeakerVerificationERes2Net(TorchModel):
+ r"""Enhanced Res2Net architecture with local and global feature fusion. ERes2Net is mainly composed
+    of LFF and GFF. The LFF extracts localization-preserved speaker features and strengthens the local information
+    interaction. GFF fuses multi-scale feature maps in a bottom-up pathway to obtain global information.
+ Args:
+ model_dir: A model dir.
+ model_config: The model config.
+ """
+
+ def __init__(self, model_dir, model_config: Dict[str, Any], *args,
+ **kwargs):
+ super().__init__(model_dir, model_config, *args, **kwargs)
+ self.model_config = model_config
+ self.other_config = kwargs
+ self.feature_dim = 80
+
+ self.embedding_model = ERes2Net()
+
+ pretrained_model_name = kwargs['pretrained_model']
+ self.__load_check_point(pretrained_model_name)
+
+ self.embedding_model.eval()
+
+ def forward(self, audio):
+ assert len(audio.shape) == 2 and audio.shape[
+ 0] == 1, 'modelscope error: the shape of input audio to model needs to be [1, T]'
+ # audio shape: [1, T]
+ feature = self.__extract_feature(audio)
+ embedding = self.embedding_model(feature)
+
+ return embedding
+
+ def __extract_feature(self, audio):
+ feature = Kaldi.fbank(audio, num_mel_bins=self.feature_dim)
+ feature = feature - feature.mean(dim=0, keepdim=True)
+ feature = feature.unsqueeze(0)
+ return feature
+
+ def __load_check_point(self, pretrained_model_name, device=None):
+ if not device:
+ device = torch.device('cpu')
+ self.embedding_model.load_state_dict(
+ torch.load(
+ os.path.join(self.model_dir, pretrained_model_name),
+ map_location=device),
+ strict=True)
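
A minimal sketch of the bare backbone added in this file; the registered `SpeakerVerificationERes2Net` wrapper does the same forward pass but extracts Kaldi fbank features and loads pretrained weights from `model_dir`.

```python
# Random-feature forward through the bare ERes2Net backbone.
import torch

from modelscope.models.audio.sv.ERes2Net import ERes2Net

backbone = ERes2Net(feat_dim=80, embed_dim=192).eval()
with torch.no_grad():
    fbank = torch.randn(1, 300, 80)      # (batch, frames, mel bins)
    emb = backbone(fbank)
print(emb.shape)                          # torch.Size([1, 192])
```
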
diff --git a/modelscope/models/audio/sv/fusion.py b/modelscope/models/audio/sv/fusion.py
new file mode 100644
index 00000000..615529bd
--- /dev/null
+++ b/modelscope/models/audio/sv/fusion.py
@@ -0,0 +1,32 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import torch
+import torch.nn as nn
+
+
+class AFF(nn.Module):
+
+ def __init__(self, channels=64, r=4):
+ super(AFF, self).__init__()
+ inter_channels = int(channels // r)
+
+ self.local_att = nn.Sequential(
+ nn.Conv2d(
+ channels * 2,
+ inter_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0),
+ nn.BatchNorm2d(inter_channels),
+ nn.SiLU(inplace=True),
+ nn.Conv2d(
+ inter_channels, channels, kernel_size=1, stride=1, padding=0),
+ nn.BatchNorm2d(channels),
+ )
+
+ def forward(self, x, ds_y):
+ xa = torch.cat((x, ds_y), dim=1)
+ x_att = self.local_att(xa)
+ x_att = 1.0 + torch.tanh(x_att)
+ xo = torch.mul(x, x_att) + torch.mul(ds_y, 2.0 - x_att)
+
+ return xo
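
The attentional feature fusion block computes a gate in (0, 2) via `1 + tanh(.)` and mixes the two branches with complementary weights (`x_att` for `x`, `2 - x_att` for `ds_y`). A tiny shape check:

```python
# Fuse two same-shaped feature maps with AFF.
import torch

from modelscope.models.audio.sv.fusion import AFF

fuse = AFF(channels=64).eval()
a = torch.randn(1, 64, 40, 100)       # (B, C, F, T) branch
b = torch.randn(1, 64, 40, 100)       # downsampled branch, same shape
print(fuse(a, b).shape)               # torch.Size([1, 64, 40, 100])
```
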
diff --git a/modelscope/models/audio/sv/pooling_layers.py b/modelscope/models/audio/sv/pooling_layers.py
new file mode 100644
index 00000000..0fdc44ca
--- /dev/null
+++ b/modelscope/models/audio/sv/pooling_layers.py
@@ -0,0 +1,107 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+""" This implementation is adapted from https://github.com/wenet-e2e/wespeaker.
+"""
+import torch
+import torch.nn as nn
+
+
+class TAP(nn.Module):
+ """
+ Temporal average pooling, only first-order mean is considered
+ """
+
+ def __init__(self, **kwargs):
+ super(TAP, self).__init__()
+
+ def forward(self, x):
+ pooling_mean = x.mean(dim=-1)
+        # To be compatible with 2D input
+ pooling_mean = pooling_mean.flatten(start_dim=1)
+ return pooling_mean
+
+
+class TSDP(nn.Module):
+ """
+ Temporal standard deviation pooling, only second-order std is considered
+ """
+
+ def __init__(self, **kwargs):
+ super(TSDP, self).__init__()
+
+ def forward(self, x):
+ # The last dimension is the temporal axis
+ pooling_std = torch.sqrt(torch.var(x, dim=-1) + 1e-8)
+ pooling_std = pooling_std.flatten(start_dim=1)
+ return pooling_std
+
+
+class TSTP(nn.Module):
+ """
+ Temporal statistics pooling, concatenate mean and std, which is used in
+ x-vector
+ Comment: simple concatenation can not make full use of both statistics
+ """
+
+ def __init__(self, **kwargs):
+ super(TSTP, self).__init__()
+
+ def forward(self, x):
+ # The last dimension is the temporal axis
+ pooling_mean = x.mean(dim=-1)
+ pooling_std = torch.sqrt(torch.var(x, dim=-1) + 1e-8)
+ pooling_mean = pooling_mean.flatten(start_dim=1)
+ pooling_std = pooling_std.flatten(start_dim=1)
+
+ stats = torch.cat((pooling_mean, pooling_std), 1)
+ return stats
+
+
+class ASTP(nn.Module):
+ """ Attentive statistics pooling: Channel- and context-dependent
+ statistics pooling, first used in ECAPA_TDNN.
+ """
+
+ def __init__(self, in_dim, bottleneck_dim=128, global_context_att=False):
+ super(ASTP, self).__init__()
+ self.global_context_att = global_context_att
+
+ # Use Conv1d with stride == 1 rather than Linear, then we don't
+ # need to transpose inputs.
+ if global_context_att:
+ self.linear1 = nn.Conv1d(
+ in_dim * 3, bottleneck_dim,
+ kernel_size=1) # equals W and b in the paper
+ else:
+ self.linear1 = nn.Conv1d(
+ in_dim, bottleneck_dim,
+ kernel_size=1) # equals W and b in the paper
+ self.linear2 = nn.Conv1d(
+ bottleneck_dim, in_dim,
+ kernel_size=1) # equals V and k in the paper
+
+ def forward(self, x):
+ """
+ x: a 3-dimensional tensor in tdnn-based architecture (B,F,T)
+ or a 4-dimensional tensor in resnet architecture (B,C,F,T)
+ 0-dim: batch-dimension, last-dim: time-dimension (frame-dimension)
+ """
+ if len(x.shape) == 4:
+ x = x.reshape(x.shape[0], x.shape[1] * x.shape[2], x.shape[3])
+ assert len(x.shape) == 3
+
+ if self.global_context_att:
+ context_mean = torch.mean(x, dim=-1, keepdim=True).expand_as(x)
+ context_std = torch.sqrt(
+ torch.var(x, dim=-1, keepdim=True) + 1e-10).expand_as(x)
+ x_in = torch.cat((x, context_mean, context_std), dim=1)
+ else:
+ x_in = x
+
+        # Don't use ReLU here: it can make the attention weights hard to converge.
+ alpha = torch.tanh(
+ self.linear1(x_in)) # alpha = F.relu(self.linear1(x_in))
+ alpha = torch.softmax(self.linear2(alpha), dim=2)
+ mean = torch.sum(alpha * x, dim=2)
+ var = torch.sum(alpha * (x**2), dim=2) - mean**2
+ std = torch.sqrt(var.clamp(min=1e-10))
+ return torch.cat([mean, std], dim=1)
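
A quick comparison of the pooling variants on a frame-level tensor; the doubled output sizes come from concatenating mean and standard deviation.

```python
# Output sizes of the pooling variants for a (B, F, T) tensor.
import torch

from modelscope.models.audio.sv.pooling_layers import ASTP, TAP, TSDP, TSTP

x = torch.randn(2, 512, 150)                 # (batch, feature dim, frames)
print(TAP()(x).shape)                        # (2, 512)   mean only
print(TSDP()(x).shape)                       # (2, 512)   std only
print(TSTP()(x).shape)                       # (2, 1024)  mean + std
print(ASTP(in_dim=512)(x).shape)             # (2, 1024)  attentive mean + std
```
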
diff --git a/modelscope/models/audio/sv/rdino.py b/modelscope/models/audio/sv/rdino.py
new file mode 100644
index 00000000..0d51ee7a
--- /dev/null
+++ b/modelscope/models/audio/sv/rdino.py
@@ -0,0 +1,573 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+""" This ECAPA-TDNN implementation is adapted from https://github.com/speechbrain/speechbrain.
+ RDINOHead implementation is adapted from DINO framework.
+"""
+import math
+import os
+from typing import Any, Dict, Union
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torchaudio.compliance.kaldi as Kaldi
+
+from modelscope.metainfo import Models
+from modelscope.models import MODELS, TorchModel
+from modelscope.utils.constant import Tasks
+
+
+def length_to_mask(length, max_len=None, dtype=None, device=None):
+ assert len(length.shape) == 1
+
+ if max_len is None:
+ max_len = length.max().long().item()
+ mask = torch.arange(
+ max_len, device=length.device, dtype=length.dtype).expand(
+ len(length), max_len) < length.unsqueeze(1)
+
+ if dtype is None:
+ dtype = length.dtype
+
+ if device is None:
+ device = length.device
+
+ mask = torch.as_tensor(mask, dtype=dtype, device=device)
+ return mask
+
+
+def get_padding_elem(L_in: int, stride: int, kernel_size: int, dilation: int):
+ if stride > 1:
+ n_steps = math.ceil(((L_in - kernel_size * dilation) / stride) + 1)
+ L_out = stride * (n_steps - 1) + kernel_size * dilation
+ padding = [kernel_size // 2, kernel_size // 2]
+
+ else:
+ L_out = (L_in - dilation * (kernel_size - 1) - 1) // stride + 1
+
+ padding = [(L_in - L_out) // 2, (L_in - L_out) // 2]
+ return padding
+
+
+class Conv1d(nn.Module):
+
+ def __init__(
+ self,
+ out_channels,
+ kernel_size,
+ in_channels,
+ stride=1,
+ dilation=1,
+ padding='same',
+ groups=1,
+ bias=True,
+ padding_mode='reflect',
+ ):
+ super().__init__()
+ self.kernel_size = kernel_size
+ self.stride = stride
+ self.dilation = dilation
+ self.padding = padding
+ self.padding_mode = padding_mode
+
+ self.conv = nn.Conv1d(
+ in_channels,
+ out_channels,
+ self.kernel_size,
+ stride=self.stride,
+ dilation=self.dilation,
+ padding=0,
+ groups=groups,
+ bias=bias,
+ )
+
+ def forward(self, x):
+ if self.padding == 'same':
+ x = self._manage_padding(x, self.kernel_size, self.dilation,
+ self.stride)
+
+ elif self.padding == 'causal':
+ num_pad = (self.kernel_size - 1) * self.dilation
+ x = F.pad(x, (num_pad, 0))
+
+ elif self.padding == 'valid':
+ pass
+
+ else:
+ raise ValueError(
+ "Padding must be 'same', 'valid' or 'causal'. Got "
+ + self.padding)
+
+ wx = self.conv(x)
+
+ return wx
+
+ def _manage_padding(
+ self,
+ x,
+ kernel_size: int,
+ dilation: int,
+ stride: int,
+ ):
+ L_in = x.shape[-1]
+ padding = get_padding_elem(L_in, stride, kernel_size, dilation)
+ x = F.pad(x, padding, mode=self.padding_mode)
+
+ return x
+
+
+class BatchNorm1d(nn.Module):
+
+ def __init__(
+ self,
+ input_size,
+ eps=1e-05,
+ momentum=0.1,
+ ):
+ super().__init__()
+ self.norm = nn.BatchNorm1d(
+ input_size,
+ eps=eps,
+ momentum=momentum,
+ )
+
+ def forward(self, x):
+ return self.norm(x)
+
+
+class TDNNBlock(nn.Module):
+
+ def __init__(
+ self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ dilation,
+ activation=nn.ReLU,
+ groups=1,
+ ):
+ super(TDNNBlock, self).__init__()
+ self.conv = Conv1d(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
+ dilation=dilation,
+ groups=groups,
+ )
+ self.activation = activation()
+ self.norm = BatchNorm1d(input_size=out_channels)
+
+ def forward(self, x):
+ return self.norm(self.activation(self.conv(x)))
+
+
+class Res2NetBlock(torch.nn.Module):
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ scale=8,
+ kernel_size=3,
+ dilation=1):
+ super(Res2NetBlock, self).__init__()
+ assert in_channels % scale == 0
+ assert out_channels % scale == 0
+
+ in_channel = in_channels // scale
+ hidden_channel = out_channels // scale
+
+ self.blocks = nn.ModuleList([
+ TDNNBlock(
+ in_channel,
+ hidden_channel,
+ kernel_size=kernel_size,
+ dilation=dilation,
+ ) for i in range(scale - 1)
+ ])
+ self.scale = scale
+
+ def forward(self, x):
+ y = []
+ for i, x_i in enumerate(torch.chunk(x, self.scale, dim=1)):
+ if i == 0:
+ y_i = x_i
+ elif i == 1:
+ y_i = self.blocks[i - 1](x_i)
+ else:
+ y_i = self.blocks[i - 1](x_i + y_i)
+ y.append(y_i)
+ y = torch.cat(y, dim=1)
+ return y
+
+
+class SEBlock(nn.Module):
+
+ def __init__(self, in_channels, se_channels, out_channels):
+ super(SEBlock, self).__init__()
+
+ self.conv1 = Conv1d(
+ in_channels=in_channels, out_channels=se_channels, kernel_size=1)
+ self.relu = torch.nn.ReLU(inplace=True)
+ self.conv2 = Conv1d(
+ in_channels=se_channels, out_channels=out_channels, kernel_size=1)
+ self.sigmoid = torch.nn.Sigmoid()
+
+ def forward(self, x, lengths=None):
+ L = x.shape[-1]
+ if lengths is not None:
+ mask = length_to_mask(lengths * L, max_len=L, device=x.device)
+ mask = mask.unsqueeze(1)
+ total = mask.sum(dim=2, keepdim=True)
+ s = (x * mask).sum(dim=2, keepdim=True) / total
+ else:
+ s = x.mean(dim=2, keepdim=True)
+
+ s = self.relu(self.conv1(s))
+ s = self.sigmoid(self.conv2(s))
+
+ return s * x
+
+
+class AttentiveStatisticsPooling(nn.Module):
+
+ def __init__(self, channels, attention_channels=128, global_context=True):
+ super().__init__()
+
+ self.eps = 1e-12
+ self.global_context = global_context
+ if global_context:
+ self.tdnn = TDNNBlock(channels * 3, attention_channels, 1, 1)
+ else:
+ self.tdnn = TDNNBlock(channels, attention_channels, 1, 1)
+ self.tanh = nn.Tanh()
+ self.conv = Conv1d(
+ in_channels=attention_channels,
+ out_channels=channels,
+ kernel_size=1)
+
+ def forward(self, x, lengths=None):
+ L = x.shape[-1]
+
+ def _compute_statistics(x, m, dim=2, eps=self.eps):
+ mean = (m * x).sum(dim)
+ std = torch.sqrt(
+ (m * (x - mean.unsqueeze(dim)).pow(2)).sum(dim).clamp(eps))
+ return mean, std
+
+ if lengths is None:
+ lengths = torch.ones(x.shape[0], device=x.device)
+
+ # Make binary mask of shape [N, 1, L]
+ mask = length_to_mask(lengths * L, max_len=L, device=x.device)
+ mask = mask.unsqueeze(1)
+
+ # Expand the temporal context of the pooling layer by allowing the
+ # self-attention to look at global properties of the utterance.
+ if self.global_context:
+ # torch.std is unstable for backward computation
+ # https://github.com/pytorch/pytorch/issues/4320
+ total = mask.sum(dim=2, keepdim=True).float()
+ mean, std = _compute_statistics(x, mask / total)
+ mean = mean.unsqueeze(2).repeat(1, 1, L)
+ std = std.unsqueeze(2).repeat(1, 1, L)
+ attn = torch.cat([x, mean, std], dim=1)
+ else:
+ attn = x
+
+ # Apply layers
+ attn = self.conv(self.tanh(self.tdnn(attn)))
+
+ # Filter out zero-paddings
+ attn = attn.masked_fill(mask == 0, float('-inf'))
+
+ attn = F.softmax(attn, dim=2)
+ mean, std = _compute_statistics(x, attn)
+ # Append mean and std of the batch
+ pooled_stats = torch.cat((mean, std), dim=1)
+ pooled_stats = pooled_stats.unsqueeze(2)
+
+ return pooled_stats
+
+
+class SERes2NetBlock(nn.Module):
+
+ def __init__(
+ self,
+ in_channels,
+ out_channels,
+ res2net_scale=8,
+ se_channels=128,
+ kernel_size=1,
+ dilation=1,
+ activation=torch.nn.ReLU,
+ groups=1,
+ ):
+ super().__init__()
+ self.out_channels = out_channels
+ self.tdnn1 = TDNNBlock(
+ in_channels,
+ out_channels,
+ kernel_size=1,
+ dilation=1,
+ activation=activation,
+ groups=groups,
+ )
+ self.res2net_block = Res2NetBlock(out_channels, out_channels,
+ res2net_scale, kernel_size, dilation)
+ self.tdnn2 = TDNNBlock(
+ out_channels,
+ out_channels,
+ kernel_size=1,
+ dilation=1,
+ activation=activation,
+ groups=groups,
+ )
+ self.se_block = SEBlock(out_channels, se_channels, out_channels)
+
+ self.shortcut = None
+ if in_channels != out_channels:
+ self.shortcut = Conv1d(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ kernel_size=1,
+ )
+
+ def forward(self, x, lengths=None):
+ residual = x
+ if self.shortcut:
+ residual = self.shortcut(x)
+
+ x = self.tdnn1(x)
+ x = self.res2net_block(x)
+ x = self.tdnn2(x)
+ x = self.se_block(x, lengths)
+
+ return x + residual
+
+
+class ECAPA_TDNN(nn.Module):
+ """An implementation of the speaker embedding model in a paper.
+ "ECAPA-TDNN: Emphasized Channel Attention, Propagation and Aggregation in
+ TDNN Based Speaker Verification" (https://arxiv.org/abs/2005.07143).
+ """
+
+ def __init__(
+ self,
+ input_size,
+ device='cpu',
+ lin_neurons=512,
+ activation=torch.nn.ReLU,
+ channels=[512, 512, 512, 512, 1536],
+ kernel_sizes=[5, 3, 3, 3, 1],
+ dilations=[1, 2, 3, 4, 1],
+ attention_channels=128,
+ res2net_scale=8,
+ se_channels=128,
+ global_context=True,
+ groups=[1, 1, 1, 1, 1],
+ ):
+
+ super().__init__()
+ assert len(channels) == len(kernel_sizes)
+ assert len(channels) == len(dilations)
+ self.channels = channels
+ self.blocks = nn.ModuleList()
+
+ # The initial TDNN layer
+ self.blocks.append(
+ TDNNBlock(
+ input_size,
+ channels[0],
+ kernel_sizes[0],
+ dilations[0],
+ activation,
+ groups[0],
+ ))
+
+ # SE-Res2Net layers
+ for i in range(1, len(channels) - 1):
+ self.blocks.append(
+ SERes2NetBlock(
+ channels[i - 1],
+ channels[i],
+ res2net_scale=res2net_scale,
+ se_channels=se_channels,
+ kernel_size=kernel_sizes[i],
+ dilation=dilations[i],
+ activation=activation,
+ groups=groups[i],
+ ))
+
+ # Multi-layer feature aggregation
+ self.mfa = TDNNBlock(
+ channels[-1],
+ channels[-1],
+ kernel_sizes[-1],
+ dilations[-1],
+ activation,
+ groups=groups[-1],
+ )
+
+ # Attentive Statistical Pooling
+ self.asp = AttentiveStatisticsPooling(
+ channels[-1],
+ attention_channels=attention_channels,
+ global_context=global_context,
+ )
+ self.asp_bn = BatchNorm1d(input_size=channels[-1] * 2)
+
+ # Final linear transformation
+ self.fc = Conv1d(
+ in_channels=channels[-1] * 2,
+ out_channels=lin_neurons,
+ kernel_size=1,
+ )
+
+ def forward(self, x, lengths=None):
+ """Returns the embedding vector.
+
+ Arguments
+ ---------
+ x : torch.Tensor
+ Tensor of shape (batch, time, channel).
+ """
+ x = x.transpose(1, 2)
+
+ xl = []
+ for layer in self.blocks:
+ try:
+ x = layer(x, lengths=lengths)
+ except TypeError:
+ x = layer(x)
+ xl.append(x)
+
+ # Multi-layer feature aggregation
+ x = torch.cat(xl[1:], dim=1)
+ x = self.mfa(x)
+
+ # Attentive Statistical Pooling
+ x = self.asp(x, lengths=lengths)
+ x = self.asp_bn(x)
+
+ # Final linear transformation
+ x = self.fc(x)
+
+ x = x.transpose(1, 2).squeeze(1)
+ return x
+
+
+class RDINOHead(nn.Module):
+
+ def __init__(self,
+ in_dim,
+ out_dim,
+ use_bn=False,
+ norm_last_layer=True,
+ nlayers=3,
+ hidden_dim=2048,
+ bottleneck_dim=256,
+ add_dim=8192):
+ super().__init__()
+ nlayers = max(nlayers, 1)
+ if nlayers == 1:
+ self.mlp = nn.Linear(in_dim, bottleneck_dim)
+ else:
+ layers = [nn.Linear(in_dim, hidden_dim)]
+ if use_bn:
+ layers.append(nn.BatchNorm1d(hidden_dim))
+ layers.append(nn.GELU())
+ for _ in range(nlayers - 2):
+ layers.append(nn.Linear(hidden_dim, hidden_dim))
+ if use_bn:
+ layers.append(nn.BatchNorm1d(hidden_dim))
+ layers.append(nn.GELU())
+
+ layers.append(nn.Linear(hidden_dim, add_dim))
+ self.mlp = nn.Sequential(*layers)
+ self.add_layer = nn.Linear(add_dim, bottleneck_dim)
+ self.apply(self._init_weights)
+ self.last_layer = nn.utils.weight_norm(
+ nn.Linear(bottleneck_dim, out_dim, bias=False))
+ self.last_layer.weight_g.data.fill_(1)
+ if norm_last_layer:
+ self.last_layer.weight_g.requires_grad = False
+
+ def _init_weights(self, m):
+ if isinstance(m, nn.Linear):
+ torch.nn.init.trunc_normal_(m.weight, std=.02)
+ if isinstance(m, nn.Linear) and m.bias is not None:
+ nn.init.constant_(m.bias, 0)
+
+ def forward(self, x):
+ vicr_out = self.mlp(x)
+ x = self.add_layer(vicr_out)
+ x = nn.functional.normalize(x, dim=-1, p=2)
+ x = self.last_layer(x)
+ return vicr_out, x
+
+
+class Combine(nn.Module):
+
+ def __init__(self, backbone, head):
+ super(Combine, self).__init__()
+ self.backbone = backbone
+ self.head = head
+
+ def forward(self, x):
+ x = self.backbone(x)
+ output = self.head(x)
+ return output
+
+
+@MODELS.register_module(
+ Tasks.speaker_verification, module_name=Models.rdino_tdnn_sv)
+class SpeakerVerification_RDINO(TorchModel):
+
+ def __init__(self, model_dir, model_config: Dict[str, Any], *args,
+ **kwargs):
+ super().__init__(model_dir, model_config, *args, **kwargs)
+ self.model_config = model_config
+ self.other_config = kwargs
+ if self.model_config['channel'] != 1024:
+ raise ValueError(
+ 'modelscope error: Currently only 1024-channel ecapa tdnn is supported.'
+ )
+
+ self.feature_dim = 80
+ channels_config = [1024, 1024, 1024, 1024, 3072]
+
+ self.embedding_model = ECAPA_TDNN(
+ self.feature_dim, channels=channels_config)
+ self.embedding_model = Combine(self.embedding_model,
+ RDINOHead(512, 65536, True))
+
+ pretrained_model_name = kwargs['pretrained_model']
+ self.__load_check_point(pretrained_model_name)
+
+ self.embedding_model.eval()
+
+ def forward(self, audio):
+ assert len(audio.shape) == 2 and audio.shape[
+ 0] == 1, 'modelscope error: the shape of input audio to model needs to be [1, T]'
+ # audio shape: [1, T]
+ feature = self.__extract_feature(audio)
+ embedding = self.embedding_model.backbone(feature)
+
+ return embedding
+
+ def __extract_feature(self, audio):
+ feature = Kaldi.fbank(audio, num_mel_bins=self.feature_dim)
+ feature = feature - feature.mean(dim=0, keepdim=True)
+ feature = feature.unsqueeze(0)
+ return feature
+
+ def __load_check_point(self, pretrained_model_name, device=None):
+ if not device:
+ device = torch.device('cpu')
+ state_dict = torch.load(
+ os.path.join(self.model_dir, pretrained_model_name),
+ map_location=device)
+ state_dict_tea = {
+ k.replace('module.', ''): v
+ for k, v in state_dict['teacher'].items()
+ }
+ self.embedding_model.load_state_dict(state_dict_tea, strict=True)
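
A sketch of how the pieces in this file fit together, mirroring `SpeakerVerification_RDINO` but with a smaller channel configuration so the toy forward stays cheap; the real model uses `channels=[1024, 1024, 1024, 1024, 3072]` and loads the teacher weights from `model_dir`.

```python
# ECAPA-TDNN backbone + RDINOHead, wired as in SpeakerVerification_RDINO.
import torch

from modelscope.models.audio.sv.rdino import Combine, ECAPA_TDNN, RDINOHead

backbone = ECAPA_TDNN(input_size=80, channels=[256, 256, 256, 256, 768])
model = Combine(backbone, RDINOHead(512, 65536, True)).eval()

with torch.no_grad():
    fbank = torch.randn(1, 200, 80)          # (batch, frames, mel bins)
    emb = model.backbone(fbank)              # 512-dim embedding used at inference
    vicr_out, proj = model.head(emb)         # heads used during RDINO training
print(emb.shape, vicr_out.shape, proj.shape)
# torch.Size([1, 512]) torch.Size([1, 8192]) torch.Size([1, 65536])
```
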
diff --git a/modelscope/models/audio/sv/speaker_change_locator.py b/modelscope/models/audio/sv/speaker_change_locator.py
new file mode 100644
index 00000000..c22e4c1b
--- /dev/null
+++ b/modelscope/models/audio/sv/speaker_change_locator.py
@@ -0,0 +1,319 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import os
+from collections import OrderedDict
+from typing import Any, Dict, Union
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torchaudio.compliance.kaldi as Kaldi
+
+from modelscope.metainfo import Models
+from modelscope.models import MODELS, TorchModel
+from modelscope.models.audio.sv.DTDNN import CAMPPlus
+from modelscope.utils.constant import Tasks
+
+
+class MultiHeadSelfAttention(nn.Module):
+
+ def __init__(self, n_units, h=8, dropout=0.1):
+ super(MultiHeadSelfAttention, self).__init__()
+ self.linearQ = nn.Linear(n_units, n_units)
+ self.linearK = nn.Linear(n_units, n_units)
+ self.linearV = nn.Linear(n_units, n_units)
+ self.linearO = nn.Linear(n_units, n_units)
+ self.d_k = n_units // h
+ self.h = h
+ self.dropout = nn.Dropout(p=dropout)
+ self.att = None
+
+ def forward(self, x, batch_size):
+ # x: (BT, F)
+ q = self.linearQ(x).reshape(batch_size, -1, self.h, self.d_k)
+ k = self.linearK(x).reshape(batch_size, -1, self.h, self.d_k)
+ v = self.linearV(x).reshape(batch_size, -1, self.h, self.d_k)
+ scores = torch.matmul(q.transpose(1, 2), k.permute(
+ 0, 2, 3, 1)) / np.sqrt(self.d_k)
+ # scores: (B, h, T, T)
+ self.att = F.softmax(scores, dim=3)
+ p_att = self.dropout(self.att)
+ # v : (B, T, h, d_k)
+ # p_att : (B, h, T, T)
+ x = torch.matmul(p_att, v.transpose(1, 2))
+ # x : (B, h, T, d_k)
+ x = x.transpose(1, 2).reshape(-1, self.h * self.d_k)
+ return self.linearO(x)
+
+
+class PositionwiseFeedForward(nn.Module):
+
+ def __init__(self, n_units, d_units, dropout):
+ super(PositionwiseFeedForward, self).__init__()
+ self.linear1 = nn.Linear(n_units, d_units)
+ self.linear2 = nn.Linear(d_units, n_units)
+ self.dropout = nn.Dropout(p=dropout)
+
+ def forward(self, x):
+ return self.linear2(self.dropout(F.relu(self.linear1(x))))
+
+
+class PosEncoding(nn.Module):
+
+ def __init__(self, max_seq_len, d_word_vec):
+ super(PosEncoding, self).__init__()
+ pos_enc = np.array([[
+ pos / np.power(10000, 2.0 * (j // 2) / d_word_vec)
+ for j in range(d_word_vec)
+ ] for pos in range(max_seq_len)])
+ pos_enc[:, 0::2] = np.sin(pos_enc[:, 0::2])
+ pos_enc[:, 1::2] = np.cos(pos_enc[:, 1::2])
+ pad_row = np.zeros([1, d_word_vec])
+ pos_enc = np.concatenate([pad_row, pos_enc]).astype(np.float32)
+
+ self.pos_enc = torch.nn.Embedding(max_seq_len + 1, d_word_vec)
+ self.pos_enc.weight = torch.nn.Parameter(
+ torch.from_numpy(pos_enc), requires_grad=False)
+
+ def forward(self, input_len):
+ max_len = torch.max(input_len)
+ input_pos = torch.LongTensor([
+ list(range(1, len + 1)) + [0] * (max_len - len)
+ for len in input_len
+ ])
+
+ return self.pos_enc(input_pos)
+
+
+class TransformerEncoder(nn.Module):
+
+ def __init__(self,
+ idim,
+ n_units=256,
+ n_layers=2,
+ e_units=512,
+ h=4,
+ dropout=0.1):
+ super(TransformerEncoder, self).__init__()
+ self.linear_in = nn.Linear(idim, n_units)
+ self.lnorm_in = nn.LayerNorm(n_units)
+
+ self.n_layers = n_layers
+ self.dropout = nn.Dropout(p=dropout)
+ for i in range(n_layers):
+ setattr(self, '{}{:d}'.format('lnorm1_', i), nn.LayerNorm(n_units))
+ setattr(self, '{}{:d}'.format('self_att_', i),
+ MultiHeadSelfAttention(n_units, h))
+ setattr(self, '{}{:d}'.format('lnorm2_', i), nn.LayerNorm(n_units))
+ setattr(self, '{}{:d}'.format('ff_', i),
+ PositionwiseFeedForward(n_units, e_units, dropout))
+ self.lnorm_out = nn.LayerNorm(n_units)
+
+ def forward(self, x):
+ # x: [B, num_anchors, T, n_in]
+ bs, num, tframe, dim = x.size()
+ x = x.reshape(bs * num, tframe, -1) # [B*num_anchors, T, dim]
+ # x: (B, T, F) ... batch, time, (mel)freq
+ B_size, T_size, _ = x.shape
+ # e: (BT, F)
+ e = self.linear_in(x.reshape(B_size * T_size, -1))
+ # Encoder stack
+ for i in range(self.n_layers):
+ # layer normalization
+ e = getattr(self, '{}{:d}'.format('lnorm1_', i))(e)
+ # self-attention
+ s = getattr(self, '{}{:d}'.format('self_att_', i))(e, x.shape[0])
+ # residual
+ e = e + self.dropout(s)
+ # layer normalization
+ e = getattr(self, '{}{:d}'.format('lnorm2_', i))(e)
+ # positionwise feed-forward
+ s = getattr(self, '{}{:d}'.format('ff_', i))(e)
+ # residual
+ e = e + self.dropout(s)
+ # final layer normalization
+ # output: (BT, F)
+        # output: (B, T, F)
+ output = self.lnorm_out(e).reshape(B_size, T_size, -1)
+ output = output.reshape(bs, num, tframe,
+ -1) # [B, num_anchors, T, dim]
+ return output
+
+
+class TransformerEncoder_out(nn.Module):
+
+ def __init__(self,
+ idim,
+ n_units=256,
+ n_layers=2,
+ e_units=512,
+ h=4,
+ dropout=0.1):
+ super(TransformerEncoder_out, self).__init__()
+ self.linear_in = nn.Linear(idim, n_units)
+ self.lnorm_in = nn.LayerNorm(n_units)
+
+ self.n_layers = n_layers
+ self.dropout = nn.Dropout(p=dropout)
+ for i in range(n_layers):
+ setattr(self, '{}{:d}'.format('lnorm1_', i), nn.LayerNorm(n_units))
+ setattr(self, '{}{:d}'.format('self_att_', i),
+ MultiHeadSelfAttention(n_units, h))
+ setattr(self, '{}{:d}'.format('lnorm2_', i), nn.LayerNorm(n_units))
+ setattr(self, '{}{:d}'.format('ff_', i),
+ PositionwiseFeedForward(n_units, e_units, dropout))
+ self.lnorm_out = nn.LayerNorm(n_units)
+
+ def forward(self, x):
+ # x: (B, T, F)
+ B_size, T_size, _ = x.shape
+ # e: (BT, F)
+ e = self.linear_in(x.reshape(B_size * T_size, -1))
+ # Encoder stack
+ for i in range(self.n_layers):
+ # layer normalization
+ e = getattr(self, '{}{:d}'.format('lnorm1_', i))(e)
+ # self-attention
+ s = getattr(self, '{}{:d}'.format('self_att_', i))(e, x.shape[0])
+ # residual
+ e = e + self.dropout(s)
+ # layer normalization
+ e = getattr(self, '{}{:d}'.format('lnorm2_', i))(e)
+ # positionwise feed-forward
+ s = getattr(self, '{}{:d}'.format('ff_', i))(e)
+ # residual
+ e = e + self.dropout(s)
+ # final layer normalization
+ # output: (BT, F)
+ # output: (B, T, F)
+ output = self.lnorm_out(e).reshape(B_size, T_size, -1)
+ return output
+
+
+class OutLayer(nn.Module):
+
+ def __init__(self, n_units=256, num_anchors=2):
+ super(OutLayer, self).__init__()
+ self.combine = TransformerEncoder_out(num_anchors * n_units, n_units)
+ self.out_linear = nn.Linear(n_units // num_anchors, 1)
+
+ def forward(self, input):
+ # input: [B, num_anchors, T, dim]
+ bs, num, tframe, dim = input.size()
+ output = input.permute(0, 2, 1,
+ 3).reshape(bs, tframe,
+ -1) # [Bs, t, num_anchors*dim]
+ output = self.combine(output) # [Bs, t, n_units]
+ output = output.reshape(
+ bs, tframe, num, -1) # [Bs, t, num_anchors, n_units//num_anchors]
+ output = self.out_linear(output).squeeze(-1) # [Bs, t, num_anchors]
+
+ return output
+
+
+class TransformerDetector(nn.Module):
+
+ def __init__(self,
+ frame_dim=512,
+ anchor_dim=192,
+ hidden_dim=256,
+ max_seq_len=1000):
+ super(TransformerDetector, self).__init__()
+ self.detection = TransformerEncoder(
+ idim=frame_dim + anchor_dim, n_units=hidden_dim)
+ self.output = OutLayer(n_units=hidden_dim)
+ self.pos_enc = PosEncoding(max_seq_len, hidden_dim)
+
+ def forward(self, feats, anchors):
+ # feats: [1, t, fdim]
+ num_frames = feats.shape[1]
+ num_anchors = anchors.shape[1]
+ bs = feats.shape[0]
+ feats = feats.unsqueeze(1).repeat(
+ 1, num_anchors, 1, 1) # shape: [Bs, num_anchors, t, fdim]
+ anchors = anchors.unsqueeze(2).repeat(
+ 1, 1, num_frames, 1) # shape: [Bs, num_anchors, t, xdim]
+ sd_in = torch.cat((feats, anchors),
+ dim=-1) # shape: [Bs, num_anchors, t, fdim+xdim]
+ sd_out = self.detection(sd_in) # shape: [Bs, num_anchors, t, sd_dim]
+
+ # pos
+ pos_emb = self.pos_enc(torch.tensor([num_frames] * (bs * num_anchors)))
+ pos_emb = pos_emb.reshape(bs, num_anchors, num_frames, -1)
+ sd_out += pos_emb
+
+ # output
+ output = self.output(sd_out) # shape: [Bs, t, num_anchors]
+
+ return output
+
+
+@MODELS.register_module(Tasks.speaker_diarization, module_name=Models.scl_sd)
+class SpeakerChangeLocatorTransformer(TorchModel):
+ r"""A speaekr change locator using the transformer architecture as the backbone.
+ Args:
+ model_dir: A model dir.
+ model_config: The model config.
+ """
+
+ def __init__(self, model_dir, model_config: Dict[str, Any], *args,
+ **kwargs):
+ super().__init__(model_dir, model_config, *args, **kwargs)
+ self.model_config = model_config
+
+ self.feature_dim = self.model_config['fbank_dim']
+ frame_size = self.model_config['frame_size']
+ anchor_size = self.model_config['anchor_size']
+
+ self.encoder = CAMPPlus(self.feature_dim, output_level='frame')
+ self.backend = TransformerDetector(
+ frame_dim=frame_size, anchor_dim=anchor_size)
+
+ pretrained_encoder = kwargs['pretrained_encoder']
+ pretrained_backend = kwargs['pretrained_backend']
+
+ self.__load_check_point(pretrained_encoder, pretrained_backend)
+
+ self.encoder.eval()
+ self.backend.eval()
+
+ def forward(self, audio, anchors):
+ assert len(audio.shape) == 2 and audio.shape[
+ 0] == 1, 'modelscope error: the shape of input audio to model needs to be [1, T]'
+ assert len(
+ anchors.shape
+ ) == 3 and anchors.shape[0] == 1 and anchors.shape[
+ 1] == 2, 'modelscope error: the shape of input anchors to model needs to be [1, 2, D]'
+ # audio shape: [1, T]
+ feature = self.__extract_feature(audio)
+ frame_state = self.encoder(feature)
+ output = self.backend(frame_state, anchors)
+ output = output.squeeze(0).detach().cpu().sigmoid()
+
+ time_scale_factor = int(np.ceil(feature.shape[1] / output.shape[0]))
+ output = output.unsqueeze(1).expand(-1, time_scale_factor,
+ -1).reshape(-1, output.shape[-1])
+ return output
+
+ def __extract_feature(self, audio):
+ feature = Kaldi.fbank(audio, num_mel_bins=self.feature_dim)
+ feature = feature - feature.mean(dim=0, keepdim=True)
+ feature = feature.unsqueeze(0)
+ return feature
+
+ def __load_check_point(self,
+ pretrained_encoder,
+ pretrained_backend,
+ device=None):
+ if not device:
+ device = torch.device('cpu')
+ self.encoder.load_state_dict(
+ torch.load(
+ os.path.join(self.model_dir, pretrained_encoder),
+ map_location=device))
+
+ self.backend.load_state_dict(
+ torch.load(
+ os.path.join(self.model_dir, pretrained_backend),
+ map_location=device))
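
The broadcast-and-concatenate step in TransformerDetector.forward above pairs every frame embedding with every speaker anchor before detection, and the final expand/reshape in SpeakerChangeLocatorTransformer.forward restores the original feature frame rate. A minimal, self-contained sketch of both shape flows (the sizes below are placeholders, not the model's real dimensions):

import math

import torch

bs, t, fdim, xdim, num_anchors = 1, 100, 512, 192, 2
feats = torch.randn(bs, t, fdim)               # frame-level encoder output
anchors = torch.randn(bs, num_anchors, xdim)   # one embedding per speaker anchor

feats = feats.unsqueeze(1).repeat(1, num_anchors, 1, 1)   # [bs, num_anchors, t, fdim]
anchors = anchors.unsqueeze(2).repeat(1, 1, t, 1)         # [bs, num_anchors, t, xdim]
sd_in = torch.cat((feats, anchors), dim=-1)               # [bs, num_anchors, t, fdim + xdim]
assert sd_in.shape == (bs, num_anchors, t, fdim + xdim)

# frame-rate restoration: repeat each sub-sampled score to cover the original frames
scores = torch.rand(25, num_anchors)                      # [t', num_anchors], t' < t
scale = math.ceil(t / scores.shape[0])
restored = scores.unsqueeze(1).expand(-1, scale, -1).reshape(-1, num_anchors)
assert restored.shape == (t, num_anchors)
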
diff --git a/modelscope/models/audio/tts/voice.py b/modelscope/models/audio/tts/voice.py
index 645a528f..ed9edf43 100644
--- a/modelscope/models/audio/tts/voice.py
+++ b/modelscope/models/audio/tts/voice.py
@@ -17,11 +17,9 @@ from kantts.train.trainer import GAN_Trainer, Sambert_Trainer, distributed_init
from kantts.utils.ling_unit.ling_unit import KanTtsLinguisticUnit
from torch.utils.data import DataLoader
-from modelscope import __version__
from modelscope.utils.audio.audio_utils import TtsCustomParams
from modelscope.utils.audio.tts_exceptions import (
TtsModelConfigurationException, TtsModelNotExistsException)
-from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.logger import get_logger
logger = get_logger()
@@ -394,6 +392,7 @@ class Voice:
logger.info(f'TRAINING steps: {train_max_steps}')
config['create_time'] = time.strftime('%Y-%m-%d %H:%M:%S',
time.localtime())
+ from modelscope import __version__
config['modelscope_version'] = __version__
with open(os.path.join(stage_dir, 'config.yaml'), 'w') as f:
@@ -558,6 +557,7 @@ class Voice:
logger.info(f'resume from: {resume_from}')
config['create_time'] = time.strftime('%Y-%m-%d %H:%M:%S',
time.localtime())
+ from modelscope import __version__
config['modelscope_version'] = __version__
with open(os.path.join(stage_dir, 'config.yaml'), 'w') as f:
diff --git a/modelscope/models/cv/__init__.py b/modelscope/models/cv/__init__.py
index 21487216..39acec69 100644
--- a/modelscope/models/cv/__init__.py
+++ b/modelscope/models/cv/__init__.py
@@ -4,9 +4,8 @@
from . import (action_recognition, animal_recognition, bad_image_detecting,
body_2d_keypoints, body_3d_keypoints, cartoon,
cmdssl_video_embedding, controllable_image_generation,
- crowd_counting, face_2d_keypoints, face_detection,
- face_generation, face_reconstruction, human_reconstruction,
- human_wholebody_keypoint, image_classification,
+ crowd_counting, face_detection, face_generation,
+ face_reconstruction, human_reconstruction, image_classification,
image_color_enhance, image_colorization, image_defrcn_fewshot,
image_denoise, image_inpainting, image_instance_segmentation,
image_matching, image_mvs_depth_estimation,
diff --git a/modelscope/models/cv/body_2d_keypoints/hrnet_v2.py b/modelscope/models/cv/body_2d_keypoints/hrnet_v2.py
index ebd69adb..19e426b2 100644
--- a/modelscope/models/cv/body_2d_keypoints/hrnet_v2.py
+++ b/modelscope/models/cv/body_2d_keypoints/hrnet_v2.py
@@ -72,7 +72,7 @@ class PoseHighResolutionNetV2(TorchModel):
self.stage4, pre_stage_channels = self._make_stage(
self.stage4_cfg, num_channels, multi_scale_output=True)
"""final four layers"""
- last_inp_channels = np.int(np.sum(pre_stage_channels))
+ last_inp_channels = int(np.sum(pre_stage_channels))
self.final_layer = nn.Sequential(
nn.Conv2d(
in_channels=last_inp_channels,
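
The one-line change above recurs in many of the files below: np.int, np.float, np.bool and np.str were deprecated NumPy aliases of the Python builtins (deprecated in NumPy 1.20, removed in 1.24), so the builtins or explicit sized dtypes are drop-in replacements; the same series also swaps np.asscalar(x) for x.item(). A quick sketch of the equivalences:

import numpy as np

x = np.array([1.5, 2.5])
a = x.astype(int)                 # replaces x.astype(np.int)
b = x.astype(float)               # replaces x.astype(np.float)
mask = np.zeros(3, dtype=bool)    # replaces dtype=np.bool
s = np.array(['001', '002'], dtype=np.str_)   # np.str_ remains a valid NumPy scalar type
z = np.float64(3.0).item()        # replaces np.asscalar(np.float64(3.0))
print(a.dtype, b.dtype, mask.dtype, s.dtype, z)
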
diff --git a/modelscope/models/cv/cartoon/facelib/face_landmark.py b/modelscope/models/cv/cartoon/facelib/face_landmark.py
index 3b7cc1b9..3c53f3a6 100644
--- a/modelscope/models/cv/cartoon/facelib/face_landmark.py
+++ b/modelscope/models/cv/cartoon/facelib/face_landmark.py
@@ -81,7 +81,7 @@ class FaceLandmark:
bbox[2] = center[0] + one_edge // 2
bbox[3] = center[1] + one_edge // 2
- bbox = bbox.astype(np.int)
+ bbox = bbox.astype(int)
crop_image = bimg[bbox[1]:bbox[3], bbox[0]:bbox[2], :]
h, w, _ = crop_image.shape
crop_image = cv2.resize(
diff --git a/modelscope/models/cv/crowd_counting/hrnet_aspp_relu.py b/modelscope/models/cv/crowd_counting/hrnet_aspp_relu.py
index 0d1bd3ca..64f40da0 100644
--- a/modelscope/models/cv/crowd_counting/hrnet_aspp_relu.py
+++ b/modelscope/models/cv/crowd_counting/hrnet_aspp_relu.py
@@ -356,7 +356,7 @@ class HighResolutionNet(nn.Module):
num_channels)
self.stage3, pre_stage_channels = self._make_stage(
self.stage3_cfg, num_channels)
- last_inp_channels = np.int(np.sum(pre_stage_channels)) + 256
+ last_inp_channels = int(np.sum(pre_stage_channels)) + 256
self.redc_layer = nn.Sequential(
nn.Conv2d(
in_channels=last_inp_channels,
diff --git a/modelscope/models/cv/easycv_base.py b/modelscope/models/cv/easycv_base.py
deleted file mode 100644
index 7bc35e84..00000000
--- a/modelscope/models/cv/easycv_base.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from easycv.models.base import BaseModel
-from easycv.utils.ms_utils import EasyCVMeta
-
-from modelscope.models.base import TorchModel
-
-
-class EasyCVBaseModel(BaseModel, TorchModel):
- """Base model for EasyCV."""
-
- def __init__(self, model_dir=None, args=(), kwargs={}):
- kwargs.pop(EasyCVMeta.ARCH, None) # pop useless keys
- BaseModel.__init__(self)
- TorchModel.__init__(self, model_dir=model_dir)
-
- def forward(self, img, mode='train', **kwargs):
- if self.training:
- losses = self.forward_train(img, **kwargs)
- loss, log_vars = self._parse_losses(losses)
- return dict(loss=loss, log_vars=log_vars)
- else:
- return self.forward_test(img, **kwargs)
-
- def __call__(self, *args, **kwargs):
- return self.forward(*args, **kwargs)
diff --git a/modelscope/models/cv/face_2d_keypoints/__init__.py b/modelscope/models/cv/face_2d_keypoints/__init__.py
deleted file mode 100644
index 636ba0f4..00000000
--- a/modelscope/models/cv/face_2d_keypoints/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import TYPE_CHECKING
-
-from modelscope.utils.import_utils import LazyImportModule
-
-if TYPE_CHECKING:
- from .face_2d_keypoints_align import Face2DKeypoints
-
-else:
- _import_structure = {'face_2d_keypoints_align': ['Face2DKeypoints']}
-
- import sys
-
- sys.modules[__name__] = LazyImportModule(
- __name__,
- globals()['__file__'],
- _import_structure,
- module_spec=__spec__,
- extra_objects={},
- )
diff --git a/modelscope/models/cv/face_2d_keypoints/face_2d_keypoints_align.py b/modelscope/models/cv/face_2d_keypoints/face_2d_keypoints_align.py
deleted file mode 100644
index 468662a0..00000000
--- a/modelscope/models/cv/face_2d_keypoints/face_2d_keypoints_align.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from easycv.models.face.face_keypoint import FaceKeypoint
-
-from modelscope.metainfo import Models
-from modelscope.models.builder import MODELS
-from modelscope.models.cv.easycv_base import EasyCVBaseModel
-from modelscope.utils.constant import Tasks
-
-
-@MODELS.register_module(
- group_key=Tasks.face_2d_keypoints, module_name=Models.face_2d_keypoints)
-class Face2DKeypoints(EasyCVBaseModel, FaceKeypoint):
-
- def __init__(self, model_dir=None, *args, **kwargs):
- EasyCVBaseModel.__init__(self, model_dir, args, kwargs)
- FaceKeypoint.__init__(self, *args, **kwargs)
diff --git a/modelscope/models/cv/face_detection/peppa_pig_face/face_landmark.py b/modelscope/models/cv/face_detection/peppa_pig_face/face_landmark.py
index 03a3b5b7..e7e2ddaf 100644
--- a/modelscope/models/cv/face_detection/peppa_pig_face/face_landmark.py
+++ b/modelscope/models/cv/face_detection/peppa_pig_face/face_landmark.py
@@ -82,7 +82,7 @@ class FaceLandmark:
bbox[2] = center[0] + one_edge // 2
bbox[3] = center[1] + one_edge // 2
- bbox = bbox.astype(np.int)
+ bbox = bbox.astype(int)
crop_image = bimg[bbox[1]:bbox[3], bbox[0]:bbox[2], :]
h, w, _ = crop_image.shape
crop_image = cv2.resize(crop_image,
diff --git a/modelscope/models/cv/hand_2d_keypoints/__init__.py b/modelscope/models/cv/hand_2d_keypoints/__init__.py
deleted file mode 100644
index 2b06f19a..00000000
--- a/modelscope/models/cv/hand_2d_keypoints/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import TYPE_CHECKING
-
-from modelscope.utils.import_utils import LazyImportModule
-
-if TYPE_CHECKING:
- from .hand_2d_keypoints import Hand2dKeyPoints
-
-else:
- _import_structure = {'hand_2d_keypoints': ['Hand2dKeyPoints']}
-
- import sys
-
- sys.modules[__name__] = LazyImportModule(
- __name__,
- globals()['__file__'],
- _import_structure,
- module_spec=__spec__,
- extra_objects={},
- )
diff --git a/modelscope/models/cv/hand_2d_keypoints/hand_2d_keypoints.py b/modelscope/models/cv/hand_2d_keypoints/hand_2d_keypoints.py
deleted file mode 100644
index 15a97c30..00000000
--- a/modelscope/models/cv/hand_2d_keypoints/hand_2d_keypoints.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from easycv.models.pose import TopDown
-
-from modelscope.metainfo import Models
-from modelscope.models.builder import MODELS
-from modelscope.models.cv.easycv_base import EasyCVBaseModel
-from modelscope.utils.constant import Tasks
-
-
-@MODELS.register_module(
- group_key=Tasks.hand_2d_keypoints, module_name=Models.hand_2d_keypoints)
-class Hand2dKeyPoints(EasyCVBaseModel, TopDown):
-
- def __init__(self, model_dir=None, *args, **kwargs):
- EasyCVBaseModel.__init__(self, model_dir, args, kwargs)
- TopDown.__init__(self, *args, **kwargs)
diff --git a/modelscope/models/cv/human_reconstruction/models/human_segmenter.py b/modelscope/models/cv/human_reconstruction/models/human_segmenter.py
index 3f0261e7..29bf6f70 100644
--- a/modelscope/models/cv/human_reconstruction/models/human_segmenter.py
+++ b/modelscope/models/cv/human_reconstruction/models/human_segmenter.py
@@ -31,7 +31,7 @@ class human_segmenter(object):
img = np.dstack((img, img, img))
elif img.shape[2] == 4:
img = img[:, :, :3]
- img = img.astype(np.float)
+ img = img.astype(float)
return img
def run(self, img):
diff --git a/modelscope/models/cv/human_reconstruction/utils.py b/modelscope/models/cv/human_reconstruction/utils.py
index 45653dc6..67e1efdb 100644
--- a/modelscope/models/cv/human_reconstruction/utils.py
+++ b/modelscope/models/cv/human_reconstruction/utils.py
@@ -69,8 +69,8 @@ def eval_grid(coords,
num_samples=512 * 512 * 512):
resolution = coords.shape[1:4]
sdf = np.zeros(resolution)
- dirty = np.ones(resolution, dtype=np.bool)
- grid_mask = np.zeros(resolution, dtype=np.bool)
+ dirty = np.ones(resolution, dtype=bool)
+ grid_mask = np.zeros(resolution, dtype=bool)
reso = resolution[0] // init_resolution
while reso > 0:
diff --git a/modelscope/models/cv/human_wholebody_keypoint/human_wholebody_keypoint.py b/modelscope/models/cv/human_wholebody_keypoint/human_wholebody_keypoint.py
deleted file mode 100644
index dd3c0290..00000000
--- a/modelscope/models/cv/human_wholebody_keypoint/human_wholebody_keypoint.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from easycv.models.pose.top_down import TopDown
-
-from modelscope.metainfo import Models
-from modelscope.models.builder import MODELS
-from modelscope.models.cv.easycv_base import EasyCVBaseModel
-from modelscope.utils.constant import Tasks
-
-
-@MODELS.register_module(
- group_key=Tasks.human_wholebody_keypoint,
- module_name=Models.human_wholebody_keypoint)
-class HumanWholeBodyKeypoint(EasyCVBaseModel, TopDown):
-
- def __init__(self, model_dir=None, *args, **kwargs):
- EasyCVBaseModel.__init__(self, model_dir, args, kwargs)
- TopDown.__init__(self, *args, **kwargs)
diff --git a/modelscope/models/cv/image_defrcn_fewshot/utils/voc_register.py b/modelscope/models/cv/image_defrcn_fewshot/utils/voc_register.py
index 7a94066e..0b043493 100644
--- a/modelscope/models/cv/image_defrcn_fewshot/utils/voc_register.py
+++ b/modelscope/models/cv/image_defrcn_fewshot/utils/voc_register.py
@@ -163,7 +163,7 @@ def load_filtered_voc_instances(name: str, root: str, dirname: str, split: str,
os.path.join(split_dir,
'box_{}shot_{}_train.txt'.format(shot,
cls))) as f:
- fileids_ = np.loadtxt(f, dtype=np.str).tolist()
+ fileids_ = np.loadtxt(f, dtype=np.str_).tolist()
if isinstance(fileids_, str):
fileids_ = [fileids_]
fileids_ = [
@@ -219,7 +219,7 @@ def load_filtered_voc_instances(name: str, root: str, dirname: str, split: str,
with PathManager.open(
os.path.join(root, dirname, 'ImageSets', 'Main',
split + '.txt')) as f:
- fileids = np.loadtxt(f, dtype=np.str)
+ fileids = np.loadtxt(f, dtype=np.str_)
for fileid in fileids:
anno_file = os.path.join(root, dirname, 'Annotations',
diff --git a/modelscope/models/cv/image_instance_segmentation/__init__.py b/modelscope/models/cv/image_instance_segmentation/__init__.py
index 60e688eb..8041a7e7 100644
--- a/modelscope/models/cv/image_instance_segmentation/__init__.py
+++ b/modelscope/models/cv/image_instance_segmentation/__init__.py
@@ -8,10 +8,12 @@ if TYPE_CHECKING:
from .maskdino_swin import MaskDINOSwin
from .model import CascadeMaskRCNNSwinModel
from .maskdino_model import MaskDINOSwinModel
+ from .fastinst_model import FastInst
from .postprocess_utils import get_img_ins_seg_result, get_maskdino_ins_seg_result
else:
_import_structure = {
'cascade_mask_rcnn_swin': ['CascadeMaskRCNNSwin'],
+ 'fastinst_model': ['FastInst'],
'maskdino_swin': ['MaskDINOSwin'],
'model': ['CascadeMaskRCNNSwinModel'],
'maskdino_model': ['MaskDINOSwinModel'],
diff --git a/modelscope/models/cv/image_instance_segmentation/backbones/__init__.py b/modelscope/models/cv/image_instance_segmentation/backbones/__init__.py
index bbeac51e..1e7325f3 100644
--- a/modelscope/models/cv/image_instance_segmentation/backbones/__init__.py
+++ b/modelscope/models/cv/image_instance_segmentation/backbones/__init__.py
@@ -6,10 +6,12 @@ from modelscope.utils.import_utils import LazyImportModule
if TYPE_CHECKING:
from .swin_transformer import SwinTransformer
from .swin_transformer import D2SwinTransformer
+ from .resnet import build_resnet_backbone
else:
_import_structure = {
'swin_transformer': ['SwinTransformer', 'D2SwinTransformer'],
+ 'resnet': ['build_resnet_backbone']
}
import sys
diff --git a/modelscope/models/cv/image_instance_segmentation/backbones/resnet.py b/modelscope/models/cv/image_instance_segmentation/backbones/resnet.py
new file mode 100644
index 00000000..4e2a5ec1
--- /dev/null
+++ b/modelscope/models/cv/image_instance_segmentation/backbones/resnet.py
@@ -0,0 +1,114 @@
+# Part of the implementation is borrowed and modified from Detectron2, publicly available at
+# https://github.com/facebookresearch/detectron2/blob/main/projects/DeepLab/deeplab/resnet.py
+
+import torch.nn.functional as F
+from torch import nn
+
+from modelscope.models.cv.image_human_parsing.backbone.deeplab_resnet import (
+ BottleneckBlock, DeeplabResNet, get_norm)
+from modelscope.models.cv.image_instance_segmentation.maskdino.utils import \
+ Conv2d
+
+
+class BasicStem(nn.Module):
+ """
+ The standard ResNet stem (layers before the first residual block),
+ with a conv, relu and max_pool.
+ """
+
+ def __init__(self, in_channels=3, out_channels=64, norm='BN'):
+ """
+ Args:
+ norm (str or callable): norm after the first conv layer.
+ See :func:`layers.get_norm` for supported format.
+ """
+ super().__init__()
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.stride = 4
+ self.conv1 = Conv2d(
+ in_channels,
+ out_channels,
+ kernel_size=7,
+ stride=2,
+ padding=3,
+ bias=False,
+ norm=get_norm(norm, out_channels),
+ )
+
+ def forward(self, x):
+ x = self.conv1(x)
+ x = F.relu_(x)
+ x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1)
+ return x
+
+
+def build_resnet_backbone(out_features, depth, num_groups, width_per_group,
+ norm, stem_out_channels, res2_out_channels,
+ stride_in_1x1, res4_dilation, res5_dilation,
+ res5_multi_grid, input_shape):
+ stem = BasicStem(
+ in_channels=input_shape['channels'],
+ out_channels=stem_out_channels,
+ norm=norm)
+ bottleneck_channels = num_groups * width_per_group
+ in_channels = stem_out_channels
+ out_channels = res2_out_channels
+
+ assert res4_dilation in {
+ 1, 2
+ }, 'res4_dilation cannot be {}.'.format(res4_dilation)
+ assert res5_dilation in {
+ 1, 2, 4
+ }, 'res5_dilation cannot be {}.'.format(res5_dilation)
+ if res4_dilation == 2:
+ # Always dilate res5 if res4 is dilated.
+ assert res5_dilation == 4
+
+ num_blocks_per_stage = {
+ 50: [3, 4, 6, 3],
+ 101: [3, 4, 23, 3],
+ 152: [3, 8, 36, 3]
+ }[depth]
+
+ stages = []
+ out_stage_idx = [{
+ 'res2': 2,
+ 'res3': 3,
+ 'res4': 4,
+ 'res5': 5
+ }[f] for f in out_features]
+ max_stage_idx = max(out_stage_idx)
+ for idx, stage_idx in enumerate(range(2, max_stage_idx + 1)):
+ if stage_idx == 4:
+ dilation = res4_dilation
+ elif stage_idx == 5:
+ dilation = res5_dilation
+ else:
+ dilation = 1
+ first_stride = 1 if idx == 0 or dilation > 1 else 2
+ stride_per_block = [first_stride]
+ stride_per_block += [1] * (num_blocks_per_stage[idx] - 1)
+ stage_kargs = {
+ 'num_blocks': num_blocks_per_stage[idx],
+ 'stride_per_block': stride_per_block,
+ 'in_channels': in_channels,
+ 'out_channels': out_channels,
+ 'norm': norm,
+ 'bottleneck_channels': bottleneck_channels,
+ 'stride_in_1x1': stride_in_1x1,
+ 'dilation': dilation,
+ 'num_groups': num_groups,
+ 'block_class': BottleneckBlock
+ }
+ if stage_idx == 5:
+ stage_kargs.pop('dilation')
+ stage_kargs['dilation_per_block'] = [
+ dilation * mg for mg in res5_multi_grid
+ ]
+ blocks = DeeplabResNet.make_stage(**stage_kargs)
+ in_channels = out_channels
+ out_channels *= 2
+ bottleneck_channels *= 2
+ stages.append(blocks)
+ return DeeplabResNet(stem, stages, out_features=out_features)
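
The stage loop in build_resnet_backbone above mixes stride, dilation and multi-grid bookkeeping, which is easy to mis-read. The following dependency-free sketch reproduces just that bookkeeping for a hypothetical ResNet-50 with res4_dilation=1, res5_dilation=2 and res5_multi_grid=(1, 2, 4) (illustrative values only):

depth, res4_dilation, res5_dilation = 50, 1, 2
res5_multi_grid = (1, 2, 4)
out_features = ['res2', 'res3', 'res4', 'res5']

num_blocks_per_stage = {50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3]}[depth]
max_stage_idx = max({'res2': 2, 'res3': 3, 'res4': 4, 'res5': 5}[f] for f in out_features)

for idx, stage_idx in enumerate(range(2, max_stage_idx + 1)):
    dilation = {4: res4_dilation, 5: res5_dilation}.get(stage_idx, 1)
    first_stride = 1 if idx == 0 or dilation > 1 else 2
    strides = [first_stride] + [1] * (num_blocks_per_stage[idx] - 1)
    if stage_idx == 5:
        dilations = [dilation * mg for mg in res5_multi_grid]   # multi-grid on the last stage
    else:
        dilations = [dilation] * num_blocks_per_stage[idx]
    print('res{}: strides={}, dilations={}'.format(stage_idx, strides, dilations))
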
diff --git a/modelscope/models/cv/image_instance_segmentation/fastinst/__init__.py b/modelscope/models/cv/image_instance_segmentation/fastinst/__init__.py
new file mode 100644
index 00000000..b937315b
--- /dev/null
+++ b/modelscope/models/cv/image_instance_segmentation/fastinst/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
diff --git a/modelscope/models/cv/image_instance_segmentation/fastinst/fastinst_decoder.py b/modelscope/models/cv/image_instance_segmentation/fastinst/fastinst_decoder.py
new file mode 100644
index 00000000..aa4300f6
--- /dev/null
+++ b/modelscope/models/cv/image_instance_segmentation/fastinst/fastinst_decoder.py
@@ -0,0 +1,351 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import math
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from modelscope.models.cv.image_colorization.ddcolor.utils.transformer_utils import (
+ MLP, CrossAttentionLayer, FFNLayer, SelfAttentionLayer)
+
+
+class QueryProposal(nn.Module):
+
+ def __init__(self, num_features, num_queries, num_classes):
+ super().__init__()
+ self.topk = num_queries
+ self.num_classes = num_classes
+
+ self.conv_proposal_cls_logits = nn.Sequential(
+ nn.Conv2d(
+ num_features, num_features, kernel_size=3, stride=1,
+ padding=1),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(
+ num_features,
+ num_classes + 1,
+ kernel_size=1,
+ stride=1,
+ padding=0),
+ )
+
+ @torch.no_grad()
+ def compute_coordinates(self, x):
+ h, w = x.size(2), x.size(3)
+ y_loc = torch.linspace(0, 1, h, device=x.device)
+ x_loc = torch.linspace(0, 1, w, device=x.device)
+ y_loc, x_loc = torch.meshgrid(y_loc, x_loc)
+ locations = torch.stack([x_loc, y_loc], 0).unsqueeze(0)
+ return locations
+
+ def seek_local_maximum(self, x, epsilon=1e-6):
+ """
+ inputs:
+ x: torch.tensor, shape [b, c, h, w]
+ return:
+ torch.tensor, shape [b, c, h, w]
+ """
+ x_pad = F.pad(x, (1, 1, 1, 1), 'constant', 0)
+ # top, bottom, left, right, top-left, top-right, bottom-left, bottom-right
+ maximum = (x >= x_pad[:, :, :-2, 1:-1]) & \
+ (x >= x_pad[:, :, 2:, 1:-1]) & \
+ (x >= x_pad[:, :, 1:-1, :-2]) & \
+ (x >= x_pad[:, :, 1:-1, 2:]) & \
+ (x >= x_pad[:, :, :-2, :-2]) & \
+ (x >= x_pad[:, :, :-2, 2:]) & \
+ (x >= x_pad[:, :, 2:, :-2]) & \
+ (x >= x_pad[:, :, 2:, 2:]) & \
+ (x >= epsilon)
+ return maximum.to(x)
+
+ def forward(self, x, pos_embeddings):
+
+ proposal_cls_logits = self.conv_proposal_cls_logits(x) # b, c, h, w
+ proposal_cls_probs = proposal_cls_logits.softmax(dim=1) # b, c, h, w
+ proposal_cls_one_hot = F.one_hot(
+ proposal_cls_probs[:, :-1, :, :].max(1)[1],
+ num_classes=self.num_classes + 1).permute(0, 3, 1, 2) # b, c, h, w
+ proposal_cls_probs = proposal_cls_probs.mul(proposal_cls_one_hot)
+ proposal_local_maximum_map = self.seek_local_maximum(
+ proposal_cls_probs) # b, c, h, w
+ proposal_cls_probs = proposal_cls_probs + proposal_local_maximum_map # b, c, h, w
+
+ # top-k indices
+ topk_indices = torch.topk(
+ proposal_cls_probs[:, :-1, :, :].flatten(2).max(1)[0],
+ self.topk,
+ dim=1)[1] # b, q
+ topk_indices = topk_indices.unsqueeze(1) # b, 1, q
+
+ # topk queries
+ topk_proposals = torch.gather(
+ x.flatten(2), dim=2, index=topk_indices.repeat(1, x.shape[1],
+ 1)) # b, c, q
+ pos_embeddings = pos_embeddings.repeat(x.shape[0], 1, 1, 1).flatten(2)
+ topk_pos_embeddings = torch.gather(
+ pos_embeddings,
+ dim=2,
+ index=topk_indices.repeat(1, pos_embeddings.shape[1],
+ 1)) # b, c, q
+ if self.training:
+ locations = self.compute_coordinates(x).repeat(x.shape[0], 1, 1, 1)
+ topk_locations = torch.gather(
+ locations.flatten(2),
+ dim=2,
+ index=topk_indices.repeat(1, locations.shape[1], 1))
+ topk_locations = topk_locations.transpose(-1, -2) # b, q, 2
+ else:
+ topk_locations = None
+ return topk_proposals, topk_pos_embeddings, topk_locations, proposal_cls_logits
+
+
+class FastInstDecoder(nn.Module):
+
+ def __init__(self, in_channels, *, num_classes: int, hidden_dim: int,
+ num_queries: int, num_aux_queries: int, nheads: int,
+ dim_feedforward: int, dec_layers: int, pre_norm: bool,
+ mask_dim: int):
+ """
+ Args:
+ in_channels: channels of the input features
+ num_classes: number of classes
+ hidden_dim: Transformer feature dimension
+ num_queries: number of queries
+ num_aux_queries: number of auxiliary queries
+ nheads: number of heads
+ dim_feedforward: feature dimension in feedforward network
+ dec_layers: number of Transformer decoder layers
+ pre_norm: whether to use pre-LayerNorm or not
+ mask_dim: mask feature dimension
+ """
+ super().__init__()
+ self.num_heads = nheads
+ self.num_layers = dec_layers
+ self.num_queries = num_queries
+ self.num_aux_queries = num_aux_queries
+ self.num_classes = num_classes
+
+ meta_pos_size = int(round(math.sqrt(self.num_queries)))
+ self.meta_pos_embed = nn.Parameter(
+ torch.empty(1, hidden_dim, meta_pos_size, meta_pos_size))
+ if num_aux_queries > 0:
+ self.empty_query_features = nn.Embedding(num_aux_queries,
+ hidden_dim)
+ self.empty_query_pos_embed = nn.Embedding(num_aux_queries,
+ hidden_dim)
+
+ self.query_proposal = QueryProposal(hidden_dim, num_queries,
+ num_classes)
+
+ self.transformer_query_cross_attention_layers = nn.ModuleList()
+ self.transformer_query_self_attention_layers = nn.ModuleList()
+ self.transformer_query_ffn_layers = nn.ModuleList()
+ self.transformer_mask_cross_attention_layers = nn.ModuleList()
+ self.transformer_mask_ffn_layers = nn.ModuleList()
+ for idx in range(self.num_layers):
+ self.transformer_query_cross_attention_layers.append(
+ CrossAttentionLayer(
+ d_model=hidden_dim,
+ nhead=nheads,
+ dropout=0.0,
+ normalize_before=pre_norm))
+ self.transformer_query_self_attention_layers.append(
+ SelfAttentionLayer(
+ d_model=hidden_dim,
+ nhead=nheads,
+ dropout=0.0,
+ normalize_before=pre_norm))
+ self.transformer_query_ffn_layers.append(
+ FFNLayer(
+ d_model=hidden_dim,
+ dim_feedforward=dim_feedforward,
+ dropout=0.0,
+ normalize_before=pre_norm))
+ self.transformer_mask_cross_attention_layers.append(
+ CrossAttentionLayer(
+ d_model=hidden_dim,
+ nhead=nheads,
+ dropout=0.0,
+ normalize_before=pre_norm))
+ self.transformer_mask_ffn_layers.append(
+ FFNLayer(
+ d_model=hidden_dim,
+ dim_feedforward=dim_feedforward,
+ dropout=0.0,
+ normalize_before=pre_norm))
+
+ self.decoder_query_norm_layers = nn.ModuleList()
+ self.class_embed_layers = nn.ModuleList()
+ self.mask_embed_layers = nn.ModuleList()
+ self.mask_features_layers = nn.ModuleList()
+ for idx in range(self.num_layers + 1):
+ self.decoder_query_norm_layers.append(nn.LayerNorm(hidden_dim))
+ self.class_embed_layers.append(
+ MLP(hidden_dim, hidden_dim, num_classes + 1, 3))
+ self.mask_embed_layers.append(
+ MLP(hidden_dim, hidden_dim, mask_dim, 3))
+ self.mask_features_layers.append(nn.Linear(hidden_dim, mask_dim))
+
+ def forward(self, x, mask_features, targets=None):
+ bs = x[0].shape[0]
+ proposal_size = x[1].shape[-2:]
+ pixel_feature_size = x[2].shape[-2:]
+
+ pixel_pos_embeds = F.interpolate(
+ self.meta_pos_embed,
+ size=pixel_feature_size,
+ mode='bilinear',
+ align_corners=False)
+ proposal_pos_embeds = F.interpolate(
+ self.meta_pos_embed,
+ size=proposal_size,
+ mode='bilinear',
+ align_corners=False)
+
+ pixel_features = x[2].flatten(2).permute(2, 0, 1)
+ pixel_pos_embeds = pixel_pos_embeds.flatten(2).permute(2, 0, 1)
+
+ query_features, query_pos_embeds, query_locations, proposal_cls_logits = self.query_proposal(
+ x[1], proposal_pos_embeds)
+ query_features = query_features.permute(2, 0, 1)
+ query_pos_embeds = query_pos_embeds.permute(2, 0, 1)
+ if self.num_aux_queries > 0:
+ aux_query_features = self.empty_query_features.weight.unsqueeze(
+ 1).repeat(1, bs, 1)
+ aux_query_pos_embed = self.empty_query_pos_embed.weight.unsqueeze(
+ 1).repeat(1, bs, 1)
+ query_features = torch.cat([query_features, aux_query_features],
+ dim=0)
+ query_pos_embeds = torch.cat(
+ [query_pos_embeds, aux_query_pos_embed], dim=0)
+
+ outputs_class, outputs_mask, attn_mask, _, _ = self.forward_prediction_heads(
+ query_features,
+ pixel_features,
+ pixel_feature_size,
+ -1,
+ return_attn_mask=True)
+ predictions_class = [outputs_class]
+ predictions_mask = [outputs_mask]
+ predictions_matching_index = [None]
+ query_feature_memory = [query_features]
+ pixel_feature_memory = [pixel_features]
+
+ for i in range(self.num_layers):
+ query_features, pixel_features = self.forward_one_layer(
+ query_features, pixel_features, query_pos_embeds,
+ pixel_pos_embeds, attn_mask, i)
+ if i < self.num_layers - 1:
+ outputs_class, outputs_mask, attn_mask, _, _ = self.forward_prediction_heads(
+ query_features,
+ pixel_features,
+ pixel_feature_size,
+ i,
+ return_attn_mask=True,
+ )
+ else:
+ outputs_class, outputs_mask, _, matching_indices, gt_attn_mask = self.forward_prediction_heads(
+ query_features,
+ pixel_features,
+ pixel_feature_size,
+ i,
+ )
+ predictions_class.append(outputs_class)
+ predictions_mask.append(outputs_mask)
+ predictions_matching_index.append(None)
+ query_feature_memory.append(query_features)
+ pixel_feature_memory.append(pixel_features)
+
+ out = {
+ 'proposal_cls_logits':
+ proposal_cls_logits,
+ 'query_locations':
+ query_locations,
+ 'pred_logits':
+ predictions_class[-1],
+ 'pred_masks':
+ predictions_mask[-1],
+ 'pred_indices':
+ predictions_matching_index[-1],
+ 'aux_outputs':
+ self._set_aux_loss(predictions_class, predictions_mask,
+ predictions_matching_index, query_locations)
+ }
+ return out
+
+ def forward_one_layer(self, query_features, pixel_features,
+ query_pos_embeds, pixel_pos_embeds, attn_mask, i):
+ pixel_features = self.transformer_mask_cross_attention_layers[i](
+ pixel_features,
+ query_features,
+ query_pos=pixel_pos_embeds,
+ pos=query_pos_embeds)
+ pixel_features = self.transformer_mask_ffn_layers[i](pixel_features)
+
+ query_features = self.transformer_query_cross_attention_layers[i](
+ query_features,
+ pixel_features,
+ memory_mask=attn_mask,
+ query_pos=query_pos_embeds,
+ pos=pixel_pos_embeds)
+ query_features = self.transformer_query_self_attention_layers[i](
+ query_features, query_pos=query_pos_embeds)
+ query_features = self.transformer_query_ffn_layers[i](query_features)
+ return query_features, pixel_features
+
+ def forward_prediction_heads(self,
+ query_features,
+ pixel_features,
+ pixel_feature_size,
+ idx_layer,
+ return_attn_mask=False,
+ return_gt_attn_mask=False,
+ targets=None,
+ query_locations=None):
+ decoder_query_features = self.decoder_query_norm_layers[idx_layer + 1](
+ query_features[:self.num_queries])
+ decoder_query_features = decoder_query_features.transpose(0, 1)
+ if idx_layer + 1 == self.num_layers:
+ outputs_class = self.class_embed_layers[idx_layer + 1](
+ decoder_query_features)
+ else:
+ outputs_class = None
+ outputs_mask_embed = self.mask_embed_layers[idx_layer + 1](
+ decoder_query_features)
+ outputs_mask_features = self.mask_features_layers[idx_layer + 1](
+ pixel_features.transpose(0, 1))
+
+ outputs_mask = torch.einsum('bqc,blc->bql', outputs_mask_embed,
+ outputs_mask_features)
+ outputs_mask = outputs_mask.reshape(-1, self.num_queries,
+ *pixel_feature_size)
+
+ if return_attn_mask:
+ # outputs_mask.shape: b, q, h, w
+ attn_mask = F.pad(outputs_mask,
+ (0, 0, 0, 0, 0, self.num_aux_queries),
+ 'constant', 1)
+ attn_mask = (attn_mask < 0.).flatten(2) # b, q, hw
+ invalid_query = attn_mask.all(-1, keepdim=True) # b, q, 1
+ attn_mask = (~invalid_query) & attn_mask # b, q, hw
+ attn_mask = attn_mask.unsqueeze(1).repeat(1, self.num_heads, 1,
+ 1).flatten(0, 1)
+ attn_mask = attn_mask.detach()
+ else:
+ attn_mask = None
+
+ matching_indices = None
+ gt_attn_mask = None
+
+ return outputs_class, outputs_mask, attn_mask, matching_indices, gt_attn_mask
+
+ @torch.jit.unused
+ def _set_aux_loss(self, outputs_class, outputs_seg_masks, output_indices,
+ output_query_locations):
+ return [{
+ 'query_locations': output_query_locations,
+ 'pred_logits': a,
+ 'pred_masks': b,
+ 'pred_matching_indices': c
+ } for a, b, c in zip(outputs_class[:-1], outputs_seg_masks[:-1],
+ output_indices[:-1])]
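
QueryProposal.seek_local_maximum above keeps only the class-probability peaks that are >= each of their eight zero-padded neighbours and above a small epsilon; those peaks are then added back onto the probability map so the subsequent top-k gather prefers them. A tiny standalone check of that behaviour on a toy tensor:

import torch
import torch.nn.functional as F


def seek_local_maximum(x, epsilon=1e-6):
    # same 8-neighbour comparison as in QueryProposal.seek_local_maximum
    x_pad = F.pad(x, (1, 1, 1, 1), 'constant', 0)
    maximum = (x >= x_pad[:, :, :-2, 1:-1]) & (x >= x_pad[:, :, 2:, 1:-1]) & \
              (x >= x_pad[:, :, 1:-1, :-2]) & (x >= x_pad[:, :, 1:-1, 2:]) & \
              (x >= x_pad[:, :, :-2, :-2]) & (x >= x_pad[:, :, :-2, 2:]) & \
              (x >= x_pad[:, :, 2:, :-2]) & (x >= x_pad[:, :, 2:, 2:]) & \
              (x >= epsilon)
    return maximum.to(x)


probs = torch.tensor([[[[0.1, 0.2, 0.1],
                        [0.2, 0.9, 0.2],
                        [0.1, 0.2, 0.1]]]])
print(seek_local_maximum(probs))   # only the centre 0.9 position is marked as 1.0
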
diff --git a/modelscope/models/cv/image_instance_segmentation/fastinst/fastinst_encoder.py b/modelscope/models/cv/image_instance_segmentation/fastinst/fastinst_encoder.py
new file mode 100644
index 00000000..46b3f74d
--- /dev/null
+++ b/modelscope/models/cv/image_instance_segmentation/fastinst/fastinst_encoder.py
@@ -0,0 +1,180 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import logging
+from typing import Callable, Optional, Union
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from modelscope.models.cv.image_instance_segmentation.maskdino.utils import \
+ Conv2d
+
+
+# This is a modified FPN decoder.
+class BaseFPN(nn.Module):
+
+ def __init__(
+ self,
+ input_shape,
+ *,
+ convs_dim: int,
+ mask_dim: int,
+ norm: Optional[Union[str, Callable]] = None,
+ ):
+ """
+ Args:
+ input_shape: shapes (channels and stride) of the input features
+ convs_dim: number of output channels for the intermediate conv layers.
+ mask_dim: number of output channels for the final conv layer.
+ norm (str or callable): normalization for all conv layers
+ """
+ super().__init__()
+
+ input_shape = sorted(input_shape.items(), key=lambda x: x[1]['stride'])
+ self.in_features = [k for k, v in input_shape
+ ] # starting from "res3" to "res5"
+ feature_channels = [v['channels'] for k, v in input_shape]
+
+ lateral_convs = []
+ output_convs = []
+
+ use_bias = norm == ''
+ for idx, in_channels in enumerate(feature_channels):
+ lateral_norm = nn.GroupNorm(32, convs_dim)
+ output_norm = nn.GroupNorm(32, convs_dim)
+
+ lateral_conv = Conv2d(
+ in_channels,
+ convs_dim,
+ kernel_size=1,
+ bias=use_bias,
+ norm=lateral_norm)
+ output_conv = Conv2d(
+ convs_dim,
+ convs_dim,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ bias=use_bias,
+ norm=output_norm,
+ activation=F.relu,
+ )
+ self.add_module('adapter_{}'.format(idx + 1), lateral_conv)
+ self.add_module('layer_{}'.format(idx + 1), output_conv)
+
+ lateral_convs.append(lateral_conv)
+ output_convs.append(output_conv)
+ # Place convs into top-down order (from low to high resolution)
+ # to make the top-down computation in forward clearer.
+ self.lateral_convs = lateral_convs[::-1]
+ self.output_convs = output_convs[::-1]
+
+ self.convs_dim = convs_dim
+ self.num_feature_levels = 3 # always use 3 scales
+
+ def forward_features(self, features):
+ multi_scale_features = []
+ num_cur_levels = 0
+ # Reverse feature maps into top-down order (from low to high resolution)
+ for idx, f in enumerate(self.in_features[::-1]):
+ x = features[f]
+ lateral_conv = self.lateral_convs[idx]
+ output_conv = self.output_convs[idx]
+ if idx == 0:
+ y = lateral_conv(x)
+ else:
+ cur_fpn = lateral_conv(x)
+ y = cur_fpn + F.interpolate(
+ y,
+ size=cur_fpn.shape[-2:],
+ mode='bilinear',
+ align_corners=False)
+ y = output_conv(y)
+
+ if num_cur_levels < self.num_feature_levels:
+ multi_scale_features.append(y)
+ num_cur_levels += 1
+ return None, multi_scale_features
+
+ def forward(self, features, targets=None):
+ logger = logging.getLogger(__name__)
+ logger.warning(
+ 'Calling forward() may cause unpredicted behavior of PixelDecoder module.'
+ )
+ return self.forward_features(features)
+
+
+class PyramidPoolingModule(nn.Module):
+
+ def __init__(self, in_channels, channels=512, sizes=(1, 2, 3, 6)):
+ super().__init__()
+ self.stages = []
+ self.stages = nn.ModuleList(
+ [self._make_stage(in_channels, channels, size) for size in sizes])
+ self.bottleneck = Conv2d(in_channels + len(sizes) * channels,
+ in_channels, 1)
+
+ def _make_stage(self, features, out_features, size):
+ prior = nn.AdaptiveAvgPool2d(output_size=(size, size))
+ conv = Conv2d(features, out_features, 1)
+ return nn.Sequential(prior, conv)
+
+ def forward(self, feats):
+ h, w = feats.size(2), feats.size(3)
+ priors = [
+ F.interpolate(
+ input=F.relu_(stage(feats)),
+ size=(h, w),
+ mode='bilinear',
+ align_corners=False) for stage in self.stages
+ ] + [feats]
+ out = F.relu_(self.bottleneck(torch.cat(priors, 1)))
+ return out
+
+
+class PyramidPoolingModuleFPN(BaseFPN):
+
+ def __init__(
+ self,
+ input_shape,
+ *,
+ convs_dim: int,
+ mask_dim: int,
+ norm: Optional[Union[str, Callable]] = None,
+ ):
+ """
+ NOTE: this interface is experimental.
+ Args:
+ input_shape: shapes (channels and stride) of the input features
+ convs_dim: number of output channels for the intermediate conv layers.
+ mask_dim: number of output channels for the final conv layer.
+ norm (str or callable): normalization for all conv layers
+ """
+ super().__init__(
+ input_shape, convs_dim=convs_dim, mask_dim=mask_dim, norm=norm)
+ self.ppm = PyramidPoolingModule(convs_dim, convs_dim // 4)
+
+ def forward_features(self, features):
+ multi_scale_features = []
+ num_cur_levels = 0
+ # Reverse feature maps into top-down order (from low to high resolution)
+ for idx, f in enumerate(self.in_features[::-1]):
+ x = features[f]
+ lateral_conv = self.lateral_convs[idx]
+ output_conv = self.output_convs[idx]
+ if idx == 0:
+ y = self.ppm(lateral_conv(x))
+ else:
+ cur_fpn = lateral_conv(x)
+ y = cur_fpn + F.interpolate(
+ y,
+ size=cur_fpn.shape[-2:],
+ mode='bilinear',
+ align_corners=False)
+ y = output_conv(y)
+
+ if num_cur_levels < self.num_feature_levels:
+ multi_scale_features.append(y)
+ num_cur_levels += 1
+
+ return None, multi_scale_features
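
Both forward_features implementations above follow the standard FPN top-down recipe: a 1x1 lateral conv on each backbone level, bilinear upsampling of the coarser running map, element-wise addition, then a 3x3 output conv (PyramidPoolingModuleFPN additionally passes the coarsest lateral output through the PPM). A minimal sketch of that fusion with made-up channel counts, omitting the norms and the PPM:

import torch
import torch.nn.functional as F
from torch import nn

convs_dim = 8
feats = {   # toy backbone outputs, coarse to fine
    'res5': torch.randn(1, 32, 8, 8),
    'res4': torch.randn(1, 16, 16, 16),
    'res3': torch.randn(1, 8, 32, 32),
}
lateral = {k: nn.Conv2d(v.shape[1], convs_dim, 1) for k, v in feats.items()}
output = {k: nn.Conv2d(convs_dim, convs_dim, 3, padding=1) for k in feats}

multi_scale_features = []
for idx, name in enumerate(['res5', 'res4', 'res3']):   # top-down order
    if idx == 0:
        y = lateral[name](feats[name])                  # coarsest level: lateral conv only
    else:
        cur = lateral[name](feats[name])
        y = cur + F.interpolate(
            y, size=cur.shape[-2:], mode='bilinear', align_corners=False)
        y = output[name](y)
    multi_scale_features.append(y)

print([tuple(t.shape) for t in multi_scale_features])   # three scales, all with convs_dim channels
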
diff --git a/modelscope/models/cv/image_instance_segmentation/fastinst_model.py b/modelscope/models/cv/image_instance_segmentation/fastinst_model.py
new file mode 100644
index 00000000..f9cfbc4f
--- /dev/null
+++ b/modelscope/models/cv/image_instance_segmentation/fastinst_model.py
@@ -0,0 +1,221 @@
+# Part of implementation is borrowed and modified from Mask2Former, publicly available at
+# https://github.com/facebookresearch/Mask2Former.
+import os
+from typing import Any, Dict, List
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from modelscope.metainfo import Models
+from modelscope.models.base import TorchModel
+from modelscope.models.builder import MODELS
+from modelscope.models.cv.image_instance_segmentation.maskdino_swin import \
+ ImageList
+from modelscope.utils.constant import ModelFile, Tasks
+from modelscope.utils.logger import get_logger
+from .backbones import build_resnet_backbone
+from .fastinst.fastinst_decoder import FastInstDecoder
+from .fastinst.fastinst_encoder import PyramidPoolingModuleFPN
+
+logger = get_logger()
+
+
+@MODELS.register_module(Tasks.image_segmentation, module_name=Models.fastinst)
+class FastInst(TorchModel):
+
+ def __init__(self,
+ model_dir,
+ backbone=None,
+ encoder=None,
+ decoder=None,
+ pretrained=None,
+ classes=None,
+ **kwargs):
+ """
+ Deep Learning Technique for Human Parsing: A Survey and Outlook. See https://arxiv.org/abs/2301.00394
+ Args:
+ backbone (dict): backbone config.
+ encoder (dict): encoder config.
+ decoder (dict): decoder config.
+ pretrained (bool): whether to use pretrained model
+ classes (list): class names
+ """
+ super(FastInst, self).__init__(model_dir, **kwargs)
+
+ self.backbone = build_resnet_backbone(
+ **backbone, input_shape={'channels': 3})
+ in_features = encoder.pop('in_features')
+ input_shape = {
+ k: v
+ for k, v in self.backbone.output_shape().items()
+ if k in in_features
+ }
+ encoder = PyramidPoolingModuleFPN(input_shape=input_shape, **encoder)
+ decoder = FastInstDecoder(in_channels=encoder.convs_dim, **decoder)
+ self.sem_seg_head = FastInstHead(
+ pixel_decoder=encoder, transformer_predictor=decoder)
+
+ self.num_classes = decoder.num_classes
+ self.num_queries = decoder.num_queries
+ self.size_divisibility = 32
+ self.register_buffer(
+ 'pixel_mean',
+ torch.Tensor([123.675, 116.28, 103.53]).view(-1, 1, 1), False)
+ self.register_buffer(
+ 'pixel_std',
+ torch.Tensor([58.395, 57.12, 57.375]).view(-1, 1, 1), False)
+ self.classes = classes
+ self.test_topk_per_image = 100
+
+ if pretrained:
+ model_path = os.path.join(model_dir, ModelFile.TORCH_MODEL_FILE)
+ logger.info(f'loading model from {model_path}')
+ weight = torch.load(model_path, map_location='cpu')['model']
+ tgt_weight = self.state_dict()
+ for name in list(weight.keys()):
+ if name in tgt_weight:
+ load_size = weight[name].size()
+ tgt_size = tgt_weight[name].size()
+ mis_match = False
+ if len(load_size) != len(tgt_size):
+ mis_match = True
+ else:
+ for n1, n2 in zip(load_size, tgt_size):
+ if n1 != n2:
+ mis_match = True
+ break
+ if mis_match:
+ logger.info(
+ f'size mismatch for {name} '
+ f'({load_size} -> {tgt_size}), skip loading.')
+ del weight[name]
+ else:
+ logger.info(
+ f'{name} doesn\'t exist in current model, skip loading.'
+ )
+
+ self.load_state_dict(weight, strict=False)
+ logger.info('load model done')
+
+ def forward(self, batched_inputs: List[dict]) -> Dict[str, Any]:
+ images = [x['image'].to(self.device) for x in batched_inputs]
+ images = [(x - self.pixel_mean) / self.pixel_std for x in images]
+ images = ImageList.from_tensors(images, self.size_divisibility)
+
+ features = self.backbone(images.tensor)
+ outputs = self.sem_seg_head(features)
+
+ return dict(
+ outputs=outputs, batched_inputs=batched_inputs, images=images)
+
+ def postprocess(self, input: Dict[str, Any]) -> Dict[str, Any]:
+ outputs = input['outputs']
+ batched_inputs = input['batched_inputs']
+ images = input['images']
+ if self.training:
+ raise NotImplementedError
+ else:
+ mask_cls_results = outputs['pred_logits'] # (B, Q, C+1)
+ mask_pred_results = outputs['pred_masks'] # (B, Q, H, W)
+ # upsample masks
+ mask_pred_results = F.interpolate(
+ mask_pred_results,
+ size=(images.tensor.shape[-2], images.tensor.shape[-1]),
+ mode='bilinear',
+ align_corners=False,
+ )
+
+ del outputs
+
+ processed_results = []
+ for mask_cls_result, mask_pred_result, input_per_image, image_size in zip(
+ mask_cls_results, mask_pred_results, batched_inputs,
+ images.image_sizes):
+ height = input_per_image.get('height', image_size[0])
+ width = input_per_image.get('width', image_size[1])
+ processed_results.append({}) # for each image
+
+ mask_pred_result = self.sem_seg_postprocess(
+ mask_pred_result, image_size, height, width)
+ mask_cls_result = mask_cls_result.to(mask_pred_result)
+
+ instance_r = self.instance_inference(mask_cls_result,
+ mask_pred_result)
+ processed_results[-1]['instances'] = instance_r
+
+ return dict(eval_result=processed_results)
+
+ @property
+ def device(self):
+ return self.pixel_mean.device
+
+ def sem_seg_postprocess(self, result, img_size, output_height,
+ output_width):
+ result = result[:, :img_size[0], :img_size[1]].expand(1, -1, -1, -1)
+ result = F.interpolate(
+ result,
+ size=(output_height, output_width),
+ mode='bilinear',
+ align_corners=False)[0]
+ return result
+
+ def instance_inference(self, mask_cls, mask_pred):
+ # mask_pred is already processed to have the same shape as original input
+ image_size = mask_pred.shape[-2:]
+
+ # [Q, K]
+ scores = F.softmax(mask_cls, dim=-1)[:, :-1]
+ labels = torch.arange(
+ self.num_classes,
+ device=self.device).unsqueeze(0).repeat(self.num_queries,
+ 1).flatten(0, 1)
+ scores_per_image, topk_indices = scores.flatten(0, 1).topk(
+ self.test_topk_per_image, sorted=False)
+ labels_per_image = labels[topk_indices]
+
+ topk_indices = topk_indices // self.num_classes
+ mask_pred = mask_pred[topk_indices]
+
+ result = {'image_size': image_size}
+ # mask (before sigmoid)
+ mask_pred_sigmoid = mask_pred.sigmoid()
+ result['pred_masks'] = (mask_pred_sigmoid > 0.5).float()
+
+ # calculate average mask prob
+ mask_scores_per_image = (mask_pred_sigmoid.flatten(1)
+ * result['pred_masks'].flatten(1)).sum(1) / (
+ result['pred_masks'].flatten(1).sum(1)
+ + 1e-6)
+ result['scores'] = scores_per_image * mask_scores_per_image
+ result['pred_classes'] = labels_per_image
+ return result
+
+
+class FastInstHead(nn.Module):
+
+ def __init__(
+ self,
+ *,
+ pixel_decoder: nn.Module,
+ # extra parameters
+ transformer_predictor: nn.Module):
+ """
+ NOTE: this interface is experimental.
+ Args:
+ pixel_decoder: the pixel decoder module
+ transformer_predictor: the transformer decoder that makes prediction
+ """
+ super().__init__()
+ self.pixel_decoder = pixel_decoder
+ self.predictor = transformer_predictor
+
+ def forward(self, features, targets=None):
+ return self.layers(features, targets)
+
+ def layers(self, features, targets=None):
+ mask_features, multi_scale_features = self.pixel_decoder.forward_features(
+ features)
+ predictions = self.predictor(multi_scale_features, mask_features,
+ targets)
+ return predictions
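
instance_inference above flattens the per-query class scores, takes one global top-k over all (query, class) pairs, and then recovers the class label from a precomputed label grid and the query index by integer division. A small standalone illustration of that index arithmetic with toy sizes:

import torch

num_queries, num_classes, topk = 4, 3, 5
scores = torch.rand(num_queries, num_classes)    # softmax scores without the background column
labels = torch.arange(num_classes).unsqueeze(0).repeat(num_queries, 1).flatten(0, 1)

scores_per_image, topk_indices = scores.flatten(0, 1).topk(topk, sorted=False)
labels_per_image = labels[topk_indices]          # class id of each kept prediction
query_indices = topk_indices // num_classes      # which query each prediction came from

print(query_indices.tolist(), labels_per_image.tolist())
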
diff --git a/modelscope/models/cv/image_instance_segmentation/postprocess_utils.py b/modelscope/models/cv/image_instance_segmentation/postprocess_utils.py
index fdbb2fb0..aad7d8e9 100644
--- a/modelscope/models/cv/image_instance_segmentation/postprocess_utils.py
+++ b/modelscope/models/cv/image_instance_segmentation/postprocess_utils.py
@@ -108,16 +108,16 @@ def get_img_ins_seg_result(img_seg_result=None,
for seg_result in img_seg_result:
box = [
- np.int(seg_result[0]),
- np.int(seg_result[1]),
- np.int(seg_result[2]),
- np.int(seg_result[3])
+ int(seg_result[0]),
+ int(seg_result[1]),
+ int(seg_result[2]),
+ int(seg_result[3])
]
- score = np.float(seg_result[4])
+ score = float(seg_result[4])
category = seg_result[5]
mask = np.array(seg_result[6], order='F', dtype='uint8')
- mask = mask.astype(np.float)
+ mask = mask.astype(float)
results_dict[OutputKeys.BOXES].append(box)
results_dict[OutputKeys.MASKS].append(mask)
diff --git a/modelscope/models/cv/image_mvs_depth_estimation/colmap2mvsnet.py b/modelscope/models/cv/image_mvs_depth_estimation/colmap2mvsnet.py
index feda4430..37d92c13 100644
--- a/modelscope/models/cv/image_mvs_depth_estimation/colmap2mvsnet.py
+++ b/modelscope/models/cv/image_mvs_depth_estimation/colmap2mvsnet.py
@@ -382,7 +382,7 @@ def processing_single_scene(args):
points3d[p3d_id].xyz[0], points3d[p3d_id].xyz[1],
points3d[p3d_id].xyz[2], 1
])
- zs.append(np.asscalar(transformed[2]))
+ zs.append(transformed[2].item())
zs_sorted = sorted(zs)
# relaxed depth range
max_ratio = 0.1
diff --git a/modelscope/models/cv/image_mvs_depth_estimation/depth_filter.py b/modelscope/models/cv/image_mvs_depth_estimation/depth_filter.py
index 16cdedf4..4ef6275a 100644
--- a/modelscope/models/cv/image_mvs_depth_estimation/depth_filter.py
+++ b/modelscope/models/cv/image_mvs_depth_estimation/depth_filter.py
@@ -40,7 +40,7 @@ def read_mask(filename):
# save a binary mask
def save_mask(filename, mask):
- assert mask.dtype == np.bool
+ assert mask.dtype == bool
mask = mask.astype(np.uint8) * 255
Image.fromarray(mask).save(filename)
diff --git a/modelscope/models/cv/image_panoptic_segmentation/__init__.py b/modelscope/models/cv/image_panoptic_segmentation/__init__.py
index 1af5b6f8..2b2be4b7 100644
--- a/modelscope/models/cv/image_panoptic_segmentation/__init__.py
+++ b/modelscope/models/cv/image_panoptic_segmentation/__init__.py
@@ -5,7 +5,6 @@ from modelscope.utils.import_utils import LazyImportModule
if TYPE_CHECKING:
from .panseg_model import SwinLPanopticSegmentation
- from .r50_panseg_model import R50PanopticSegmentation
else:
_import_structure = {
diff --git a/modelscope/models/cv/image_panoptic_segmentation/r50_panseg_model.py b/modelscope/models/cv/image_panoptic_segmentation/r50_panseg_model.py
deleted file mode 100644
index 73b6b76c..00000000
--- a/modelscope/models/cv/image_panoptic_segmentation/r50_panseg_model.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-
-from easycv.models.segmentation import Mask2Former
-
-from modelscope.metainfo import Models
-from modelscope.models.builder import MODELS
-from modelscope.models.cv.easycv_base import EasyCVBaseModel
-from modelscope.utils.constant import Tasks
-
-
-@MODELS.register_module(
- group_key=Tasks.image_segmentation,
- module_name=Models.r50_panoptic_segmentation)
-class R50PanopticSegmentation(EasyCVBaseModel, Mask2Former):
-
- def __init__(self, model_dir=None, *args, **kwargs):
- EasyCVBaseModel.__init__(self, model_dir, args, kwargs)
- Mask2Former.__init__(self, *args, **kwargs)
diff --git a/modelscope/models/cv/image_semantic_segmentation/segformer.py b/modelscope/models/cv/image_semantic_segmentation/segformer.py
deleted file mode 100644
index 46303526..00000000
--- a/modelscope/models/cv/image_semantic_segmentation/segformer.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from easycv.models.segmentation import EncoderDecoder
-
-from modelscope.metainfo import Models
-from modelscope.models.builder import MODELS
-from modelscope.models.cv.easycv_base import EasyCVBaseModel
-from modelscope.utils.constant import Tasks
-
-
-@MODELS.register_module(
- group_key=Tasks.image_segmentation, module_name=Models.segformer)
-class Segformer(EasyCVBaseModel, EncoderDecoder):
-
- def __init__(self, model_dir=None, *args, **kwargs):
- EasyCVBaseModel.__init__(self, model_dir, args, kwargs)
- EncoderDecoder.__init__(self, *args, **kwargs)
diff --git a/modelscope/models/cv/image_semantic_segmentation/semantic_seg_model.py b/modelscope/models/cv/image_semantic_segmentation/semantic_seg_model.py
index 2b38ebad..455f29fb 100644
--- a/modelscope/models/cv/image_semantic_segmentation/semantic_seg_model.py
+++ b/modelscope/models/cv/image_semantic_segmentation/semantic_seg_model.py
@@ -60,7 +60,7 @@ class SemanticSegmentation(TorchModel):
ids = ids[legal_indices]
segms = (semantic_result[None] == ids[:, None, None])
- masks = [it.astype(np.int) for it in segms]
+ masks = [it.astype(int) for it in segms]
labels_txt = np.array(self.CLASSES)[ids].tolist()
results = {
diff --git a/modelscope/models/cv/image_skychange/ptsemseg/hrnet_backnone.py b/modelscope/models/cv/image_skychange/ptsemseg/hrnet_backnone.py
index 66429d67..8fcb6625 100644
--- a/modelscope/models/cv/image_skychange/ptsemseg/hrnet_backnone.py
+++ b/modelscope/models/cv/image_skychange/ptsemseg/hrnet_backnone.py
@@ -458,7 +458,7 @@ class HrnetBackBone(nn.Module):
self.stage4, pre_stage_channels = self._make_stage(
self.stage4_cfg, num_channels, multi_scale_output=True)
- self.backbone_last_inp_channels = np.int(np.sum(pre_stage_channels))
+ self.backbone_last_inp_channels = int(np.sum(pre_stage_channels))
def _make_transition_layer(self, num_channels_pre_layer,
num_channels_cur_layer):
diff --git a/modelscope/models/cv/image_skychange/ptsemseg/hrnet_super_and_ocr.py b/modelscope/models/cv/image_skychange/ptsemseg/hrnet_super_and_ocr.py
index 09768451..5dbef66e 100644
--- a/modelscope/models/cv/image_skychange/ptsemseg/hrnet_super_and_ocr.py
+++ b/modelscope/models/cv/image_skychange/ptsemseg/hrnet_super_and_ocr.py
@@ -259,7 +259,7 @@ class HrnetSuperAndOcr(HrnetBackBone):
num_channels = [64, last_inp_channels]
self.stage_super, super_stage_channels = self._make_stage(
self.super_dict, num_channels)
- last_inp_channels = np.int(np.sum(super_stage_channels))
+ last_inp_channels = int(np.sum(super_stage_channels))
if self.is_contain_aspp:
aspp_param = kwargs['aspp']
@@ -372,7 +372,7 @@ class HrnetSuperAndOcr(HrnetBackBone):
num_channels = [64, ocr_mid_channels]
self.stage_super, super_stage_channels = self._make_stage(
self.super_dict, num_channels)
- last_inp_channels = np.int(np.sum(super_stage_channels))
+ last_inp_channels = int(np.sum(super_stage_channels))
self.cls_head = nn.Sequential(
nn.Conv2d(
diff --git a/modelscope/models/cv/movie_scene_segmentation/model.py b/modelscope/models/cv/movie_scene_segmentation/model.py
index 336af3b3..818a3876 100644
--- a/modelscope/models/cv/movie_scene_segmentation/model.py
+++ b/modelscope/models/cv/movie_scene_segmentation/model.py
@@ -13,7 +13,8 @@ import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as TF
from PIL import Image
-from shotdetect_scenedetect_lgss import shot_detect
+from shotdetect_scenedetect_lgss import shot_detector
+from tqdm import tqdm
from modelscope.metainfo import Models
from modelscope.models.base.base_torch_model import TorchModel
@@ -60,6 +61,9 @@ class MovieSceneSegmentationModel(TorchModel):
self.head_sbd = nn.Linear(hdim, 2)
load_param_with_prefix('head_sbd', self.head_sbd, params)
+ self.shot_detector = shot_detector()
+ self.shot_detector.init(**self.cfg.preprocessor.shot_detect)
+
self.test_transform = TF.Compose([
TF.Resize(size=256, interpolation=Image.BICUBIC),
TF.CenterCrop(224),
@@ -98,29 +102,45 @@ class MovieSceneSegmentationModel(TorchModel):
def inference(self, batch):
logger.info('Begin scene detect ......')
bs = self.cfg.pipeline.batch_size_per_gpu
- sids = batch['sid']
- inputs = batch['shot_feat']
+ device = self.crn.attention_mask.device
- shot_num = len(sids)
+ shot_timecode_lst = batch['shot_timecode_lst']
+ shot_idx_lst = batch['shot_idx_lst']
+
+ shot_num = len(shot_timecode_lst)
cnt = math.ceil(shot_num / bs)
- infer_sid, infer_pred = [], []
+ infer_pred = []
infer_result = {}
- for i in range(cnt):
+ self.shot_detector.start()
+
+ for i in tqdm(range(cnt)):
start = i * bs
end = (i + 1) * bs if (i + 1) * bs < shot_num else shot_num
- input_ = inputs[start:end]
- sid_ = sids[start:end]
- input_ = torch.stack(input_)
+
+ batch_shot_idx_lst = shot_idx_lst[start:end]
+
+ shot_start_idx = batch_shot_idx_lst[0][0]
+ shot_end_idx = batch_shot_idx_lst[-1][-1]
+ batch_timecode_lst = {
+ i: shot_timecode_lst[i]
+ for i in range(shot_start_idx, shot_end_idx + 1)
+ }
+ batch_shot_keyf_lst = self.shot_detector.get_frame_img(
+ batch_timecode_lst, shot_start_idx, shot_num)
+ inputs = self.get_batch_input(batch_shot_keyf_lst, shot_start_idx,
+ batch_shot_idx_lst)
+
+ input_ = torch.stack(inputs).to(device)
outputs = self.shared_step(input_) # shape [b,2]
prob = F.softmax(outputs, dim=1)
- infer_sid.extend(sid_.cpu().detach().numpy())
infer_pred.extend(prob[:, 1].cpu().detach().numpy())
- infer_result.update({'pred': np.stack(infer_pred)})
- infer_result.update({'sid': infer_sid})
- assert len(infer_result['sid']) == len(sids)
- assert len(infer_result['pred']) == len(inputs)
+ infer_result.update({'pred': np.stack(infer_pred)})
+ infer_result.update({'sid': np.arange(shot_num)})
+
+ assert len(infer_result['pred']) == shot_num
+ self.shot_detector.release()
return infer_result
def shared_step(self, inputs):
@@ -162,38 +182,48 @@ class MovieSceneSegmentationModel(TorchModel):
logger.info('Generate scene .......')
pred_dict = inputs['feat']
+ shot2keyf = inputs['shot2keyf']
thres = self.cfg.pipeline.save_threshold
anno_dict = get_pred_boundary(pred_dict, thres)
scene_dict_lst, scene_list, shot_num, shot_dict_lst = pred2scene(
- self.shot2keyf, anno_dict)
+ shot2keyf, anno_dict)
if self.cfg.pipeline.save_split_scene:
re_dir = scene2video(inputs['input_video_pth'], scene_list, thres)
print(f'Split scene video saved to {re_dir}')
return len(scene_list), scene_dict_lst, shot_num, shot_dict_lst
- def preprocess(self, inputs):
- logger.info('Begin shot detect......')
- shot_keyf_lst, anno, shot2keyf = shot_detect(
- inputs, **self.cfg.preprocessor.shot_detect)
- logger.info('Shot detect done!')
+ def get_batch_input(self, shot_keyf_lst, shot_start_idx, shot_idx_lst):
- single_shot_feat, sid = [], []
+ single_shot_feat = []
for idx, one_shot in enumerate(shot_keyf_lst):
one_shot = [
self.test_transform(one_frame) for one_frame in one_shot
]
one_shot = torch.stack(one_shot, dim=0)
single_shot_feat.append(one_shot)
- sid.append(idx)
+
single_shot_feat = torch.stack(single_shot_feat, dim=0)
+
shot_feat = []
+ for idx, shot_idx in enumerate(shot_idx_lst):
+ shot_idx_ = shot_idx - shot_start_idx
+ _one_shot = single_shot_feat[shot_idx_]
+ shot_feat.append(_one_shot)
+
+ return shot_feat
+
+ def preprocess(self, inputs):
+ logger.info('Begin shot detect......')
+ shot_timecode_lst, anno, shot2keyf = self.shot_detector.shot_detect(
+ inputs, **self.cfg.preprocessor.shot_detect)
+ logger.info('Shot detect done!')
+
+ shot_idx_lst = []
for idx, one_shot in enumerate(anno):
shot_idx = int(one_shot['shot_id']) + np.arange(
-self.neighbor_size, self.neighbor_size + 1)
- shot_idx = np.clip(shot_idx, 0, one_shot['num_shot'])
- _one_shot = single_shot_feat[shot_idx]
- shot_feat.append(_one_shot)
- self.shot2keyf = shot2keyf
- self.anno = anno
- return shot_feat, sid
+ shot_idx = np.clip(shot_idx, 0, one_shot['num_shot'] - 1)
+ shot_idx_lst.append(shot_idx)
+
+ return shot2keyf, anno, shot_timecode_lst, shot_idx_lst
diff --git a/modelscope/models/cv/movie_scene_segmentation/utils/save_op.py b/modelscope/models/cv/movie_scene_segmentation/utils/save_op.py
index 49155716..34bebce0 100644
--- a/modelscope/models/cv/movie_scene_segmentation/utils/save_op.py
+++ b/modelscope/models/cv/movie_scene_segmentation/utils/save_op.py
@@ -10,11 +10,12 @@ from tqdm import tqdm
def get_pred_boundary(pred_dict, threshold=0.5):
- pred = pred_dict['pred']
+ pred = pred_dict['pred'].cpu().numpy()
+ sid = pred_dict['sid'].cpu().numpy().astype(np.int32)
tmp = (pred > threshold).astype(np.int32)
anno_dict = {}
for idx in range(len(tmp)):
- anno_dict.update({str(pred_dict['sid'][idx]).zfill(4): int(tmp[idx])})
+ anno_dict.update({str(sid[idx]).zfill(4): int(tmp[idx])})
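+    # e.g. pred = [0.2, 0.7] with sid = [0, 1] and threshold 0.5 gives
+    # {'0000': 0, '0001': 1} (illustrative values)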
return anno_dict
diff --git a/modelscope/models/cv/nerf_recon_acc/network/segmenter.py b/modelscope/models/cv/nerf_recon_acc/network/segmenter.py
index d71b9f16..e3d0ca8d 100644
--- a/modelscope/models/cv/nerf_recon_acc/network/segmenter.py
+++ b/modelscope/models/cv/nerf_recon_acc/network/segmenter.py
@@ -31,7 +31,7 @@ class ObjectSegmenter(object):
elif img.shape[2] == 4:
img = img[:, :, :3]
img = img[:, :, ::-1]
- img = img.astype(np.float)
+ img = img.astype(float)
return img
def run_mask(self, img):
diff --git a/modelscope/models/cv/object_detection/dino.py b/modelscope/models/cv/object_detection/dino.py
deleted file mode 100644
index e6c652f1..00000000
--- a/modelscope/models/cv/object_detection/dino.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from easycv.models.detection.detectors import Detection as _Detection
-
-from modelscope.metainfo import Models
-from modelscope.models.builder import MODELS
-from modelscope.models.cv.easycv_base import EasyCVBaseModel
-from modelscope.utils.constant import Tasks
-
-
-@MODELS.register_module(
- group_key=Tasks.image_object_detection, module_name=Models.dino)
-class DINO(EasyCVBaseModel, _Detection):
-
- def __init__(self, model_dir=None, *args, **kwargs):
- EasyCVBaseModel.__init__(self, model_dir, args, kwargs)
- _Detection.__init__(self, *args, **kwargs)
diff --git a/modelscope/models/cv/object_detection/yolox_pai.py b/modelscope/models/cv/object_detection/yolox_pai.py
deleted file mode 100644
index 7888cf82..00000000
--- a/modelscope/models/cv/object_detection/yolox_pai.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from easycv.models.detection.detectors import YOLOX as _YOLOX
-
-from modelscope.metainfo import Models
-from modelscope.models.builder import MODELS
-from modelscope.models.cv.easycv_base import EasyCVBaseModel
-from modelscope.utils.constant import Tasks
-
-
-@MODELS.register_module(
- group_key=Tasks.image_object_detection, module_name=Models.yolox)
-@MODELS.register_module(
- group_key=Tasks.image_object_detection,
- module_name=Models.image_object_detection_auto)
-@MODELS.register_module(
- group_key=Tasks.domain_specific_object_detection, module_name=Models.yolox)
-class YOLOX(EasyCVBaseModel, _YOLOX):
-
- def __init__(self, model_dir=None, *args, **kwargs):
- EasyCVBaseModel.__init__(self, model_dir, args, kwargs)
- _YOLOX.__init__(self, *args, **kwargs)
diff --git a/modelscope/models/cv/object_detection_3d/depe/result_vis.py b/modelscope/models/cv/object_detection_3d/depe/result_vis.py
index d577ab68..efaef0b5 100644
--- a/modelscope/models/cv/object_detection_3d/depe/result_vis.py
+++ b/modelscope/models/cv/object_detection_3d/depe/result_vis.py
@@ -30,7 +30,7 @@ def depth2color(depth):
if gray == 1:
return tuple(colors[-1].tolist())
num_rank = len(colors) - 1
- rank = np.floor(gray * num_rank).astype(np.int)
+ rank = np.floor(gray * num_rank).astype(int)
diff = (gray - rank / num_rank) * num_rank
tmp = colors[rank + 1] - colors[rank]
return tuple((colors[rank] + tmp * diff).tolist())
@@ -136,7 +136,7 @@ def plot_result(res_path,
l2g = get_lidar2global(infos)
corners_lidar = corners_global @ np.linalg.inv(l2g).T
corners_lidar = corners_lidar[:, :3]
- pred_flag = np.ones((corners_lidar.shape[0] // 8, ), dtype=np.bool)
+ pred_flag = np.ones((corners_lidar.shape[0] // 8, ), dtype=bool)
scores = [
pred_res[rid]['detection_score'] for rid in range(len(pred_res))
]
@@ -151,7 +151,7 @@ def plot_result(res_path,
origin=(0.5, 0.5, 0.5)).corners.numpy().reshape(-1, 3)
corners_lidar = np.concatenate([corners_lidar, corners_lidar_gt],
axis=0)
- gt_flag = np.ones((corners_lidar_gt.shape[0] // 8), dtype=np.bool)
+ gt_flag = np.ones((corners_lidar_gt.shape[0] // 8), dtype=bool)
pred_flag = np.concatenate(
[pred_flag, np.logical_not(gt_flag)], axis=0)
scores = scores + [0 for _ in range(infos['gt_boxes'].shape[0])]
@@ -169,7 +169,7 @@ def plot_result(res_path,
check_point_in_img(corners_img, img.shape[0], img.shape[1]))
valid = valid.reshape(
-1, 8) # valid means: d>0 and visible in current view
- corners_img = corners_img.reshape(-1, 8, 2).astype(np.int)
+ corners_img = corners_img.reshape(-1, 8, 2).astype(int)
for aid in range(valid.shape[0]):
if scores[aid] < vis_thred and pred_flag[aid]:
continue
diff --git a/modelscope/models/cv/ocr_recognition/model.py b/modelscope/models/cv/ocr_recognition/model.py
index 6eb13403..2406b6dc 100644
--- a/modelscope/models/cv/ocr_recognition/model.py
+++ b/modelscope/models/cv/ocr_recognition/model.py
@@ -90,8 +90,15 @@ class OCRRecognition(TorchModel):
f'recognizer should be either ConvNextViT, CRNN, but got {cfgs.model.recognizer}'
)
if model_path != '':
- self.recognizer.load_state_dict(
- torch.load(model_path, map_location='cpu'))
+ params_pretrained = torch.load(model_path, map_location='cpu')
+ model_dict = self.recognizer.state_dict()
+ # remove prefix for finetuned models
+ check_point = {
+ k.replace('recognizer.', ''): v
+ for k, v in params_pretrained.items()
+ }
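+            # e.g. a finetuned key such as 'recognizer.decoder.weight' (hypothetical name)
+            # maps back to 'decoder.weight', matching the recognizer's own state_dict keys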
+ model_dict.update(check_point)
+ self.recognizer.load_state_dict(model_dict)
dict_path = os.path.join(model_dir, ModelFile.VOCAB_FILE)
self.labelMapping = dict()
diff --git a/modelscope/models/cv/open_vocabulary_detection_vild/vild.py b/modelscope/models/cv/open_vocabulary_detection_vild/vild.py
index 999ec27a..2aea0593 100644
--- a/modelscope/models/cv/open_vocabulary_detection_vild/vild.py
+++ b/modelscope/models/cv/open_vocabulary_detection_vild/vild.py
@@ -176,8 +176,7 @@ class OpenVocabularyDetectionViLD(Model):
# Filter out invalid rois (nmsed rois)
valid_indices = np.where(
np.logical_and(
- np.isin(
- np.arange(len(roi_scores), dtype=np.int), nmsed_indices),
+ np.isin(np.arange(len(roi_scores), dtype=int), nmsed_indices),
np.logical_and(
np.logical_not(np.all(roi_boxes == 0., axis=-1)),
np.logical_and(roi_scores >= min_rpn_score_thresh,
diff --git a/modelscope/models/cv/panorama_depth_estimation/networks/layers.py b/modelscope/models/cv/panorama_depth_estimation/networks/layers.py
index 99e166aa..52fb3d39 100644
--- a/modelscope/models/cv/panorama_depth_estimation/networks/layers.py
+++ b/modelscope/models/cv/panorama_depth_estimation/networks/layers.py
@@ -72,7 +72,7 @@ class Cube2Equirec(nn.Module):
self.equ_h, 0), 3 * self.equ_w // 8, 1)
# Prepare ceil mask
- mask = np.zeros((self.equ_h, self.equ_w // 4), np.bool)
+ mask = np.zeros((self.equ_h, self.equ_w // 4), bool)
idx = np.linspace(-np.pi, np.pi, self.equ_w // 4) / 4
idx = self.equ_h // 2 - np.round(
np.arctan(np.cos(idx)) * self.equ_h / np.pi).astype(int)
diff --git a/modelscope/models/cv/video_depth_estimation/utils/depth.py b/modelscope/models/cv/video_depth_estimation/utils/depth.py
index e9f287e7..5fbf6aa6 100644
--- a/modelscope/models/cv/video_depth_estimation/utils/depth.py
+++ b/modelscope/models/cv/video_depth_estimation/utils/depth.py
@@ -29,7 +29,7 @@ def load_depth(file):
elif file.endswith('png'):
depth_png = np.array(load_image(file), dtype=int)
assert (np.max(depth_png) > 255), 'Wrong .png depth file'
- return depth_png.astype(np.float) / 256.
+ return depth_png.astype(float) / 256.
else:
raise NotImplementedError('Depth extension not supported.')
diff --git a/modelscope/models/cv/video_frame_interpolation/utils/scene_change_detection.py b/modelscope/models/cv/video_frame_interpolation/utils/scene_change_detection.py
index 4cbe60a7..379fe855 100644
--- a/modelscope/models/cv/video_frame_interpolation/utils/scene_change_detection.py
+++ b/modelscope/models/cv/video_frame_interpolation/utils/scene_change_detection.py
@@ -85,7 +85,7 @@ def do_scene_detect(F01_tensor, F10_tensor, img0_tensor, img1_tensor):
img_diff = ori_img.float() - ref_img.float()
img_diff = torch.abs(img_diff)
- kernel = np.ones([8, 8], np.float) / 64
+ kernel = np.ones([8, 8], float) / 64
kernel = torch.FloatTensor(kernel).to(device).unsqueeze(0).unsqueeze(0)
diff = F.conv2d(img_diff, kernel, padding=4)
diff --git a/modelscope/models/cv/video_multi_object_tracking/tracker/matching.py b/modelscope/models/cv/video_multi_object_tracking/tracker/matching.py
index 45d2f5c0..e5c2e8a9 100644
--- a/modelscope/models/cv/video_multi_object_tracking/tracker/matching.py
+++ b/modelscope/models/cv/video_multi_object_tracking/tracker/matching.py
@@ -27,7 +27,7 @@ def linear_assignment(cost_matrix, thresh):
def ious(atlbrs, btlbrs):
- ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float)
+ ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=float)
if ious.size == 0:
return ious
@@ -60,13 +60,13 @@ def embedding_distance(tracks, detections, metric='cosine'):
cost_matrix: np.ndarray
"""
- cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float)
+ cost_matrix = np.zeros((len(tracks), len(detections)), dtype=float)
if cost_matrix.size == 0:
return cost_matrix
det_features = np.asarray([track.curr_feat for track in detections],
- dtype=np.float)
+ dtype=float)
track_features = np.asarray([track.smooth_feat for track in tracks],
- dtype=np.float)
+ dtype=float)
cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric))
return cost_matrix
diff --git a/modelscope/models/cv/video_multi_object_tracking/tracker/multitracker.py b/modelscope/models/cv/video_multi_object_tracking/tracker/multitracker.py
index 1dc3297f..d38477b7 100644
--- a/modelscope/models/cv/video_multi_object_tracking/tracker/multitracker.py
+++ b/modelscope/models/cv/video_multi_object_tracking/tracker/multitracker.py
@@ -28,7 +28,7 @@ class STrack(BaseTrack):
def __init__(self, tlwh, score, temp_feat, buffer_size=30):
# wait activate
- self._tlwh = np.asarray(tlwh, dtype=np.float)
+ self._tlwh = np.asarray(tlwh, dtype=float)
self.kalman_filter = None
self.mean, self.covariance = None, None
self.is_activated = False
diff --git a/modelscope/models/multi_modal/__init__.py b/modelscope/models/multi_modal/__init__.py
index e85c48fb..9fa34baf 100644
--- a/modelscope/models/multi_modal/__init__.py
+++ b/modelscope/models/multi_modal/__init__.py
@@ -20,6 +20,8 @@ if TYPE_CHECKING:
from .vldoc import VLDocForDocVLEmbedding
from .video_synthesis import TextToVideoSynthesis
from .efficient_diffusion_tuning import EfficientStableDiffusion
+ from .mplug_owl import MplugOwlForConditionalGeneration
+ from .clip_interrogator import CLIP_Interrogator
else:
_import_structure = {
@@ -37,7 +39,9 @@ else:
['MultiStageDiffusionForTextToImageSynthesis'],
'vldoc': ['VLDocForDocVLEmbedding'],
'video_synthesis': ['TextToVideoSynthesis'],
- 'efficient_diffusion_tuning': ['EfficientStableDiffusion']
+ 'efficient_diffusion_tuning': ['EfficientStableDiffusion'],
+ 'mplug_owl': ['MplugOwlForConditionalGeneration'],
+ 'clip_interrogator': ['CLIP_Interrogator'],
}
import sys
diff --git a/modelscope/models/multi_modal/clip_interrogator/__init__.py b/modelscope/models/multi_modal/clip_interrogator/__init__.py
new file mode 100644
index 00000000..96fefbf6
--- /dev/null
+++ b/modelscope/models/multi_modal/clip_interrogator/__init__.py
@@ -0,0 +1 @@
+from .model import CLIP_Interrogator
diff --git a/modelscope/models/multi_modal/clip_interrogator/model.py b/modelscope/models/multi_modal/clip_interrogator/model.py
new file mode 100644
index 00000000..a7e27cbd
--- /dev/null
+++ b/modelscope/models/multi_modal/clip_interrogator/model.py
@@ -0,0 +1,599 @@
+# This implementation is adopted from CLIP-Interrogator, made publicly available under the MIT License at
+# https://github.com/pharmapsychotic/clip-interrogator/blob/main/clip_interrogator/clip_interrogator.py
+
+import hashlib
+import math
+import os
+import time
+from dataclasses import dataclass
+from typing import List, Optional
+
+import numpy as np
+import open_clip
+import requests
+import torch
+import torchvision.transforms as transforms
+from PIL import Image
+from safetensors.numpy import load_file, save_file
+from tqdm import tqdm
+from transformers import (AutoModelForCausalLM, AutoProcessor,
+ Blip2ForConditionalGeneration,
+ BlipForConditionalGeneration)
+
+from modelscope.metainfo import Models
+from modelscope.models.base import TorchModel
+from modelscope.models.builder import MODELS
+from modelscope.outputs import OutputKeys
+from modelscope.preprocessors import LoadImage
+from modelscope.utils.constant import ModelFile, Tasks
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+__all__ = ['CLIP_Interrogator']
+
+CAPTION_MODELS = {
+ 'blip-base': 'blip-image-captioning-base',
+ 'blip-large': 'blip-image-captioning-large',
+ 'blip2-2.7b': 'blip2-opt-2.7b',
+ 'blip2-flan-t5-xl': 'blip2-flan-t5-xl',
+ 'git-large-coco': 'git-large-coco',
+}
+
+
+@dataclass
+class Config:
+ # models can optionally be passed in directly
+ caption_model = None
+ caption_processor = None
+ clip_model = None
+ clip_preprocess = None
+
+ # blip settings
+ caption_max_length: int = 32
+ caption_model_name: Optional[
+ str] = 'blip-large' # use a key from CAPTION_MODELS or None
+ caption_offload: bool = False
+
+ # clip settings
+ clip_model_name: str = 'ViT-L-14/openai'
+ clip_model_path: Optional[str] = None
+ clip_offload: bool = False
+
+ # interrogator settings
+ cache_path: str = 'cache' # path to store cached text embeddings
+ download_cache: bool = False # when true, cached embeds are downloaded from huggingface
+ chunk_size: int = 2048 # batch size for CLIP, use smaller for lower VRAM
+ data_path: str = os.path.join(os.path.dirname(__file__), 'data')
+ device: str = ('cuda' if torch.cuda.is_available() else 'cpu')
+ flavor_intermediate_count: int = 2048
+    quiet: bool = False  # when True, progress bars are not shown
+
+ def apply_low_vram_defaults(self):
+ self.caption_model_name = 'blip-base'
+ self.caption_offload = True
+ self.clip_offload = True
+ self.chunk_size = 1024
+ self.flavor_intermediate_count = 1024
+
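+# Usage sketch for Config (illustrative, not an official API example): on small GPUs,
+# apply_low_vram_defaults() switches to the smaller BLIP captioner, enables offloading
+# and halves the chunk size, e.g.
+#
+#   cfg = Config(clip_model_name='ViT-L-14/openai')
+#   cfg.apply_low_vram_defaults()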
+
+# CLIP-Interrogator utilizes CLIP and BLIP to generate rich captions for images.
+# CLIP is a zero-shot image classifier which can be used to generate image and text embeddings.
+# BLIP is a new VLP framework which transfers flexibly to both vision-language understanding and generation tasks.
+# BLIP effectively utilizes the noisy web data by bootstrapping the captions, where
+# a captioner generates synthetic captions and a filter removes the noisy ones.
+# Please refer to the papers CLIP: Learning Transferable Visual Models From Natural Language Supervision
+# https://arxiv.org/abs/2103.00020
+# BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation
+# https://arxiv.org/abs/2201.12086
+
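+# Usage sketch (illustrative; assumes the model assets are available under Config.cache_path
+# and that `example.jpg` is a local image):
+#
+#   ci = Interrogator(Config(clip_model_name='ViT-L-14/openai'))
+#   prompt = ci.interrogate(Image.open('example.jpg'))            # caption + best modifiers
+#   fast_prompt = ci.interrogate_fast(Image.open('example.jpg'))  # caption + top-ranked terms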
+
+class Interrogator():
+
+ def __init__(self, config: Config):
+ self.config = config
+ self.device = config.device
+ self.dtype = torch.float16 if self.device == 'cuda' else torch.float32
+ self.caption_offloaded = True
+ self.clip_offloaded = True
+ self.load_caption_model()
+ self.load_clip_model()
+
+ def load_caption_model(self):
+ if self.config.caption_model is None and self.config.caption_model_name:
+ if not self.config.quiet:
+ print(
+ f'Loading caption model {self.config.caption_model_name}...'
+ )
+
+ model_path = CAPTION_MODELS[self.config.caption_model_name]
+ if self.config.caption_model_name.startswith('git-'):
+ caption_model = AutoModelForCausalLM.from_pretrained(
+ os.path.join(self.config.cache_path, model_path),
+ torch_dtype=torch.float32)
+ elif self.config.caption_model_name.startswith('blip2-'):
+ caption_model = Blip2ForConditionalGeneration.from_pretrained(
+ os.path.join(self.config.cache_path, model_path),
+ torch_dtype=self.dtype)
+ else:
+ caption_model = BlipForConditionalGeneration.from_pretrained(
+ os.path.join(self.config.cache_path, model_path),
+ torch_dtype=self.dtype)
+ self.caption_processor = AutoProcessor.from_pretrained(
+ os.path.join(self.config.cache_path, model_path))
+
+ caption_model.eval()
+ if not self.config.caption_offload:
+ caption_model = caption_model.to(self.config.device)
+ self.caption_model = caption_model
+ else:
+ self.caption_model = self.config.caption_model
+ self.caption_processor = self.config.caption_processor
+
+ def load_clip_model(self):
+ start_time = time.time()
+ config = self.config
+
+ clip_model_name, clip_model_pretrained_name = config.clip_model_name.split(
+ '/', 2)
+
+ if config.clip_model is None:
+ if not config.quiet:
+ print(f'Loading CLIP model {config.clip_model_name}...')
+
+ self.clip_model, _, self.clip_preprocess = open_clip.create_model_and_transforms(
+ clip_model_name,
+ pretrained=clip_model_pretrained_name,
+ precision='fp16' if config.device == 'cuda' else 'fp32',
+ device=config.device,
+ jit=False,
+ cache_dir=config.clip_model_path)
+ self.clip_model.eval()
+ else:
+ self.clip_model = config.clip_model
+ self.clip_preprocess = config.clip_preprocess
+ self.tokenize = open_clip.get_tokenizer(clip_model_name)
+
+ sites = [
+ 'Artstation', 'behance', 'cg society', 'cgsociety', 'deviantart',
+ 'dribbble', 'flickr', 'instagram', 'pexels', 'pinterest',
+ 'pixabay', 'pixiv', 'polycount', 'reddit', 'shutterstock',
+ 'tumblr', 'unsplash', 'zbrush central'
+ ]
+ trending_list = [site for site in sites]
+ trending_list.extend(['trending on ' + site for site in sites])
+ trending_list.extend(['featured on ' + site for site in sites])
+ trending_list.extend([site + ' contest winner' for site in sites])
+
+ raw_artists = load_list(config.data_path, 'artists.txt')
+ artists = [f'by {a}' for a in raw_artists]
+ artists.extend([f'inspired by {a}' for a in raw_artists])
+
+ self._prepare_clip()
+ self.artists = LabelTable(artists, 'artists', self)
+ self.flavors = LabelTable(
+ load_list(config.data_path, 'flavors.txt'), 'flavors', self)
+ self.mediums = LabelTable(
+ load_list(config.data_path, 'mediums.txt'), 'mediums', self)
+ self.movements = LabelTable(
+ load_list(config.data_path, 'movements.txt'), 'movements', self)
+ self.trendings = LabelTable(trending_list, 'trendings', self)
+ self.negative = LabelTable(
+ load_list(config.data_path, 'negative.txt'), 'negative', self)
+
+ end_time = time.time()
+ if not config.quiet:
+ print(
+ f'Loaded CLIP model and data in {end_time-start_time:.2f} seconds.'
+ )
+
+ def chain(self,
+ image_features: torch.Tensor,
+ phrases: List[str],
+ best_prompt: str = '',
+ best_sim: float = 0,
+ min_count: int = 8,
+ max_count: int = 32,
+ desc='Chaining',
+ reverse: bool = False) -> str:
+ self._prepare_clip()
+
+ phrases = set(phrases)
+ if not best_prompt:
+ best_prompt = self.rank_top(
+ image_features, [f for f in phrases], reverse=reverse)
+ best_sim = self.similarity(image_features, best_prompt)
+ phrases.remove(best_prompt)
+ curr_prompt, curr_sim = best_prompt, best_sim
+
+ def check(addition: str, idx: int) -> bool:
+ nonlocal best_prompt, best_sim, curr_prompt, curr_sim
+ prompt = curr_prompt + ', ' + addition
+ sim = self.similarity(image_features, prompt)
+ if reverse:
+ sim = -sim
+
+ if sim > best_sim:
+ best_prompt, best_sim = prompt, sim
+ if sim > curr_sim or idx < min_count:
+ curr_prompt, curr_sim = prompt, sim
+ return True
+ return False
+
+ for idx in tqdm(
+ range(max_count), desc=desc, disable=self.config.quiet):
+ best = self.rank_top(
+ image_features, [f'{curr_prompt}, {f}' for f in phrases],
+ reverse=reverse)
+ flave = best[len(curr_prompt) + 2:]
+ if not check(flave, idx):
+ break
+ if _prompt_at_max_len(curr_prompt, self.tokenize):
+ break
+ phrases.remove(flave)
+
+ return best_prompt
+
+ def generate_caption(self, pil_image: Image) -> str:
+ assert self.caption_model is not None, 'No caption model loaded.'
+ self._prepare_caption()
+ inputs = self.caption_processor(
+ images=pil_image, return_tensors='pt').to(self.device)
+ if not self.config.caption_model_name.startswith('git-'):
+ inputs = inputs.to(self.dtype)
+ tokens = self.caption_model.generate(
+ **inputs, max_new_tokens=self.config.caption_max_length)
+ return self.caption_processor.batch_decode(
+ tokens, skip_special_tokens=True)[0].strip()
+
+ def image_to_features(self, image: Image) -> torch.Tensor:
+ self._prepare_clip()
+ images = self.clip_preprocess(image).unsqueeze(0).to(self.device)
+ with torch.no_grad(), torch.cuda.amp.autocast():
+ image_features = self.clip_model.encode_image(images)
+ image_features /= image_features.norm(dim=-1, keepdim=True)
+ return image_features
+
+ def interrogate_classic(self,
+ image: Image,
+ max_flavors: int = 3,
+ caption: Optional[str] = None) -> str:
+ """Classic mode creates a prompt in a standard format first describing the image,
+ then listing the artist, trending, movement, and flavor text modifiers."""
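+        # Illustrative (hypothetical) result:
+        # "a painting of a cat, an oil painting by <artist>, trending on artstation, impressionism, warm colors"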
+ caption = caption or self.generate_caption(image)
+ image_features = self.image_to_features(image)
+
+ medium = self.mediums.rank(image_features, 1)[0]
+ artist = self.artists.rank(image_features, 1)[0]
+ trending = self.trendings.rank(image_features, 1)[0]
+ movement = self.movements.rank(image_features, 1)[0]
+ flaves = ', '.join(self.flavors.rank(image_features, max_flavors))
+
+ if caption.startswith(medium):
+ prompt = f'{caption} {artist}, {trending}, {movement}, {flaves}'
+ else:
+ prompt = f'{caption}, {medium} {artist}, {trending}, {movement}, {flaves}'
+
+ return _truncate_to_fit(prompt, self.tokenize)
+
+ def interrogate_fast(self,
+ image: Image,
+ max_flavors: int = 32,
+ caption: Optional[str] = None) -> str:
+ """Fast mode simply adds the top ranked terms after a caption. It generally results in
+        better similarity between the generated prompt and the image than classic mode, but the prompts
+ are less readable."""
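+        # The result is the caption followed by comma-separated top-ranked terms, truncated
+        # so that it still fits CLIP's token context (see _truncate_to_fit below).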
+ caption = caption or self.generate_caption(image)
+ image_features = self.image_to_features(image)
+ merged = _merge_tables([
+ self.artists, self.flavors, self.mediums, self.movements,
+ self.trendings
+ ], self)
+ tops = merged.rank(image_features, max_flavors)
+ return _truncate_to_fit(caption + ', ' + ', '.join(tops),
+ self.tokenize)
+
+ def interrogate_negative(self, image: Image, max_flavors: int = 32) -> str:
+ """Negative mode chains together the most dissimilar terms to the image. It can be used
+ to help build a negative prompt to pair with the regular positive prompt and often
+        improve the results of generated images, particularly with Stable Diffusion 2."""
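+        # Illustrative (hypothetical) result: "blurry, low quality, watermark, oversaturated, deformed"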
+ image_features = self.image_to_features(image)
+ flaves = self.flavors.rank(
+ image_features,
+ self.config.flavor_intermediate_count,
+ reverse=True)
+ flaves = flaves + self.negative.labels
+ return self.chain(
+ image_features,
+ flaves,
+ max_count=max_flavors,
+ reverse=True,
+ desc='Negative chain')
+
+ def interrogate(self,
+ image: Image,
+ min_flavors: int = 8,
+ max_flavors: int = 32,
+ caption: Optional[str] = None) -> str:
+ caption = caption or self.generate_caption(image)
+ image_features = self.image_to_features(image)
+
+ merged = _merge_tables([
+ self.artists, self.flavors, self.mediums, self.movements,
+ self.trendings
+ ], self)
+ flaves = merged.rank(image_features,
+ self.config.flavor_intermediate_count)
+ best_prompt, best_sim = caption, self.similarity(
+ image_features, caption)
+ best_prompt = self.chain(
+ image_features,
+ flaves,
+ best_prompt,
+ best_sim,
+ min_count=min_flavors,
+ max_count=max_flavors,
+ desc='Flavor chain')
+
+ fast_prompt = self.interrogate_fast(
+ image, max_flavors, caption=caption)
+ classic_prompt = self.interrogate_classic(
+ image, max_flavors, caption=caption)
+ candidates = [caption, classic_prompt, fast_prompt, best_prompt]
+ return candidates[np.argmax(
+ self.similarities(image_features, candidates))]
+
+ def rank_top(self,
+ image_features: torch.Tensor,
+ text_array: List[str],
+ reverse: bool = False) -> str:
+ self._prepare_clip()
+ text_tokens = self.tokenize([text
+ for text in text_array]).to(self.device)
+ with torch.no_grad(), torch.cuda.amp.autocast():
+ text_features = self.clip_model.encode_text(text_tokens)
+ text_features /= text_features.norm(dim=-1, keepdim=True)
+ similarity = text_features @ image_features.T
+ if reverse:
+ similarity = -similarity
+ return text_array[similarity.argmax().item()]
+
+ def similarity(self, image_features: torch.Tensor, text: str) -> float:
+ self._prepare_clip()
+ text_tokens = self.tokenize([text]).to(self.device)
+ with torch.no_grad(), torch.cuda.amp.autocast():
+ text_features = self.clip_model.encode_text(text_tokens)
+ text_features /= text_features.norm(dim=-1, keepdim=True)
+ similarity = text_features @ image_features.T
+ return similarity[0][0].item()
+
+ def similarities(self, image_features: torch.Tensor,
+ text_array: List[str]) -> List[float]:
+ self._prepare_clip()
+ text_tokens = self.tokenize([text
+ for text in text_array]).to(self.device)
+ with torch.no_grad(), torch.cuda.amp.autocast():
+ text_features = self.clip_model.encode_text(text_tokens)
+ text_features /= text_features.norm(dim=-1, keepdim=True)
+ similarity = text_features @ image_features.T
+ return similarity.T[0].tolist()
+
+ def _prepare_caption(self):
+ if self.config.clip_offload and not self.clip_offloaded:
+ self.clip_model = self.clip_model.to('cpu')
+ self.clip_offloaded = True
+ if self.caption_offloaded:
+ self.caption_model = self.caption_model.to(self.device)
+ self.caption_offloaded = False
+
+ def _prepare_clip(self):
+ if self.config.caption_offload and not self.caption_offloaded:
+ self.caption_model = self.caption_model.to('cpu')
+ self.caption_offloaded = True
+ if self.clip_offloaded:
+ self.clip_model = self.clip_model.to(self.device)
+ self.clip_offloaded = False
+
+
+class LabelTable():
+
+ def __init__(self, labels: List[str], desc: str, ci: Interrogator):
+ clip_model, config = ci.clip_model, ci.config
+ self.chunk_size = config.chunk_size
+ self.config = config
+ self.device = config.device
+ self.embeds = []
+ self.labels = labels
+ self.tokenize = ci.tokenize
+
+ hash = hashlib.sha256(','.join(labels).encode()).hexdigest()
+ sanitized_name = self.config.clip_model_name.replace('/', '_').replace(
+ '@', '_')
+ self._load_cached(desc, hash, sanitized_name)
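+        # If no cache was found (or the label hash changed), encode the labels with CLIP
+        # in chunks below and, when a cache path is configured, persist them as safetensors.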
+
+ if len(self.labels) != len(self.embeds):
+ self.embeds = []
+ chunks = np.array_split(
+ self.labels, max(1,
+ len(self.labels) / config.chunk_size))
+ for chunk in tqdm(
+ chunks,
+ desc=f'Preprocessing {desc}' if desc else None,
+ disable=self.config.quiet):
+ text_tokens = self.tokenize(chunk).to(self.device)
+ with torch.no_grad(), torch.cuda.amp.autocast():
+ text_features = clip_model.encode_text(text_tokens)
+ text_features /= text_features.norm(dim=-1, keepdim=True)
+ text_features = text_features.half().cpu().numpy()
+ for i in range(text_features.shape[0]):
+ self.embeds.append(text_features[i])
+
+ if desc and self.config.cache_path:
+ os.makedirs(self.config.cache_path, exist_ok=True)
+ cache_filepath = os.path.join(
+ self.config.cache_path,
+ f'{sanitized_name}_{desc}.safetensors')
+ tensors = {
+ 'embeds': np.stack(self.embeds),
+ 'hash': np.array([ord(c) for c in hash], dtype=np.int8)
+ }
+ save_file(tensors, cache_filepath)
+
+ if self.device == 'cpu' or self.device == torch.device('cpu'):
+ self.embeds = [e.astype(np.float32) for e in self.embeds]
+
+ def _load_cached(self, desc: str, hash: str, sanitized_name: str) -> bool:
+ if self.config.cache_path is None or desc is None:
+ return False
+
+ cached_safetensors = os.path.join(
+ self.config.cache_path, f'{sanitized_name}_{desc}.safetensors')
+
+ if os.path.exists(cached_safetensors):
+ try:
+ tensors = load_file(cached_safetensors)
+ except Exception as e:
+ print(f'Failed to load {cached_safetensors}')
+ print(e)
+ return False
+ if 'hash' in tensors and 'embeds' in tensors:
+ if np.array_equal(
+ tensors['hash'],
+ np.array([ord(c) for c in hash], dtype=np.int8)):
+ self.embeds = tensors['embeds']
+ if len(self.embeds.shape) == 2:
+ self.embeds = [
+ self.embeds[i] for i in range(self.embeds.shape[0])
+ ]
+ return True
+
+ return False
+
+ def _rank(self,
+ image_features: torch.Tensor,
+ text_embeds: torch.Tensor,
+ top_count: int = 1,
+ reverse: bool = False) -> str:
+ top_count = min(top_count, len(text_embeds))
+ text_embeds = torch.stack([torch.from_numpy(t)
+ for t in text_embeds]).to(self.device)
+ with torch.cuda.amp.autocast():
+ similarity = image_features @ text_embeds.T
+ if reverse:
+ similarity = -similarity
+ _, top_labels = similarity.float().cpu().topk(top_count, dim=-1)
+ return [top_labels[0][i].numpy() for i in range(top_count)]
+
+ def rank(self,
+ image_features: torch.Tensor,
+ top_count: int = 1,
+ reverse: bool = False) -> List[str]:
+ if len(self.labels) <= self.chunk_size:
+ tops = self._rank(
+ image_features,
+ self.embeds,
+ top_count=top_count,
+ reverse=reverse)
+ return [self.labels[i] for i in tops]
+
+ num_chunks = int(math.ceil(len(self.labels) / self.chunk_size))
+ keep_per_chunk = int(self.chunk_size / num_chunks)
+
+ top_labels, top_embeds = [], []
+ for chunk_idx in tqdm(range(num_chunks), disable=self.config.quiet):
+ start = chunk_idx * self.chunk_size
+ stop = min(start + self.chunk_size, len(self.embeds))
+ tops = self._rank(
+ image_features,
+ self.embeds[start:stop],
+ top_count=keep_per_chunk,
+ reverse=reverse)
+ top_labels.extend([self.labels[start + i] for i in tops])
+ top_embeds.extend([self.embeds[start + i] for i in tops])
+
+ tops = self._rank(image_features, top_embeds, top_count=top_count)
+ return [top_labels[i] for i in tops]
+
+
+def _download_file(url: str,
+ filepath: str,
+ chunk_size: int = 4 * 1024 * 1024,
+ quiet: bool = False):
+ r = requests.get(url, stream=True)
+ if r.status_code != 200:
+ return
+
+ file_size = int(r.headers.get('Content-Length', 0))
+ filename = url.split('/')[-1]
+ progress = tqdm(
+ total=file_size,
+ unit='B',
+ unit_scale=True,
+ desc=filename,
+ disable=quiet)
+ with open(filepath, 'wb') as f:
+ for chunk in r.iter_content(chunk_size=chunk_size):
+ if chunk:
+ f.write(chunk)
+ progress.update(len(chunk))
+ progress.close()
+
+
+def _merge_tables(tables: List[LabelTable], ci: Interrogator) -> LabelTable:
+ m = LabelTable([], None, ci)
+ for table in tables:
+ m.labels.extend(table.labels)
+ m.embeds.extend(table.embeds)
+ return m
+
+
+def _prompt_at_max_len(text: str, tokenize) -> bool:
+ tokens = tokenize([text])
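+    # open_clip pads token sequences with zeros up to the context length (77 for CLIP-style
+    # tokenizers), so a non-zero final token means the prompt already fills the context.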
+ return tokens[0][-1] != 0
+
+
+def _truncate_to_fit(text: str, tokenize) -> str:
+ parts = text.split(', ')
+ new_text = parts[0]
+ for part in parts[1:]:
+ if _prompt_at_max_len(new_text + part, tokenize):
+ break
+ new_text += ', ' + part
+ return new_text
+
+
+def list_caption_models() -> List[str]:
+ return list(CAPTION_MODELS.keys())
+
+
+def list_clip_models() -> List[str]:
+ return ['/'.join(x) for x in open_clip.list_pretrained()]
+
+
+def load_list(data_path: str, filename: Optional[str] = None) -> List[str]:
+ """Load a list of strings from a file."""
+ if filename is not None:
+ data_path = os.path.join(data_path, filename)
+ with open(data_path, 'r', encoding='utf-8', errors='replace') as f:
+ items = [line.strip() for line in f.readlines()]
+ return items
+
+
+@MODELS.register_module(
+ Tasks.image_captioning, module_name=Models.clip_interrogator)
+class CLIP_Interrogator(TorchModel):
+
+ def __init__(self, model_dir, device='cuda', device_id=0, *args, **kwargs):
+ super().__init__(
+ model_dir=model_dir, device_id=device_id, *args, **kwargs)
+ self.device = device
+ self.dtype = torch.float16 if self.device == 'cuda' else torch.float32
+ cf = Config(clip_model_name='ViT-L-14/openai')
+ cf.data_path = os.path.join(model_dir, 'data')
+ cf.clip_model_path = model_dir
+ cf.cache_path = model_dir
+ self.ci = Interrogator(cf)
+
+ def forward(self, inputs):
+ image = transforms.ToPILImage()(inputs)
+ return {'caption': self.ci.interrogate(image)}
diff --git a/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py b/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py
index 813f750e..743c049a 100644
--- a/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py
+++ b/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py
@@ -128,13 +128,13 @@ class VideoCLIPForMultiModalEmbedding(TorchModel):
local_transform,
s=None,
e=None):
- video_mask = np.zeros(self.max_frames, dtype=np.long)
+ video_mask = np.zeros(self.max_frames, dtype=int)
max_video_length = 0
# T x 3 x H x W
video = np.zeros((self.max_frames, 3, rawVideoExtractor.size,
rawVideoExtractor.size),
- dtype=np.float)
+ dtype=float)
if s is None:
start_time, end_time = None, None
diff --git a/modelscope/models/multi_modal/mplug_owl/__init__.py b/modelscope/models/multi_modal/mplug_owl/__init__.py
new file mode 100644
index 00000000..76ccfb5a
--- /dev/null
+++ b/modelscope/models/multi_modal/mplug_owl/__init__.py
@@ -0,0 +1,18 @@
+# Copyright 2021-2023 The Alibaba DAMO mPLUG Authors.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .configuration_mplug_owl import (MplugOwlConfig, MplugOwlVisionConfig,
+ MplugOwlVisualAbstractorConfig)
+from .modeling_mplug_owl import MplugOwlForConditionalGeneration
diff --git a/modelscope/models/multi_modal/mplug_owl/configuration_mplug_owl.py b/modelscope/models/multi_modal/mplug_owl/configuration_mplug_owl.py
new file mode 100644
index 00000000..6e32238a
--- /dev/null
+++ b/modelscope/models/multi_modal/mplug_owl/configuration_mplug_owl.py
@@ -0,0 +1,257 @@
+# Copyright 2021-2023 The Alibaba DAMO mPLUG Team Authors.
+# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" MPLUG OWL model configuration """
+import copy
+import os
+from typing import Union
+
+from transformers import PretrainedConfig
+from transformers.models.auto import CONFIG_MAPPING
+from transformers.utils import logging
+
+from modelscope.utils.constant import Tasks
+
+logger = logging.get_logger()
+
+
+class MplugOwlVisionConfig(PretrainedConfig):
+ r"""
+ Args:
+        hidden_size (`int`, *optional*, defaults to 1024):
+            Dimensionality of the encoder layers and the pooler layer.
+        intermediate_size (`int`, *optional*, defaults to 4096):
+            Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
+        num_hidden_layers (`int`, *optional*, defaults to 24):
+            Number of hidden layers in the Transformer encoder.
+        num_attention_heads (`int`, *optional*, defaults to 16):
+            Number of attention heads for each attention layer in the Transformer encoder.
+        image_size (`int`, *optional*, defaults to 224):
+            The size (resolution) of each image.
+        patch_size (`int`, *optional*, defaults to 14):
+            The size (resolution) of each patch.
+        hidden_act (`str` or `function`, *optional*, defaults to `"quick_gelu"`):
+            The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
+            `"relu"`, `"selu"`, `"gelu_new"` and `"quick_gelu"` are supported.
+        layer_norm_eps (`float`, *optional*, defaults to 1e-6):
+            The epsilon used by the layer normalization layers.
+ attention_dropout (`float`, *optional*, defaults to 0.0):
+ The dropout ratio for the attention probabilities.
+ initializer_range (`float`, *optional*, defaults to 0.02):
+ The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
+ initializer_factor (`float`, *optional*, defaults to 1):
+ A factor for initializing all weight matrices (should be kept to 1, used internally for initialization
+ testing).
+    """
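+    # Note: the defaults above describe a ViT-L/14-style encoder; 224x224 inputs with
+    # 14x14 patches yield 16 * 16 = 256 patch tokens plus one CLS token (257 in total).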
+
+ model_type = 'mplug_owl_vision_model'
+
+ def __init__(
+ self,
+ hidden_size=1024,
+ intermediate_size=4096,
+ projection_dim=768,
+ num_hidden_layers=24,
+ num_attention_heads=16,
+ num_channels=3,
+ image_size=224,
+ patch_size=14,
+ hidden_act='quick_gelu',
+ layer_norm_eps=1e-6,
+ attention_dropout=0.0,
+ initializer_range=0.02,
+ initializer_factor=1.0,
+ use_flash_attn=False,
+ **kwargs,
+ ):
+ super().__init__(**kwargs)
+
+ self.hidden_size = hidden_size
+ self.intermediate_size = intermediate_size
+ self.projection_dim = projection_dim
+ self.num_hidden_layers = num_hidden_layers
+ self.num_attention_heads = num_attention_heads
+ self.num_channels = num_channels
+ self.patch_size = patch_size
+ self.image_size = image_size
+ self.initializer_range = initializer_range
+ self.initializer_factor = initializer_factor
+ self.attention_dropout = attention_dropout
+ self.layer_norm_eps = layer_norm_eps
+ self.hidden_act = hidden_act
+ self.use_flash_attn = use_flash_attn
+
+ @classmethod
+ def from_pretrained(cls, pretrained_model_name_or_path: Union[str,
+ os.PathLike],
+ **kwargs) -> 'PretrainedConfig':
+ config_dict, kwargs = cls.get_config_dict(
+ pretrained_model_name_or_path, **kwargs)
+
+ # get the vision config dict if we are loading from MplugOwlConfig
+ if config_dict.get('model_type') == 'mplug_owl':
+ config_dict = config_dict['vision_config']
+
+ if 'model_type' in config_dict and hasattr(
+ cls,
+ 'model_type') and config_dict['model_type'] != cls.model_type:
+ logger.warning(
+ f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
+ f'{cls.model_type}. This is not supported for all configurations of models and can yield errors.'
+ )
+
+ return cls.from_dict(config_dict, **kwargs)
+
+
+class MplugOwlVisualAbstractorConfig(PretrainedConfig):
+
+ model_type = 'MPlugOwlVisualAbstractor'
+
+ def __init__(
+ self,
+ hidden_size=1024,
+ num_hidden_layers=6,
+ num_attention_heads=16,
+ intermediate_size=4096,
+ attention_probs_dropout_prob=0.1,
+ initializer_range=0.02,
+ layer_norm_eps=1e-6,
+ encoder_hidden_size=1024,
+ **kwargs,
+ ):
+ super().__init__(**kwargs)
+
+ self.hidden_size = hidden_size
+ self.num_hidden_layers = num_hidden_layers
+ self.num_attention_heads = num_attention_heads
+ self.intermediate_size = intermediate_size
+ self.attention_probs_dropout_prob = attention_probs_dropout_prob
+ self.initializer_range = initializer_range
+ self.layer_norm_eps = layer_norm_eps
+ self.encoder_hidden_size = encoder_hidden_size
+
+ @classmethod
+ def from_pretrained(cls, pretrained_model_name_or_path: Union[str,
+ os.PathLike],
+ **kwargs) -> 'PretrainedConfig':
+ config_dict, kwargs = cls.get_config_dict(
+ pretrained_model_name_or_path, **kwargs)
+
+ # get the qformer config dict if we are loading from MplugOwlConfig
+ if config_dict.get('model_type') == 'mplug_owl':
+ config_dict = config_dict['abstractor_config']
+
+ if 'model_type' in config_dict and hasattr(
+ cls,
+ 'model_type') and config_dict['model_type'] != cls.model_type:
+ logger.warning(
+ f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
+ f'{cls.model_type}. This is not supported for all configurations of models and can yield errors.'
+ )
+
+ return cls.from_dict(config_dict, **kwargs)
+
+
+class MplugOwlConfig(PretrainedConfig):
+ r"""
+ Args:
+ vision_config (`dict`, *optional*):
+ Dictionary of configuration options used to initialize [`MplugOwlVisionConfig`].
+ qformer_config (`dict`, *optional*):
+ Dictionary of configuration options used to initialize [`MplugOwlVisualAbstractorConfig`].
+ text_config (`dict`, *optional*):
+ Dictionary of configuration options used to initialize any [`PretrainedConfig`].
+ num_query_tokens (`int`, *optional*, defaults to 32):
+ The number of query tokens passed through the Transformer.
+
+ kwargs (*optional*):
+ Dictionary of keyword arguments.
+ """
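+    # Composition sketch (illustrative; the default text backbone is LLaMA-7B):
+    #
+    #   config = MplugOwlConfig.from_vision_abstractor_text_configs(
+    #       MplugOwlVisionConfig(), MplugOwlVisualAbstractorConfig(),
+    #       transformers.LlamaConfig(pad_token_id=2))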
+
+ model_type = 'mplug_owl'
+ is_composition = True
+
+ def __init__(self,
+ task=Tasks.multimodal_dialogue,
+ vision_config=None,
+ visual_abstractor_config=None,
+ text_config=None,
+ num_query_tokens=64,
+ **kwargs):
+
+ super().__init__(**kwargs)
+ self.task = task
+ if vision_config is None:
+ vision_config = MplugOwlVisionConfig().to_dict()
+ logger.info('vision_config is None.')
+
+ if visual_abstractor_config is None:
+ visual_abstractor_config = {}
+            logger.info('visual_abstractor_config is None.')
+
+ if text_config is None:
+ # we use LLAMA 7b by default
+ from transformers.models.llama.configuration_llama import \
+ LlamaConfig
+ text_config = LlamaConfig(pad_token_id=2).to_dict()
+ logger.info('text_config is None.')
+
+ self.vision_config = MplugOwlVisionConfig(**vision_config)
+ self.visual_abstractor_config = MplugOwlVisualAbstractorConfig(
+ **visual_abstractor_config)
+ text_model_type = text_config[
+ 'model_type'] if 'model_type' in text_config else 'llama'
+ self.text_config = CONFIG_MAPPING[text_model_type](**text_config)
+
+ self.tie_word_embeddings = self.text_config.tie_word_embeddings
+
+ self.num_query_tokens = num_query_tokens
+ self.initializer_factor = 1.0
+ self.initializer_range = 0.02
+
+ @classmethod
+ def from_vision_abstractor_text_configs(
+ cls,
+ vision_config: MplugOwlVisionConfig,
+ visual_abstractor_config: MplugOwlVisualAbstractorConfig,
+ text_config: PretrainedConfig,
+ **kwargs,
+ ):
+ r"""
+ Returns:
+ [`MplugOwlConfig`]: An instance of a configuration object
+ """
+
+ return cls(
+ vision_config=vision_config.to_dict(),
+ visual_abstractor_config=visual_abstractor_config.to_dict(),
+ text_config=text_config.to_dict(),
+ **kwargs,
+ )
+
+ def to_dict(self):
+ """
+ Serializes this instance to a Python dictionary. Override the default [`~PretrainedConfig.to_dict`].
+
+ Returns:
+            `Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance.
+ """
+ output = copy.deepcopy(self.__dict__)
+ output['vision_config'] = self.vision_config.to_dict()
+ tmp = self.visual_abstractor_config.to_dict()
+ output['visual_abstractor_config'] = tmp
+ output['text_config'] = self.text_config.to_dict()
+ output['model_type'] = self.__class__.model_type
+ return output
diff --git a/modelscope/models/multi_modal/mplug_owl/modeling_mplug_owl.py b/modelscope/models/multi_modal/mplug_owl/modeling_mplug_owl.py
new file mode 100644
index 00000000..21a29185
--- /dev/null
+++ b/modelscope/models/multi_modal/mplug_owl/modeling_mplug_owl.py
@@ -0,0 +1,1551 @@
+# Copyright 2021-2023 The Alibaba DAMO mPLUG Team Authors.
+# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""PyTorch MPLUG OWL model. """
+
+import copy
+import logging
+import math
+import os
+import os.path as osp
+import random
+from dataclasses import dataclass
+from io import BytesIO
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.checkpoint
+import transformers
+from torch.nn import CrossEntropyLoss
+from transformers.activations import ACT2FN
+from transformers.modeling_outputs import (
+ BaseModelOutput, BaseModelOutputWithPastAndCrossAttentions,
+ BaseModelOutputWithPooling, BaseModelOutputWithPoolingAndCrossAttentions,
+ CausalLMOutputWithCrossAttentions)
+from transformers.modeling_utils import (PreTrainedModel,
+ apply_chunking_to_forward,
+ find_pruneable_heads_and_indices,
+ prune_linear_layer)
+from transformers.models.auto import AutoModelForCausalLM
+from transformers.utils import ModelOutput
+
+from modelscope.metainfo import Models
+from modelscope.models import TorchModel
+from modelscope.models.base import Tensor
+from modelscope.models.builder import MODELS
+from modelscope.models.multi_modal.mplug_owl.configuration_mplug_owl import (
+ MplugOwlConfig, MplugOwlVisionConfig, MplugOwlVisualAbstractorConfig)
+from modelscope.outputs import OutputKeys
+from modelscope.utils.config import Config
+from modelscope.utils.constant import ModelFile, Tasks
+
+__all__ = ['MplugOwlForConditionalGeneration']
+
+
+@dataclass
+class MplugOwlForConditionalGenerationModelOutput(ModelOutput):
+ """
+ Class defining the outputs of [`MPlugOwlForConditionalGeneration`].
+
+ Args:
+        loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
+ Language modeling loss from the language model.
+ logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
+ Prediction scores of the language modeling head of the language model.
+ vision_outputs (`BaseModelOutputWithPooling`):
+ Outputs of the vision encoder.
+
+ language_model_outputs (`CausalLMOutputWithPast`):
+ Outputs of the language model.
+ """
+
+ loss: Optional[Tuple[torch.FloatTensor]] = None
+ logits: Optional[Tuple[torch.FloatTensor]] = None
+ vision_outputs: Optional[torch.FloatTensor] = None
+ language_model_outputs: Optional[Tuple[torch.FloatTensor]] = None
+
+ def to_tuple(self) -> Tuple[Any]:
+ return tuple(
+ self[k] if k not in ['vision_outputs', 'language_model_outputs'
+ ] else getattr(self, k).to_tuple()
+ for k in self.keys())
+
+
+def get_ltor_masks_and_position_ids_from_embeddings(data):
+ """Build masks and position id for left to right model."""
+
+ # Extract batch size and sequence length.
+ micro_batch_size, seq_length = data.size()[:2]
+
+ # Attention mask (lower triangular).
+ att_mask_batch = 1
+ attention_mask = torch.tril(
+ torch.ones((att_mask_batch, seq_length, seq_length),
+ device=data.device)).view(att_mask_batch, 1, seq_length,
+ seq_length)
+
+ # Loss mask.
+ loss_mask = torch.ones(
+ data.size()[:2], dtype=torch.float, device=data.device)
+
+ # Position ids.
+ position_ids = torch.arange(
+ seq_length, dtype=torch.long, device=data.device)
+ position_ids = position_ids.unsqueeze(0).expand_as(data[..., 0])
+
+ # Convert attention mask to binary:
+ attention_mask = (attention_mask < 0.5)
+
+ return attention_mask, loss_mask, position_ids
+
+
+class MplugOwlVisionEmbeddings(nn.Module):
+
+ def __init__(self, config: MplugOwlVisionConfig):
+ super().__init__()
+ self.config = config
+ self.hidden_size = config.hidden_size
+ self.image_size = config.image_size
+ self.patch_size = config.patch_size
+
+ self.cls_token = nn.Parameter(torch.randn(1, 1, self.hidden_size))
+
+ self.patch_embed = nn.Conv2d(
+ in_channels=3,
+ out_channels=self.hidden_size,
+ kernel_size=self.patch_size,
+ stride=self.patch_size,
+ bias=False)
+
+ self.num_patches = (self.image_size // self.patch_size)**2
+
+ self.position_embedding = nn.Parameter(
+ torch.randn(1, self.num_patches + 1, self.hidden_size))
+
+ self.pre_layernorm = LayerNormFp32(
+ self.hidden_size, eps=config.layer_norm_eps)
+
+ def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
+ batch_size = pixel_values.size(0)
+ image_embeds = self.patch_embed(pixel_values)
+ image_embeds = image_embeds.flatten(2).transpose(1, 2)
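+        # [B, hidden, H/patch, W/patch] -> [B, num_patches, hidden]; a CLS token and learned
+        # position embeddings are added below before the pre-layernorm.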
+
+ class_embeds = self.cls_token.expand(batch_size, 1,
+ -1).to(image_embeds.dtype)
+ embeddings = torch.cat([class_embeds, image_embeds], dim=1)
+ embeddings = embeddings + \
+ self.position_embedding[:, : embeddings.size(1)].to(
+ image_embeds.dtype)
+ embeddings = self.pre_layernorm(embeddings)
+ return embeddings
+
+
+class LayerNormFp32(nn.LayerNorm):
+ """Subclass torch's LayerNorm to handle fp16 (by casting to float32 and back)."""
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ def forward(self, x: torch.Tensor):
+ output = torch.nn.functional.layer_norm(
+ x.float(),
+ self.normalized_shape,
+ self.weight.float() if self.weight is not None else None,
+ self.bias.float() if self.bias is not None else None,
+ self.eps,
+ )
+ return output.type_as(x)
+
+
+class MplugOwlVisionAttention(nn.Module):
+ """Multi-headed attention from 'Attention Is All You Need' paper"""
+
+ def __init__(self, config):
+ super().__init__()
+ self.config = config
+ self.hidden_size = config.hidden_size
+ self.num_heads = config.num_attention_heads
+ self.head_dim = self.hidden_size // self.num_heads
+ if self.head_dim * self.num_heads != self.hidden_size:
+ raise ValueError(
+ f'hidden_size must be divisible by num_heads (got `hidden_size`: {self.hidden_size} and `num_heads`:'
+ f' {self.num_heads}).')
+ self.scale = self.head_dim**-0.5
+ self.dropout = nn.Dropout(config.attention_dropout)
+
+ self.query_key_value = nn.Linear(self.hidden_size,
+ 3 * self.hidden_size)
+ self.dense = nn.Linear(self.hidden_size, self.hidden_size)
+
+ def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
+ return tensor.view(bsz, seq_len, self.num_heads,
+ self.head_dim).transpose(1, 2).contiguous()
+
+ def forward(
+ self,
+ hidden_states: torch.Tensor,
+ head_mask: Optional[torch.Tensor] = None,
+ output_attentions: Optional[bool] = False,
+ ) -> Tuple[torch.Tensor, Optional[torch.Tensor],
+ Optional[Tuple[torch.Tensor]]]:
+ """Input shape: Batch x Time x Channel"""
+
+ bsz, seq_len, embed_dim = hidden_states.size()
+
+ mixed_qkv = self.query_key_value(hidden_states)
+
+ mixed_qkv = mixed_qkv.reshape(bsz, seq_len, self.num_heads, 3,
+ embed_dim // self.num_heads).permute(
+ 3, 0, 2, 1, 4) # [3, b, np, sq, hn]
+ query_states, key_states, value_states = (
+ mixed_qkv[0],
+ mixed_qkv[1],
+ mixed_qkv[2],
+ )
+
+ # Take the dot product between "query" and "key" to get the raw attention scores.
+ attention_scores = torch.matmul(query_states,
+ key_states.transpose(-1, -2))
+
+ attention_scores = attention_scores * self.scale
+
+ # Normalize the attention scores to probabilities.
+ attention_probs = torch.softmax(attention_scores, dim=-1)
+
+ # This is actually dropping out entire tokens to attend to, which might
+ # seem a bit unusual, but is taken from the original Transformer paper.
+ attention_probs = self.dropout(attention_probs)
+
+ # Mask heads if we want to
+ if head_mask is not None:
+ attention_probs = attention_probs * head_mask
+
+ context_layer = torch.matmul(attention_probs,
+ value_states).permute(0, 2, 1, 3)
+
+ new_context_layer_shape = context_layer.size()[:-2] + (
+ self.hidden_size, )
+ context_layer = context_layer.reshape(new_context_layer_shape)
+
+ output = self.dense(context_layer)
+
+ outputs = (output, attention_probs) if output_attentions else (output,
+ None)
+
+ return outputs
+
+
+class QuickGELU(nn.Module):
+
+ def forward(self, x: torch.Tensor):
+ return x * torch.sigmoid(1.702 * x)
+
+
+class MplugOwlMLP(nn.Module):
+
+ def __init__(self, config):
+ super().__init__()
+ self.config = config
+ self.activation_fn = QuickGELU()
+ self.fc1 = nn.Linear(config.hidden_size, config.intermediate_size)
+ self.fc2 = nn.Linear(config.intermediate_size, config.hidden_size)
+
+ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+ hidden_states = self.fc1(hidden_states)
+ hidden_states = self.activation_fn(hidden_states)
+ hidden_states = self.fc2(hidden_states)
+ return hidden_states
+
+
+class MplugOwlVisionEncoderLayer(nn.Module):
+
+ def __init__(self, config: MplugOwlVisionConfig):
+ super().__init__()
+ self.hidden_size = config.hidden_size
+ self.self_attn = MplugOwlVisionAttention(config)
+ self.input_layernorm = LayerNormFp32(
+ self.hidden_size, eps=config.layer_norm_eps)
+ self.mlp = MplugOwlMLP(config)
+ self.post_attention_layernorm = LayerNormFp32(
+ self.hidden_size, eps=config.layer_norm_eps)
+
+ def forward(
+ self,
+ hidden_states: torch.Tensor,
+ attention_mask: torch.Tensor,
+ output_attentions: Optional[bool] = False,
+ ) -> Tuple[torch.FloatTensor]:
+ """
+ Args:
+ hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
+ attention_mask (`torch.FloatTensor`): attention mask of size
+ `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
+ output_attentions (`bool`, *optional*):
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under
+ returned tensors for more detail.
+ """
+ residual = hidden_states
+
+ hidden_states = self.input_layernorm(hidden_states)
+ hidden_states, attn_weights = self.self_attn(
+ hidden_states=hidden_states,
+ head_mask=attention_mask,
+ output_attentions=output_attentions,
+ )
+ hidden_states = hidden_states + residual
+ residual = hidden_states
+ hidden_states = self.post_attention_layernorm(hidden_states)
+ hidden_states = self.mlp(hidden_states)
+
+ hidden_states = hidden_states + residual
+
+ outputs = (hidden_states, )
+
+ if output_attentions:
+ outputs += (attn_weights, )
+
+ return outputs
+
+
+class MplugOwlPreTrainedModel(PreTrainedModel):
+ """
+ An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
+ models.
+ """
+
+ config_class = MplugOwlConfig
+ base_model_prefix = 'mplug_owl'
+ supports_gradient_checkpointing = True
+ _keys_to_ignore_on_load_missing = [
+ r'position_ids',
+ r'language_model.encoder.embed_tokens.weight',
+ r'language_model.decoder.embed_tokens.weight',
+ r'language_model.lm_head.weight',
+ ]
+ _no_split_modules = ['MplugOwlAttention']
+ _keep_in_fp32_modules = ['wo']
+
+ def _init_weights(self, module):
+ """Initialize the weights"""
+ factor = self.config.initializer_range
+ if isinstance(module, nn.Conv2d) or isinstance(
+ module, nn.Embedding) or isinstance(module, nn.Linear):
+ module.weight.data.normal_(mean=0.0, std=factor)
+ if hasattr(module, 'bias') and module.bias is not None:
+ module.bias.data.zero_()
+
+ if isinstance(module, MplugOwlVisionEmbeddings):
+ if hasattr(self.config, 'vision_config'):
+ factor = self.config.vision_config.initializer_range
+ nn.init.trunc_normal_(
+ module.position_embedding, mean=0.0, std=factor)
+ nn.init.trunc_normal_(module.cls_token, mean=0.0, std=factor)
+
+ elif isinstance(module, nn.LayerNorm):
+ module.bias.data.zero_()
+ module.weight.data.fill_(1.0)
+ elif isinstance(module, nn.Linear) and module.bias is not None:
+ module.bias.data.zero_()
+ elif isinstance(module, nn.Parameter):
+ nn.init.trunc_normal_(module.data, mean=0.0, std=factor)
+
+ def _set_gradient_checkpointing(self, module, value=False):
+ if isinstance(module, MplugOwlVisionEncoder):
+ module.gradient_checkpointing = value
+
+
+MPLUG_OWL_START_DOCSTRING = r"""
+ This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
+ library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
+ etc.)
+
+ This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
+ Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
+ and behavior.
+
+ Parameters:
+ config ([`MplugOwlConfig`]): Model configuration class with all the parameters of the model.
+ Initializing with a config file does not load the weights associated with the model, only the
+ configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
+"""
+
+MPLUG_OWL_VISION_INPUTS_DOCSTRING = r"""
+ Args:
+ pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
+ Pixel values. Pixel values can be obtained using [`MplugOwlPreprocessor`].
+ output_attentions (`bool`, *optional*):
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
+ tensors for more detail.
+ output_hidden_states (`bool`, *optional*):
+ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
+ more detail.
+ return_dict (`bool`, *optional*):
+ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
+"""
+
+MPLUG_OWL_TEXT_INPUTS_DOCSTRING = r"""
+ Args:
+ input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
+ Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide
+ it. Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
+ [`PreTrainedTokenizer.__call__`] for details. [What are input IDs?](../glossary#input-ids)
+ attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
+ Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
+ - 1 for tokens that are **not masked**,
+ - 0 for tokens that are **masked**.
+ [What are attention masks?](../glossary#attention-mask)
+ decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
+ Indices of decoder input sequence tokens in the vocabulary.
+
+ Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
+ [`PreTrainedTokenizer.__call__`] for details.
+
+ [What are decoder input IDs?](../glossary#decoder-input-ids)
+
+ T5 uses the `pad_token_id` as the starting token for `decoder_input_ids` generation. If `past_key_values`
+ is used, optionally only the last `decoder_input_ids` have to be input (see `past_key_values`).
+
+ To know more on how to prepare `decoder_input_ids` for pretraining take a look at [T5
+ Training](./t5#training).
+ decoder_attention_mask (`torch.BoolTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
+ Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also
+ be used by default.
+ output_attentions (`bool`, *optional*):
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
+ tensors for more detail.
+ output_hidden_states (`bool`, *optional*):
+ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
+ more detail.
+ return_dict (`bool`, *optional*):
+ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
+"""
+
+MPLUG_OWL_INPUTS_DOCSTRING = r"""
+ Args:
+ pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
+ Pixel values. Pixel values can be obtained using [`MplugOwlPreprocessor`].
+ input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
+ Indices of input sequence tokens in the vocabulary of the language model. Input tokens can optionally be
+ provided to serve as text prompt, which the language model can continue.
+
+ Indices can be obtained using [`MplugOwlPreprocessor`]. See [`MplugOwlPreprocessor.__call__`] for details.
+
+ [What are input IDs?](../glossary#input-ids)
+ attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
+ Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
+
+ - 1 for tokens that are **not masked**,
+ - 0 for tokens that are **masked**.
+
+ [What are attention masks?](../glossary#attention-mask)
+
+ decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
+ Indices of decoder input sequence tokens in the vocabulary of the language model. Only relevant in case an
+ encoder-decoder language model (like T5) is used.
+
+ Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
+ [`PreTrainedTokenizer.__call__`] for details. [What are decoder input IDs?](../glossary#decoder-input-ids)
+
+ decoder_attention_mask (`torch.BoolTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
+ Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also
+ be used by default.
+
+ Only relevant in case an encoder-decoder language model (like T5) is used.
+
+ output_attentions (`bool`, *optional*):
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
+ tensors for more detail.
+ output_hidden_states (`bool`, *optional*):
+ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
+ more detail.
+ return_dict (`bool`, *optional*):
+ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
+"""
+
+
+class MplugOwlVisionEncoder(nn.Module):
+ """
+ Transformer encoder consisting of `config.num_hidden_layers` self attention layers. Each layer is a
+ [`MplugOwlVisionEncoderLayer`].
+
+ Args:
+ config (`MplugOwlVisionConfig`):
+            The corresponding vision configuration for the `MplugOwlVisionEncoder`.
+ """
+
+ def __init__(self, config: MplugOwlVisionConfig):
+ super().__init__()
+ self.config = config
+ self.layers = nn.ModuleList([
+ MplugOwlVisionEncoderLayer(config)
+ for _ in range(config.num_hidden_layers)
+ ])
+ self.gradient_checkpointing = False
+
+ def forward(
+ self,
+ inputs_embeds,
+ attention_mask: Optional[torch.Tensor] = None,
+ output_attentions: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ return_dict: Optional[bool] = None,
+ ) -> Union[Tuple, BaseModelOutput]:
+ r"""
+ Args:
+ inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
+                Embedded representation of the inputs (float embeddings rather than integer token ids).
+ attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
+ Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
+
+ - 1 for tokens that are **not masked**,
+ - 0 for tokens that are **masked**.
+
+ [What are attention masks?](../glossary#attention-mask)
+ output_attentions (`bool`, *optional*):
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under
+ returned tensors for more detail.
+ output_hidden_states (`bool`, *optional*):
+ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
+ for more detail.
+ return_dict (`bool`, *optional*):
+ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
+ """
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+ output_hidden_states = (
+ output_hidden_states if output_hidden_states is not None else
+ self.config.output_hidden_states)
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+ encoder_states = () if output_hidden_states else None
+ all_attentions = () if output_attentions else None
+
+ hidden_states = inputs_embeds
+ for idx, encoder_layer in enumerate(self.layers):
+ if output_hidden_states:
+ encoder_states = encoder_states + (hidden_states, )
+ if self.gradient_checkpointing and self.training:
+
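+                # Bind `output_attentions` via a closure, since it is not passed
+                # through `torch.utils.checkpoint.checkpoint` below.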
+ def create_custom_forward(module):
+
+ def custom_forward(*inputs):
+ return module(*inputs, output_attentions)
+
+ return custom_forward
+
+ layer_outputs = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(encoder_layer),
+ hidden_states,
+ attention_mask,
+ )
+ else:
+ layer_outputs = encoder_layer(
+ hidden_states,
+ attention_mask,
+ output_attentions=output_attentions,
+ )
+
+ hidden_states = layer_outputs[0]
+
+ if output_attentions:
+ all_attentions = all_attentions + (layer_outputs[1], )
+
+ if output_hidden_states:
+ encoder_states = encoder_states + (hidden_states, )
+
+ if not return_dict:
+ return tuple(
+ v for v in [hidden_states, encoder_states, all_attentions]
+ if v is not None)
+ return BaseModelOutput(
+ last_hidden_state=hidden_states,
+ hidden_states=encoder_states,
+ attentions=all_attentions)
+
+
+class MplugOwlVisionModel(MplugOwlPreTrainedModel):
+ main_input_name = 'pixel_values'
+ config_class = MplugOwlVisionConfig
+
+ def __init__(self, config: MplugOwlVisionConfig):
+ super().__init__(config)
+ self.config = config
+ self.hidden_size = config.hidden_size
+
+ self.embeddings = MplugOwlVisionEmbeddings(config)
+ self.encoder = MplugOwlVisionEncoder(config)
+ self.post_layernorm = LayerNormFp32(
+ self.hidden_size, eps=config.layer_norm_eps)
+
+ self.post_init()
+
+ def forward(
+ self,
+ pixel_values: Optional[torch.FloatTensor] = None,
+ output_attentions: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ return_dict: Optional[bool] = None,
+ ) -> Union[Tuple, BaseModelOutputWithPooling]:
+ r"""
+ Returns:
+
+ """
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+ output_hidden_states = (
+ output_hidden_states if output_hidden_states is not None else
+ self.config.output_hidden_states)
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+ if pixel_values is None:
+ raise ValueError('You have to specify pixel_values')
+
+ hidden_states = self.embeddings(pixel_values)
+
+ encoder_outputs = self.encoder(
+ inputs_embeds=hidden_states,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ return_dict=return_dict,
+ )
+
+ last_hidden_state = encoder_outputs[0]
+ last_hidden_state = self.post_layernorm(last_hidden_state)
+
+ pooled_output = last_hidden_state[:, 0, :]
+ pooled_output = self.post_layernorm(pooled_output)
+
+ if not return_dict:
+ return (last_hidden_state, pooled_output) + encoder_outputs[1:]
+
+ return BaseModelOutputWithPooling(
+ last_hidden_state=last_hidden_state,
+ pooler_output=pooled_output,
+ hidden_states=encoder_outputs.hidden_states,
+ attentions=encoder_outputs.attentions,
+ )
+
+ def get_input_embeddings(self):
+ return self.embeddings
+
+
+class MplugOwlVisualAbstractorMLP(nn.Module):
+
+ def __init__(self, config: MplugOwlVisualAbstractorConfig):
+ super().__init__()
+ self.config = config
+ in_features = config.hidden_size
+ hidden_features = config.intermediate_size
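+        # SwiGLU-style sizing: use 2/3 of intermediate_size, rounded up to a
+        # multiple of 256 (e.g. 4096 -> 2730 -> 2816).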
+ hidden_features = int(2 * hidden_features / 3)
+ multiple_of = 256
+ hidden_features = multiple_of * \
+ ((hidden_features + multiple_of - 1) // multiple_of)
+ self.act = nn.SiLU()
+
+ self.w1 = nn.Linear(in_features, hidden_features)
+ self.w2 = nn.Linear(hidden_features, in_features)
+ self.w3 = nn.Linear(in_features, hidden_features)
+ self.ffn_ln = LayerNormFp32(hidden_features, eps=config.layer_norm_eps)
+
+ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+ hidden_states = self.act(
+ self.w1(hidden_states)) * self.w3(hidden_states)
+ hidden_states = self.ffn_ln(hidden_states)
+ hidden_states = self.w2(hidden_states)
+ return hidden_states
+
+
+class MplugOwlVisualAbstractorMultiHeadAttention(nn.Module):
+
+ def __init__(self, config: MplugOwlVisualAbstractorConfig):
+ super().__init__()
+ self.config = config
+ if config.hidden_size % config.num_attention_heads != 0:
+ raise ValueError(
+ 'The hidden size (%d) is not a multiple of the number of attention heads (%d)'
+ % (config.hidden_size, config.num_attention_heads))
+
+ self.num_attention_heads = config.num_attention_heads
+ self.attention_head_size = int(config.hidden_size
+ / config.num_attention_heads)
+ self.all_head_size = self.num_attention_heads * self.attention_head_size
+
+ self.query = nn.Linear(config.hidden_size, self.all_head_size)
+ self.key = nn.Linear(config.encoder_hidden_size, self.all_head_size)
+ self.value = nn.Linear(config.encoder_hidden_size, self.all_head_size)
+
+ self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
+ self.save_attention = False
+
+ def save_attn_gradients(self, attn_gradients):
+ self.attn_gradients = attn_gradients
+
+ def get_attn_gradients(self):
+ return self.attn_gradients
+
+ def save_attention_map(self, attention_map):
+ self.attention_map = attention_map
+
+ def get_attention_map(self):
+ return self.attention_map
+
+ def transpose_for_scores(self, x):
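+        # (batch, seq_len, all_head_size) -> (batch, num_heads, seq_len, head_size)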
+ new_x_shape = x.size()[:-1] + (self.num_attention_heads,
+ self.attention_head_size)
+ x = x.view(*new_x_shape)
+ return x.permute(0, 2, 1, 3)
+
+ def forward(
+ self,
+ hidden_states,
+ attention_mask=None,
+ head_mask=None,
+ encoder_hidden_states=None,
+ encoder_attention_mask=None,
+ past_key_value=None,
+ output_attentions=False,
+ ):
+ # If this is instantiated as a cross-attention module, the keys
+ # and values come from an encoder; the attention mask needs to be
+ # such that the encoder's padding tokens are not attended to.
+ key_layer = self.transpose_for_scores(self.key(encoder_hidden_states))
+ value_layer = self.transpose_for_scores(
+ self.value(encoder_hidden_states))
+ attention_mask = encoder_attention_mask
+
+ mixed_query_layer = self.query(hidden_states)
+
+ query_layer = self.transpose_for_scores(mixed_query_layer)
+
+ past_key_value = (key_layer, value_layer)
+
+ # Take the dot product between "query" and "key" to get the raw attention scores.
+ attention_scores = torch.matmul(query_layer,
+ key_layer.transpose(-1, -2))
+
+ attention_scores = attention_scores / \
+ math.sqrt(self.attention_head_size)
+
+ if attention_mask is not None:
+ # Apply the attention mask is (precomputed for all layers in BertModel forward() function)
+ attention_scores = attention_scores + attention_mask
+
+ # Normalize the attention scores to probabilities.
+ attention_probs = nn.Softmax(dim=-1)(attention_scores)
+
+ if self.save_attention:
+ self.save_attention_map(attention_probs)
+ attention_probs.register_hook(self.save_attn_gradients)
+
+ # This is actually dropping out entire tokens to attend to, which might
+ # seem a bit unusual, but is taken from the original Transformer paper.
+ attention_probs_dropped = self.dropout(attention_probs)
+
+ # Mask heads if we want to
+ if head_mask is not None:
+ attention_probs_dropped = attention_probs_dropped * head_mask
+
+ context_layer = torch.matmul(attention_probs_dropped, value_layer)
+
+ context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
+ new_context_layer_shape = context_layer.size()[:-2] + (
+ self.all_head_size, )
+ context_layer = context_layer.view(*new_context_layer_shape)
+
+ outputs = (context_layer,
+ attention_probs) if output_attentions else (context_layer, )
+
+ outputs = outputs + (past_key_value, )
+ return outputs
+
+
+class MplugOwlVisualAbstractorCrossOutput(nn.Module):
+
+ def __init__(self, config: MplugOwlVisualAbstractorConfig):
+ super().__init__()
+ dim = config.hidden_size
+ self.out_proj = nn.Linear(dim, dim, bias=True)
+ self.norm2 = LayerNormFp32(dim)
+ self.mlp = MplugOwlVisualAbstractorMLP(config)
+
+ def forward(self, hidden_states: torch.Tensor,
+ input_tensor: torch.Tensor) -> torch.Tensor:
+ input_tensor = input_tensor + self.out_proj(hidden_states)
+ input_tensor = input_tensor + self.mlp(self.norm2(input_tensor))
+ return input_tensor
+
+
+class MplugOwlVisualAbstractorAttention(nn.Module):
+
+ def __init__(self, config: MplugOwlVisualAbstractorConfig):
+ super().__init__()
+ self.attention = MplugOwlVisualAbstractorMultiHeadAttention(config)
+ self.output = MplugOwlVisualAbstractorCrossOutput(config)
+ self.pruned_heads = set()
+ self.norm1 = LayerNormFp32(config.hidden_size)
+ self.normk = LayerNormFp32(config.hidden_size)
+
+ def prune_heads(self, heads):
+ if len(heads) == 0:
+ return
+ heads, index = find_pruneable_heads_and_indices(
+ heads, self.attention.num_attention_heads,
+ self.attention.attention_head_size, self.pruned_heads)
+
+ # Prune linear layers
+ self.attention.query = prune_linear_layer(self.attention.query, index)
+ self.attention.key = prune_linear_layer(self.attention.key, index)
+ self.attention.value = prune_linear_layer(self.attention.value, index)
+        self.output.out_proj = prune_linear_layer(
+            self.output.out_proj, index, dim=1)
+
+ # Update hyper params and store pruned heads
+ self.attention.num_attention_heads = self.attention.num_attention_heads - \
+ len(heads)
+ self.attention.all_head_size = self.attention.attention_head_size * \
+ self.attention.num_attention_heads
+ self.pruned_heads = self.pruned_heads.union(heads)
+
+ def forward(
+ self,
+ hidden_states: torch.Tensor,
+ attention_mask: Optional[torch.FloatTensor] = None,
+ head_mask: Optional[torch.FloatTensor] = None,
+ encoder_hidden_states: Optional[torch.FloatTensor] = None,
+ encoder_attention_mask: Optional[torch.FloatTensor] = None,
+ past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+ output_attentions: Optional[bool] = False,
+ ) -> Tuple[torch.Tensor]:
+ # HACK we apply norm on q and k
+ hidden_states = self.norm1(hidden_states)
+ encoder_hidden_states = self.normk(encoder_hidden_states)
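+        # Prepend the normalized query states to the encoder states so the learned
+        # queries attend to themselves as well as to the image tokens; the
+        # attention mask is extended to match.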
+ encoder_hidden_states = torch.cat(
+ [hidden_states, encoder_hidden_states], dim=1)
+ encoder_attention_mask = torch.cat(
+ [attention_mask, encoder_attention_mask], dim=-1)
+ self_outputs = self.attention(
+ hidden_states,
+ attention_mask,
+ head_mask,
+ encoder_hidden_states,
+ encoder_attention_mask,
+ past_key_value,
+ output_attentions,
+ )
+ attention_output = self.output(self_outputs[0], hidden_states)
+ # add attentions if we output them
+ outputs = (attention_output, ) + self_outputs[1:]
+ return outputs
+
+
+class MplugOwlVisualAbstractorLayer(nn.Module):
+
+ def __init__(self, config, layer_idx):
+ super().__init__()
+ self.chunk_size_feed_forward = config.chunk_size_feed_forward
+ self.seq_len_dim = 1
+
+ self.layer_idx = layer_idx
+
+ self.crossattention = MplugOwlVisualAbstractorAttention(config)
+ self.has_cross_attention = True
+
+ def forward(
+ self,
+ hidden_states,
+ attention_mask=None,
+ head_mask=None,
+ encoder_hidden_states=None,
+ encoder_attention_mask=None,
+ output_attentions=False,
+ ):
+ if encoder_hidden_states is None:
+ raise ValueError(
+ 'encoder_hidden_states must be given for cross-attention layers'
+ )
+ cross_attention_outputs = self.crossattention(
+ hidden_states,
+ attention_mask,
+ head_mask,
+ encoder_hidden_states,
+ encoder_attention_mask,
+ output_attentions=output_attentions,
+ )
+ query_attention_output = cross_attention_outputs[0]
+
+ outputs = (query_attention_output, )
+ return outputs
+
+
+class MplugOwlVisualAbstractorEncoder(nn.Module):
+
+ def __init__(self, config):
+ super().__init__()
+ self.config = config
+ self.layers = nn.ModuleList([
+ MplugOwlVisualAbstractorLayer(config, layer_idx)
+ for layer_idx in range(config.num_hidden_layers)
+ ])
+ self.gradient_checkpointing = False
+
+ def forward(
+ self,
+ hidden_states,
+ attention_mask=None,
+ head_mask=None,
+ encoder_hidden_states=None,
+ encoder_attention_mask=None,
+ past_key_values=None,
+ output_attentions=False,
+ output_hidden_states=False,
+ return_dict=True,
+ ):
+ all_hidden_states = () if output_hidden_states else None
+
+ for i in range(self.config.num_hidden_layers):
+ layer_module = self.layers[i]
+ if output_hidden_states:
+ all_hidden_states = all_hidden_states + (hidden_states, )
+
+ layer_head_mask = head_mask[i] if head_mask is not None else None
+ past_key_value = past_key_values[
+ i] if past_key_values is not None else None
+
+ if getattr(self.config, 'gradient_checkpointing',
+ False) and self.training:
+
+ def create_custom_forward(module):
+
+ def custom_forward(*inputs):
+ return module(*inputs, past_key_value,
+ output_attentions)
+
+ return custom_forward
+
+ layer_outputs = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(layer_module),
+ hidden_states,
+ attention_mask,
+ layer_head_mask,
+ encoder_hidden_states,
+ encoder_attention_mask,
+ )
+ else:
+ layer_outputs = layer_module(
+ hidden_states,
+ attention_mask,
+ layer_head_mask,
+ encoder_hidden_states,
+ encoder_attention_mask,
+ output_attentions,
+ )
+
+ hidden_states = layer_outputs[0]
+
+ return BaseModelOutput(last_hidden_state=hidden_states, )
+
+
+class MplugOwlVisualAbstractorModel(MplugOwlPreTrainedModel):
+
+ def __init__(self, config: MplugOwlVisualAbstractorConfig,
+ language_hidden_size):
+ super().__init__(config)
+ self.config = config
+
+ self.encoder = MplugOwlVisualAbstractorEncoder(config)
+ self.visual_fc = torch.nn.Linear(config.hidden_size,
+ language_hidden_size)
+ self.vit_eos = torch.nn.Parameter(
+ torch.randn(1, 1, language_hidden_size))
+ self.post_init()
+
+ def _prune_heads(self, heads_to_prune):
+ """
+        Prunes heads of the model.
+
+        heads_to_prune: dict of {layer_num: list of heads to prune in this layer}.
+        See base class `PreTrainedModel`.
+ """
+ for layer, heads in heads_to_prune.items():
+            self.encoder.layers[layer].crossattention.prune_heads(heads)
+
+ def get_extended_attention_mask(
+ self,
+ attention_mask: torch.Tensor,
+ input_shape: Tuple[int],
+ device: torch.device,
+ ) -> torch.Tensor:
+ """
+ Makes broadcastable attention and causal masks so that future and masked tokens are ignored.
+
+ Arguments:
+ attention_mask (`torch.Tensor`):
+ Mask with ones indicating tokens to attend to, zeros for tokens to ignore.
+ input_shape (`Tuple[int]`):
+ The shape of the input to the model.
+ device: (`torch.device`):
+ The device of the input to the model.
+
+ Returns:
+            `torch.Tensor` The extended attention mask, with the same dtype as `attention_mask.dtype`.
+ """
+ # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
+ # ourselves in which case we just need to make it broadcastable to all heads.
+ if attention_mask.dim() == 3:
+ extended_attention_mask = attention_mask[:, None, :, :]
+ elif attention_mask.dim() == 2:
+ extended_attention_mask = attention_mask[:, None, None, :]
+ else:
+ raise ValueError(
+ 'Wrong shape for input_ids (shape {}) or attention_mask (shape {})'
+ .format(input_shape, attention_mask.shape))
+
+ # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
+ # masked positions, this operation will create a tensor which is 0.0 for
+ # positions we want to attend and -10000.0 for masked positions.
+ # Since we are adding it to the raw scores before the softmax, this is
+ # effectively the same as removing these entirely.
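+        # Illustrative example: a 2-D mask [[1, 1, 0]] becomes
+        # [[[[0.0, 0.0, -10000.0]]]], so the masked position is suppressed
+        # once the scores go through the softmax.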
+ extended_attention_mask = extended_attention_mask.to(
+ dtype=self.dtype) # fp16 compatibility
+ extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0
+ return extended_attention_mask
+
+ def forward(
+ self,
+ query_embeds,
+ attention_mask=None,
+ head_mask=None,
+ encoder_hidden_states=None,
+ encoder_attention_mask=None,
+ past_key_values=None,
+ output_attentions=None,
+ output_hidden_states=None,
+ return_dict=None,
+ ):
+ r"""
+ encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, `optional`):
+ Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
+ the model is configured as a decoder.
+ encoder_attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, `optional`):
+ Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in
+ the cross-attention if the model is configured as a decoder. Mask values selected in `[0, 1]`:
+ - 1 for tokens that are **not masked**,
+ - 0 for tokens that are **masked**.
+ past_key_values (`tuple(tuple(torch.FloatTensor))` of length `config.n_layers` with each tuple having 4 tensors:
+ shape `(batch_size, num_heads, sequence_length - 1, embed_size_per_head)`): Contains precomputed key and
+ value hidden states of the attention blocks. Can be used to speed up decoding. If `past_key_values` are
+ used, the user can optionally input only the last `decoder_input_ids` (those that don't have their past key
+ value states given to this model) of shape `(batch_size, 1)` instead of all `decoder_input_ids` of shape
+ `(batch_size, sequence_length)`.
+ """
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+ output_hidden_states = (
+ output_hidden_states if output_hidden_states is not None else
+ self.config.output_hidden_states)
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+ embedding_output = query_embeds
+ input_shape = embedding_output.size()[:-1]
+ batch_size, seq_length = input_shape
+ device = embedding_output.device
+
+ # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
+ # ourselves in which case we just need to make it broadcastable to all heads.
+ if attention_mask is None:
+ attention_mask = torch.ones(
+ (query_embeds.shape[0], query_embeds.shape[1]),
+ dtype=torch.long,
+ device=query_embeds.device)
+ extended_attention_mask = self.get_extended_attention_mask(
+ attention_mask, input_shape, device)
+
+ # If a 2D or 3D attention mask is provided for the cross-attention
+ # we need to make broadcastable to [batch_size, num_heads, seq_length, seq_length]
+ if encoder_hidden_states is not None:
+ if type(encoder_hidden_states) == list:
+ encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states[
+ 0].size()
+ else:
+ (
+ encoder_batch_size,
+ encoder_sequence_length,
+ _,
+ ) = encoder_hidden_states.size()
+ encoder_hidden_shape = (encoder_batch_size,
+ encoder_sequence_length)
+
+ if type(encoder_attention_mask) == list:
+ encoder_extended_attention_mask = [
+ self.invert_attention_mask(mask)
+ for mask in encoder_attention_mask
+ ]
+ elif encoder_attention_mask is None:
+ encoder_attention_mask = torch.ones(
+ encoder_hidden_shape, device=device)
+ encoder_extended_attention_mask = self.invert_attention_mask(
+ encoder_attention_mask)
+ else:
+ encoder_extended_attention_mask = self.invert_attention_mask(
+ encoder_attention_mask)
+ else:
+ encoder_extended_attention_mask = None
+
+ # Prepare head mask if needed
+ # 1.0 in head_mask indicate we keep the head
+ # attention_probs has shape bsz x n_heads x N x N
+ # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
+ # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
+ head_mask = self.get_head_mask(head_mask,
+ self.config.num_hidden_layers)
+
+ encoder_outputs = self.encoder(
+ embedding_output,
+ attention_mask=extended_attention_mask,
+ head_mask=head_mask,
+ encoder_hidden_states=encoder_hidden_states,
+ encoder_attention_mask=encoder_extended_attention_mask,
+ past_key_values=past_key_values,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ return_dict=return_dict,
+ )
+ sequence_output = encoder_outputs[0]
+ pooled_output = sequence_output[:, 0, :]
+
+ sequence_output = self.visual_fc(sequence_output)
+ eos_repeat = self.vit_eos.repeat(sequence_output.shape[0], 1, 1)
+ sequence_output = torch.cat([sequence_output, eos_repeat], dim=1)
+
+ return BaseModelOutputWithPooling(
+ last_hidden_state=sequence_output,
+ pooler_output=pooled_output,
+ hidden_states=encoder_outputs.hidden_states,
+ )
+
+
+class MplugOwlModel(MplugOwlPreTrainedModel):
+ r"""The mPLUG-Owl model is a multi-modal conversation model that support various modalities as input.
+ mPLUG-Owl consists a visual encoder, a visual abstrator module and a language decoder model, which enables
+ both image and text input.
+ This model is implemented base on mPLUG-Owl: Modularization Empowers Large Language Models with Multimodality.
+ `Paper `.
+ """
+ config_class = MplugOwlConfig
+ main_input_name = 'pixel_values'
+
+ def __init__(self, config: MplugOwlConfig):
+ super().__init__(config)
+
+ self.vision_model = MplugOwlVisionModel(config.vision_config)
+
+ self.query_tokens = nn.Parameter(
+ torch.zeros(1, config.num_query_tokens,
+ config.visual_abstractor_config.hidden_size))
+ self.abstractor = MplugOwlVisualAbstractorModel(
+ config.visual_abstractor_config, config.text_config.hidden_size)
+
+ # if config.use_decoder_only_language_model:
+ language_model = AutoModelForCausalLM.from_config(config.text_config)
+ self.language_model = language_model
+
+ # Initialize weights and apply final processing
+ self.post_init()
+
+ def get_input_embeddings(self):
+ return self.language_model.get_input_embeddings()
+
+ def set_input_embeddings(self, value):
+ self.language_model.set_input_embeddings(value)
+
+ def set_output_embeddings(self, new_embeddings):
+ self.language_model.set_output_embeddings(new_embeddings)
+
+ def get_output_embeddings(self) -> nn.Module:
+ return self.language_model.get_output_embeddings()
+
+ def get_encoder(self):
+ return self.language_model.get_encoder()
+
+ def get_decoder(self):
+ return self.language_model.get_decoder()
+
+ def _tie_weights(self):
+ if not self.config.use_decoder_only_language_model:
+ self.language_model.encoder.embed_tokens = self.language_model.shared
+ self.language_model.decoder.embed_tokens = self.language_model.shared
+
+ def get_text_features(
+ self,
+ input_ids: Optional[torch.Tensor] = None,
+ attention_mask: Optional[torch.Tensor] = None,
+ decoder_input_ids: Optional[torch.Tensor] = None,
+ decoder_attention_mask: Optional[torch.Tensor] = None,
+ labels: Optional[torch.Tensor] = None,
+ output_attentions: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ return_dict: Optional[bool] = None,
+ ):
+
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+ output_hidden_states = (
+ output_hidden_states if output_hidden_states is not None else
+ self.config.output_hidden_states)
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+ if self.config.use_decoder_only_language_model:
+ text_outputs = self.language_model(
+ input_ids=input_ids,
+ attention_mask=attention_mask,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ return_dict=return_dict,
+ )
+ else:
+ inputs_embeds = self.language_model.get_input_embeddings()(
+ input_ids)
+
+ text_outputs = self.language_model(
+ inputs_embeds=inputs_embeds,
+ attention_mask=attention_mask,
+ decoder_input_ids=decoder_input_ids,
+ decoder_attention_mask=decoder_attention_mask,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ return_dict=return_dict,
+ labels=labels,
+ )
+
+ return text_outputs
+
+ def get_image_features(
+ self,
+ pixel_values: Optional[torch.FloatTensor] = None,
+ output_attentions: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ return_dict: Optional[bool] = None,
+ ):
+
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+ output_hidden_states = (
+ output_hidden_states if output_hidden_states is not None else
+ self.config.output_hidden_states)
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+ vision_outputs = self.vision_model(
+ pixel_values=pixel_values,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ return_dict=return_dict,
+ )
+ return vision_outputs
+
+
+def get_media_indices(my_list):
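+    # Collect the start index of each run of identical negative ids; negative ids
+    # mark media (image) placeholder positions in the token sequence.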
+ if isinstance(my_list, torch.Tensor):
+ my_list = my_list.cpu().tolist()
+ result = []
+ for i in range(len(my_list)):
+ if i == 0 and my_list[i] < 0:
+ result.append(i)
+ elif my_list[i] != my_list[i - 1] and my_list[i] < 0:
+ result.append(i)
+ return result
+
+
+class MplugOwlForConditionalGenerationHF(MplugOwlPreTrainedModel):
+ config_class = MplugOwlConfig
+ main_input_name = 'pixel_values'
+
+ def __init__(self, config: MplugOwlConfig, **kwargs):
+ super().__init__(config)
+
+ self.vision_model = MplugOwlVisionModel(config.vision_config)
+
+ self.query_tokens = nn.Parameter(
+ torch.zeros(1, config.num_query_tokens,
+ config.visual_abstractor_config.hidden_size))
+ self.abstractor = MplugOwlVisualAbstractorModel(
+ config.visual_abstractor_config, config.text_config.hidden_size)
+
+ # if config.use_decoder_only_language_model:
+ language_model = AutoModelForCausalLM.from_config(config.text_config)
+ self.language_model = language_model
+
+ # Initialize weights and apply final processing
+ self.post_init()
+ self.main_input_name = 'input_ids'
+
+ def get_input_embeddings(self):
+ return self.language_model.get_input_embeddings()
+
+ def set_input_embeddings(self, value):
+ self.language_model.set_input_embeddings(value)
+
+ def set_output_embeddings(self, new_embeddings):
+ self.language_model.set_output_embeddings(new_embeddings)
+
+ def get_output_embeddings(self) -> nn.Module:
+ return self.language_model.get_output_embeddings()
+
+ def get_encoder(self):
+ return self.language_model.get_encoder()
+
+ def get_decoder(self):
+ return self.language_model.get_decoder()
+
+ def _tie_weights(self):
+ if not self.config.use_decoder_only_language_model:
+ self.language_model.encoder.embed_tokens = self.language_model.shared
+ self.language_model.decoder.embed_tokens = self.language_model.shared
+
+ def _preprocess_accelerate(self):
+ r"""
+ Some pre-processing hacks to make the model `accelerate` compatible. Check
+ https://github.com/huggingface/transformers/pull/21707 for more details.
+ """
+ hf_device_map = self.hf_device_map
+
+ if len(
+ hf_device_map
+ ) > 1 and 'language_model' not in hf_device_map and torch.cuda.device_count(
+ ) > 1:
+ # warn users about unexpected behavior when using multi-GPU + mPLUG-Owl + `accelerate`.
+ logger.warning(
+ 'The `language_model` is not in the `hf_device_map` dictionary and you are running your script'
+                ' in a multi-GPU environment. This may lead to unexpected behavior when using `accelerate`.'
+ ' Please pass a `device_map` that contains `language_model` to remove this warning.'
+ ' Please refer to https://github.com/huggingface/blog/blob/main/accelerate-large-models.md for'
+ ' more details on creating a `device_map` for large models.', )
+
+ if hasattr(self.language_model, '_hf_hook'):
+ self.language_model._hf_hook.io_same_device = True # For `generate` compatibility
+
+ def forward(
+ self,
+ pixel_values: torch.FloatTensor,
+ input_ids: torch.FloatTensor,
+ num_images,
+ non_padding_mask: Optional[torch.LongTensor] = None,
+ non_media_mask: Optional[torch.LongTensor] = None,
+ prompt_mask: Optional[torch.LongTensor] = None,
+ attention_mask: Optional[torch.LongTensor] = None,
+ decoder_input_ids: Optional[torch.LongTensor] = None,
+ decoder_attention_mask: Optional[torch.LongTensor] = None,
+ output_attentions: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ labels: Optional[torch.LongTensor] = None,
+ return_dict: Optional[bool] = None,
+ ) -> Union[Tuple, MplugOwlForConditionalGenerationModelOutput]:
+
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+ # get text embedding
+ text_tokens_ = input_ids
+ batch_size = input_ids.shape[0]
+
+ media_token_indices = [
+ # [:-1] since we would not use the last token for embedding
+ get_media_indices(text_tokens_[i][:-1]) for i in range(batch_size)
+ ]
+        text_tokens_[text_tokens_ < 0] = 1  # placeholder id; these positions are replaced by visual features below
+ text_embeds = self.get_input_embeddings()(
+            text_tokens_)  # temporary embedding (media spans overwritten below)
+
+ if pixel_values is not None:
+ pixel_values = pixel_values.half()
+ image_embeds = self.vision_model(
+ pixel_values, return_dict=True).last_hidden_state
+
+ image_attention_mask = torch.ones(
+ image_embeds.size()[:-1],
+ dtype=torch.long,
+ device=image_embeds.device)
+ query_tokens = self.query_tokens.expand(image_embeds.shape[0], -1,
+ -1)
+
+ query_features = self.abstractor(
+ query_embeds=query_tokens,
+ encoder_hidden_states=image_embeds,
+ encoder_attention_mask=image_attention_mask,
+ )['last_hidden_state']
+ img_seq_length = query_features.shape[1]
+
+ num_images_per_sample = num_images.long().cpu().tolist()
+
+ text_chunk_embeds = []
+ img_idx = 0
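+        # Splice the visual query features into the text embeddings: each media
+        # placeholder span of `img_seq_length` tokens is replaced by the
+        # corresponding visual tokens.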
+ for b in range(batch_size):
+ start = 0
+ result = []
+ if len(media_token_indices[b]) > 0:
+ for i, pos in enumerate(media_token_indices[b]):
+ if pos > start:
+ result.append(text_embeds[b, start:pos])
+ result.append(query_features[img_idx + i])
+ start = pos + img_seq_length
+ if start < text_embeds.shape[1]:
+ result.append(text_embeds[b, start:])
+
+ img_idx += num_images_per_sample[b]
+ text_chunk_embeds.append(torch.cat(result, dim=0))
+
+ # Actual Input Embeddings
+ input_embeds = torch.stack(text_chunk_embeds, dim=0)
+
+ # Create causal mask and position ids
+ _, loss_mask, position_ids = \
+ get_ltor_masks_and_position_ids_from_embeddings(input_embeds)
+
+ # Calculate the loss_mask
+ non_padding_mask = non_padding_mask.long()
+ non_media_mask = non_media_mask.long()
+ prompt_mask = prompt_mask.long() # TODO How to deal with prompt mask
+ loss_mask = loss_mask[:, :-1]
+
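+        # Zero out padding, media and prompt positions so only the remaining
+        # target tokens contribute to the averaged language-modeling loss below.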
+ loss_mask = loss_mask * non_padding_mask * non_media_mask * prompt_mask
+
+ # Forward into GPT
+ outputs = self.language_model(
+ inputs_embeds=input_embeds,
+ attention_mask=attention_mask,
+ labels=labels,
+ )
+ outputs.loss = (outputs.loss
+ * loss_mask.view(-1)).sum() / loss_mask.sum()
+ return outputs
+
+ @torch.no_grad()
+ def generate(
+ self,
+ pixel_values: torch.FloatTensor,
+ input_ids: Optional[torch.LongTensor] = None,
+ attention_mask: Optional[torch.LongTensor] = None,
+ **generate_kwargs,
+ ) -> torch.LongTensor:
+ """
+ Overrides `generate` function to be able to use the model as a conditional generator.
+
+ Args:
+ pixel_values (`torch.FloatTensor` of shape (batch_size, num_channels, height, width)):
+ Input images to be processed.
+ input_ids (`torch.LongTensor` of shape (batch_size, sequence_length), *optional*):
+ The sequence used as a prompt for the generation.
+ attention_mask (`torch.LongTensor` of shape (batch_size, sequence_length), *optional*):
+ Mask to avoid performing attention on padding token indices
+
+ Returns:
+            A `torch.LongTensor` of generated token ids; decode them with the tokenizer to obtain text.
+ """
+
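+        # Illustrative call (variable names are placeholders, not a fixed API):
+        #   generated_ids = model.generate(pixel_values=pixel_values,
+        #                                  input_ids=input_ids,
+        #                                  max_new_tokens=64)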
+ if input_ids is not None:
+ batch_size = input_ids.size(0)
+ media_token_indices = [
+ get_media_indices(input_ids[i]) for i in range(batch_size)
+ ]
+ num_images_per_sample = [len(x) for x in media_token_indices]
+ input_ids = input_ids.clone()
+ input_ids[input_ids < 0] = 0 # Not used
+
+ if attention_mask is None:
+ attention_mask = torch.ones_like(input_ids).long().to(
+ input_ids.device)
+
+ if hasattr(self, 'hf_device_map'):
+ # preprocess for `accelerate`
+ self._preprocess_accelerate()
+ batch_size = input_ids.shape[0]
+ # get text embedding
+ inputs_embeds = self.get_input_embeddings()(input_ids)
+ # get visual embedding
+ if pixel_values is not None:
+ pixel_values = pixel_values.half()
+ pixel_values = pixel_values.to(input_ids.device)
+ with torch.no_grad():
+ image_embeds = self.vision_model(
+ pixel_values, return_dict=True).last_hidden_state
+ image_attention_mask = torch.ones(
+ image_embeds.size()[:-1],
+ dtype=torch.long,
+ device=image_embeds.device)
+ query_tokens = self.query_tokens.expand(
+ image_embeds.shape[0], -1, -1)
+ query_outputs = self.abstractor(
+ query_embeds=query_tokens,
+ encoder_hidden_states=image_embeds,
+ encoder_attention_mask=image_attention_mask,
+ return_dict=True,
+ )
+ query_output = query_outputs['last_hidden_state']
+ image_embeds = query_output
+ img_seq_length = image_embeds.shape[1]
+
+ # ===================
+ # Get actual input embeddings
+ # ===================
+ text_chunk_embeds = []
+ text_chunk_attns = []
+ img_idx = 0
+
+ for b in range(batch_size):
+ start = 0
+ result = []
+ result_attn = []
+ for i, pos in enumerate(media_token_indices[b]):
+ if pos > start:
+ result.append(inputs_embeds[b, start:pos])
+ result_attn.append(attention_mask[b, start:pos])
+ result.append(image_embeds[img_idx + i])
+ result_attn.append(
+ torch.ones(
+ image_embeds[img_idx + i].shape[0],
+ device=inputs_embeds.device))
+ start = pos + img_seq_length
+ if start < inputs_embeds.shape[1]:
+ result.append(inputs_embeds[b, start:])
+ result_attn.append(attention_mask[b, start:])
+
+ img_idx += num_images_per_sample[b]
+ text_chunk_embeds.append(torch.cat(result, dim=0))
+ text_chunk_attns.append(torch.cat(result_attn, dim=0))
+ inputs_embeds = torch.stack(text_chunk_embeds, dim=0)
+ attention_mask = torch.stack(text_chunk_attns, dim=0)
+
+ outputs = self.language_model.generate(
+ inputs_embeds=inputs_embeds,
+ attention_mask=attention_mask,
+ **generate_kwargs,
+ )
+
+ return outputs
+
+
+@MODELS.register_module(
+ Tasks.multimodal_dialogue, module_name=Models.mplug_owl)
+class MplugOwlForConditionalGeneration(TorchModel):
+
+ def __init__(self, model_dir: str, *args, **kwargs):
+ """initialize the mPLUG-Owl model from the `model_dir` path.
+ Args:
+ model_dir (str): the model path.
+ """
+
+ super().__init__(model_dir, *args, **kwargs)
+ self.model = MplugOwlForConditionalGenerationHF.from_pretrained(
+ model_dir,
+ torch_dtype=torch.half,
+ )
+
+ def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]:
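+        # Delegate to the HF-style model's `generate`; `input` is the preprocessed
+        # dict of tensors (e.g. pixel_values, input_ids, attention_mask).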
+ output = self.model.generate(**input)
+ return output
diff --git a/modelscope/models/nlp/mglm/blocklm_utils.py b/modelscope/models/nlp/mglm/blocklm_utils.py
index b05cd2c2..e75aea92 100644
--- a/modelscope/models/nlp/mglm/blocklm_utils.py
+++ b/modelscope/models/nlp/mglm/blocklm_utils.py
@@ -212,10 +212,10 @@ class ConstructBlockStrategy:
block_spans,
rng,
task='bert'):
- position_ids = np.arange(len(tokens), dtype=np.long)
+ position_ids = np.arange(len(tokens), dtype=int)
targets = copy.deepcopy(tokens)
mask_id = self.tokenizer.get_command('MASK').Id
- mlm_masks = np.zeros(len(tokens), dtype=np.long)
+ mlm_masks = np.zeros(len(tokens), dtype=int)
for start, end in block_spans:
for idx in range(start, end):
tokens[idx] = mask_id
@@ -231,7 +231,7 @@ class ConstructBlockStrategy:
rng,
task='bert'):
text_length = len(tokens)
- position_ids = np.ones(len(tokens), dtype=np.long)
+ position_ids = np.ones(len(tokens), dtype=int)
for start, end in block_spans:
position_ids[start + 1:end] = 0
position_ids = np.cumsum(position_ids) - 1
@@ -270,7 +270,7 @@ class ConstructBlockStrategy:
(end - start + 1))
if self.block_position_encoding:
target_block_position_ids.append(
- np.arange(1, end - start + 2, dtype=np.long))
+ np.arange(1, end - start + 2, dtype=int))
else:
target_block_position_ids.append([1] * (end - start + 1))
block_spans.sort(key=lambda x: x[0])
@@ -307,7 +307,7 @@ class ConstructBlockStrategy:
target_tokens = target_tokens + [
self.tokenizer.get_command('eop').Id
]
- loss_masks = np.ones(len(target_tokens), dtype=np.long)
+ loss_masks = np.ones(len(target_tokens), dtype=int)
return source_tokens, target_tokens, loss_masks
else:
tokens = np.concatenate(source_tokens + target_tokens)
@@ -326,12 +326,12 @@ class ConstructBlockStrategy:
for pos in mask_pos:
tokens[pos] = self.tokenizer.get_command('dBLOCK').Id
targets = np.concatenate(source_tokens + targets)
- loss_masks = np.ones(len(tokens), dtype=np.long)
+ loss_masks = np.ones(len(tokens), dtype=int)
loss_masks[:source_length] = 0
position_ids = np.concatenate(source_position_ids
+ target_position_ids)
block_position_ids = np.concatenate(
- [np.zeros(source_length, dtype=np.long)]
+ [np.zeros(source_length, dtype=int)]
+ target_block_position_ids)
position_ids = np.stack([position_ids, block_position_ids], axis=0)
if attention_mask is not None:
@@ -539,22 +539,21 @@ class ConstructBlockStrategy:
(source_tokens, [self.generation_mask], target_tokens))
loss_masks = np.concatenate(
(np.zeros(len(source_tokens) + 1,
- dtype=np.long), target_masks))
+ dtype=int), target_masks))
token_batch.append(tokens)
target_batch.append(targets)
loss_mask_batch.append(loss_masks)
position_ids = np.arange(
- len(source_tokens) + len(target_tokens) + 1,
- dtype=np.long)
+ len(source_tokens) + len(target_tokens) + 1, dtype=int)
position_ids[len(source_tokens) + 1:] = len(source_tokens)
if self.block_position_encoding:
block_position_ids = np.concatenate(
- (np.zeros(len(source_tokens), dtype=np.long),
- np.arange(len(target_tokens) + 1, dtype=np.long)))
+ (np.zeros(len(source_tokens), dtype=int),
+ np.arange(len(target_tokens) + 1, dtype=int)))
else:
block_position_ids = np.concatenate(
- (np.zeros(len(source_tokens) + 1, dtype=np.long),
- np.ones(len(target_tokens) + 1, dtype=np.long)))
+ (np.zeros(len(source_tokens) + 1, dtype=int),
+ np.ones(len(target_tokens) + 1, dtype=int)))
position_id_batch.append(
np.stack([position_ids, block_position_ids], axis=0))
else:
@@ -597,27 +596,25 @@ class ConstructBlockStrategy:
max_length = max(seq_lengths)
token_batch = [
np.concatenate(
- (tokens, np.zeros(max_length - len(tokens),
- dtype=np.long)))
+ (tokens, np.zeros(max_length - len(tokens), dtype=int)))
for tokens in token_batch
]
target_batch = [
np.concatenate(
- (targets,
- np.zeros(max_length - len(targets), dtype=np.long)))
+ (targets, np.zeros(max_length - len(targets), dtype=int)))
for targets in target_batch
]
loss_mask_batch = [
np.concatenate(
(loss_masks,
- np.zeros(max_length - len(loss_masks), dtype=np.long)))
+ np.zeros(max_length - len(loss_masks), dtype=int)))
for loss_masks in loss_mask_batch
]
position_id_batch = [
- np.concatenate((position_ids,
- np.zeros(
- (2, max_length - position_ids.shape[1]),
- dtype=np.long)),
- axis=1) for position_ids in position_id_batch
+ np.concatenate(
+ (position_ids,
+ np.zeros(
+ (2, max_length - position_ids.shape[1]), dtype=int)),
+ axis=1) for position_ids in position_id_batch
]
return token_batch, target_batch, loss_mask_batch, position_id_batch
diff --git a/modelscope/models/nlp/mglm/data_utils/datasets.py b/modelscope/models/nlp/mglm/data_utils/datasets.py
index 39ffaea3..37bfbcc2 100644
--- a/modelscope/models/nlp/mglm/data_utils/datasets.py
+++ b/modelscope/models/nlp/mglm/data_utils/datasets.py
@@ -583,8 +583,8 @@ class XLDataset(data.Dataset):
def getidx(self, idx):
tokens, targets, loss_masks = [], [], []
attention_mask = np.concatenate(
- (np.zeros((self.max_seq_len, self.mem_len), dtype=np.long),
- np.ones((self.max_seq_len, self.max_seq_len), dtype=np.long)),
+ (np.zeros((self.max_seq_len, self.mem_len), dtype=int),
+ np.ones((self.max_seq_len, self.max_seq_len), dtype=int)),
axis=1)
sample_idx = bisect_right(self.indices, idx * self.max_seq_len)
last_end = 0 if sample_idx == 0 else self.indices[sample_idx - 1]
diff --git a/modelscope/models/nlp/mglm/test/test_block.py b/modelscope/models/nlp/mglm/test/test_block.py
index ed4225da..eb630835 100644
--- a/modelscope/models/nlp/mglm/test/test_block.py
+++ b/modelscope/models/nlp/mglm/test/test_block.py
@@ -28,7 +28,7 @@ def main():
counts = np.array([0] * 10)
for _ in range(10000):
spans = strategy.sample_span_in_document(
- np.array([1, 2, 3, 0, 4, 5, 6, 7, 9, 0], dtype=np.long), [1, 1],
+ np.array([1, 2, 3, 0, 4, 5, 6, 7, 9, 0], dtype=int), [1, 1],
random.Random())
for start, end in spans:
counts[start:end] += 1
diff --git a/modelscope/models/nlp/mglm/test/test_rel_shift.py b/modelscope/models/nlp/mglm/test/test_rel_shift.py
index 00cbb9fe..ad68b15e 100644
--- a/modelscope/models/nlp/mglm/test/test_rel_shift.py
+++ b/modelscope/models/nlp/mglm/test/test_rel_shift.py
@@ -17,7 +17,7 @@ def main():
num_iters=300000,
decay_style='cosine',
decay_ratio=0.1)
- steps = np.arange(0, 400000, 10, dtype=np.long)
+ steps = np.arange(0, 400000, 10, dtype=int)
rates = []
for step in steps:
lr_scheduler.num_iters = step
diff --git a/modelscope/models/nlp/unite/__init__.py b/modelscope/models/nlp/unite/__init__.py
index 06c2146e..939f0ab7 100644
--- a/modelscope/models/nlp/unite/__init__.py
+++ b/modelscope/models/nlp/unite/__init__.py
@@ -5,12 +5,12 @@ from typing import TYPE_CHECKING
from modelscope.utils.import_utils import LazyImportModule
if TYPE_CHECKING:
- from .configuration_unite import UniTEConfig
- from .modeling_unite import UniTEForTranslationEvaluation
+ from .configuration import UniTEConfig
+ from .translation_evaluation import UniTEForTranslationEvaluation
else:
_import_structure = {
- 'configuration_unite': ['UniTEConfig'],
- 'modeling_unite': ['UniTEForTranslationEvaluation'],
+ 'configuration': ['UniTEConfig'],
+ 'translation_evaluation': ['UniTEForTranslationEvaluation'],
}
import sys
diff --git a/modelscope/models/nlp/unite/configuration_unite.py b/modelscope/models/nlp/unite/configuration.py
similarity index 93%
rename from modelscope/models/nlp/unite/configuration_unite.py
rename to modelscope/models/nlp/unite/configuration.py
index b0a48585..402538f7 100644
--- a/modelscope/models/nlp/unite/configuration_unite.py
+++ b/modelscope/models/nlp/unite/configuration.py
@@ -9,7 +9,7 @@ from modelscope.utils.config import Config
logger = logging.get_logger()
-class EvaluationMode(Enum):
+class InputFormat(Enum):
SRC = 'src'
REF = 'ref'
SRC_REF = 'src-ref'
diff --git a/modelscope/models/nlp/unite/modeling_unite.py b/modelscope/models/nlp/unite/translation_evaluation.py
similarity index 61%
rename from modelscope/models/nlp/unite/modeling_unite.py
rename to modelscope/models/nlp/unite/translation_evaluation.py
index deea737d..c7e96027 100644
--- a/modelscope/models/nlp/unite/modeling_unite.py
+++ b/modelscope/models/nlp/unite/translation_evaluation.py
@@ -20,6 +20,8 @@ from transformers.activations import ACT2FN
from modelscope.metainfo import Models
from modelscope.models.base import TorchModel
from modelscope.models.builder import MODELS
+from modelscope.models.nlp.unite.configuration import InputFormat
+from modelscope.outputs.nlp_outputs import TranslationEvaluationOutput
from modelscope.utils.constant import Tasks
from modelscope.utils.logger import get_logger
@@ -71,8 +73,16 @@ class LayerwiseAttention(Module):
mask: torch.Tensor = None,
) -> torch.Tensor:
tensors = torch.cat(list(x.unsqueeze(dim=0) for x in tensors), dim=0)
- normed_weights = softmax(
- self.scalar_parameters, dim=0).view(-1, 1, 1, 1)
+
+ if self.training and self.dropout:
+ normed_weights = softmax(
+ torch.where(self.dropout_mask.uniform_() > self.dropout,
+ self.scalar_parameters, self.dropout_fill),
+ dim=-1)
+ else:
+ normed_weights = softmax(self.scalar_parameters, dim=-1)
+
+ normed_weights = normed_weights.view(-1, 1, 1, 1)
mask_float = mask.float()
weighted_sum = (normed_weights
@@ -97,18 +107,18 @@ class FeedForward(Module):
Feed Forward Neural Network.
Args:
- in_dim (:obj:`int`):
- Number of input features.
- out_dim (:obj:`int`, defaults to 1):
- Number of output features. Default is 1 -- a single scalar.
- hidden_sizes (:obj:`List[int]`, defaults to `[3072, 768]`):
- List with hidden layer sizes.
- activations (:obj:`str`, defaults to `Sigmoid`):
- Name of the activation function to be used in the hidden layers.
- final_activation (:obj:`str`, Optional, defaults to `None`):
- Name of the final activation function if any.
- dropout (:obj:`float`, defaults to 0.1):
- Dropout ratio to be used in the hidden layers.
+ in_dim (:obj:`int`):
+ Number of input features.
+ out_dim (:obj:`int`, defaults to 1):
+ Number of output features. Default is 1 -- a single scalar.
+ hidden_sizes (:obj:`List[int]`, defaults to `[3072, 768]`):
+ List with hidden layer sizes.
+ activations (:obj:`str`, defaults to `Sigmoid`):
+ Name of the activation function to be used in the hidden layers.
+ final_activation (:obj:`str`, Optional, defaults to `None`):
+ Name of the final activation function if any.
+ dropout (:obj:`float`, defaults to 0.1):
+ Dropout ratio to be used in the hidden layers.
"""
super().__init__()
modules = []
@@ -266,8 +276,11 @@ class UniTEForTranslationEvaluation(TorchModel):
return
- def forward(self, input_sentences: List[torch.Tensor]):
- input_ids = self.combine_input_sentences(input_sentences)
+ def forward(self,
+ input_ids: torch.Tensor,
+ input_format: Optional[List[InputFormat]] = None,
+ score: Optional[torch.Tensor] = None,
+ **kwargs) -> TranslationEvaluationOutput:
attention_mask = input_ids.ne(self.pad_token_id).long()
outputs = self.encoder(
input_ids=input_ids,
@@ -276,125 +289,138 @@ class UniTEForTranslationEvaluation(TorchModel):
return_dict=True)
mix_states = self.layerwise_attention(outputs['hidden_states'],
attention_mask)
- pred = self.estimator(mix_states)
- return pred.squeeze(dim=-1)
+ pred = self.estimator(mix_states).squeeze(dim=-1)
+ output = TranslationEvaluationOutput(
+ score=pred.cpu().tolist(), input_format=input_format)
- def load_checkpoint(self, path: str, device: torch.device):
- state_dict = torch.load(path, map_location=device)
- self.load_state_dict(state_dict)
+ if score is not None:
+ loss = (pred - score).pow(2).mean()
+ output['loss'] = loss
+
+ return output
+
+ def load_checkpoint(self, path: str, device: torch.device, plm_only: bool):
+ if plm_only:
+ self.encoder = self.encoder.from_pretrained(path).to(device)
+ self.encoder.pooler = None
+ else:
+ state_dict = torch.load(path, map_location=device)
+ self.load_state_dict(state_dict)
logger.info('Loading checkpoint parameters from %s' % path)
return
- def combine_input_sentences(self, input_sent_groups: List[torch.Tensor]):
- for input_sent_group in input_sent_groups[1:]:
- input_sent_group[:, 0] = self.eos_token_id
- if len(input_sent_groups) == 3:
- cutted_sents = self.cut_long_sequences3(input_sent_groups)
- else:
- cutted_sents = self.cut_long_sequences2(input_sent_groups)
- return cutted_sents
-
- @staticmethod
- def cut_long_sequences2(all_input_concat: List[List[torch.Tensor]],
+def combine_input_sentences(all_input_concat: List[List[torch.Tensor]],
maximum_length: int = 512,
- pad_idx: int = 1):
- all_input_concat = list(zip(*all_input_concat))
- collected_tuples = list()
- for tensor_tuple in all_input_concat:
- all_lens = tuple(len(x) for x in tensor_tuple)
+ pad_idx: int = 1,
+ eos_idx: int = 2):
+ for group in all_input_concat[1:]:
+ group[:, 0] = eos_idx
- if sum(all_lens) > maximum_length:
- lengths = dict(enumerate(all_lens))
- lengths_sorted_idxes = list(x[0] for x in sorted(
- lengths.items(), key=lambda d: d[1], reverse=True))
+ if len(all_input_concat) == 3:
+ return cut_long_sequences3(all_input_concat, maximum_length, pad_idx)
+ else:
+ return cut_long_sequences2(all_input_concat, maximum_length, pad_idx)
- offset = ceil((sum(lengths.values()) - maximum_length) / 2)
- if min(all_lens) > (maximum_length
- // 2) and min(all_lens) > offset:
- lengths = dict((k, v - offset) for k, v in lengths.items())
- else:
- lengths[lengths_sorted_idxes[
- 0]] = maximum_length - lengths[lengths_sorted_idxes[1]]
+def cut_long_sequences2(all_input_concat: List[List[torch.Tensor]],
+ maximum_length: int = 512,
+ pad_idx: int = 1):
+ all_input_concat = list(zip(*all_input_concat))
+ collected_tuples = list()
+ for tensor_tuple in all_input_concat:
+ tensor_tuple = tuple(
+ x.masked_select(x.ne(pad_idx)) for x in tensor_tuple)
+ all_lens = tuple(len(x) for x in tensor_tuple)
- new_lens = list(lengths[k]
- for k in range(0, len(tensor_tuple)))
- new_tensor_tuple = tuple(
- x[:y] for x, y in zip(tensor_tuple, new_lens))
- for x, y in zip(new_tensor_tuple, tensor_tuple):
- x[-1] = y[-1]
- collected_tuples.append(new_tensor_tuple)
+ if sum(all_lens) > maximum_length:
+ lengths = dict(enumerate(all_lens))
+ lengths_sorted_idxes = list(x[0] for x in sorted(
+ lengths.items(), key=lambda d: d[1], reverse=True))
+
+ offset = ceil((sum(lengths.values()) - maximum_length) / 2)
+
+ if min(all_lens) > (maximum_length
+ // 2) and min(all_lens) > offset:
+ lengths = dict((k, v - offset) for k, v in lengths.items())
else:
- collected_tuples.append(tensor_tuple)
+ lengths[lengths_sorted_idxes[0]] = maximum_length - lengths[
+ lengths_sorted_idxes[1]]
- concat_tensor = list(torch.cat(x, dim=0) for x in collected_tuples)
- all_input_concat_padded = pad_sequence(
- concat_tensor, batch_first=True, padding_value=pad_idx)
+ new_lens = list(lengths[k] for k in range(0, len(tensor_tuple)))
+ new_tensor_tuple = tuple(x[:y]
+ for x, y in zip(tensor_tuple, new_lens))
+ for x, y in zip(new_tensor_tuple, tensor_tuple):
+ x[-1] = y[-1]
+ collected_tuples.append(new_tensor_tuple)
+ else:
+ collected_tuples.append(tensor_tuple)
- return all_input_concat_padded
+ concat_tensor = list(torch.cat(x, dim=0) for x in collected_tuples)
+ all_input_concat_padded = pad_sequence(
+ concat_tensor, batch_first=True, padding_value=pad_idx)
+ return all_input_concat_padded
- @staticmethod
- def cut_long_sequences3(all_input_concat: List[List[torch.Tensor]],
- maximum_length: int = 512,
- pad_idx: int = 1):
- all_input_concat = list(zip(*all_input_concat))
- collected_tuples = list()
- for tensor_tuple in all_input_concat:
- all_lens = tuple(len(x) for x in tensor_tuple)
- if sum(all_lens) > maximum_length:
- lengths = dict(enumerate(all_lens))
- lengths_sorted_idxes = list(x[0] for x in sorted(
- lengths.items(), key=lambda d: d[1], reverse=True))
+def cut_long_sequences3(all_input_concat: List[List[torch.Tensor]],
+ maximum_length: int = 512,
+ pad_idx: int = 1):
+ all_input_concat = list(zip(*all_input_concat))
+ collected_tuples = list()
+ for tensor_tuple in all_input_concat:
+ tensor_tuple = tuple(
+ x.masked_select(x.ne(pad_idx)) for x in tensor_tuple)
+ all_lens = tuple(len(x) for x in tensor_tuple)
- offset = ceil((sum(lengths.values()) - maximum_length) / 3)
+ if sum(all_lens) > maximum_length:
+ lengths = dict(enumerate(all_lens))
+ lengths_sorted_idxes = list(x[0] for x in sorted(
+ lengths.items(), key=lambda d: d[1], reverse=True))
- if min(all_lens) > (maximum_length
- // 3) and min(all_lens) > offset:
- lengths = dict((k, v - offset) for k, v in lengths.items())
- else:
- while sum(lengths.values()) > maximum_length:
- if lengths[lengths_sorted_idxes[0]] > lengths[
- lengths_sorted_idxes[1]]:
- offset = maximum_length - lengths[
- lengths_sorted_idxes[1]] - lengths[
- lengths_sorted_idxes[2]]
- if offset > lengths[lengths_sorted_idxes[1]]:
- lengths[lengths_sorted_idxes[0]] = offset
- else:
- lengths[lengths_sorted_idxes[0]] = lengths[
- lengths_sorted_idxes[1]]
- elif lengths[lengths_sorted_idxes[0]] == lengths[
- lengths_sorted_idxes[1]] > lengths[
- lengths_sorted_idxes[2]]:
- offset = (maximum_length
- - lengths[lengths_sorted_idxes[2]]) // 2
- if offset > lengths[lengths_sorted_idxes[2]]:
- lengths[lengths_sorted_idxes[0]] = lengths[
- lengths_sorted_idxes[1]] = offset
- else:
- lengths[lengths_sorted_idxes[0]] = lengths[
- lengths_sorted_idxes[1]] = lengths[
- lengths_sorted_idxes[2]]
+ offset = ceil((sum(lengths.values()) - maximum_length) / 3)
+
+ if min(all_lens) > (maximum_length
+ // 3) and min(all_lens) > offset:
+ lengths = dict((k, v - offset) for k, v in lengths.items())
+ else:
+ while sum(lengths.values()) > maximum_length:
+ if lengths[lengths_sorted_idxes[0]] > lengths[
+ lengths_sorted_idxes[1]]:
+ offset = maximum_length - lengths[lengths_sorted_idxes[
+ 1]] - lengths[lengths_sorted_idxes[2]]
+ if offset > lengths[lengths_sorted_idxes[1]]:
+ lengths[lengths_sorted_idxes[0]] = offset
+ else:
+ lengths[lengths_sorted_idxes[0]] = lengths[
+ lengths_sorted_idxes[1]]
+ elif lengths[lengths_sorted_idxes[0]] == lengths[
+ lengths_sorted_idxes[1]] > lengths[
+ lengths_sorted_idxes[2]]:
+ offset = (maximum_length
+ - lengths[lengths_sorted_idxes[2]]) // 2
+ if offset > lengths[lengths_sorted_idxes[2]]:
+ lengths[lengths_sorted_idxes[0]] = lengths[
+ lengths_sorted_idxes[1]] = offset
else:
lengths[lengths_sorted_idxes[0]] = lengths[
lengths_sorted_idxes[1]] = lengths[
- lengths_sorted_idxes[
- 2]] = maximum_length // 3
+ lengths_sorted_idxes[2]]
+ else:
+ lengths[lengths_sorted_idxes[0]] = lengths[
+ lengths_sorted_idxes[1]] = lengths[
+ lengths_sorted_idxes[2]] = maximum_length // 3
- new_lens = list(lengths[k] for k in range(0, len(lengths)))
- new_tensor_tuple = tuple(
- x[:y] for x, y in zip(tensor_tuple, new_lens))
+ new_lens = list(lengths[k] for k in range(0, len(lengths)))
+ new_tensor_tuple = tuple(x[:y]
+ for x, y in zip(tensor_tuple, new_lens))
- for x, y in zip(new_tensor_tuple, tensor_tuple):
- x[-1] = y[-1]
- collected_tuples.append(new_tensor_tuple)
- else:
- collected_tuples.append(tensor_tuple)
+ for x, y in zip(new_tensor_tuple, tensor_tuple):
+ x[-1] = y[-1]
+ collected_tuples.append(new_tensor_tuple)
+ else:
+ collected_tuples.append(tensor_tuple)
- concat_tensor = list(torch.cat(x, dim=0) for x in collected_tuples)
- all_input_concat_padded = pad_sequence(
- concat_tensor, batch_first=True, padding_value=pad_idx)
-
- return all_input_concat_padded
+ concat_tensor = list(torch.cat(x, dim=0) for x in collected_tuples)
+ all_input_concat_padded = pad_sequence(
+ concat_tensor, batch_first=True, padding_value=pad_idx)
+ return all_input_concat_padded
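For readers skimming the new helpers above, here is a minimal sketch of the truncation behaviour. The tensor values are illustrative, and the snippet assumes cut_long_sequences2 as defined in this file is in scope (its module path is not shown in this hunk):

import torch

# assumes cut_long_sequences2 from the file above is importable/in scope
sent_a = torch.full((400,), 5)  # first segment, 400 tokens
sent_b = torch.full((300,), 7)  # second segment, 300 tokens; 700 > 512 combined

padded = cut_long_sequences2([[sent_a], [sent_b]], maximum_length=512, pad_idx=1)
# Both segments are trimmed by a shared offset so their concatenation fits in
# 512 tokens, each segment keeps its original last token, and the result is
# padded to a common length with pad_idx.
print(padded.shape)  # torch.Size([1, 512])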
diff --git a/modelscope/models/science/unifold/data/msa_pairing.py b/modelscope/models/science/unifold/data/msa_pairing.py
index cc65962c..77c4e9a6 100644
--- a/modelscope/models/science/unifold/data/msa_pairing.py
+++ b/modelscope/models/science/unifold/data/msa_pairing.py
@@ -115,7 +115,7 @@ def pad_features(feature: np.ndarray, feature_name: str) -> np.ndarray:
Returns:
The feature with an additional padding row.
"""
- assert feature.dtype != np.dtype(np.string_)
+ assert feature.dtype != np.dtype(np.str_)
if feature_name in (
'msa_all_seq',
'msa_mask_all_seq',
diff --git a/modelscope/models/science/unifold/msa/templates.py b/modelscope/models/science/unifold/msa/templates.py
index d1ff8cf1..f2d3d79c 100644
--- a/modelscope/models/science/unifold/msa/templates.py
+++ b/modelscope/models/science/unifold/msa/templates.py
@@ -1100,9 +1100,9 @@ class HmmsearchHitFeaturizer(TemplateHitFeaturizer):
np.zeros((1, num_res, residue_constants.atom_type_num, 3),
np.float32),
'template_domain_names':
- np.array([''.encode()], dtype=np.object),
+ np.array([''.encode()], dtype=np.object_),
'template_sequence':
- np.array([''.encode()], dtype=np.object),
+ np.array([''.encode()], dtype=np.object_),
'template_sum_probs':
np.array([0], dtype=np.float32),
}
diff --git a/modelscope/msdatasets/auth/auth_config.py b/modelscope/msdatasets/auth/auth_config.py
index 576a6efd..e09db93c 100644
--- a/modelscope/msdatasets/auth/auth_config.py
+++ b/modelscope/msdatasets/auth/auth_config.py
@@ -23,6 +23,15 @@ class OssAuthConfig(BaseAuthConfig):
cookies=cookies, git_token=git_token, user_info=user_info)
+class VirgoAuthConfig(BaseAuthConfig):
+ """The authorization config for virgo dataset."""
+
+ def __init__(self, cookies: CookieJar, git_token: str,
+ user_info: Tuple[str, str]):
+ super().__init__(
+ cookies=cookies, git_token=git_token, user_info=user_info)
+
+
class MaxComputeAuthConfig(BaseAuthConfig):
# TODO: MaxCompute dataset to be supported.
def __init__(self, cookies: CookieJar, git_token: str,
diff --git a/modelscope/msdatasets/context/dataset_context_config.py b/modelscope/msdatasets/context/dataset_context_config.py
index 26b05f7d..4007d60c 100644
--- a/modelscope/msdatasets/context/dataset_context_config.py
+++ b/modelscope/msdatasets/context/dataset_context_config.py
@@ -42,6 +42,7 @@ class DatasetContextConfig:
self.data_files = data_files
self.cache_root_dir = cache_root_dir
self.use_streaming = use_streaming
+ self.download_virgo_files: bool = False
@property
def config_kwargs(self) -> dict:
diff --git a/modelscope/msdatasets/data_loader/data_loader.py b/modelscope/msdatasets/data_loader/data_loader.py
index 1ef92372..b1450c61 100644
--- a/modelscope/msdatasets/data_loader/data_loader.py
+++ b/modelscope/msdatasets/data_loader/data_loader.py
@@ -1,11 +1,12 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
from abc import ABC, abstractmethod
from typing import Optional, Union
from datasets import (Dataset, DatasetBuilder, DatasetDict, IterableDataset,
IterableDatasetDict)
-from datasets import load_dataset as hf_data_loader
+from datasets import load_dataset as hf_load_dataset
from modelscope.hub.api import ModelScopeConfig
from modelscope.msdatasets.auth.auth_config import OssAuthConfig
@@ -13,13 +14,18 @@ from modelscope.msdatasets.context.dataset_context_config import \
DatasetContextConfig
from modelscope.msdatasets.data_files.data_files_manager import \
DataFilesManager
-from modelscope.msdatasets.dataset_cls.dataset import ExternalDataset
+from modelscope.msdatasets.dataset_cls import ExternalDataset
from modelscope.msdatasets.meta.data_meta_manager import DataMetaManager
-from modelscope.utils.constant import DatasetFormations
+from modelscope.utils.constant import (DatasetFormations, DatasetPathName,
+ DownloadMode, VirgoDatasetConfig)
+from modelscope.utils.logger import get_logger
+from modelscope.utils.url_utils import valid_url
+
+logger = get_logger()
-class BaseDataLoader(ABC):
- """Base dataset loader to load data."""
+class BaseDownloader(ABC):
+ """Base dataset downloader to load data."""
def __init__(self, dataset_context_config: DatasetContextConfig):
self.dataset_context_config = dataset_context_config
@@ -28,35 +34,35 @@ class BaseDataLoader(ABC):
def process(self):
"""The entity processing pipeline for fetching the data. """
raise NotImplementedError(
- f'No default implementation provided for {BaseDataLoader.__name__}.process.'
+ f'No default implementation provided for {BaseDownloader.__name__}.process.'
)
@abstractmethod
def _authorize(self):
raise NotImplementedError(
- f'No default implementation provided for {BaseDataLoader.__name__}._authorize.'
+ f'No default implementation provided for {BaseDownloader.__name__}._authorize.'
)
@abstractmethod
def _build(self):
raise NotImplementedError(
- f'No default implementation provided for {BaseDataLoader.__name__}._build.'
+ f'No default implementation provided for {BaseDownloader.__name__}._build.'
)
@abstractmethod
def _prepare_and_download(self):
raise NotImplementedError(
- f'No default implementation provided for {BaseDataLoader.__name__}._prepare_and_download.'
+ f'No default implementation provided for {BaseDownloader.__name__}._prepare_and_download.'
)
@abstractmethod
def _post_process(self):
raise NotImplementedError(
- f'No default implementation provided for {BaseDataLoader.__name__}._post_process.'
+ f'No default implementation provided for {BaseDownloader.__name__}._post_process.'
)
-class OssDataLoader(BaseDataLoader):
+class OssDownloader(BaseDownloader):
def __init__(self, dataset_context_config: DatasetContextConfig):
super().__init__(dataset_context_config)
@@ -127,7 +133,7 @@ class OssDataLoader(BaseDataLoader):
raise f'meta-file: {dataset_name}.py not found on the modelscope hub.'
if dataset_py_script and dataset_formation == DatasetFormations.hf_compatible:
- self.dataset = hf_data_loader(
+ self.dataset = hf_load_dataset(
dataset_py_script,
name=subset_name,
revision=version,
@@ -147,8 +153,151 @@ class OssDataLoader(BaseDataLoader):
self.dataset.custom_map = self.dataset_context_config.data_meta_config.meta_type_map
-class MaxComputeDataLoader(BaseDataLoader):
- """Data loader for MaxCompute data source."""
+class VirgoDownloader(BaseDownloader):
+ """Data downloader for Virgo data source."""
+
+ def __init__(self, dataset_context_config: DatasetContextConfig):
+ super().__init__(dataset_context_config)
+ self.dataset = None
+
+ def process(self):
+ """
+ Sequential fetching process for the Virgo dataset: authorize -> build -> prepare_and_download -> post_process
+ """
+ self._authorize()
+ self._build()
+ self._prepare_and_download()
+ self._post_process()
+
+ def _authorize(self):
+ """Authorization of virgo dataset."""
+ from modelscope.msdatasets.auth.auth_config import VirgoAuthConfig
+
+ cookies = ModelScopeConfig.get_cookies()
+ user_info = ModelScopeConfig.get_user_info()
+
+ if not self.dataset_context_config.auth_config:
+ auth_config = VirgoAuthConfig(
+ cookies=cookies, git_token='', user_info=user_info)
+ else:
+ auth_config = self.dataset_context_config.auth_config
+ auth_config.cookies = cookies
+ auth_config.git_token = ''
+ auth_config.user_info = user_info
+
+ self.dataset_context_config.auth_config = auth_config
+
+ def _build(self):
+ """
+ Fetch the Virgo meta information and build the Virgo dataset.
+ """
+ from modelscope.msdatasets.dataset_cls.dataset import VirgoDataset
+ import pandas as pd
+
+ meta_manager = DataMetaManager(self.dataset_context_config)
+ meta_manager.fetch_virgo_meta()
+ self.dataset_context_config = meta_manager.dataset_context_config
+ self.dataset = VirgoDataset(
+ **self.dataset_context_config.config_kwargs)
+
+ virgo_cache_dir = os.path.join(
+ self.dataset_context_config.cache_root_dir,
+ self.dataset_context_config.namespace,
+ self.dataset_context_config.dataset_name,
+ self.dataset_context_config.version)
+ os.makedirs(
+ os.path.join(virgo_cache_dir, DatasetPathName.META_NAME),
+ exist_ok=True)
+ meta_content_cache_file = os.path.join(virgo_cache_dir,
+ DatasetPathName.META_NAME,
+ 'meta_content.csv')
+
+ if isinstance(self.dataset.meta, pd.DataFrame):
+ meta_content_df = self.dataset.meta
+ meta_content_df.to_csv(meta_content_cache_file, index=False)
+ self.dataset.meta_content_cache_file = meta_content_cache_file
+ self.dataset.virgo_cache_dir = virgo_cache_dir
+ logger.info(
+ f'Virgo meta content saved to {meta_content_cache_file}')
+
+ def _prepare_and_download(self):
+ """
+ Fetch data files from the OSS urls referenced in the Virgo meta content.
+ """
+
+ download_virgo_files = self.dataset_context_config.config_kwargs.pop(
+ 'download_virgo_files', '')
+
+ if self.dataset.data_type == 0 and download_virgo_files:
+ import requests
+ import json
+ import shutil
+ from urllib.parse import urlparse
+ from functools import partial
+
+ def download_file(meta_info_val, data_dir):
+ file_url_list = []
+ file_path_list = []
+ try:
+ meta_info_val = json.loads(meta_info_val)
+ # get `url` first; if it is missing, fall back to the `inner_url` entries
+ file_url = meta_info_val.get('url', '')
+ if file_url:
+ file_url_list.append(file_url)
+ else:
+ tmp_inner_member_list = meta_info_val.get(
+ 'inner_url', '')
+ for item in tmp_inner_member_list:
+ file_url = item.get('url', '')
+ if file_url:
+ file_url_list.append(file_url)
+
+ for one_file_url in file_url_list:
+ is_url = valid_url(one_file_url)
+ if is_url:
+ url_parse_res = urlparse(one_file_url)
+ file_name = os.path.basename(url_parse_res.path)
+ else:
+ raise ValueError(f'Unsupported url: {one_file_url}')
+ file_path = os.path.join(data_dir, file_name)
+ file_path_list.append((one_file_url, file_path))
+
+ except Exception as e:
+ logger.error(f'parse virgo meta info error: {e}')
+ file_path_list = []
+
+ for file_url_item, file_path_item in file_path_list:
+ if file_path_item and not os.path.exists(file_path_item):
+ logger.info(f'Downloading file to {file_path_item}')
+ os.makedirs(data_dir, exist_ok=True)
+ with open(file_path_item, 'wb') as f:
+ f.write(requests.get(file_url_item).content)
+
+ return file_path_list
+
+ self.dataset.download_virgo_files = True
+ download_mode = self.dataset_context_config.download_mode
+ data_files_dir = os.path.join(self.dataset.virgo_cache_dir,
+ DatasetPathName.DATA_FILES_NAME)
+
+ if download_mode == DownloadMode.FORCE_REDOWNLOAD:
+ shutil.rmtree(data_files_dir, ignore_errors=True)
+
+ from tqdm import tqdm
+ tqdm.pandas(desc='apply download_file')
+ self.dataset.meta[
+ VirgoDatasetConfig.
+ col_cache_file] = self.dataset.meta.progress_apply(
+ lambda row: partial(
+ download_file, data_dir=data_files_dir)(row.meta_info),
+ axis=1)
+
+ def _post_process(self):
+ ...
+
+
+class MaxComputeDownloader(BaseDownloader):
+ """Data downloader for MaxCompute data source."""
# TODO: MaxCompute data source to be supported .
def __init__(self, dataset_context_config: DatasetContextConfig):
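For context, a minimal sketch of the BaseDownloader contract introduced above: concrete downloaders implement the individual stages and run them in a fixed order from process(). The EchoDownloader class below is illustrative only and is not part of the change.

from modelscope.msdatasets.data_loader.data_loader import BaseDownloader

class EchoDownloader(BaseDownloader):
    """Toy downloader that only logs each stage of the pipeline."""

    def process(self):
        # The concrete downloaders above follow the same fixed order.
        self._authorize()
        self._build()
        self._prepare_and_download()
        self._post_process()

    def _authorize(self):
        print('authorize')

    def _build(self):
        print('build')

    def _prepare_and_download(self):
        print('prepare_and_download')

    def _post_process(self):
        print('post_process')

EchoDownloader(dataset_context_config=None).process()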
diff --git a/modelscope/msdatasets/data_loader/data_loader_manager.py b/modelscope/msdatasets/data_loader/data_loader_manager.py
index 3c8a638a..5be32de1 100644
--- a/modelscope/msdatasets/data_loader/data_loader_manager.py
+++ b/modelscope/msdatasets/data_loader/data_loader_manager.py
@@ -9,7 +9,7 @@ from datasets import load_dataset as hf_data_loader
from modelscope.hub.api import HubApi
from modelscope.msdatasets.context.dataset_context_config import \
DatasetContextConfig
-from modelscope.msdatasets.data_loader.data_loader import OssDataLoader
+from modelscope.msdatasets.data_loader.data_loader import OssDownloader
from modelscope.utils.constant import EXTENSIONS_TO_LOAD
from modelscope.utils.logger import get_logger
@@ -127,7 +127,7 @@ class RemoteDataLoaderManager(DataLoaderManager):
return dataset_ret
# To use the modelscope data loader
elif data_loader_type == RemoteDataLoaderType.MS_DATA_LOADER:
- oss_data_loader = OssDataLoader(
+ oss_data_loader = OssDownloader(
dataset_context_config=self.dataset_context_config)
oss_data_loader.process()
# download statistics
diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/__init__.py
index 9eb62168..a367fe79 100644
--- a/modelscope/msdatasets/dataset_cls/custom_datasets/__init__.py
+++ b/modelscope/msdatasets/dataset_cls/custom_datasets/__init__.py
@@ -27,12 +27,6 @@ if TYPE_CHECKING:
from .video_frame_interpolation import VideoFrameInterpolationDataset
from .video_stabilization import VideoStabilizationDataset
from .video_super_resolution import VideoSuperResolutionDataset
- from .image_semantic_segmentation import SegDataset
- from .face_2d_keypoins import FaceKeypointDataset
- from .hand_2d_keypoints import HandCocoWholeBodyDataset
- from .human_wholebody_keypoint import WholeBodyCocoTopDownDataset
- from .image_classification import ClsDataset
- from .object_detection import DetDataset, DetImagesMixDataset
from .ocr_detection import DataLoader, ImageDataset, QuadMeasurer
from .ocr_recognition_dataset import OCRRecognitionDataset
from .image_colorization import ImageColorizationDataset
@@ -66,12 +60,6 @@ else:
'video_frame_interpolation': ['VideoFrameInterpolationDataset'],
'video_stabilization': ['VideoStabilizationDataset'],
'video_super_resolution': ['VideoSuperResolutionDataset'],
- 'image_semantic_segmentation': ['SegDataset'],
- 'face_2d_keypoins': ['FaceKeypointDataset'],
- 'hand_2d_keypoints': ['HandCocoWholeBodyDataset'],
- 'human_wholebody_keypoint': ['WholeBodyCocoTopDownDataset'],
- 'image_classification': ['ClsDataset'],
- 'object_detection': ['DetDataset', 'DetImagesMixDataset'],
'ocr_detection': ['DataLoader', 'ImageDataset', 'QuadMeasurer'],
'ocr_recognition_dataset': ['OCRRecognitionDataset'],
'image_colorization': ['ImageColorizationDataset'],
diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/__init__.py
deleted file mode 100644
index e9d76b7e..00000000
--- a/modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import TYPE_CHECKING
-
-from modelscope.utils.import_utils import LazyImportModule
-
-if TYPE_CHECKING:
- from .face_2d_keypoints_dataset import FaceKeypointDataset
-
-else:
- _import_structure = {'face_2d_keypoints_dataset': ['FaceKeypointDataset']}
-
- import sys
-
- sys.modules[__name__] = LazyImportModule(
- __name__,
- globals()['__file__'],
- _import_structure,
- module_spec=__spec__,
- extra_objects={},
- )
diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/face_2d_keypoints_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/face_2d_keypoints_dataset.py
deleted file mode 100644
index 9f55901f..00000000
--- a/modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/face_2d_keypoints_dataset.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from easycv.datasets.face import FaceKeypointDataset as _FaceKeypointDataset
-
-from modelscope.metainfo import CustomDatasets
-from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS
-from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \
- EasyCVBaseDataset
-from modelscope.utils.constant import Tasks
-
-
-@CUSTOM_DATASETS.register_module(
- group_key=Tasks.face_2d_keypoints,
- module_name=CustomDatasets.Face2dKeypointsDataset)
-class FaceKeypointDataset(EasyCVBaseDataset, _FaceKeypointDataset):
- """EasyCV dataset for face 2d keypoints.
-
- Args:
- split_config (dict): Dataset root path from MSDataset, e.g.
- {"train":"local cache path"} or {"evaluation":"local cache path"}.
- preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for
- the model if supplied. Not support yet.
- mode: Training or Evaluation.
- """
-
- def __init__(self,
- split_config=None,
- preprocessor=None,
- mode=None,
- *args,
- **kwargs) -> None:
- EasyCVBaseDataset.__init__(
- self,
- split_config=split_config,
- preprocessor=preprocessor,
- mode=mode,
- args=args,
- kwargs=kwargs)
- _FaceKeypointDataset.__init__(self, *args, **kwargs)
diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/__init__.py
deleted file mode 100644
index 3af670e3..00000000
--- a/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/__init__.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import TYPE_CHECKING
-
-from modelscope.utils.import_utils import LazyImportModule
-
-if TYPE_CHECKING:
- from .hand_2d_keypoints_dataset import HandCocoWholeBodyDataset
-
-else:
- _import_structure = {
- 'hand_2d_keypoints_dataset': ['HandCocoWholeBodyDataset']
- }
-
- import sys
-
- sys.modules[__name__] = LazyImportModule(
- __name__,
- globals()['__file__'],
- _import_structure,
- module_spec=__spec__,
- extra_objects={},
- )
diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/hand_2d_keypoints_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/hand_2d_keypoints_dataset.py
deleted file mode 100644
index c6163715..00000000
--- a/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/hand_2d_keypoints_dataset.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from easycv.datasets.pose import \
- HandCocoWholeBodyDataset as _HandCocoWholeBodyDataset
-
-from modelscope.metainfo import CustomDatasets
-from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS
-from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \
- EasyCVBaseDataset
-from modelscope.utils.constant import Tasks
-
-
-@CUSTOM_DATASETS.register_module(
- group_key=Tasks.hand_2d_keypoints,
- module_name=CustomDatasets.HandCocoWholeBodyDataset)
-class HandCocoWholeBodyDataset(EasyCVBaseDataset, _HandCocoWholeBodyDataset):
- """EasyCV dataset for human hand 2d keypoints.
-
- Args:
- split_config (dict): Dataset root path from MSDataset, e.g.
- {"train":"local cache path"} or {"evaluation":"local cache path"}.
- preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for
- the model if supplied. Not support yet.
- mode: Training or Evaluation.
- """
-
- def __init__(self,
- split_config=None,
- preprocessor=None,
- mode=None,
- *args,
- **kwargs) -> None:
- EasyCVBaseDataset.__init__(
- self,
- split_config=split_config,
- preprocessor=preprocessor,
- mode=mode,
- args=args,
- kwargs=kwargs)
- _HandCocoWholeBodyDataset.__init__(self, *args, **kwargs)
diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/__init__.py
deleted file mode 100644
index 472ed2d8..00000000
--- a/modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/__init__.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import TYPE_CHECKING
-
-from modelscope.utils.import_utils import LazyImportModule
-
-if TYPE_CHECKING:
- from .human_wholebody_keypoint_dataset import WholeBodyCocoTopDownDataset
-
-else:
- _import_structure = {
- 'human_wholebody_keypoint_dataset': ['WholeBodyCocoTopDownDataset']
- }
-
- import sys
-
- sys.modules[__name__] = LazyImportModule(
- __name__,
- globals()['__file__'],
- _import_structure,
- module_spec=__spec__,
- extra_objects={},
- )
diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/human_wholebody_keypoint_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/human_wholebody_keypoint_dataset.py
deleted file mode 100644
index 59c97af8..00000000
--- a/modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/human_wholebody_keypoint_dataset.py
+++ /dev/null
@@ -1,40 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from easycv.datasets.pose import \
- WholeBodyCocoTopDownDataset as _WholeBodyCocoTopDownDataset
-
-from modelscope.metainfo import CustomDatasets
-from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS
-from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \
- EasyCVBaseDataset
-from modelscope.utils.constant import Tasks
-
-
-@CUSTOM_DATASETS.register_module(
- group_key=Tasks.human_wholebody_keypoint,
- module_name=CustomDatasets.HumanWholeBodyKeypointDataset)
-class WholeBodyCocoTopDownDataset(EasyCVBaseDataset,
- _WholeBodyCocoTopDownDataset):
- """EasyCV dataset for human whole body 2d keypoints.
-
- Args:
- split_config (dict): Dataset root path from MSDataset, e.g.
- {"train":"local cache path"} or {"evaluation":"local cache path"}.
- preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for
- the model if supplied. Not support yet.
- mode: Training or Evaluation.
- """
-
- def __init__(self,
- split_config=None,
- preprocessor=None,
- mode=None,
- *args,
- **kwargs) -> None:
- EasyCVBaseDataset.__init__(
- self,
- split_config=split_config,
- preprocessor=preprocessor,
- mode=mode,
- args=args,
- kwargs=kwargs)
- _WholeBodyCocoTopDownDataset.__init__(self, *args, **kwargs)
diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/__init__.py
deleted file mode 100644
index 95e8d7a1..00000000
--- a/modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import TYPE_CHECKING
-
-from modelscope.utils.import_utils import LazyImportModule
-
-if TYPE_CHECKING:
- from .classification_dataset import ClsDataset
-
-else:
- _import_structure = {'classification_dataset': ['ClsDataset']}
-
- import sys
-
- sys.modules[__name__] = LazyImportModule(
- __name__,
- globals()['__file__'],
- _import_structure,
- module_spec=__spec__,
- extra_objects={},
- )
diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/classification_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/classification_dataset.py
deleted file mode 100644
index 386810c7..00000000
--- a/modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/classification_dataset.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from easycv.datasets.classification import ClsDataset as _ClsDataset
-
-from modelscope.metainfo import CustomDatasets
-from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS
-from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \
- EasyCVBaseDataset
-from modelscope.utils.constant import Tasks
-
-
-@CUSTOM_DATASETS.register_module(
- group_key=Tasks.image_classification,
- module_name=CustomDatasets.ClsDataset)
-class ClsDataset(_ClsDataset):
- """EasyCV dataset for classification.
-
- Args:
- split_config (dict): Dataset root path from MSDataset, e.g.
- {"train":"local cache path"} or {"evaluation":"local cache path"}.
- preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for
- the model if supplied. Not support yet.
- mode: Training or Evaluation.
- """
-
- def __init__(self,
- split_config=None,
- preprocessor=None,
- mode=None,
- *args,
- **kwargs) -> None:
- EasyCVBaseDataset.__init__(
- self,
- split_config=split_config,
- preprocessor=preprocessor,
- mode=mode,
- args=args,
- kwargs=kwargs)
- _ClsDataset.__init__(self, *args, **kwargs)
diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/__init__.py
deleted file mode 100644
index 26121bdb..00000000
--- a/modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import TYPE_CHECKING
-
-from modelscope.utils.import_utils import LazyImportModule
-
-if TYPE_CHECKING:
- from .segmentation_dataset import SegDataset
-
-else:
- _import_structure = {'easycv_segmentation': ['SegDataset']}
-
- import sys
-
- sys.modules[__name__] = LazyImportModule(
- __name__,
- globals()['__file__'],
- _import_structure,
- module_spec=__spec__,
- extra_objects={},
- )
diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/segmentation_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/segmentation_dataset.py
deleted file mode 100644
index 71e7c42b..00000000
--- a/modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/segmentation_dataset.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from easycv.datasets.segmentation import SegDataset as _SegDataset
-
-from modelscope.metainfo import CustomDatasets
-from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS
-from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \
- EasyCVBaseDataset
-from modelscope.utils.constant import Tasks
-
-
-@CUSTOM_DATASETS.register_module(
- group_key=Tasks.image_segmentation, module_name=CustomDatasets.SegDataset)
-class SegDataset(EasyCVBaseDataset, _SegDataset):
- """EasyCV dataset for Sementic segmentation.
- For more details, please refer to :
- https://github.com/alibaba/EasyCV/blob/master/easycv/datasets/segmentation/raw.py .
-
- Args:
- split_config (dict): Dataset root path from MSDataset, e.g.
- {"train":"local cache path"} or {"evaluation":"local cache path"}.
- preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for
- the model if supplied. Not support yet.
- mode: Training or Evaluation.
- data_source: Data source config to parse input data.
- pipeline: Sequence of transform object or config dict to be composed.
- ignore_index (int): Label index to be ignored.
- profiling: If set True, will print transform time.
- """
-
- def __init__(self,
- split_config=None,
- preprocessor=None,
- mode=None,
- *args,
- **kwargs) -> None:
- EasyCVBaseDataset.__init__(
- self,
- split_config=split_config,
- preprocessor=preprocessor,
- mode=mode,
- args=args,
- kwargs=kwargs)
- _SegDataset.__init__(self, *args, **kwargs)
diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/__init__.py
deleted file mode 100644
index 403163e9..00000000
--- a/modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/__init__.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import TYPE_CHECKING
-
-from modelscope.utils.import_utils import LazyImportModule
-
-if TYPE_CHECKING:
- from .detection_dataset import DetDataset, DetImagesMixDataset
-
-else:
- _import_structure = {
- 'detection_dataset': ['DetDataset', 'DetImagesMixDataset']
- }
-
- import sys
-
- sys.modules[__name__] = LazyImportModule(
- __name__,
- globals()['__file__'],
- _import_structure,
- module_spec=__spec__,
- extra_objects={},
- )
diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/detection_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/detection_dataset.py
deleted file mode 100644
index 66c11f64..00000000
--- a/modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/detection_dataset.py
+++ /dev/null
@@ -1,98 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-
-from easycv.datasets.detection import DetDataset as _DetDataset
-from easycv.datasets.detection import \
- DetImagesMixDataset as _DetImagesMixDataset
-
-from modelscope.metainfo import CustomDatasets
-from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS
-from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \
- EasyCVBaseDataset
-from modelscope.utils.constant import Tasks
-
-
-@CUSTOM_DATASETS.register_module(
- group_key=Tasks.image_object_detection,
- module_name=CustomDatasets.DetDataset)
-@CUSTOM_DATASETS.register_module(
- group_key=Tasks.image_segmentation, module_name=CustomDatasets.DetDataset)
-class DetDataset(EasyCVBaseDataset, _DetDataset):
- """EasyCV dataset for object detection.
- For more details, please refer to https://github.com/alibaba/EasyCV/blob/master/easycv/datasets/detection/raw.py .
-
- Args:
- split_config (dict): Dataset root path from MSDataset, e.g.
- {"train":"local cache path"} or {"evaluation":"local cache path"}.
- preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for
- the model if supplied. Not support yet.
- mode: Training or Evaluation.
- data_source: Data source config to parse input data.
- pipeline: Transform config list
- profiling: If set True, will print pipeline time
- classes: A list of class names, used in evaluation for result and groundtruth visualization
- """
-
- def __init__(self,
- split_config=None,
- preprocessor=None,
- mode=None,
- *args,
- **kwargs) -> None:
- EasyCVBaseDataset.__init__(
- self,
- split_config=split_config,
- preprocessor=preprocessor,
- mode=mode,
- args=args,
- kwargs=kwargs)
- _DetDataset.__init__(self, *args, **kwargs)
-
-
-@CUSTOM_DATASETS.register_module(
- group_key=Tasks.image_object_detection,
- module_name=CustomDatasets.DetImagesMixDataset)
-@CUSTOM_DATASETS.register_module(
- group_key=Tasks.domain_specific_object_detection,
- module_name=CustomDatasets.DetImagesMixDataset)
-class DetImagesMixDataset(EasyCVBaseDataset, _DetImagesMixDataset):
- """EasyCV dataset for object detection, a wrapper of multiple images mixed dataset.
- Suitable for training on multiple images mixed data augmentation like
- mosaic and mixup. For the augmentation pipeline of mixed image data,
- the `get_indexes` method needs to be provided to obtain the image
- indexes, and you can set `skip_flags` to change the pipeline running
- process. At the same time, we provide the `dynamic_scale` parameter
- to dynamically change the output image size.
- output boxes format: cx, cy, w, h
-
- For more details, please refer to https://github.com/alibaba/EasyCV/blob/master/easycv/datasets/detection/mix.py .
-
- Args:
- split_config (dict): Dataset root path from MSDataset, e.g.
- {"train":"local cache path"} or {"evaluation":"local cache path"}.
- preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for
- the model if supplied. Not support yet.
- mode: Training or Evaluation.
- data_source (:obj:`DetSourceCoco`): Data source config to parse input data.
- pipeline (Sequence[dict]): Sequence of transform object or
- config dict to be composed.
- dynamic_scale (tuple[int], optional): The image scale can be changed
- dynamically. Default to None.
- skip_type_keys (list[str], optional): Sequence of type string to
- be skip pipeline. Default to None.
- label_padding: out labeling padding [N, 120, 5]
- """
-
- def __init__(self,
- split_config=None,
- preprocessor=None,
- mode=None,
- *args,
- **kwargs) -> None:
- EasyCVBaseDataset.__init__(
- self,
- split_config=split_config,
- preprocessor=preprocessor,
- mode=mode,
- args=args,
- kwargs=kwargs)
- _DetImagesMixDataset.__init__(self, *args, **kwargs)
diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_recognition_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_recognition_dataset.py
index bc9cd3ca..bfbb6eb3 100644
--- a/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_recognition_dataset.py
+++ b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_recognition_dataset.py
@@ -34,10 +34,12 @@ def Q2B(uchar):
Tasks.ocr_recognition, module_name=Models.ocr_recognition)
class OCRRecognitionDataset(TorchCustomDataset):
- def __init__(self, **kwargs):
+ def __init__(self, local_lmdb=None, preprocessor=None, **kwargs):
split_config = kwargs['split_config']
cache_root = next(iter(split_config.values()))
lmdb_path = os.path.join(cache_root, DATASET_STRUCTURE['lmdb'])
+ if local_lmdb is not None:
+ lmdb_path = local_lmdb
self.env = lmdb.open(
lmdb_path,
max_readers=1,
@@ -51,7 +53,7 @@ class OCRRecognitionDataset(TorchCustomDataset):
self.nSamples = 0
with self.env.begin(write=False) as txn:
self.nSamples = int(txn.get('num-samples'.encode()))
- self.reco_preprocess = kwargs['preprocessor']
+ self.reco_preprocess = preprocessor
def __len__(self):
return self.nSamples
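A hedged sketch of the new local_lmdb argument: it lets callers point OCRRecognitionDataset at an lmdb directory directly instead of deriving the path from split_config. The paths below are placeholders, not real dataset locations.

from modelscope.msdatasets.dataset_cls.custom_datasets.ocr_recognition_dataset import \
    OCRRecognitionDataset

ds = OCRRecognitionDataset(
    split_config={'train': '/path/to/cache'},  # placeholder; still required by the base class
    local_lmdb='/path/to/local/lmdb',          # placeholder; overrides the derived lmdb path
    preprocessor=None)                         # no extra preprocessing
print(len(ds))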
diff --git a/modelscope/msdatasets/dataset_cls/dataset.py b/modelscope/msdatasets/dataset_cls/dataset.py
index 4acf51b1..9114285e 100644
--- a/modelscope/msdatasets/dataset_cls/dataset.py
+++ b/modelscope/msdatasets/dataset_cls/dataset.py
@@ -4,11 +4,15 @@ import copy
import os
import datasets
+import pandas as pd
from datasets import IterableDataset
-from PIL import Image
-from modelscope.utils.constant import EXTENSIONS_TO_LOAD
+from modelscope.msdatasets.utils.maxcompute_utils import MaxComputeUtil
+from modelscope.utils.constant import (DEFAULT_MAXCOMPUTE_ENDPOINT,
+ EXTENSIONS_TO_LOAD, MaxComputeEnvs,
+ VirgoDatasetConfig)
from modelscope.utils.logger import get_logger
+from modelscope.utils.url_utils import fetch_csv_with_url, valid_url
logger = get_logger()
@@ -97,6 +101,7 @@ class NativeIterableDataset(IterableDataset):
ex_cache_path = dl_manager.download_and_extract(v)
ret[k] = ex_cache_path
if k.endswith('Image:FILE'):
+ from PIL import Image
ret[k + ':Object'] = Image.open(fp=ex_cache_path)
if k.endswith('Audio:FILE'):
import torchaudio
@@ -108,3 +113,154 @@ class NativeIterableDataset(IterableDataset):
def __len__(self):
return 1
+
+
+class VirgoDataset(object):
+ """Dataset class for Virgo.
+
+ Attributes:
+ _meta_content (str): Virgo meta data content; can be a url pointing to a csv file.
+ data_type (int): Virgo dataset type. 0 - standard Virgo dataset; others - user-defined dataset (to be supported).
+
+ Examples:
+ >>> from modelscope.msdatasets.dataset_cls.dataset import VirgoDataset
+ >>> input_kwargs = {'metaContent': 'http://xxx-xxx/xxx.csv', 'samplingType': 0}
+ >>> virgo_dataset = VirgoDataset(**input_kwargs)
+ >>> print(virgo_dataset[1])
+ >>> print(len(virgo_dataset))
+ >>> for line in virgo_dataset:
+ >>> print(line)
+
+ Note: If you set `download_virgo_files` to True via
+ MsDataset.load(dataset_name='your-virgo-dataset-id', hub=Hubs.virgo, download_virgo_files=True),
+ the local cache path of each downloaded file is available in the `cache_file` column.
+ >>> if virgo_dataset.download_virgo_files:
+ >>> print(virgo_dataset[1].get('cache_file'))
+ """
+
+ def __init__(self, **kwargs):
+
+ self._meta_content: str = ''
+ self.data_type: int = 0
+ self.odps_table_name: str = ''
+ self.odps_table_partition: str = None
+ self._odps_utils: MaxComputeUtil = None
+ self.config_kwargs = kwargs
+
+ self._meta: pd.DataFrame = pd.DataFrame()
+
+ self._meta_content = self.config_kwargs.pop(
+ VirgoDatasetConfig.meta_content, '')
+ self.data_type = self.config_kwargs.pop(
+ VirgoDatasetConfig.sampling_type, 0)
+
+ self._check_variables()
+ self._parse_meta()
+
+ self.meta_content_cache_file = ''
+ self.virgo_cache_dir = ''
+ self.download_virgo_files: bool = False
+
+ self.odps_table_ins = None
+ self.odps_reader_ins = None
+ self.odps_batch_size = self.config_kwargs.pop('odps_batch_size', 100)
+ self.odps_limit = self.config_kwargs.pop('odps_limit', None)
+ self.odps_drop_last = self.config_kwargs.pop('odps_drop_last', False)
+ if self._odps_utils:
+ self.odps_table_ins, self.odps_reader_ins = self._odps_utils.get_table_reader_ins(
+ self.odps_table_name, self.odps_table_partition)
+
+ def __getitem__(self, index):
+ if self.odps_reader_ins:
+ return MaxComputeUtil.gen_reader_item(
+ reader=self.odps_reader_ins,
+ index=index,
+ batch_size_in=self.odps_batch_size,
+ limit_in=self.odps_limit,
+ drop_last_in=self.odps_drop_last,
+ partitions=self.odps_table_ins.table_schema.partitions,
+ columns=self.odps_table_ins.table_schema.names)
+ return self._meta.iloc[index].to_dict()
+
+ def __len__(self):
+ if isinstance(self._meta, dict):
+ return self._meta.get('odpsCount', 0)
+ return len(self._meta)
+
+ def __iter__(self):
+ if self.odps_reader_ins:
+ odps_batch_data = MaxComputeUtil.gen_reader_batch(
+ reader=self.odps_reader_ins,
+ batch_size_in=self.odps_batch_size,
+ limit_in=self.odps_limit,
+ drop_last_in=self.odps_drop_last,
+ partitions=self.odps_table_ins.table_schema.partitions,
+ columns=self.odps_table_ins.table_schema.names)
+ for batch in odps_batch_data:
+ yield batch
+ else:
+ for _, row in self._meta.iterrows():
+ yield row.to_dict()
+
+ @property
+ def meta(self) -> pd.DataFrame:
+ """
+ Virgo meta data. Contains columns: id, meta_info, analysis_result, external_info and
+ cache_file (if download_virgo_files is True).
+ """
+ return self._meta
+
+ def _parse_meta(self):
+ # Fetch csv content
+ if isinstance(self._meta_content, str) and valid_url(
+ self._meta_content):
+ meta_content_df = fetch_csv_with_url(self._meta_content)
+ self._meta = meta_content_df
+ elif isinstance(self._meta_content, dict):
+ self._meta = self._meta_content
+ self.odps_table_name = self._meta.get('odpsTableName', '')
+ self.odps_table_partition = self._meta.get('odpsTablePartition',
+ None)
+ self._odps_utils = self._get_odps_info()
+ else:
+ raise ValueError('The meta content must be a url or a dict.')
+
+ @staticmethod
+ def _get_odps_info() -> MaxComputeUtil:
+ """
+ Get MaxComputeUtil instance.
+
+ Args:
+ None
+
+ Returns:
+ MaxComputeUtil instance.
+ """
+ access_id = os.environ.get(MaxComputeEnvs.ACCESS_ID, '')
+ access_key = os.environ.get(MaxComputeEnvs.ACCESS_SECRET_KEY, '')
+ proj_name = os.environ.get(MaxComputeEnvs.PROJECT_NAME, '')
+ endpoint = os.environ.get(MaxComputeEnvs.ENDPOINT,
+ DEFAULT_MAXCOMPUTE_ENDPOINT)
+
+ if not access_id or not access_key or not proj_name:
+ raise ValueError(
+ f'Please set MaxCompute envs for Virgo: {MaxComputeEnvs.ACCESS_ID}, '
+ f'{MaxComputeEnvs.ACCESS_SECRET_KEY}, {MaxComputeEnvs.PROJECT_NAME}, '
+ f'{MaxComputeEnvs.ENDPOINT}(default: http://service-corp.odps.aliyun-inc.com/api)'
+ )
+
+ return MaxComputeUtil(access_id, access_key, proj_name, endpoint)
+
+ def _check_variables(self):
+ """Check member variables in this class.
+ 1. Condition-1: self._meta_content cannot be empty
+ 2. Condition-2: self.data_type must be 0 or 1
+ 3. Condition-3: self._meta_content must be a url when self.data_type is 0
+ 4. Condition-4: self._meta_content must be a dict when self.data_type is 1
+ """
+ if not self._meta_content:
+ raise ValueError('The meta content cannot be empty.')
+ if self.data_type not in [0, 1]:
+ raise ValueError('Supported samplingType should be 0 or 1, others are not supported yet.')
+ if self.data_type == 0 and not valid_url(self._meta_content):
+ raise ValueError('The meta content must be a url when data type is 0.')
+ if self.data_type == 1 and not isinstance(self._meta_content, dict):
+ raise ValueError('The meta content must be a dict when data type is 1.')
diff --git a/modelscope/msdatasets/download/dataset_builder.py b/modelscope/msdatasets/download/dataset_builder.py
index 73a3a1a1..8ad5243a 100644
--- a/modelscope/msdatasets/download/dataset_builder.py
+++ b/modelscope/msdatasets/download/dataset_builder.py
@@ -18,8 +18,8 @@ from datasets.utils.py_utils import map_nested
from modelscope.hub.api import HubApi
from modelscope.msdatasets.context.dataset_context_config import \
DatasetContextConfig
-from modelscope.msdatasets.dataset_cls.dataset import (ExternalDataset,
- NativeIterableDataset)
+from modelscope.msdatasets.dataset_cls import (ExternalDataset,
+ NativeIterableDataset)
from modelscope.msdatasets.download.download_manager import \
DataStreamingDownloadManager
from modelscope.msdatasets.utils.dataset_utils import \
diff --git a/modelscope/msdatasets/meta/data_meta_manager.py b/modelscope/msdatasets/meta/data_meta_manager.py
index d90b8d5e..0fa74c37 100644
--- a/modelscope/msdatasets/meta/data_meta_manager.py
+++ b/modelscope/msdatasets/meta/data_meta_manager.py
@@ -140,6 +140,14 @@ class DataMetaManager(object):
self.dataset_context_config.data_meta_config = data_meta_config
+ def fetch_virgo_meta(self) -> None:
+ virgo_dataset_id = self.dataset_context_config.dataset_name
+ version = int(self.dataset_context_config.version)
+
+ meta_content = self.api.get_virgo_meta(
+ dataset_id=virgo_dataset_id, version=version)
+ self.dataset_context_config.config_kwargs.update(meta_content)
+
def _fetch_meta_from_cache(self, meta_cache_dir):
local_paths = defaultdict(list)
dataset_type = None
diff --git a/modelscope/msdatasets/ms_dataset.py b/modelscope/msdatasets/ms_dataset.py
index 0a88eb91..912e061d 100644
--- a/modelscope/msdatasets/ms_dataset.py
+++ b/modelscope/msdatasets/ms_dataset.py
@@ -13,13 +13,14 @@ from datasets.utils.file_utils import is_relative_path
from modelscope.hub.repository import DatasetRepository
from modelscope.msdatasets.context.dataset_context_config import \
DatasetContextConfig
+from modelscope.msdatasets.data_loader.data_loader import VirgoDownloader
from modelscope.msdatasets.data_loader.data_loader_manager import (
LocalDataLoaderManager, LocalDataLoaderType, RemoteDataLoaderManager,
RemoteDataLoaderType)
+from modelscope.msdatasets.dataset_cls import (ExternalDataset,
+ NativeIterableDataset)
from modelscope.msdatasets.dataset_cls.custom_datasets.builder import \
build_custom_dataset
-from modelscope.msdatasets.dataset_cls.dataset import (ExternalDataset,
- NativeIterableDataset)
from modelscope.msdatasets.utils.delete_utils import DatasetDeleteManager
from modelscope.msdatasets.utils.upload_utils import DatasetUploadManager
from modelscope.preprocessors import build_preprocessor
@@ -28,7 +29,7 @@ from modelscope.utils.config_ds import MS_DATASETS_CACHE
from modelscope.utils.constant import (DEFAULT_DATASET_NAMESPACE,
DEFAULT_DATASET_REVISION, ConfigFields,
DownloadMode, Hubs, ModeKeys, Tasks,
- UploadMode)
+ UploadMode, VirgoDatasetConfig)
from modelscope.utils.import_utils import is_tf_available, is_torch_available
from modelscope.utils.logger import get_logger
@@ -188,9 +189,6 @@ class MsDataset:
data_files (str or Sequence or Mapping, optional): Path(s) to source data file(s).
split (str, optional): Which split of the data to load.
hub (Hubs or str, optional): When loading from a remote hub, where it is from. default Hubs.modelscope
- download_mode (DownloadMode or str, optional):
- How to treat existing datasets. default DownloadMode.REUSE_DATASET_IF_EXISTS
- config_kwargs (additional keyword arguments): Keyword arguments to be passed
download_mode (DownloadMode or str, optional): How to treat existing datasets. default
DownloadMode.REUSE_DATASET_IF_EXISTS
cache_dir (str, Optional): User-define local cache directory.
@@ -287,6 +285,23 @@ class MsDataset:
custom_cfg=custom_cfg, **config_kwargs)
dataset_inst.is_custom = True
return dataset_inst
+ elif hub == Hubs.virgo:
+ # Rewrite the namespace, version and cache_dir for virgo dataset.
+ if namespace == DEFAULT_DATASET_NAMESPACE:
+ dataset_context_config.namespace = VirgoDatasetConfig.default_virgo_namespace
+ if version == DEFAULT_DATASET_REVISION:
+ dataset_context_config.version = VirgoDatasetConfig.default_dataset_version
+ if cache_dir == MS_DATASETS_CACHE:
+ from modelscope.utils.config_ds import CACHE_HOME
+ cache_dir = os.path.join(CACHE_HOME, 'virgo', 'hub',
+ 'datasets')
+ dataset_context_config.cache_root_dir = cache_dir
+
+ virgo_downloader = VirgoDownloader(dataset_context_config)
+ virgo_downloader.process()
+
+ return virgo_downloader.dataset
+
else:
raise 'Please adjust input args to specify a loading mode, we support following scenes: ' \
'loading from local disk, huggingface hub and modelscope hub.'
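As wired above, Hubs.virgo now routes MsDataset.load through the VirgoDownloader. A hedged usage sketch follows; the dataset id is a placeholder, and the `cache_file` column is the one documented in the VirgoDataset docstring above.

from modelscope.msdatasets.ms_dataset import MsDataset
from modelscope.utils.constant import Hubs

# Downloads the Virgo meta content and, because download_virgo_files=True,
# also fetches the files referenced in each row's meta_info.
virgo_ds = MsDataset.load(
    dataset_name='your-virgo-dataset-id',  # placeholder id
    hub=Hubs.virgo,
    download_virgo_files=True)

print(len(virgo_ds))
print(virgo_ds[0].get('cache_file'))  # local paths of the downloaded files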
diff --git a/modelscope/msdatasets/utils/maxcompute_utils.py b/modelscope/msdatasets/utils/maxcompute_utils.py
new file mode 100644
index 00000000..83c6370d
--- /dev/null
+++ b/modelscope/msdatasets/utils/maxcompute_utils.py
@@ -0,0 +1,160 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import math
+
+import pandas as pd
+
+
+class MaxComputeUtil:
+ """
+ MaxCompute util class.
+
+ Args:
+ access_id: your access id of MaxCompute
+ access_key: access key of MaxCompute
+ project_name: your project name of MaxCompute
+ endpoint: endpoint of MaxCompute
+
+ Attributes:
+ _odps: ODPS object
+
+ """
+
+ def __init__(self, access_id, access_key, project_name, endpoint):
+ from odps import ODPS
+ self._odps = ODPS(access_id, access_key, project_name, endpoint)
+
+ def _get_table(self, table_name):
+ """
+ Get MaxCompute table object.
+ """
+ return self._odps.get_table(table_name)
+
+ def _read_data(self, table_name: str, pt_condition: str) -> pd.DataFrame:
+ """
+ Read data from MaxCompute table.
+ :param table_name: table name
+ :param pt_condition: partition condition,
+ Example: pt_condition = 'dt=20230331'
+ :return: pandas dataframe with all data
+ """
+ t = self._get_table(table_name)
+
+ with t.open_reader(partition=pt_condition, limit=False) as reader:
+ pd_df = reader.to_pandas()
+
+ return pd_df
+
+ def fetch_data_to_csv(self, table_name: str, pt_condition: str,
+ output_path: str) -> None:
+ """
+ Fetch data from MaxCompute table to local file.
+ :param table_name: table name
+ :param pt_condition: partition condition,
+ Example: pt_condition = 'dt=20230331'
+ :param output_path: output path
+ :return: None
+ """
+ pd_df = self._read_data(table_name, pt_condition)
+ pd_df.to_csv(output_path, index=False)
+ print(f'Fetched data to {output_path} successfully.')
+
+ @staticmethod
+ def _check_batch_args(reader, batch_size, limit):
+ if not limit:
+ limit = reader.count
+ if batch_size <= 0:
+ raise ValueError(
+ f'batch_size must be positive, but got {batch_size}')
+ if batch_size > limit:
+ batch_size = limit
+ return batch_size, limit
+
+ @staticmethod
+ def gen_reader_batch(reader, batch_size_in: int, limit_in: int,
+ drop_last_in: bool, partitions: list, columns: list):
+ """
+ Generate batch data from MaxCompute table.
+
+ Args:
+ reader: MaxCompute table reader
+ batch_size_in: batch size
+ limit_in: limit of data, None means fetch all data
+ drop_last_in: whether drop last incomplete batch data
+ partitions: table partitions
+ columns: table columns
+
+ Returns:
+ batch data generator
+ """
+
+ batch_size_in, limit_in = MaxComputeUtil._check_batch_args(
+ reader, batch_size_in, limit_in)
+
+ batch_num = math.floor(limit_in / batch_size_in)
+ for i in range(batch_num + 1):
+ if i == batch_num and not drop_last_in and limit_in % batch_size_in > 0:
+ batch_records = reader[i * batch_size_in:(
+ i * batch_size_in + (limit_in % batch_size_in))]
+ else:
+ batch_records = reader[i * batch_size_in:(i + 1)
+ * batch_size_in]
+ batch_data_list = []
+ for record in batch_records:
+ tmp_vals = [val for _, val in list(record)]
+ tmp_vals = tmp_vals[:(len(tmp_vals) - len(partitions))]
+ batch_data_list.append(tmp_vals)
+ yield pd.DataFrame(batch_data_list, columns=columns)
+
+ @staticmethod
+ def gen_reader_item(reader, index: int, batch_size_in: int, limit_in: int,
+ drop_last_in: bool, partitions: list, columns: list):
+ """
+ Get single batch data from MaxCompute table by indexing.
+
+ Args:
+ reader: MaxCompute table reader
+ index: index of batch data
+ batch_size_in: batch size
+ limit_in: limit of data, None means fetch all data
+ drop_last_in: whether drop last incomplete batch data
+ partitions: table partitions
+ columns: table columns
+
+ Returns:
+ single batch data (dataframe)
+ """
+ batch_size_in, limit_in = MaxComputeUtil._check_batch_args(
+ reader, batch_size_in, limit_in)
+
+ if drop_last_in:
+ batch_num = math.floor(limit_in / batch_size_in)
+ else:
+ batch_num = math.ceil(limit_in / batch_size_in)
+
+ if index < 0:
+ raise ValueError(f'index must be non-negative, but got {index}')
+ if index >= batch_num:
+ raise ValueError(
+ f'index must be less than batch_num, but got index={index}, batch_num={batch_num}'
+ )
+
+ start = index * batch_size_in
+ end = (index + 1) * batch_size_in
+ if end > limit_in:
+ end = limit_in
+ batch_item = reader[start:end]
+
+ batch_data_list = []
+ for record in batch_item:
+ tmp_vals = [val for _, val in list(record)]
+ tmp_vals = tmp_vals[:(len(tmp_vals) - len(partitions))]
+ batch_data_list.append(tmp_vals)
+
+ return pd.DataFrame(batch_data_list, columns=columns)
+
+ def get_table_reader_ins(self, table_name: str, pt_condition: str = None):
+
+ table_ins = self._get_table(table_name)
+ with table_ins.open_reader(partition=pt_condition) as reader:
+ return table_ins, reader
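A hedged usage sketch for the new MaxComputeUtil. The credentials, environment-variable names, table name, and partition below are placeholders, and the odps package is assumed to be installed; the endpoint is the default documented in the Virgo error message above.

import os

from modelscope.msdatasets.utils.maxcompute_utils import MaxComputeUtil

util = MaxComputeUtil(
    access_id=os.environ['ODPS_ACCESS_ID'],              # placeholder env vars
    access_key=os.environ['ODPS_ACCESS_SECRET_KEY'],
    project_name=os.environ['ODPS_PROJECT_NAME'],
    endpoint='http://service-corp.odps.aliyun-inc.com/api')

# Open a reader on a (placeholder) partitioned table and iterate in batches.
table_ins, reader = util.get_table_reader_ins('your_table', pt_condition='dt=20230331')
for batch_df in MaxComputeUtil.gen_reader_batch(
        reader,
        batch_size_in=100,
        limit_in=1000,
        drop_last_in=False,
        partitions=table_ins.table_schema.partitions,
        columns=table_ins.table_schema.names):
    print(batch_df.shape)  # each yield is a pandas DataFrame of up to 100 rows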
diff --git a/modelscope/outputs/nlp_outputs.py b/modelscope/outputs/nlp_outputs.py
index e288df70..d6b934c2 100644
--- a/modelscope/outputs/nlp_outputs.py
+++ b/modelscope/outputs/nlp_outputs.py
@@ -454,3 +454,13 @@ class SentencEmbeddingModelOutput(ModelOutputBase):
query_embeddings: Tensor = None
doc_embeddings: Tensor = None
loss: Tensor = None
+
+
+@dataclass
+class TranslationEvaluationOutput(ModelOutputBase):
+ """The output class for translation evaluation models.
+ """
+
+ score: Tensor = None
+ loss: Tensor = None
+ input_format: List[str] = None
diff --git a/modelscope/outputs/outputs.py b/modelscope/outputs/outputs.py
index ddbe4593..ab24a34c 100644
--- a/modelscope/outputs/outputs.py
+++ b/modelscope/outputs/outputs.py
@@ -1,6 +1,10 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from collections import OrderedDict, namedtuple
from dataclasses import dataclass, fields
+from typing import Dict, List, Tuple
+
+import numpy as np
+import torch
from modelscope.utils.constant import Tasks
@@ -50,7 +54,7 @@ class OutputKeys(object):
SQL_STRING = 'sql_string'
SQL_QUERY = 'sql_query'
HISTORY = 'history'
- QUERT_RESULT = 'query_result'
+ QUERY_RESULT = 'query_result'
TIMESTAMPS = 'timestamps'
SHOT_NUM = 'shot_num'
SCENE_NUM = 'scene_num'
@@ -62,8 +66,343 @@ class OutputKeys(object):
TBOUNDS = 'tbounds'
-TASK_OUTPUTS = {
+OutputTypes = {
+ OutputKeys.LOSS: float, # checked
+ OutputKeys.LOGITS: np.ndarray, # checked.
+ OutputKeys.SCORES: List[float], # checked
+ OutputKeys.SCORE: float, # checked
+ OutputKeys.LABEL: str, # checked
+ OutputKeys.LABELS: List[str], # checked
+ OutputKeys.INPUT_IDS: np.ndarray, # checked
+ OutputKeys.LABEL_POS: np.ndarray, # checked
+ OutputKeys.POSES:
+ List[np.ndarray], # [Tuple(np.ndarray, np.ndarray)] # checked doubtful
+ OutputKeys.CAPTION: str,
+ OutputKeys.BOXES: np.ndarray, # checked
+ OutputKeys.KEYPOINTS: np.ndarray, # checked
+ OutputKeys.MASKS: np.ndarray, # checked
+ OutputKeys.DEPTHS: List[np.ndarray], # checked
+ OutputKeys.DEPTHS_COLOR: List[np.ndarray], # checked
+ OutputKeys.LAYOUT: np.ndarray, # checked
+ OutputKeys.TEXT: str, # checked
+ OutputKeys.POLYGONS: np.ndarray, # checked
+ OutputKeys.OUTPUT: Dict,
+ OutputKeys.OUTPUT_IMG: 'image', # checked
+ OutputKeys.OUTPUT_IMGS: List[np.ndarray], # checked
+ OutputKeys.OUTPUT_VIDEO: 'bytes',
+ OutputKeys.OUTPUT_PCM: np.ndarray,
+ OutputKeys.OUTPUT_PCM_LIST: List[np.ndarray],
+ OutputKeys.OUTPUT_WAV: np.ndarray,
+ OutputKeys.OUTPUT_OBJ: Dict,
+ OutputKeys.OUTPUT_MESH: np.ndarray,
+ OutputKeys.IMG_EMBEDDING: np.ndarray,
+ OutputKeys.SPK_EMBEDDING: np.ndarray,
+ OutputKeys.SPO_LIST: List[float],
+ OutputKeys.TEXT_EMBEDDING: np.ndarray,
+ OutputKeys.TRANSLATION: str,
+ OutputKeys.RESPONSE: Dict,
+ OutputKeys.PREDICTION: np.ndarray, # checked
+ OutputKeys.PREDICTIONS: List[np.ndarray],
+ OutputKeys.PROBABILITIES: np.ndarray,
+ OutputKeys.DIALOG_STATES: object,
+ OutputKeys.VIDEO_EMBEDDING: np.ndarray,
+ OutputKeys.UUID: str,
+ OutputKeys.WORD: str,
+ OutputKeys.KWS_LIST: List[str],
+ OutputKeys.SQL_STRING: str, # checked
+ OutputKeys.SQL_QUERY: str, # checked
+ OutputKeys.HISTORY: Dict, # checked
+ OutputKeys.QUERY_RESULT: Dict, # checked
+ OutputKeys.TIMESTAMPS: str,
+ OutputKeys.SHOT_NUM: int,
+ OutputKeys.SCENE_NUM: int,
+ OutputKeys.SCENE_META_LIST: List[int],
+ OutputKeys.SHOT_META_LIST: List[int],
+ OutputKeys.MATCHES: List[np.ndarray],
+ OutputKeys.PCD12: np.ndarray,
+ OutputKeys.PCD12_ALIGN: np.ndarray,
+ OutputKeys.TBOUNDS: Dict,
+}
+OutputTypeSchema = {
+ OutputKeys.LOSS: {
+ 'type': 'number'
+ }, # checked
+ OutputKeys.LOGITS: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ }, # checked.
+ OutputKeys.SCORES: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ }, # checked
+ OutputKeys.SCORE: {
+ 'type': 'number'
+ }, # checked
+ OutputKeys.LABEL: {
+ 'type': 'string'
+ }, # checked
+ OutputKeys.LABELS: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'string'
+ }
+ }, # checked
+ OutputKeys.INPUT_IDS: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ }, # checked
+ OutputKeys.LABEL_POS: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ }, # checked
+ OutputKeys.POSES: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ }
+ }, # [Tuple(np.ndarray, np.ndarray)] # checked doubtful
+ OutputKeys.CAPTION: {
+ 'type': 'string'
+ },
+ OutputKeys.BOXES: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ }, # checked
+ OutputKeys.KEYPOINTS: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ }, # checked
+ OutputKeys.MASKS: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ }, # checked
+ OutputKeys.DEPTHS: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ }
+ }, # checked
+ OutputKeys.DEPTHS_COLOR: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ }
+ }, # checked
+ OutputKeys.LAYOUT: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ }, # checked
+ OutputKeys.TEXT: {
+ 'type': 'string'
+ }, # checked
+ OutputKeys.POLYGONS: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ }, # checked
+ OutputKeys.OUTPUT: {
+ 'type': 'object'
+ },
+ OutputKeys.OUTPUT_IMG: {
+ 'type': 'string',
+ 'description': 'The base64 encoded image.',
+ }, # checked
+ OutputKeys.OUTPUT_IMGS: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'string',
+ 'description': 'The base64 encoded image.',
+ }
+ }, # checked
+ OutputKeys.OUTPUT_VIDEO: {
+ 'type': 'string',
+ 'description': 'The base64 encoded video.',
+ },
+ OutputKeys.OUTPUT_PCM: {
+ 'type': 'string',
+ 'description': 'The base64 encoded PCM.',
+ },
+ OutputKeys.OUTPUT_PCM_LIST: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'string',
+ 'description': 'The base64 encoded PCM.',
+ }
+ },
+ OutputKeys.OUTPUT_WAV: {
+ 'type': 'string',
+ 'description': 'The base64 encoded WAV.',
+ },
+ OutputKeys.OUTPUT_OBJ: {
+ 'type': 'object'
+ },
+ OutputKeys.OUTPUT_MESH: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ },
+ OutputKeys.IMG_EMBEDDING: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ },
+ OutputKeys.SPK_EMBEDDING: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ },
+ OutputKeys.SPO_LIST: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ },
+ OutputKeys.TEXT_EMBEDDING: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ },
+ OutputKeys.TRANSLATION: {
+ 'type': 'string'
+ },
+ OutputKeys.RESPONSE: {
+ 'type': 'object'
+ },
+ OutputKeys.PREDICTION: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ }, # checked
+ OutputKeys.PREDICTIONS: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ }
+ },
+ OutputKeys.PROBABILITIES: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ },
+ OutputKeys.DIALOG_STATES: {
+ 'type': 'object'
+ },
+ OutputKeys.VIDEO_EMBEDDING: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ },
+ OutputKeys.UUID: {
+ 'type': 'string'
+ },
+ OutputKeys.WORD: {
+ 'type': 'string'
+ },
+ OutputKeys.KWS_LIST: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'string'
+ }
+ },
+ OutputKeys.SQL_STRING: {
+ 'type': 'string'
+ }, # checked
+ OutputKeys.SQL_QUERY: {
+ 'type': 'string'
+ }, # checked
+ OutputKeys.HISTORY: {
+ 'type': 'object'
+ }, # checked
+ OutputKeys.QUERY_RESULT: {
+ 'type': 'object'
+ }, # checked
+ OutputKeys.TIMESTAMPS: {
+ 'type': 'string'
+ },
+ OutputKeys.SHOT_NUM: {
+ 'type': 'integer'
+ },
+ OutputKeys.SCENE_NUM: {
+ 'type': 'integer'
+ },
+ OutputKeys.SCENE_META_LIST: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'integer'
+ }
+ },
+ OutputKeys.SHOT_META_LIST: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'integer'
+ }
+ },
+ OutputKeys.MATCHES: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ }
+ },
+ OutputKeys.PCD12: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ },
+ OutputKeys.PCD12_ALIGN: {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ },
+ OutputKeys.TBOUNDS: {
+ 'type': 'object'
+ },
+}
+
+TASK_OUTPUTS = {
+ Tasks.task_template:
+ [OutputKeys.BOXES, OutputKeys.OUTPUT_IMG, OutputKeys.TEXT_EMBEDDING],
# ============ vision tasks ===================
# ocr detection result for single sample
@@ -388,8 +727,9 @@ TASK_OUTPUTS = {
# "scores": [0.885272, 0.014790631, 0.014558001]
# "labels": ['噪声强度', '模糊程度', '压缩强度'],
# }
- Tasks.image_quality_assessment_degradation:
- [OutputKeys.SCORES, OutputKeys.LABELS],
+ Tasks.image_quality_assessment_degradation: [
+ OutputKeys.SCORES, OutputKeys.LABELS
+ ],
# live category recognition result for single video
# {
@@ -1029,6 +1369,10 @@ TASK_OUTPUTS = {
# {"text": "this is a text answser. "}
Tasks.video_question_answering: [OutputKeys.TEXT],
+ # Multimodal Dialogue result for a sample
+ # {"text": "this is a text response. "}
+ Tasks.multimodal_dialogue: [OutputKeys.TEXT],
+
# auto_speech_recognition result for a single sample
# {
# "text": "每天都要快乐喔"
@@ -1107,9 +1451,9 @@ TASK_OUTPUTS = {
# }
Tasks.image_skychange: [OutputKeys.OUTPUT_IMG],
# {
- # 'scores': [0.1, 0.2, 0.3, ...]
+ # 'score': [0.1, 0.2, 0.3, ...]
# }
- Tasks.translation_evaluation: [OutputKeys.SCORES],
+ Tasks.translation_evaluation: [OutputKeys.SCORE],
# video object segmentation result for a single video
# {
@@ -1140,6 +1484,7 @@ TASK_OUTPUTS = {
Tasks.document_grounded_dialog_rerank: [OutputKeys.OUTPUT],
Tasks.document_grounded_dialog_retrieval: [OutputKeys.OUTPUT],
Tasks.video_temporal_grounding: [OutputKeys.SCORES, OutputKeys.TBOUNDS],
+ Tasks.text_to_video_synthesis: [OutputKeys.OUTPUT_VIDEO],
}
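
The new `OutputTypes` and `OutputTypeSchema` tables pair every `OutputKeys` entry with a Python type and a JSON-Schema-style description, which makes a task's declared outputs machine-checkable. A minimal validation sketch, not part of this patch (it assumes `OutputTypes` and `TASK_OUTPUTS` are importable from `modelscope.outputs` alongside `OutputKeys`; `check_output` is a hypothetical helper):

    # Hypothetical helper: validate a pipeline result against the declared
    # output keys and types of a task.
    from typing import Any, Dict

    from modelscope.outputs import OutputKeys, OutputTypes, TASK_OUTPUTS  # assumed exports

    def check_output(task: str, result: Dict[str, Any]) -> None:
        for key in TASK_OUTPUTS.get(task, []):
            if key not in result:
                raise KeyError(f'missing output key: {key}')
            expected = OutputTypes[key]
            # typing generics such as List[float] and markers such as 'image'
            # cannot be used with isinstance, so only plain classes are checked.
            if isinstance(expected, type) and not isinstance(result[key], expected):
                raise TypeError(
                    f'{key}: expected {expected.__name__}, got {type(result[key]).__name__}')
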
diff --git a/modelscope/pipeline_inputs.py b/modelscope/pipeline_inputs.py
index 032bdff6..8cb031e7 100644
--- a/modelscope/pipeline_inputs.py
+++ b/modelscope/pipeline_inputs.py
@@ -20,7 +20,7 @@ class InputType(object):
BOX = 'box'
DICT = 'dict'
LIST = 'list'
- INT = 'int'
+ NUMBER = 'number'
INPUT_TYPE = {
@@ -31,7 +31,42 @@ INPUT_TYPE = {
InputType.BOX: (list, np.ndarray),
InputType.DICT: (dict, type(None)),
InputType.LIST: (list, type(None)),
- InputType.INT: int,
+ InputType.NUMBER: int,
+}
+
+INPUT_TYPE_SCHEMA = {
+ InputType.IMAGE: {
+ 'type': 'string',
+ 'description': 'Base64 encoded image file or url string.'
+ }, # support url or base64 encoded file.
+ InputType.AUDIO: {
+ 'type': 'string',
+ 'description': 'Base64 encoded audio file or url string.'
+ }, # support url or base64 encoded file.
+ InputType.VIDEO: {
+ 'type': 'string',
+ 'description': 'Base64 encoded video file or url string.'
+ }, # support url or base64 encoded file.
+ InputType.TEXT: {
+ 'type': 'string',
+ 'description': 'The input text.'
+ },
+ InputType.BOX: {
+ 'type': 'array',
+ 'description': 'Box coordinates; values should be integers.',
+ 'items': {
+ 'type': 'number'
+ }
+ },
+ InputType.DICT: { # unknown properties
+ 'type': 'object',
+ },
+ InputType.LIST: {
+ 'type': 'array'
+ }, # unknown item type.
+ InputType.NUMBER: {
+ 'type': 'integer'
+ },
}
@@ -47,12 +82,19 @@ def check_input_type(input_type, input):
TASK_INPUTS = {
+
+ Tasks.task_template: {
+ 'image': InputType.IMAGE,
+ 'text': InputType.TEXT
+ },
# if task input is single var, value is InputType
# if task input is a tuple, value is tuple of InputType
# if task input is a dict, value is a dict of InputType, where key
# equals the one needed in pipeline input dict
# if task input is a list, value is a set of input format, in which
- # each element corresponds to one input format as described above.
+ # each element corresponds to one input format as described above, and
+ # the list must include a dict format.
+
# ============ vision tasks ===================
Tasks.ocr_detection:
InputType.IMAGE,
@@ -73,7 +115,7 @@ TASK_INPUTS = {
Tasks.human_detection:
InputType.IMAGE,
Tasks.face_image_generation:
- InputType.INT,
+ InputType.NUMBER,
Tasks.image_classification:
InputType.IMAGE,
Tasks.image_object_detection:
@@ -191,8 +233,7 @@ TASK_INPUTS = {
Tasks.nli: (InputType.TEXT, InputType.TEXT),
Tasks.sentiment_classification:
InputType.TEXT,
- Tasks.zero_shot_classification:
- InputType.TEXT,
+ Tasks.zero_shot_classification: InputType.TEXT,
Tasks.relation_extraction:
InputType.TEXT,
Tasks.translation:
@@ -212,7 +253,13 @@ TASK_INPUTS = {
'source_sentence': InputType.LIST,
'sentences_to_compare': InputType.LIST,
},
- Tasks.text_ranking: (InputType.TEXT, InputType.TEXT),
+ Tasks.text_ranking: [
+ (InputType.TEXT, InputType.TEXT),
+ {
+ 'source_sentence': InputType.LIST,
+ 'sentences_to_compare': InputType.LIST
+ }
+ ],
Tasks.text_generation:
InputType.TEXT,
Tasks.fid_dialogue: {
@@ -261,7 +308,7 @@ TASK_INPUTS = {
},
# ============ audio tasks ===================
- Tasks.auto_speech_recognition:
+ Tasks.auto_speech_recognition: # input can be audio, or audio and text.
[InputType.AUDIO, {
'wav': InputType.AUDIO,
'text': InputType.TEXT
@@ -290,6 +337,9 @@ TASK_INPUTS = {
Tasks.video_captioning: [InputType.VIDEO, {
'video': InputType.VIDEO,
}],
+ Tasks.multimodal_dialogue: {
+ 'messages': InputType.LIST,
+ },
Tasks.visual_grounding: {
'image': InputType.IMAGE,
'text': InputType.TEXT
@@ -332,5 +382,9 @@ TASK_INPUTS = {
'video_input_path': InputType.TEXT,
'video_output_path': InputType.TEXT,
'mask_path': InputType.TEXT,
- }
+ },
+ Tasks.text_to_video_synthesis: {
+ 'text': InputType.TEXT
+ },
+ Tasks.video_summarization: InputType.TEXT,
}
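
The added `INPUT_TYPE_SCHEMA` mirrors `INPUT_TYPE` with JSON-Schema-style entries, and the rule noted above for list-style task inputs (each list must include a dict format) is what makes a generic schema lookup possible. A minimal sketch, not part of this patch, of deriving a task's input schema from these tables:

    # Illustrative helper built on TASK_INPUTS / INPUT_TYPE_SCHEMA from this module.
    from modelscope.pipeline_inputs import INPUT_TYPE_SCHEMA, TASK_INPUTS

    def input_schema_for(task: str):
        spec = TASK_INPUTS[task]
        if isinstance(spec, str):          # single input, e.g. InputType.IMAGE
            return INPUT_TYPE_SCHEMA[spec]
        if isinstance(spec, tuple):        # fixed-order inputs, e.g. (TEXT, TEXT)
            return {'type': 'array',
                    'items': [INPUT_TYPE_SCHEMA[t] for t in spec]}
        if isinstance(spec, dict):         # named inputs
            return {'type': 'object',
                    'properties': {k: INPUT_TYPE_SCHEMA[v] for k, v in spec.items()}}
        if isinstance(spec, list):         # several accepted formats; use the dict one
            dict_spec = next(s for s in spec if isinstance(s, dict))
            return {'type': 'object',
                    'properties': {k: INPUT_TYPE_SCHEMA[v] for k, v in dict_spec.items()}}
        raise ValueError(f'unsupported input spec for task {task}')
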
diff --git a/modelscope/pipelines/__init__.py b/modelscope/pipelines/__init__.py
index 71fe307b..d98a7af9 100644
--- a/modelscope/pipelines/__init__.py
+++ b/modelscope/pipelines/__init__.py
@@ -1,7 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import TYPE_CHECKING
-from modelscope.utils.import_utils import LazyImportModule
from . import audio, cv, multi_modal, nlp
from .base import Pipeline
from .builder import pipeline
diff --git a/modelscope/pipelines/audio/asr_inference_pipeline.py b/modelscope/pipelines/audio/asr_inference_pipeline.py
index b5a4cba7..b9c0bd03 100644
--- a/modelscope/pipelines/audio/asr_inference_pipeline.py
+++ b/modelscope/pipelines/audio/asr_inference_pipeline.py
@@ -54,6 +54,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
lm_model_revision: Optional[str] = None,
timestamp_model: Optional[Union[Model, str]] = None,
timestamp_model_revision: Optional[str] = None,
+ ngpu: int = 1,
**kwargs):
"""
Use `model` and `preprocessor` to create an asr pipeline for prediction
@@ -87,7 +88,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
beam_size('int'):
beam size for decoding
ctc_weight('float'):
- CTC weight in joint decoding
+ the CTC weight in joint decoding
lm_weight('float'):
lm weight
decoding_ind('int', defaults to 0):
@@ -119,48 +120,48 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
self.model_cfg = self.model.forward()
self.cmd = self.get_cmd(kwargs, model)
- if self.cmd['code_base'] == 'funasr':
- from funasr.bin import asr_inference_launch
- self.funasr_infer_modelscope = asr_inference_launch.inference_launch(
- mode=self.cmd['mode'],
- maxlenratio=self.cmd['maxlenratio'],
- minlenratio=self.cmd['minlenratio'],
- batch_size=self.cmd['batch_size'],
- beam_size=self.cmd['beam_size'],
- ngpu=self.cmd['ngpu'],
- ctc_weight=self.cmd['ctc_weight'],
- lm_weight=self.cmd['lm_weight'],
- penalty=self.cmd['penalty'],
- log_level=self.cmd['log_level'],
- asr_train_config=self.cmd['asr_train_config'],
- asr_model_file=self.cmd['asr_model_file'],
- cmvn_file=self.cmd['cmvn_file'],
- lm_file=self.cmd['lm_file'],
- token_type=self.cmd['token_type'],
- key_file=self.cmd['key_file'],
- lm_train_config=self.cmd['lm_train_config'],
- bpemodel=self.cmd['bpemodel'],
- allow_variable_data_keys=self.cmd['allow_variable_data_keys'],
- output_dir=self.cmd['output_dir'],
- dtype=self.cmd['dtype'],
- seed=self.cmd['seed'],
- ngram_weight=self.cmd['ngram_weight'],
- nbest=self.cmd['nbest'],
- num_workers=self.cmd['num_workers'],
- vad_infer_config=self.cmd['vad_infer_config'],
- vad_model_file=self.cmd['vad_model_file'],
- vad_cmvn_file=self.cmd['vad_cmvn_file'],
- punc_model_file=self.cmd['punc_model_file'],
- punc_infer_config=self.cmd['punc_infer_config'],
- timestamp_model_file=self.cmd['timestamp_model_file'],
- timestamp_infer_config=self.cmd['timestamp_infer_config'],
- timestamp_cmvn_file=self.cmd['timestamp_cmvn_file'],
- outputs_dict=self.cmd['outputs_dict'],
- param_dict=self.cmd['param_dict'],
- token_num_relax=self.cmd['token_num_relax'],
- decoding_ind=self.cmd['decoding_ind'],
- decoding_mode=self.cmd['decoding_mode'],
- )
+ from funasr.bin import asr_inference_launch
+ self.funasr_infer_modelscope = asr_inference_launch.inference_launch(
+ mode=self.cmd['mode'],
+ maxlenratio=self.cmd['maxlenratio'],
+ minlenratio=self.cmd['minlenratio'],
+ batch_size=self.cmd['batch_size'],
+ beam_size=self.cmd['beam_size'],
+ ngpu=self.cmd['ngpu'],
+ ctc_weight=self.cmd['ctc_weight'],
+ lm_weight=self.cmd['lm_weight'],
+ penalty=self.cmd['penalty'],
+ log_level=self.cmd['log_level'],
+ asr_train_config=self.cmd['asr_train_config'],
+ asr_model_file=self.cmd['asr_model_file'],
+ cmvn_file=self.cmd['cmvn_file'],
+ lm_file=self.cmd['lm_file'],
+ token_type=self.cmd['token_type'],
+ key_file=self.cmd['key_file'],
+ lm_train_config=self.cmd['lm_train_config'],
+ bpemodel=self.cmd['bpemodel'],
+ allow_variable_data_keys=self.cmd['allow_variable_data_keys'],
+ output_dir=self.cmd['output_dir'],
+ dtype=self.cmd['dtype'],
+ seed=self.cmd['seed'],
+ ngram_weight=self.cmd['ngram_weight'],
+ nbest=self.cmd['nbest'],
+ num_workers=self.cmd['num_workers'],
+ vad_infer_config=self.cmd['vad_infer_config'],
+ vad_model_file=self.cmd['vad_model_file'],
+ vad_cmvn_file=self.cmd['vad_cmvn_file'],
+ punc_model_file=self.cmd['punc_model_file'],
+ punc_infer_config=self.cmd['punc_infer_config'],
+ timestamp_model_file=self.cmd['timestamp_model_file'],
+ timestamp_infer_config=self.cmd['timestamp_infer_config'],
+ timestamp_cmvn_file=self.cmd['timestamp_cmvn_file'],
+ outputs_dict=self.cmd['outputs_dict'],
+ param_dict=self.cmd['param_dict'],
+ token_num_relax=self.cmd['token_num_relax'],
+ decoding_ind=self.cmd['decoding_ind'],
+ decoding_mode=self.cmd['decoding_mode'],
+ **kwargs,
+ )
def __call__(self,
audio_in: Union[str, bytes],
@@ -197,7 +198,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
"""
# code base
- code_base = self.cmd['code_base']
+ # code_base = self.cmd['code_base']
self.recog_type = recog_type
self.audio_format = audio_format
self.audio_fs = None
@@ -207,31 +208,21 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
self.cmd['output_dir'] = output_dir
self.cmd['param_dict'] = param_dict
- if code_base == 'funasr':
- if isinstance(audio_in, str):
- # for funasr code, generate wav.scp from url or local path
- self.audio_in, self.raw_inputs = generate_scp_from_url(
- audio_in)
- elif isinstance(audio_in, bytes):
- self.audio_in = audio_in
- self.raw_inputs = None
- else:
- import numpy
- import torch
- if isinstance(audio_in, torch.Tensor):
- self.audio_in = None
- self.raw_inputs = audio_in
- elif isinstance(audio_in, numpy.ndarray):
- self.audio_in = None
- self.raw_inputs = audio_in
- elif isinstance(audio_in, str):
- # load pcm data from url if audio_in is url str
- self.audio_in, checking_audio_fs = load_bytes_from_url(audio_in)
+ if isinstance(audio_in, str):
+ # for funasr code, generate wav.scp from url or local path
+ self.audio_in, self.raw_inputs = generate_scp_from_url(audio_in)
elif isinstance(audio_in, bytes):
- # load pcm data from wav data if audio_in is wave format
- self.audio_in, checking_audio_fs = extract_pcm_from_wav(audio_in)
- else:
self.audio_in = audio_in
+ self.raw_inputs = None
+ else:
+ import numpy
+ import torch
+ if isinstance(audio_in, torch.Tensor):
+ self.audio_in = None
+ self.raw_inputs = audio_in
+ elif isinstance(audio_in, numpy.ndarray):
+ self.audio_in = None
+ self.raw_inputs = audio_in
# set the sample_rate of audio_in if checking_audio_fs is valid
if checking_audio_fs is not None:
@@ -265,12 +256,6 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
if self.preprocessor is None:
self.preprocessor = WavToScp()
- # pipeline() from pipelines/builder.py passes 'device' but 'ngpu' needed here
- device = extra_args.get('device')
- if device == 'cpu':
- extra_args['ngpu'] = 0
- elif device == 'gpu':
- extra_args['ngpu'] = 1
outputs = self.preprocessor.config_checking(self.model_cfg)
# generate asr inference command
cmd = {
@@ -323,109 +308,88 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
}
}
- if self.framework == Frameworks.torch:
- frontend_conf = None
- token_num_relax = None
- decoding_ind = None
- decoding_mode = None
- if os.path.exists(outputs['am_model_config']):
- config_file = open(
- outputs['am_model_config'], encoding='utf-8')
- root = yaml.full_load(config_file)
- config_file.close()
- if 'frontend_conf' in root:
- frontend_conf = root['frontend_conf']
- if os.path.exists(outputs['asr_model_config']):
- config_file = open(
- outputs['asr_model_config'], encoding='utf-8')
- root = yaml.full_load(config_file)
- config_file.close()
- if 'token_num_relax' in root:
- token_num_relax = root['token_num_relax']
- if 'decoding_ind' in root:
- decoding_ind = root['decoding_ind']
- if 'decoding_mode' in root:
- decoding_mode = root['decoding_mode']
+ frontend_conf = None
+ token_num_relax = None
+ decoding_ind = None
+ decoding_mode = None
+ if os.path.exists(outputs['am_model_config']):
+ config_file = open(outputs['am_model_config'], encoding='utf-8')
+ root = yaml.full_load(config_file)
+ config_file.close()
+ if 'frontend_conf' in root:
+ frontend_conf = root['frontend_conf']
+ if os.path.exists(outputs['asr_model_config']):
+ config_file = open(outputs['asr_model_config'], encoding='utf-8')
+ root = yaml.full_load(config_file)
+ config_file.close()
+ if 'token_num_relax' in root:
+ token_num_relax = root['token_num_relax']
+ if 'decoding_ind' in root:
+ decoding_ind = root['decoding_ind']
+ if 'decoding_mode' in root:
+ decoding_mode = root['decoding_mode']
- cmd['beam_size'] = root['beam_size']
- cmd['penalty'] = root['penalty']
- cmd['maxlenratio'] = root['maxlenratio']
- cmd['minlenratio'] = root['minlenratio']
- cmd['ctc_weight'] = root['ctc_weight']
- cmd['lm_weight'] = root['lm_weight']
- cmd['asr_train_config'] = outputs['am_model_config']
- cmd['lm_file'] = outputs['lm_model_path']
- cmd['lm_train_config'] = outputs['lm_model_config']
- cmd['batch_size'] = outputs['model_config']['batch_size']
- cmd['frontend_conf'] = frontend_conf
- if frontend_conf is not None and 'fs' in frontend_conf:
- cmd['fs']['model_fs'] = frontend_conf['fs']
- cmd['token_num_relax'] = token_num_relax
- cmd['decoding_ind'] = decoding_ind
- cmd['decoding_mode'] = decoding_mode
- if outputs.__contains__('mvn_file'):
- cmd['cmvn_file'] = outputs['mvn_file']
- model_config = self.model_cfg['model_config']
- if model_config.__contains__('vad_model') and self.vad_model != '':
- self.vad_model = model_config['vad_model']
- if model_config.__contains__('vad_model_revision'):
- self.vad_model_revision = model_config['vad_model_revision']
- if model_config.__contains__(
- 'punc_model') and self.punc_model != '':
- self.punc_model = model_config['punc_model']
- if model_config.__contains__('punc_model_revision'):
- self.punc_model_revision = model_config['punc_model_revision']
- if model_config.__contains__(
- 'timestamp_model') and self.timestamp_model != '':
- self.timestamp_model = model_config['timestamp_model']
- if model_config.__contains__('timestamp_model_revision'):
- self.timestamp_model_revision = model_config[
- 'timestamp_model_revision']
- update_local_model(model_config, model_path, extra_args)
- self.load_vad_model(cmd)
- self.load_punc_model(cmd)
- self.load_lm_model(cmd)
- self.load_timestamp_model(cmd)
+ cmd['beam_size'] = root['beam_size']
+ cmd['penalty'] = root['penalty']
+ cmd['maxlenratio'] = root['maxlenratio']
+ cmd['minlenratio'] = root['minlenratio']
+ cmd['ctc_weight'] = root['ctc_weight']
+ cmd['lm_weight'] = root['lm_weight']
+ cmd['asr_train_config'] = outputs['am_model_config']
+ cmd['lm_file'] = outputs['lm_model_path']
+ cmd['lm_train_config'] = outputs['lm_model_config']
+ cmd['batch_size'] = outputs['model_config']['batch_size']
+ cmd['frontend_conf'] = frontend_conf
+ if frontend_conf is not None and 'fs' in frontend_conf:
+ cmd['fs']['model_fs'] = frontend_conf['fs']
+ cmd['token_num_relax'] = token_num_relax
+ cmd['decoding_ind'] = decoding_ind
+ cmd['decoding_mode'] = decoding_mode
+ if outputs.__contains__('mvn_file'):
+ cmd['cmvn_file'] = outputs['mvn_file']
+ model_config = self.model_cfg['model_config']
+ if model_config.__contains__('vad_model') and self.vad_model != '':
+ self.vad_model = model_config['vad_model']
+ if model_config.__contains__('vad_model_revision'):
+ self.vad_model_revision = model_config['vad_model_revision']
+ if model_config.__contains__('punc_model') and self.punc_model != '':
+ self.punc_model = model_config['punc_model']
+ if model_config.__contains__('punc_model_revision'):
+ self.punc_model_revision = model_config['punc_model_revision']
+ if model_config.__contains__(
+ 'timestamp_model') and self.timestamp_model != '':
+ self.timestamp_model = model_config['timestamp_model']
+ if model_config.__contains__('timestamp_model_revision'):
+ self.timestamp_model_revision = model_config[
+ 'timestamp_model_revision']
+ update_local_model(model_config, model_path, extra_args)
+ self.load_vad_model(cmd)
+ self.load_punc_model(cmd)
+ self.load_lm_model(cmd)
+ self.load_timestamp_model(cmd)
- user_args_dict = [
- 'output_dir',
- 'batch_size',
- 'mode',
- 'ngpu',
- 'beam_size',
- 'ctc_weight',
- 'lm_weight',
- 'decoding_ind',
- 'decoding_mode',
- 'vad_model_file',
- 'vad_infer_config',
- 'vad_cmvn_file',
- 'punc_model_file',
- 'punc_infer_config',
- 'param_dict',
- ]
+ user_args_dict = [
+ 'output_dir',
+ 'batch_size',
+ 'mode',
+ 'ngpu',
+ 'beam_size',
+ 'ctc_weight',
+ 'lm_weight',
+ 'decoding_ind',
+ 'decoding_mode',
+ 'vad_model_file',
+ 'vad_infer_config',
+ 'vad_cmvn_file',
+ 'punc_model_file',
+ 'punc_infer_config',
+ 'param_dict',
+ ]
- for user_args in user_args_dict:
- if user_args in extra_args and extra_args[
- user_args] is not None:
- cmd[user_args] = extra_args[user_args]
-
- elif self.framework == Frameworks.tf:
- cmd['fs']['model_fs'] = outputs['model_config']['fs']
- cmd['hop_length'] = outputs['model_config']['hop_length']
- cmd['feature_dims'] = outputs['model_config']['feature_dims']
- cmd['predictions_file'] = 'text'
- cmd['cmvn_file'] = outputs['am_mvn_file']
- cmd['vocab_file'] = outputs['vocab_file']
- if 'idx_text' in outputs:
- cmd['idx_text'] = outputs['idx_text']
- if 'sampled_ids' in outputs['model_config']:
- cmd['sampled_ids'] = outputs['model_config']['sampled_ids']
- if 'sampled_lengths' in outputs['model_config']:
- cmd['sampled_lengths'] = outputs['model_config'][
- 'sampled_lengths']
- else:
- raise ValueError('model type is mismatching')
+ for user_args in user_args_dict:
+ if user_args in extra_args and extra_args[user_args] is not None:
+ cmd[user_args] = extra_args[user_args]
+ del extra_args[user_args]
return cmd
@@ -520,23 +484,12 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
logger.info(f"Decoding with {inputs['audio_format']} files ...")
data_cmd: Sequence[Tuple[str, str, str]]
- if self.cmd['code_base'] == 'funasr':
- if isinstance(self.audio_in, bytes):
- data_cmd = [self.audio_in, 'speech', 'bytes']
- elif isinstance(self.audio_in, str):
- data_cmd = [self.audio_in, 'speech', 'sound']
- elif self.raw_inputs is not None:
- data_cmd = None
- else:
- if inputs['audio_format'] == 'wav' or inputs[
- 'audio_format'] == 'pcm':
- data_cmd = ['speech', 'sound']
- elif inputs['audio_format'] == 'kaldi_ark':
- data_cmd = ['speech', 'kaldi_ark']
- elif inputs['audio_format'] == 'tfrecord':
- data_cmd = ['speech', 'tfrecord']
- if inputs.__contains__('mvn_file'):
- data_cmd.append(inputs['mvn_file'])
+ if isinstance(self.audio_in, bytes):
+ data_cmd = [self.audio_in, 'speech', 'bytes']
+ elif isinstance(self.audio_in, str):
+ data_cmd = [self.audio_in, 'speech', 'sound']
+ elif self.raw_inputs is not None:
+ data_cmd = None
# generate asr inference command
self.cmd['name_and_type'] = data_cmd
@@ -618,34 +571,9 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
return ref_list
def run_inference(self, cmd, **kwargs):
- asr_result = []
- if self.framework == Frameworks.torch and cmd['code_base'] == 'funasr':
- asr_result = self.funasr_infer_modelscope(
- cmd['name_and_type'], cmd['raw_inputs'], cmd['output_dir'],
- cmd['fs'], cmd['param_dict'], **kwargs)
-
- elif self.framework == Frameworks.tf:
- from easyasr import asr_inference_paraformer_tf
- if hasattr(asr_inference_paraformer_tf, 'set_parameters'):
- asr_inference_paraformer_tf.set_parameters(
- language=cmd['lang'])
- else:
- # in order to support easyasr-0.0.2
- cmd['fs'] = cmd['fs']['model_fs']
-
- asr_result = asr_inference_paraformer_tf.asr_inference(
- ngpu=cmd['ngpu'],
- name_and_type=cmd['name_and_type'],
- audio_lists=cmd['audio_in'],
- idx_text_file=cmd['idx_text'],
- asr_model_file=cmd['asr_model_file'],
- vocab_file=cmd['vocab_file'],
- am_mvn_file=cmd['cmvn_file'],
- predictions_file=cmd['predictions_file'],
- fs=cmd['fs'],
- hop_length=cmd['hop_length'],
- feature_dims=cmd['feature_dims'],
- sampled_ids=cmd['sampled_ids'],
- sampled_lengths=cmd['sampled_lengths'])
+ asr_result = self.funasr_infer_modelscope(cmd['name_and_type'],
+ cmd['raw_inputs'],
+ cmd['output_dir'], cmd['fs'],
+ cmd['param_dict'], **kwargs)
return asr_result
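
With this change the pipeline always builds the FunASR `inference_launch` backend (the TensorFlow/EasyASR branch is removed), and extra constructor kwargs are forwarded to it. Basic usage is unchanged; a short sketch for orientation (the model id and audio URL below are placeholders, not taken from this patch):

    # Illustrative usage only; substitute a real ASR model id and audio source.
    from modelscope.pipelines import pipeline
    from modelscope.utils.constant import Tasks

    asr = pipeline(
        task=Tasks.auto_speech_recognition,
        model='damo/some_asr_model_id')             # placeholder model id
    result = asr('https://example.com/sample.wav')  # local path or URL
    print(result)  # e.g. {'text': '...'}
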
diff --git a/modelscope/pipelines/audio/lm_infer_pipeline.py b/modelscope/pipelines/audio/lm_infer_pipeline.py
index f271ea45..75d835d6 100644
--- a/modelscope/pipelines/audio/lm_infer_pipeline.py
+++ b/modelscope/pipelines/audio/lm_infer_pipeline.py
@@ -35,7 +35,10 @@ class LanguageModelPipeline(Pipeline):
"""
- def __init__(self, model: Union[Model, str] = None, **kwargs):
+ def __init__(self,
+ model: Union[Model, str] = None,
+ ngpu: int = 1,
+ **kwargs):
"""
Use `model` to create a LM pipeline for prediction
Args:
@@ -88,7 +91,9 @@ class LanguageModelPipeline(Pipeline):
split_with_space=self.cmd['split_with_space'],
seg_dict_file=self.cmd['seg_dict_file'],
output_dir=self.cmd['output_dir'],
- param_dict=self.cmd['param_dict'])
+ param_dict=self.cmd['param_dict'],
+ **kwargs,
+ )
def __call__(self,
text_in: str = None,
@@ -189,6 +194,7 @@ class LanguageModelPipeline(Pipeline):
for user_args in user_args_dict:
if user_args in extra_args and extra_args[user_args] is not None:
cmd[user_args] = extra_args[user_args]
+ del extra_args[user_args]
return cmd
diff --git a/modelscope/pipelines/audio/punctuation_processing_pipeline.py b/modelscope/pipelines/audio/punctuation_processing_pipeline.py
index 2f4dee7a..3ab3481d 100644
--- a/modelscope/pipelines/audio/punctuation_processing_pipeline.py
+++ b/modelscope/pipelines/audio/punctuation_processing_pipeline.py
@@ -39,7 +39,10 @@ class PunctuationProcessingPipeline(Pipeline):
"""
- def __init__(self, model: Union[Model, str] = None, **kwargs):
+ def __init__(self,
+ model: Union[Model, str] = None,
+ ngpu: int = 1,
+ **kwargs):
"""use `model` to create an asr pipeline for prediction
"""
super().__init__(model=model, **kwargs)
@@ -59,7 +62,9 @@ class PunctuationProcessingPipeline(Pipeline):
train_config=self.cmd['train_config'],
model_file=self.cmd['model_file'],
output_dir=self.cmd['output_dir'],
- param_dict=self.cmd['param_dict'])
+ param_dict=self.cmd['param_dict'],
+ **kwargs,
+ )
def __call__(self,
text_in: str = None,
@@ -141,6 +146,7 @@ class PunctuationProcessingPipeline(Pipeline):
for user_args in user_args_dict:
if user_args in extra_args and extra_args[user_args] is not None:
cmd[user_args] = extra_args[user_args]
+ del extra_args[user_args]
return cmd
diff --git a/modelscope/pipelines/audio/speaker_change_locating_pipeline.py b/modelscope/pipelines/audio/speaker_change_locating_pipeline.py
new file mode 100644
index 00000000..0bab08ac
--- /dev/null
+++ b/modelscope/pipelines/audio/speaker_change_locating_pipeline.py
@@ -0,0 +1,105 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import io
+from typing import Any, Dict, List, Union
+
+import numpy as np
+import soundfile as sf
+import torch
+
+from modelscope.fileio import File
+from modelscope.metainfo import Pipelines
+from modelscope.outputs import OutputKeys
+from modelscope.pipelines.base import InputModel, Pipeline
+from modelscope.pipelines.builder import PIPELINES
+from modelscope.utils.constant import Tasks
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+__all__ = ['SpeakerChangeLocatingPipeline']
+
+
+@PIPELINES.register_module(
+ Tasks.speaker_diarization, module_name=Pipelines.speaker_change_locating)
+class SpeakerChangeLocatingPipeline(Pipeline):
+ """Speaker Change Locating Inference Pipeline
+ use `model` to create a speaker change locating pipeline.
+
+ Args:
+ model (SpeakerChangeLocatingPipeline): A model instance, or a local model dir, or a model id in the model hub.
+ kwargs (dict, `optional`):
+ Extra kwargs passed into the pipeline's constructor.
+ Example:
+ >>> from modelscope.pipelines import pipeline
+ >>> from modelscope.utils.constant import Tasks
+ >>> p = pipeline(
+ >>> task=Tasks.speaker_diarization, model='damo/speech_campplus-transformer_scl_zh-cn_16k-common')
+ >>> print(p(audio))
+
+ """
+
+ def __init__(self, model: InputModel, **kwargs):
+ """use `model` to create a speaker change locating pipeline for prediction
+ Args:
+ model (str): a valid official model id
+ """
+ super().__init__(model=model, **kwargs)
+ self.model_config = self.model.model_config
+ self.config = self.model.model_config
+ self.anchor_size = self.config['anchor_size']
+
+ def __call__(self, audio: str, embds: List = None) -> Dict[str, Any]:
+ if embds is not None:
+ assert len(embds) == 2
+ assert isinstance(embds[0], np.ndarray) and isinstance(
+ embds[1], np.ndarray)
+ assert embds[0].shape == (
+ self.anchor_size, ) and embds[1].shape == (self.anchor_size, )
+ else:
+ embd1 = np.zeros(self.anchor_size // 2)
+ embd2 = np.ones(self.anchor_size - self.anchor_size // 2)
+ embd3 = np.ones(self.anchor_size // 2)
+ embd4 = np.zeros(self.anchor_size - self.anchor_size // 2)
+ embds = [
+ np.stack([embd1, embd2], axis=1).flatten(),
+ np.stack([embd3, embd4], axis=1).flatten(),
+ ]
+ anchors = torch.from_numpy(np.stack(embds,
+ axis=0)).float().unsqueeze(0)
+
+ output = self.preprocess(audio)
+ output = self.forward(output, anchors)
+ output = self.postprocess(output)
+
+ return output
+
+ def forward(self, input: torch.Tensor, anchors: torch.Tensor):
+ output = self.model(input, anchors)
+ return output
+
+ def postprocess(self, input: torch.Tensor) -> Dict[str, Any]:
+ predict = np.where(np.diff(input.argmax(-1).numpy()))
+ try:
+ predict = predict[0][0] * 0.01 + 0.02
+ predict = round(predict, 2)
+ return {OutputKeys.TEXT: f'The change point is at {predict}s.'}
+ except Exception:
+ return {OutputKeys.TEXT: 'No change point is found.'}
+
+ def preprocess(self, input: str) -> torch.Tensor:
+ if isinstance(input, str):
+ file_bytes = File.read(input)
+ data, fs = sf.read(io.BytesIO(file_bytes), dtype='float32')
+ if len(data.shape) == 2:
+ data = data[:, 0]
+ if fs != self.model_config['sample_rate']:
+ raise ValueError(
+ 'modelscope error: Only %d Hz sample rate files are supported'
+ % self.model_config['sample_rate'])
+ data = torch.from_numpy(data).unsqueeze(0)
+ else:
+ raise ValueError(
+ 'modelscope error: The input type is restricted to an audio file address.')
+ return data
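
When no anchor embeddings are passed in, `__call__` above builds two fixed anchors by interleaving zeros and ones along the `anchor_size` dimension. The same construction is shown standalone below (the `anchor_size` value is illustrative; the pipeline reads it from `model_config['anchor_size']`):

    # Standalone rewrite of the default anchor construction above.
    import numpy as np
    import torch

    anchor_size = 512  # illustrative; taken from the model config in the pipeline
    embd1 = np.zeros(anchor_size // 2)
    embd2 = np.ones(anchor_size - anchor_size // 2)
    embd3 = np.ones(anchor_size // 2)
    embd4 = np.zeros(anchor_size - anchor_size // 2)
    embds = [
        np.stack([embd1, embd2], axis=1).flatten(),  # 0, 1, 0, 1, ... pattern
        np.stack([embd3, embd4], axis=1).flatten(),  # 1, 0, 1, 0, ... pattern
    ]
    anchors = torch.from_numpy(np.stack(embds, axis=0)).float().unsqueeze(0)
    print(anchors.shape)  # torch.Size([1, 2, 512])
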
diff --git a/modelscope/pipelines/audio/speaker_diarization_pipeline.py b/modelscope/pipelines/audio/speaker_diarization_pipeline.py
index f4f68cba..71715ecd 100644
--- a/modelscope/pipelines/audio/speaker_diarization_pipeline.py
+++ b/modelscope/pipelines/audio/speaker_diarization_pipeline.py
@@ -48,6 +48,7 @@ class SpeakerDiarizationPipeline(Pipeline):
model: Union[Model, str] = None,
sv_model: Optional[Union[Model, str]] = None,
sv_model_revision: Optional[str] = None,
+ ngpu: int = 1,
**kwargs):
"""use `model` to create a speaker diarization pipeline for prediction
Args:
@@ -90,6 +91,7 @@ class SpeakerDiarizationPipeline(Pipeline):
dur_threshold=self.cmd['dur_threshold'],
out_format=self.cmd['out_format'],
param_dict=self.cmd['param_dict'],
+ **kwargs,
)
def __call__(self,
@@ -203,6 +205,7 @@ class SpeakerDiarizationPipeline(Pipeline):
cmd[user_args].update(extra_args[user_args])
else:
cmd[user_args] = extra_args[user_args]
+ del extra_args[user_args]
return cmd
diff --git a/modelscope/pipelines/audio/speaker_verification_eres2net_pipeline.py b/modelscope/pipelines/audio/speaker_verification_eres2net_pipeline.py
new file mode 100644
index 00000000..ef91d83b
--- /dev/null
+++ b/modelscope/pipelines/audio/speaker_verification_eres2net_pipeline.py
@@ -0,0 +1,110 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import io
+from typing import Any, Dict, List, Union
+
+import soundfile as sf
+import torch
+
+from modelscope.fileio import File
+from modelscope.metainfo import Pipelines
+from modelscope.outputs import OutputKeys
+from modelscope.pipelines.base import InputModel, Pipeline
+from modelscope.pipelines.builder import PIPELINES
+from modelscope.utils.constant import Tasks
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+
+@PIPELINES.register_module(
+ Tasks.speaker_verification,
+ module_name=Pipelines.speaker_verification_eres2net)
+class ERes2Net_Pipeline(Pipeline):
+ """Speaker Verification Inference Pipeline
+ use `model` to create a Speaker Verification pipeline.
+
+ Args:
+ model (SpeakerVerificationPipeline): A model instance, or a local model dir, or a model id in the model hub.
+ kwargs (dict, `optional`):
+ Extra kwargs passed into the pipeline's constructor.
+ Example:
+ >>> from modelscope.pipelines import pipeline
+ >>> from modelscope.utils.constant import Tasks
+ >>> p = pipeline(
+ >>> task=Tasks.speaker_verification, model='damo/speech_ecapa-tdnn_sv_en_voxceleb_16k')
+ >>> print(p([audio_1, audio_2]))
+
+ """
+
+ def __init__(self, model: InputModel, **kwargs):
+ """use `model` to create a speaker verification pipeline for prediction
+ Args:
+ model (str): a valid official model id
+ """
+ super().__init__(model=model, **kwargs)
+ self.model_config = self.model.model_config
+ self.config = self.model.other_config
+ self.thr = self.config['yesOrno_thr']
+
+ def __call__(self,
+ in_audios: List[str],
+ thr: float = None) -> Dict[str, Any]:
+ if thr is not None:
+ self.thr = thr
+ if self.thr < -1 or self.thr > 1:
+ raise ValueError(
+ 'modelscope error: the thr value should be in [-1, 1], but found to be %f.'
+ % self.thr)
+ outputs = self.preprocess(in_audios)
+ outputs = self.forward(outputs)
+ outputs = self.postprocess(outputs)
+
+ return outputs
+
+ def forward(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+ emb1 = self.model(inputs['data1'])
+ emb2 = self.model(inputs['data2'])
+
+ return {'emb1': emb1, 'emb2': emb2}
+
+ def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+ score = self.compute_cos_similarity(inputs['emb1'], inputs['emb2'])
+ score = round(score, 5)
+ if score >= self.thr:
+ ans = 'yes'
+ else:
+ ans = 'no'
+
+ return {OutputKeys.SCORE: score, OutputKeys.TEXT: ans}
+
+ def preprocess(self, inputs: List[str],
+ **preprocess_params) -> Dict[str, Any]:
+ if len(inputs) != 2:
+ raise ValueError(
+ 'modelscope error: Two input audio files are required.')
+ output = {}
+ for i in range(len(inputs)):
+ if isinstance(inputs[i], str):
+ file_bytes = File.read(inputs[i])
+ data, fs = sf.read(io.BytesIO(file_bytes), dtype='float32')
+ if len(data.shape) == 2:
+ data = data[:, 0]
+ if fs != self.model_config['sample_rate']:
+ raise ValueError(
+ 'modelscope error: Only %d Hz sample rate files are supported'
+ % self.model_config['sample_rate'])
+ output['data%d' %
+ (i + 1)] = torch.from_numpy(data).unsqueeze(0)
+ else:
+ raise ValueError(
+ 'modelscope error: The %d-th input type is temporarily restricted to an audio file address'
+ % i)
+ return output
+
+ def compute_cos_similarity(self, emb1: torch.Tensor,
+ emb2: torch.Tensor) -> float:
+ assert len(emb1.shape) == 2 and len(emb2.shape) == 2
+ cos = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
+ cosine = cos(emb1, emb2)
+ return cosine.item()
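
The verification decision in this pipeline reduces to a cosine similarity between the two embeddings compared against the configured `yesOrno_thr`. A standalone sketch of just that scoring step (threshold and embedding dimension are illustrative):

    # Isolated version of the postprocess/compute_cos_similarity logic above.
    import torch

    def verify(emb1: torch.Tensor, emb2: torch.Tensor, thr: float = 0.5) -> dict:
        # emb1, emb2: [batch, dim] speaker embeddings
        cos = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
        score = round(cos(emb1, emb2).item(), 5)
        return {'score': score, 'text': 'yes' if score >= thr else 'no'}

    print(verify(torch.randn(1, 192), torch.randn(1, 192)))  # random embeddings, random verdict
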
diff --git a/modelscope/pipelines/audio/speaker_verification_pipeline.py b/modelscope/pipelines/audio/speaker_verification_pipeline.py
index 97e73627..e576885a 100644
--- a/modelscope/pipelines/audio/speaker_verification_pipeline.py
+++ b/modelscope/pipelines/audio/speaker_verification_pipeline.py
@@ -41,7 +41,10 @@ class SpeakerVerificationPipeline(Pipeline):
"""
- def __init__(self, model: Union[Model, str] = None, **kwargs):
+ def __init__(self,
+ model: Union[Model, str] = None,
+ ngpu: int = 1,
+ **kwargs):
"""use `model` to create an asr pipeline for prediction
"""
super().__init__(model=model, **kwargs)
@@ -67,6 +70,7 @@ class SpeakerVerificationPipeline(Pipeline):
embedding_node=self.cmd['embedding_node'],
sv_threshold=self.cmd['sv_threshold'],
param_dict=self.cmd['param_dict'],
+ **kwargs,
)
def __call__(self,
@@ -168,6 +172,7 @@ class SpeakerVerificationPipeline(Pipeline):
cmd[user_args].update(extra_args[user_args])
else:
cmd[user_args] = extra_args[user_args]
+ del extra_args[user_args]
return cmd
diff --git a/modelscope/pipelines/audio/speaker_verification_rdino_pipeline.py b/modelscope/pipelines/audio/speaker_verification_rdino_pipeline.py
new file mode 100644
index 00000000..dd08ccf4
--- /dev/null
+++ b/modelscope/pipelines/audio/speaker_verification_rdino_pipeline.py
@@ -0,0 +1,110 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import io
+from typing import Any, Dict, List, Union
+
+import soundfile as sf
+import torch
+
+from modelscope.fileio import File
+from modelscope.metainfo import Pipelines
+from modelscope.outputs import OutputKeys
+from modelscope.pipelines.base import InputModel, Pipeline
+from modelscope.pipelines.builder import PIPELINES
+from modelscope.utils.constant import Tasks
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+
+@PIPELINES.register_module(
+ Tasks.speaker_verification,
+ module_name=Pipelines.speaker_verification_rdino)
+class RDINO_Pipeline(Pipeline):
+ """Speaker Verification Inference Pipeline
+ use `model` to create a Speaker Verification pipeline.
+
+ Args:
+ model (SpeakerVerificationPipeline): A model instance, or a local model dir, or a model id in the model hub.
+ kwargs (dict, `optional`):
+ Extra kwargs passed into the pipeline's constructor.
+ Example:
+ >>> from modelscope.pipelines import pipeline
+ >>> from modelscope.utils.constant import Tasks
+ >>> p = pipeline(
+ >>> task=Tasks.speaker_verification, model='damo/speech_ecapa-tdnn_sv_en_voxceleb_16k')
+ >>> print(p([audio_1, audio_2]))
+
+ """
+
+ def __init__(self, model: InputModel, **kwargs):
+ """use `model` to create a speaker verification pipeline for prediction
+ Args:
+ model (str): a valid official model id
+ """
+ super().__init__(model=model, **kwargs)
+ self.model_config = self.model.model_config
+ self.config = self.model.other_config
+ self.thr = self.config['yesOrno_thr']
+
+ def __call__(self,
+ in_audios: List[str],
+ thr: float = None) -> Dict[str, Any]:
+ if thr is not None:
+ self.thr = thr
+ if self.thr < -1 or self.thr > 1:
+ raise ValueError(
+ 'modelscope error: the thr value should be in [-1, 1], but found to be %f.'
+ % self.thr)
+ outputs = self.preprocess(in_audios)
+ outputs = self.forward(outputs)
+ outputs = self.postprocess(outputs)
+
+ return outputs
+
+ def forward(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+ emb1 = self.model(inputs['data1'])
+ emb2 = self.model(inputs['data2'])
+
+ return {'emb1': emb1, 'emb2': emb2}
+
+ def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+ score = self.compute_cos_similarity(inputs['emb1'], inputs['emb2'])
+ score = round(score, 5)
+ if score >= self.thr:
+ ans = 'yes'
+ else:
+ ans = 'no'
+
+ return {OutputKeys.SCORE: score, OutputKeys.TEXT: ans}
+
+ def preprocess(self, inputs: List[str],
+ **preprocess_params) -> Dict[str, Any]:
+ if len(inputs) != 2:
+ raise ValueError(
+ 'modelscope error: Two input audio files are required.')
+ output = {}
+ for i in range(len(inputs)):
+ if isinstance(inputs[i], str):
+ file_bytes = File.read(inputs[i])
+ data, fs = sf.read(io.BytesIO(file_bytes), dtype='float32')
+ if len(data.shape) == 2:
+ data = data[:, 0]
+ if fs != self.model_config['sample_rate']:
+ raise ValueError(
+ 'modelscope error: Only %d Hz sample rate files are supported'
+ % self.model_config['sample_rate'])
+ output['data%d' %
+ (i + 1)] = torch.from_numpy(data).unsqueeze(0)
+ else:
+ raise ValueError(
+ 'modelscope error: The %d-th input type is temporarily restricted to an audio file address'
+ % i)
+ return output
+
+ def compute_cos_similarity(self, emb1: torch.Tensor,
+ emb2: torch.Tensor) -> float:
+ assert len(emb1.shape) == 2 and len(emb2.shape) == 2
+ cos = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
+ cosine = cos(emb1, emb2)
+ return cosine.item()
diff --git a/modelscope/pipelines/audio/timestamp_pipeline.py b/modelscope/pipelines/audio/timestamp_pipeline.py
index b60fef05..0968b359 100644
--- a/modelscope/pipelines/audio/timestamp_pipeline.py
+++ b/modelscope/pipelines/audio/timestamp_pipeline.py
@@ -40,7 +40,10 @@ class TimestampPipeline(Pipeline):
"""
- def __init__(self, model: Union[Model, str] = None, **kwargs):
+ def __init__(self,
+ model: Union[Model, str] = None,
+ ngpu: int = 1,
+ **kwargs):
"""
Use `model` and `preprocessor` to create an asr pipeline for prediction
Args:
@@ -84,7 +87,9 @@ class TimestampPipeline(Pipeline):
allow_variable_data_keys=self.cmd['allow_variable_data_keys'],
split_with_space=self.cmd['split_with_space'],
seg_dict_file=self.cmd['seg_dict_file'],
- param_dict=self.cmd['param_dict'])
+ param_dict=self.cmd['param_dict'],
+ **kwargs,
+ )
def __call__(self,
audio_in: Union[str, bytes],
@@ -264,6 +269,7 @@ class TimestampPipeline(Pipeline):
for user_args in user_args_dict:
if user_args in extra_args and extra_args[user_args] is not None:
cmd[user_args] = extra_args[user_args]
+ del extra_args[user_args]
return cmd
diff --git a/modelscope/pipelines/audio/voice_activity_detection_pipeline.py b/modelscope/pipelines/audio/voice_activity_detection_pipeline.py
index c1c6e01f..0121b242 100644
--- a/modelscope/pipelines/audio/voice_activity_detection_pipeline.py
+++ b/modelscope/pipelines/audio/voice_activity_detection_pipeline.py
@@ -41,7 +41,10 @@ class VoiceActivityDetectionPipeline(Pipeline):
"""
- def __init__(self, model: Union[Model, str] = None, **kwargs):
+ def __init__(self,
+ model: Union[Model, str] = None,
+ ngpu: int = 1,
+ **kwargs):
"""use `model` to create an vad pipeline for prediction
"""
super().__init__(model=model, **kwargs)
@@ -60,7 +63,9 @@ class VoiceActivityDetectionPipeline(Pipeline):
key_file=self.cmd['key_file'],
vad_infer_config=self.cmd['vad_infer_config'],
vad_model_file=self.cmd['vad_model_file'],
- vad_cmvn_file=self.cmd['vad_cmvn_file'])
+ vad_cmvn_file=self.cmd['vad_cmvn_file'],
+ **kwargs,
+ )
def __call__(self,
audio_in: Union[str, bytes],
@@ -209,6 +214,7 @@ class VoiceActivityDetectionPipeline(Pipeline):
for user_args in user_args_dict:
if user_args in extra_args and extra_args[user_args] is not None:
cmd[user_args] = extra_args[user_args]
+ del extra_args[user_args]
return cmd
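
The `del extra_args[user_args]` lines added across these audio pipelines all serve the same purpose: once a recognized option has been copied into `cmd`, it is removed from the leftover kwargs, which keeps the later `**kwargs` forwarding into the FunASR launcher from passing the same keyword twice. A minimal sketch of the pattern (function and variable names are illustrative):

    # Consume recognized options from kwargs; dict.pop combines the copy and the delete.
    def consume_user_args(cmd: dict, extra_args: dict, user_args: list) -> None:
        for name in user_args:
            if name in extra_args and extra_args[name] is not None:
                cmd[name] = extra_args.pop(name)

    cmd = {'output_dir': None, 'batch_size': 1}
    kwargs = {'output_dir': '/tmp/out', 'other_backend_flag': True}
    consume_user_args(cmd, kwargs, ['output_dir', 'batch_size'])
    print(cmd)     # output_dir picked up from the user kwargs
    print(kwargs)  # only options the backend should still receive remain
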
diff --git a/modelscope/pipelines/cv/__init__.py b/modelscope/pipelines/cv/__init__.py
index 54289644..e9d7a785 100644
--- a/modelscope/pipelines/cv/__init__.py
+++ b/modelscope/pipelines/cv/__init__.py
@@ -9,7 +9,6 @@ if TYPE_CHECKING:
from .animal_recognition_pipeline import AnimalRecognitionPipeline
from .body_2d_keypoints_pipeline import Body2DKeypointsPipeline
from .body_3d_keypoints_pipeline import Body3DKeypointsPipeline
- from .hand_2d_keypoints_pipeline import Hand2DKeypointsPipeline
from .cmdssl_video_embedding_pipeline import CMDSSLVideoEmbeddingPipeline
from .card_detection_pipeline import CardDetectionPipeline
from .hicossl_video_embedding_pipeline import HICOSSLVideoEmbeddingPipeline
@@ -29,13 +28,10 @@ if TYPE_CHECKING:
from .image_classification_pipeline import GeneralImageClassificationPipeline
from .image_color_enhance_pipeline import ImageColorEnhancePipeline
from .image_colorization_pipeline import ImageColorizationPipeline
- from .image_classification_pipeline import ImageClassificationPipeline
from .image_denoise_pipeline import ImageDenoisePipeline
from .image_deblur_pipeline import ImageDeblurPipeline
from .image_instance_segmentation_pipeline import ImageInstanceSegmentationPipeline
from .image_matting_pipeline import ImageMattingPipeline
- from .image_panoptic_segmentation_pipeline import ImagePanopticSegmentationPipeline
- from .image_semantic_segmentation_pipeline import ImagePanopticSegmentationEasyCVPipeline
from .image_portrait_enhancement_pipeline import ImagePortraitEnhancementPipeline
from .image_reid_person_pipeline import ImageReidPersonPipeline
from .image_semantic_segmentation_pipeline import ImageSemanticSegmentationPipeline
@@ -46,7 +42,6 @@ if TYPE_CHECKING:
from .image_inpainting_pipeline import ImageInpaintingPipeline
from .image_paintbyexample_pipeline import ImagePaintbyexamplePipeline
from .product_retrieval_embedding_pipeline import ProductRetrievalEmbeddingPipeline
- from .realtime_object_detection_pipeline import RealtimeObjectDetectionPipeline
from .live_category_pipeline import LiveCategoryPipeline
from .ocr_detection_pipeline import OCRDetectionPipeline
from .ocr_recognition_pipeline import OCRRecognitionPipeline
@@ -59,10 +54,6 @@ if TYPE_CHECKING:
from .video_category_pipeline import VideoCategoryPipeline
from .virtual_try_on_pipeline import VirtualTryonPipeline
from .shop_segmentation_pipleline import ShopSegmentationPipeline
- from .easycv_pipelines import (EasyCVDetectionPipeline,
- EasyCVSegmentationPipeline,
- Face2DKeypointsPipeline,
- HumanWholebodyKeypointsPipeline)
from .text_driven_segmentation_pipleline import TextDrivenSegmentationPipeline
from .movie_scene_segmentation_pipeline import MovieSceneSegmentationPipeline
from .mog_face_detection_pipeline import MogFaceDetectionPipeline
@@ -123,7 +114,6 @@ else:
'animal_recognition_pipeline': ['AnimalRecognitionPipeline'],
'body_2d_keypoints_pipeline': ['Body2DKeypointsPipeline'],
'body_3d_keypoints_pipeline': ['Body3DKeypointsPipeline'],
- 'hand_2d_keypoints_pipeline': ['Hand2DKeypointsPipeline'],
'card_detection_pipeline': ['CardDetectionPipeline'],
'cmdssl_video_embedding_pipeline': ['CMDSSLVideoEmbeddingPipeline'],
'hicossl_video_embedding_pipeline': ['HICOSSLVideoEmbeddingPipeline'],
@@ -140,7 +130,7 @@ else:
'face_recognition_onnx_fm_pipeline': ['FaceRecognitionOnnxFmPipeline'],
'general_recognition_pipeline': ['GeneralRecognitionPipeline'],
'image_classification_pipeline':
- ['GeneralImageClassificationPipeline', 'ImageClassificationPipeline'],
+ ['GeneralImageClassificationPipeline'],
'image_cartoon_pipeline': ['ImageCartoonPipeline'],
'image_denoise_pipeline': ['ImageDenoisePipeline'],
'image_deblur_pipeline': ['ImageDeblurPipeline'],
@@ -149,10 +139,6 @@ else:
'image_instance_segmentation_pipeline':
['ImageInstanceSegmentationPipeline'],
'image_matting_pipeline': ['ImageMattingPipeline'],
- 'image_panoptic_segmentation_pipeline': [
- 'ImagePanopticSegmentationPipeline',
- 'ImagePanopticSegmentationEasyCVPipeline'
- ],
'image_portrait_enhancement_pipeline':
['ImagePortraitEnhancementPipeline'],
'image_reid_person_pipeline': ['ImageReidPersonPipeline'],
@@ -164,8 +150,6 @@ else:
['Image2ImageTranslationPipeline'],
'product_retrieval_embedding_pipeline':
['ProductRetrievalEmbeddingPipeline'],
- 'realtime_object_detection_pipeline':
- ['RealtimeObjectDetectionPipeline'],
'live_category_pipeline': ['LiveCategoryPipeline'],
'image_to_image_generate_pipeline': ['Image2ImageGenerationPipeline'],
'image_inpainting_pipeline': ['ImageInpaintingPipeline'],
@@ -180,12 +164,6 @@ else:
'video_category_pipeline': ['VideoCategoryPipeline'],
'virtual_try_on_pipeline': ['VirtualTryonPipeline'],
'shop_segmentation_pipleline': ['ShopSegmentationPipeline'],
- 'easycv_pipelines': [
- 'EasyCVDetectionPipeline',
- 'EasyCVSegmentationPipeline',
- 'Face2DKeypointsPipeline',
- 'HumanWholebodyKeypointsPipeline',
- ],
'text_driven_segmentation_pipleline':
['TextDrivenSegmentationPipeline'],
'movie_scene_segmentation_pipeline':
@@ -202,9 +180,8 @@ else:
['FaceAttributeRecognitionPipeline'],
'mtcnn_face_detection_pipeline': ['MtcnnFaceDetectionPipeline'],
'hand_static_pipeline': ['HandStaticPipeline'],
- 'referring_video_object_segmentation_pipeline': [
- 'ReferringVideoObjectSegmentationPipeline'
- ],
+ 'referring_video_object_segmentation_pipeline':
+ ['ReferringVideoObjectSegmentationPipeline'],
'language_guided_video_summarization_pipeline': [
'LanguageGuidedVideoSummarizationPipeline'
],
diff --git a/modelscope/pipelines/cv/easycv_pipelines/__init__.py b/modelscope/pipelines/cv/easycv_pipelines/__init__.py
deleted file mode 100644
index e0209b85..00000000
--- a/modelscope/pipelines/cv/easycv_pipelines/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import TYPE_CHECKING
-
-from modelscope.utils.import_utils import LazyImportModule
-
-if TYPE_CHECKING:
- from .detection_pipeline import EasyCVDetectionPipeline
- from .segmentation_pipeline import EasyCVSegmentationPipeline
- from .face_2d_keypoints_pipeline import Face2DKeypointsPipeline
- from .human_wholebody_keypoint_pipeline import HumanWholebodyKeypointsPipeline
-else:
- _import_structure = {
- 'detection_pipeline': ['EasyCVDetectionPipeline'],
- 'segmentation_pipeline': ['EasyCVSegmentationPipeline'],
- 'face_2d_keypoints_pipeline': ['Face2DKeypointsPipeline'],
- 'human_wholebody_keypoint_pipeline':
- ['HumanWholebodyKeypointsPipeline'],
- }
-
- import sys
-
- sys.modules[__name__] = LazyImportModule(
- __name__,
- globals()['__file__'],
- _import_structure,
- module_spec=__spec__,
- extra_objects={},
- )
diff --git a/modelscope/pipelines/cv/easycv_pipelines/base.py b/modelscope/pipelines/cv/easycv_pipelines/base.py
deleted file mode 100644
index 0a31be94..00000000
--- a/modelscope/pipelines/cv/easycv_pipelines/base.py
+++ /dev/null
@@ -1,123 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-import glob
-import os
-import os.path as osp
-from typing import Any
-
-import numpy as np
-from easycv.utils.ms_utils import EasyCVMeta
-from PIL import ImageFile
-
-from modelscope.hub.snapshot_download import snapshot_download
-from modelscope.pipelines.util import is_official_hub_path
-from modelscope.utils.config import Config
-from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, Invoke,
- ModelFile, ThirdParty)
-from modelscope.utils.device import create_device
-
-
-class EasyCVPipeline(object):
- """Base pipeline for EasyCV.
- Loading configuration file of modelscope style by default,
- but it is actually use the predictor api of easycv to predict.
- So here we do some adaptation work for configuration and predict api.
- """
-
- def __init__(self, model: str, model_file_pattern='*.pt', *args, **kwargs):
- """
- model (str): model id on modelscope hub or local model path.
- model_file_pattern (str): model file pattern.
-
- """
- self.model_file_pattern = model_file_pattern
-
- assert isinstance(model, str)
- if osp.exists(model):
- model_dir = model
- else:
- assert is_official_hub_path(
- model), 'Only support local model path and official hub path!'
- model_dir = snapshot_download(
- model_id=model,
- revision=DEFAULT_MODEL_REVISION,
- user_agent={
- Invoke.KEY: Invoke.PIPELINE,
- ThirdParty.KEY: ThirdParty.EASYCV
- })
-
- assert osp.isdir(model_dir)
- model_files = glob.glob(
- os.path.join(model_dir, self.model_file_pattern))
- assert len(
- model_files
- ) == 1, f'Need one model file, but find {len(model_files)}: {model_files}'
-
- model_path = model_files[0]
- self.model_path = model_path
- self.model_dir = model_dir
-
- # get configuration file from source model dir
- self.config_file = os.path.join(model_dir, ModelFile.CONFIGURATION)
- assert os.path.exists(
- self.config_file
- ), f'Not find "{ModelFile.CONFIGURATION}" in model directory!'
-
- self.cfg = Config.from_file(self.config_file)
- if 'device' in kwargs:
- kwargs['device'] = create_device(kwargs['device'])
- if 'predictor_config' in kwargs:
- kwargs.pop('predictor_config')
- self.predict_op = self._build_predict_op(**kwargs)
-
- def _build_predict_op(self, **kwargs):
- """Build EasyCV predictor."""
- from easycv.predictors.builder import build_predictor
-
- easycv_config = self._to_easycv_config()
- pipeline_op = build_predictor(self.cfg.pipeline.predictor_config, {
- 'model_path': self.model_path,
- 'config_file': easycv_config,
- **kwargs
- })
- return pipeline_op
-
- def _to_easycv_config(self):
- """Adapt to EasyCV predictor."""
- # TODO: refine config compatibility problems
-
- easycv_arch = self.cfg.model.pop(EasyCVMeta.ARCH, None)
- model_cfg = self.cfg.model
- # Revert to the configuration of easycv
- if easycv_arch is not None:
- model_cfg.update(easycv_arch)
-
- easycv_config = Config(dict(model=model_cfg))
-
- reserved_keys = []
- if hasattr(self.cfg, EasyCVMeta.META):
- easycv_meta_cfg = getattr(self.cfg, EasyCVMeta.META)
- reserved_keys = easycv_meta_cfg.get(EasyCVMeta.RESERVED_KEYS, [])
- for key in reserved_keys:
- easycv_config.merge_from_dict({key: getattr(self.cfg, key)})
- if 'test_pipeline' not in reserved_keys:
- easycv_config.merge_from_dict(
- {'test_pipeline': self.cfg.dataset.val.get('pipeline', [])})
-
- return easycv_config
-
- def _is_single_inputs(self, inputs):
- if isinstance(inputs, str) or (isinstance(inputs, list)
- and len(inputs) == 1) or isinstance(
- inputs, np.ndarray) or isinstance(
- inputs, ImageFile.ImageFile):
- return True
-
- return False
-
- def __call__(self, inputs) -> Any:
- outputs = self.predict_op(inputs)
-
- if self._is_single_inputs(inputs):
- outputs = outputs[0]
-
- return outputs
diff --git a/modelscope/pipelines/cv/easycv_pipelines/detection_pipeline.py b/modelscope/pipelines/cv/easycv_pipelines/detection_pipeline.py
deleted file mode 100644
index 2a95ebb4..00000000
--- a/modelscope/pipelines/cv/easycv_pipelines/detection_pipeline.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import Any
-
-from modelscope.metainfo import Pipelines
-from modelscope.outputs import OutputKeys
-from modelscope.pipelines.builder import PIPELINES
-from modelscope.utils.constant import ModelFile, Tasks
-from modelscope.utils.cv.image_utils import \
- show_image_object_detection_auto_result
-from .base import EasyCVPipeline
-
-
-@PIPELINES.register_module(
- Tasks.image_object_detection, module_name=Pipelines.easycv_detection)
-@PIPELINES.register_module(
- Tasks.image_object_detection,
- module_name=Pipelines.image_object_detection_auto)
-@PIPELINES.register_module(
- Tasks.domain_specific_object_detection,
- module_name=Pipelines.hand_detection)
-class EasyCVDetectionPipeline(EasyCVPipeline):
- """Pipeline for easycv detection task."""
-
- def __init__(self,
- model: str,
- model_file_pattern=ModelFile.TORCH_MODEL_FILE,
- *args,
- **kwargs):
- """
- model (str): model id on modelscope hub or local model path.
- model_file_pattern (str): model file pattern.
- """
-
- super(EasyCVDetectionPipeline, self).__init__(
- model=model,
- model_file_pattern=model_file_pattern,
- *args,
- **kwargs)
-
- def show_result(self, img_path, result, save_path=None):
- show_image_object_detection_auto_result(img_path, result, save_path)
-
- def __call__(self, inputs) -> Any:
- outputs = self.predict_op(inputs)
-
- scores = []
- labels = []
- boxes = []
- for output in outputs:
- for score, label, box in zip(output['detection_scores'],
- output['detection_classes'],
- output['detection_boxes']):
- scores.append(score)
- labels.append(self.cfg.CLASSES[label])
- boxes.append([b for b in box])
-
- results = [{
- OutputKeys.SCORES: scores,
- OutputKeys.LABELS: labels,
- OutputKeys.BOXES: boxes
- } for output in outputs]
-
- if self._is_single_inputs(inputs):
- results = results[0]
-
- return results
diff --git a/modelscope/pipelines/cv/easycv_pipelines/face_2d_keypoints_pipeline.py b/modelscope/pipelines/cv/easycv_pipelines/face_2d_keypoints_pipeline.py
deleted file mode 100644
index 0ddc6a6c..00000000
--- a/modelscope/pipelines/cv/easycv_pipelines/face_2d_keypoints_pipeline.py
+++ /dev/null
@@ -1,244 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-import copy
-import math
-from typing import Any
-
-import cv2
-import numpy as np
-
-from modelscope.metainfo import Pipelines
-from modelscope.outputs import OutputKeys
-from modelscope.pipelines import pipeline
-from modelscope.pipelines.builder import PIPELINES
-from modelscope.preprocessors import LoadImage
-from modelscope.utils.constant import ModelFile, Tasks
-from modelscope.utils.logger import get_logger
-from .base import EasyCVPipeline
-
-logger = get_logger()
-
-
-@PIPELINES.register_module(
- Tasks.face_2d_keypoints, module_name=Pipelines.face_2d_keypoints)
-class Face2DKeypointsPipeline(EasyCVPipeline):
- """Pipeline for face 2d keypoints detection."""
-
- def __init__(self,
- model: str,
- model_file_pattern=ModelFile.TORCH_MODEL_FILE,
- *args,
- **kwargs):
- """
- model (str): model id on modelscope hub or local model path.
- model_file_pattern (str): model file pattern.
- """
-
- super(Face2DKeypointsPipeline, self).__init__(
- model=model,
- model_file_pattern=model_file_pattern,
- *args,
- **kwargs)
-
- # face detect pipeline
- det_model_id = 'damo/cv_resnet_facedetection_scrfd10gkps'
- self.face_detection = pipeline(
- Tasks.face_detection, model=det_model_id)
-
- def show_result(self, img, points, scale=2, save_path=None):
- return self.predict_op.show_result(img, points, scale, save_path)
-
- def _choose_face(self, det_result, min_face=10):
- """
- choose face with maximum area
- Args:
- det_result: output of face detection pipeline
- min_face: minimum size of valid face w/h
- """
- bboxes = np.array(det_result[OutputKeys.BOXES])
- landmarks = np.array(det_result[OutputKeys.KEYPOINTS])
- if bboxes.shape[0] == 0:
- logger.warning('No face detected!')
- return None
- # face idx with enough size
- face_idx = []
- for i in range(bboxes.shape[0]):
- box = bboxes[i]
- if (box[2] - box[0]) >= min_face and (box[3] - box[1]) >= min_face:
- face_idx += [i]
- if len(face_idx) == 0:
- logger.warning(
- f'Face size not enough, less than {min_face}x{min_face}!')
- return None
- bboxes = bboxes[face_idx]
- landmarks = landmarks[face_idx]
-
- return bboxes, landmarks
-
- def expend_box(self, box, w, h, scalex=0.3, scaley=0.5):
- x1 = box[0]
- y1 = box[1]
- wb = box[2] - x1
- hb = box[3] - y1
- deltax = int(wb * scalex)
- deltay1 = int(hb * scaley)
- deltay2 = int(hb * scalex)
- x1 = x1 - deltax
- y1 = y1 - deltay1
- if x1 < 0:
- deltax = deltax + x1
- x1 = 0
- if y1 < 0:
- deltay1 = deltay1 + y1
- y1 = 0
- x2 = x1 + wb + 2 * deltax
- y2 = y1 + hb + deltay1 + deltay2
- x2 = np.clip(x2, 0, w - 1)
- y2 = np.clip(y2, 0, h - 1)
- return [x1, y1, x2, y2]
-
- def rotate_point(self, angle, center, landmark):
- rad = angle * np.pi / 180.0
- alpha = np.cos(rad)
- beta = np.sin(rad)
- M = np.zeros((2, 3), dtype=np.float32)
- M[0, 0] = alpha
- M[0, 1] = beta
- M[0, 2] = (1 - alpha) * center[0] - beta * center[1]
- M[1, 0] = -beta
- M[1, 1] = alpha
- M[1, 2] = beta * center[0] + (1 - alpha) * center[1]
-
- landmark_ = np.asarray([(M[0, 0] * x + M[0, 1] * y + M[0, 2],
- M[1, 0] * x + M[1, 1] * y + M[1, 2])
- for (x, y) in landmark])
- return M, landmark_
-
- def rotate_crop_img(self, img, pts, M):
- imgT = cv2.warpAffine(img, M, (int(img.shape[1]), int(img.shape[0])))
-
- x1 = pts[5][0]
- x2 = pts[5][0]
- y1 = pts[5][1]
- y2 = pts[5][1]
- for i in range(0, 9):
- x1 = min(x1, pts[i][0])
- x2 = max(x2, pts[i][0])
- y1 = min(y1, pts[i][1])
- y2 = max(y2, pts[i][1])
-
- height, width, _ = imgT.shape
- x1 = min(max(0, int(x1)), width)
- y1 = min(max(0, int(y1)), height)
- x2 = min(max(0, int(x2)), width)
- y2 = min(max(0, int(y2)), height)
- sub_imgT = imgT[y1:y2, x1:x2]
-
- return sub_imgT, imgT, [x1, y1, x2, y2]
-
- def crop_img(self, imgT, pts):
- enlarge_ratio = 1.1
-
- x1 = np.min(pts[:, 0])
- x2 = np.max(pts[:, 0])
- y1 = np.min(pts[:, 1])
- y2 = np.max(pts[:, 1])
- w = x2 - x1 + 1
- h = y2 - y1 + 1
- x1 = int(x1 - (enlarge_ratio - 1.0) / 2.0 * w)
- y1 = int(y1 - (enlarge_ratio - 1.0) / 2.0 * h)
- x1 = max(0, x1)
- y1 = max(0, y1)
-
- new_w = int(enlarge_ratio * w)
- new_h = int(enlarge_ratio * h)
- new_x1 = x1
- new_y1 = y1
- new_x2 = new_x1 + new_w
- new_y2 = new_y1 + new_h
-
- height, width, _ = imgT.shape
-
- new_x1 = min(max(0, new_x1), width)
- new_y1 = min(max(0, new_y1), height)
- new_x2 = max(min(width, new_x2), 0)
- new_y2 = max(min(height, new_y2), 0)
-
- sub_imgT = imgT[new_y1:new_y2, new_x1:new_x2]
-
- return sub_imgT, [new_x1, new_y1, new_x2, new_y2]
-
- def __call__(self, inputs) -> Any:
- img = LoadImage.convert_to_ndarray(inputs)
- h, w, c = img.shape
- img_rgb = copy.deepcopy(img)
- img_rgb = img_rgb[:, :, ::-1]
- det_result = self.face_detection(img_rgb)
-
- bboxes = np.array(det_result[OutputKeys.BOXES])
- if bboxes.shape[0] == 0:
- logger.warning('No face detected!')
- results = {
- OutputKeys.KEYPOINTS: [],
- OutputKeys.POSES: [],
- OutputKeys.BOXES: []
- }
- return results
-
- boxes, keypoints = self._choose_face(det_result)
-
- output_boxes = []
- output_keypoints = []
- output_poses = []
- for index, box_ori in enumerate(boxes):
- box = self.expend_box(box_ori, w, h, scalex=0.1, scaley=0.1)
- y0 = int(box[1])
- y1 = int(box[3])
- x0 = int(box[0])
- x1 = int(box[2])
- sub_img = img[y0:y1, x0:x1]
-
- keypoint = keypoints[index]
- pts = [[keypoint[0], keypoint[1]], [keypoint[2], keypoint[3]],
- [keypoint[4], keypoint[5]], [keypoint[6], keypoint[7]],
- [keypoint[8], keypoint[9]], [box[0], box[1]],
- [box[2], box[1]], [box[0], box[3]], [box[2], box[3]]]
- # radian
- angle = math.atan2((pts[1][1] - pts[0][1]),
- (pts[1][0] - pts[0][0]))
- # angle
- theta = angle * (180 / np.pi)
-
- center = [w // 2, h // 2]
- cx, cy = center
- M, landmark_ = self.rotate_point(theta, (cx, cy), pts)
- sub_imgT, imgT, bbox = self.rotate_crop_img(img, landmark_, M)
-
- outputs = self.predict_op([sub_imgT])[0]
- tmp_keypoints = outputs['point']
-
- for idx in range(0, len(tmp_keypoints)):
- tmp_keypoints[idx][0] += bbox[0]
- tmp_keypoints[idx][1] += bbox[1]
-
- for idx in range(0, 6):
- sub_img, bbox = self.crop_img(imgT, tmp_keypoints)
- outputs = self.predict_op([sub_img])[0]
- tmp_keypoints = outputs['point']
- for idx in range(0, len(tmp_keypoints)):
- tmp_keypoints[idx][0] += bbox[0]
- tmp_keypoints[idx][1] += bbox[1]
-
- M2, tmp_keypoints = self.rotate_point(-theta, (cx, cy),
- tmp_keypoints)
-
- output_keypoints.append(np.array(tmp_keypoints))
- output_poses.append(np.array(outputs['pose']))
- output_boxes.append(np.array(box_ori))
-
- results = {
- OutputKeys.KEYPOINTS: output_keypoints,
- OutputKeys.POSES: output_poses,
- OutputKeys.BOXES: output_boxes
- }
-
- return results
diff --git a/modelscope/pipelines/cv/easycv_pipelines/human_wholebody_keypoint_pipeline.py b/modelscope/pipelines/cv/easycv_pipelines/human_wholebody_keypoint_pipeline.py
deleted file mode 100644
index 903c4106..00000000
--- a/modelscope/pipelines/cv/easycv_pipelines/human_wholebody_keypoint_pipeline.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-import os.path
-from typing import Any
-
-from modelscope.metainfo import Pipelines
-from modelscope.outputs import OutputKeys
-from modelscope.pipelines.builder import PIPELINES
-from modelscope.utils.constant import ModelFile, Tasks
-from .base import EasyCVPipeline
-
-
-@PIPELINES.register_module(
- Tasks.human_wholebody_keypoint,
- module_name=Pipelines.human_wholebody_keypoint)
-class HumanWholebodyKeypointsPipeline(EasyCVPipeline):
- """Pipeline for human wholebody 2d keypoints detection."""
-
- def __init__(self,
- model: str,
- model_file_pattern=ModelFile.TORCH_MODEL_FILE,
- *args,
- **kwargs):
- """
- model (str): model id on modelscope hub or local model path.
- model_file_pattern (str): model file pattern.
- """
- super(HumanWholebodyKeypointsPipeline, self).__init__(
- model=model,
- model_file_pattern=model_file_pattern,
- *args,
- **kwargs)
-
- def _build_predict_op(self, **kwargs):
- """Build EasyCV predictor."""
- from easycv.predictors.builder import build_predictor
- detection_predictor_type = self.cfg['DETECTION']['type']
- detection_model_path = os.path.join(
- self.model_dir, self.cfg['DETECTION']['model_path'])
- detection_cfg_file = os.path.join(self.model_dir,
- self.cfg['DETECTION']['config_file'])
- detection_score_threshold = self.cfg['DETECTION']['score_threshold']
- self.cfg.pipeline.predictor_config[
- 'detection_predictor_config'] = dict(
- type=detection_predictor_type,
- model_path=detection_model_path,
- config_file=detection_cfg_file,
- score_threshold=detection_score_threshold)
- easycv_config = self._to_easycv_config()
- pipeline_op = build_predictor(self.cfg.pipeline.predictor_config, {
- 'model_path': self.model_path,
- 'config_file': easycv_config,
- **kwargs
- })
- return pipeline_op
-
- def __call__(self, inputs) -> Any:
- outputs = self.predict_op(inputs)
-
- results = [{
- OutputKeys.KEYPOINTS: output['keypoints'],
- OutputKeys.BOXES: output['boxes']
- } for output in outputs]
-
- if self._is_single_inputs(inputs):
- results = results[0]
-
- return results
diff --git a/modelscope/pipelines/cv/easycv_pipelines/segmentation_pipeline.py b/modelscope/pipelines/cv/easycv_pipelines/segmentation_pipeline.py
deleted file mode 100644
index bd09fc9b..00000000
--- a/modelscope/pipelines/cv/easycv_pipelines/segmentation_pipeline.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import Any
-
-import numpy as np
-
-from modelscope.metainfo import Pipelines
-from modelscope.outputs import OutputKeys
-from modelscope.pipelines.builder import PIPELINES
-from modelscope.utils.constant import Tasks
-from .base import EasyCVPipeline
-
-
-@PIPELINES.register_module(
- Tasks.image_segmentation, module_name=Pipelines.easycv_segmentation)
-class EasyCVSegmentationPipeline(EasyCVPipeline):
- """Pipeline for easycv segmentation task."""
-
- def __init__(self, model: str, model_file_pattern='*.pt', *args, **kwargs):
- """
- model (str): model id on modelscope hub or local model path.
- model_file_pattern (str): model file pattern.
- """
-
- super(EasyCVSegmentationPipeline, self).__init__(
- model=model,
- model_file_pattern=model_file_pattern,
- *args,
- **kwargs)
-
- def __call__(self, inputs) -> Any:
- outputs = self.predict_op(inputs)
-
- semantic_result = outputs[0]['seg_pred']
-
- ids = np.unique(semantic_result)[::-1]
- legal_indices = ids != len(self.predict_op.CLASSES) # for VOID label
- ids = ids[legal_indices]
- segms = (semantic_result[None] == ids[:, None, None])
- masks = [it.astype(np.int) for it in segms]
- labels_txt = np.array(self.predict_op.CLASSES)[ids].tolist()
-
- results = {
- OutputKeys.MASKS: masks,
- OutputKeys.LABELS: labels_txt,
- OutputKeys.SCORES: [0.999 for _ in range(len(labels_txt))]
- }
- return results
diff --git a/modelscope/pipelines/cv/face_reconstruction_pipeline.py b/modelscope/pipelines/cv/face_reconstruction_pipeline.py
index f8240fc0..b9a8e320 100644
--- a/modelscope/pipelines/cv/face_reconstruction_pipeline.py
+++ b/modelscope/pipelines/cv/face_reconstruction_pipeline.py
@@ -134,7 +134,7 @@ class FaceReconstructionPipeline(Pipeline):
img = LoadImage.convert_to_ndarray(input)
if len(img.shape) == 2:
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
- img = img.astype(np.float)
+ img = img.astype(float)
result = {'img': img}
return result
diff --git a/modelscope/pipelines/cv/fast_instance_segmentation_pipeline.py b/modelscope/pipelines/cv/fast_instance_segmentation_pipeline.py
new file mode 100644
index 00000000..6ee341de
--- /dev/null
+++ b/modelscope/pipelines/cv/fast_instance_segmentation_pipeline.py
@@ -0,0 +1,116 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+from typing import Any, Dict, Optional, Union
+
+import numpy as np
+import torch
+import torchvision.transforms as T
+
+from modelscope.metainfo import Pipelines
+from modelscope.models.cv.image_instance_segmentation import FastInst
+from modelscope.outputs import OutputKeys
+from modelscope.pipelines.base import Input, Pipeline
+from modelscope.pipelines.builder import PIPELINES
+from modelscope.preprocessors import LoadImage
+from modelscope.utils.constant import Tasks
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+
+@PIPELINES.register_module(
+ Tasks.image_segmentation, module_name=Pipelines.fast_instance_segmentation)
+class FastInstanceSegmentationPipeline(Pipeline):
+
+ def __init__(self,
+ model: Union[FastInst, str],
+ preprocessor: Optional = None,
+ **kwargs):
+ r"""The inference pipeline for fastinst models.
+
+ The model outputs a dict with keys of `scores`, `labels`, and `masks`.
+
+ Args:
+ model (`str` or `Model` or module instance): A model instance or a model local dir
+ or a model id in the model hub.
+ preprocessor (`Preprocessor`, `optional`): A Preprocessor instance.
+ kwargs (dict, `optional`):
+ Extra kwargs passed into the preprocessor's constructor.
+
+ Examples:
+ >>> from modelscope.outputs import OutputKeys
+ >>> from modelscope.pipelines import pipeline
+ >>> pipeline_ins = pipeline('image-segmentation',
+ model='damo/cv_resnet50_fast-instance-segmentation_coco')
+ >>> input_img = 'https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/image_instance_segmentation.jpg'
+ >>> print(pipeline_ins(input_img)[OutputKeys.LABELS])
+ """
+ super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+ self.model.eval()
+
+ def _get_preprocess_shape(self, oldh, oldw, short_edge_length, max_size):
+ h, w = oldh, oldw
+ size = short_edge_length * 1.0
+ scale = size / min(h, w)
+ if h < w:
+ newh, neww = size, scale * w
+ else:
+ newh, neww = scale * h, size
+ if max(newh, neww) > max_size:
+ scale = max_size * 1.0 / max(newh, neww)
+ newh = newh * scale
+ neww = neww * scale
+ neww = int(neww + 0.5)
+ newh = int(newh + 0.5)
+ return (newh, neww)
+
+ def preprocess(self,
+ input: Input,
+ min_size=640,
+ max_size=1333) -> Dict[str, Any]:
+ image = LoadImage.convert_to_img(input)
+ w, h = image.size[:2]
+ dataset_dict = {'width': w, 'height': h}
+ new_h, new_w = self._get_preprocess_shape(h, w, min_size, max_size)
+ test_transforms = T.Compose([
+ T.Resize((new_h, new_w)),
+ T.ToTensor(),
+ ])
+ image = test_transforms(image)
+ dataset_dict['image'] = image * 255.
+ result = {'batched_inputs': [dataset_dict]}
+ return result
+
+ def forward(self, input: Dict[str, Any],
+ **forward_params) -> Dict[str, Any]:
+ with torch.no_grad():
+ output = self.model(**input)
+ return output
+
+ def postprocess(self,
+ inputs: Dict[str, Any],
+ score_thr=0.5) -> Dict[str, Any]:
+ predictions = inputs['eval_result'][0]['instances']
+ scores = predictions['scores'].detach().cpu().numpy()
+ pred_masks = predictions['pred_masks'].detach().cpu().numpy()
+ pred_classes = predictions['pred_classes'].detach().cpu().numpy()
+
+ thresholded_idxs = np.array(scores) >= score_thr
+ scores = scores[thresholded_idxs]
+ pred_classes = pred_classes[thresholded_idxs]
+ pred_masks = pred_masks[thresholded_idxs]
+
+ results_dict = {
+ OutputKeys.MASKS: [],
+ OutputKeys.LABELS: [],
+ OutputKeys.SCORES: []
+ }
+ for score, cls, mask in zip(scores, pred_classes, pred_masks):
+ score = np.float64(score)
+ label = self.model.classes[int(cls)]
+ mask = np.array(mask, dtype=np.float64)
+
+ results_dict[OutputKeys.SCORES].append(score)
+ results_dict[OutputKeys.LABELS].append(label)
+ results_dict[OutputKeys.MASKS].append(mask)
+
+ return results_dict
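
Note: a minimal usage sketch for the new FastInst pipeline above. The task
name, model id and test image URL are taken from the class docstring and are
assumed to resolve on the hub; this is illustrative only.

    from modelscope.outputs import OutputKeys
    from modelscope.pipelines import pipeline

    # Build the instance-segmentation pipeline registered above.
    segmentor = pipeline(
        'image-segmentation',
        model='damo/cv_resnet50_fast-instance-segmentation_coco')

    img = 'https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/image_instance_segmentation.jpg'
    result = segmentor(img)

    # postprocess() keeps detections with score >= 0.5 (the score_thr default)
    # and returns parallel lists of labels, scores and per-instance masks.
    for label, score, mask in zip(result[OutputKeys.LABELS],
                                  result[OutputKeys.SCORES],
                                  result[OutputKeys.MASKS]):
        print(label, round(score, 3), mask.shape)
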
diff --git a/modelscope/pipelines/cv/hand_2d_keypoints_pipeline.py b/modelscope/pipelines/cv/hand_2d_keypoints_pipeline.py
deleted file mode 100644
index 63281e80..00000000
--- a/modelscope/pipelines/cv/hand_2d_keypoints_pipeline.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-import os.path
-
-from modelscope.metainfo import Pipelines
-from modelscope.pipelines.builder import PIPELINES
-from modelscope.utils.constant import ModelFile, Tasks
-from .easycv_pipelines.base import EasyCVPipeline
-
-
-@PIPELINES.register_module(
- Tasks.hand_2d_keypoints, module_name=Pipelines.hand_2d_keypoints)
-class Hand2DKeypointsPipeline(EasyCVPipeline):
- """Pipeline for hand pose keypoint task."""
-
- def __init__(self,
- model: str,
- model_file_pattern=ModelFile.TORCH_MODEL_FILE,
- *args,
- **kwargs):
- """
- model (str): model id on modelscope hub or local model path.
- model_file_pattern (str): model file pattern.
- """
- super(Hand2DKeypointsPipeline, self).__init__(
- model=model,
- model_file_pattern=model_file_pattern,
- *args,
- **kwargs)
-
- def _build_predict_op(self, **kwargs):
- """Build EasyCV predictor."""
- from easycv.predictors.builder import build_predictor
- detection_predictor_type = self.cfg['DETECTION']['type']
- detection_model_path = os.path.join(
- self.model_dir, self.cfg['DETECTION']['model_path'])
- detection_cfg_file = os.path.join(self.model_dir,
- self.cfg['DETECTION']['config_file'])
- detection_score_threshold = self.cfg['DETECTION']['score_threshold']
- self.cfg.pipeline.predictor_config[
- 'detection_predictor_config'] = dict(
- type=detection_predictor_type,
- model_path=detection_model_path,
- config_file=detection_cfg_file,
- score_threshold=detection_score_threshold)
- easycv_config = self._to_easycv_config()
- pipeline_op = build_predictor(self.cfg.pipeline.predictor_config, {
- 'model_path': self.model_path,
- 'config_file': easycv_config,
- **kwargs
- })
- return pipeline_op
diff --git a/modelscope/pipelines/cv/image_detection_pipeline.py b/modelscope/pipelines/cv/image_detection_pipeline.py
index 86963c37..2b8275c2 100644
--- a/modelscope/pipelines/cv/image_detection_pipeline.py
+++ b/modelscope/pipelines/cv/image_detection_pipeline.py
@@ -30,7 +30,7 @@ class ImageDetectionPipeline(Pipeline):
def preprocess(self, input: Input) -> Dict[str, Any]:
img = LoadImage.convert_to_ndarray(input)
- img = img.astype(np.float)
+ img = img.astype(np.float64)
img = self.model.preprocess(img)
result = {'img': img}
return result
diff --git a/modelscope/pipelines/cv/image_matting_pipeline.py b/modelscope/pipelines/cv/image_matting_pipeline.py
index 5f5d1d56..bee655c5 100644
--- a/modelscope/pipelines/cv/image_matting_pipeline.py
+++ b/modelscope/pipelines/cv/image_matting_pipeline.py
@@ -53,7 +53,7 @@ class ImageMattingPipeline(Pipeline):
def preprocess(self, input: Input) -> Dict[str, Any]:
img = LoadImage.convert_to_ndarray(input)
- img = img.astype(np.float)
+ img = img.astype(float)
result = {'img': img}
return result
diff --git a/modelscope/pipelines/cv/image_panoptic_segmentation_pipeline.py b/modelscope/pipelines/cv/image_panoptic_segmentation_pipeline.py
deleted file mode 100644
index fe941d9f..00000000
--- a/modelscope/pipelines/cv/image_panoptic_segmentation_pipeline.py
+++ /dev/null
@@ -1,135 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import Any, Dict, Union
-
-import cv2
-import numpy as np
-import PIL
-import torch
-
-from modelscope.metainfo import Pipelines
-from modelscope.outputs import OutputKeys
-from modelscope.pipelines.base import Input, Pipeline
-from modelscope.pipelines.builder import PIPELINES
-from modelscope.pipelines.cv.easycv_pipelines.base import EasyCVPipeline
-from modelscope.preprocessors import load_image
-from modelscope.utils.constant import Tasks
-from modelscope.utils.logger import get_logger
-
-logger = get_logger()
-
-
-@PIPELINES.register_module(
- Tasks.image_segmentation,
- module_name=Pipelines.image_panoptic_segmentation)
-class ImagePanopticSegmentationPipeline(Pipeline):
-
- def __init__(self, model: str, **kwargs):
- """
- use `model` to create a image panoptic segmentation pipeline for prediction
- Args:
- model: model id on modelscope hub.
- """
- super().__init__(model=model, **kwargs)
-
- logger.info('panoptic segmentation model, pipeline init')
-
- def preprocess(self, input: Input) -> Dict[str, Any]:
- from mmdet.datasets.pipelines import Compose
- from mmcv.parallel import collate, scatter
- from mmdet.datasets import replace_ImageToTensor
-
- cfg = self.model.cfg
- # build the data pipeline
-
- if isinstance(input, str):
- cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam'
- img = np.array(load_image(input))
- img = img[:, :, ::-1] # convert to bgr
- elif isinstance(input, PIL.Image.Image):
- cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam'
- img = np.array(input.convert('RGB'))
- elif isinstance(input, np.ndarray):
- cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam'
- if len(input.shape) == 2:
- img = cv2.cvtColor(input, cv2.COLOR_GRAY2BGR)
- else:
- img = input
- else:
- raise TypeError(f'input should be either str, PIL.Image,'
- f' np.array, but got {type(input)}')
-
- # collect data
- data = dict(img=img)
- cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
- test_pipeline = Compose(cfg.data.test.pipeline)
-
- data = test_pipeline(data)
- # copy from mmdet_model collect data
- data = collate([data], samples_per_gpu=1)
- data['img_metas'] = [
- img_metas.data[0] for img_metas in data['img_metas']
- ]
- data['img'] = [img.data[0] for img in data['img']]
- if next(self.model.parameters()).is_cuda:
- # scatter to specified GPU
- data = scatter(data, [next(self.model.parameters()).device])[0]
-
- return data
-
- def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
- results = self.model.inference(input)
-
- return results
-
- def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
- # bz=1, tcguo
- pan_results = inputs[0]['pan_results']
- INSTANCE_OFFSET = 1000
-
- ids = np.unique(pan_results)[::-1]
- legal_indices = ids != self.model.num_classes # for VOID label
- ids = ids[legal_indices]
- labels = np.array([id % INSTANCE_OFFSET for id in ids], dtype=np.int64)
- segms = (pan_results[None] == ids[:, None, None])
- masks = [it.astype(np.int) for it in segms]
- labels_txt = np.array(self.model.CLASSES)[labels].tolist()
-
- outputs = {
- OutputKeys.MASKS: masks,
- OutputKeys.LABELS: labels_txt,
- OutputKeys.SCORES: [0.999 for _ in range(len(labels_txt))]
- }
- return outputs
-
-
-@PIPELINES.register_module(
- Tasks.image_segmentation,
- module_name=Pipelines.image_panoptic_segmentation_easycv)
-class ImagePanopticSegmentationEasyCVPipeline(EasyCVPipeline):
- """Pipeline built upon easycv for image segmentation."""
-
- def __init__(self, model: str, model_file_pattern='*.pt', *args, **kwargs):
- """
- model (str): model id on modelscope hub or local model path.
- model_file_pattern (str): model file pattern.
- """
- super(ImagePanopticSegmentationEasyCVPipeline, self).__init__(
- model=model,
- model_file_pattern=model_file_pattern,
- *args,
- **kwargs)
-
- def __call__(self, inputs) -> Any:
- outputs = self.predict_op(inputs)
- easycv_results = outputs[0]
-
- results = {
- OutputKeys.MASKS:
- easycv_results[OutputKeys.MASKS],
- OutputKeys.LABELS:
- easycv_results[OutputKeys.LABELS],
- OutputKeys.SCORES:
- [0.999 for _ in range(len(easycv_results[OutputKeys.LABELS]))]
- }
-
- return results
diff --git a/modelscope/pipelines/cv/image_style_transfer_pipeline.py b/modelscope/pipelines/cv/image_style_transfer_pipeline.py
index e5fd0d48..49a0bff0 100644
--- a/modelscope/pipelines/cv/image_style_transfer_pipeline.py
+++ b/modelscope/pipelines/cv/image_style_transfer_pipeline.py
@@ -73,12 +73,12 @@ class ImageStyleTransferPipeline(Pipeline):
content = LoadImage.convert_to_ndarray(content)
if len(content.shape) == 2:
content = cv2.cvtColor(content, cv2.COLOR_GRAY2BGR)
- content_img = content.astype(np.float)
+ content_img = content.astype(float)
style_img = LoadImage.convert_to_ndarray(style)
if len(style_img.shape) == 2:
style_img = cv2.cvtColor(style_img, cv2.COLOR_GRAY2BGR)
- style_img = style_img.astype(np.float)
+ style_img = style_img.astype(float)
result = {'content': content_img, 'style': style_img}
return result
diff --git a/modelscope/pipelines/cv/movie_scene_segmentation_pipeline.py b/modelscope/pipelines/cv/movie_scene_segmentation_pipeline.py
index 3fffc546..3cef5c28 100644
--- a/modelscope/pipelines/cv/movie_scene_segmentation_pipeline.py
+++ b/modelscope/pipelines/cv/movie_scene_segmentation_pipeline.py
@@ -43,23 +43,32 @@ class MovieSceneSegmentationPipeline(Pipeline):
"""
self.input_video_pth = input
if isinstance(input, str):
- shot_feat, sid = self.model.preprocess(input)
+ self.shot2keyf, self.anno, self.shot_timecode_lst, self.shot_idx_lst = self.model.preprocess(
+ input)
else:
raise TypeError(f'input should be a str,'
f' but got {type(input)}')
- result = {'sid': sid, 'shot_feat': shot_feat}
+ result = {
+ 'shot_timecode_lst': self.shot_timecode_lst,
+ 'shot_idx_lst': self.shot_idx_lst
+ }
- return result
+ with torch.no_grad():
+ output = self.model.inference(result)
+
+ return output
def forward(self, input: Dict[str, Any],
**forward_params) -> Dict[str, Any]:
- with torch.no_grad():
- output = self.model.inference(input)
- return output
+ return input
def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
- data = {'input_video_pth': self.input_video_pth, 'feat': inputs}
+ data = {
+ 'input_video_pth': self.input_video_pth,
+ 'feat': inputs,
+ 'shot2keyf': self.shot2keyf
+ }
scene_num, scene_meta_lst, shot_num, shot_meta_lst = self.model.postprocess(
data)
result = {
diff --git a/modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py b/modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py
index 39195bcd..123057f5 100644
--- a/modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py
+++ b/modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py
@@ -225,7 +225,7 @@ class ReferringVideoObjectSegmentationPipeline(Pipeline):
def apply_mask(image, mask, color, transparency=0.7):
mask = mask[..., np.newaxis].repeat(repeats=3, axis=2)
mask = mask * transparency
- color_matrix = np.ones(image.shape, dtype=np.float) * color
+ color_matrix = np.ones(image.shape, dtype=np.float64) * color
out_image = color_matrix * mask + image * (1.0 - mask)
return out_image
diff --git a/modelscope/pipelines/cv/skin_retouching_pipeline.py b/modelscope/pipelines/cv/skin_retouching_pipeline.py
index b2b5f4ca..da9b912f 100644
--- a/modelscope/pipelines/cv/skin_retouching_pipeline.py
+++ b/modelscope/pipelines/cv/skin_retouching_pipeline.py
@@ -105,7 +105,7 @@ class SkinRetouchingPipeline(Pipeline):
img = LoadImage.convert_to_ndarray(input)
if len(img.shape) == 2:
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
- img = img.astype(np.float)
+ img = img.astype(float)
result = {'img': img}
return result
diff --git a/modelscope/pipelines/cv/tbs_detection_pipeline.py b/modelscope/pipelines/cv/tbs_detection_pipeline.py
index 58831846..8bbac9c8 100644
--- a/modelscope/pipelines/cv/tbs_detection_pipeline.py
+++ b/modelscope/pipelines/cv/tbs_detection_pipeline.py
@@ -116,7 +116,7 @@ class TBSDetectionPipeline(Pipeline):
- **labels** (`List[str]`, optional) -- The boxes's class_names of detected object in image.
"""
img = LoadImage.convert_to_ndarray(input)
- img = img.astype(np.float)
+ img = img.astype(float)
result = {'img': img, 'img_path': input}
return result
diff --git a/modelscope/pipelines/multi_modal/__init__.py b/modelscope/pipelines/multi_modal/__init__.py
index 2e496952..b28e9a71 100644
--- a/modelscope/pipelines/multi_modal/__init__.py
+++ b/modelscope/pipelines/multi_modal/__init__.py
@@ -21,6 +21,7 @@ if TYPE_CHECKING:
from .diffusers_wrapped import StableDiffusionWrapperPipeline, ChineseStableDiffusionPipeline
from .soonet_video_temporal_grounding_pipeline import SOONetVideoTemporalGroundingPipeline
from .text_to_video_synthesis_pipeline import TextToVideoSynthesisPipeline
+ from .multimodal_dialogue_pipeline import MultimodalDialoguePipeline
else:
_import_structure = {
'image_captioning_pipeline': ['ImageCaptioningPipeline'],
@@ -45,6 +46,7 @@ else:
'soonet_video_temporal_grounding_pipeline':
['SOONetVideoTemporalGroundingPipeline'],
'text_to_video_synthesis_pipeline': ['TextToVideoSynthesisPipeline'],
+ 'multimodal_dialogue_pipeline': ['MultimodalDialoguePipeline']
}
import sys
diff --git a/modelscope/pipelines/multi_modal/diffusers_wrapped/diffusers_pipeline.py b/modelscope/pipelines/multi_modal/diffusers_wrapped/diffusers_pipeline.py
index d1e3a2ae..ce0455b6 100644
--- a/modelscope/pipelines/multi_modal/diffusers_wrapped/diffusers_pipeline.py
+++ b/modelscope/pipelines/multi_modal/diffusers_wrapped/diffusers_pipeline.py
@@ -39,10 +39,10 @@ class DiffusersPipeline(Pipeline):
self.models = [self.model]
self.has_multiple_models = len(self.models) > 1
- def preprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+ def preprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]:
return inputs
- def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+ def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]:
return inputs
def __call__(self, input: Union[Input, List[Input]], *args,
diff --git a/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/chinese_stable_diffusion_pipeline.py b/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/chinese_stable_diffusion_pipeline.py
index d1627962..539fd4ba 100644
--- a/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/chinese_stable_diffusion_pipeline.py
+++ b/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/chinese_stable_diffusion_pipeline.py
@@ -46,7 +46,9 @@ class ChineseStableDiffusionPipeline(DiffusersPipeline):
torch_dtype = kwargs.get('torch_dtype', torch.float32)
self.pipeline = _DiffuersChineseStableDiffusionPipeline.from_pretrained(
- model, torch_dtype=torch_dtype).to(self.device)
+ model, torch_dtype=torch_dtype)
+ self.pipeline.text_encoder.pooler = None
+ self.pipeline.to(self.device)
def forward(self, inputs: Dict[str, Any],
**forward_params) -> Dict[str, Any]:
@@ -73,7 +75,7 @@ class ChineseStableDiffusionPipeline(DiffusersPipeline):
callback=inputs.get('callback'),
callback_steps=inputs.get('callback_steps', 1))
- def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+ def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]:
images = []
for img in inputs.images:
if isinstance(img, Image.Image):
diff --git a/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/stable_diffusion_pipeline.py b/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/stable_diffusion_pipeline.py
index b6d9d3bd..49b4ef37 100644
--- a/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/stable_diffusion_pipeline.py
+++ b/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/stable_diffusion_pipeline.py
@@ -65,7 +65,7 @@ class StableDiffusionWrapperPipeline(DiffusersPipeline):
callback=inputs.get('callback'),
callback_steps=inputs.get('callback_steps', 1))
- def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+ def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]:
images = []
for img in inputs.images:
if isinstance(img, Image.Image):
diff --git a/modelscope/pipelines/multi_modal/image_captioning_pipeline.py b/modelscope/pipelines/multi_modal/image_captioning_pipeline.py
index fbab88fd..17b850da 100644
--- a/modelscope/pipelines/multi_modal/image_captioning_pipeline.py
+++ b/modelscope/pipelines/multi_modal/image_captioning_pipeline.py
@@ -1,15 +1,18 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import Any, Dict, Optional, Union
+import numpy as np
import torch
from modelscope.metainfo import Pipelines
-from modelscope.models.multi_modal import MPlugForAllTasks, OfaForAllTasks
+from modelscope.models.multi_modal import (CLIP_Interrogator, MPlugForAllTasks,
+ OfaForAllTasks)
from modelscope.pipelines.base import Model, Pipeline
from modelscope.pipelines.builder import PIPELINES
from modelscope.pipelines.util import batch_process
-from modelscope.preprocessors import (MPlugPreprocessor, OfaPreprocessor,
- Preprocessor)
+from modelscope.preprocessors import (
+ ImageCaptioningClipInterrogatorPreprocessor, MPlugPreprocessor,
+ OfaPreprocessor, Preprocessor, load_image)
from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.logger import get_logger
@@ -28,6 +31,17 @@ class ImageCaptioningPipeline(Pipeline):
use `model` and `preprocessor` to create a image captioning pipeline for prediction
Args:
model: model id on modelscope hub.
+            Examples:
+                >>> from modelscope.pipelines import pipeline
+                >>> from modelscope.utils.constant import Tasks
+
+                >>> model_id = 'damo/cv_clip-interrogator'
+                >>> input_image = 'test.png'
+
+                >>> pipeline_ci = pipeline(Tasks.image_captioning, model=model_id)
+                >>> print(pipeline_ci(input_image))
+
+
"""
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.model.eval()
@@ -39,6 +53,9 @@ class ImageCaptioningPipeline(Pipeline):
self.preprocessor = OfaPreprocessor(self.model.model_dir)
elif isinstance(self.model, MPlugForAllTasks):
self.preprocessor = MPlugPreprocessor(self.model.model_dir)
+ elif isinstance(self.model, CLIP_Interrogator):
+ self.preprocessor = ImageCaptioningClipInterrogatorPreprocessor(
+ )
def _batch(self, data):
if isinstance(self.model, OfaForAllTasks):
diff --git a/modelscope/pipelines/multi_modal/multimodal_dialogue_pipeline.py b/modelscope/pipelines/multi_modal/multimodal_dialogue_pipeline.py
new file mode 100644
index 00000000..31df19fc
--- /dev/null
+++ b/modelscope/pipelines/multi_modal/multimodal_dialogue_pipeline.py
@@ -0,0 +1,90 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+from typing import Any, Dict, Optional, Union
+
+import torch
+
+from modelscope.metainfo import Pipelines
+from modelscope.models.multi_modal import MplugOwlForConditionalGeneration
+from modelscope.outputs import OutputKeys, TokenGeneratorOutput
+from modelscope.pipelines.base import Model, Pipeline
+from modelscope.pipelines.builder import PIPELINES
+from modelscope.preprocessors import MplugOwlPreprocessor, Preprocessor
+from modelscope.utils.constant import Tasks
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+
+@PIPELINES.register_module(
+ Tasks.multimodal_dialogue, module_name=Pipelines.multimodal_dialogue)
+class MultimodalDialoguePipeline(Pipeline):
+ r""" Multimodal Dialogue Pipeline.
+
+ Examples:
+ >>> from modelscope.pipelines import pipeline
+ >>> chatbot = pipeline('multimodal-dialogue', 'damo/multi-modal_mplug_owl_multimodal-dialogue_7b')
+ >>> image = 'data/resource/portrait_input.png'
+ >>> system_prompt_1 = 'The following is a conversation between a curious human and AI assistant.'
+ >>> system_prompt_2 = "The assistant gives helpful, detailed, and polite answers to the user's questions."
+ >>> messages = {
+ >>> 'messages': [
+ >>> {
+ >>> 'role': 'system',
+ >>> 'content': system_prompt_1 + ' ' + system_prompt_2
+ >>> },
+ >>> {
+ >>> 'role': 'user',
+ >>> 'content': [{
+ >>> 'image': image
+ >>> }]
+ >>> },
+ >>> {
+ >>> 'role': 'user',
+ >>> 'content': 'Describe the facial expression of the man.'
+ >>> },
+ >>> ]
+ >>> }
+ >>> chatbot(messages)
+ >>> {
+        >>>     "text": "he is angry."
+ >>> }
+ >>>
+ """
+
+ def __init__(self,
+ model: Union[Model, str],
+ preprocessor: Optional[Preprocessor] = None,
+ **kwargs):
+ """
+        Use `model` and `preprocessor` to create a multimodal dialogue pipeline for prediction.
+ Args:
+ model: model id on modelscope hub.
+ """
+ super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+ self.model.eval()
+ if preprocessor is None:
+ if isinstance(self.model, MplugOwlForConditionalGeneration):
+ self.preprocessor = MplugOwlPreprocessor(self.model.model_dir)
+
+ def forward(self, inputs: Dict[str, Any],
+ **forward_params) -> Dict[str, Any]:
+ """
+        The `forward_params` can be any generation configuration listed in the transformers library.
+ """
+ with torch.no_grad():
+ return super().forward(inputs, **forward_params)
+
+ def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
+ """process the prediction results
+
+ Args:
+            inputs (Dict[str, Any]): The generation outputs from the forward pass.
+
+ Returns:
+ Dict[str, str]: the prediction results
+ """
+ if isinstance(self.model, MplugOwlForConditionalGeneration):
+ output = self.preprocessor.tokenizer.decode(
+ inputs[0], skip_special_tokens=True)
+ inputs = {OutputKeys.TEXT: output}
+ return inputs
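
Note: the `forward_params` accepted above are forwarded to the underlying
generation call, so decoding can be tuned per request. A minimal sketch that
reuses the message structure from the class docstring; `max_new_tokens` is a
standard transformers generation argument and is assumed to be honoured here.

    from modelscope.outputs import OutputKeys
    from modelscope.pipelines import pipeline

    chatbot = pipeline('multimodal-dialogue',
                       'damo/multi-modal_mplug_owl_multimodal-dialogue_7b')
    messages = {
        'messages': [
            {'role': 'user', 'content': [{'image': 'data/resource/portrait_input.png'}]},
            {'role': 'user', 'content': 'Describe the facial expression of the man.'},
        ]
    }
    # Extra keyword arguments are passed through forward() to generation.
    reply = chatbot(messages, max_new_tokens=64)
    print(reply[OutputKeys.TEXT])
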
diff --git a/modelscope/pipelines/multi_modal/text_to_video_synthesis_pipeline.py b/modelscope/pipelines/multi_modal/text_to_video_synthesis_pipeline.py
index ee6635a6..50e2437b 100644
--- a/modelscope/pipelines/multi_modal/text_to_video_synthesis_pipeline.py
+++ b/modelscope/pipelines/multi_modal/text_to_video_synthesis_pipeline.py
@@ -1,5 +1,6 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
import tempfile
from typing import Any, Dict, Optional
@@ -62,8 +63,10 @@ class TextToVideoSynthesisPipeline(Pipeline):
**post_params) -> Dict[str, Any]:
video = tensor2vid(inputs['video'])
output_video_path = post_params.get('output_video', None)
+ temp_video_file = False
if output_video_path is None:
output_video_path = tempfile.NamedTemporaryFile(suffix='.mp4').name
+ temp_video_file = True
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
h, w, c = video[0].shape
@@ -72,7 +75,15 @@ class TextToVideoSynthesisPipeline(Pipeline):
for i in range(len(video)):
img = cv2.cvtColor(video[i], cv2.COLOR_RGB2BGR)
video_writer.write(img)
- return {OutputKeys.OUTPUT_VIDEO: output_video_path}
+ video_writer.release()
+ if temp_video_file:
+ video_file_content = b''
+ with open(output_video_path, 'rb') as f:
+ video_file_content = f.read()
+ os.remove(output_video_path)
+ return {OutputKeys.OUTPUT_VIDEO: video_file_content}
+ else:
+ return {OutputKeys.OUTPUT_VIDEO: output_video_path}
def tensor2vid(video, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]):
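
Note: after this change `OutputKeys.OUTPUT_VIDEO` holds a file path when an
`output_video` path is supplied to postprocess, and the raw mp4 bytes
otherwise, so callers may need to branch on the type. A sketch; the model id
and the `{'text': ...}` input format follow the usual text-to-video examples
and are assumptions here.

    from modelscope.outputs import OutputKeys
    from modelscope.pipelines import pipeline

    pipe = pipeline('text-to-video-synthesis',
                    model='damo/text-to-video-synthesis')  # assumed model id
    out = pipe({'text': 'A panda eating bamboo on a rock.'})[OutputKeys.OUTPUT_VIDEO]

    if isinstance(out, bytes):
        # No output path was given: the temporary file is read back, removed,
        # and the encoded mp4 bytes are returned.
        with open('result.mp4', 'wb') as f:
            f.write(out)
    else:
        # An explicit output_video path was given, so a path comes back.
        print('video written to', out)
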
diff --git a/modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py b/modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py
index fa7b23b8..a0e75638 100644
--- a/modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py
+++ b/modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py
@@ -41,7 +41,8 @@ class DialogIntentPredictionPipeline(Pipeline):
config_file=config_file,
device=device,
auto_collate=auto_collate,
- **kwargs)
+ compile=kwargs.pop('compile', False),
+ compile_options=kwargs.pop('compile_options', {}))
if preprocessor is None:
self.preprocessor = DialogIntentPredictionPreprocessor(
self.model.model_dir, **kwargs)
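
Note: this is the first of a recurring change in the NLP pipelines below:
instead of forwarding `**kwargs` to the base `Pipeline`, only `compile` and
`compile_options` are popped and passed on, leaving the remaining keyword
arguments available for the task preprocessor. A caller-side sketch (task
name and model id are placeholders):

    from modelscope.pipelines import pipeline

    # compile/compile_options reach the base Pipeline (torch.compile wiring);
    # any other kwargs stay available for the preprocessor.
    pipe = pipeline(
        'task-oriented-conversation',                     # placeholder task
        model='damo/nlp_space_dialog-intent-prediction',  # placeholder model id
        compile=False,
        compile_options={})
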
diff --git a/modelscope/pipelines/nlp/document_grounded_dialog_generate_pipeline.py b/modelscope/pipelines/nlp/document_grounded_dialog_generate_pipeline.py
index 8c773dfe..dfcd95e6 100644
--- a/modelscope/pipelines/nlp/document_grounded_dialog_generate_pipeline.py
+++ b/modelscope/pipelines/nlp/document_grounded_dialog_generate_pipeline.py
@@ -47,7 +47,8 @@ class DocumentGroundedDialogGeneratePipeline(Pipeline):
config_file=config_file,
device=device,
auto_collate=auto_collate,
- **kwargs)
+ compile=kwargs.pop('compile', False),
+ compile_options=kwargs.pop('compile_options', {}))
if preprocessor is None:
self.preprocessor = DocumentGroundedDialogGeneratePreprocessor(
diff --git a/modelscope/pipelines/nlp/document_grounded_dialog_rerank_pipeline.py b/modelscope/pipelines/nlp/document_grounded_dialog_rerank_pipeline.py
index 8fdef380..29993594 100644
--- a/modelscope/pipelines/nlp/document_grounded_dialog_rerank_pipeline.py
+++ b/modelscope/pipelines/nlp/document_grounded_dialog_rerank_pipeline.py
@@ -65,7 +65,8 @@ class DocumentGroundedDialogRerankPipeline(Pipeline):
device=device,
auto_collate=auto_collate,
seed=seed,
- **kwarg)
+ compile=kwarg.pop('compile', False),
+ compile_options=kwarg.pop('compile_options', {}))
self.model = model
self.preprocessor = preprocessor
self.device = device
diff --git a/modelscope/pipelines/nlp/document_grounded_dialog_retrieval_pipeline.py b/modelscope/pipelines/nlp/document_grounded_dialog_retrieval_pipeline.py
index c3fb1a32..31890a73 100644
--- a/modelscope/pipelines/nlp/document_grounded_dialog_retrieval_pipeline.py
+++ b/modelscope/pipelines/nlp/document_grounded_dialog_retrieval_pipeline.py
@@ -56,7 +56,8 @@ class DocumentGroundedDialogRetrievalPipeline(Pipeline):
config_file=config_file,
device=device,
auto_collate=auto_collate,
- **kwargs)
+ compile=kwargs.pop('compile', False),
+ compile_options=kwargs.pop('compile_options', {}))
if preprocessor is None:
self.preprocessor = DocumentGroundedDialogRetrievalPreprocessor(
diff --git a/modelscope/pipelines/nlp/document_segmentation_pipeline.py b/modelscope/pipelines/nlp/document_segmentation_pipeline.py
index 6e195ed0..d528eee0 100644
--- a/modelscope/pipelines/nlp/document_segmentation_pipeline.py
+++ b/modelscope/pipelines/nlp/document_segmentation_pipeline.py
@@ -6,11 +6,9 @@ from typing import Any, Dict, List, Union
import numpy as np
import torch
from datasets import Dataset
-from transformers.models.bert.modeling_bert import BertConfig
from modelscope.metainfo import Pipelines
from modelscope.models import Model
-from modelscope.models.nlp.ponet.configuration import PoNetConfig
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Pipeline, Tensor
from modelscope.pipelines.builder import PIPELINES
@@ -51,11 +49,9 @@ class DocumentSegmentationPipeline(Pipeline):
auto_collate=auto_collate,
**kwargs)
- kwargs = kwargs
- if 'compile' in kwargs.keys():
- kwargs.pop('compile')
- if 'compile_options' in kwargs.keys():
- kwargs.pop('compile_options')
+ kwargs.pop('compile', None)
+ kwargs.pop('compile_options', None)
+
self.model_dir = self.model.model_dir
self.model_cfg = self.model.model_cfg
if preprocessor is None:
diff --git a/modelscope/pipelines/nlp/extractive_summarization_pipeline.py b/modelscope/pipelines/nlp/extractive_summarization_pipeline.py
index c01f28fc..a4e67607 100644
--- a/modelscope/pipelines/nlp/extractive_summarization_pipeline.py
+++ b/modelscope/pipelines/nlp/extractive_summarization_pipeline.py
@@ -44,11 +44,8 @@ class ExtractiveSummarizationPipeline(Pipeline):
auto_collate=auto_collate,
**kwargs)
- kwargs = kwargs
- if 'compile' in kwargs.keys():
- kwargs.pop('compile')
- if 'compile_options' in kwargs.keys():
- kwargs.pop('compile_options')
+ kwargs.pop('compile', None)
+ kwargs.pop('compile_options', None)
self.model_dir = self.model.model_dir
self.model_cfg = self.model.model_cfg
diff --git a/modelscope/pipelines/nlp/feature_extraction_pipeline.py b/modelscope/pipelines/nlp/feature_extraction_pipeline.py
index 0f6979ba..c82db03c 100644
--- a/modelscope/pipelines/nlp/feature_extraction_pipeline.py
+++ b/modelscope/pipelines/nlp/feature_extraction_pipeline.py
@@ -54,7 +54,8 @@ class FeatureExtractionPipeline(Pipeline):
config_file=config_file,
device=device,
auto_collate=auto_collate,
- **kwargs)
+ compile=kwargs.pop('compile', False),
+ compile_options=kwargs.pop('compile_options', {}))
assert isinstance(self.model, Model), \
f'please check whether model config exists in {ModelFile.CONFIGURATION}'
diff --git a/modelscope/pipelines/nlp/fill_mask_pipeline.py b/modelscope/pipelines/nlp/fill_mask_pipeline.py
index 6bc7622f..7b034786 100644
--- a/modelscope/pipelines/nlp/fill_mask_pipeline.py
+++ b/modelscope/pipelines/nlp/fill_mask_pipeline.py
@@ -63,7 +63,8 @@ class FillMaskPipeline(Pipeline):
config_file=config_file,
device=device,
auto_collate=auto_collate,
- **kwargs)
+ compile=kwargs.pop('compile', False),
+ compile_options=kwargs.pop('compile_options', {}))
assert isinstance(self.model, Model), \
f'please check whether model config exists in {ModelFile.CONFIGURATION}'
diff --git a/modelscope/pipelines/nlp/named_entity_recognition_pipeline.py b/modelscope/pipelines/nlp/named_entity_recognition_pipeline.py
index 2cf30037..d035802b 100644
--- a/modelscope/pipelines/nlp/named_entity_recognition_pipeline.py
+++ b/modelscope/pipelines/nlp/named_entity_recognition_pipeline.py
@@ -56,7 +56,8 @@ class NamedEntityRecognitionPipeline(TokenClassificationPipeline):
config_file=config_file,
device=device,
auto_collate=auto_collate,
- **kwargs)
+ compile=kwargs.pop('compile', False),
+ compile_options=kwargs.pop('compile_options', {}))
assert isinstance(self.model, Model), \
f'please check whether model config exists in {ModelFile.CONFIGURATION}'
diff --git a/modelscope/pipelines/nlp/sentence_embedding_pipeline.py b/modelscope/pipelines/nlp/sentence_embedding_pipeline.py
index 4e01397d..9d5cc80f 100644
--- a/modelscope/pipelines/nlp/sentence_embedding_pipeline.py
+++ b/modelscope/pipelines/nlp/sentence_embedding_pipeline.py
@@ -43,7 +43,8 @@ class SentenceEmbeddingPipeline(Pipeline):
config_file=config_file,
device=device,
auto_collate=auto_collate,
- **kwargs)
+ compile=kwargs.pop('compile', False),
+ compile_options=kwargs.pop('compile_options', {}))
assert isinstance(self.model, Model), \
f'please check whether model config exists in {ModelFile.CONFIGURATION}'
diff --git a/modelscope/pipelines/nlp/siamese_uie_pipeline.py b/modelscope/pipelines/nlp/siamese_uie_pipeline.py
index cdbd9119..d548d2e8 100644
--- a/modelscope/pipelines/nlp/siamese_uie_pipeline.py
+++ b/modelscope/pipelines/nlp/siamese_uie_pipeline.py
@@ -21,7 +21,7 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Pipeline
from modelscope.pipelines.builder import PIPELINES
from modelscope.preprocessors import Preprocessor, SiameseUiePreprocessor
-from modelscope.utils.constant import Tasks
+from modelscope.utils.constant import ModelFile, Tasks
Input = Union[str, tuple, MsDataset, 'Image.Image', 'numpy.ndarray']
@@ -68,7 +68,8 @@ class SiameseUiePipeline(Pipeline):
config_file=config_file,
device=device,
auto_collate=auto_collate,
- **kwargs)
+ compile=kwargs.pop('compile', False),
+ compile_options=kwargs.pop('compile_options', {}))
assert isinstance(self.model, Model), \
f'please check whether model config exists in {ModelFile.CONFIGURATION}'
diff --git a/modelscope/pipelines/nlp/table_question_answering_pipeline.py b/modelscope/pipelines/nlp/table_question_answering_pipeline.py
index 0472ecb8..7c064f57 100644
--- a/modelscope/pipelines/nlp/table_question_answering_pipeline.py
+++ b/modelscope/pipelines/nlp/table_question_answering_pipeline.py
@@ -52,7 +52,8 @@ class TableQuestionAnsweringPipeline(Pipeline):
config_file=config_file,
device=device,
auto_collate=auto_collate,
- **kwargs)
+ compile=kwargs.pop('compile', False),
+ compile_options=kwargs.pop('compile_options', {}))
assert isinstance(self.model, Model), \
f'please check whether model config exists in {ModelFile.CONFIGURATION}'
@@ -402,7 +403,7 @@ class TableQuestionAnsweringPipeline(Pipeline):
OutputKeys.SQL_STRING: sql.string,
OutputKeys.SQL_QUERY: sql.query,
OutputKeys.HISTORY: result['sql'],
- OutputKeys.QUERT_RESULT: tabledata,
+ OutputKeys.QUERY_RESULT: tabledata,
}
return {OutputKeys.OUTPUT: output}
diff --git a/modelscope/pipelines/nlp/text_classification_pipeline.py b/modelscope/pipelines/nlp/text_classification_pipeline.py
index a300b008..3b06f435 100644
--- a/modelscope/pipelines/nlp/text_classification_pipeline.py
+++ b/modelscope/pipelines/nlp/text_classification_pipeline.py
@@ -76,7 +76,7 @@ class TextClassificationPipeline(Pipeline):
field=Fields.multi_modal,
**kwargs)
else:
- first_sequence = kwargs.pop('first_sequence', 'first_sequence')
+ first_sequence = kwargs.pop('first_sequence', 'text')
second_sequence = kwargs.pop('second_sequence', None)
sequence_length = kwargs.pop('sequence_length', 512)
self.preprocessor = Preprocessor.from_pretrained(
diff --git a/modelscope/pipelines/nlp/text_generation_pipeline.py b/modelscope/pipelines/nlp/text_generation_pipeline.py
index 2b851dc4..d1aa5ff6 100644
--- a/modelscope/pipelines/nlp/text_generation_pipeline.py
+++ b/modelscope/pipelines/nlp/text_generation_pipeline.py
@@ -59,7 +59,8 @@ class TextGenerationPipeline(Pipeline):
config_file=config_file,
device=device,
auto_collate=auto_collate,
- **kwargs)
+ compile=kwargs.pop('compile', False),
+ compile_options=kwargs.pop('compile_options', {}))
assert isinstance(self.model, Model), \
f'please check whether model config exists in {ModelFile.CONFIGURATION}'
diff --git a/modelscope/pipelines/nlp/text_ranking_pipeline.py b/modelscope/pipelines/nlp/text_ranking_pipeline.py
index a42baaa2..7539634e 100644
--- a/modelscope/pipelines/nlp/text_ranking_pipeline.py
+++ b/modelscope/pipelines/nlp/text_ranking_pipeline.py
@@ -44,7 +44,8 @@ class TextRankingPipeline(Pipeline):
config_file=config_file,
device=device,
auto_collate=auto_collate,
- **kwargs)
+ compile=kwargs.pop('compile', False),
+ compile_options=kwargs.pop('compile_options', {}))
assert isinstance(self.model, Model), \
f'please check whether model config exists in {ModelFile.CONFIGURATION}'
diff --git a/modelscope/pipelines/nlp/token_classification_pipeline.py b/modelscope/pipelines/nlp/token_classification_pipeline.py
index daa4823c..9fd8e325 100644
--- a/modelscope/pipelines/nlp/token_classification_pipeline.py
+++ b/modelscope/pipelines/nlp/token_classification_pipeline.py
@@ -51,7 +51,9 @@ class TokenClassificationPipeline(Pipeline):
preprocessor=preprocessor,
config_file=config_file,
device=device,
- auto_collate=auto_collate)
+ auto_collate=auto_collate,
+ compile=kwargs.pop('compile', False),
+ compile_options=kwargs.pop('compile_options', {}))
assert isinstance(self.model, Model), \
f'please check whether model config exists in {ModelFile.CONFIGURATION}'
diff --git a/modelscope/pipelines/nlp/translation_evaluation_pipeline.py b/modelscope/pipelines/nlp/translation_evaluation_pipeline.py
index 8a339517..4450aad7 100644
--- a/modelscope/pipelines/nlp/translation_evaluation_pipeline.py
+++ b/modelscope/pipelines/nlp/translation_evaluation_pipeline.py
@@ -9,12 +9,11 @@ import torch
from modelscope.metainfo import Pipelines
from modelscope.models.base import Model
-from modelscope.models.nlp.unite.configuration_unite import EvaluationMode
+from modelscope.models.nlp.unite.configuration import InputFormat
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import InputModel, Pipeline
from modelscope.pipelines.builder import PIPELINES
-from modelscope.preprocessors import (Preprocessor,
- TranslationEvaluationPreprocessor)
+from modelscope.preprocessors import Preprocessor
from modelscope.utils.config import Config
from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.logger import get_logger
@@ -31,16 +30,18 @@ class TranslationEvaluationPipeline(Pipeline):
def __init__(self,
model: InputModel,
preprocessor: Optional[Preprocessor] = None,
- eval_mode: EvaluationMode = EvaluationMode.SRC_REF,
+ input_format: InputFormat = InputFormat.SRC_REF,
device: str = 'gpu',
**kwargs):
- r"""Build a translation pipeline with a model dir or a model id in the model hub.
+ r"""Build a translation evaluation pipeline with a model dir or a model id in the model hub.
Args:
model: A Model instance.
- eval_mode: Evaluation mode, choosing one from `"EvaluationMode.SRC_REF"`,
- `"EvaluationMode.SRC"`, `"EvaluationMode.REF"`. Aside from hypothesis, the
+ preprocessor: The preprocessor for this pipeline.
+ input_format: Input format, choosing one from `"InputFormat.SRC_REF"`,
+ `"InputFormat.SRC"`, `"InputFormat.REF"`. Aside from hypothesis, the
source/reference/source+reference can be presented during evaluation.
+ device: Used device for this pipeline.
"""
super().__init__(
model=model,
@@ -48,44 +49,40 @@ class TranslationEvaluationPipeline(Pipeline):
compile=kwargs.pop('compile', False),
compile_options=kwargs.pop('compile_options', {}))
- self.eval_mode = eval_mode
- self.checking_eval_mode()
+ self.input_format = input_format
+ self.checking_input_format()
assert isinstance(self.model, Model), \
f'please check whether model config exists in {ModelFile.CONFIGURATION}'
- self.preprocessor = TranslationEvaluationPreprocessor(
- self.model.model_dir,
- self.eval_mode) if preprocessor is None else preprocessor
-
self.model.load_checkpoint(
osp.join(self.model.model_dir, ModelFile.TORCH_MODEL_BIN_FILE),
- self.device)
+ device=self.device,
+ plm_only=False)
self.model.eval()
return
- def checking_eval_mode(self):
- if self.eval_mode == EvaluationMode.SRC:
+ def checking_input_format(self):
+ if self.input_format == InputFormat.SRC:
logger.info('Evaluation mode: source-only')
- elif self.eval_mode == EvaluationMode.REF:
+ elif self.input_format == InputFormat.REF:
logger.info('Evaluation mode: reference-only')
- elif self.eval_mode == EvaluationMode.SRC_REF:
+ elif self.input_format == InputFormat.SRC_REF:
logger.info('Evaluation mode: source-reference-combined')
else:
- raise ValueError(
- 'Evaluation mode should be one choice among'
- '\'EvaluationMode.SRC\', \'EvaluationMode.REF\', and'
- '\'EvaluationMode.SRC_REF\'.')
+ raise ValueError('Evaluation mode should be one choice among'
+ '\'InputFormat.SRC\', \'InputFormat.REF\', and'
+ '\'InputFormat.SRC_REF\'.')
- def change_eval_mode(self,
- eval_mode: EvaluationMode = EvaluationMode.SRC_REF):
+ def change_input_format(self,
+ input_format: InputFormat = InputFormat.SRC_REF):
logger.info('Changing the evaluation mode.')
- self.eval_mode = eval_mode
- self.checking_eval_mode()
- self.preprocessor.eval_mode = eval_mode
+ self.input_format = input_format
+ self.checking_input_format()
+ self.preprocessor.change_input_format(input_format)
return
- def __call__(self, input: Dict[str, Union[str, List[str]]], **kwargs):
+ def __call__(self, input_dict: Dict[str, Union[str, List[str]]], **kwargs):
r"""Implementation of __call__ function.
Args:
@@ -108,12 +105,12 @@ class TranslationEvaluationPipeline(Pipeline):
}
```
"""
- return super().__call__(input=input, **kwargs)
+ return super().__call__(input=input_dict, **kwargs)
- def forward(self,
- input_ids: List[torch.Tensor]) -> Dict[str, torch.Tensor]:
- return self.model(input_ids)
+ def forward(
+ self, input_dict: Dict[str,
+ torch.Tensor]) -> Dict[str, torch.Tensor]:
+ return self.model(**input_dict)
def postprocess(self, output: torch.Tensor) -> Dict[str, Any]:
- result = {OutputKeys.SCORES: output.cpu().tolist()}
- return result
+ return output
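
Note: a short sketch of the renamed evaluation-mode API (`input_format`
replacing `eval_mode`). The import path follows the updated import above; the
task name and model id are assumptions.

    from modelscope.models.nlp.unite.configuration import InputFormat
    from modelscope.pipelines import pipeline

    # Build the pipeline in source-only mode.
    evaluator = pipeline(
        'translation-evaluation',
        model='damo/nlp_unite_mup_translation_evaluation_multilingual_base',
        input_format=InputFormat.SRC)

    # Switch to source+reference later; this also updates the preprocessor.
    evaluator.change_input_format(InputFormat.SRC_REF)
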
diff --git a/modelscope/pipelines/nlp/user_satisfaction_estimation_pipeline.py b/modelscope/pipelines/nlp/user_satisfaction_estimation_pipeline.py
index 76fcd7a8..197a941f 100644
--- a/modelscope/pipelines/nlp/user_satisfaction_estimation_pipeline.py
+++ b/modelscope/pipelines/nlp/user_satisfaction_estimation_pipeline.py
@@ -51,7 +51,8 @@ class UserSatisfactionEstimationPipeline(Pipeline):
config_file=config_file,
device=device,
auto_collate=auto_collate,
- **kwargs)
+ compile=kwargs.pop('compile', False),
+ compile_options=kwargs.pop('compile_options', {}))
if hasattr(self.preprocessor, 'id2label'):
self.id2label = self.preprocessor.id2label
diff --git a/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py b/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py
index 9cd27adc..18ba40c8 100644
--- a/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py
+++ b/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py
@@ -67,7 +67,8 @@ class ZeroShotClassificationPipeline(Pipeline):
config_file=config_file,
device=device,
auto_collate=auto_collate,
- **kwargs)
+ compile=kwargs.pop('compile', False),
+ compile_options=kwargs.pop('compile_options', {}))
self.entailment_id = 0
self.contradiction_id = 2
diff --git a/modelscope/pipelines/pipeline_template.py b/modelscope/pipelines/pipeline_template.py
new file mode 100644
index 00000000..a29ce5d7
--- /dev/null
+++ b/modelscope/pipelines/pipeline_template.py
@@ -0,0 +1,87 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+from typing import Any, Dict
+
+import numpy as np
+
+from modelscope.metainfo import Pipelines
+from modelscope.models.base.base_model import Model
+from modelscope.outputs.outputs import OutputKeys
+from modelscope.pipelines.base import Pipeline
+from modelscope.pipelines.builder import PIPELINES
+from modelscope.utils.constant import Tasks
+
+__all__ = ['PipelineTemplate']
+
+
+@PIPELINES.register_module(
+ Tasks.task_template, module_name=Pipelines.pipeline_template)
+class PipelineTemplate(Pipeline):
+    """A pipeline template that explains how to define parameters and the
+    input and output information. As a rule, the first parameter is the
+    input, followed by the request parameters. Each parameter must carry a
+    type hint and, if necessary, a default value, for the convenience of
+    use.
+    """
+
+ def __init__(self, model: Model, **kwargs):
+        """A pipeline template describing input, output,
+        and parameter processing.
+
+ Args:
+ model: A Model instance.
+ """
+ # call base init.
+ super().__init__(model=model, **kwargs)
+
+ def preprocess(self,
+ input: Any,
+ max_length: int = 1024,
+ top_p: float = 0.8) -> Any:
+ """Pipeline preprocess interface.
+
+ Args:
+            input (Any): The pipeline input; see TASK_INPUTS for Tasks.task_template.
+ max_length (int, optional): The max_length parameter. Defaults to 1024.
+ top_p (float, optional): The top_p parameter. Defaults to 0.8.
+
+ Returns:
+            Any: The result to be processed by forward.
+ """
+ pass
+
+ def forward(self,
+ input: Any,
+ max_length: int = 1024,
+ top_p: float = 0.8) -> Any:
+ """The forward interface.
+
+ Args:
+ input (Any): The output of the preprocess.
+ max_length (int, optional): max_length. Defaults to 1024.
+ top_p (float, optional): top_p. Defaults to 0.8.
+
+ Returns:
+            Any: The result to be processed by postprocess.
+ """
+ pass
+
+ def postprocess(self,
+ inputs: Any,
+ postprocess_param1: str = None) -> Dict[str, Any]:
+ """The postprocess interface.
+
+ Args:
+            inputs (Any): The output of the forward.
+            postprocess_param1 (str, optional): An example postprocess
+                parameter. Defaults to None.
+
+        Returns:
+            Dict[str, Any]: The final pipeline result.
+ """
+ result = {
+ OutputKeys.BOXES: np.zeros(4),
+            OutputKeys.OUTPUT_IMG: np.zeros((10, 4)),
+            OutputKeys.TEXT_EMBEDDING: np.zeros((1, 1000))
+ }
+ return result
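
Note: a sketch of how a pipeline following this template would be invoked;
how the keyword arguments are routed to preprocess/forward/postprocess is
assumed to follow the base `Pipeline` behaviour, and the model path is a
placeholder.

    from modelscope.pipelines import pipeline
    from modelscope.utils.constant import Tasks

    # The template registers under Tasks.task_template / Pipelines.pipeline_template.
    template_pipe = pipeline(Tasks.task_template, model='/path/to/local/model')
    outputs = template_pipe('some input', max_length=1024, top_p=0.8)
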
diff --git a/modelscope/preprocessors/__init__.py b/modelscope/preprocessors/__init__.py
index a35f130a..dbcb0813 100644
--- a/modelscope/preprocessors/__init__.py
+++ b/modelscope/preprocessors/__init__.py
@@ -20,7 +20,8 @@ if TYPE_CHECKING:
from .tts import KanttsDataPreprocessor
from .multi_modal import (DiffusionImageGenerationPreprocessor,
OfaPreprocessor, MPlugPreprocessor,
- HiTeAPreprocessor)
+ HiTeAPreprocessor, MplugOwlPreprocessor,
+ ImageCaptioningClipInterrogatorPreprocessor)
from .nlp import (
DocumentSegmentationTransformersPreprocessor,
FaqQuestionAnsweringTransformersPreprocessor,
@@ -34,16 +35,16 @@ if TYPE_CHECKING:
TextErrorCorrectionPreprocessor, TextGenerationT5Preprocessor,
WordAlignmentPreprocessor, TextGenerationTransformersPreprocessor,
Tokenize, WordSegmentationBlankSetToLabelPreprocessor,
- CodeGeeXPreprocessor, MGLMSummarizationPreprocessor,
+ MGLMSummarizationPreprocessor,
ZeroShotClassificationTransformersPreprocessor,
TextGenerationJiebaPreprocessor, SentencePiecePreprocessor,
DialogIntentPredictionPreprocessor, DialogModelingPreprocessor,
DialogStateTrackingPreprocessor, ConversationalTextToSqlPreprocessor,
TableQuestionAnsweringPreprocessor, NERPreprocessorViet,
NERPreprocessorThai, WordSegmentationPreprocessorThai,
- TranslationEvaluationPreprocessor, CanmtTranslationPreprocessor,
- DialogueClassificationUsePreprocessor, SiameseUiePreprocessor,
- DocumentGroundedDialogGeneratePreprocessor,
+ TranslationEvaluationTransformersPreprocessor,
+ CanmtTranslationPreprocessor, DialogueClassificationUsePreprocessor,
+ SiameseUiePreprocessor, DocumentGroundedDialogGeneratePreprocessor,
DocumentGroundedDialogRetrievalPreprocessor,
DocumentGroundedDialogRerankPreprocessor)
from .video import ReadVideoData, MovieSceneSegmentationPreprocessor
@@ -70,7 +71,8 @@ else:
'tts': ['KanttsDataPreprocessor'],
'multi_modal': [
'DiffusionImageGenerationPreprocessor', 'OfaPreprocessor',
- 'MPlugPreprocessor', 'HiTeAPreprocessor'
+ 'MPlugPreprocessor', 'HiTeAPreprocessor', 'MplugOwlPreprocessor',
+ 'ImageCaptioningClipInterrogatorPreprocessor'
],
'nlp': [
'DocumentSegmentationTransformersPreprocessor',
@@ -96,7 +98,7 @@ else:
'DialogStateTrackingPreprocessor',
'ConversationalTextToSqlPreprocessor',
'TableQuestionAnsweringPreprocessor',
- 'TranslationEvaluationPreprocessor',
+ 'TranslationEvaluationTransformersPreprocessor',
'CanmtTranslationPreprocessor',
'DialogueClassificationUsePreprocessor', 'SiameseUiePreprocessor',
'DialogueClassificationUsePreprocessor',
diff --git a/modelscope/preprocessors/asr.py b/modelscope/preprocessors/asr.py
index ea867775..4696c675 100644
--- a/modelscope/preprocessors/asr.py
+++ b/modelscope/preprocessors/asr.py
@@ -74,14 +74,6 @@ class WavToScp(Preprocessor):
if code_base != 'funasr':
cmd = self.config_checking(cmd)
cmd = self.env_setting(cmd)
- if audio_format == 'wav':
- cmd['audio_lists'] = self.scp_generation_from_wav(cmd)
- elif audio_format == 'kaldi_ark':
- cmd['audio_lists'] = self.scp_generation_from_ark(cmd)
- elif audio_format == 'tfrecord':
- cmd['audio_lists'] = os.path.join(cmd['wav_path'], 'data.records')
- elif audio_format == 'pcm' or audio_format == 'scp':
- cmd['audio_lists'] = audio_in
return cmd
@@ -235,63 +227,4 @@ class WavToScp(Preprocessor):
inputs['model_lang'] = inputs['model_config']['lang']
else:
inputs['model_lang'] = 'zh-cn'
-
return inputs
-
- def scp_generation_from_wav(self, inputs: Dict[str, Any]) -> List[Any]:
- """scp generation from waveform files
- """
-
- # find all waveform files
- wav_list = []
- if inputs['recog_type'] == 'wav':
- file_path = inputs['wav_path']
- if os.path.isfile(file_path):
- if file_path.endswith('.wav') or file_path.endswith('.WAV'):
- wav_list.append(file_path)
- else:
- from easyasr.common import asr_utils
- wav_dir: str = inputs['wav_path']
- wav_list = asr_utils.recursion_dir_all_wav(wav_list, wav_dir)
-
- list_count: int = len(wav_list)
- inputs['wav_count'] = list_count
-
- # store all wav into audio list
- audio_lists = []
- j: int = 0
- while j < list_count:
- wav_file = wav_list[j]
- wave_key: str = os.path.splitext(os.path.basename(wav_file))[0]
- item = {'key': wave_key, 'file': wav_file}
- audio_lists.append(item)
- j += 1
-
- return audio_lists
-
- def scp_generation_from_ark(self, inputs: Dict[str, Any]) -> List[Any]:
- """scp generation from kaldi ark file
- """
-
- ark_scp_path = os.path.join(inputs['wav_path'], 'data.scp')
- ark_file_path = os.path.join(inputs['wav_path'], 'data.ark')
- assert os.path.exists(ark_scp_path), 'data.scp does not exist'
- assert os.path.exists(ark_file_path), 'data.ark does not exist'
-
- with open(ark_scp_path, 'r', encoding='utf-8') as f:
- lines = f.readlines()
-
- # store all ark item into audio list
- audio_lists = []
- for line in lines:
- outs = line.strip().split(' ')
- if len(outs) == 2:
- key = outs[0]
- sub = outs[1].split(':')
- if len(sub) == 2:
- nums = sub[1]
- content = ark_file_path + ':' + nums
- item = {'key': key, 'file': content}
- audio_lists.append(item)
-
- return audio_lists
diff --git a/modelscope/preprocessors/multi_modal.py b/modelscope/preprocessors/multi_modal.py
index bd37c620..faf796f4 100644
--- a/modelscope/preprocessors/multi_modal.py
+++ b/modelscope/preprocessors/multi_modal.py
@@ -1,5 +1,6 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os.path as osp
+import re
from io import BytesIO
from typing import Any, Dict, List, Tuple, Union
@@ -29,7 +30,7 @@ from .ofa.utils.constant import OFA_TASK_KEY_MAPPING
__all__ = [
'DiffusionImageGenerationPreprocessor', 'OfaPreprocessor',
- 'MPlugPreprocessor', 'HiTeAPreprocessor'
+ 'MPlugPreprocessor', 'HiTeAPreprocessor', 'MplugOwlPreprocessor'
]
@@ -642,3 +643,159 @@ class HiTeAPreprocessor(Preprocessor):
'answer_attention_mask': answer.attention_mask.squeeze(),
}
return output
+
+
+@PREPROCESSORS.register_module(
+ Fields.multi_modal, module_name=Preprocessors.mplug_owl_preprocessor)
+class MplugOwlPreprocessor(Preprocessor):
+
+ def __init__(self,
+ model_dir: str,
+ mode: str = ModeKeys.INFERENCE,
+ *args,
+ **kwargs):
+ super().__init__(*args, **kwargs)
+ self.model_dir = model_dir
+ self.mode = mode
+
+ self._tokenizer = None
+ self._patch_resize_transform = None
+        self.media_token = {'<image>': 65}
+ self._image_map = {}
+
+ @property
+ def tokenizer(self):
+ from modelscope.models.nlp.llama import LlamaTokenizer
+
+ if self._tokenizer is None:
+ self._tokenizer = LlamaTokenizer.from_pretrained(self.model_dir)
+ return self._tokenizer
+
+ @property
+ def patch_resize_transform(self):
+ if self._patch_resize_transform is None:
+ from torchvision import transforms
+
+ mean = (0.48145466, 0.4578275, 0.40821073)
+ std = (0.26862954, 0.26130258, 0.27577711)
+
+ self._patch_resize_transform = transforms.Compose([
+ transforms.Resize((224, 224), interpolation=Image.BICUBIC),
+ transforms.ToTensor(),
+ transforms.Normalize(mean=mean, std=std),
+ ])
+ return self._patch_resize_transform
+
+ def image_open(self, path: str) -> Tuple[Image.Image, int]:
+ if path not in self._image_map:
+ index = len(self._image_map)
+ self._image_map[path] = (load_image(path), index)
+ return self._image_map[path]
+
+ def tokenize_text(self, text: str) -> List[int]:
+ media_tokens = {
+ k: -int(i + 1)
+ for i, k in enumerate(self.media_token.keys())
+ }
+ media_lengths = self.media_token.copy()
+
+ prompt_chunk = [self.tokenizer.bos_token_id]
+
+ # Pure Text
+ condition = [
+ media_token not in text for media_token in media_tokens.keys()
+ ]
+ if all(condition):
+ enc_chunk = prompt_chunk + \
+ self.tokenizer(text, add_special_tokens=False)['input_ids']
+
+ # Multi-Modal Text
+ else:
+ enc_chunk = prompt_chunk
+ pattern = '|'.join(map(re.escape, list(media_tokens.keys())))
+ chunk_strs = re.split(f'({pattern})', text)
+ chunk_strs = [x for x in chunk_strs if len(x) > 0]
+ for idx, chunk_str in enumerate(chunk_strs):
+ if chunk_str in media_tokens:
+ enc_chunk += [media_tokens[chunk_str]] * \
+ media_lengths[chunk_str]
+ else:
+ tmp_chunk = self.tokenizer(
+ chunk_str, add_special_tokens=False)['input_ids']
+ enc_chunk += tmp_chunk
+ return enc_chunk
+
+    def convert(self, messages: Dict[str, List[Dict]]) -> Tuple[List, str]:
+ texts = []
+ image = []
+ messages = messages['messages']
+ for turn in messages:
+ if turn['role'] == 'system':
+ role = ''
+ elif turn['role'] == 'user':
+ role = 'Human: '
+ else:
+ role = 'AI: '
+ if isinstance(turn['content'], str):
+ text = f"{role}{turn['content']}"
+ texts.append(text)
+ else:
+ for t in turn['content']:
+ if isinstance(t, str):
+ text = f'{role}{t}'
+ else:
+                        text = f'{role}<image>'
+ image.append(t['image'])
+ texts.append(text)
+ texts = '\n'.join(texts)
+ texts += '\nAI: '
+ return image, texts
+
+ def __call__(self, messages: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Args:
+            messages: {'messages': [
+                {'role': 'system', 'content': 'message1'},
+                {'role': 'user', 'content': 'message2'},
+                {'role': 'user', 'content': ['message2', {'image': 'image_path'}, 'message3', ...]},
+            ]}
+            The 'role' should be chosen from ['system', 'user', 'assistant'].
+            The 'content' can be either a str or a List[Union[str, Dict]].
+ Return:
+ output: Dict[str, Tensor]
+ """
+ output = {}
+ images, text = self.convert(messages)
+
+ if len(images) > 0:
+ pixel_values = []
+ for image in images:
+ pixel_values.append(
+ self.patch_resize_transform(self.image_open(image)[0]))
+ pixel_values = torch.stack(pixel_values, dim=0)
+ else:
+ pixel_values = None
+
+ input_ids = self.tokenize_text(text)
+ input_ids = torch.LongTensor([input_ids])
+
+ output = {
+ 'pixel_values': pixel_values,
+ 'input_ids': input_ids,
+ }
+
+ return output
+
+
+@PREPROCESSORS.register_module(
+ Fields.multi_modal,
+ module_name=Preprocessors.image_captioning_clip_interrogator_preprocessor)
+class ImageCaptioningClipInterrogatorPreprocessor(Preprocessor):
+
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+
+    def __call__(self, data) -> np.ndarray:
+ image = load_image(data)
+ data = np.array(image).transpose(2, 0, 1)
+ return data
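For context, a sketch of how the new MplugOwlPreprocessor would be fed (the checkpoint directory and image path are placeholders):

from modelscope.preprocessors.multi_modal import MplugOwlPreprocessor

# Placeholder path to a local mPLUG-Owl checkpoint containing the tokenizer files.
preprocessor = MplugOwlPreprocessor(model_dir='/path/to/mplug-owl')

messages = {
    'messages': [
        {'role': 'system', 'content': 'The following is a conversation between a curious human and an AI assistant.'},
        {'role': 'user', 'content': [{'image': '/path/to/cat.jpg'}, 'What is in the picture?']},
    ]
}
inputs = preprocessor(messages)
# inputs['input_ids']:    LongTensor of shape (1, seq_len), media tokens expanded to 65 slots each
# inputs['pixel_values']: float tensor of shape (num_images, 3, 224, 224), or None for text-only input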
diff --git a/modelscope/preprocessors/nlp/__init__.py b/modelscope/preprocessors/nlp/__init__.py
index 5904d65e..19421fa0 100644
--- a/modelscope/preprocessors/nlp/__init__.py
+++ b/modelscope/preprocessors/nlp/__init__.py
@@ -29,7 +29,7 @@ if TYPE_CHECKING:
from .space_T_en import ConversationalTextToSqlPreprocessor
from .space_T_cn import TableQuestionAnsweringPreprocessor
from .mglm_summarization_preprocessor import MGLMSummarizationPreprocessor
- from .translation_evaluation_preprocessor import TranslationEvaluationPreprocessor
+ from .translation_evaluation_preprocessor import TranslationEvaluationTransformersPreprocessor
from .canmt_translation import CanmtTranslationPreprocessor
from .dialog_classification_use_preprocessor import DialogueClassificationUsePreprocessor
from .siamese_uie_preprocessor import SiameseUiePreprocessor
@@ -90,7 +90,7 @@ else:
'space_T_en': ['ConversationalTextToSqlPreprocessor'],
'space_T_cn': ['TableQuestionAnsweringPreprocessor'],
'translation_evaluation_preprocessor':
- ['TranslationEvaluationPreprocessor'],
+ ['TranslationEvaluationTransformersPreprocessor'],
'canmt_translation': [
'CanmtTranslationPreprocessor',
],
diff --git a/modelscope/preprocessors/nlp/token_classification_preprocessor.py b/modelscope/preprocessors/nlp/token_classification_preprocessor.py
index 66e57cc8..4b4fee1f 100644
--- a/modelscope/preprocessors/nlp/token_classification_preprocessor.py
+++ b/modelscope/preprocessors/nlp/token_classification_preprocessor.py
@@ -201,7 +201,7 @@ class TokenClassificationTransformersPreprocessor(
def __init__(self,
model_dir: str = None,
- first_sequence: str = None,
+ first_sequence: str = 'text',
label: str = 'label',
label2id: Dict = None,
label_all_tokens: bool = False,
diff --git a/modelscope/preprocessors/nlp/translation_evaluation_preprocessor.py b/modelscope/preprocessors/nlp/translation_evaluation_preprocessor.py
index 0bf62cdc..b0b2efd1 100644
--- a/modelscope/preprocessors/nlp/translation_evaluation_preprocessor.py
+++ b/modelscope/preprocessors/nlp/translation_evaluation_preprocessor.py
@@ -2,10 +2,13 @@
from typing import Any, Dict, List, Union
+import torch
from transformers import AutoTokenizer
from modelscope.metainfo import Preprocessors
-from modelscope.models.nlp.unite.configuration_unite import EvaluationMode
+from modelscope.models.nlp.unite.configuration import InputFormat
+from modelscope.models.nlp.unite.translation_evaluation import \
+ combine_input_sentences
from modelscope.preprocessors import Preprocessor
from modelscope.preprocessors.builder import PREPROCESSORS
from modelscope.utils.constant import Fields, ModeKeys
@@ -14,43 +17,98 @@ from .transformers_tokenizer import NLPTokenizer
@PREPROCESSORS.register_module(
Fields.nlp, module_name=Preprocessors.translation_evaluation)
-class TranslationEvaluationPreprocessor(Preprocessor):
+class TranslationEvaluationTransformersPreprocessor(Preprocessor):
r"""The tokenizer preprocessor used for translation evaluation.
"""
def __init__(self,
model_dir: str,
- eval_mode: EvaluationMode,
+ max_len: int,
+ pad_token_id: int,
+ eos_token_id: int,
+ input_format: InputFormat = InputFormat.SRC_REF,
mode=ModeKeys.INFERENCE,
*args,
**kwargs):
- r"""preprocess the data via the vocab file from the `model_dir` path
+ r"""Preprocessing the data for the model in `model_dir` path
Args:
model_dir: A Model instance.
- eval_mode: Evaluation mode, choosing one from `"EvaluationMode.SRC_REF"`,
- `"EvaluationMode.SRC"`, `"EvaluationMode.REF"`. Aside from hypothesis, the
+ max_len: Maximum length for input sequence.
+ pad_token_id: Token id for padding token.
+ eos_token_id: Token id for the ending-of-sequence (eos) token.
+ input_format: Input format, choosing one from `"InputFormat.SRC_REF"`,
+ `"InputFormat.SRC"`, `"InputFormat.REF"`. Aside from hypothesis, the
source/reference/source+reference can be presented during evaluation.
+ mode: The mode for this preprocessor.
"""
super().__init__(mode=mode)
self.tokenizer = NLPTokenizer(
model_dir=model_dir, use_fast=False, tokenize_kwargs=kwargs)
- self.eval_mode = eval_mode
+ self.input_format = input_format
+
+ self.max_len = max_len
+ self.pad_token_id = pad_token_id
+ self.eos_token_id = eos_token_id
return
- def __call__(self, input_dict: Dict[str, Any]) -> List[List[str]]:
- if self.eval_mode == EvaluationMode.SRC and 'src' not in input_dict.keys(
+ def change_input_format(self, input_format: InputFormat):
+ r"""Change the input format for the preprocessor.
+
+ Args:
+ input_format: Any choice in InputFormat.SRC_REF, InputFormat.SRC and InputFormat.REF.
+
+ """
+ self.input_format = input_format
+ return
+
+ def collect_input_ids(self, input_dict: Dict[str, Any]):
+ r"""Collect the input ids for the given examples.
+
+ Args:
+ input_dict: A dict containing hyp/src/ref sentences.
+
+ Returns:
+ The token ids for each example.
+
+ """
+ output_sents = [
+ self.tokenizer(
+ input_dict['hyp'], return_tensors='pt',
+ padding=True)['input_ids']
+ ]
+ if self.input_format == InputFormat.SRC or self.input_format == InputFormat.SRC_REF:
+ output_sents += [
+ self.tokenizer(
+ input_dict['src'], return_tensors='pt',
+ padding=True)['input_ids']
+ ]
+ if self.input_format == InputFormat.REF or self.input_format == InputFormat.SRC_REF:
+ output_sents += [
+ self.tokenizer(
+ input_dict['ref'], return_tensors='pt',
+ padding=True)['input_ids']
+ ]
+
+ input_ids = combine_input_sentences(output_sents, self.max_len,
+ self.pad_token_id,
+ self.eos_token_id)
+
+ return input_ids
+
+ def __call__(self, input_dict: Dict[str, Any]) -> Dict[str, Any]:
+ if self.input_format == InputFormat.SRC and 'src' not in input_dict.keys(
):
raise ValueError(
'Source sentences are required for source-only evaluation mode.'
)
- if self.eval_mode == EvaluationMode.REF and 'ref' not in input_dict.keys(
+ if self.input_format == InputFormat.REF and 'ref' not in input_dict.keys(
):
raise ValueError(
'Reference sentences are required for reference-only evaluation mode.'
)
- if self.eval_mode == EvaluationMode.SRC_REF and (
+ if self.input_format == InputFormat.SRC_REF and (
'src' not in input_dict.keys()
or 'ref' not in input_dict.keys()):
raise ValueError(
@@ -59,29 +117,58 @@ class TranslationEvaluationPreprocessor(Preprocessor):
if type(input_dict['hyp']) == str:
input_dict['hyp'] = [input_dict['hyp']]
- if (self.eval_mode == EvaluationMode.SRC or self.eval_mode
- == EvaluationMode.SRC_REF) and type(input_dict['src']) == str:
+ if (self.input_format == InputFormat.SRC or self.input_format
+ == InputFormat.SRC_REF) and type(input_dict['src']) == str:
input_dict['src'] = [input_dict['src']]
- if (self.eval_mode == EvaluationMode.REF or self.eval_mode
- == EvaluationMode.SRC_REF) and type(input_dict['ref']) == str:
+ if (self.input_format == InputFormat.REF or self.input_format
+ == InputFormat.SRC_REF) and type(input_dict['ref']) == str:
input_dict['ref'] = [input_dict['ref']]
- output_sents = [
- self.tokenizer(
- input_dict['hyp'], return_tensors='pt',
- padding=True)['input_ids']
- ]
- if self.eval_mode == EvaluationMode.SRC or self.eval_mode == EvaluationMode.SRC_REF:
- output_sents += [
- self.tokenizer(
- input_dict['src'], return_tensors='pt',
- padding=True)['input_ids']
- ]
- if self.eval_mode == EvaluationMode.REF or self.eval_mode == EvaluationMode.SRC_REF:
- output_sents += [
- self.tokenizer(
- input_dict['ref'], return_tensors='pt',
- padding=True)['input_ids']
- ]
+ if (self.input_format == InputFormat.SRC
+ or self.input_format == InputFormat.SRC_REF) and (len(
+ input_dict['hyp']) != len(input_dict['src'])):
+ raise ValueError(
+ 'The number of given hyp sentences (%d) is not equal to that of src (%d).'
+ % (len(input_dict['hyp']), len(input_dict['src'])))
+ if (self.input_format == InputFormat.REF
+ or self.input_format == InputFormat.SRC_REF) and (len(
+ input_dict['hyp']) != len(input_dict['ref'])):
+ raise ValueError(
+ 'The number of given hyp sentences (%d) is not equal to that of ref (%d).'
+ % (len(input_dict['hyp']), len(input_dict['ref'])))
- return output_sents
+ output_dict = {'input_ids': self.collect_input_ids(input_dict)}
+
+ if self.mode == ModeKeys.TRAIN or self.mode == ModeKeys.EVAL:
+ if 'score' not in input_dict.keys():
+ raise KeyError(
+ 'During training or evaluating, \'score\' should be provided.'
+ )
+            scores = [input_dict['score']] if isinstance(
+                input_dict['score'], float) else input_dict['score']
+            if len(scores) != len(output_dict['input_ids']):
+                raise ValueError(
+                    'The number of scores is not equal to that of the given examples. '
+                    'Required %d, given %d.' %
+                    (len(output_dict['input_ids']), len(scores)))
+
+            output_dict['score'] = scores
+
+ if self.mode == ModeKeys.EVAL:
+ if 'lp' not in input_dict.keys():
+ raise ValueError(
+ 'Language pair should be provided for evaluation.')
+
+ if 'segment_id' not in input_dict.keys():
+ raise ValueError(
+ 'Segment id should be provided for evaluation.')
+
+ if 'raw_score' not in input_dict.keys():
+ raise ValueError(
+ 'Raw scores should be provided for evaluation.')
+
+ output_dict['lp'] = input_dict['lp']
+ output_dict['segment_id'] = input_dict['segment_id']
+ output_dict['raw_score'] = input_dict['raw_score']
+
+ return output_dict
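For illustration, a sketch of the inference-time call under the renamed preprocessor; the constructor values are placeholders that would normally come from the UniTE model configuration:

from modelscope.models.nlp.unite.configuration import InputFormat
from modelscope.preprocessors.nlp.translation_evaluation_preprocessor import \
    TranslationEvaluationTransformersPreprocessor

preprocessor = TranslationEvaluationTransformersPreprocessor(
    model_dir='/path/to/unite_model',  # placeholder model directory
    max_len=512,
    pad_token_id=1,
    eos_token_id=2,
    input_format=InputFormat.SRC_REF)

batch = preprocessor({
    'hyp': ['This is a translated sentence.'],
    'src': ['这是一个翻译的句子。'],
    'ref': ['This is a translation.'],
})
# batch['input_ids'] holds the combined hyp/src/ref token ids produced by combine_input_sentences.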
diff --git a/modelscope/trainers/__init__.py b/modelscope/trainers/__init__.py
index 90f73a7f..0d20fe00 100644
--- a/modelscope/trainers/__init__.py
+++ b/modelscope/trainers/__init__.py
@@ -15,6 +15,8 @@ if TYPE_CHECKING:
from .nlp import SequenceClassificationTrainer, TextRankingTrainer, SiameseUIETrainer
from .nlp_trainer import NlpEpochBasedTrainer, VecoTrainer
from .trainer import EpochBasedTrainer
+ from .training_args import TrainingArgs, build_dataset_from_file
+ from .hooks import Hook, Priority
else:
_import_structure = {
@@ -32,7 +34,9 @@ else:
'SiameseUIETrainer'
],
'nlp_trainer': ['NlpEpochBasedTrainer', 'VecoTrainer'],
- 'trainer': ['EpochBasedTrainer']
+ 'trainer': ['EpochBasedTrainer'],
+ 'training_args': ['TrainingArgs', 'build_dataset_from_file'],
+        'hooks': ['Hook', 'Priority']
}
import sys
diff --git a/modelscope/trainers/cli_argument_parser.py b/modelscope/trainers/cli_argument_parser.py
new file mode 100644
index 00000000..f183b9ea
--- /dev/null
+++ b/modelscope/trainers/cli_argument_parser.py
@@ -0,0 +1,151 @@
+from argparse import Action, ArgumentDefaultsHelpFormatter, ArgumentParser
+from dataclasses import fields
+from typing import List
+
+
+class CliArgumentParser(ArgumentParser):
+ """ Argument Parser to define and parse command-line args for training.
+
+ Args:
+        training_args: A dataclass instance (such as `TrainingArgs`) whose
+            fields define the command-line parameters for training.
+ """
+
+ def __init__(self, training_args=None, **kwargs):
+ if 'formatter_class' not in kwargs:
+ kwargs['formatter_class'] = ArgumentDefaultsHelpFormatter
+ super().__init__(**kwargs)
+ self.training_args = training_args
+ self.define_args()
+
+ def get_manual_args(self, args):
+ return [arg[2:] for arg in args if arg.startswith('--')]
+
+ def _parse_known_args(self, args: List = None, namespace=None):
+ self.model_id = namespace.model if namespace is not None else None
+ if '--model' in args:
+ self.model_id = args[args.index('--model') + 1]
+ self.manual_args = self.get_manual_args(args)
+ return super()._parse_known_args(args, namespace)
+
+ def print_help(self, file=None):
+ return super().print_help(file)
+
+ def define_args(self):
+ if self.training_args is not None:
+ for f in fields(self.training_args):
+ arg_name = f.name
+ arg_attr = getattr(self.training_args, f.name)
+ name = f'--{arg_name}'
+ kwargs = dict(type=f.type, help=f.metadata['help'])
+ kwargs['default'] = arg_attr
+
+ if 'choices' in f.metadata:
+ kwargs['choices'] = f.metadata['choices']
+
+ kwargs['action'] = SingleAction
+ self.add_argument(name, **kwargs)
+
+
+class DictAction(Action):
+ """
+    Argparse action that splits an argument of the form KEY=VALUE on the
+    first '=' and appends it to a dictionary. List options can be passed as
+    comma-separated values, i.e. 'KEY=V1,V2,V3', or with explicit brackets,
+    i.e. 'KEY=[V1,V2,V3]'. It also supports nested brackets to build
+    list/tuple values, e.g. 'KEY=[(V1,V2),(V3,V4)]'.
+ """
+
+ @staticmethod
+ def parse_int_float_bool_str(val):
+ try:
+ return int(val)
+ except ValueError:
+ pass
+ try:
+ return float(val)
+ except ValueError:
+ pass
+ if val.lower() in ['true', 'false']:
+ return val.lower() == 'true'
+ if val == 'None':
+ return None
+ return val
+
+ @staticmethod
+ def parse_iterable(val):
+ """Parse iterable values in the string.
+ All elements inside '()' or '[]' are treated as iterable values.
+ Args:
+ val (str): Value string.
+ Returns:
+ list | tuple: The expanded list or tuple from the string.
+ Examples:
+            >>> DictAction.parse_iterable('1,2,3')
+            [1, 2, 3]
+            >>> DictAction.parse_iterable('[a, b, c]')
+            ['a', 'b', 'c']
+            >>> DictAction.parse_iterable('[(1, 2, 3), [a, b], c]')
+            [(1, 2, 3), ['a', 'b'], 'c']
+ """
+
+ def find_next_comma(string):
+ """Find the position of next comma in the string.
+ If no ',' is found in the string, return the string length. All
+ chars inside '()' and '[]' are treated as one element and thus ','
+ inside these brackets are ignored.
+ """
+ assert (string.count('(') == string.count(')')) and (
+ string.count('[')
+ == string.count(']')), f'Imbalanced brackets exist in {string}'
+ end = len(string)
+ for idx, char in enumerate(string):
+ pre = string[:idx]
+ # The string before this ',' is balanced
+ if ((char == ',') and (pre.count('(') == pre.count(')'))
+ and (pre.count('[') == pre.count(']'))):
+ end = idx
+ break
+ return end
+
+ # Strip ' and " characters and replace whitespace.
+ val = val.strip('\'\"').replace(' ', '')
+ is_tuple = False
+ if val.startswith('(') and val.endswith(')'):
+ is_tuple = True
+ val = val[1:-1]
+ elif val.startswith('[') and val.endswith(']'):
+ val = val[1:-1]
+ elif ',' not in val:
+ # val is a single value
+ return DictAction.parse_int_float_bool_str(val)
+
+ values = []
+ while len(val) > 0:
+ comma_idx = find_next_comma(val)
+ element = DictAction.parse_iterable(val[:comma_idx])
+ values.append(element)
+ val = val[comma_idx + 1:]
+ if is_tuple:
+ values = tuple(values)
+ return values
+
+ def __call__(self, parser, namespace, values, option_string):
+ options = {}
+ for kv in values:
+ key, val = kv.split('=', maxsplit=1)
+ options[key] = self.parse_iterable(val)
+ setattr(namespace, self.dest, options)
+
+
+class SingleAction(DictAction):
+ """ Argparse action to convert value to tuple or list or nested structure of
+ list and tuple, i.e 'V1,V2,V3', or with explicit brackets, i.e. '[V1,V2,V3]'.
+ It also support nested brackets to build list/tuple values. e.g. '[(V1,V2),(V3,V4)]'
+ """
+
+ def __call__(self, parser, namespace, value, option_string):
+ if isinstance(value, str):
+ setattr(namespace, self.dest, self.parse_iterable(value))
+ else:
+ setattr(namespace, self.dest, value)
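A sketch of how the parser cooperates with a dataclass of training arguments; the DemoArgs dataclass and the values below are illustrative only:

from dataclasses import dataclass, field

from modelscope.trainers.cli_argument_parser import CliArgumentParser


@dataclass
class DemoArgs:
    model: str = field(
        default=None, metadata={'help': 'The model id or local model directory'})
    lr: float = field(default=1e-4, metadata={'help': 'The learning rate'})


parser = CliArgumentParser(DemoArgs())
args = parser.parse_args(['--model', 'damo/some-model', '--lr', '5e-5'])
# SingleAction leaves plain strings untouched, and argparse converts '5e-5' via the field type.
print(args.model, args.lr)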
diff --git a/modelscope/trainers/default_config.py b/modelscope/trainers/default_config.py
index 51a0df40..bb272695 100644
--- a/modelscope/trainers/default_config.py
+++ b/modelscope/trainers/default_config.py
@@ -4,38 +4,6 @@ from typing import Dict, List, Optional, Tuple
from modelscope.utils.config import Config
-DEFAULT_CONFIG = Config({
- 'framework': 'pytorch',
- 'train': {
- 'work_dir': '/tmp',
- 'max_epochs': 10,
- 'dataloader': {
- 'batch_size_per_gpu': 16,
- 'workers_per_gpu': 0
- },
- 'optimizer': {
- 'type': 'SGD',
- 'lr': 1e-3
- },
- 'lr_scheduler': {
- 'type': 'StepLR',
- 'step_size': 2
- },
- 'checkpoint': {
- 'period': {
- 'interval': 1
- }
- }
- },
- 'evaluation': {
- 'dataloader': {
- 'batch_size_per_gpu': 16,
- 'workers_per_gpu': 0,
- 'shuffle': False
- },
- }
-})
-
DEFAULT_HOOKS_CONFIG = {
'train.hooks': [{
'type': 'CheckpointHook',
@@ -68,7 +36,7 @@ def merge_cfg(cfg: Config):
def merge_hooks(cfg: Config) -> List[Dict]:
- hooks = cfg.train.hooks.copy()
+ hooks = getattr(cfg.train, 'hooks', []).copy()
for hook_type, key_chain in _HOOK_KEY_CHAIN_MAP.items():
hook = _key_chain_to_hook(cfg, key_chain, hook_type)
if hook is not None:
@@ -107,7 +75,8 @@ def _check_basic_hook(cfg: Config, key_chain: str, hook_type: str) -> bool:
if cfg.safe_get(key_chain) is None:
return False
hooks = list(
- filter(lambda hook: hook['type'] == hook_type, cfg.train.hooks))
+ filter(lambda hook: hook['type'] == hook_type,
+ getattr(cfg.train, 'hooks', [])))
assert len(hooks) == 0, f'The key_chain {key_chain} and the traditional hook ' \
f'cannot exist at the same time, ' \
f'please delete {hook_type} in the configuration file.'
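A small sketch of the behavior this change enables: `merge_hooks` no longer requires an explicit `train.hooks` list, so a configuration expressed purely through key chains (or with no hook entries at all) is accepted. The config values are illustrative:

from modelscope.trainers.default_config import merge_hooks
from modelscope.utils.config import Config

# No train.hooks entry at all; previously this raised when copying cfg.train.hooks.
cfg = Config({'train': {'work_dir': '/tmp', 'max_epochs': 1}})
hooks = merge_hooks(cfg)
print(hooks)  # only hooks derived from key-chain entries; likely an empty list here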
diff --git a/modelscope/trainers/easycv/__init__.py b/modelscope/trainers/easycv/__init__.py
deleted file mode 100644
index b1b8fc15..00000000
--- a/modelscope/trainers/easycv/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import TYPE_CHECKING
-
-from modelscope.utils.import_utils import LazyImportModule
-
-if TYPE_CHECKING:
- from .utils import AddLrLogHook, EasyCVMetric
-else:
- _import_structure = {'utils': ['AddLrLogHook', 'EasyCVMetric']}
-
- import sys
-
- sys.modules[__name__] = LazyImportModule(
- __name__,
- globals()['__file__'],
- _import_structure,
- module_spec=__spec__,
- extra_objects={},
- )
diff --git a/modelscope/trainers/easycv/trainer.py b/modelscope/trainers/easycv/trainer.py
deleted file mode 100644
index 58d6a440..00000000
--- a/modelscope/trainers/easycv/trainer.py
+++ /dev/null
@@ -1,183 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from copy import deepcopy
-from functools import partial
-from typing import Callable, Optional, Tuple, Union
-
-import torch
-from easycv.utils.checkpoint import load_checkpoint as ev_load_checkpoint
-from torch import nn
-from torch.utils.data import Dataset
-
-from modelscope.metainfo import Trainers
-from modelscope.models.base import TorchModel
-from modelscope.msdatasets import MsDataset
-from modelscope.preprocessors import Preprocessor
-from modelscope.trainers import EpochBasedTrainer
-from modelscope.trainers.base import TRAINERS
-from modelscope.trainers.easycv.utils import register_util
-from modelscope.trainers.hooks import HOOKS
-from modelscope.trainers.parallel.builder import build_parallel
-from modelscope.trainers.parallel.utils import is_parallel
-from modelscope.utils.config import Config
-from modelscope.utils.constant import DEFAULT_MODEL_REVISION
-from modelscope.utils.import_utils import LazyImportModule
-from modelscope.utils.registry import default_group
-
-
-@TRAINERS.register_module(module_name=Trainers.easycv)
-class EasyCVEpochBasedTrainer(EpochBasedTrainer):
- """Epoch based Trainer for EasyCV.
-
- Args:
- cfg_file(str): The config file of EasyCV.
- model (:obj:`torch.nn.Module` or :obj:`TorchModel` or `str`): The model to be run, or a valid model dir
- or a model id. If model is None, build_model method will be called.
- train_dataset (`MsDataset` or `torch.utils.data.Dataset`, *optional*):
- The dataset to use for training.
- Note that if it's a `torch.utils.data.IterableDataset` with some randomization and you are training in a
- distributed fashion, your iterable dataset should either use a internal attribute `generator` that is a
- `torch.Generator` for the randomization that must be identical on all processes (and the Trainer will
- manually set the seed of this `generator` at each epoch) or have a `set_epoch()` method that internally
- sets the seed of the RNGs used.
- eval_dataset (`MsDataset` or `torch.utils.data.Dataset`, *optional*): The dataset to use for evaluation.
- preprocessor (:obj:`Preprocessor`, *optional*): The optional preprocessor.
- NOTE: If the preprocessor has been called before the dataset fed into this trainer by user's custom code,
- this parameter should be None, meanwhile remove the 'preprocessor' key from the cfg_file.
- Else the preprocessor will be instantiated from the cfg_file or assigned from this parameter and
- this preprocessing action will be executed every time the dataset's __getitem__ is called.
- optimizers (`Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler._LRScheduler]`, *optional*): A tuple
- containing the optimizer and the scheduler to use.
- max_epochs: (int, optional): Total training epochs.
- """
-
- def __init__(
- self,
- cfg_file: Optional[str] = None,
- model: Optional[Union[TorchModel, nn.Module, str]] = None,
- arg_parse_fn: Optional[Callable] = None,
- train_dataset: Optional[Union[MsDataset, Dataset]] = None,
- eval_dataset: Optional[Union[MsDataset, Dataset]] = None,
- preprocessor: Optional[Preprocessor] = None,
- optimizers: Tuple[torch.optim.Optimizer,
- torch.optim.lr_scheduler._LRScheduler] = (None,
- None),
- model_revision: Optional[str] = DEFAULT_MODEL_REVISION,
- **kwargs):
-
- register_util.register_parallel()
- register_util.register_part_mmcv_hooks_to_ms()
-
- super(EasyCVEpochBasedTrainer, self).__init__(
- model=model,
- cfg_file=cfg_file,
- arg_parse_fn=arg_parse_fn,
- preprocessor=preprocessor,
- optimizers=optimizers,
- model_revision=model_revision,
- train_dataset=train_dataset,
- eval_dataset=eval_dataset,
- **kwargs)
-
- # reset data_collator
- from mmcv.parallel import collate
-
- self.train_data_collator = partial(
- collate,
- samples_per_gpu=self.cfg.train.dataloader.batch_size_per_gpu)
- self.eval_data_collator = partial(
- collate,
- samples_per_gpu=self.cfg.evaluation.dataloader.batch_size_per_gpu)
-
- # load pretrained model
- load_from = self.cfg.get('load_from', None)
- if load_from is not None:
- ev_load_checkpoint(
- self.model,
- filename=load_from,
- map_location=self.device,
- strict=False,
- )
-
- # reset parallel
- if not self._dist:
- assert not is_parallel(
- self.model
- ), 'Not support model wrapped by custom parallel if not in distributed mode!'
- dp_cfg = dict(
- type='MMDataParallel',
- module=self.model,
- device_ids=[torch.cuda.current_device()])
- self.model = build_parallel(dp_cfg)
-
- def rebuild_config(self, cfg: Config):
- cfg = super().rebuild_config(cfg)
- # Register easycv hooks dynamicly. If the hook already exists in modelscope,
- # the hook in modelscope will be used, otherwise register easycv hook into ms.
- # We must manually trigger lazy import to detect whether the hook is in modelscope.
- # TODO: use ast index to detect whether the hook is in modelscope
- for h_i in cfg.train.get('hooks', []):
- sig = ('HOOKS', default_group, h_i['type'])
- LazyImportModule.import_module(sig)
- if h_i['type'] not in HOOKS._modules[default_group]:
- if h_i['type'] in [
- 'TensorboardLoggerHookV2', 'WandbLoggerHookV2'
- ]:
- raise ValueError(
- 'Not support hook %s now, we will support it in the future!'
- % h_i['type'])
- register_util.register_hook_to_ms(h_i['type'])
- return cfg
-
- def create_optimizer_and_scheduler(self):
- """ Create optimizer and lr scheduler
- """
- optimizer, lr_scheduler = self.optimizers
- if optimizer is None:
- optimizer_cfg = self.cfg.train.get('optimizer', None)
- else:
- optimizer_cfg = None
-
- optim_options = {}
- if optimizer_cfg is not None:
- optim_options = optimizer_cfg.pop('options', {})
- from easycv.apis.train import build_optimizer
- optimizer = build_optimizer(self.model, optimizer_cfg)
-
- if lr_scheduler is None:
- lr_scheduler_cfg = self.cfg.train.get('lr_scheduler', None)
- else:
- lr_scheduler_cfg = None
-
- lr_options = {}
- # Adapt to mmcv lr scheduler hook.
- # Please refer to: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py
- if lr_scheduler_cfg is not None:
- assert optimizer is not None
- lr_options = lr_scheduler_cfg.pop('options', {})
- assert 'policy' in lr_scheduler_cfg
- policy_type = lr_scheduler_cfg.pop('policy')
- if policy_type == policy_type.lower():
- policy_type = policy_type.title()
- hook_type = policy_type + 'LrUpdaterHook'
- lr_scheduler_cfg['type'] = hook_type
-
- self.cfg.train.lr_scheduler_hook = lr_scheduler_cfg
-
- self.optimizer = optimizer
- self.lr_scheduler = lr_scheduler
-
- return self.optimizer, self.lr_scheduler, optim_options, lr_options
-
- def to_parallel(self, model) -> Union[nn.Module, TorchModel]:
- if self.cfg.get('parallel', None) is not None:
- dp_cfg = deepcopy(self.cfg['parallel'])
- dp_cfg.update(
- dict(module=model, device_ids=[torch.cuda.current_device()]))
- return build_parallel(dp_cfg)
-
- dp_cfg = dict(
- type='MMDistributedDataParallel',
- module=model,
- device_ids=[torch.cuda.current_device()])
-
- return build_parallel(dp_cfg)
diff --git a/modelscope/trainers/easycv/utils/__init__.py b/modelscope/trainers/easycv/utils/__init__.py
deleted file mode 100644
index 23cfa36a..00000000
--- a/modelscope/trainers/easycv/utils/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import TYPE_CHECKING
-
-from modelscope.utils.import_utils import LazyImportModule
-
-if TYPE_CHECKING:
- from .hooks import AddLrLogHook
- from .metric import EasyCVMetric
-
-else:
- _import_structure = {'hooks': ['AddLrLogHook'], 'metric': ['EasyCVMetric']}
-
- import sys
-
- sys.modules[__name__] = LazyImportModule(
- __name__,
- globals()['__file__'],
- _import_structure,
- module_spec=__spec__,
- extra_objects={},
- )
diff --git a/modelscope/trainers/easycv/utils/hooks.py b/modelscope/trainers/easycv/utils/hooks.py
deleted file mode 100644
index 1f1a5c95..00000000
--- a/modelscope/trainers/easycv/utils/hooks.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-from modelscope.trainers.hooks import HOOKS, Priority
-from modelscope.trainers.hooks.lr_scheduler_hook import LrSchedulerHook
-from modelscope.utils.constant import LogKeys
-
-
-@HOOKS.register_module(module_name='AddLrLogHook')
-class AddLrLogHook(LrSchedulerHook):
- """For EasyCV to adapt to ModelScope, the lr log of EasyCV is added in the trainer,
- but the trainer of ModelScope does not and it is added in the lr scheduler hook.
- But The lr scheduler hook used by EasyCV is the hook of mmcv, and there is no lr log.
- It will be deleted in the future.
- """
- PRIORITY = Priority.NORMAL
-
- def __init__(self):
- pass
-
- def before_run(self, trainer):
- pass
-
- def after_train_iter(self, trainer):
- trainer.log_buffer.output[LogKeys.LR] = self._get_log_lr(trainer)
-
- def before_train_epoch(self, trainer):
- trainer.log_buffer.output[LogKeys.LR] = self._get_log_lr(trainer)
-
- def after_train_epoch(self, trainer):
- pass
diff --git a/modelscope/trainers/easycv/utils/metric.py b/modelscope/trainers/easycv/utils/metric.py
deleted file mode 100644
index d952ec3e..00000000
--- a/modelscope/trainers/easycv/utils/metric.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-import itertools
-from typing import Dict
-
-import numpy as np
-import torch
-
-from modelscope.metrics.base import Metric
-from modelscope.metrics.builder import METRICS
-
-
-@METRICS.register_module(module_name='EasyCVMetric')
-class EasyCVMetric(Metric):
- """Adapt to ModelScope Metric for EasyCV evaluator.
- """
-
- def __init__(self, trainer=None, evaluators=None, *args, **kwargs):
- from easycv.core.evaluation.builder import build_evaluator
-
- self.trainer = trainer
- self.evaluators = build_evaluator(evaluators)
- self.preds = []
- self.grountruths = []
-
- def add(self, outputs: Dict, inputs: Dict):
- self.preds.append(outputs)
- del inputs
-
- def evaluate(self):
- results = {}
- for _, batch in enumerate(self.preds):
- for k, v in batch.items():
- if k not in results:
- results[k] = []
- results[k].append(v)
-
- for k, v in results.items():
- if len(v) == 0:
- raise ValueError(f'empty result for {k}')
-
- if isinstance(v[0], torch.Tensor):
- results[k] = torch.cat(v, 0)
- elif isinstance(v[0], (list, np.ndarray)):
- results[k] = list(itertools.chain.from_iterable(v))
- else:
- raise ValueError(
- f'value of batch prediction dict should only be tensor or list, {k} type is {v[0]}'
- )
-
- metric_values = self.trainer.eval_dataset.evaluate(
- results, self.evaluators)
- return metric_values
-
- def merge(self, other: 'EasyCVMetric'):
- self.preds.extend(other.preds)
-
- def __getstate__(self):
- return self.preds
-
- def __setstate__(self, state):
- self.__init__()
- self.preds = state
diff --git a/modelscope/trainers/easycv/utils/register_util.py b/modelscope/trainers/easycv/utils/register_util.py
deleted file mode 100644
index 04bf719b..00000000
--- a/modelscope/trainers/easycv/utils/register_util.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-import inspect
-import logging
-
-from modelscope.trainers.hooks import HOOKS
-from modelscope.trainers.parallel.builder import PARALLEL
-from modelscope.utils.registry import default_group
-
-
-class _RegisterManager:
-
- def __init__(self):
- self.registries = {}
-
- def add(self, module, name, group_key=default_group):
- if module.name not in self.registries:
- self.registries[module.name] = {}
- if group_key not in self.registries[module.name]:
- self.registries[module.name][group_key] = []
-
- self.registries[module.name][group_key].append(name)
-
- def exists(self, module, name, group_key=default_group):
- if self.registries.get(module.name, None) is None:
- return False
- if self.registries[module.name].get(group_key, None) is None:
- return False
- if name in self.registries[module.name][group_key]:
- return True
-
- return False
-
-
-_dynamic_register = _RegisterManager()
-
-
-def register_parallel():
- from mmcv.parallel import MMDistributedDataParallel, MMDataParallel
-
- mmddp = 'MMDistributedDataParallel'
- mmdp = 'MMDataParallel'
-
- if not _dynamic_register.exists(PARALLEL, mmddp):
- _dynamic_register.add(PARALLEL, mmddp)
- PARALLEL.register_module(
- module_name=mmddp, module_cls=MMDistributedDataParallel)
- if not _dynamic_register.exists(PARALLEL, mmdp):
- _dynamic_register.add(PARALLEL, mmdp)
- PARALLEL.register_module(module_name=mmdp, module_cls=MMDataParallel)
-
-
-def register_hook_to_ms(hook_name, logger=None):
- """Register EasyCV hook to ModelScope."""
- from easycv.hooks import HOOKS as _EV_HOOKS
-
- if hook_name not in _EV_HOOKS._module_dict:
- raise ValueError(
- f'Not found hook "{hook_name}" in EasyCV hook registries!')
-
- if _dynamic_register.exists(HOOKS, hook_name):
- return
- _dynamic_register.add(HOOKS, hook_name)
-
- obj = _EV_HOOKS._module_dict[hook_name]
- HOOKS.register_module(module_name=hook_name, module_cls=obj)
-
- log_str = f'Register hook "{hook_name}" to modelscope hooks.'
- logger.info(log_str) if logger is not None else logging.info(log_str)
-
-
-def register_part_mmcv_hooks_to_ms():
- """Register required mmcv hooks to ModelScope.
- Currently we only registered all lr scheduler hooks in EasyCV and mmcv.
- Please refer to:
- EasyCV: https://github.com/alibaba/EasyCV/blob/master/easycv/hooks/lr_update_hook.py
- mmcv: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py
- """
- from mmcv.runner.hooks import lr_updater
- from mmcv.runner.hooks import HOOKS as _MMCV_HOOKS
- from easycv.hooks import StepFixCosineAnnealingLrUpdaterHook, YOLOXLrUpdaterHook
-
- mmcv_hooks_in_easycv = [('StepFixCosineAnnealingLrUpdaterHook',
- StepFixCosineAnnealingLrUpdaterHook),
- ('YOLOXLrUpdaterHook', YOLOXLrUpdaterHook)]
-
- members = inspect.getmembers(lr_updater)
- members.extend(mmcv_hooks_in_easycv)
-
- for name, obj in members:
- if name in _MMCV_HOOKS._module_dict:
- if _dynamic_register.exists(HOOKS, name):
- continue
- _dynamic_register.add(HOOKS, name)
- HOOKS.register_module(
- module_name=name,
- module_cls=obj,
- )
diff --git a/modelscope/trainers/hooks/__init__.py b/modelscope/trainers/hooks/__init__.py
index 51677f25..072105be 100644
--- a/modelscope/trainers/hooks/__init__.py
+++ b/modelscope/trainers/hooks/__init__.py
@@ -5,7 +5,6 @@ from modelscope.utils.import_utils import LazyImportModule
if TYPE_CHECKING:
from .builder import HOOKS, build_hook
- from .checkpoint_hook import BestCkptSaverHook, CheckpointHook, LoadCheckpointHook
from .early_stop_hook import EarlyStopHook
from .compression import SparsityHook
from .evaluation_hook import EvaluationHook
@@ -16,6 +15,10 @@ if TYPE_CHECKING:
from .optimizer import (ApexAMPOptimizerHook, NoneOptimizerHook,
OptimizerHook, TorchAMPOptimizerHook)
from .priority import Priority, get_priority
+ from .checkpoint import CheckpointHook, LoadCheckpointHook, BestCkptSaverHook
+ from .distributed.ddp_hook import DDPHook
+ from .distributed.deepspeed_hook import DeepspeedHook
+ from .distributed.megatron_hook import MegatronHook
else:
_import_structure = {
@@ -32,7 +35,12 @@ else:
'ApexAMPOptimizerHook', 'NoneOptimizerHook', 'OptimizerHook',
'TorchAMPOptimizerHook'
],
- 'priority': ['Priority', 'get']
+ 'checkpoint':
+ ['CheckpointHook', 'LoadCheckpointHook', 'BestCkptSaverHook'],
+ 'distributed.ddp_hook': ['DDPHook'],
+ 'distributed.deepspeed_hook': ['DeepspeedHook'],
+ 'distributed.megatron_hook': ['MegatronHook'],
+ 'priority': ['Priority', 'get_priority']
}
import sys
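A quick smoke check of the re-exported hook symbols after the module reshuffle (purely illustrative):

from modelscope.trainers.hooks import (BestCkptSaverHook, CheckpointHook,
                                       DDPHook, LoadCheckpointHook, Priority)

# The classes now live under the checkpoint/ and distributed/ sub-packages.
print(CheckpointHook.__module__)  # expected: modelscope.trainers.hooks.checkpoint.checkpoint_hook
print(DDPHook.__module__)         # expected: modelscope.trainers.hooks.distributed.ddp_hook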
diff --git a/modelscope/trainers/hooks/checkpoint/__init__.py b/modelscope/trainers/hooks/checkpoint/__init__.py
new file mode 100644
index 00000000..e2abb272
--- /dev/null
+++ b/modelscope/trainers/hooks/checkpoint/__init__.py
@@ -0,0 +1,2 @@
+from .checkpoint_hook import BestCkptSaverHook, CheckpointHook
+from .load_checkpoint_hook import LoadCheckpointHook
diff --git a/modelscope/trainers/hooks/checkpoint/checkpoint_hook.py b/modelscope/trainers/hooks/checkpoint/checkpoint_hook.py
new file mode 100644
index 00000000..4b14a13f
--- /dev/null
+++ b/modelscope/trainers/hooks/checkpoint/checkpoint_hook.py
@@ -0,0 +1,435 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+import random
+import time
+from typing import Optional
+
+import numpy as np
+import torch
+
+from modelscope.hub.check_model import check_model_is_id
+from modelscope.hub.push_to_hub import push_to_hub_async
+from modelscope.metainfo import Hooks
+from modelscope.trainers.hooks.builder import HOOKS
+from modelscope.trainers.hooks.checkpoint.checkpoint_processor import \
+ CheckpointProcessor
+from modelscope.trainers.hooks.hook import Hook
+from modelscope.trainers.hooks.priority import Priority
+from modelscope.utils.constant import (DEFAULT_REPOSITORY_REVISION, LogKeys,
+ ModelFile)
+from modelscope.utils.logger import get_logger
+from modelscope.utils.torch_utils import is_master
+
+
+class CheckpointStrategy:
+ by_epoch = 'by_epoch'
+ by_step = 'by_step'
+ no = 'no'
+
+
+@HOOKS.register_module(module_name=Hooks.CheckpointHook)
+class CheckpointHook(Hook):
+ """Save checkpoints periodically.
+
+ Args:
+        save_strategy (str): The strategy for saving checkpoints, one of `by_epoch`, `by_step` or `no`.
+        interval (int): The saving frequency. If `save_strategy` is `by_epoch`,
+            it means the number of epochs, otherwise the number of iterations.
+ save_dir (str): The directory to save checkpoints. If is None, use `trainer.work_dir`
+        output_dir (str): The absolute path to save the output files for inference. If it's not specified,
+            the default dir is `{save_dir}/output`.
+ save_last (bool): Whether to save the last checkpoint. Default: True.
+        max_checkpoint_num (int): The max number of checkpoint files, default None which means never delete anything.
+            If the number exceeds the limit, earlier checkpoints will be deleted first.
+ push_to_hub (bool): Whether push the checkpoint to modelhub.
+ hub_repo_id (str): The hub repo id.
+ hub_token (str): The token of the modelhub. You can also set the environment variable `MODELSCOPE_API_TOKEN`.
+ private_hub (bool): Whether push to a private hub, default True.
+ hub_revision (str): Which branch to push the model to, default is `master`
+ kwargs:
+            by_epoch (bool): A legacy argument equivalent to `save_strategy`, but with a higher priority.
+ output_sub_dir (str): The folder under the `save_dir` to save the output checkpoint for inference.
+ This argument is kept to fit the existing configs.
+ """
+
+ PRIORITY = Priority.LOW
+
+ EVAL_RESULT_FILE = 'eval_result.txt'
+
+ def __init__(self,
+ save_strategy: Optional[str] = CheckpointStrategy.by_epoch,
+ interval: Optional[int] = 0,
+ save_dir: Optional[str] = None,
+ output_dir: Optional[str] = None,
+ save_last: Optional[bool] = True,
+ max_checkpoint_num: Optional[int] = None,
+ push_to_hub: Optional[bool] = False,
+ hub_repo_id: Optional[str] = None,
+ hub_token: Optional[str] = None,
+ private_hub: Optional[bool] = True,
+ hub_revision: Optional[str] = DEFAULT_REPOSITORY_REVISION,
+ **kwargs):
+ self.interval = interval
+ self.save_dir = save_dir
+ if 'by_epoch' in kwargs:
+ self.save_strategy = CheckpointStrategy.by_epoch if kwargs[
+ 'by_epoch'] else CheckpointStrategy.by_step
+ else:
+ self.save_strategy = save_strategy
+ if 'output_sub_dir' in kwargs:
+ self.output_sub_dir = kwargs['output_sub_dir']
+ self.output_dir = None
+ else:
+ self.output_sub_dir = None
+ self.output_dir = output_dir
+ self.save_last = save_last
+ self.rng_state = None
+ self.push_to_hub = push_to_hub
+ self.hub_repo_id = hub_repo_id
+ self.hub_token = hub_token
+ self.private_hub = private_hub
+ self.hub_revision = hub_revision
+ self.tag = -1
+ self.is_model_id = None
+ self.push_to_hub_future = None
+ self.max_checkpoint_num = None
+ if max_checkpoint_num is not None:
+ self.max_checkpoint_num = max(int(max_checkpoint_num), 1)
+ self.history_checkpoints = []
+ self.processor = CheckpointProcessor()
+
+ def set_processor(self, processor):
+ """
+ The checkpoint hook accepts a processor to finish the actual saving/deleting action.
+ """
+ self.processor = processor
+
+ def before_run(self, trainer):
+ self.tag = -1
+ if not self.save_dir:
+ self.save_dir = trainer.work_dir
+ if not self.output_dir:
+ if self.output_sub_dir:
+ self.output_dir = os.path.join(self.save_dir,
+ self.output_sub_dir)
+ else:
+ self.output_dir = os.path.join(self.save_dir,
+ ModelFile.TRAIN_OUTPUT_DIR)
+
+ if not os.path.exists(self.save_dir):
+ os.makedirs(self.save_dir, exist_ok=True)
+
+ if not hasattr(trainer, 'logger'):
+ self.logger = get_logger()
+ else:
+ self.logger = trainer.logger
+
+ if is_master():
+ output_dir = self.output_dir
+ # only global master prepares the output folder
+ self.processor.prepare_output(trainer, output_dir)
+ self.logger.info(f'Checkpoints will be saved to {self.save_dir}')
+
+ def generate_prefix(self, trainer, save_strategy):
+ if save_strategy == CheckpointStrategy.by_epoch:
+ return f'{LogKeys.EPOCH}_{trainer.epoch + 1}'
+ else:
+ return f'{LogKeys.ITER}_{trainer.iter + 1}'
+
+ def _do_save(self, trainer, save_strategy):
+ # prefix like 'epoch-1' or 'iter-1'
+ prefix = self.generate_prefix(trainer, save_strategy)
+ if self.processor.should_save_on_rank(trainer):
+ if is_master():
+ if save_strategy == CheckpointStrategy.by_epoch:
+ self.logger.info(
+ f'Saving checkpoint at {trainer.epoch + 1} epoch')
+ else:
+ self.logger.info(
+ f'Saving checkpoint at {trainer.iter + 1} iter')
+ self._save_checkpoint(trainer, prefix)
+ if is_master() and self.push_to_hub:
+ if self.push_to_hub_future is not None and not self.push_to_hub_future.done(
+ ):
+ self.logger.error(
+ f'Another uploading is running, '
+ f'this uploading with message {prefix} will be canceled.')
+ return
+ self.push_to_hub_future = self._push_to_hub(trainer, prefix)
+
+ def after_train_epoch(self, trainer):
+ if self.save_strategy != CheckpointStrategy.by_epoch:
+ return
+
+ if self._should_save(trainer):
+ self._do_save(trainer, CheckpointStrategy.by_epoch)
+
+ def after_train_iter(self, trainer):
+ if self.save_strategy != CheckpointStrategy.by_step:
+ return
+
+ if self._should_save(trainer):
+ self._do_save(trainer, CheckpointStrategy.by_step)
+
+ def after_run(self, trainer):
+ if self.push_to_hub_future is not None and not self.push_to_hub_future.done(
+ ):
+ self.logger.info('Train finished. Uploading models, waiting...')
+ while not self.push_to_hub_future.done():
+ time.sleep(1)
+ self.logger.info('Uploading models done.')
+
+ def _push_to_hub(self, trainer, prefix):
+ if self.is_model_id is None:
+ self.is_model_id = check_model_is_id(trainer.input_model_id,
+ self.hub_token)
+ self.tag += 1
+ return push_to_hub_async(
+ self.hub_repo_id,
+ self.output_dir,
+ token=self.hub_token,
+ private=self.private_hub,
+ commit_message=prefix,
+ tag=f'v1.{self.tag}',
+ revision=self.hub_revision,
+ source_repo=trainer.input_model_id if self.is_model_id else '')
+
+ def save_evaluate_results(self, trainer):
+ with open(os.path.join(self.output_dir, self.EVAL_RESULT_FILE),
+ 'w') as f:
+ f.write(str(trainer.metric_values))
+
+ def _save_checkpoint(self, trainer, prefix):
+ """Save checkpoint files and remove obsolete ones
+ """
+ checkpoint_path_prefix = os.path.join(self.save_dir, prefix)
+ meta = self._create_training_state(trainer)
+ self.processor.save_checkpoints(trainer, checkpoint_path_prefix,
+ self.output_dir, meta)
+ self.save_evaluate_results(trainer)
+ self.history_checkpoints.append(checkpoint_path_prefix)
+ self._remove_obsolete_checkpoints(trainer)
+ return prefix
+
+ def _remove_obsolete_checkpoints(self, trainer):
+ if self.max_checkpoint_num is not None and \
+ len(self.history_checkpoints) > self.max_checkpoint_num:
+ history_checkpoints = [ckpt for ckpt in self.history_checkpoints]
+ self.history_checkpoints.clear()
+ for i, checkpoint_path_prefix in enumerate(history_checkpoints):
+ if i < len(history_checkpoints) - self.max_checkpoint_num:
+ self.logger.info(
+ f'deleting checkpoint: {checkpoint_path_prefix}')
+ self.processor.remove_checkpoints(
+ trainer, checkpoint_path_prefix=checkpoint_path_prefix)
+ else:
+ self.history_checkpoints.append(checkpoint_path_prefix)
+
+ def _should_save(self, trainer):
+ if self.save_strategy == CheckpointStrategy.by_epoch:
+ check_last = self.is_last_epoch
+ check_frequency = self.every_n_epochs
+ elif self.save_strategy == CheckpointStrategy.by_step:
+ check_last = self.is_last_iter
+ check_frequency = self.every_n_iters
+ else:
+ return False
+
+ if check_frequency(trainer,
+ self.interval) or (self.save_last
+ and check_last(trainer)):
+ return True
+ return False
+
+ def _create_training_state(self, trainer):
+ self.rng_state = {
+ 'random': random.getstate(),
+ 'numpy': np.random.get_state(),
+ 'cpu': torch.random.get_rng_state(),
+ 'cuda': torch.cuda.get_rng_state_all(),
+ }
+
+ # keep epoch/iter/inner_iter/random_state
+ meta = {
+ 'epoch': trainer.epoch,
+ 'iter': trainer.iter + 1,
+ 'inner_iter': trainer.inner_iter + 1,
+ 'rng_state': self.rng_state,
+ }
+
+ # keep hooks state
+ i = 0
+ for hook in trainer.hooks:
+ if hasattr(hook, 'state_dict') and getattr(hook, '_should_save',
+ True):
+ meta[f'{hook.__class__}-{i}'] = hook.state_dict()
+ i += 1
+
+ return meta
+
+
+@HOOKS.register_module(module_name=Hooks.BestCkptSaverHook)
+class BestCkptSaverHook(CheckpointHook):
+ """
+ Save best checkpoints hook.
+
+ Args:
+ metric_key (str): Metric key to compare rule for best score.
+ save_best(bool): Save the best checkpoint, if set to False, this hook will have no effect.
+ rule (str): Comparison rule for best score. Support "max" and "min". If rule is "max", the checkpoint
+ at the maximum `metric_key` will be saved, If rule is "min", the checkpoint at the minimum `metric_key`
+ will be saved.
+ save_file_name: The manual specified saving file name.
+ restore_best (bool): Whether to restore the best checkpoint after training.
+ max_checkpoint_num (int): The max number of checkpoint files, default None which means never delete anything.
+            If the number exceeds the limit, the checkpoints with worse metrics will be deleted, as judged by the
+            `rule` and `metric_key` arguments.
+
+    The `BestCkptSaverHook` class accepts the `output_sub_dir` and `output_dir` arguments as its super class does.
+    If neither of them is passed, the default value is `{save_dir}/output_best`.
+
+    This class does not accept the `interval`, `save_strategy` or `by_epoch` arguments, because the saving
+    interval follows the `EvaluationHook`.
+ """
+
+ PRIORITY = Priority.LOW
+ rule_map = {'max': lambda x, y: x > y, 'min': lambda x, y: x < y}
+
+ def __init__(self,
+ metric_key: str,
+ save_best: Optional[bool] = True,
+ rule: Optional[str] = 'max',
+ save_file_name: Optional[str] = None,
+ restore_best: Optional[bool] = False,
+ max_checkpoint_num: Optional[int] = 1,
+ **kwargs):
+ assert rule in ['max', 'min'], 'Only support "max" or "min" rule now.'
+ output_kwargs = {}
+ if 'output_sub_dir' not in kwargs and 'output_dir' not in kwargs:
+ output_kwargs['output_sub_dir'] = ModelFile.TRAIN_BEST_OUTPUT_DIR
+ kwargs.pop('interval', None)
+ kwargs.pop('save_strategy', None)
+ super().__init__(
+ max_checkpoint_num=max_checkpoint_num,
+ **kwargs,
+ **output_kwargs,
+ )
+ self.save_best = save_best
+ self.metric_key = metric_key
+ self.rule = rule
+ self._best_metric = None
+ self._best_ckpt_file = None
+ self.save_file_name = save_file_name
+ self.restore_best = restore_best
+ self.history_checkpoints = set()
+
+ def after_train_epoch(self, trainer):
+ from modelscope.trainers.hooks import EvaluationHook
+ eval_hook = trainer.get_hook(EvaluationHook)
+ if len(eval_hook) == 0:
+ self.logger.error(
+ 'Trying to save the best checkpoint, but there is no evaluation, skipping.'
+            )
+            return
+
+ if eval_hook[0].last_eval_tag == (
+ 'epoch', trainer.epoch) and self._should_save(trainer):
+ self._do_save(trainer, 'by_epoch')
+
+ def after_train_iter(self, trainer):
+ from modelscope.trainers.hooks import EvaluationHook
+ eval_hook = trainer.get_hook(EvaluationHook)
+ if len(eval_hook) == 0:
+ self.logger.error(
+ 'Trying to save the best checkpoint, but there is no evaluation, skipping.'
+            )
+            return
+
+ if eval_hook[0].last_eval_tag == (
+ 'iter', trainer.iter) and self._should_save(trainer):
+ self._do_save(trainer, 'by_step')
+
+ def _should_save(self, trainer):
+ return self.save_best and self._is_best_metric(trainer.metric_values)
+
+ def _is_best_metric(self, metric_values):
+ if metric_values is None:
+ return False
+
+ if self.metric_key not in metric_values:
+ raise ValueError(
+ f'Not find metric_key: {self.metric_key} in {metric_values}')
+
+ if self._best_metric is None:
+ self._best_metric = metric_values[self.metric_key]
+ return True
+ else:
+ compare_fn = self.rule_map[self.rule]
+ if compare_fn(metric_values[self.metric_key], self._best_metric):
+ self._best_metric = metric_values[self.metric_key]
+ return True
+ return False
+
+ def generate_prefix(self, trainer, save_strategy):
+ if save_strategy == CheckpointStrategy.by_epoch:
+ return f'best_{LogKeys.EPOCH}{trainer.epoch + 1}_{self.metric_key}{self._best_metric}'
+ else:
+ return f'best_{LogKeys.ITER}{trainer.iter + 1}_{self.metric_key}{self._best_metric}'
+
+ def _save_checkpoint(self, trainer, prefix):
+ checkpoint_path_prefix = self.save_file_name
+ if checkpoint_path_prefix is None:
+ checkpoint_path_prefix = os.path.join(self.save_dir, prefix)
+ else:
+ checkpoint_path_prefix = os.path.join(self.save_dir,
+ checkpoint_path_prefix)
+
+ self._best_ckpt_file = checkpoint_path_prefix
+ meta = self._create_training_state(trainer)
+ self.processor.save_checkpoints(trainer, checkpoint_path_prefix,
+ self.output_dir, meta)
+ self.save_evaluate_results(trainer)
+ self.history_checkpoints.add(checkpoint_path_prefix)
+ self._remove_obsolete_checkpoints(trainer)
+ return prefix
+
+ def _remove_obsolete_checkpoints(self, trainer):
+
+ def extract_metric_from_filename(name1):
+ metric1 = float(name1.split(self.metric_key)[1])
+ if self.rule == 'max':
+ return -metric1
+ else:
+ return metric1
+
+ if self.max_checkpoint_num is not None and \
+ len(self.history_checkpoints) > self.max_checkpoint_num:
+ history_checkpoints = sorted(
+ self.history_checkpoints, key=extract_metric_from_filename)
+ self.history_checkpoints.clear()
+ for i, checkpoint_path_prefix in enumerate(history_checkpoints):
+ if i < self.max_checkpoint_num:
+ self.history_checkpoints.add(checkpoint_path_prefix)
+ else:
+ self.logger.info(
+ f'deleting checkpoint: {checkpoint_path_prefix}')
+ self.processor.remove_checkpoints(
+ trainer, checkpoint_path_prefix=checkpoint_path_prefix)
+
+ def state_dict(self):
+ return {
+ 'best_metric': self._best_metric,
+ }
+
+ def load_state_dict(self, state_dict):
+ if state_dict is not None and len(state_dict) > 0:
+ self._best_metric = state_dict.get('best_metric')
+ else:
+ self.logger.warning(
+ 'The state_dict is not available; the recorded best metric value may be inaccurate after resuming.'
+ )
+
+ def after_run(self, trainer):
+ if self.restore_best:
+ # If restore_best is True, will call the LoadCheckpointHook to load the best checkpoint
+ # for later evaluation or prediction.
+ from modelscope.trainers.hooks.checkpoint.load_checkpoint_hook import LoadCheckpointHook
+ LoadCheckpointHook.load_checkpoint(self._best_ckpt_file, trainer)
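For orientation, here is a minimal, non-authoritative sketch of driving the best-checkpoint behaviour above from the trainer configuration. The `train.checkpoint.period`/`train.checkpoint.best` paths mirror the keys handled by the new `CheckpointProcessor.copy_files_and_dump_config`; the metric name and the exact schema are assumptions for illustration, not a guaranteed API.

```python
# Sketch only: configure periodic and best-checkpoint saving through the trainer
# config. 'accuracy' and the exact schema are illustrative assumptions.
def cfg_modify_fn(cfg):
    cfg.merge_from_dict({
        # periodic saving handled by CheckpointHook
        'train.checkpoint.period': {
            'by_epoch': True,
            'interval': 1,
            'max_checkpoint_num': 2,
        },
        # best-checkpoint saving handled by BestCkptSaverHook
        'train.checkpoint.best': {
            'metric_key': 'accuracy',   # metric produced by the EvaluationHook
            'rule': 'max',              # keep the checkpoint with the highest accuracy
            'max_checkpoint_num': 1,    # drop checkpoints with worse metric values
            'restore_best': True,       # reload the best weights after training
        },
    })
    return cfg
```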
diff --git a/modelscope/trainers/hooks/checkpoint/checkpoint_processor.py b/modelscope/trainers/hooks/checkpoint/checkpoint_processor.py
new file mode 100644
index 00000000..f28fc397
--- /dev/null
+++ b/modelscope/trainers/hooks/checkpoint/checkpoint_processor.py
@@ -0,0 +1,276 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+import re
+import shutil
+
+from modelscope.metainfo import Pipelines
+from modelscope.utils.checkpoint import (load_checkpoint, save_checkpoint,
+ save_configuration)
+from modelscope.utils.constant import ModelFile
+from modelscope.utils.logger import get_logger
+from modelscope.utils.torch_utils import is_master
+
+
+class CheckpointProcessor:
+
+ TRAINER_STATE_SUFFIX = '_trainer_state.pth'
+
+ MODEL_STATE_SUFFIX = '.pth'
+
+ def prepare_output(self, trainer, output_dir):
+ """Prepares the output of target folder.
+
+ This is a strategic function which can be registered by other hook's function.
+
+ Args:
+ trainer: The trainer instance.
+ output_dir: The target folder used in inference.
+ """
+ model = trainer.unwrap_module(trainer.model)
+ config = trainer.cfg
+
+ # override pipeline by tasks name after finetune done,
+ # avoid case like fill mask pipeline with a text cls task
+ if config['task'] in [
+ getattr(Pipelines, attr) for attr in dir(Pipelines)
+ if not attr.startswith('__')
+ ]:
+ # TODO a temp fix to avoid pipeline_name and task mismatch
+ config['pipeline'] = {'type': config['task']}
+
+ self.copy_files_and_dump_config(trainer, output_dir, config,
+ self._bin_file(model))
+
+ @staticmethod
+ def copy_files_and_dump_config(trainer, output_dir, config, bin_file):
+ """Copy useful files to target output folder and dumps the target configuration.json.
+ """
+ model = trainer.unwrap_module(trainer.model)
+
+ class SaveConfig:
+
+ def __init__(self, output_dir, config):
+ self.output_dir = output_dir
+ self.config = config
+
+ def __call__(self, _output_dir, _config):
+ self.config = _config
+
+ def save_config(self):
+ save_configuration(self.output_dir, self.config)
+
+ for pop_key in [
+ 'push_to_hub', 'hub_repo_id', 'hub_token', 'private_hub'
+ ]:
+ if config.safe_get('train.checkpoint.period.'
+ + pop_key) is not None:
+ config.safe_get('train.checkpoint.period').pop(pop_key)
+ if config.safe_get('train.checkpoint.best.' + pop_key) is not None:
+ config.safe_get('train.checkpoint.best').pop(pop_key)
+
+ save_config_fn = SaveConfig(output_dir, config)
+
+ if hasattr(model, 'save_pretrained'):
+ # Call the model's save_pretrained but skip writing the checkpoint weights here
+ model.save_pretrained(
+ output_dir,
+ bin_file,
+ save_function=lambda *args, **kwargs: None,
+ config=save_config_fn.config,
+ save_config_function=save_config_fn)
+
+ if trainer.train_preprocessor is not None:
+ trainer.train_preprocessor.save_pretrained(
+ output_dir,
+ save_config_fn.config,
+ save_config_function=save_config_fn)
+ if trainer.eval_preprocessor is not None:
+ trainer.eval_preprocessor.save_pretrained(
+ output_dir,
+ save_config_fn.config,
+ save_config_function=save_config_fn)
+ save_config_fn.save_config()
+
+ @staticmethod
+ def _bin_file(model):
+ """Get bin file path.
+ """
+ default_bin_file = ModelFile.TORCH_MODEL_BIN_FILE
+ if hasattr(model,
+ 'model_dir') and ModelFile.TORCH_MODEL_FILE in os.listdir(
+ model.model_dir):
+ default_bin_file = ModelFile.TORCH_MODEL_FILE
+ return default_bin_file
+
+ def save_checkpoints(self,
+ trainer,
+ checkpoint_path_prefix,
+ output_dir,
+ meta=None):
+ """Save the state dict for trainer and model.
+
+ This is a strategy method which can be overridden by registering a custom processor from another hook.
+
+ Args:
+ trainer(`EpochBasedTrainer`): The trainer instance.
+ checkpoint_path_prefix(`str`): The saving dir with a prefix.
+ like: /tmp/test/epoch_0
+ output_dir(`str`): The output dir for inference.
+ meta: (`dict`): The meta info needed to be saved into files.
+ """
+ model = trainer.unwrap_module(trainer.model)
+ _model_file, _train_state_file = self._get_state_file_name(
+ checkpoint_path_prefix)
+
+ # Save pth file without model state_dict
+ self.save_trainer_state(trainer, model, _train_state_file, meta)
+ self.save_model_state(model, _model_file)
+ self.link(model, _model_file, output_dir)
+
+ def remove_checkpoints(self, trainer, checkpoint_path_prefix):
+ """Remove obsolete checkpoint files.
+
+ This is a strategy method which can be overridden by registering a custom processor from another hook.
+
+ Args:
+ trainer(`EpochBasedTrainer`): The trainer instance.
+ checkpoint_path_prefix(`str`): The saving dir with a prefix.
+ like: /tmp/test/epoch_0
+ """
+ _model_file, _train_state_file = self._get_state_file_name(
+ checkpoint_path_prefix)
+ if os.path.isfile(_train_state_file):
+ os.remove(_train_state_file)
+
+ if os.path.isfile(_model_file):
+ os.remove(_model_file)
+
+ def should_save_on_rank(self, trainer):
+ """Used in ddp or other distributed training scenario, returns whether do saving in current rank.
+
+ This is a strategic function which can be registered by other hook's function.
+
+ Args:
+ trainer(`EpochBasedTrainer`): The trainer instance.
+ """
+ return is_master()
+
+ def link(self, model, src_file, output_dir):
+ """Links the src bin file to the output folder.
+
+ Args:
+ model: The model instance.
+ src_file: The src bin file path.
+ output_dir: The target folder used in inference.
+ """
+
+ bin_file = self._bin_file(model)
+ dest_file = os.path.join(output_dir, bin_file)
+ if os.path.isfile(dest_file):
+ os.unlink(dest_file)
+
+ try:
+ os.link(src_file, dest_file)
+ except OSError as e:
+ get_logger().error(
+ f'Link {src_file} to {dest_file} error: {e}, '
+ 'changing to copy the bin file, this may use more disk space.')
+ shutil.copyfile(src_file, dest_file)
+
+ def save_trainer_state(self, trainer, model, train_state_file, meta):
+ """Save the trainer state, including optimizer/lr_scheduler's state dict, random states etc.
+
+ Args:
+ trainer: The trainer instance.
+ model: The model instance.
+ train_state_file: The target file name for saving trainer states.
+ meta: Some extra meta info.
+ """
+ save_checkpoint(
+ model,
+ train_state_file,
+ trainer.optimizer,
+ trainer.lr_scheduler,
+ meta=meta,
+ with_model=False)
+
+ def save_model_state(self, model, model_file):
+ """Save the model state.
+
+ Args:
+ model: The model instance.
+ model_file: The target file name for saving model states.
+ """
+ save_checkpoint(
+ model, model_file, None, None, meta=None, with_meta=False)
+
+ def load_checkpoints(self, checkpoint_path_prefix, trainer, load_all_state,
+ strict):
+ """Load checkpoint files of trainer state and model state.
+
+ This is a strategy method which can be overridden by registering a custom processor from another hook.
+
+ Args:
+ checkpoint_path_prefix(str): The checkpoint dir with prefix or a model state file.
+ Example: '/tmp/test/epoch_0' or '/tmp/test/epoch_0.pth'
+ trainer(`EpochBasedTrainer`): The trainer instance.
+ load_all_state(`boolean`): Load all states (else load only module states).
+ strict(`boolean`): If strict, any unmatched keys will cause an error.
+
+ Returns:
+ The loaded meta info as a dict.
+ """
+ _model_file, _train_state_file = self._get_state_file_name(
+ checkpoint_path_prefix)
+ meta = {}
+ if os.path.isfile(_train_state_file):
+ meta = self.load_trainer_state(trainer, _train_state_file,
+ load_all_state)
+ else:
+ get_logger().warning(f'No trainer state file {_train_state_file} found, skipping.')
+ self.load_model_state(trainer, _model_file, strict)
+ return meta
+
+ @staticmethod
+ def load_trainer_state(trainer, train_state_file, load_all_state):
+ """Load trainer state file.
+ """
+
+ optimizer = getattr(trainer, 'optimizer',
+ None) if load_all_state else None
+ lr_scheduler = getattr(trainer, 'lr_scheduler',
+ None) if load_all_state else None
+ return load_checkpoint(train_state_file, None, optimizer, lr_scheduler)
+
+ def load_model_state(self, trainer, model_file, strict):
+ """Load model state file.
+ """
+ return load_checkpoint(model_file,
+ trainer.unwrap_module(trainer.model), None,
+ None)
+
+ @staticmethod
+ def _get_state_file_name(checkpoint_path_prefix):
+ """Get the default file name for state files.
+
+ If the input is a checkpoint dir with prefix, this function will append suffix for both checkpoint files.
+ If the input is an absolute file name, this function will return it as the model file name, and append
+ suffix for the trainer file name.
+
+ NOTE: a best-checkpoint filename containing a float or int metric value, such as
+ '/tmp/test/epoch_0_accuracy0.85', will not be treated as having a file extension.
+
+ Args:
+ checkpoint_path_prefix(`str`): The checkpoint path prefix (like '/tmp/test/epoch_0') or a model state
+ file name with an extension (like '/tmp/test/epoch_0.pth').
+
+ Returns:
+ A tuple of model state file name and trainer state file name.
+ """
+ base, ext = os.path.splitext(checkpoint_path_prefix)
+ if len(ext) == 0 or re.match(r'^\d+$', ext[1:]):
+ return checkpoint_path_prefix + CheckpointProcessor.MODEL_STATE_SUFFIX, \
+ checkpoint_path_prefix + CheckpointProcessor.TRAINER_STATE_SUFFIX # noqa
+ else:
+ return checkpoint_path_prefix, base + CheckpointProcessor.TRAINER_STATE_SUFFIX.split(
+ '.')[0] + '.' + ext[1:]
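To make the naming rule in `_get_state_file_name` concrete, here is a self-contained sketch (suffix constants copied from the class above) showing how a checkpoint prefix maps to the model-state and trainer-state file names; the paths are examples only.

```python
import os
import re

MODEL_STATE_SUFFIX = '.pth'
TRAINER_STATE_SUFFIX = '_trainer_state.pth'


def state_file_names(prefix: str):
    """Mirror CheckpointProcessor._get_state_file_name for illustration."""
    base, ext = os.path.splitext(prefix)
    # No extension, or a purely numeric "extension" (e.g. the .85 in accuracy0.85):
    # treat the input as a prefix and append both suffixes.
    if len(ext) == 0 or re.match(r'^\d+$', ext[1:]):
        return prefix + MODEL_STATE_SUFFIX, prefix + TRAINER_STATE_SUFFIX
    # Otherwise the input is already a model file; derive the trainer file from it.
    return prefix, base + TRAINER_STATE_SUFFIX.split('.')[0] + '.' + ext[1:]


print(state_file_names('/tmp/test/epoch_3'))
# ('/tmp/test/epoch_3.pth', '/tmp/test/epoch_3_trainer_state.pth')
print(state_file_names('/tmp/test/best_epoch3_accuracy0.85'))
# ('/tmp/test/best_epoch3_accuracy0.85.pth', '/tmp/test/best_epoch3_accuracy0.85_trainer_state.pth')
print(state_file_names('/tmp/test/pytorch_model.bin'))
# ('/tmp/test/pytorch_model.bin', '/tmp/test/pytorch_model_trainer_state.bin')
```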
diff --git a/modelscope/trainers/hooks/checkpoint/load_checkpoint_hook.py b/modelscope/trainers/hooks/checkpoint/load_checkpoint_hook.py
new file mode 100644
index 00000000..3ccb800f
--- /dev/null
+++ b/modelscope/trainers/hooks/checkpoint/load_checkpoint_hook.py
@@ -0,0 +1,138 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import random
+from typing import Optional
+
+import numpy as np
+import torch
+from packaging import version
+
+from modelscope.metainfo import Hooks
+from modelscope.trainers.hooks.builder import HOOKS
+from modelscope.trainers.hooks.checkpoint.checkpoint_processor import \
+ CheckpointProcessor
+from modelscope.trainers.hooks.hook import Hook
+from modelscope.trainers.hooks.priority import Priority
+from modelscope.utils.logger import get_logger
+
+
+@HOOKS.register_module(module_name=Hooks.LoadCheckpointHook)
+class LoadCheckpointHook(Hook):
+ """Load a checkpoint file at the beginning of training or evaluating.
+
+ This hook does not need to be configured or saved in the config file.
+ Users should trigger it implicitly via:
+ >>> trainer.train('some-checkpoint', load_all_state=True)
+ or
+ >>> trainer.evaluate('some-checkpoint')
+
+ Args:
+ checkpoint_file (str): The checkpoint file to be loaded.
+ load_all_state (bool): Whether to load all states (optimizer, epoch, lr_scheduler, random_state, etc.)
+ from the training state file. If False, only the model's state dict is loaded.
+ strict (bool): If strict, any unmatched keys will cause an error.
+ """
+
+ PRIORITY = Priority.HIGH
+
+ _should_save = False
+
+ # Since version 1.3.1, the single pth file is split into two files: a trainer state pth file and a model state pth file.
+ _TWO_PTH_FILE_VERSION = '1.3.1'
+
+ def __init__(
+ self,
+ checkpoint_file: Optional[str] = None,
+ load_all_state: Optional[bool] = True,
+ strict: Optional[bool] = False,
+ ):
+ self.checkpoint_file = checkpoint_file
+ self.rng_state = None
+ self.need_load_rng_state = False
+ self.load_all_state = load_all_state
+ self.strict = strict
+ self.processor = CheckpointProcessor()
+
+ def before_run(self, trainer):
+ if not hasattr(trainer, 'logger'):
+ self.logger = get_logger()
+ else:
+ self.logger = trainer.logger
+
+ if self.checkpoint_file is not None:
+ meta = self.load_checkpoint(self.checkpoint_file, trainer,
+ self.load_all_state, self.strict)
+ self.rng_state = meta.get('rng_state')
+ self.need_load_rng_state = self.load_all_state
+
+ def before_train_iter(self, trainer):
+ if self.need_load_rng_state:
+ if self.rng_state is not None:
+ random.setstate(self.rng_state['random'])
+ np.random.set_state(self.rng_state['numpy'])
+ torch.random.set_rng_state(self.rng_state['cpu'])
+ if torch.cuda.is_available():
+ torch.cuda.random.set_rng_state_all(self.rng_state['cuda'])
+ self.need_load_rng_state = False
+ else:
+ self.logger.info(
+ 'Random state cannot be found in the checkpoint file, '
+ 'this may lead to a non-reproducible data order or model initialization.'
+ )
+
+ @staticmethod
+ def _restore_training_state(trainer, meta):
+ trainer._epoch = meta.get('epoch', trainer._epoch)
+ trainer._iter = meta.get('iter', trainer._iter)
+ trainer._inner_iter = meta.get('inner_iter', trainer._inner_iter)
+
+ i = 0
+ for hook in trainer.hooks:
+ if hasattr(hook, 'load_state_dict') and getattr(
+ hook, '_should_save', True):
+ key = f'{hook.__class__}-{i}'
+ if key in meta:
+ hook.load_state_dict(meta.get(key, {}))
+ else:
+ trainer.logger.warning(
+ f'The state_dict of hook {hook.__class__} at index {i} is not found in the checkpoint file.'
+ )
+ i += 1
+
+ @classmethod
+ def load_checkpoint(cls,
+ filename,
+ trainer,
+ load_all_state=True,
+ strict=False):
+ """A static method to load checkpoint files.
+
+ Args:
+ filename(str): An absolute model bin file(pth or bin) or a dir path with a file prefix(like epoch_1).
+ trainer(`EpochBasedTrainer`): The trainer instance.
+ load_all_state(`bool`): Load all states including the trainer states.
+ strict(`bool`): Load module state dict strictly.
+
+ Returns:
+ A dict containing the train states saved by `_create_training_state`
+ """
+ meta = cls().processor.load_checkpoints(filename, trainer,
+ load_all_state, strict)
+ if load_all_state:
+ cls._restore_training_state(trainer, meta)
+
+ if meta is not None:
+ _version = meta.get('modelscope')
+ if _version is not None and version.parse(
+ _version) < version.parse(
+ LoadCheckpointHook._TWO_PTH_FILE_VERSION):
+ trainer.logger.warning(
+ 'The single pth file has been split into a model file and '
+ f'a trainer file since version {LoadCheckpointHook._TWO_PTH_FILE_VERSION}, '
+ 'consider re-training your model or '
+ 'using a conversion script to split the single pth file into two.'
+ )
+ trainer.logger.info(
+ f'Checkpoint {filename} saving time: {meta.get("time")}, modelscope version: {_version}'
+ )
+ return meta
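As a usage illustration of the hook above, the following sketch resumes training from a checkpoint prefix and evaluates from a model file. The trainer name, model id and paths are placeholders, and the construction assumes the generic `build_trainer` entry point with dataset arguments omitted for brevity.

```python
# Sketch only: model id, work_dir and checkpoint paths are placeholders;
# dataset arguments are omitted for brevity.
from modelscope.metainfo import Trainers
from modelscope.trainers.builder import build_trainer

trainer = build_trainer(
    name=Trainers.default,                    # assumed default trainer name
    default_args=dict(
        model='damo/some-model-id',           # placeholder model id
        work_dir='/tmp/finetune_work_dir'))

# Triggers LoadCheckpointHook: restores the model weights plus the
# optimizer/lr_scheduler/epoch/rng state, then continues training from there.
trainer.train('/tmp/finetune_work_dir/epoch_3', load_all_state=True)

# Or load only the model state file for evaluation.
trainer.evaluate('/tmp/finetune_work_dir/epoch_3.pth')
```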
diff --git a/modelscope/trainers/hooks/checkpoint_hook.py b/modelscope/trainers/hooks/checkpoint_hook.py
deleted file mode 100644
index 59832105..00000000
--- a/modelscope/trainers/hooks/checkpoint_hook.py
+++ /dev/null
@@ -1,749 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-import os
-import random
-import re
-import time
-
-import numpy as np
-import torch
-from packaging import version
-
-from modelscope.hub.check_model import check_model_is_id
-from modelscope.hub.push_to_hub import push_to_hub_async
-from modelscope.metainfo import Hooks, Pipelines
-from modelscope.utils.checkpoint import (load_checkpoint, save_checkpoint,
- save_configuration)
-from modelscope.utils.constant import LogKeys, ModelFile
-from modelscope.utils.logger import get_logger
-from modelscope.utils.torch_utils import is_master
-from .builder import HOOKS
-from .hook import Hook
-from .priority import Priority
-
-
-@HOOKS.register_module(module_name=Hooks.CheckpointHook)
-class CheckpointHook(Hook):
- """Save checkpoints periodically.
-
- Args:
- interval (int): The frequency to save model. If `by_epoch=True`,
- it means the number of epochs, else means the number of iterations
- by_epoch (bool): Saving checkpoints by epoch or by iteration.
- save_optimizer (bool): Whether to save optimizer state dict. Default: True.
- save_dir (str): The directory to save checkpoints. If is None, use `trainer.work_dir`
- output_sub_dir (str): The sub folder under the `save_dir` to save the output checkpoint for inference.
- Default 'output'.
- save_last (bool): Whether to save the last checkpoint. Default: True.
- max_checkpoint_num (int): The max number of checkpoint files, default None which means never delete anything.
- If the number exceeding the limit, earlier checkpoints will be deleted first.
- """
-
- PRIORITY = Priority.LOW
-
- TRAINER_STATE_SUFFIX = '_trainer_state.pth'
-
- MODEL_STATE_SUFFIX = '.pth'
-
- def __init__(self,
- interval=0,
- by_epoch=True,
- save_optimizer=True,
- save_dir=None,
- output_sub_dir=ModelFile.TRAIN_OUTPUT_DIR,
- save_last=True,
- max_checkpoint_num=None,
- push_to_hub=False,
- model_id_with_org=None,
- hub_token=None,
- private_hub=True,
- **kwargs):
- self.interval = interval
- self.by_epoch = by_epoch
- self.save_optimizer = save_optimizer
- self.save_dir = save_dir
- self.output_sub_dir = output_sub_dir
- self.save_last = save_last
- self.rng_state = None
- self.max_checkpoint_num = None
- self.push_to_hub = push_to_hub
- self.model_id_with_org = model_id_with_org
- self.hub_token = hub_token
- self.private_hub = private_hub
- self.is_model_id = None
- self.push_to_hub_future = None
- if max_checkpoint_num is not None:
- self.max_checkpoint_num = max(int(max_checkpoint_num), 1)
- self.history_checkpoints = []
-
- def before_run(self, trainer):
- if not self.save_dir:
- self.save_dir = trainer.work_dir
-
- if not os.path.exists(self.save_dir):
- os.makedirs(self.save_dir, exist_ok=True)
-
- if not hasattr(trainer, 'logger'):
- self.logger = get_logger()
- else:
- self.logger = trainer.logger
-
- if is_master():
- output_dir = os.path.join(self.save_dir, self.output_sub_dir)
- # only global master prepares the output folder
- self.prepare_output(trainer, output_dir)
- self.logger.info(f'Checkpoints will be saved to {self.save_dir}')
-
- def generate_prefix(self, trainer):
- if self.by_epoch:
- return f'{LogKeys.EPOCH}_{trainer.epoch + 1}'
- else:
- return f'{LogKeys.ITER}_{trainer.iter + 1}'
-
- def after_train_epoch(self, trainer):
- if not self.by_epoch:
- return
-
- if self._should_save(trainer):
- # prefix like 'epoch-1' or 'iter-1'
- prefix = self.generate_prefix(trainer)
- if self.should_save_on_rank(trainer):
- if is_master():
- self.logger.info(
- f'Saving checkpoint at {trainer.epoch + 1} epoch')
- self._save_checkpoint(trainer, prefix)
- if is_master() and self.push_to_hub:
- if self.push_to_hub_future is not None and not self.push_to_hub_future.done(
- ):
- self.logger.error(
- f'Another uploading is running, '
- f'this uploading with message {prefix} will be canceled.'
- )
- return
- self.push_to_hub_future = self._push_to_hub(trainer, prefix)
-
- def after_train_iter(self, trainer):
- if self.by_epoch:
- return
-
- if self._should_save(trainer):
- # prefix like 'epoch-1' or 'iter-1'
- prefix = self.generate_prefix(trainer)
- if self.should_save_on_rank(trainer):
- if is_master():
- self.logger.info(
- f'Saving checkpoint at {trainer.iter + 1} iter')
- self._save_checkpoint(trainer, prefix)
- if is_master() and self.push_to_hub:
- if self.push_to_hub_future is not None and not self.push_to_hub_future.done(
- ):
- self.logger.error(
- f'Another uploading is running, '
- f'this uploading with message {prefix} will be canceled.'
- )
- return
- self.push_to_hub_future = self._push_to_hub(trainer, prefix)
-
- def after_run(self, trainer):
- if self.push_to_hub_future is not None and not self.push_to_hub_future.done(
- ):
- self.logger.info('Train finished. Uploading models, waiting...')
- while not self.push_to_hub_future.done():
- time.sleep(1)
- self.logger.info('Uploading models done.')
-
- def _push_to_hub(self, trainer, prefix):
- if self.is_model_id is None:
- self.is_model_id = check_model_is_id(trainer.input_model_id,
- self.hub_token)
-
- return push_to_hub_async(
- self.model_id_with_org,
- os.path.join(self.save_dir, self.output_sub_dir),
- token=self.hub_token,
- private=self.private_hub,
- commit_message=prefix,
- source_repo=trainer.input_model_id if self.is_model_id else '')
-
- def _save_checkpoint(self, trainer, prefix):
- """Save checkpoint files and remove obsolete ones
- """
- checkpoint_path_prefix = os.path.join(self.save_dir, prefix)
- meta = self._create_training_state(trainer)
- self.save_checkpoints(trainer, checkpoint_path_prefix,
- self.output_sub_dir, meta)
- self.history_checkpoints.append(checkpoint_path_prefix)
- self._remove_obsolete_checkpoints(trainer)
- return prefix
-
- def _remove_obsolete_checkpoints(self, trainer):
- if self.max_checkpoint_num is not None and \
- len(self.history_checkpoints) > self.max_checkpoint_num:
- history_checkpoints = [ckpt for ckpt in self.history_checkpoints]
- self.history_checkpoints.clear()
- for i, checkpoint_path_prefix in enumerate(history_checkpoints):
- if i < len(history_checkpoints) - self.max_checkpoint_num:
- self.logger.info(
- f'deleting checkpoint: {checkpoint_path_prefix}')
- self.remove_checkpoints(
- trainer, checkpoint_path_prefix=checkpoint_path_prefix)
- else:
- self.history_checkpoints.append(checkpoint_path_prefix)
-
- def _should_save(self, trainer):
- if self.by_epoch:
- check_last = self.is_last_epoch
- check_frequency = self.every_n_epochs
- else:
- check_last = self.is_last_iter
- check_frequency = self.every_n_iters
-
- if check_frequency(trainer,
- self.interval) or (self.save_last
- and check_last(trainer)):
- return True
- return False
-
- def _create_training_state(self, trainer):
- self.rng_state = {
- 'random': random.getstate(),
- 'numpy': np.random.get_state(),
- 'cpu': torch.random.get_rng_state(),
- 'cuda': torch.cuda.get_rng_state_all(),
- }
-
- # keep epoch/iter/inner_iter/random_state
- meta = {
- 'epoch': trainer.epoch,
- 'iter': trainer.iter + 1,
- 'inner_iter': trainer.inner_iter + 1,
- 'rng_state': self.rng_state,
- }
-
- # keep hooks state
- i = 0
- for hook in trainer.hooks:
- if hasattr(hook, 'state_dict') and getattr(hook, '_should_save',
- True):
- meta[f'{hook.__class__}-{i}'] = hook.state_dict()
- i += 1
-
- return meta
-
- @staticmethod
- def copy_files_and_dump_config(trainer, output_dir, config, bin_file):
- """Copy useful files to target output folder and dumps the target configuration.json.
- """
- model = trainer.unwrap_module(trainer.model)
-
- class SaveConfig:
-
- def __init__(self, output_dir, config):
- self.output_dir = output_dir
- self.config = config
-
- def __call__(self, _output_dir, _config):
- self.config = _config
-
- def save_config(self):
- save_configuration(self.output_dir, self.config)
-
- for pop_key in [
- 'push_to_hub', 'model_id_with_org', 'hub_token', 'private_hub'
- ]:
- if config.safe_get('train.checkpoint.period.'
- + pop_key) is not None:
- config.safe_get('train.checkpoint.period').pop(pop_key)
- if config.safe_get('train.checkpoint.best.' + pop_key) is not None:
- config.safe_get('train.checkpoint.best').pop(pop_key)
-
- save_config_fn = SaveConfig(output_dir, config)
-
- if hasattr(model, 'save_pretrained'):
- # Save pretrained of model, skip saving checkpoint
- model.save_pretrained(
- output_dir,
- bin_file,
- save_function=lambda *args, **kwargs: None,
- config=save_config_fn.config,
- save_config_function=save_config_fn)
-
- if trainer.train_preprocessor is not None:
- trainer.train_preprocessor.save_pretrained(
- output_dir,
- save_config_fn.config,
- save_config_function=save_config_fn)
- if trainer.eval_preprocessor is not None:
- trainer.eval_preprocessor.save_pretrained(
- output_dir,
- save_config_fn.config,
- save_config_function=save_config_fn)
- save_config_fn.save_config()
-
- @staticmethod
- def _bin_file(model):
- """Get bin file path.
- """
- default_bin_file = ModelFile.TORCH_MODEL_BIN_FILE
- if hasattr(model,
- 'model_dir') and ModelFile.TORCH_MODEL_FILE in os.listdir(
- model.model_dir):
- default_bin_file = ModelFile.TORCH_MODEL_FILE
- return default_bin_file
-
- @Hook.overload_func(name='CheckpointHook.prepare_output')
- def prepare_output(self, trainer, output_dir):
- """Prepares the output of target folder.
-
- This is a strategic function which can be registered by other hook's function.
-
- Args:
- trainer: The trainer instance.
- output_dir: The target folder used in inference.
- """
- model = trainer.unwrap_module(trainer.model)
- config = trainer.cfg
-
- # override pipeline by tasks name after finetune done,
- # avoid case like fill mask pipeline with a text cls task
- if config['task'] in [
- getattr(Pipelines, attr) for attr in dir(Pipelines)
- if not attr.startswith('__')
- ]:
- # TODO a temp fix to avoid pipeline_name and task mismatch
- config['pipeline'] = {'type': config['task']}
-
- self.copy_files_and_dump_config(trainer, output_dir, config,
- self._bin_file(model))
-
- def link(self, model, src_file, output_dir):
- """Links the src bin file to the output folder.
-
- Args:
- model: The model instance.
- src_file: The src bin file path.
- output_dir: The target folder used in inference.
- """
-
- bin_file = self._bin_file(model)
- dest_file = os.path.join(output_dir, bin_file)
- if os.path.isfile(dest_file):
- os.unlink(dest_file)
-
- os.link(src_file, dest_file)
-
- def save_trainer_state(self, trainer, model, train_state_file, meta):
- """Save the trainer state, including optimizer/lr_scheduler's state dict, random states etc.
-
- Args:
- trainer: The trainer instance.
- model: The model instance.
- train_state_file: The target file name for saving trainer states.
- meta: Some extra meta info.
- """
- save_checkpoint(
- model,
- train_state_file,
- trainer.optimizer,
- trainer.lr_scheduler,
- meta=meta,
- with_model=False)
-
- def save_model_state(self, model, model_file):
- """Save the model state.
-
- Args:
- model: The model instance.
- model_file: The target file name for saving model states.
- """
- save_checkpoint(
- model, model_file, None, None, meta=None, with_meta=False)
-
- @Hook.overload_func(name='CheckpointHook.save_checkpoints')
- def save_checkpoints(self,
- trainer,
- checkpoint_path_prefix,
- output_sub_dir,
- meta=None):
- """Save the state dict for trainer and model.
-
- This is a strategic function which can be registered by other hook's function.
-
- Args:
- trainer(`EpochBasedTrainer`): The trainer instance.
- checkpoint_path_prefix(`str`): The saving dir with a prefix.
- like: /tmp/test/epoch_0
- output_sub_dir(`str`): The sub-dir in the saving dir used in inference.
- meta: (`dict`): The meta info needed to be saved into files.
- """
- model = trainer.unwrap_module(trainer.model)
- _model_file, _train_state_file = _get_state_file_name(
- checkpoint_path_prefix)
-
- # Save pth file without model state_dict
- self.save_trainer_state(trainer, model, _train_state_file, meta)
- self.save_model_state(model, _model_file)
- output_dir = os.path.join(self.save_dir, output_sub_dir)
- self.link(model, _model_file, output_dir)
-
- @Hook.overload_func(name='CheckpointHook.remove_checkpoints')
- def remove_checkpoints(self, trainer, checkpoint_path_prefix):
- """Remove obsolete checkpoint files.
-
- This is a strategic function which can be registered by other hook's function.
-
- Args:
- trainer(`EpochBasedTrainer`): The trainer instance.
- checkpoint_path_prefix(`str`): The saving dir with a prefix.
- like: /tmp/test/epoch_0
- """
- _model_file, _train_state_file = _get_state_file_name(
- checkpoint_path_prefix)
- if os.path.isfile(_train_state_file):
- os.remove(_train_state_file)
-
- if os.path.isfile(_model_file):
- os.remove(_model_file)
-
- @Hook.overload_func(name='CheckpointHook.should_save_on_rank')
- def should_save_on_rank(self, trainer):
- """Used in ddp or other distributed training scenario, returns whether do saving in current rank.
-
- This is a strategic function which can be registered by other hook's function.
-
- Args:
- trainer(`EpochBasedTrainer`): The trainer instance.
- """
- return is_master()
-
-
-@HOOKS.register_module(module_name=Hooks.BestCkptSaverHook)
-class BestCkptSaverHook(CheckpointHook):
- """
- Save best checkpoints hook.
-
- Args:
- metric_key (str): Metric key to compare rule for best score.
- rule (str): Comparison rule for best score. Support "max" and "min". If rule is "max", the checkpoint
- at the maximum `metric_key` will be saved, If rule is "min", the checkpoint at the minimum `metric_key`
- will be saved.
- by_epoch (bool): Save best checkpoints by epoch or by iteration.
- save_optimizer (bool): Whether to save optimizer state dict. Default: True.
- save_dir (str): Output directory to save best checkpoint.
- output_sub_dir (str): The sub folder under the `save_dir` to save the output checkpoint for inference.
- Default 'output_best'.
- restore_best (bool): Whether to restore the best checkpoint after training.
- max_checkpoint_num (int): The max number of checkpoint files, default None which means never delete anything.
- If the number exceeding the limit, checkpoints with worse metric will be deleted, which is judged by the
- `rule` and `metric_key` arguments.
- """
-
- PRIORITY = Priority.LOW
- rule_map = {'max': lambda x, y: x > y, 'min': lambda x, y: x < y}
-
- def __init__(self,
- metric_key,
- rule='max',
- by_epoch=True,
- save_optimizer=True,
- save_dir=None,
- output_sub_dir=ModelFile.TRAIN_BEST_OUTPUT_DIR,
- save_file_name=None,
- restore_best=False,
- max_checkpoint_num=1,
- interval=0,
- **kwargs):
- assert rule in ['max', 'min'], 'Only support "max" or "min" rule now.'
- super().__init__(
- interval=interval,
- by_epoch=by_epoch,
- save_optimizer=save_optimizer,
- save_dir=save_dir,
- output_sub_dir=output_sub_dir,
- max_checkpoint_num=max_checkpoint_num,
- **kwargs,
- )
- self.metric_key = metric_key
- self.rule = rule
- self._best_metric = None
- self._best_ckpt_file = None
- self.save_file_name = save_file_name
- self.restore_best = restore_best
- self.history_checkpoints = set()
-
- def _should_save(self, trainer):
- return self._is_best_metric(trainer.metric_values)
-
- def _is_best_metric(self, metric_values):
- if metric_values is None:
- return False
-
- if self.metric_key not in metric_values:
- raise ValueError(
- f'Not find metric_key: {self.metric_key} in {metric_values}')
-
- if self._best_metric is None:
- self._best_metric = metric_values[self.metric_key]
- return True
- else:
- compare_fn = self.rule_map[self.rule]
- if compare_fn(metric_values[self.metric_key], self._best_metric):
- self._best_metric = metric_values[self.metric_key]
- return True
- return False
-
- def generate_prefix(self, trainer):
- if self.by_epoch:
- return f'best_{LogKeys.EPOCH}{trainer.epoch + 1}_{self.metric_key}{self._best_metric}'
- else:
- return f'best_{LogKeys.ITER}{trainer.iter + 1}_{self.metric_key}{self._best_metric}'
-
- def _save_checkpoint(self, trainer, prefix):
- checkpoint_path_prefix = self.save_file_name
- if checkpoint_path_prefix is None:
- checkpoint_path_prefix = os.path.join(self.save_dir, prefix)
- else:
- checkpoint_path_prefix = os.path.join(self.save_dir,
- checkpoint_path_prefix)
-
- self._best_ckpt_file = checkpoint_path_prefix
- meta = self._create_training_state(trainer)
- self.save_checkpoints(trainer, checkpoint_path_prefix,
- self.output_sub_dir, meta)
- self.history_checkpoints.add(checkpoint_path_prefix)
- self._remove_obsolete_checkpoints(trainer)
- return prefix
-
- def _remove_obsolete_checkpoints(self, trainer):
-
- def extract_metric_from_filename(name1):
- metric1 = float(name1.split(self.metric_key)[1])
- if self.rule == 'max':
- return -metric1
- else:
- return metric1
-
- if self.max_checkpoint_num is not None and \
- len(self.history_checkpoints) > self.max_checkpoint_num:
- history_checkpoints = sorted(
- self.history_checkpoints, key=extract_metric_from_filename)
- self.history_checkpoints.clear()
- for i, checkpoint_path_prefix in enumerate(history_checkpoints):
- if i < self.max_checkpoint_num:
- self.history_checkpoints.add(checkpoint_path_prefix)
- else:
- self.logger.info(
- f'deleting checkpoint: {checkpoint_path_prefix}')
- self.remove_checkpoints(
- trainer, checkpoint_path_prefix=checkpoint_path_prefix)
-
- def state_dict(self):
- return {
- 'best_metric': self._best_metric,
- }
-
- def load_state_dict(self, state_dict):
- if state_dict is not None and len(state_dict) > 0:
- self._best_metric = state_dict.get('best_metric')
- else:
- self.logger.warning(
- 'The state_dict is not available, the best metric value will be affected.'
- )
-
- def after_run(self, trainer):
- if self.restore_best:
- # If restore_best is True, will call the LoadCheckpointHook to load the best checkpoint
- # for later evaluation or prediction.
- LoadCheckpointHook.load_checkpoint(self._best_ckpt_file, trainer)
-
-
-@HOOKS.register_module(module_name=Hooks.LoadCheckpointHook)
-class LoadCheckpointHook(Hook):
- """Load a checkpoint file at the beginning of training or evaluating.
-
- This hook does not need to be configured or saved in the config file.
- User should use it by:
- >>> trainer.train('some-checkpoint', load_all_state=True)
- or
- >>> trainer.evaluate('some-checkpoint')
- instead.
-
- Args:
- checkpoint_file (str): The checkpoint file to be loaded.
- load_all_state (bool): Load all states(optimizer, epoch, lr_scheduler, random_state, etc.) when loading old
- training state file or not. The model's state dict will only be loaded if False.
- strict (bool): If strict, any unmatched keys will cause an error.
- """
-
- PRIORITY = Priority.HIGH
-
- _should_save = False
-
- _TWO_PTH_FILE_VERSION = '1.3.1'
-
- def __init__(
- self,
- checkpoint_file=None,
- load_all_state=True,
- strict=False,
- ):
- self.checkpoint_file = checkpoint_file
- self.rng_state = None
- self.need_load_rng_state = False
- self.load_all_state = load_all_state
- self.strict = strict
-
- def before_run(self, trainer):
- if not hasattr(trainer, 'logger'):
- self.logger = get_logger()
- else:
- self.logger = trainer.logger
-
- if self.checkpoint_file is not None:
- meta = self.load_checkpoint(self.checkpoint_file, trainer,
- self.load_all_state, self.strict)
- self.rng_state = meta.get('rng_state')
- self.need_load_rng_state = self.load_all_state
-
- def before_train_iter(self, trainer):
- if self.need_load_rng_state:
- if self.rng_state is not None:
- random.setstate(self.rng_state['random'])
- np.random.set_state(self.rng_state['numpy'])
- torch.random.set_rng_state(self.rng_state['cpu'])
- if torch.cuda.is_available():
- torch.cuda.random.set_rng_state_all(self.rng_state['cuda'])
- self.need_load_rng_state = False
- else:
- self.logger.info(
- 'Random state cannot be found in checkpoint file, '
- 'this may cause a random data order or model initialization.'
- )
-
- @staticmethod
- def _restore_training_state(trainer, meta):
- trainer._epoch = meta.get('epoch', trainer._epoch)
- trainer._iter = meta.get('iter', trainer._iter)
- trainer._inner_iter = meta.get('inner_iter', trainer._inner_iter)
-
- i = 0
- for hook in trainer.hooks:
- if hasattr(hook, 'load_state_dict') and getattr(
- hook, '_should_save', True):
- key = f'{hook.__class__}-{i}'
- if key in meta:
- hook.load_state_dict(meta.get(key, {}))
- else:
- trainer.logger.warning(
- f'The state_dict of hook {hook.__class__} at index {i} is not found in the checkpoint file.'
- )
- i += 1
-
- @classmethod
- def load_checkpoint(cls,
- filename,
- trainer,
- load_all_state=True,
- strict=False):
- """A static method to load checkpoint files.
-
- Args:
- filename(str): An absolute model bin file(pth or bin) or a dir path with a file prefix(like epoch_1).
- trainer(`EpochBasedTrainer`): The trainer instance.
- load_all_state(`bool`): Load all states including the trainer states.
- strict(`bool`): Load module state dict strictly.
-
- Returns:
- A dict containing the train states saved by `_create_training_state`
- """
- meta = cls().load_checkpoints(filename, trainer, load_all_state,
- strict)
- if load_all_state:
- cls._restore_training_state(trainer, meta)
-
- if meta is not None:
- _version = meta.get('modelscope')
- if _version is not None and version.parse(
- _version) < version.parse(
- LoadCheckpointHook._TWO_PTH_FILE_VERSION):
- trainer.logger.warning(
- 'The unique pth file is split into a model file and '
- f'a trainer file since version {LoadCheckpointHook._TWO_PTH_FILE_VERSION},'
- 'consider re-training your model or '
- 'using a converting script to split the single pth file into two.'
- )
- trainer.logger.info(
- f'Checkpoint {filename} saving time: {meta.get("time")}, modelscope version: {_version}'
- )
- return meta
-
- @staticmethod
- def load_trainer_state(trainer, train_state_file, load_all_state):
- """Load trainer state file.
- """
-
- optimizer = getattr(trainer, 'optimizer',
- None) if load_all_state else None
- lr_scheduler = getattr(trainer, 'lr_scheduler',
- None) if load_all_state else None
- return load_checkpoint(train_state_file, None, optimizer, lr_scheduler)
-
- def load_model_state(self, trainer, model_file, strict):
- """Load model state file.
- """
- return load_checkpoint(model_file,
- trainer.unwrap_module(trainer.model), None,
- None)
-
- @Hook.overload_func(name='LoadCheckpointHook.load_checkpoints')
- def load_checkpoints(self, checkpoint_path_prefix, trainer, load_all_state,
- strict):
- """Load checkpoint files of trainer state and model state.
-
- This is a strategic function which can be registered by other hook's function.
-
- Args:
- checkpoint_path_prefix(str): The checkpoint dir with prefix or a model state file.
- Example: '/tmp/test/epoch_0' or '/tmp/test/epoch_0.pth'
- trainer(`EpochBasedTrainer`): The trainer instance.
- load_all_state(`boolean`): Load all states (else load only module states).
- strict(`boolean`): If strict, any unmatched keys will cause an error.
-
- Returns:
- The meta info in json.
- """
- _model_file, _train_state_file = _get_state_file_name(
- checkpoint_path_prefix)
- meta = {}
- if os.path.isfile(_train_state_file):
- meta = self.load_trainer_state(trainer, _train_state_file,
- load_all_state)
- else:
- print(f'No trainer state file {_train_state_file} found, skip.')
- self.load_model_state(trainer, _model_file, strict)
- return meta
-
-
-def _get_state_file_name(checkpoint_path_prefix):
- """Get the default file name for state files.
-
- If the input is a checkpoint dir with prefix, this function will append suffix for both checkpoint files.
- If the input is an absolute file name, this function will return it as the model file name, and append
- suffix for the trainer file name.
-
- NOTE: a best checkpoint filename with float or int metric value inside
- will not be judged as having a extension file name. like: '/tmp/test/epoch_0_accuracy0.85'
-
- Args:
- checkpoint_path_prefix(`str`): The checkpoint dir with prefix or a model state file with extension file name.
- like: '/tmp/test/epoch_0'
-
- Returns:
- A tuple of model state file name and trainer state file name.
- """
- base, ext = os.path.splitext(checkpoint_path_prefix)
- if len(ext) == 0 or re.match(r'^\d+$', ext[1:]):
- return checkpoint_path_prefix + CheckpointHook.MODEL_STATE_SUFFIX, \
- checkpoint_path_prefix + CheckpointHook.TRAINER_STATE_SUFFIX
- else:
- return checkpoint_path_prefix, base + CheckpointHook.TRAINER_STATE_SUFFIX.split(
- '.')[0] + '.' + ext[1:]
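The rng_state bookkeeping kept by the checkpoint hooks (see `_create_training_state` and `before_train_iter`) boils down to snapshotting and restoring the Python, NumPy and PyTorch generators. A self-contained sketch of that round trip, using only standard APIs:

```python
import random

import numpy as np
import torch


def capture_rng_state():
    # Snapshot every RNG the trainer touches, as the checkpoint hooks do.
    state = {
        'random': random.getstate(),
        'numpy': np.random.get_state(),
        'cpu': torch.random.get_rng_state(),
    }
    if torch.cuda.is_available():
        state['cuda'] = torch.cuda.get_rng_state_all()
    return state


def restore_rng_state(state):
    # Restore the snapshot before the next training iteration, so the data
    # order and dropout masks are reproducible after resuming.
    random.setstate(state['random'])
    np.random.set_state(state['numpy'])
    torch.random.set_rng_state(state['cpu'])
    if torch.cuda.is_available() and 'cuda' in state:
        torch.cuda.random.set_rng_state_all(state['cuda'])


snapshot = capture_rng_state()
a = torch.rand(3)
restore_rng_state(snapshot)
b = torch.rand(3)
assert torch.equal(a, b)  # identical draws after restoring the snapshot
```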
diff --git a/modelscope/trainers/hooks/compression/sparsity_hook.py b/modelscope/trainers/hooks/compression/sparsity_hook.py
index 993488d8..e71c269a 100644
--- a/modelscope/trainers/hooks/compression/sparsity_hook.py
+++ b/modelscope/trainers/hooks/compression/sparsity_hook.py
@@ -1,7 +1,6 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
-from modelscope import __version__
from modelscope.metainfo import Hooks
from modelscope.trainers.hooks.builder import HOOKS
from modelscope.trainers.hooks.hook import Hook
diff --git a/tests/pipelines/easycv_pipelines/__init__.py b/modelscope/trainers/hooks/distributed/__init__.py
similarity index 100%
rename from tests/pipelines/easycv_pipelines/__init__.py
rename to modelscope/trainers/hooks/distributed/__init__.py
diff --git a/modelscope/trainers/hooks/ddp_hook.py b/modelscope/trainers/hooks/distributed/ddp_hook.py
similarity index 89%
rename from modelscope/trainers/hooks/ddp_hook.py
rename to modelscope/trainers/hooks/distributed/ddp_hook.py
index eaae2d89..2bdbe939 100644
--- a/modelscope/trainers/hooks/ddp_hook.py
+++ b/modelscope/trainers/hooks/distributed/ddp_hook.py
@@ -1,11 +1,11 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from modelscope.metainfo import Hooks
+from modelscope.trainers.hooks.builder import HOOKS
+from modelscope.trainers.hooks.hook import Hook
+from modelscope.trainers.hooks.priority import Priority
from modelscope.utils.constant import DistributedParallelType
from modelscope.utils.device import create_device
from modelscope.utils.torch_utils import get_local_rank, init_dist
-from .builder import HOOKS
-from .hook import Hook
-from .priority import Priority
@HOOKS.register_module(module_name=Hooks.DDPHook)
diff --git a/modelscope/trainers/hooks/deepspeed_hook.py b/modelscope/trainers/hooks/distributed/deepspeed_hook.py
similarity index 64%
rename from modelscope/trainers/hooks/deepspeed_hook.py
rename to modelscope/trainers/hooks/distributed/deepspeed_hook.py
index a34b3f6f..7dddc5d9 100644
--- a/modelscope/trainers/hooks/deepspeed_hook.py
+++ b/modelscope/trainers/hooks/distributed/deepspeed_hook.py
@@ -8,72 +8,48 @@ from deepspeed import DeepSpeedEngine
from megatron_util import mpu, print_rank_0
from modelscope.metainfo import Hooks
+from modelscope.trainers.hooks import LoadCheckpointHook
from modelscope.trainers.hooks.builder import HOOKS
+from modelscope.trainers.hooks.checkpoint.checkpoint_hook import (
+ BestCkptSaverHook, CheckpointHook)
from modelscope.trainers.hooks.hook import Hook
from modelscope.trainers.hooks.priority import Priority
from modelscope.utils.checkpoint import save_checkpoint
from modelscope.utils.logger import get_logger
-from .checkpoint_hook import CheckpointHook, LoadCheckpointHook
-from .megatron_hook import MegatronHook
+from ..checkpoint.checkpoint_processor import CheckpointProcessor
+from ..lr_scheduler_hook import LrSchedulerProcessor
+from ..optimizer.base import OptimizerHook, OptimizerProcessor
-@HOOKS.register_module(module_name=Hooks.DeepspeedHook)
-class DeepspeedHook(MegatronHook):
- PRIORITY = Priority.VERY_HIGH
+class DeepspeedProcessor(CheckpointProcessor, LrSchedulerProcessor,
+ OptimizerProcessor):
- def __init__(self,
- deepspeed_activation_checkpointing=True,
- save_zero_checkpoint=False,
- with_mpu=True):
- self.save_zero_checkpoint = save_zero_checkpoint
- self.deepspeed_activation_checkpointing = deepspeed_activation_checkpointing
- # TODO without mpu
- self.with_mpu = with_mpu
- assert with_mpu, 'DeepspeedHook now is only for mpu models.'
+ _BIN_FILE_DIR = 'model'
- def register_strategy(self):
- Hook.overload(name='OptimizerHook.backward', function=self.backward)
- Hook.overload(
- name='OptimizerHook.initialize_optimizer', function=self.idle)
- Hook.overload(name='LrSchedulerHook.step', function=self.idle)
- Hook.overload(
- name='CheckpointHook.save_checkpoints',
- function=self.save_checkpoints)
- Hook.overload(
- name='LoadCheckpointHook.load_checkpoints',
- function=self.load_checkpoints)
- Hook.overload(
- name='CheckpointHook.remove_checkpoints',
- function=self.remove_checkpoints)
- Hook.overload(
- name='CheckpointHook.prepare_output', function=self.prepare_output)
- if self.with_mpu:
- Hook.overload(
- name='CheckpointHook.should_save_on_rank',
- function=self.should_save_on_rank)
+ def rank_name(self):
+ # TODO
+ try:
+ tp_world_size = mpu.get_tensor_model_parallel_world_size()
+ if tp_world_size == 1:
+ return ''
+ mp_rank = mpu.get_tensor_model_parallel_rank()
+ return '_mp_rank_{:02d}'.format(mp_rank)
+ except (ImportError, AssertionError):
+ return ''
- def backward(self, trainer, loss_keys, cumulative_iters, grad_clip):
- # assert cumulative_iters == 1, 'DeepSpeed only support cumulative_iters=1'
- # The `trainer.model` here is actually a deepspeed engine object.
- # backward step
- for k in loss_keys:
- loss = trainer.train_outputs[k]
- trainer.model.backward(loss)
-
- # update parameters
- trainer.model.step()
-
- def idle(self, *args, **kwargs):
- pass
+ def get_bin_file(self):
+ mp_rank = mpu.get_tensor_model_parallel_rank()
+ rank = '{:02d}'.format(mp_rank)
+ return f'mp_rank_{rank}_model_states.pt'
def save_checkpoints(self,
trainer,
checkpoint_path_prefix,
- output_sub_dir,
+ output_dir,
meta=None):
model = trainer.unwrap_module(trainer.model)
_train_state_file = checkpoint_path_prefix + self.rank_name(
- ) + CheckpointHook.TRAINER_STATE_SUFFIX
+ ) + CheckpointProcessor.TRAINER_STATE_SUFFIX
# Save pth file without model state_dict
save_checkpoint(
model, _train_state_file, None, None, meta=meta, with_model=False)
@@ -84,16 +60,22 @@ class DeepspeedHook(MegatronHook):
bin_file = self.get_bin_file()
src_file = os.path.join(checkpoint_path_prefix, bin_file)
- dest_file = os.path.join(save_dir, output_sub_dir, self._BIN_FILE_DIR,
- bin_file)
+ dest_file = os.path.join(output_dir, self._BIN_FILE_DIR, bin_file)
if os.path.isfile(dest_file):
os.unlink(dest_file)
- os.link(src_file, dest_file)
+ try:
+ os.link(src_file, dest_file)
+ except OSError as e:
+ get_logger().error(
+ f'Link {src_file} to {dest_file} error: {e}, '
+ 'changing to copy the bin file, this may use more disk space.'
+ )
+ shutil.copyfile(src_file, dest_file)
def remove_checkpoints(self, trainer, checkpoint_path_prefix):
_train_state_file = checkpoint_path_prefix + self.rank_name(
- ) + CheckpointHook.TRAINER_STATE_SUFFIX
+ ) + CheckpointProcessor.TRAINER_STATE_SUFFIX
if os.path.isfile(_train_state_file):
os.remove(_train_state_file)
@@ -107,10 +89,10 @@ class DeepspeedHook(MegatronHook):
meta = {}
_train_state_file = checkpoint_path_prefix + self.rank_name(
- ) + CheckpointHook.TRAINER_STATE_SUFFIX
+ ) + CheckpointProcessor.TRAINER_STATE_SUFFIX
if os.path.isfile(_train_state_file):
- meta = LoadCheckpointHook.load_trainer_state(
- trainer, _train_state_file, load_all_state)
+ meta = self.load_trainer_state(trainer, _train_state_file,
+ load_all_state)
if isinstance(trainer.model, DeepSpeedEngine):
# DeepSpeedEngine is initialized
@@ -138,6 +120,57 @@ class DeepspeedHook(MegatronHook):
checkpoint, strict=strict)
return meta
+ def backward(self, trainer, loss_keys, cumulative_iters, grad_clip):
+ # assert cumulative_iters == 1, 'DeepSpeed only support cumulative_iters=1'
+ # The `trainer.model` here is actually a deepspeed engine object.
+ # backward step
+ for k in loss_keys:
+ loss = trainer.train_outputs[k]
+ trainer.model.backward(loss)
+
+ # update parameters
+ trainer.model.step()
+
+ def initialize_optimizer(self, trainer):
+ pass
+
+ def step(self, trainer):
+ pass
+
+
+@HOOKS.register_module(module_name=Hooks.DeepspeedHook)
+class DeepspeedHook(Hook):
+ PRIORITY = Priority.VERY_HIGH
+
+ def __init__(self,
+ deepspeed_activation_checkpointing=True,
+ save_zero_checkpoint=False,
+ with_mpu=True):
+ self.save_zero_checkpoint = save_zero_checkpoint
+ self.deepspeed_activation_checkpointing = deepspeed_activation_checkpointing
+ # TODO without mpu
+ self.with_mpu = with_mpu
+ assert with_mpu, 'DeepspeedHook currently supports mpu models only.'
+
+ def register_processor(self, trainer):
+ processor = DeepspeedProcessor()
+ optimizer_hook = trainer.get_hook(OptimizerHook)
+ if len(optimizer_hook) > 0 and not isinstance(
+ optimizer_hook[0].processor, DeepspeedProcessor):
+ optimizer_hook[0].set_processor(processor)
+ ckpt_hook = trainer.get_hook(CheckpointHook)
+ if len(ckpt_hook) > 0 and not isinstance(ckpt_hook[0].processor,
+ DeepspeedProcessor):
+ ckpt_hook[0].set_processor(processor)
+ best_ckpt_hook = trainer.get_hook(BestCkptSaverHook)
+ if len(best_ckpt_hook) > 0 and not isinstance(
+ best_ckpt_hook[0].processor, DeepspeedProcessor):
+ best_ckpt_hook[0].set_processor(processor)
+ load_ckpt_hook = trainer.get_hook(LoadCheckpointHook)
+ if len(load_ckpt_hook) > 0 and not isinstance(
+ load_ckpt_hook[0].processor, DeepspeedProcessor):
+ load_ckpt_hook[0].set_processor(processor)
+
def before_val(self, trainer):
pass
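The refactor replaces the old `Hook.overload` strategy functions with pluggable processors: a hook looks up the existing checkpoint hooks on the trainer and swaps in its own processor. A hypothetical custom hook following the same `register_processor` pattern might look like the sketch below; `MyStorageProcessor`/`MyStorageHook` are illustrative names, not part of the library, and the `@HOOKS.register_module` registration is omitted for brevity.

```python
# Illustrative sketch of the processor pattern introduced by this refactor.
from modelscope.trainers.hooks.checkpoint.checkpoint_hook import CheckpointHook
from modelscope.trainers.hooks.checkpoint.checkpoint_processor import CheckpointProcessor
from modelscope.trainers.hooks.hook import Hook


class MyStorageProcessor(CheckpointProcessor):

    def save_checkpoints(self, trainer, checkpoint_path_prefix, output_dir, meta=None):
        # e.g. sync the files to remote storage around the default behaviour
        super().save_checkpoints(trainer, checkpoint_path_prefix, output_dir, meta)


class MyStorageHook(Hook):

    def register_processor(self, trainer):
        # Replace the default processor on any registered CheckpointHook.
        processor = MyStorageProcessor()
        ckpt_hooks = trainer.get_hook(CheckpointHook)
        if len(ckpt_hooks) > 0 and not isinstance(ckpt_hooks[0].processor,
                                                  MyStorageProcessor):
            ckpt_hooks[0].set_processor(processor)
```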
diff --git a/modelscope/trainers/hooks/megatron_hook.py b/modelscope/trainers/hooks/distributed/megatron_hook.py
similarity index 70%
rename from modelscope/trainers/hooks/megatron_hook.py
rename to modelscope/trainers/hooks/distributed/megatron_hook.py
index f01288de..c4aeaf19 100644
--- a/modelscope/trainers/hooks/megatron_hook.py
+++ b/modelscope/trainers/hooks/distributed/megatron_hook.py
@@ -1,19 +1,129 @@
import os
-from copy import deepcopy
+import shutil
import torch
from megatron_util import mpu
from modelscope.metainfo import Hooks
+from modelscope.trainers import EpochBasedTrainer
from modelscope.trainers.hooks.builder import HOOKS
+from modelscope.trainers.hooks.checkpoint.checkpoint_hook import (
+ BestCkptSaverHook, CheckpointHook, CheckpointProcessor)
+from modelscope.trainers.hooks.checkpoint.load_checkpoint_hook import \
+ LoadCheckpointHook
from modelscope.trainers.hooks.hook import Hook
-from modelscope.trainers.parallel.builder import build_parallel
from modelscope.utils.checkpoint import load_checkpoint, save_checkpoint
from modelscope.utils.constant import DistributedParallelType
from modelscope.utils.device import create_device
+from modelscope.utils.logger import get_logger
from modelscope.utils.megatron_utils import is_megatron_initialized
from modelscope.utils.torch_utils import get_local_rank
-from .checkpoint_hook import CheckpointHook, LoadCheckpointHook
+
+
+class MpuProcessor(CheckpointProcessor):
+
+ _BIN_FILE_DIR = 'model'
+
+ def rank_name(self):
+ # TODO
+ try:
+ tp_world_size = mpu.get_tensor_model_parallel_world_size()
+ if tp_world_size == 1:
+ return ''
+ mp_rank = mpu.get_tensor_model_parallel_rank()
+ return '_mp_rank_{:02d}'.format(mp_rank)
+ except (ImportError, AssertionError):
+ return ''
+
+ def get_bin_file(self):
+ mp_rank = mpu.get_tensor_model_parallel_rank()
+ rank = '{:02d}'.format(mp_rank)
+ return f'mp_rank_{rank}_model_states.pt'
+
+ def should_save_on_rank(self, trainer):
+ # TODO
+ return (not torch.distributed.is_initialized()
+ ) or mpu.get_data_parallel_rank() == 0
+
+ def prepare_output(self, trainer, output_dir):
+ config = trainer.cfg
+ CheckpointProcessor.copy_files_and_dump_config(trainer, output_dir,
+ config,
+ self._BIN_FILE_DIR)
+ os.makedirs(
+ os.path.join(output_dir, self._BIN_FILE_DIR), exist_ok=True)
+
+ def save_checkpoints(self,
+ trainer,
+ checkpoint_path_prefix,
+ output_dir,
+ meta=None):
+ model = trainer.unwrap_module(trainer.model)
+ _train_state_file = checkpoint_path_prefix + self.rank_name(
+ ) + CheckpointProcessor.TRAINER_STATE_SUFFIX
+ # Save pth file without model state_dict
+ save_checkpoint(
+ model,
+ _train_state_file,
+ trainer.optimizer,
+ trainer.lr_scheduler,
+ meta=meta,
+ with_model=False)
+
+ save_dir = os.path.dirname(checkpoint_path_prefix)
+ prefix = os.path.basename(checkpoint_path_prefix)
+ bin_file = self.get_bin_file()
+ prefix_bin_file = os.path.join(save_dir, prefix + '_' + bin_file)
+ save_checkpoint(model, prefix_bin_file, with_meta=False)
+
+ src_file = prefix_bin_file
+ dest_file = os.path.join(output_dir, self._BIN_FILE_DIR, bin_file)
+ if os.path.isfile(dest_file):
+ os.unlink(dest_file)
+
+ try:
+ os.link(src_file, dest_file)
+ except OSError as e:
+ get_logger().error(
+ f'Link {src_file} to {dest_file} error: {e}, '
+ 'changing to copy the bin file, this may use more disk space.'
+ )
+ shutil.copyfile(src_file, dest_file)
+
+ def remove_checkpoints(self, trainer, checkpoint_path_prefix):
+ _train_state_file = checkpoint_path_prefix + self.rank_name(
+ ) + CheckpointProcessor.TRAINER_STATE_SUFFIX
+ if os.path.isfile(_train_state_file):
+ os.remove(_train_state_file)
+
+ save_dir = os.path.dirname(checkpoint_path_prefix)
+ prefix = os.path.basename(checkpoint_path_prefix)
+ bin_file = self.get_bin_file()
+ absolute_file = os.path.join(save_dir, prefix + '_' + bin_file)
+ if os.path.isfile(absolute_file):
+ os.remove(absolute_file)
+
+ def load_checkpoints(self, checkpoint_path_prefix, trainer, load_all_state,
+ strict):
+ model = trainer.unwrap_module(trainer.model)
+ meta = {}  # default when only a model directory is given
+ if os.path.isdir(checkpoint_path_prefix):
+ save_dir = checkpoint_path_prefix
+ bin_file = self.get_bin_file()
+ model_file = os.path.join(save_dir, bin_file)
+ load_checkpoint(model_file, model, None, None)
+ else:
+ _train_state_file = checkpoint_path_prefix + self.rank_name(
+ ) + CheckpointProcessor.TRAINER_STATE_SUFFIX
+ meta = LoadCheckpointHook.load_trainer_state(
+ trainer, _train_state_file, load_all_state)
+
+ save_dir = os.path.dirname(checkpoint_path_prefix)
+ prefix = os.path.basename(checkpoint_path_prefix)
+ bin_file = self.get_bin_file()
+
+ model_file = os.path.join(save_dir, prefix + '_' + bin_file)
+ load_checkpoint(model_file, model, None, None)
+ return meta
@HOOKS.register_module(module_name=Hooks.MegatronHook)
@@ -24,21 +134,20 @@ class MegatronHook(Hook):
def __init__(self):
self.wrapped = False
- def register_strategy(self):
- Hook.overload(
- name='CheckpointHook.should_save_on_rank',
- function=self.should_save_on_rank)
- Hook.overload(
- name='CheckpointHook.save_checkpoints',
- function=self.save_checkpoints)
- Hook.overload(
- name='LoadCheckpointHook.load_checkpoints',
- function=self.load_checkpoints)
- Hook.overload(
- name='CheckpointHook.remove_checkpoints',
- function=self.remove_checkpoints)
- Hook.overload(
- name='CheckpointHook.prepare_output', function=self.prepare_output)
+ def register_processor(self, trainer: EpochBasedTrainer):
+ processor = MpuProcessor()
+ ckpt_hook = trainer.get_hook(CheckpointHook)
+ if len(ckpt_hook) > 0 and not isinstance(ckpt_hook[0].processor,
+ MpuProcessor):
+ ckpt_hook[0].set_processor(processor)
+ best_ckpt_hook = trainer.get_hook(BestCkptSaverHook)
+ if len(best_ckpt_hook) > 0 and not isinstance(
+ best_ckpt_hook[0].processor, MpuProcessor):
+ best_ckpt_hook[0].set_processor(processor)
+ load_ckpt_hook = trainer.get_hook(LoadCheckpointHook)
+ if len(load_ckpt_hook) > 0 and not isinstance(
+ load_ckpt_hook[0].processor, MpuProcessor):
+ load_ckpt_hook[0].set_processor(processor)
def after_init(self, trainer):
assert is_megatron_initialized()
@@ -63,97 +172,3 @@ class MegatronHook(Hook):
if not self.wrapped:
trainer.model = trainer.to_parallel(trainer.model)
self.wrapped = True
-
- def should_save_on_rank(self, trainer):
- # TODO
- return (not torch.distributed.is_initialized()
- ) or mpu.get_data_parallel_rank() == 0
-
- def rank_name(self):
- # TODO
- try:
- tp_world_size = mpu.get_tensor_model_parallel_world_size()
- if tp_world_size == 1:
- return ''
- mp_rank = mpu.get_tensor_model_parallel_rank()
- return '_mp_rank_{:02d}'.format(mp_rank)
- except (ImportError, AssertionError):
- return ''
-
- def get_bin_file(self):
- mp_rank = mpu.get_tensor_model_parallel_rank()
- rank = '{:02d}'.format(mp_rank)
- return f'mp_rank_{rank}_model_states.pt'
-
- def save_checkpoints(self,
- trainer,
- checkpoint_path_prefix,
- output_sub_dir,
- meta=None):
- model = trainer.unwrap_module(trainer.model)
- _train_state_file = checkpoint_path_prefix + self.rank_name(
- ) + CheckpointHook.TRAINER_STATE_SUFFIX
- # Save pth file without model state_dict
- save_checkpoint(
- model,
- _train_state_file,
- trainer.optimizer,
- trainer.lr_scheduler,
- meta=meta,
- with_model=False)
-
- save_dir = os.path.dirname(checkpoint_path_prefix)
- prefix = os.path.basename(checkpoint_path_prefix)
- bin_file = self.get_bin_file()
- prefix_bin_file = os.path.join(save_dir, prefix + '_' + bin_file)
- save_checkpoint(model, prefix_bin_file, with_meta=False)
-
- src_file = prefix_bin_file
- dest_file = os.path.join(save_dir, output_sub_dir, self._BIN_FILE_DIR,
- bin_file)
- if os.path.isfile(dest_file):
- os.unlink(dest_file)
-
- os.link(src_file, dest_file)
-
- def remove_checkpoints(self, trainer, checkpoint_path_prefix):
- _train_state_file = checkpoint_path_prefix + self.rank_name(
- ) + CheckpointHook.TRAINER_STATE_SUFFIX
- if os.path.isfile(_train_state_file):
- os.remove(_train_state_file)
-
- save_dir = os.path.dirname(checkpoint_path_prefix)
- prefix = os.path.basename(checkpoint_path_prefix)
- bin_file = self.get_bin_file()
- absolute_file = os.path.join(save_dir, prefix + '_' + bin_file)
- if os.path.isfile(absolute_file):
- os.remove(absolute_file)
-
- def load_checkpoints(self, checkpoint_path_prefix, trainer, load_all_state,
- strict):
- model = trainer.unwrap_module(trainer.model)
- if os.path.isdir(checkpoint_path_prefix):
- save_dir = checkpoint_path_prefix
- bin_file = self.get_bin_file()
- model_file = os.path.join(save_dir, bin_file)
- load_checkpoint(model_file, model, None, None)
- else:
- _train_state_file = checkpoint_path_prefix + self.rank_name(
- ) + CheckpointHook.TRAINER_STATE_SUFFIX
- meta = LoadCheckpointHook.load_trainer_state(
- trainer, _train_state_file, load_all_state)
-
- save_dir = os.path.dirname(checkpoint_path_prefix)
- prefix = os.path.basename(checkpoint_path_prefix)
- bin_file = self.get_bin_file()
-
- model_file = os.path.join(save_dir, prefix + '_' + bin_file)
- load_checkpoint(model_file, model, None, None)
- return meta
-
- def prepare_output(self, trainer, output_dir):
- config = trainer.cfg
- CheckpointHook.copy_files_and_dump_config(trainer, output_dir, config,
- self._BIN_FILE_DIR)
- os.makedirs(
- os.path.join(output_dir, self._BIN_FILE_DIR), exist_ok=True)
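
A minimal sketch (not part of the patch) of how the processor mechanism used above is meant to be extended: a hook implements `register_processor` and installs its own processor on the existing checkpoint hooks instead of overloading strategic functions. The import paths for CheckpointProcessor/CheckpointHook and the names `MyCheckpointProcessor`/`MyHook` are assumptions made for illustration.

from modelscope.trainers.hooks import CheckpointHook, Hook, LoadCheckpointHook
from modelscope.trainers.hooks.checkpoint.checkpoint_processor import \
    CheckpointProcessor


class MyCheckpointProcessor(CheckpointProcessor):
    """Hypothetical processor; override only the save/load methods you need."""


class MyHook(Hook):

    def register_processor(self, trainer):
        # Called once by trainer.register_processors(); replace the default
        # processor only when no specialized processor is installed yet.
        processor = MyCheckpointProcessor()
        for hook_cls in (CheckpointHook, LoadCheckpointHook):
            for hook in trainer.get_hook(hook_cls):
                if not isinstance(hook.processor, MyCheckpointProcessor):
                    hook.set_processor(processor)
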
diff --git a/modelscope/trainers/hooks/early_stop_hook.py b/modelscope/trainers/hooks/early_stop_hook.py
index b15e8e5a..7aba69a4 100644
--- a/modelscope/trainers/hooks/early_stop_hook.py
+++ b/modelscope/trainers/hooks/early_stop_hook.py
@@ -9,6 +9,12 @@ from .hook import Hook
from .priority import Priority
+class EarlyStopStrategy:
+ by_epoch = 'by_epoch'
+ by_step = 'by_step'
+ no = 'no'
+
+
@HOOKS.register_module(module_name=Hooks.EarlyStopHook)
class EarlyStopHook(Hook):
"""Early stop when a specific metric stops improving.
@@ -16,14 +22,13 @@ class EarlyStopHook(Hook):
Args:
metric_key (str): Metric key to be monitored.
rule (str): Comparison rule for best score. Support "max" and "min".
- If rule is "max", the training will stop when `metric_key` has stopped increaing.
+ If rule is "max", the training will stop when `metric_key` has stopped increasing.
If rule is "min", the training will stop when `metric_key` has stopped decreasing.
patience (int): Trainer will stop if the monitored metric did not improve for the last `patience` times.
- min_delta (float): Minimum change in the monitored metric to quailfy as an improvement.
+ min_delta (float): Minimum change in the monitored metric to qualify as an improvement.
check_finite (bool): If true, stops training when the metric becomes NaN or infinite.
- by_epoch (int): Saving checkpoints by epoch or by iteration.
- interval (int): The frequency to trigger early stop check. If `by_epoch=True`,
- it means the number of epochs, else means the number of iterations.
+        early_stop_strategy (str): The early stopping strategy, can be by_epoch/by_step/no.
+ interval (int): The frequency to trigger early stop check, by epoch or step.
"""
PRIORITY = Priority.VERY_LOW
@@ -35,14 +40,19 @@ class EarlyStopHook(Hook):
patience: int = 3,
min_delta: float = 0.0,
check_finite: bool = True,
- by_epoch: bool = True,
- interval: int = 1):
+ early_stop_strategy: str = EarlyStopStrategy.by_epoch,
+ interval: int = 1,
+ **kwargs):
self.metric_key = metric_key
self.rule = rule
self.patience = patience
self.min_delta = min_delta
self.check_finite = check_finite
- self.by_epoch = by_epoch
+ if 'by_epoch' in kwargs:
+ self.early_stop_strategy = EarlyStopStrategy.by_epoch if kwargs[
+ 'by_epoch'] else EarlyStopStrategy.by_step
+ else:
+ self.early_stop_strategy = early_stop_strategy
self.interval = interval
self.wait_count = 0
@@ -89,7 +99,7 @@ class EarlyStopHook(Hook):
trainer._stop_training = True
def after_train_epoch(self, trainer):
- if not self.by_epoch:
+ if self.early_stop_strategy != EarlyStopStrategy.by_epoch:
return
if not self.every_n_epochs(trainer, self.interval):
@@ -99,7 +109,7 @@ class EarlyStopHook(Hook):
self._stop_training(trainer)
def after_train_iter(self, trainer):
- if self.by_epoch:
+ if self.early_stop_strategy != EarlyStopStrategy.by_step:
return
if not self.every_n_iters(trainer, self.interval):
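
A configuration sketch (not part of the patch): the reworked hook can be enabled through `cfg.train.hooks`, with keys mirroring the constructor above; legacy configs that still pass `by_epoch` are translated in `__init__` via `**kwargs`. The metric key is a placeholder.

def cfg_modify_fn(cfg):
    # Stop training when `accuracy` has not improved for 3 consecutive checks,
    # checking every 100 training steps instead of every epoch.
    cfg.train.hooks.append({
        'type': 'EarlyStopHook',
        'metric_key': 'accuracy',
        'rule': 'max',
        'patience': 3,
        'early_stop_strategy': 'by_step',  # 'by_epoch' | 'by_step' | 'no'
        'interval': 100,
    })
    return cfg
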
diff --git a/modelscope/trainers/hooks/evaluation_hook.py b/modelscope/trainers/hooks/evaluation_hook.py
index 80c8c31a..c29a6d6a 100644
--- a/modelscope/trainers/hooks/evaluation_hook.py
+++ b/modelscope/trainers/hooks/evaluation_hook.py
@@ -1,11 +1,18 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from collections import OrderedDict
+from typing import Optional
from modelscope.metainfo import Hooks
from .builder import HOOKS
from .hook import Hook
+class EvaluationStrategy:
+ by_epoch = 'by_epoch'
+ by_step = 'by_step'
+ no = 'no'
+
+
@HOOKS.register_module(module_name=Hooks.EvaluationHook)
class EvaluationHook(Hook):
"""
@@ -18,21 +25,34 @@ class EvaluationHook(Hook):
Default: None, validate every interval epochs/iterations from scratch.
"""
- def __init__(self, interval=1, by_epoch=True, start_idx=None):
+ def __init__(self,
+ interval: Optional[int] = 1,
+ eval_strategy: Optional[str] = EvaluationStrategy.by_epoch,
+ start_idx: Optional[int] = None,
+ **kwargs):
assert interval > 0, 'interval must be a positive number'
self.interval = interval
self.start_idx = start_idx
- self.by_epoch = by_epoch
+ self.last_eval_tag = (None, None)
+ if 'by_epoch' in kwargs:
+ self.eval_strategy = EvaluationStrategy.by_epoch if kwargs[
+ 'by_epoch'] else EvaluationStrategy.by_step
+ else:
+ self.eval_strategy = eval_strategy
def after_train_iter(self, trainer):
"""Called after every training iter to evaluate the results."""
- if not self.by_epoch and self._should_evaluate(trainer):
+ if self.eval_strategy == EvaluationStrategy.by_step and self._should_evaluate(
+ trainer):
self.do_evaluate(trainer)
+ self.last_eval_tag = ('iter', trainer.iter)
def after_train_epoch(self, trainer):
"""Called after every training epoch to evaluate the results."""
- if self.by_epoch and self._should_evaluate(trainer):
+ if self.eval_strategy == EvaluationStrategy.by_epoch and self._should_evaluate(
+ trainer):
self.do_evaluate(trainer)
+ self.last_eval_tag = ('epoch', trainer.epoch)
def add_visualization_info(self, trainer, results):
if trainer.visualization_buffer.output.get('eval_results',
@@ -64,7 +84,7 @@ class EvaluationHook(Hook):
Returns:
bool: The flag indicating whether to perform evaluation.
"""
- if self.by_epoch:
+ if self.eval_strategy == EvaluationStrategy.by_epoch:
current = trainer.epoch
check_time = self.every_n_epochs
else:
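
A corresponding sketch (not part of the patch), assuming the default configuration maps `evaluation.period` onto this hook in the same way the `TrainingArgs` fields later in this patch do:

def cfg_modify_fn(cfg):
    # Evaluate every 500 training steps instead of once per epoch; 'no' disables it.
    cfg.evaluation.period = {'eval_strategy': 'by_step', 'interval': 500}
    return cfg
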
diff --git a/modelscope/trainers/hooks/hook.py b/modelscope/trainers/hooks/hook.py
index 70e06fbd..93ea8541 100644
--- a/modelscope/trainers/hooks/hook.py
+++ b/modelscope/trainers/hooks/hook.py
@@ -22,9 +22,6 @@ class Hook:
PRIORITY = Priority.NORMAL
- # The strategic function dict.
- _strategies = dict()
-
def after_init(self, trainer):
"""
Will be called at the end of the trainer's `__init__` method
@@ -201,42 +198,48 @@ class Hook:
"""
self.after_iter(trainer)
- def every_n_epochs(self, trainer, n):
+ @staticmethod
+ def every_n_epochs(trainer, n):
"""
Whether to reach every ``n`` epochs
Returns: bool
"""
return (trainer.epoch + 1) % n == 0 if n > 0 else False
- def every_n_inner_iters(self, runner, n):
+ @staticmethod
+ def every_n_inner_iters(runner, n):
"""
Whether to reach every ``n`` iterations at every epoch
Returns: bool
"""
return (runner.inner_iter + 1) % n == 0 if n > 0 else False
- def every_n_iters(self, trainer, n):
+ @staticmethod
+ def every_n_iters(trainer, n):
"""
Whether to reach every ``n`` iterations
Returns: bool
"""
return (trainer.iter + 1) % n == 0 if n > 0 else False
- def end_of_epoch(self, trainer):
+ @staticmethod
+ def end_of_epoch(trainer):
"""
Whether to reach the end of every epoch
Returns: bool
"""
return trainer.inner_iter + 1 == trainer.iters_per_epoch
- def is_last_epoch(self, trainer):
+ @staticmethod
+ def is_last_epoch(trainer):
"""
Whether to reach the last epoch
Returns: bool
"""
return trainer.epoch + 1 == trainer.max_epochs
- def is_last_iter(self, trainer):
+ @staticmethod
+ def is_last_iter(trainer):
"""
Whether to reach the last iteration in the entire training process
Returns: bool
@@ -256,54 +259,3 @@ class Hook:
def load_state_dict(self, state_dict):
pass
-
- @staticmethod
- def clear_strategies():
- Hook._strategies.clear()
-
- @staticmethod
- def overload(function, name=None):
- """Register a function to a strategic function.
-
- Args:
- function(`method` or `Callable`): The function instance.
- name(`str`): The name of the strategic function, which specifies by the method `consume`
- """
-
- _name = name or function.__name__
- if _name not in Hook._strategies:
- Hook._strategies[_name] = []
-
- Hook._strategies[_name].append(function)
-
- @staticmethod
- def overload_func(name=None):
- """Declare a function as a strategic function, which can be replaced by some other functions.
-
- This function should be used in annotations.
-
- Args:
- name(str): The strategic function name.
- """
-
- def _register(function):
-
- @wraps(function)
- def _call(*args, **kwargs):
- _name = name or function.__name__
- producers = Hook._strategies.get(_name, [])
-
- if len(producers) == 0:
- return function(*args, **kwargs)
- else:
- if len(producers) > 1:
- raise ValueError(
- f'Multiple functions registered to {_name}, '
- f'here is the list: {producers}')
- if isinstance(args[0], Hook):
- args = args[1:]
- return producers[0](*args, **kwargs)
-
- return _call
-
- return _register
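
A small sketch (not part of the patch) of why the frequency helpers became staticmethods: processor objects, which are plain classes rather than `Hook` subclasses, can now call them directly, as the optimizer processors later in this patch do. `AccumulationAwareProcessor` is a hypothetical name.

from modelscope.trainers.hooks import Hook


class AccumulationAwareProcessor:
    """Hypothetical processor that reuses the Hook helpers without being a Hook."""

    def should_step(self, trainer, cumulative_iters):
        # True on every `cumulative_iters`-th iteration, mirroring gradient accumulation.
        return Hook.every_n_iters(trainer, cumulative_iters)
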
diff --git a/modelscope/trainers/hooks/lr_scheduler_hook.py b/modelscope/trainers/hooks/lr_scheduler_hook.py
index 28ce250c..51a8e858 100644
--- a/modelscope/trainers/hooks/lr_scheduler_hook.py
+++ b/modelscope/trainers/hooks/lr_scheduler_hook.py
@@ -1,4 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
+
from modelscope.metainfo import Hooks
from modelscope.trainers.lrscheduler.builder import build_lr_scheduler
from modelscope.utils.constant import LogKeys
@@ -9,6 +10,42 @@ from .hook import Hook
from .priority import Priority
+class LrSchedulerProcessor:
+
+ def __init__(self):
+ self.lr_strategy = None
+ self.warmup_lr_scheduler = None
+
+ def set_lr_strategy(self, lr_strategy):
+ self.lr_strategy = lr_strategy
+
+ def set_warmup_lr_scheduler(self, warmup_lr_scheduler):
+ self.warmup_lr_scheduler = warmup_lr_scheduler
+
+ def initialize_lr_scheduler(self, trainer):
+ """Initialize the lr scheduler.
+
+        This is a strategic function which can be overridden by another hook through a custom processor.
+ """
+ pass
+
+ def step(self, trainer):
+ """Do lr scheduler's step.
+
+        This is a strategic function which can be overridden by another hook through a custom processor.
+ """
+ if self.warmup_lr_scheduler is not None:
+ self.warmup_lr_scheduler.step()
+ else:
+ trainer.lr_scheduler.step()
+
+
+class LrStrategy:
+ by_epoch = 'by_epoch'
+ by_step = 'by_step'
+ no = 'no'
+
+
@HOOKS.register_module(module_name=Hooks.LrSchedulerHook)
class LrSchedulerHook(Hook):
"""Lr scheduler.
@@ -19,38 +56,33 @@ class LrSchedulerHook(Hook):
"""
PRIORITY = Priority.LOW
- def __init__(self, by_epoch=True, warmup=None, **kwargs) -> None:
+ def __init__(self,
+ lr_strategy=LrStrategy.by_epoch,
+ warmup=None,
+ **kwargs) -> None:
super().__init__()
- self.by_epoch = by_epoch
+ if 'by_epoch' in kwargs:
+ self.lr_strategy = LrStrategy.by_epoch if kwargs[
+ 'by_epoch'] else LrStrategy.by_step
+ else:
+ self.lr_strategy = lr_strategy
self.warmup = warmup
self.warmup_lr_scheduler = None
+ self.processor = LrSchedulerProcessor()
+
+ def set_processor(self, processor):
+ self.processor = processor
def before_run(self, trainer):
- self.initialize_lr_scheduler(trainer)
+ self.processor.set_lr_strategy(self.lr_strategy)
if self.warmup is not None:
assert isinstance(self.warmup, dict) and 'type' in self.warmup
self.warmup_lr_scheduler = build_lr_scheduler(
cfg=self.warmup,
default_args={'base_scheduler': trainer.lr_scheduler})
+ self.processor.set_warmup_lr_scheduler(self.warmup_lr_scheduler)
- @Hook.overload_func(name='LrSchedulerHook.initialize_lr_scheduler')
- def initialize_lr_scheduler(self, trainer):
- """Initialize the lr scheduler.
-
- This is a strategic function which can be registered by other hook's function.
- """
- pass
-
- @Hook.overload_func(name='LrSchedulerHook.step')
- def step(self, trainer):
- """Do lr scheduler's step.
-
- This is a strategic function which can be registered by other hook's function.
- """
- if self.warmup_lr_scheduler is not None:
- self.warmup_lr_scheduler.step()
- else:
- trainer.lr_scheduler.step()
+ self.processor.initialize_lr_scheduler(trainer)
def get_current_lr(self, trainer):
import torch
@@ -67,17 +99,17 @@ class LrSchedulerHook(Hook):
return lr
def after_train_iter(self, trainer):
- if not self.by_epoch and trainer.iter >= getattr(
+ if self.lr_strategy == LrStrategy.by_step and trainer.iter >= getattr(
trainer, 'cumulative_iters', 1) - 1:
- self.step(trainer)
+ self.processor.step(trainer)
trainer.log_buffer.output[LogKeys.LR] = self._get_log_lr(trainer)
def before_train_epoch(self, trainer):
trainer.log_buffer.output[LogKeys.LR] = self._get_log_lr(trainer)
def after_train_epoch(self, trainer):
- if self.by_epoch:
- self.step(trainer)
+ if self.lr_strategy == LrStrategy.by_epoch:
+ self.processor.step(trainer)
def _get_log_lr(self, trainer):
cur_lr = self.get_current_lr(trainer)
@@ -94,6 +126,29 @@ class LrSchedulerHook(Hook):
return lr
+class PlateauLrSchedulerProcessor(LrSchedulerProcessor):
+
+ def __init__(self, metric_key):
+ super().__init__()
+ self.metric_key = metric_key
+
+ def step(self, trainer):
+        # handle the case where the evaluation interval is greater than 1
+ if trainer.metric_values is None:
+ if is_master():
+ print(
+ f'Current epoch {trainer.epoch} has no evaluation metric values, skip lr_scheduler.step() !'
+ )
+ return
+
+ metrics = trainer.metric_values[self.metric_key]
+ if self.lr_strategy == LrStrategy.by_epoch:
+ if self.warmup_lr_scheduler is not None:
+ self.warmup_lr_scheduler.step(metrics=metrics)
+ else:
+ trainer.lr_scheduler.step(metrics=metrics)
+
+
@HOOKS.register_module(module_name=Hooks.PlateauLrSchedulerHook)
class PlateauLrSchedulerHook(Hook):
"""Lr scheduler hook for `ReduceLROnPlateau`.
@@ -105,10 +160,16 @@ class PlateauLrSchedulerHook(Hook):
PRIORITY = Priority.LOW # should be after EvaluationHook
def __init__(self, metric_key, **kwargs):
+ super().__init__()
self.metric_key = metric_key
- def register_strategy(self):
- Hook.overload(name='LrSchedulerHook.step', function=self.step)
+ def register_processor(self, trainer):
+ lr_scheduler_hook = trainer.get_hook(LrSchedulerHook)
+ if len(lr_scheduler_hook) > 0 and type(
+ lr_scheduler_hook[0].processor) in (type(None),
+ LrSchedulerProcessor):
+ lr_scheduler_hook[0].set_processor(
+ PlateauLrSchedulerProcessor(self.metric_key))
def before_run(self, trainer):
if not hasattr(trainer, 'logger'):
@@ -116,23 +177,6 @@ class PlateauLrSchedulerHook(Hook):
else:
self.logger = trainer.logger
- def step(self, trainer):
- # adapt to evaluation intervel is greater than 1
- if trainer.metric_values is None:
- if is_master():
- self.logger.warning(
- f'Current epoch {trainer.epoch} has no evaluation metric values, skip lr_scheduler.step() !'
- )
- return
-
- metrics = trainer.metric_values[self.metric_key]
- lr_scheduler_hook = trainer.get_hook(LrSchedulerHook)[0]
- if lr_scheduler_hook.by_epoch:
- if lr_scheduler_hook.warmup_lr_scheduler is not None:
- lr_scheduler_hook.warmup_lr_scheduler.step(metrics=metrics)
- else:
- trainer.lr_scheduler.step(metrics=metrics)
-
@HOOKS.register_module(module_name=Hooks.NoneLrSchedulerHook)
class NoneLrSchedulerHook(LrSchedulerHook):
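
A configuration sketch (not part of the patch): with the processor-based design, pairing `ReduceLROnPlateau` with the trainer only requires both hooks to coexist; `PlateauLrSchedulerHook.register_processor` installs `PlateauLrSchedulerProcessor` on the `LrSchedulerHook` when `trainer.register_processors()` runs (see the trainer.py changes later in this patch). It assumes the stock torch schedulers are registered under their class names; the metric key is a placeholder.

def cfg_modify_fn(cfg):
    cfg.train.lr_scheduler = {
        'type': 'ReduceLROnPlateau',  # metric-driven torch scheduler
        'mode': 'max',
        'factor': 0.5,
        'patience': 2,
    }
    cfg.train.hooks.append({
        'type': 'PlateauLrSchedulerHook',
        'metric_key': 'accuracy',  # placeholder
    })
    return cfg
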
diff --git a/modelscope/trainers/hooks/optimizer/apex_optimizer_hook.py b/modelscope/trainers/hooks/optimizer/apex_optimizer_hook.py
index bd1034f3..3c874ccf 100644
--- a/modelscope/trainers/hooks/optimizer/apex_optimizer_hook.py
+++ b/modelscope/trainers/hooks/optimizer/apex_optimizer_hook.py
@@ -7,40 +7,14 @@ from packaging import version
from modelscope.metainfo import Hooks
from modelscope.trainers.hooks import Hook
from modelscope.trainers.hooks.builder import HOOKS
-from .base import OptimizerHook
+from .base import OptimizerHook, OptimizerProcessor
-@HOOKS.register_module(module_name=Hooks.ApexAMPOptimizerHook)
-class ApexAMPOptimizerHook(Hook):
- """
- Fp16 optimizer, if torch version is less than 1.6.0,
- you must install apex (https://www.github.com/nvidia/apex) else use torch.cuda.amp by default
+class ApexOptimizerProcessor(OptimizerProcessor):
- Args:
- opt_level (str): "O0" and "O3" are not true mixed precision,
- but they are useful for establishing accuracy and speed baselines, respectively.
- "O1" and "O2" are different implementations of mixed precision.
- Try both, and see what gives the best speedup and accuracy for your model.
- """
-
- PRIORITY = OptimizerHook.PRIORITY
-
- def __init__(self, opt_level='O1', **kwargs):
+ def __init__(self, opt_level):
self.opt_level = opt_level
- try:
- from apex import amp
- except ImportError:
- raise ValueError(
- 'apex not installed, please install apex from https://www.github.com/nvidia/apex.'
- )
-
- def register_strategy(self):
- Hook.overload(
- name='OptimizerHook.initialize_optimizer',
- function=self.initialize_optimizer)
- Hook.overload(name='OptimizerHook.backward', function=self.backward)
-
def initialize_optimizer(self, trainer):
from apex import amp
@@ -68,10 +42,44 @@ class ApexAMPOptimizerHook(Hook):
trainer.optimizer) as scaled_loss:
scaled_loss.backward()
- if self.every_n_iters(trainer, cumulative_iters):
+ if Hook.every_n_iters(trainer, cumulative_iters):
if grad_clip is not None:
- OptimizerHook.clip_grads(trainer.model.parameters(),
- **grad_clip)
+ OptimizerProcessor.clip_grads(trainer.model.parameters(),
+ **grad_clip)
trainer.optimizer.step()
trainer.optimizer.zero_grad()
+
+
+@HOOKS.register_module(module_name=Hooks.ApexAMPOptimizerHook)
+class ApexAMPOptimizerHook(Hook):
+ """
+    Fp16 optimizer. If the torch version is less than 1.6.0,
+    you must install apex (https://www.github.com/nvidia/apex); otherwise torch.cuda.amp is used by default.
+
+ Args:
+ opt_level (str): "O0" and "O3" are not true mixed precision,
+ but they are useful for establishing accuracy and speed baselines, respectively.
+ "O1" and "O2" are different implementations of mixed precision.
+ Try both, and see what gives the best speedup and accuracy for your model.
+ """
+
+ PRIORITY = OptimizerHook.PRIORITY
+
+ def __init__(self, opt_level='O1', **kwargs):
+ self.opt_level = opt_level
+
+ try:
+ from apex import amp
+ except ImportError:
+ raise ValueError(
+ 'apex not installed, please install apex from https://www.github.com/nvidia/apex.'
+ )
+
+ def register_processor(self, trainer):
+ optimizer_hook = trainer.get_hook(OptimizerHook)
+ if len(optimizer_hook) > 0 and type(
+ optimizer_hook[0].processor) in (type(None),
+ OptimizerProcessor):
+ optimizer_hook[0].set_processor(
+ ApexOptimizerProcessor(self.opt_level))
diff --git a/modelscope/trainers/hooks/optimizer/base.py b/modelscope/trainers/hooks/optimizer/base.py
index f0d62612..ca20720d 100644
--- a/modelscope/trainers/hooks/optimizer/base.py
+++ b/modelscope/trainers/hooks/optimizer/base.py
@@ -10,6 +10,48 @@ from modelscope.trainers.hooks.hook import Hook
from modelscope.trainers.hooks.priority import Priority
+class OptimizerProcessor:
+
+ def initialize_optimizer(self, trainer):
+ """Initialize the optimizer.
+
+        This is a strategic function which can be overridden by another hook through a custom processor.
+ """
+ trainer.optimizer.zero_grad()
+
+ def before_forward(self, trainer):
+ pass
+
+ def backward(self, trainer, loss_keys, cumulative_iters, grad_clip):
+ """Do module backward, optimizer's step and zero_grad and clip the grads.
+
+        This is a strategic function which can be overridden by another hook through a custom processor.
+
+ Args:
+ trainer(`EpochBasedTrainer`): The trainer instance.
+ loss_keys(`list`): The list of loss keys.
+ cumulative_iters(`int`): The cumulative iters for gradients.
+ grad_clip(`dict`): The grad clipping options.
+ """
+ for k in loss_keys:
+ trainer.train_outputs[k] /= cumulative_iters
+ trainer.train_outputs[k].backward()
+
+ if Hook.every_n_iters(trainer, cumulative_iters):
+ if grad_clip is not None:
+ self.clip_grads(trainer.model.parameters(), **grad_clip)
+
+ trainer.optimizer.step()
+ trainer.optimizer.zero_grad()
+
+ @staticmethod
+ def clip_grads(params, **clip_args):
+ params = list(
+ filter(lambda p: p.requires_grad and p.grad is not None, params))
+ if len(params) > 0:
+ return clip_grad.clip_grad_norm_(params, **clip_args)
+
+
@HOOKS.register_module(module_name=Hooks.OptimizerHook)
class OptimizerHook(Hook):
"""Optimizer hook
@@ -36,52 +78,21 @@ class OptimizerHook(Hook):
self.loss_keys = loss_keys
self.cumulative_iters = cumulative_iters
self.grad_clip = grad_clip
+ self.processor = OptimizerProcessor()
- @staticmethod
- def clip_grads(params, **clip_args):
- params = list(
- filter(lambda p: p.requires_grad and p.grad is not None, params))
- if len(params) > 0:
- return clip_grad.clip_grad_norm_(params, **clip_args)
-
- @Hook.overload_func(name='OptimizerHook.initialize_optimizer')
- def initialize_optimizer(self, trainer):
- """Initialize the optimizer.
-
- This is a strategic function which can be registered by other hook's function.
- """
- trainer.optimizer.zero_grad()
+ def set_processor(self, processor):
+ self.processor = processor
def before_run(self, trainer):
- self.initialize_optimizer(trainer)
trainer.cumulative_iters = self.cumulative_iters
+ self.processor.initialize_optimizer(trainer)
- @Hook.overload_func(name='OptimizerHook.backward')
- def backward(self, trainer, loss_keys, cumulative_iters, grad_clip):
- """Do module backward, optimizer's step and zero_grad and clip the grads.
-
- This is a strategic function which can be registered by other hook's function.
-
- Args:
- trainer(`EpochBasedTrainer`): The trainer instance.
- loss_keys(`list`): The list of loss keys.
- cumulative_iters(`int`): The cumulative iters for gradients.
- grad_clip(`dict`): The grad clipping options.
- """
- for k in loss_keys:
- trainer.train_outputs[k] /= cumulative_iters
- trainer.train_outputs[k].backward()
-
- if self.every_n_iters(trainer, cumulative_iters):
- if grad_clip is not None:
- self.clip_grads(trainer.model.parameters(), **grad_clip)
-
- trainer.optimizer.step()
- trainer.optimizer.zero_grad()
+ def before_train_iter(self, trainer):
+ self.processor.before_forward(trainer)
def after_train_iter(self, trainer):
- self.backward(trainer, self.loss_keys, self.cumulative_iters,
- self.grad_clip)
+ self.processor.backward(trainer, self.loss_keys, self.cumulative_iters,
+ self.grad_clip)
@HOOKS.register_module(module_name=Hooks.NoneOptimizerHook)
diff --git a/modelscope/trainers/hooks/optimizer/torch_optimizer_hook.py b/modelscope/trainers/hooks/optimizer/torch_optimizer_hook.py
index 1ab89720..fc7d2672 100644
--- a/modelscope/trainers/hooks/optimizer/torch_optimizer_hook.py
+++ b/modelscope/trainers/hooks/optimizer/torch_optimizer_hook.py
@@ -4,7 +4,45 @@ import logging
from modelscope.metainfo import Hooks
from modelscope.trainers.hooks import Hook
from modelscope.trainers.hooks.builder import HOOKS
-from .base import OptimizerHook
+from .base import OptimizerHook, OptimizerProcessor
+
+
+class TorchAMPOptimizerProcessor(OptimizerProcessor):
+
+ def __init__(self, scaler, scale_update_param):
+ self.scaler = scaler
+ self.scale_update_param = scale_update_param
+
+ def before_forward(self, trainer):
+ from torch.cuda import amp
+ setattr(self._model, 'forward', amp.autocast()(self._model.forward))
+
+ def initialize_optimizer(self, trainer):
+ logging.info('open fp16')
+ trainer.optimizer.zero_grad()
+
+ model = trainer.unwrap_module(trainer.model)
+ self._ori_model_forward = model.forward
+ self._model = model
+
+ def backward(self, trainer, loss_keys, cumulative_iters, grad_clip):
+ for k in loss_keys:
+ trainer.train_outputs[k] /= cumulative_iters
+
+ for k in loss_keys:
+ self.scaler.scale(trainer.train_outputs[k]).backward()
+
+ if Hook.every_n_iters(trainer, cumulative_iters):
+ self.scaler.unscale_(trainer.optimizer)
+ if grad_clip is not None:
+ OptimizerProcessor.clip_grads(trainer.model.parameters(),
+ **grad_clip)
+
+ self.scaler.step(trainer.optimizer)
+ self.scaler.update(self.scale_update_param)
+ trainer.optimizer.zero_grad()
+
+ setattr(self._model, 'forward', self._ori_model_forward)
@HOOKS.register_module(module_name=Hooks.TorchAMPOptimizerHook)
@@ -44,39 +82,11 @@ class TorchAMPOptimizerHook(Hook):
'`loss_scale` type must be in [float, dict], but got {loss_scale}'
)
- def register_strategy(self):
- Hook.overload(
- name='OptimizerHook.initialize_optimizer',
- function=self.initialize_optimizer)
- Hook.overload(name='OptimizerHook.backward', function=self.backward)
-
- def initialize_optimizer(self, trainer):
- logging.info('open fp16')
- trainer.optimizer.zero_grad()
-
- model = trainer.unwrap_module(trainer.model)
- self._ori_model_forward = model.forward
- self._model = model
-
- def before_train_iter(self, trainer):
- from torch.cuda import amp
- setattr(self._model, 'forward', amp.autocast()(self._model.forward))
-
- def backward(self, trainer, loss_keys, cumulative_iters, grad_clip):
- for k in loss_keys:
- trainer.train_outputs[k] /= cumulative_iters
-
- for k in loss_keys:
- self.scaler.scale(trainer.train_outputs[k]).backward()
-
- if self.every_n_iters(trainer, cumulative_iters):
- self.scaler.unscale_(trainer.optimizer)
- if grad_clip is not None:
- OptimizerHook.clip_grads(trainer.model.parameters(),
- **grad_clip)
-
- self.scaler.step(trainer.optimizer)
- self.scaler.update(self._scale_update_param)
- trainer.optimizer.zero_grad()
-
- setattr(self._model, 'forward', self._ori_model_forward)
+ def register_processor(self, trainer):
+ optimizer_hook = trainer.get_hook(OptimizerHook)
+ if len(optimizer_hook) > 0 and type(
+ optimizer_hook[0].processor) in (type(None),
+ OptimizerProcessor):
+ optimizer_hook[0].set_processor(
+ TorchAMPOptimizerProcessor(self.scaler,
+ self._scale_update_param))
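
A usage sketch (not part of the patch): mixed-precision training is switched on by adding the AMP hook to the config; its `register_processor` attaches `TorchAMPOptimizerProcessor` to the regular `OptimizerHook`, which keeps driving backward/step through its processor. The `loss_scale` value is only an example.

def cfg_modify_fn(cfg):
    # On torch < 1.6.0 with apex installed, use 'ApexAMPOptimizerHook' instead.
    cfg.train.hooks.append({'type': 'TorchAMPOptimizerHook', 'loss_scale': 512.0})
    return cfg
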
diff --git a/modelscope/trainers/multi_modal/clip/clip_trainer.py b/modelscope/trainers/multi_modal/clip/clip_trainer.py
index b0415bc2..ae00232f 100644
--- a/modelscope/trainers/multi_modal/clip/clip_trainer.py
+++ b/modelscope/trainers/multi_modal/clip/clip_trainer.py
@@ -176,11 +176,10 @@ class CLIPTrainer(EpochBasedTrainer):
self.dataset_cfg = cfg.dataset
if hasattr(self.dataset_cfg, 'column_map'):
# cases where dataset key names are not "img" and "text"
- img_key_name = getattr(self.dataset_cfg.column_map, 'img', 'img')
+ img_key_name = self.dataset_cfg['column_map'].get('img', 'img')
preprocessor[ConfigKeys.train].set_input_img_key(img_key_name)
preprocessor[ConfigKeys.val].set_input_img_key(img_key_name)
- text_key_name = getattr(self.dataset_cfg.column_map, 'text',
- 'text')
+ text_key_name = self.dataset_cfg['column_map'].get('text', 'text')
preprocessor[ConfigKeys.train].set_input_text_key(text_key_name)
preprocessor[ConfigKeys.val].set_input_text_key(text_key_name)
self.global_batch_size = cfg.train.dataloader.batch_size_per_gpu * world_size
diff --git a/modelscope/trainers/nlp/__init__.py b/modelscope/trainers/nlp/__init__.py
index 755e5387..ae102efa 100644
--- a/modelscope/trainers/nlp/__init__.py
+++ b/modelscope/trainers/nlp/__init__.py
@@ -10,6 +10,7 @@ if TYPE_CHECKING:
from .text_generation_trainer import TextGenerationTrainer
from .sentence_embedding_trainer import SentenceEmbeddingTrainer
from .siamese_uie_trainer import SiameseUIETrainer
+ from .translation_evaluation_trainer import TranslationEvaluationTrainer
else:
_import_structure = {
'sequence_classification_trainer': ['SequenceClassificationTrainer'],
@@ -17,7 +18,8 @@ else:
'text_ranking_trainer': ['TextRankingTrainer'],
'text_generation_trainer': ['TextGenerationTrainer'],
'sentence_emebedding_trainer': ['SentenceEmbeddingTrainer'],
- 'siamese_uie_trainer': ['SiameseUIETrainer']
+ 'siamese_uie_trainer': ['SiameseUIETrainer'],
+ 'translation_evaluation_trainer': ['TranslationEvaluationTrainer']
}
import sys
diff --git a/modelscope/trainers/nlp/translation_evaluation_trainer.py b/modelscope/trainers/nlp/translation_evaluation_trainer.py
new file mode 100644
index 00000000..05e9db89
--- /dev/null
+++ b/modelscope/trainers/nlp/translation_evaluation_trainer.py
@@ -0,0 +1,397 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+"""PyTorch trainer for UniTE model."""
+
+import os.path as osp
+import random
+from functools import partial
+from math import ceil
+from os import mkdir
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+
+import torch
+from pandas import DataFrame
+from torch.nn.functional import pad
+from torch.nn.utils import clip_grad_norm_
+from torch.optim import AdamW, Optimizer
+from torch.utils.data import (BatchSampler, DataLoader, Dataset, Sampler,
+ SequentialSampler, SubsetRandomSampler)
+from torch.utils.tensorboard import SummaryWriter
+from tqdm import tqdm
+from transformers import AutoTokenizer
+
+from modelscope.metainfo import Metrics, Trainers
+from modelscope.metrics import Metric
+from modelscope.metrics.builder import MetricKeys, build_metric
+from modelscope.models.base import TorchModel
+from modelscope.models.nlp.unite.configuration import InputFormat
+from modelscope.models.nlp.unite.translation_evaluation import (
+ UniTEForTranslationEvaluation, combine_input_sentences)
+from modelscope.msdatasets import MsDataset
+from modelscope.preprocessors import Preprocessor
+from modelscope.trainers.builder import TRAINERS
+from modelscope.trainers.hooks import Hook
+from modelscope.trainers.trainer import EpochBasedTrainer
+from modelscope.utils.config import ConfigDict
+from modelscope.utils.constant import (ConfigKeys, Fields, ModeKeys, ModelFile,
+ TrainerStages)
+from modelscope.utils.device import create_device
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+
+class TranslationEvaluationTrainingSampler(Sampler):
+
+ def __init__(self, num_of_samples: int,
+ batch_size_for_each_input_format: int):
+ r"""Build a sampler for model training with translation evaluation trainer.
+ The trainer should derive samples for each subset of the entire dataset.
+
+ Args:
+ num_of_samples: The number of samples in total.
+ batch_size_for_each_input_format: During training, the batch size for each input format
+
+ Returns:
+ A data sampler for translation evaluation model training.
+
+ """
+
+ self.num_of_samples = num_of_samples
+ self.batch_size_for_each_input_format = batch_size_for_each_input_format
+
+ self.num_of_samples_for_each_input_format = self.num_of_samples // 3
+ num_of_samples_to_use = self.num_of_samples_for_each_input_format * 3
+
+ logger.info(
+ '%d samples are given for training. '
+ 'Using %d samples for each input format. '
+ 'Leaving the last %d samples unused.' %
+ (self.num_of_samples, self.num_of_samples_for_each_input_format,
+ self.num_of_samples - num_of_samples_to_use))
+ self.num_of_samples = num_of_samples_to_use
+
+ random_permutations = torch.randperm(
+ self.num_of_samples).cpu().tolist()
+
+ self.subset_iterators = dict()
+ self.subset_samplers = dict()
+ self.indices_for_each_input_format = dict()
+ for input_format_index, input_format in \
+ enumerate((InputFormat.SRC_REF, InputFormat.SRC, InputFormat.REF)):
+ start_idx = input_format_index * self.num_of_samples_for_each_input_format
+ end_idx = start_idx + self.num_of_samples_for_each_input_format
+ self.indices_for_each_input_format[
+ input_format] = random_permutations[start_idx:end_idx]
+ self.subset_samplers[input_format] = \
+ BatchSampler(SubsetRandomSampler(self.indices_for_each_input_format[input_format]),
+ batch_size=self.batch_size_for_each_input_format,
+ drop_last=True)
+ self.subset_iterators[input_format] = iter(
+ self.subset_samplers[input_format])
+
+ self.num_of_sampled_batches = 0
+
+ if self.__len__() == 0:
+ raise ValueError(
+ 'The dataset doesn\'t contain enough examples to form a single batch.',
+ 'Please reduce the batch_size or use more examples for training.'
+ )
+
+ return
+
+ def __iter__(self):
+ while True:
+ try:
+ if self.num_of_sampled_batches == self.__len__():
+ for input_format in (InputFormat.SRC_REF, InputFormat.SRC,
+ InputFormat.REF):
+ while True:
+ try:
+ next(self.subset_iterators[input_format])
+ except StopIteration:
+ self.subset_iterators[input_format] = \
+ iter(self.subset_samplers[input_format])
+ break
+
+ self.num_of_sampled_batches = 0
+
+ output = list()
+ for input_format_idx, input_format in \
+ enumerate((InputFormat.SRC_REF, InputFormat.SRC, InputFormat.REF)):
+ output += next(self.subset_iterators[input_format])
+
+ self.num_of_sampled_batches += 1
+
+ yield output
+ except StopIteration:
+ break
+
+ def __len__(self) -> int:
+ return self.num_of_samples_for_each_input_format // self.batch_size_for_each_input_format
+
+
+def convert_csv_dict_to_input(
+ batch: List[Dict[str, Any]],
+ preprocessor: Preprocessor) -> Tuple[List[torch.Tensor]]:
+
+ input_dict = dict()
+
+ for key in batch[0].keys():
+ input_dict[key] = list(x[key] for x in batch)
+
+ input_dict = preprocessor(input_dict)
+
+ return input_dict
+
+
+def data_collate_fn(batch: List[Dict[str, Any]], batch_size: int,
+ preprocessor: Preprocessor) -> List[Dict[str, Any]]:
+
+ output_dict = dict()
+ output_dict['input_format'] = list()
+
+ if preprocessor.mode == ModeKeys.TRAIN:
+ for input_format_index, input_format in \
+ enumerate((InputFormat.SRC_REF, InputFormat.SRC, InputFormat.REF)):
+ start_idx = input_format_index * batch_size
+ end_idx = start_idx + batch_size
+ batch_to_process = batch[start_idx:end_idx]
+ output_dict['input_format'] += [input_format] * batch_size
+ preprocessor.change_input_format(input_format)
+ batch_to_process = convert_csv_dict_to_input(
+ batch_to_process, preprocessor)
+
+ for key, value in batch_to_process.items():
+ if key not in output_dict.keys():
+ output_dict[key] = list()
+ output_dict[key].append(value)
+ elif preprocessor.mode == ModeKeys.EVAL:
+ output_dict['input_format'] += [preprocessor.input_format] * len(batch)
+ batch = convert_csv_dict_to_input(batch, preprocessor)
+
+ for key, value in batch.items():
+ if key not in output_dict.keys():
+ output_dict[key] = list()
+ output_dict[key].append(value)
+ else:
+ raise ValueError(
+ 'During training, %s mode is not allowed for preprocessor.'
+ % preprocessor.mode)
+
+ input_max_lengths = max(x.size(-1) for x in output_dict['input_ids'])
+ output_dict['input_ids'] = list(
+ pad(x,
+ pad=(0, input_max_lengths - x.size(-1)),
+ value=preprocessor.pad_token_id) for x in output_dict['input_ids'])
+
+ output_dict['input_ids'] = torch.cat(output_dict['input_ids'], dim=0)
+ output_dict['score'] = torch.Tensor(output_dict['score']).view(-1)
+
+ if preprocessor.mode == ModeKeys.EVAL:
+ output_dict['lp'] = sum(output_dict['lp'], list())
+ output_dict['raw_score'] = sum(output_dict['raw_score'], list())
+ output_dict['segment_id'] = sum(output_dict['segment_id'], list())
+
+ return output_dict
+
+
+@TRAINERS.register_module(module_name=Trainers.translation_evaluation_trainer)
+class TranslationEvaluationTrainer(EpochBasedTrainer):
+
+ def __init__(self,
+ model: Optional[Union[TorchModel, torch.nn.Module,
+ str]] = None,
+ cfg_file: Optional[str] = None,
+ device: str = 'gpu',
+ *args,
+ **kwargs):
+ r"""Build a translation evaluation trainer with a model dir or a model id in the model hub.
+
+ Args:
+ model: A Model instance.
+ cfg_file: The path for the configuration file (configuration.json).
+ device: Used device for this trainer.
+
+ """
+
+ def data_collator_for_train(x):
+ return data_collate_fn(
+ x,
+ batch_size=self.cfg.train.batch_size,
+ preprocessor=self.train_preprocessor)
+
+ def data_collator_for_eval(x):
+ return data_collate_fn(
+ x,
+ batch_size=self.cfg.evaluation.batch_size,
+ preprocessor=self.eval_preprocessor)
+
+ data_collator = {
+ ConfigKeys.train: data_collator_for_train,
+ ConfigKeys.val: data_collator_for_eval
+ }
+
+ super().__init__(
+ model,
+ cfg_file=cfg_file,
+ data_collator=data_collator,
+ *args,
+ **kwargs)
+
+ self.train_dataloader = None
+ self.eval_dataloader = None
+
+ return
+
+ def build_optimizer(self, cfg: ConfigDict) -> Optimizer:
+ r"""Sets the optimizers to be used during training."""
+ if self.cfg.train.optimizer.type != 'AdamW':
+ return super().build_optimizer(cfg)
+
+ # Freezing embedding layers for more efficient training.
+ for param in self.model.encoder.embeddings.parameters():
+ param.requires_grad = False
+
+ logger.info('Building AdamW optimizer ...')
+ learning_rates_and_parameters = list({
+ 'params':
+ self.model.encoder.encoder.layer[i].parameters(),
+ 'lr':
+ self.cfg.train.optimizer.plm_lr
+ * self.cfg.train.optimizer.plm_lr_layerwise_decay**i,
+ } for i in range(0, self.cfg.model.num_hidden_layers))
+
+ learning_rates_and_parameters.append({
+ 'params':
+ self.model.encoder.embeddings.parameters(),
+ 'lr':
+ self.cfg.train.optimizer.plm_lr,
+ })
+
+ learning_rates_and_parameters.append({
+ 'params':
+ self.model.estimator.parameters(),
+ 'lr':
+ self.cfg.train.optimizer.mlp_lr
+ })
+
+ learning_rates_and_parameters.append({
+ 'params':
+ self.model.layerwise_attention.parameters(),
+ 'lr':
+ self.cfg.train.optimizer.mlp_lr,
+ })
+
+ optimizer = AdamW(
+ learning_rates_and_parameters,
+ lr=self.cfg.train.optimizer.plm_lr,
+ betas=self.cfg.train.optimizer.betas,
+ eps=self.cfg.train.optimizer.eps,
+ weight_decay=self.cfg.train.optimizer.weight_decay,
+ )
+
+ return optimizer
+
+ def get_train_dataloader(self) -> DataLoader:
+ logger.info('Building dataloader for training ...')
+
+ if self.train_dataset is None:
+ logger.info('Reading train csv file from %s ...'
+ % self.cfg.dataset.train.name)
+ self.train_dataset = MsDataset.load(
+ osp.join(self.model_dir, self.cfg.dataset.train.name),
+ split=self.cfg.dataset.train.split)
+
+ train_dataloader = DataLoader(
+ self.train_dataset,
+ batch_sampler=TranslationEvaluationTrainingSampler(
+ len(self.train_dataset),
+ batch_size_for_each_input_format=self.cfg.train.batch_size),
+ num_workers=4,
+ collate_fn=self.train_data_collator,
+ generator=None)
+
+ logger.info('Reading done, %d items in total'
+ % len(self.train_dataset))
+
+ return train_dataloader
+
+ def get_eval_data_loader(self) -> DataLoader:
+ logger.info('Building dataloader for evaluating ...')
+
+ if self.eval_dataset is None:
+ logger.info('Reading eval csv file from %s ...'
+ % self.cfg.dataset.valid.name)
+
+ self.eval_dataset = MsDataset.load(
+ osp.join(self.model_dir, self.cfg.dataset.valid.name),
+ split=self.cfg.dataset.valid.split)
+
+ eval_dataloader = DataLoader(
+ self.eval_dataset,
+ batch_sampler=BatchSampler(
+ SequentialSampler(range(0, len(self.eval_dataset))),
+ batch_size=self.cfg.evaluation.batch_size,
+ drop_last=False),
+ num_workers=4,
+ collate_fn=self.eval_data_collator,
+ generator=None)
+
+ logger.info('Reading done, %d items in total' % len(self.eval_dataset))
+
+ return eval_dataloader
+
+ def evaluation_loop(self, data_loader, metric_classes):
+ """ Evaluation loop used by `TranslationEvaluationTrainer.evaluate()`.
+
+ The evaluation process of UniTE model should be arranged with three loops,
+ corresponding to the input formats of `InputFormat.SRC_REF`, `InputFormat.REF`,
+ and `InputFormat.SRC`.
+
+        Here we directly copy the code of `EpochBasedTrainer.evaluation_loop`, and change
+ the input format during each evaluation subloop.
+ """
+ vis_closure = None
+ if hasattr(self.cfg.evaluation, 'visualization'):
+ vis_cfg = self.cfg.evaluation.visualization
+ vis_closure = partial(
+ self.visualization, dataset=self.eval_dataset, **vis_cfg)
+
+ self.invoke_hook(TrainerStages.before_val)
+ metric_values = dict()
+
+ for input_format in (InputFormat.SRC_REF, InputFormat.SRC,
+ InputFormat.REF):
+ self.eval_preprocessor.change_input_format(input_format)
+
+ if self._dist:
+ from modelscope.trainers.utils.inference import multi_gpu_test
+ # list of batched result and data samples
+ metric_values.update(
+ multi_gpu_test(
+ self,
+ data_loader,
+ device=self.device,
+ metric_classes=metric_classes,
+ vis_closure=vis_closure,
+ tmpdir=self.cfg.evaluation.get('cache_dir', None),
+ gpu_collect=self.cfg.evaluation.get(
+ 'gpu_collect', False),
+ data_loader_iters_per_gpu=self._eval_iters_per_epoch))
+ else:
+ from modelscope.trainers.utils.inference import single_gpu_test
+ metric_values.update(
+ single_gpu_test(
+ self,
+ data_loader,
+ device=self.device,
+ metric_classes=metric_classes,
+ vis_closure=vis_closure,
+ data_loader_iters=self._eval_iters_per_epoch))
+
+ for m in metric_classes:
+ if hasattr(m, 'clear') and callable(m.clear):
+ m.clear()
+
+ self.invoke_hook(TrainerStages.after_val)
+ return metric_values
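
A usage sketch (not part of the patch) for the new trainer; the model id is a placeholder, and the train/eval CSV files are resolved from the model's configuration as in `get_train_dataloader` above.

from modelscope.metainfo import Trainers
from modelscope.trainers.builder import build_trainer

trainer = build_trainer(
    name=Trainers.translation_evaluation_trainer,
    default_args=dict(model='<unite-model-id-or-local-dir>'))  # placeholder id
trainer.train()
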
diff --git a/modelscope/trainers/trainer.py b/modelscope/trainers/trainer.py
index 683ff2f5..c980de04 100644
--- a/modelscope/trainers/trainer.py
+++ b/modelscope/trainers/trainer.py
@@ -11,7 +11,7 @@ import json
import torch
from torch import distributed as dist
from torch import nn
-from torch.utils.data import DataLoader, Dataset
+from torch.utils.data import DataLoader, Dataset, Sampler
from torch.utils.data.dataloader import default_collate
from torch.utils.data.distributed import DistributedSampler
@@ -88,7 +88,7 @@ class EpochBasedTrainer(BaseTrainer):
compile_options (dict, optional): The compile options if compile=True,
default None to use the default params of 'TorchModel.compile'.
efficient_tuners (dict, optional): The tuners to use to train the model
-
+ samplers: (:obj:`Sampler` or `Dict[Sampler]`, *optional*): samplers used in the train/eval DataLoader.
Examples of cfg_modify_fn:
>>> def cfg_modify_fn(cfg):
>>> cfg.preprocessor.first_sequence= 'text1'
@@ -114,6 +114,7 @@ class EpochBasedTrainer(BaseTrainer):
model_revision: Optional[str] = DEFAULT_MODEL_REVISION,
seed: int = 42,
callbacks: Optional[List[Hook]] = None,
+ samplers: Optional[Union[Sampler, Dict[str, Sampler]]] = None,
efficient_tuners: List[Dict] = None,
**kwargs):
@@ -132,6 +133,7 @@ class EpochBasedTrainer(BaseTrainer):
self.train_dataloader = None
self.eval_dataloader = None
self.data_loader = None
+ self._samplers = samplers
if isinstance(model, str):
third_party = kwargs.get(ThirdParty.KEY)
@@ -224,9 +226,6 @@ class EpochBasedTrainer(BaseTrainer):
# Please check the DDPHook and MegatronHook for details.
self.parallel_groups = {}
- # Clear the Hook overload functions to avoid duplication.
- Hook.clear_strategies()
-
if self.launcher is not None and not self.cfg.safe_get(
'train.hooks.DDPHook'):
# A logic to fit the current code
@@ -681,6 +680,7 @@ class EpochBasedTrainer(BaseTrainer):
self.train_dataloader = self.get_train_dataloader()
self.data_loader = self.train_dataloader
self.register_optimizers_hook()
+ self.register_processors()
self.print_hook_info()
self.set_checkpoint_file_to_hook(checkpoint_path, load_all_state,
kwargs.get('strict', False))
@@ -720,6 +720,7 @@ class EpochBasedTrainer(BaseTrainer):
strict(`boolean`): If strict, any unmatched keys will cause an error.
"""
+ self.register_processors()
self.print_hook_info()
if checkpoint_path is not None:
from modelscope.trainers.hooks import LoadCheckpointHook
@@ -758,6 +759,7 @@ class EpochBasedTrainer(BaseTrainer):
kwargs:
strict(`boolean`): If strict, any unmatched keys will cause an error.
"""
+ self.register_processors()
self.print_hook_info()
if checkpoint_path is not None:
from modelscope.trainers.hooks import LoadCheckpointHook
@@ -897,11 +899,18 @@ class EpochBasedTrainer(BaseTrainer):
"""
if self.train_dataset is None:
raise 'The train_dataset cannot be None.'
+
+ sampler_cfg = {}
+ if self._samplers is not None:
+ sampler_cfg['sampler'] = self._samplers[
+ ConfigKeys.train] if isinstance(self._samplers,
+ dict) else self._samplers
data_loader = self._build_dataloader_with_dataset(
self.train_dataset,
dist=self._dist,
seed=self._seed,
collate_fn=self.train_data_collator,
+ **sampler_cfg,
**self.cfg.train.get('dataloader', {}))
return data_loader
@@ -915,6 +924,11 @@ class EpochBasedTrainer(BaseTrainer):
if self.eval_dataset is None:
raise 'The eval_dataset cannot be None.'
+ sampler_cfg = {}
+ if self._samplers is not None:
+ sampler_cfg['sampler'] = self._samplers[
+ ConfigKeys.val] if isinstance(self._samplers,
+ dict) else self._samplers
default_config = {'shuffle': False}
default_config.update(self.cfg.evaluation.get('dataloader', {}))
data_loader = self._build_dataloader_with_dataset(
@@ -922,6 +936,7 @@ class EpochBasedTrainer(BaseTrainer):
dist=self._dist,
seed=self._seed,
collate_fn=self.eval_data_collator,
+ **sampler_cfg,
**default_config)
return data_loader
@@ -938,6 +953,11 @@ class EpochBasedTrainer(BaseTrainer):
mode=ModeKeys.EVAL,
preprocessor=self.eval_preprocessor)
+ sampler_cfg = {}
+ if self._samplers is not None:
+ sampler_cfg['sampler'] = self._samplers[
+ ConfigKeys.val] if isinstance(self._samplers,
+ dict) else self._samplers
default_config = {'shuffle': False}
default_config.update(self.cfg.evaluation.get('dataloader', {}))
data_loader = self._build_dataloader_with_dataset(
@@ -945,6 +965,7 @@ class EpochBasedTrainer(BaseTrainer):
dist=self._dist,
seed=self._seed,
collate_fn=self.eval_data_collator,
+ **sampler_cfg,
**default_config)
return data_loader
@@ -1132,13 +1153,19 @@ class EpochBasedTrainer(BaseTrainer):
batch_size = batch_size_per_gpu
num_workers = workers_per_gpu
- if dist and not isinstance(dataset, torch.utils.data.IterableDataset):
- sampler = DistributedSampler(
- dataset, num_replicas=world_size, rank=rank, shuffle=shuffle)
- else:
- sampler = None
- if not isinstance(dataset, torch.utils.data.IterableDataset):
- kwargs['shuffle'] = shuffle
+ sampler = kwargs.pop('sampler', None)
+ if sampler is None:
+ if dist and not isinstance(dataset,
+ torch.utils.data.IterableDataset):
+ sampler = DistributedSampler(
+ dataset,
+ num_replicas=world_size,
+ rank=rank,
+ shuffle=shuffle)
+ else:
+ sampler = None
+ if not isinstance(dataset, torch.utils.data.IterableDataset):
+ kwargs['shuffle'] = shuffle
batch_sampler = None
@@ -1169,7 +1196,6 @@ class EpochBasedTrainer(BaseTrainer):
""" Training loop used by `EpochBasedTrainer.train()`
"""
self.invoke_hook(TrainerStages.before_run)
- kwargs = {}
self.model.train()
for _ in range(self._epoch, self._max_epochs):
self.invoke_hook(TrainerStages.before_train_epoch)
@@ -1181,7 +1207,7 @@ class EpochBasedTrainer(BaseTrainer):
self.data_batch = data_batch
self._inner_iter = i
self.invoke_hook(TrainerStages.before_train_iter)
- self.train_step(self.model, data_batch, **kwargs)
+ self.train_step(self.model, data_batch)
self.invoke_hook(TrainerStages.after_train_iter)
# Value changed after the hooks are invoked, do not move them above the invoke_hook code.
del self.data_batch
@@ -1320,12 +1346,17 @@ class EpochBasedTrainer(BaseTrainer):
hooks = []
for cfg_i in hook_cfg:
hook = build_from_cfg(cfg_i, HOOKS)
- if hasattr(hook, 'register_strategy'):
- hook.register_strategy()
self.register_hook(hook)
hooks.append(hook)
return hooks
+ def register_processors(self):
+ """Register processors to hooks
+ """
+ for hook in self.hooks:
+ if hasattr(hook, 'register_processor'):
+ hook.register_processor(self)
+
def get_hook(self, cls):
return [h for h in self._hooks if h.__class__ == cls]
@@ -1381,14 +1412,7 @@ class EpochBasedTrainer(BaseTrainer):
info += '\n -------------------- '
stage_hook_infos.append(info)
stage_hook_infos = '\n'.join(stage_hook_infos)
-
- strategy_info = '\n --- Hook strategies info --- \n'
- for consumer, methods in Hook._strategies.items():
- strategy_info += f'Method: {consumer} ' \
- f'replaced by: ' \
- f'{[method.__self__.__class__.__name__ + "." + method.__name__ for method in methods]}\n'
- strategy_info += '\n --- Hook strategies info end --- \n'
- return stage_hook_infos + strategy_info
+ return stage_hook_infos
def worker_init_fn(worker_id, num_workers, rank, seed):
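
A usage sketch (not part of the patch) for the new `samplers` argument: a single `Sampler` applies to both dataloaders, while a dict keyed by `ConfigKeys.train`/`ConfigKeys.val` splits them, and a `None` entry falls back to the default (distributed) sampler. The model id and weighting scheme are placeholders.

from torch.utils.data import WeightedRandomSampler

from modelscope.trainers import EpochBasedTrainer
from modelscope.utils.constant import ConfigKeys


def build_balanced_trainer(train_dataset, val_dataset, sample_weights):
    # sample_weights: one weight per training example, e.g. inverse class frequency.
    train_sampler = WeightedRandomSampler(
        sample_weights, num_samples=len(sample_weights))
    return EpochBasedTrainer(
        model='<model-id-or-local-dir>',  # placeholder
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        samplers={
            ConfigKeys.train: train_sampler,
            ConfigKeys.val: None,  # keep the default sampler for evaluation
        })
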
diff --git a/modelscope/trainers/training_args.py b/modelscope/trainers/training_args.py
index f4e4e138..b7236163 100644
--- a/modelscope/trainers/training_args.py
+++ b/modelscope/trainers/training_args.py
@@ -1,108 +1,560 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
-
import re
-from argparse import Action, ArgumentDefaultsHelpFormatter, ArgumentParser
+from copy import deepcopy
from dataclasses import dataclass, field, fields
-from functools import partial
-from typing import Any, Dict, List, Tuple, Union
+from typing import List, Union
-from modelscope.trainers.default_config import DEFAULT_CONFIG
-from modelscope.utils.config import Config, ConfigDict
-from modelscope.utils.hub import read_config
+import addict
+import json
+
+from modelscope.trainers.cli_argument_parser import CliArgumentParser
+from modelscope.utils.config import Config
-def get_flatten_value(config: Config, metadata: Dict, exclusions=None):
- cfg_node = metadata['cfg_node']
- if exclusions is None:
- exclusions = []
-
- values = config.safe_get(cfg_node)
- if isinstance(values, dict):
- param_map = []
- for key, value in values.items():
- if key in exclusions or not isinstance(value,
- (str, int, float, bool)):
- continue
- value = add_quotes_for_str(value)
- param_map.append(f'{key}={value}')
- return ','.join(param_map)
- else:
- return values
-
-
-def set_flatten_value(config: Config, values: Union[str, List[str]],
- metadata: Dict):
- cfg_node = metadata['cfg_node']
- if values is None:
- return config
-
+def set_flatten_value(values: Union[str, List[str]]):
pairs = values.split(',') if isinstance(values, str) else values
- for kv in pairs:
+ _params = {}
+ for kv in pairs or []:
if len(kv.strip()) == 0:
continue
key, value = kv.split('=')
- value = parse_value(value)
- config.merge_from_dict({cfg_node + '.' + key: value})
- return config
+ _params[key] = parse_value(value)
+ return _params
-def get_base_hook_args(config: Config, metadata: Dict):
- cfg_node = metadata['cfg_node']
- hook_type = metadata['hook_type']
- key = metadata['key']
- value = config.safe_get(cfg_node)
- if value is None:
- return get_hook_param(config, hook_type, key)
- else:
- return True if key == 'type' else value
+@dataclass
+class DatasetArgs:
+
+ train_dataset_name: str = field(
+ default=None,
+ metadata={
+ 'help':
+ 'The dataset name used for training, can be an id in the datahub or a local dir',
+ })
+
+ val_dataset_name: str = field(
+ default=None,
+ metadata={
+ 'help':
+            'The dataset name used for evaluating, can be an id in the datahub or a local dir',
+ })
+
+ train_subset_name: str = field(
+ default=None,
+ metadata={
+ 'help': 'The subset name used for training, can be None',
+ })
+
+ val_subset_name: str = field(
+ default=None,
+ metadata={
+ 'help': 'The subset name used for evaluating, can be None',
+ })
+
+ train_split: str = field(
+ default=None, metadata={
+ 'help': 'The split of train dataset',
+ })
+
+ val_split: str = field(
+ default=None, metadata={
+ 'help': 'The split of val dataset',
+ })
+
+ train_dataset_namespace: str = field(
+ default=None,
+ metadata={
+ 'help': 'The dataset namespace used for training',
+ })
+
+ val_dataset_namespace: str = field(
+ default=None,
+ metadata={
+ 'help': 'The dataset namespace used for evaluating',
+ })
+
+ dataset_json_file: str = field(
+ default=None,
+ metadata={
+ 'help':
+ 'The json file to parse all datasets from, used in a complex dataset scenario,'
+            ' the json format should be like:'
+ '''
+ [
+ {
+ "dataset": {
+ # All args used in the MsDataset.load function
+ "dataset_name": "xxx",
+ ...
+ },
+ # All columns used, mapping the column names in each dataset in same names.
+ "column_mapping": {
+ "text1": "sequence1",
+ "text2": "sequence2",
+ "label": "label",
+ },
+ # float or str, float means to split the dataset into train/val,
+ # or just str(train/val)
+ "split": 0.8,
+ }
+ ]
+ ''',
+ })
-def set_base_hook_args(config: Config, value: Any, metadata: Dict):
- cfg_node = metadata['cfg_node']
- hook_type = metadata['hook_type']
- key = metadata['key']
- if 'hooks' in config.train:
- config.train.hooks = [
- hook for hook in config.train.hooks if hook['type'] != hook_type
+@dataclass
+class ModelArgs:
+ task: str = field(
+ default=None,
+ metadata={
+ 'help': 'The task code to be used',
+ 'cfg_node': 'task'
+ })
+
+ model: str = field(
+ default=None, metadata={
+ 'help': 'A model id or model dir',
+ })
+
+ model_type: str = field(
+ default=None,
+ metadata={
+ 'help':
+            'The model type; if use_model_config is False, the user needs to fill this field',
+ 'cfg_node': 'model.type'
+ })
+
+
+@dataclass
+class TrainArgs:
+
+ seed: int = field(
+ default=42, metadata={
+ 'help': 'The random seed',
+ })
+
+ per_device_train_batch_size: int = field(
+ default=16,
+ metadata={
+ 'cfg_node': 'train.dataloader.batch_size_per_gpu',
+ 'help':
+ 'The `batch_size_per_gpu` argument for the train dataloader',
+ })
+
+ train_data_worker: int = field(
+ default=0,
+ metadata={
+ 'cfg_node': 'train.dataloader.workers_per_gpu',
+ 'help': 'The `workers_per_gpu` argument for the train dataloader',
+ })
+
+ train_shuffle: bool = field(
+ default=False,
+ metadata={
+ 'cfg_node': 'train.dataloader.shuffle',
+ 'help': 'The `shuffle` argument for the train dataloader',
+ })
+
+ train_drop_last: bool = field(
+ default=False,
+ metadata={
+ 'cfg_node': 'train.dataloader.drop_last',
+ 'help': 'The `drop_last` argument for the train dataloader',
+ })
+
+ per_device_eval_batch_size: int = field(
+ default=16,
+ metadata={
+ 'cfg_node': 'evaluation.dataloader.batch_size_per_gpu',
+ 'help':
+ 'The `batch_size_per_gpu` argument for the eval dataloader',
+ })
+
+ eval_data_worker: int = field(
+ default=0,
+ metadata={
+ 'cfg_node': 'evaluation.dataloader.workers_per_gpu',
+ 'help': 'The `workers_per_gpu` argument for the eval dataloader',
+ })
+
+ eval_shuffle: bool = field(
+ default=False,
+ metadata={
+ 'cfg_node': 'evaluation.dataloader.shuffle',
+ 'help': 'The `shuffle` argument for the eval dataloader',
+ })
+
+ eval_drop_last: bool = field(
+ default=False,
+ metadata={
+ 'cfg_node': 'evaluation.dataloader.drop_last',
+ 'help': 'The `drop_last` argument for the eval dataloader',
+ })
+
+ max_epochs: int = field(
+ default=5,
+ metadata={
+ 'cfg_node': 'train.max_epochs',
+ 'help': 'The training epochs',
+ })
+
+ work_dir: str = field(
+ default='./train_target',
+ metadata={
+ 'cfg_node': 'train.work_dir',
+ 'help': 'The directory to save models and logs',
+ })
+
+ lr: float = field(
+ default=5e-5,
+ metadata={
+ 'cfg_node': 'train.optimizer.lr',
+ 'help': 'The learning rate of the optimizer',
+ })
+
+ lr_scheduler: str = field(
+ default='LinearLR',
+ metadata={
+ 'cfg_node': 'train.lr_scheduler.type',
+ 'help': 'The lr_scheduler type in torch',
+ })
+
+ optimizer: str = field(
+ default='AdamW',
+ metadata={
+ 'cfg_node': 'train.optimizer.type',
+ 'help': 'The optimizer type in PyTorch, like `AdamW`',
+ })
+
+ optimizer_params: str = field(
+ default=None,
+ metadata={
+ 'cfg_node': 'train.optimizer',
+ 'help': 'The optimizer params',
+ 'cfg_setter': set_flatten_value,
+ })
+
+ lr_scheduler_params: str = field(
+ default=None,
+ metadata={
+ 'cfg_node': 'train.lr_scheduler',
+ 'help': 'The lr scheduler params',
+ 'cfg_setter': set_flatten_value,
+ })
+
+ lr_strategy: str = field(
+ default='by_epoch',
+ metadata={
+ 'cfg_node': 'train.lr_scheduler.options.lr_strategy',
+ 'help': 'The lr decay strategy',
+ 'choices': ['by_epoch', 'by_step', 'no'],
+ })
+
+ local_rank: int = field(
+ default=0, metadata={
+ 'help': 'The local rank',
+ })
+
+ logging_interval: int = field(
+ default=5,
+ metadata={
+            'help': 'The logging interval, in iterations',
+ 'cfg_node': 'train.logging.interval',
+ })
+
+ eval_strategy: str = field(
+ default='by_epoch',
+ metadata={
+ 'help': 'Eval strategy, can be `by_epoch` or `by_step` or `no`',
+ 'cfg_node': 'evaluation.period.eval_strategy',
+ 'choices': ['by_epoch', 'by_step', 'no'],
+ })
+
+ eval_interval: int = field(
+ default=1,
+ metadata={
+ 'help': 'Eval interval',
+ 'cfg_node': 'evaluation.period.interval',
+ })
+
+ eval_metrics: str = field(
+ default=None,
+ metadata={
+ 'help': 'The metric name for evaluation',
+ 'cfg_node': 'evaluation.metrics'
+ })
+
+ save_strategy: str = field(
+ default='by_epoch',
+ metadata={
+ 'help':
+ 'Checkpointing strategy, can be `by_epoch` or `by_step` or `no`',
+ 'cfg_node': 'train.checkpoint.period.save_strategy',
+ 'choices': ['by_epoch', 'by_step', 'no'],
+ })
+
+ save_interval: int = field(
+ default=1,
+ metadata={
+ 'help':
+ 'The checkpoint saving interval, in epochs or iterations depending on `save_strategy`',
+ 'cfg_node': 'train.checkpoint.period.interval',
+ })
+
+ save_best_checkpoint: bool = field(
+ default=False,
+ metadata={
+ 'help':
+ 'Save the checkpoint after evaluation if it is the best so far.',
+ 'cfg_node': 'train.checkpoint.best.save_best',
+ })
+
+ metric_for_best_model: str = field(
+ default=None,
+ metadata={
+ 'help': 'The metric used to decide the best model.',
+ 'cfg_node': 'train.checkpoint.best.metric_key',
+ })
+
+ metric_rule_for_best_model: str = field(
+ default='max',
+ metadata={
+ 'help':
+ 'The rule used to compare the metric for the best model, either `max` or `min`',
+ 'cfg_node': 'train.checkpoint.best.rule',
+ })
+
+ max_checkpoint_num: int = field(
+ default=None,
+ metadata={
+ 'help':
+ 'The max number of checkpoints to keep; older ones will be deleted.',
+ 'cfg_node': 'train.checkpoint.period.max_checkpoint_num',
+ })
+
+ max_checkpoint_num_best: int = field(
+ default=1,
+ metadata={
+ 'help':
+ 'The max number of best checkpoints to keep; worse ones will be deleted.',
+ 'cfg_node': 'train.checkpoint.best.max_checkpoint_num',
+ })
+
+ push_to_hub: bool = field(
+ default=False,
+ metadata={
+ 'help': 'Push to the hub after each periodic checkpoint is saved',
+ 'cfg_node': 'train.checkpoint.period.push_to_hub',
+ })
+
+ repo_id: str = field(
+ default=None,
+ metadata={
+ 'help':
+ 'The repo id on the model hub, usually in the format "group/model"',
+ 'cfg_node': 'train.checkpoint.period.hub_repo_id',
+ })
+
+ hub_token: str = field(
+ default=None,
+ metadata={
+ 'help':
+ 'The model hub token; it can also be provided via the env variable `MODELSCOPE_API_TOKEN`',
+ 'cfg_node': 'train.checkpoint.period.hub_token',
+ })
+
+ private_hub: bool = field(
+ default=True,
+ metadata={
+ 'help': 'Upload to a private hub',
+ 'cfg_node': 'train.checkpoint.period.private_hub',
+ })
+
+ hub_revision: str = field(
+ default='master',
+ metadata={
+ 'help': 'Which branch to commit to',
+ 'cfg_node': 'train.checkpoint.period.hub_revision',
+ })
+
+ push_to_hub_best: bool = field(
+ default=False,
+ metadata={
+ 'help': 'Push the best checkpoint to the hub after it is saved',
+ 'cfg_node': 'train.checkpoint.best.push_to_hub',
+ })
+
+ repo_id_best: str = field(
+ default=None,
+ metadata={
+ 'help':
+ 'The repo id on the model hub, usually in the format "group/model"',
+ 'cfg_node': 'train.checkpoint.best.hub_repo_id',
+ })
+
+ hub_token_best: str = field(
+ default=None,
+ metadata={
+ 'help':
+ 'The model hub token; it can also be provided via the env variable `MODELSCOPE_API_TOKEN`',
+ 'cfg_node': 'train.checkpoint.best.hub_token',
+ })
+
+ private_hub_best: bool = field(
+ default=True,
+ metadata={
+ 'help': 'Upload to a private hub',
+ 'cfg_node': 'train.checkpoint.best.private_hub',
+ })
+
+ hub_revision_best: str = field(
+ default='master',
+ metadata={
+ 'help': 'Which branch to commit to',
+ 'cfg_node': 'train.checkpoint.best.hub_revision',
+ })
+
+
+@dataclass(init=False)
+class TrainingArgs(DatasetArgs, TrainArgs, ModelArgs):
+
+ use_model_config: bool = field(
+ default=False,
+ metadata={
+ 'help':
+ 'Use the configuration of the model; '
+ 'by default, only the parameters from the CLI and the dataclass are used',
+ })
+
+ def __init__(self, **kwargs):
+ self.manual_args = list(kwargs.keys())
+ for f in fields(self):
+ if f.name in kwargs:
+ setattr(self, f.name, kwargs[f.name])
+ self._unknown_args = {}
+
+ def parse_cli(self, parser_args=None):
+ """Construct a TrainingArg class by the parameters of CLI.
+
+ Returns:
+ Self
+ """
+ parser = CliArgumentParser(self)
+ args, unknown = parser.parse_known_args(parser_args)
+ unknown = [
+ item for item in unknown
+ if item not in ('\\', '\n') and '--local-rank=' not in item
]
- if key == 'type':
- if value and config.safe_get(cfg_node) is None:
- config.merge_from_dict({cfg_node: {}})
- else:
- config.merge_from_dict({cfg_node: value})
+ # unrecognized CLI args arrive as '--key value' pairs; keep them so that
+ # to_config() can merge them into the Config later
+ _unknown = {}
+ for i in range(0, len(unknown), 2):
+ _unknown[unknown[i].replace('-', '')] = parse_value(unknown[i + 1])
+ self._unknown_args = _unknown
+ args_dict = vars(args)
+ self.manual_args += parser.manual_args
+ for key, value in deepcopy(args_dict).items():
+ if key is not None and hasattr(self, key):
+ setattr(self, key, value)
+ return self
-def get_strategy(config: Config,
- metadata: Dict,
- value_pair: Tuple[str] = ('by_epoch', 'by_step')):
- flag = get_base_hook_args(config, metadata)
- if flag is None:
+ def to_config(self, ignore_default_config=None):
+ """Convert the TrainingArgs to the `Config`
+
+ Returns:
+ The Config, and extra parameters in dict.
+ """
+ cfg = Config()
+ args_dict = addict.Dict()
+
+ if ignore_default_config is None:
+ ignore_default_config = self.use_model_config
+
+ for f in fields(self):
+ cfg_node = f.metadata.get('cfg_node')
+ cfg_setter = f.metadata.get('cfg_setter') or (lambda x: x)
+ if cfg_node is not None:
+ if f.name in self.manual_args or not ignore_default_config:
+ if isinstance(cfg_node, str):
+ cfg_node = [cfg_node]
+ for _node in cfg_node:
+ cfg.merge_from_dict(
+ {_node: cfg_setter(getattr(self, f.name))})
+ else:
+ args_dict[f.name] = getattr(self, f.name)
+
+ cfg.merge_from_dict(self._unknown_args)
+ return cfg, args_dict
+
+ def get_metadata(self, key):
+ _fields = fields(self)
+ for f in _fields:
+ if f.name == key:
+ return f
return None
- return value_pair[0] if flag else value_pair[1]
-def set_strategy(config: Config,
- value: Any,
- metadata: Dict,
- value_pair: Tuple[str] = ('by_epoch', 'by_step')):
- set_base_hook_args(config, value == value_pair[0], metadata)
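
For orientation, here is a minimal sketch (not part of the patch) of how the `parse_cli`/`to_config` flow defined above is driven. The top-level `TrainingArgs` import and the CLI values are illustrative assumptions only:

```python
from modelscope import TrainingArgs  # assumed top-level re-export

# Defaults passed in code count as "manual" args, just like CLI flags.
args = TrainingArgs(lr=1e-4, optimizer='AdamW')

# parse_cli() fills the dataclass from the command line; an explicit list is
# passed here only to keep the sketch self-contained.
args = args.parse_cli(['--max_epochs', '3', '--save_strategy', 'by_step'])

# to_config() writes every field that has a `cfg_node` into the Config
# (lr -> train.optimizer.lr, max_epochs -> train.max_epochs, ...) and
# collects the fields without one into a plain dict.
cfg, extra = args.to_config()
print(cfg.safe_get('train.max_epochs'))  # -> 3 with the flags above
```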
+def build_dataset_from_file(filename):
+ """
+ The filename format:
+ [
+ {
+ "dataset": {
+ "dataset_name": "xxx",
+ ...
+ },
+ "column_mapping": {
+ "text1": "sequence1",
+ "text2": "sequence2",
+ "label": "label",
+ }
+ "split": 0.8,
+ }
+ ]
+ """
+ from modelscope import MsDataset
+ train_set = []
+ eval_set = []
+ with open(filename, 'r') as f:
+ ds_json = json.load(f)
+ for ds in ds_json:
+ dataset = MsDataset.load(**ds['dataset']).to_hf_dataset()
+ all_columns = dataset.column_names
+ keep_columns = ds['column_mapping'].keys()
+ remove_columns = [
+ column for column in all_columns if column not in keep_columns
+ ]
+ from datasets import ClassLabel, Features, Value
+ features = [
+ f for f in dataset.features.items() if f[0] in keep_columns
+ ]
+ # cast ClassLabel features to plain Values so datasets with different
+ # label sets can be concatenated later
+ new_features = {}
+ for f in features:
+ if isinstance(f[1], ClassLabel):
+ new_features[f[0]] = Value(f[1].dtype)
+ else:
+ new_features[f[0]] = f[1]
+ new_features = Features(new_features)
+ dataset = dataset.map(
+ lambda x: x,
+ remove_columns=remove_columns,
+ features=new_features).rename_columns(ds['column_mapping'])
+ split = ds['split']
+ if isinstance(split, str):
+ assert split in ('train', 'val')
+ if split == 'train':
+ train_set.append(dataset)
+ else:
+ eval_set.append(dataset)
+ else:
+ assert isinstance(split, float) and 0 < split < 1
+ ds_dict = dataset.train_test_split(train_size=split)
+ train_set.append(ds_dict['train'])
+ eval_set.append(ds_dict['test'])
-def get_hook_param(config, hook_type: str, key='type'):
- hooks = config.safe_get('train.hooks', [])
- _hooks = list(filter(lambda hook: hook['type'] == hook_type, hooks))
- if key == 'type':
- return len(_hooks) > 0
- elif len(_hooks) > 0:
- return getattr(_hooks[0], key, None)
- return None
-
-
-def add_quotes_for_str(value: Union[str, float, bool, None]) -> str:
- if isinstance(value, str):
- return f'"{value}"'
- else:
- return str(value)
+ from datasets import concatenate_datasets
+ return concatenate_datasets(train_set), concatenate_datasets(eval_set)
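
A minimal sketch (not part of the patch) of a description file matching the `build_dataset_from_file` docstring above; the dataset id, namespace, and column names are hypothetical placeholders:

```python
import json

ds_description = [{
    'dataset': {
        # keyword arguments forwarded to MsDataset.load(); placeholders only
        'dataset_name': 'my_dataset',
        'namespace': 'my_namespace',
        'subset_name': 'default',
        'split': 'train',
    },
    # original column name -> column name expected by the trainer
    'column_mapping': {
        'text1': 'sequence1',
        'text2': 'sequence2',
        'label': 'label',
    },
    # a float in (0, 1) splits this dataset into train/eval parts;
    # the strings 'train' or 'val' route the whole dataset to one side
    'split': 0.8,
}]

with open('dataset.json', 'w') as f:
    json.dump(ds_description, f, indent=2)

# train_ds, eval_ds = build_dataset_from_file('dataset.json')
```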
def parse_value(value: str) -> Union[str, float, bool, None]:
@@ -126,717 +578,3 @@ def parse_value(value: str) -> Union[str, float, bool, None]:
return float(value)
else:
return value
-
-
-@dataclass
-class TrainingArgs:
- model: str = field(
- default=None, metadata={
- 'help': 'A model id or model dir',
- })
-
- seed: int = field(
- default=42, metadata={
- 'help': 'The random seed',
- })
-
- task: str = field(
- default=None,
- metadata={
- 'help': 'The task code to be used',
- 'cfg_node': 'task'
- })
-
- dataset_name: str = field(
- default=None, metadata={
- 'help': 'The dataset name',
- })
-
- subset_name: str = field(
- default=None, metadata={
- 'help': 'The subset name of the dataset',
- })
-
- train_dataset_name: str = field(
- default=None, metadata={
- 'help': 'The train dataset name',
- })
-
- val_dataset_name: str = field(
- default=None, metadata={
- 'help': 'The validation dataset name',
- })
-
- per_device_train_batch_size: int = field(
- default=None,
- metadata={
- 'cfg_node': 'train.dataloader.batch_size_per_gpu',
- 'help': 'The training batch size per GPU',
- })
-
- train_data_worker: int = field(
- default=0,
- metadata={
- 'cfg_node': 'train.dataloader.workers_per_gpu',
- 'help': 'The number of data workers for train dataloader',
- })
-
- train_shuffle: bool = field(
- default=None,
- metadata={
- 'cfg_node': 'train.dataloader.shuffle',
- 'help': 'Shuffle the train dataset or not',
- })
-
- train_drop_last: bool = field(
- default=None,
- metadata={
- 'cfg_node':
- 'train.dataloader.drop_last',
- 'help':
- 'Whether to drop out the last set of data in the train_dataset',
- })
-
- per_device_eval_batch_size: int = field(
- default=None,
- metadata={
- 'cfg_node': 'evaluation.dataloader.batch_size_per_gpu',
- 'help': 'The eval batch size per GPU',
- })
-
- eval_data_worker: int = field(
- default=0,
- metadata={
- 'cfg_node': 'evaluation.dataloader.workers_per_gpu',
- 'help': 'The number of data workers for eval dataloader',
- })
-
- eval_shuffle: bool = field(
- default=None,
- metadata={
- 'cfg_node': 'evaluation.dataloader.shuffle',
- 'help': 'Shuffle the eval dataset or not',
- })
-
- eval_drop_last: bool = field(
- default=None,
- metadata={
- 'cfg_node': 'evaluation.dataloader.drop_last',
- 'help':
- 'Whether to drop out the last set of data in the eval_dataset',
- })
-
- max_epochs: int = field(
- default=None,
- metadata={
- 'cfg_node': 'train.max_epochs',
- 'help': 'The training epochs',
- })
-
- work_dir: str = field(
- default=None,
- metadata={
- 'cfg_node': 'train.work_dir',
- 'help': 'The training dir to save models and logs',
- })
-
- lr: float = field(
- default=None,
- metadata={
- 'cfg_node': 'train.optimizer.lr',
- 'help': 'The learning rate of the optimizer',
- })
-
- optimizer: str = field(
- default=None,
- metadata={
- 'cfg_node': 'train.optimizer.type',
- 'help': 'The optimizer type',
- })
-
- optimizer_params: str = field(
- default=None,
- metadata={
- 'cfg_node':
- 'train.optimizer',
- 'cfg_getter':
- partial(get_flatten_value, exclusions=['type', 'lr', 'options']),
- 'cfg_setter':
- set_flatten_value,
- 'help':
- 'The optimizer init params except `lr`',
- })
-
- lr_scheduler_params: str = field(
- default=None,
- metadata={
- 'cfg_node':
- 'train.lr_scheduler',
- 'cfg_getter':
- partial(get_flatten_value, exclusions=['type', 'lr', 'options']),
- 'cfg_setter':
- set_flatten_value,
- 'help':
- 'The lr_scheduler init params',
- })
-
- local_rank: int = field(
- default=0, metadata={
- 'help': 'The training local rank',
- })
-
- save_ckpt: bool = field(
- default=True,
- metadata={
- 'help':
- 'Periodically save checkpoint when True, corresponding to CheckpointHook',
- 'cfg_node': 'train.checkpoint.period',
- 'hook_type': 'CheckpointHook',
- 'key': 'type',
- 'cfg_getter': get_base_hook_args,
- 'cfg_setter': set_base_hook_args,
- })
-
- save_ckpt_best: bool = field(
- default=None,
- metadata={
- 'help':
- 'Save best checkpoint when True, corresponding to BestCkptSaverHook',
- 'cfg_node': 'train.checkpoint.best',
- 'hook_type': 'BestCkptSaverHook',
- 'key': 'type',
- 'cfg_getter': get_base_hook_args,
- 'cfg_setter': set_base_hook_args,
- })
-
- evaluate: bool = field(
- default=True,
- metadata={
- 'help': 'Evaluate when True, corresponding to EvaluationHook',
- 'cfg_node': 'evaluation.period',
- 'hook_type': 'EvaluationHook',
- 'key': 'type',
- 'cfg_getter': get_base_hook_args,
- 'cfg_setter': set_base_hook_args,
- })
-
- save_ckpt_strategy: str = field(
- default=None,
- metadata={
- 'help': 'Periodically save checkpoint by epoch or by step'
- 'use with `CheckpointHook`, can be `by_epoch` or `by_step`',
- 'cfg_node': 'train.checkpoint.period.by_epoch',
- 'hook_type': 'CheckpointHook',
- 'key': 'by_epoch',
- 'choices': ['by_epoch', 'by_step'],
- 'cfg_getter': get_strategy,
- 'cfg_setter': set_strategy,
- })
-
- save_ckpt_best_strategy: str = field(
- default=None,
- metadata={
- 'help': 'Save best checkpoint by epoch or by step'
- 'use with `BestCkptSaverHook`, can be `by_epoch` or `by_step`',
- 'cfg_node': 'train.checkpoint.best.by_epoch',
- 'hook_type': 'BestCkptSaverHook',
- 'key': 'by_epoch',
- 'choices': ['by_epoch', 'by_step'],
- 'cfg_getter': get_strategy,
- 'cfg_setter': set_strategy,
- })
-
- push_to_hub: bool = field(
- default=None,
- metadata={
- 'help':
- 'Push to hub after one checkpoint saved by CheckpointHook in the local disk',
- 'cfg_node': 'train.checkpoint.period.push_to_hub',
- 'hook_type': 'CheckpointHook',
- 'key': 'push_to_hub',
- 'cfg_getter': get_base_hook_args,
- 'cfg_setter': set_base_hook_args,
- })
-
- model_id_with_org: str = field(
- default=None,
- metadata={
- 'help':
- 'The repo id in modelhub, usually it\'s like "group/model"',
- 'cfg_node': 'train.checkpoint.period.model_id_with_org',
- 'hook_type': 'CheckpointHook',
- 'key': 'model_id_with_org',
- 'cfg_getter': get_base_hook_args,
- 'cfg_setter': set_base_hook_args,
- })
-
- hub_token: str = field(
- default=None,
- metadata={
- 'help':
- 'The token to push to hub, you can also set the token to the env variable `MODELSCOPE_API_TOKEN`',
- 'cfg_node': 'train.checkpoint.period.hub_token',
- 'hook_type': 'CheckpointHook',
- 'key': 'hub_token',
- 'cfg_getter': get_base_hook_args,
- 'cfg_setter': set_base_hook_args,
- })
-
- private_hub: bool = field(
- default=None,
- metadata={
- 'help': 'Upload to a private hub',
- 'cfg_node': 'train.checkpoint.period.private_hub',
- 'hook_type': 'CheckpointHook',
- 'key': 'private_hub',
- 'cfg_getter': get_base_hook_args,
- 'cfg_setter': set_base_hook_args,
- })
-
- push_to_hub_best_model: bool = field(
- default=None,
- metadata={
- 'help':
- 'Push to hub after one checkpoint saved by BestCkptSaverHook in the local disk',
- 'cfg_node': 'train.checkpoint.best.push_to_hub',
- 'hook_type': 'BestCkptSaverHook',
- 'key': 'push_to_hub',
- 'cfg_getter': get_base_hook_args,
- 'cfg_setter': set_base_hook_args,
- })
-
- model_id_with_org_best_model: str = field(
- default=None,
- metadata={
- 'help':
- 'The repo id in modelhub, usually it\'s like "group/model"',
- 'cfg_node': 'train.checkpoint.best.model_id_with_org',
- 'hook_type': 'BestCkptSaverHook',
- 'key': 'model_id_with_org',
- 'cfg_getter': get_base_hook_args,
- 'cfg_setter': set_base_hook_args,
- })
-
- hub_token_best_model: str = field(
- default=None,
- metadata={
- 'help':
- 'The token to push to hub, you can also set the token to the env variable `MODELSCOPE_API_TOKEN`',
- 'cfg_node': 'train.checkpoint.best.hub_token',
- 'hook_type': 'BestCkptSaverHook',
- 'key': 'hub_token',
- 'cfg_getter': get_base_hook_args,
- 'cfg_setter': set_base_hook_args,
- })
-
- private_hub_best_model: bool = field(
- default=None,
- metadata={
- 'help': 'Upload to a private hub',
- 'cfg_node': 'train.checkpoint.best.private_hub',
- 'hook_type': 'BestCkptSaverHook',
- 'key': 'private_hub',
- 'cfg_getter': get_base_hook_args,
- 'cfg_setter': set_base_hook_args,
- })
-
- ckpt_period_interval: int = field(
- default=1,
- metadata={
- 'help':
- 'The interval of epoch or iter of saving checkpoint period',
- 'cfg_node': 'train.checkpoint.period.interval',
- 'hook_type': 'CheckpointHook',
- 'key': 'interval',
- 'cfg_getter': get_base_hook_args,
- 'cfg_setter': set_base_hook_args,
- })
-
- ckpt_best_interval: int = field(
- default=None,
- metadata={
- 'help': 'The interval of epoch or iter of saving checkpoint best',
- 'cfg_node': 'train.checkpoint.best.interval',
- 'hook_type': 'BestCkptSaverHook',
- 'key': 'interval',
- 'cfg_getter': get_base_hook_args,
- 'cfg_setter': set_base_hook_args,
- })
-
- metric_for_best_model: str = field(
- default=None,
- metadata={
- 'help':
- 'Which metric key to judge the checkpoint is better or not, use with `BestCkptSaverHook`, '
- 'please make sure this key is returned by the `evaluation_metrics` classes',
- 'cfg_node':
- 'train.checkpoint.best.metric_key',
- 'hook_type':
- 'BestCkptSaverHook',
- 'key':
- 'metric_key',
- 'cfg_getter':
- get_base_hook_args,
- 'cfg_setter':
- set_base_hook_args,
- })
-
- metric_rule_for_best_model: str = field(
- default=None,
- metadata={
- 'help':
- 'Which rule to compare the value of `checkpoint_saving_metric`, '
- 'use with `BestCkptSaverHook`, can be `max` or `min`',
- 'cfg_node':
- 'train.checkpoint.best.rule',
- 'hook_type':
- 'BestCkptSaverHook',
- 'key':
- 'rule',
- 'cfg_getter':
- get_base_hook_args,
- 'cfg_setter':
- set_base_hook_args,
- })
-
- save_ckpt_peroid_limit: int = field(
- default=None,
- metadata={
- 'help':
- 'The max saving number of checkpoint, older checkpoints will be deleted.',
- 'cfg_node': 'train.checkpoint.period.max_checkpoint_num',
- 'hook_type': 'CheckpointHook',
- 'key': 'max_checkpoint_num',
- 'cfg_getter': get_base_hook_args,
- 'cfg_setter': set_base_hook_args,
- })
-
- save_ckpt_best_limit: int = field(
- default=None,
- metadata={
- 'help':
- 'The max saving number of checkpoint, worse checkpoints will be deleted.',
- 'cfg_node': 'train.checkpoint.best.max_checkpoint_num',
- 'hook_type': 'BestCkptSaverHook',
- 'key': 'max_checkpoint_num',
- 'cfg_getter': get_base_hook_args,
- 'cfg_setter': set_base_hook_args,
- })
-
- logging_interval: int = field(
- default=None,
- metadata={
- 'help': 'The interval of iter of logging information',
- 'cfg_node': 'train.logging.interval',
- 'hook_type': 'TextLoggerHook',
- 'key': 'interval',
- 'cfg_getter': get_base_hook_args,
- 'cfg_setter': set_base_hook_args,
- })
-
- eval_strategy: str = field(
- default=None,
- metadata={
- 'help': 'Evaluate model by epoch or by step'
- 'use with `EvaluationHook`, can be `by_epoch` or `by_step`',
- 'cfg_node': 'evaluation.period.by_epoch',
- 'hook_type': 'EvaluationHook',
- 'key': 'by_epoch',
- 'choices': ['by_epoch', 'by_step'],
- 'cfg_getter': get_strategy,
- 'cfg_setter': set_strategy,
- })
-
- eval_interval: int = field(
- default=None,
- metadata={
- 'help': 'Evaluation interval by epoch or iter',
- 'cfg_node': 'evaluation.period.interval',
- 'hook_type': 'EvaluationHook',
- 'key': 'interval',
- 'cfg_getter': get_base_hook_args,
- 'cfg_setter': set_base_hook_args,
- })
-
- eval_metrics: str = field(
- default=None,
- metadata={
- 'help': 'The metric module name used in evaluation',
- 'cfg_node': 'evaluation.metrics'
- })
-
- namespace: str = field(
- default=None, metadata={'help': 'The namespace of dataset'})
-
- @classmethod
- def from_cli(cls, parser_args=None, **extra_kwargs):
- """Construct a TrainingArg class by the parameters of CLI.
-
- Args:
- **extra_kwargs: Extra args which can be defined in code.
-
- Returns:
- The output TrainingArg class with the parameters from CLI.
- """
- self = cls(**extra_kwargs)
- parser = CliArgumentParser(self)
- args, unknown = parser.parse_known_args(parser_args)
- unknown = [item for item in unknown if item not in ('\\', '\n')]
- _unknown = {}
- for i in range(0, len(unknown), 2):
- _unknown[unknown[i].replace('-', '')] = parse_value(unknown[i + 1])
- cfg_dict = vars(args)
-
- if args.model is not None:
- try:
- cfg = read_config(args.model)
- except Exception as e:
- print('Read config failed with error:', e)
- else:
- self = cls.from_config(cfg, **extra_kwargs)
- for key, value in cfg_dict.items():
- if key is not None and hasattr(self,
- key) and key in parser.manual_args:
- setattr(self, key, value)
- self.extra_args = _unknown
- return self
-
- def to_args(self):
- """Convert the TrainingArg class to key-value pairs.
-
- Returns: The key-value pair.
-
- """
- _args = {}
- for f in fields(self):
- _args[f.name] = getattr(self, f.name)
- return _args
-
- @classmethod
- def from_config(cls, config=DEFAULT_CONFIG, **kwargs):
- """Construct the TrainingArg class by a `Config` class.
-
- Args:
- config: The Config class. By default, `DEFAULT_CONFIG` is used.
- **kwargs: Extra args which can be defined in code.
-
- Returns: The output TrainingArg class with the parameters from the config.
-
- """
-
- self = cls(**kwargs)
- for f in fields(self):
- if 'cfg_node' in f.metadata and getattr(self, f.name) is None:
- self._to_field(f, config)
- return self
-
- def _to_field(self, f, config):
- assert 'cfg_node' in f.metadata
- if 'cfg_getter' in f.metadata:
- cfg_getter = f.metadata['cfg_getter']
- setattr(self, f.name, cfg_getter(config, f.metadata))
- else:
- cfg_node = f.metadata['cfg_node']
- setattr(self, f.name, config.safe_get(cfg_node))
-
- def _to_config(self, f, config: Config):
- assert 'cfg_node' in f.metadata
- value = getattr(self, f.name)
- if 'cfg_setter' in f.metadata:
- cfg_setter = f.metadata['cfg_setter']
- config = cfg_setter(config, value, f.metadata)
- else:
- cfg_node = f.metadata['cfg_node']
- if isinstance(cfg_node, str):
- cfg_node = [cfg_node]
- for _node in cfg_node:
- config.merge_from_dict({_node: value})
- return config
-
- def __call__(self, cfg: Config):
- for f in fields(self):
- if 'cfg_node' not in f.metadata:
- continue
-
- value = getattr(self, f.name)
- if value is not None:
- self._to_config(f, cfg)
- if hasattr(self, 'extra_args'):
- cfg.merge_from_dict(self.extra_args)
- else:
- self._to_field(f, cfg)
- return cfg
-
-
-class CliArgumentParser(ArgumentParser):
- """ Argument Parser to define and parse command-line args for training.
-
- Args:
- training_args (TrainingArgs): dict or list of dict which defines different
- paramters for training.
- """
-
- def __init__(self, training_args: TrainingArgs = None, **kwargs):
- if 'formatter_class' not in kwargs:
- kwargs['formatter_class'] = ArgumentDefaultsHelpFormatter
- super().__init__(**kwargs)
- self.training_args = training_args
- self.define_args()
-
- def get_manual_args(self, args):
- return [arg[2:] for arg in args if arg.startswith('--')]
-
- def _parse_known_args(self, args: List = None, namespace=None):
- self.model_id = namespace.model if namespace is not None else None
- if '--model' in args:
- self.model_id = args[args.index('--model') + 1]
- self.manual_args = self.get_manual_args(args)
- return super()._parse_known_args(args, namespace)
-
- def print_help(self, file=None):
- config = DEFAULT_CONFIG
- if self.model_id is not None:
- try:
- config = read_config(self.model_id)
- except Exception as e:
- print('Read config failed with error:', e)
-
- if config is not None:
- for action_group in self._optionals._group_actions:
- if hasattr(self.training_args, action_group.dest):
- value = getattr(self.training_args, action_group.dest)
- f = {f.name: f
- for f in fields(self.training_args)
- }.get(action_group.dest)
- if value is not None:
- action_group.default = value
- elif 'cfg_node' in f.metadata:
- cfg_node = f.metadata['cfg_node']
- if isinstance(cfg_node, str):
- cfg_node = [cfg_node]
-
- assert isinstance(cfg_node, (list, tuple))
- if isinstance(cfg_node[0], str):
- action_group.default = config.safe_get(cfg_node[0])
- else:
- action_group.default = cfg_node[0](config)
- return super().print_help(file)
-
- def define_args(self):
- if self.training_args is not None:
- for f in fields(self.training_args):
- arg_name = f.name
- arg_attr = getattr(self.training_args, f.name)
- name = f'--{arg_name}'
- kwargs = dict(type=f.type, help=f.metadata['help'])
- kwargs['default'] = arg_attr
-
- if 'choices' in f.metadata:
- kwargs['choices'] = f.metadata['choices']
-
- kwargs['action'] = SingleAction
- self.add_argument(name, **kwargs)
-
-
-class DictAction(Action):
- """
- argparse action to split an argument into KEY=VALUE form
- on the first = and append to a dictionary. List options can
- be passed as comma separated values, i.e 'KEY=V1,V2,V3', or with explicit
- brackets, i.e. 'KEY=[V1,V2,V3]'. It also support nested brackets to build
- list/tuple values. e.g. 'KEY=[(V1,V2),(V3,V4)]'
- """
-
- @staticmethod
- def parse_int_float_bool_str(val):
- try:
- return int(val)
- except ValueError:
- pass
- try:
- return float(val)
- except ValueError:
- pass
- if val.lower() in ['true', 'false']:
- return val.lower() == 'true'
- if val == 'None':
- return None
- return val
-
- @staticmethod
- def parse_iterable(val):
- """Parse iterable values in the string.
- All elements inside '()' or '[]' are treated as iterable values.
- Args:
- val (str): Value string.
- Returns:
- list | tuple: The expanded list or tuple from the string.
- Examples:
- >>> DictAction._parse_iterable('1,2,3')
- [1, 2, 3]
- >>> DictAction._parse_iterable('[a, b, c]')
- ['a', 'b', 'c']
- >>> DictAction._parse_iterable('[(1, 2, 3), [a, b], c]')
- [(1, 2, 3), ['a', 'b'], 'c']
- """
-
- def find_next_comma(string):
- """Find the position of next comma in the string.
- If no ',' is found in the string, return the string length. All
- chars inside '()' and '[]' are treated as one element and thus ','
- inside these brackets are ignored.
- """
- assert (string.count('(') == string.count(')')) and (
- string.count('[')
- == string.count(']')), f'Imbalanced brackets exist in {string}'
- end = len(string)
- for idx, char in enumerate(string):
- pre = string[:idx]
- # The string before this ',' is balanced
- if ((char == ',') and (pre.count('(') == pre.count(')'))
- and (pre.count('[') == pre.count(']'))):
- end = idx
- break
- return end
-
- # Strip ' and " characters and replace whitespace.
- val = val.strip('\'\"').replace(' ', '')
- is_tuple = False
- if val.startswith('(') and val.endswith(')'):
- is_tuple = True
- val = val[1:-1]
- elif val.startswith('[') and val.endswith(']'):
- val = val[1:-1]
- elif ',' not in val:
- # val is a single value
- return DictAction.parse_int_float_bool_str(val)
-
- values = []
- while len(val) > 0:
- comma_idx = find_next_comma(val)
- element = DictAction.parse_iterable(val[:comma_idx])
- values.append(element)
- val = val[comma_idx + 1:]
- if is_tuple:
- values = tuple(values)
- return values
-
- def __call__(self, parser, namespace, values, option_string):
- options = {}
- for kv in values:
- key, val = kv.split('=', maxsplit=1)
- options[key] = self.parse_iterable(val)
- setattr(namespace, self.dest, options)
-
-
-class SingleAction(DictAction):
- """ Argparse action to convert value to tuple or list or nested structure of
- list and tuple, i.e 'V1,V2,V3', or with explicit brackets, i.e. '[V1,V2,V3]'.
- It also support nested brackets to build list/tuple values. e.g. '[(V1,V2),(V3,V4)]'
- """
-
- def __call__(self, parser, namespace, value, option_string):
- if isinstance(value, str):
- setattr(namespace, self.dest, self.parse_iterable(value))
- else:
- setattr(namespace, self.dest, value)
diff --git a/modelscope/utils/ast_index_file.py b/modelscope/utils/ast_index_file.py
new file mode 100644
index 00000000..5aedf1bb
--- /dev/null
+++ b/modelscope/utils/ast_index_file.py
@@ -0,0 +1 @@
+{"index": {"('MODELS', 'protein-structure', 'unifold')": {"filepath": "TEMPLATE_PATH/models/science/unifold/model.py", "imports": ["torch", "os", "typing", "argparse"], "module": "modelscope.models.science.unifold.model"}, "('MODELS', 'acoustic-noise-suppression', 'speech_dfsmn_ans')": {"filepath": "TEMPLATE_PATH/models/audio/ans/denoise_net.py", "imports": ["torch"], "module": "modelscope.models.audio.ans.denoise_net"}, "('MODELS', 'acoustic-noise-suppression', 'speech_frcrn_ans_cirm_16k')": {"filepath": "TEMPLATE_PATH/models/audio/ans/frcrn.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.audio.ans.frcrn"}, "('MODELS', 'speaker-verification', 'ecapa-tdnn-sv')": {"filepath": "TEMPLATE_PATH/models/audio/sv/ecapa_tdnn.py", "imports": ["torch", "torchaudio", "math", "os", "typing"], "module": "modelscope.models.audio.sv.ecapa_tdnn"}, "('MODELS', 'speaker-verification', 'eres2net-sv')": {"filepath": "TEMPLATE_PATH/models/audio/sv/ERes2Net.py", "imports": ["torch", "torchaudio", "math", "os", "typing"], "module": "modelscope.models.audio.sv.ERes2Net"}, "('MODELS', 'speaker-verification', 'cam++-sv')": {"filepath": "TEMPLATE_PATH/models/audio/sv/DTDNN.py", "imports": ["torch", "torchaudio", "collections", "os", "typing"], "module": "modelscope.models.audio.sv.DTDNN"}, "('MODELS', 'speaker-verification', 'generic-sv')": {"filepath": "TEMPLATE_PATH/models/audio/sv/generic_speaker_verification.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.sv.generic_speaker_verification"}, "('MODELS', 'speaker-diarization', 'generic-sv')": {"filepath": "TEMPLATE_PATH/models/audio/sv/generic_speaker_verification.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.sv.generic_speaker_verification"}, "('MODELS', 'speaker-diarization', 'scl-sd')": {"filepath": "TEMPLATE_PATH/models/audio/sv/speaker_change_locator.py", "imports": ["numpy", "torch", "torchaudio", "collections", "os", "typing"], "module": "modelscope.models.audio.sv.speaker_change_locator"}, "('MODELS', 'speaker-verification', 'rdino_ecapa-tdnn-sv')": {"filepath": "TEMPLATE_PATH/models/audio/sv/rdino.py", "imports": ["torch", "torchaudio", "math", "os", "typing"], "module": "modelscope.models.audio.sv.rdino"}, "('MODELS', 'inverse-text-processing', 'generic-itn')": {"filepath": "TEMPLATE_PATH/models/audio/itn/generic_inverse_text_processing.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.itn.generic_inverse_text_processing"}, "('MODELS', 'auto-speech-recognition', 'wenet-asr')": {"filepath": "TEMPLATE_PATH/models/audio/asr/wenet_automatic_speech_recognition.py", "imports": ["json", "os", "wenetruntime", "typing"], "module": "modelscope.models.audio.asr.wenet_automatic_speech_recognition"}, "('MODELS', 'auto-speech-recognition', 'generic-asr')": {"filepath": "TEMPLATE_PATH/models/audio/asr/generic_automatic_speech_recognition.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.asr.generic_automatic_speech_recognition"}, "('MODELS', 'voice-activity-detection', 'generic-asr')": {"filepath": "TEMPLATE_PATH/models/audio/asr/generic_automatic_speech_recognition.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.asr.generic_automatic_speech_recognition"}, "('MODELS', 'language-score-prediction', 'generic-asr')": {"filepath": "TEMPLATE_PATH/models/audio/asr/generic_automatic_speech_recognition.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.asr.generic_automatic_speech_recognition"}, "('MODELS', 'speech-timestamp', 
'generic-asr')": {"filepath": "TEMPLATE_PATH/models/audio/asr/generic_automatic_speech_recognition.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.asr.generic_automatic_speech_recognition"}, "('MODELS', 'punctuation', 'generic-punc')": {"filepath": "TEMPLATE_PATH/models/audio/punc/generic_punctuation.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.punc.generic_punctuation"}, "('MODELS', 'text-to-speech', 'sambert-hifigan')": {"filepath": "TEMPLATE_PATH/models/audio/tts/sambert_hifi.py", "imports": ["shutil", "numpy", "json", "__future__", "wave", "matplotlib", "datetime", "yaml", "os", "zipfile"], "module": "modelscope.models.audio.tts.sambert_hifi"}, "('MODELS', 'speech-separation', 'speech_mossformer_separation_temporal_8k')": {"filepath": "TEMPLATE_PATH/models/audio/separation/mossformer.py", "imports": ["torch", "os", "copy", "typing"], "module": "modelscope.models.audio.separation.mossformer"}, "('MODELS', 'keyword-spotting', 'speech_dfsmn_kws_char_farfield')": {"filepath": "TEMPLATE_PATH/models/audio/kws/farfield/model.py", "imports": ["os", "typing", "tempfile"], "module": "modelscope.models.audio.kws.farfield.model"}, "('MODELS', 'keyword-spotting', 'speech_dfsmn_kws_char_farfield_iot')": {"filepath": "TEMPLATE_PATH/models/audio/kws/farfield/model.py", "imports": ["os", "typing", "tempfile"], "module": "modelscope.models.audio.kws.farfield.model"}, "('MODELS', 'keyword-spotting', 'kws-kwsbp')": {"filepath": "TEMPLATE_PATH/models/audio/kws/generic_key_word_spotting.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.kws.generic_key_word_spotting"}, "('MODELS', 'keyword-spotting', 'speech_kws_fsmn_char_ctc_nearfield')": {"filepath": "TEMPLATE_PATH/models/audio/kws/nearfield/model.py", "imports": ["torch", "tempfile", "sys", "os", "typing"], "module": "modelscope.models.audio.kws.nearfield.model"}, "('MODELS', 'image-captioning', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'ocr-recognition', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'visual-grounding', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'visual-question-answering', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'visual-entailment', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'image-classification', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'text-summarization', 'ofa')": {"filepath": 
"TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'text-classification', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'auto-speech-recognition', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'sudoku', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'text2sql', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'multi-modal-embedding', 'clip-multi-modal-embedding')": {"filepath": "TEMPLATE_PATH/models/multi_modal/clip/model.py", "imports": ["numpy", "json", "torch", "collections", "os", "typing"], "module": "modelscope.models.multi_modal.clip.model"}, "('MODELS', 'visual-question-answering', 'mplug')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mplug_for_all_tasks.py", "imports": ["os", "typing"], "module": "modelscope.models.multi_modal.mplug_for_all_tasks"}, "('MODELS', 'image-captioning', 'mplug')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mplug_for_all_tasks.py", "imports": ["os", "typing"], "module": "modelscope.models.multi_modal.mplug_for_all_tasks"}, "('MODELS', 'image-text-retrieval', 'mplug')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mplug_for_all_tasks.py", "imports": ["os", "typing"], "module": "modelscope.models.multi_modal.mplug_for_all_tasks"}, "('MODELS', 'video-question-answering', 'hitea')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mplug_for_all_tasks.py", "imports": ["os", "typing"], "module": "modelscope.models.multi_modal.mplug_for_all_tasks"}, "('MODELS', 'video-captioning', 'hitea')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mplug_for_all_tasks.py", "imports": ["os", "typing"], "module": "modelscope.models.multi_modal.mplug_for_all_tasks"}, "('MODELS', 'text-to-image-synthesis', 'multi-stage-diffusion-text-to-image-synthesis')": {"filepath": "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/model.py", "imports": ["PIL", "numpy", "json", "torch", "math", "os", "typing"], "module": "modelscope.models.multi_modal.multi_stage_diffusion.model"}, "('MODELS', 'text-to-image-synthesis', 'diffusion-text-to-image-synthesis')": {"filepath": "TEMPLATE_PATH/models/multi_modal/diffusion/model.py", "imports": ["numpy", "json", "torch", "os", "typing"], "module": "modelscope.models.multi_modal.diffusion.model"}, "('MODELS', 'efficient-diffusion-tuning', 'efficient-diffusion-tuning')": {"filepath": "TEMPLATE_PATH/models/multi_modal/efficient_diffusion_tuning/efficient_stable_diffusion.py", "imports": ["transformers", "torch", "functools", "diffusers", "os", "typing"], "module": "modelscope.models.multi_modal.efficient_diffusion_tuning.efficient_stable_diffusion"}, "('MODELS', 'generative-multi-modal-embedding', 'gemm-generative-multi-modal')": 
{"filepath": "TEMPLATE_PATH/models/multi_modal/gemm/gemm_model.py", "imports": ["PIL", "numpy", "json", "torch", "torchvision", "os", "typing"], "module": "modelscope.models.multi_modal.gemm.gemm_model"}, "('MODELS', 'video-multi-modal-embedding', 'video-clip-multi-modal-embedding')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py", "imports": ["urllib", "PIL", "random", "numpy", "json", "torch", "decord", "tempfile", "os", "typing", "uuid"], "module": "modelscope.models.multi_modal.mmr.models.clip_for_mm_video_embedding"}, "('MODELS', 'multi-modal-similarity', 'team-multi-modal-similarity')": {"filepath": "TEMPLATE_PATH/models/multi_modal/team/team_model.py", "imports": ["PIL", "numpy", "torch", "cv2", "tokenizers", "torchvision", "typing"], "module": "modelscope.models.multi_modal.team.team_model"}, "('MODELS', 'document-vl-embedding', 'vldoc')": {"filepath": "TEMPLATE_PATH/models/multi_modal/vldoc/model.py", "imports": ["json", "torch", "logging", "re", "math", "sys", "copy", "torchvision", "os"], "module": "modelscope.models.multi_modal.vldoc.model"}, "('MODELS', 'video-temporal-grounding', 'soonet')": {"filepath": "TEMPLATE_PATH/models/multi_modal/soonet/model.py", "imports": ["torch", "os"], "module": "modelscope.models.multi_modal.soonet.model"}, "('MODELS', 'text-ranking', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/text_ranking.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.text_ranking"}, "('MODELS', 'backbone', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/backbone.py", "imports": ["dataclasses", "transformers", "random", "torch", "math", "warnings", "os", "typing"], "module": "modelscope.models.multi_modal.mgeo.backbone"}, "('MODELS', 'text-classification', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/text_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.text_classification"}, "('MODELS', 'nli', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/text_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.text_classification"}, "('MODELS', 'sentiment-classification', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/text_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.text_classification"}, "('MODELS', 'sentence-similarity', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/text_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.text_classification"}, "('MODELS', 'zero-shot-classification', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/text_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.text_classification"}, "('MODELS', 'token-classification', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/token_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.token_classification"}, "('MODELS', 'part-of-speech', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/token_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.token_classification"}, "('MODELS', 'word-segmentation', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/token_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.token_classification"}, "('MODELS', 'multimodal-dialogue', 'mplug-owl')": {"filepath": 
"TEMPLATE_PATH/models/multi_modal/mplug_owl/modeling_mplug_owl.py", "imports": ["dataclasses", "transformers", "random", "torch", "logging", "math", "copy", "io", "os", "typing"], "module": "modelscope.models.multi_modal.mplug_owl.modeling_mplug_owl"}, "('MODELS', 'text-to-image-synthesis', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_text_to_image_synthesis_model.py", "imports": ["PIL", "pkg_resources", "numpy", "json", "torch", "taming", "torchvision", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_text_to_image_synthesis_model"}, "('MODELS', 'text-to-video-synthesis', 'latent-text-to-video-synthesis')": {"filepath": "TEMPLATE_PATH/models/multi_modal/video_synthesis/text_to_video_synthesis_model.py", "imports": ["open_clip", "torch", "einops", "os", "typing"], "module": "modelscope.models.multi_modal.video_synthesis.text_to_video_synthesis_model"}, "('MODELS', 'image-captioning', 'clip-interrogator')": {"filepath": "TEMPLATE_PATH/models/multi_modal/clip_interrogator/model.py", "imports": ["PIL", "hashlib", "numpy", "open_clip", "torch", "dataclasses", "os", "typing", "requests", "transformers", "safetensors", "tqdm", "math", "time", "torchvision"], "module": "modelscope.models.multi_modal.clip_interrogator.model"}, "('MODELS', 'generative-multi-modal-embedding', 'rleg-generative-multi-modal')": {"filepath": "TEMPLATE_PATH/models/multi_modal/rleg/rleg.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.models.multi_modal.rleg.rleg"}, "('MODELS', 'translation-evaluation', 'unite')": {"filepath": "TEMPLATE_PATH/models/nlp/unite/translation_evaluation.py", "imports": ["transformers", "numpy", "torch", "math", "warnings", "packaging", "dataclasses", "typing"], "module": "modelscope.models.nlp.unite.translation_evaluation"}, "('MODELS', 'text-generation', 'palm-v2')": {"filepath": "TEMPLATE_PATH/models/nlp/palm_v2/text_generation.py", "imports": ["dataclasses", "subprocess", "codecs", "transformers", "numpy", "json", "torch", "math", "copy", "os", "typing"], "module": "modelscope.models.nlp.palm_v2.text_generation"}, "('MODELS', 'fill-mask', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/fill_mask.py", "imports": ["torch", "transformers"], "module": "modelscope.models.nlp.structbert.fill_mask"}, "('MODELS', 'backbone', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/backbone.py", "imports": ["transformers", "torch", "math", "packaging", "dataclasses", "typing"], "module": "modelscope.models.nlp.structbert.backbone"}, "('MODELS', 'faq-question-answering', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/faq_question_answering.py", "imports": ["torch", "math", "collections", "os", "typing"], "module": "modelscope.models.nlp.structbert.faq_question_answering"}, "('MODELS', 'text-classification', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.text_classification"}, "('MODELS', 'nli', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.text_classification"}, "('MODELS', 'sentiment-classification', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.text_classification"}, "('MODELS', 'sentence-similarity', 'structbert')": {"filepath": 
"TEMPLATE_PATH/models/nlp/structbert/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.text_classification"}, "('MODELS', 'zero-shot-classification', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.text_classification"}, "('MODELS', 'token-classification', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/token_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.token_classification"}, "('MODELS', 'word-segmentation', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/token_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.token_classification"}, "('MODELS', 'part-of-speech', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/token_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.token_classification"}, "('MODELS', 'backbone', 'transformers')": {"filepath": "TEMPLATE_PATH/models/nlp/hf_transformers/backbone.py", "imports": ["transformers"], "module": "modelscope.models.nlp.hf_transformers.backbone"}, "('MODELS', 'fill-mask', 'fill-mask')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/fill_mask.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.models.nlp.task_models.fill_mask"}, "('MODELS', 'text-ranking', 'text-ranking')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/text_ranking.py", "imports": ["typing", "numpy"], "module": "modelscope.models.nlp.task_models.text_ranking"}, "('MODELS', 'feature-extraction', 'feature-extraction')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/feature_extraction.py", "imports": ["typing", "numpy"], "module": "modelscope.models.nlp.task_models.feature_extraction"}, "('MODELS', 'text-classification', 'text-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/text_classification.py", "imports": ["typing", "numpy"], "module": "modelscope.models.nlp.task_models.text_classification"}, "('MODELS', 'text-generation', 'text-generation')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/text_generation.py", "imports": ["torch", "typing", "transformers", "numpy"], "module": "modelscope.models.nlp.task_models.text_generation"}, "('MODELS', 'information-extraction', 'information-extraction')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/information_extraction.py", "imports": ["typing", "numpy"], "module": "modelscope.models.nlp.task_models.information_extraction"}, "('MODELS', 'relation-extraction', 'information-extraction')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/information_extraction.py", "imports": ["typing", "numpy"], "module": "modelscope.models.nlp.task_models.information_extraction"}, "('MODELS', 'token-classification', 'token-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'part-of-speech', 'token-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'named-entity-recognition', 'token-classification-for-ner')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": 
"modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'transformer-crf', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'token-classification', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'token-classification', 'transformer-crf-for-word-segmentation')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'named-entity-recognition', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'part-of-speech', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'word-segmentation', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'word-segmentation', 'transformer-crf-for-word-segmentation')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'fill-mask', 'veco')": {"filepath": "TEMPLATE_PATH/models/nlp/veco/fill_mask.py", "imports": ["transformers"], "module": "modelscope.models.nlp.veco.fill_mask"}, "('MODELS', 'backbone', 'veco')": {"filepath": "TEMPLATE_PATH/models/nlp/veco/backbone.py", "imports": ["transformers"], "module": "modelscope.models.nlp.veco.backbone"}, "('MODELS', 'nli', 'veco')": {"filepath": "TEMPLATE_PATH/models/nlp/veco/text_classification.py", "imports": ["transformers"], "module": "modelscope.models.nlp.veco.text_classification"}, "('MODELS', 'sentiment-classification', 'veco')": {"filepath": "TEMPLATE_PATH/models/nlp/veco/text_classification.py", "imports": ["transformers"], "module": "modelscope.models.nlp.veco.text_classification"}, "('MODELS', 'sentence-similarity', 'veco')": {"filepath": "TEMPLATE_PATH/models/nlp/veco/text_classification.py", "imports": ["transformers"], "module": "modelscope.models.nlp.veco.text_classification"}, "('MODELS', 'text-classification', 'veco')": {"filepath": "TEMPLATE_PATH/models/nlp/veco/text_classification.py", "imports": ["transformers"], "module": "modelscope.models.nlp.veco.text_classification"}, "('MODELS', 'token-classification', 'veco')": {"filepath": "TEMPLATE_PATH/models/nlp/veco/token_classification.py", "imports": ["torch", "transformers"], "module": "modelscope.models.nlp.veco.token_classification"}, "('MODELS', 'text-generation', 'glm130b')": {"filepath": "TEMPLATE_PATH/models/nlp/glm_130b/text_generation.py", "imports": ["random", "stat", "torch", "SwissArmyTransformer", "re", "functools", "sys", "copy", "time", "os", "typing"], "module": "modelscope.models.nlp.glm_130b.text_generation"}, "('MODELS', 'text-summarization', 'mglm')": {"filepath": "TEMPLATE_PATH/models/nlp/mglm/mglm_for_text_summarization.py", "imports": ["random", "numpy", 
"torch", "megatron_util", "os", "typing"], "module": "modelscope.models.nlp.mglm.mglm_for_text_summarization"}, "('MODELS', 'backbone', 'plug-mental')": {"filepath": "TEMPLATE_PATH/models/nlp/plug_mental/backbone.py", "imports": ["transformers", "torch", "math", "packaging", "dataclasses", "typing"], "module": "modelscope.models.nlp.plug_mental.backbone"}, "('MODELS', 'text-classification', 'plug-mental')": {"filepath": "TEMPLATE_PATH/models/nlp/plug_mental/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.plug_mental.text_classification"}, "('MODELS', 'nli', 'plug-mental')": {"filepath": "TEMPLATE_PATH/models/nlp/plug_mental/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.plug_mental.text_classification"}, "('MODELS', 'sentiment-classification', 'plug-mental')": {"filepath": "TEMPLATE_PATH/models/nlp/plug_mental/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.plug_mental.text_classification"}, "('MODELS', 'sentence-similarity', 'plug-mental')": {"filepath": "TEMPLATE_PATH/models/nlp/plug_mental/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.plug_mental.text_classification"}, "('MODELS', 'zero-shot-classification', 'plug-mental')": {"filepath": "TEMPLATE_PATH/models/nlp/plug_mental/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.plug_mental.text_classification"}, "('MODELS', 'text-generation', 'gpt-moe')": {"filepath": "TEMPLATE_PATH/models/nlp/gpt_moe/text_generation.py", "imports": ["typing", "transformers"], "module": "modelscope.models.nlp.gpt_moe.text_generation"}, "('MODELS', 'translation', 'csanmt-translation')": {"filepath": "TEMPLATE_PATH/models/nlp/csanmt/translation.py", "imports": ["tensorflow", "typing", "math", "collections"], "module": "modelscope.models.nlp.csanmt.translation"}, "('MODELS', 'text2text-generation', 'T5')": {"filepath": "TEMPLATE_PATH/models/nlp/T5/text2text_generation.py", "imports": ["transformers", "torch", "copy", "warnings", "typing"], "module": "modelscope.models.nlp.T5.text2text_generation"}, "('MODELS', 'backbone', 'T5')": {"filepath": "TEMPLATE_PATH/models/nlp/T5/backbone.py", "imports": ["transformers", "torch", "math", "copy", "warnings", "os", "typing"], "module": "modelscope.models.nlp.T5.backbone"}, "('HEADS', 'text-classification', 'text-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/text_classification_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.text_classification_head"}, "('HEADS', 'sentence-similarity', 'text-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/text_classification_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.text_classification_head"}, "('HEADS', 'nli', 'text-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/text_classification_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.text_classification_head"}, "('HEADS', 'sentiment-classification', 'text-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/text_classification_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.text_classification_head"}, "('HEADS', 'information-extraction', 'information-extraction')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/infromation_extraction_head.py", "imports": ["torch"], "module": "modelscope.models.nlp.heads.infromation_extraction_head"}, "('HEADS', 'relation-extraction', 
'information-extraction')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/infromation_extraction_head.py", "imports": ["torch"], "module": "modelscope.models.nlp.heads.infromation_extraction_head"}, "('HEADS', 'token-classification', 'token-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/token_classification_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.token_classification_head"}, "('HEADS', 'named-entity-recognition', 'token-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/token_classification_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.token_classification_head"}, "('HEADS', 'part-of-speech', 'token-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/token_classification_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.token_classification_head"}, "('HEADS', 'text-generation', 'text-generation')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/text_generation_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.text_generation_head"}, "('HEADS', 'token-classification', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'named-entity-recognition', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'word-segmentation', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'part-of-speech', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'transformer-crf', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'token-classification', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'named-entity-recognition', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'word-segmentation', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'part-of-speech', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'fill-mask', 'roberta-mlm')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/torch_pretrain_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.torch_pretrain_head"}, "('HEADS', 'fill-mask', 'bert-mlm')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/fill_mask_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.fill_mask_head"}, "('HEADS', 'fill-mask', 'fill-mask')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/fill_mask_head.py", "imports": 
["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.fill_mask_head"}, "('HEADS', 'fill-mask', 'xlm-roberta-mlm')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/fill_mask_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.fill_mask_head"}, "('HEADS', 'text-ranking', 'text-ranking')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/text_ranking_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.text_ranking_head"}, "('BACKBONES', 'backbone', 'bloom')": {"filepath": "TEMPLATE_PATH/models/nlp/bloom/backbone.py", "imports": ["transformers"], "module": "modelscope.models.nlp.bloom.backbone"}, "('MODELS', 'backbone', 'xlm-roberta')": {"filepath": "TEMPLATE_PATH/models/nlp/xlm_roberta/backbone.py", "imports": ["torch", "transformers", "math", "packaging"], "module": "modelscope.models.nlp.xlm_roberta.backbone"}, "('MODELS', 'text-classification', 'peer')": {"filepath": "TEMPLATE_PATH/models/nlp/peer/text_classification.py", "imports": ["copy", "torch"], "module": "modelscope.models.nlp.peer.text_classification"}, "('MODELS', 'nli', 'peer')": {"filepath": "TEMPLATE_PATH/models/nlp/peer/text_classification.py", "imports": ["copy", "torch"], "module": "modelscope.models.nlp.peer.text_classification"}, "('MODELS', 'sentiment-classification', 'peer')": {"filepath": "TEMPLATE_PATH/models/nlp/peer/text_classification.py", "imports": ["copy", "torch"], "module": "modelscope.models.nlp.peer.text_classification"}, "('MODELS', 'sentence-similarity', 'peer')": {"filepath": "TEMPLATE_PATH/models/nlp/peer/text_classification.py", "imports": ["copy", "torch"], "module": "modelscope.models.nlp.peer.text_classification"}, "('MODELS', 'zero-shot-classification', 'peer')": {"filepath": "TEMPLATE_PATH/models/nlp/peer/text_classification.py", "imports": ["copy", "torch"], "module": "modelscope.models.nlp.peer.text_classification"}, "('MODELS', 'fid-dialogue', 'fid-T5')": {"filepath": "TEMPLATE_PATH/models/nlp/fid_T5/text_generation.py", "imports": ["torch", "os", "io", "transformers"], "module": "modelscope.models.nlp.fid_T5.text_generation"}, "('MODELS', 'table-question-answering', 'space-T-en')": {"filepath": "TEMPLATE_PATH/models/nlp/space_T_en/text_to_sql.py", "imports": ["torch", "os", "typing", "text2sql_lgesql"], "module": "modelscope.models.nlp.space_T_en.text_to_sql"}, "('MODELS', 'competency-aware-translation', 'canmt')": {"filepath": "TEMPLATE_PATH/models/nlp/canmt/canmt_translation.py", "imports": ["numpy", "torch", "math", "os", "typing"], "module": "modelscope.models.nlp.canmt.canmt_translation"}, "('MODELS', 'text-error-correction', 'bart')": {"filepath": "TEMPLATE_PATH/models/nlp/bart/text_error_correction.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.nlp.bart.text_error_correction"}, "('MODELS', 'text-classification', 'user-satisfaction-estimation')": {"filepath": "TEMPLATE_PATH/models/nlp/use/user_satisfaction_estimation.py", "imports": ["transformers", "numpy", "torch", "os", "typing"], "module": "modelscope.models.nlp.use.user_satisfaction_estimation"}, "('BACKBONES', 'backbone', 'gpt-neo')": {"filepath": "TEMPLATE_PATH/models/nlp/gpt_neo/backbone.py", "imports": ["transformers"], "module": "modelscope.models.nlp.gpt_neo.backbone"}, "('MODELS', 'siamese-uie', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/siamese_uie.py", "imports": ["torch", "copy"], "module": "modelscope.models.nlp.bert.siamese_uie"}, "('MODELS', 'fill-mask', 'bert')": {"filepath": 
"TEMPLATE_PATH/models/nlp/bert/fill_mask.py", "imports": [], "module": "modelscope.models.nlp.bert.fill_mask"}, "('MODELS', 'word-alignment', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/word_alignment.py", "imports": ["torch"], "module": "modelscope.models.nlp.bert.word_alignment"}, "('MODELS', 'text-ranking', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/text_ranking.py", "imports": [], "module": "modelscope.models.nlp.bert.text_ranking"}, "('MODELS', 'backbone', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/backbone.py", "imports": ["torch", "transformers", "math", "packaging"], "module": "modelscope.models.nlp.bert.backbone"}, "('MODELS', 'text-classification', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/text_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.text_classification"}, "('MODELS', 'nli', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/text_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.text_classification"}, "('MODELS', 'sentiment-classification', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/text_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.text_classification"}, "('MODELS', 'sentence-similarity', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/text_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.text_classification"}, "('MODELS', 'zero-shot-classification', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/text_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.text_classification"}, "('MODELS', 'sentence-embedding', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/sentence_embedding.py", "imports": ["torch"], "module": "modelscope.models.nlp.bert.sentence_embedding"}, "('MODELS', 'document-segmentation', 'bert-for-document-segmentation')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/document_segmentation.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.bert.document_segmentation"}, "('MODELS', 'token-classification', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/token_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.token_classification"}, "('MODELS', 'part-of-speech', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/token_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.token_classification"}, "('MODELS', 'word-segmentation', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/token_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.token_classification"}, "('MODELS', 'document-grounded-dialog-rerank', 'doc2bot')": {"filepath": "TEMPLATE_PATH/models/nlp/dgds/document_grounded_dialog_rerank.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.nlp.dgds.document_grounded_dialog_rerank"}, "('MODELS', 'document-grounded-dialog-generate', 'doc2bot')": {"filepath": "TEMPLATE_PATH/models/nlp/dgds/document_grounded_dialog_generate.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.nlp.dgds.document_grounded_dialog_generate"}, "('MODELS', 'document-grounded-dialog-retrieval', 'doc2bot')": {"filepath": "TEMPLATE_PATH/models/nlp/dgds/document_grounded_dialog_retrieval.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.nlp.dgds.document_grounded_dialog_retrieval"}, "('MODELS', 'text-generation', 'gpt3')": {"filepath": "TEMPLATE_PATH/models/nlp/gpt3/text_generation.py", "imports": ["torch", "typing", "transformers", "collections"], 
"module": "modelscope.models.nlp.gpt3.text_generation"}, "('MODELS', 'fill-mask', 'deberta_v2')": {"filepath": "TEMPLATE_PATH/models/nlp/deberta_v2/fill_mask.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.deberta_v2.fill_mask"}, "('MODELS', 'backbone', 'deberta_v2')": {"filepath": "TEMPLATE_PATH/models/nlp/deberta_v2/backbone.py", "imports": ["torch", "typing", "transformers", "collections"], "module": "modelscope.models.nlp.deberta_v2.backbone"}, "('MODELS', 'code-translation', 'codegeex')": {"filepath": "TEMPLATE_PATH/models/nlp/codegeex/codegeex_for_code_translation.py", "imports": ["torch", "copy", "typing"], "module": "modelscope.models.nlp.codegeex.codegeex_for_code_translation"}, "('MODELS', 'code-generation', 'codegeex')": {"filepath": "TEMPLATE_PATH/models/nlp/codegeex/codegeex_for_code_generation.py", "imports": ["torch", "copy", "typing"], "module": "modelscope.models.nlp.codegeex.codegeex_for_code_generation"}, "('MODELS', 'task-oriented-conversation', 'space-modeling')": {"filepath": "TEMPLATE_PATH/models/nlp/space/dialog_modeling.py", "imports": ["os", "typing"], "module": "modelscope.models.nlp.space.dialog_modeling"}, "('MODELS', 'task-oriented-conversation', 'space-dst')": {"filepath": "TEMPLATE_PATH/models/nlp/space/dialog_state_tracking.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.space.dialog_state_tracking"}, "('MODELS', 'task-oriented-conversation', 'space-intent')": {"filepath": "TEMPLATE_PATH/models/nlp/space/dialog_intent_prediction.py", "imports": ["os", "typing"], "module": "modelscope.models.nlp.space.dialog_intent_prediction"}, "('MODELS', 'fid-dialogue', 'fid-plug')": {"filepath": "TEMPLATE_PATH/models/nlp/fid_plug/text_generation.py", "imports": ["torch", "os", "io", "transformers"], "module": "modelscope.models.nlp.fid_plug.text_generation"}, "('BACKBONES', 'backbone', 'gpt2')": {"filepath": "TEMPLATE_PATH/models/nlp/gpt2/backbone.py", "imports": ["transformers"], "module": "modelscope.models.nlp.gpt2.backbone"}, "('MODELS', 'fill-mask', 'megatron-bert')": {"filepath": "TEMPLATE_PATH/models/nlp/megatron_bert/fill_mask.py", "imports": ["torch", "transformers"], "module": "modelscope.models.nlp.megatron_bert.fill_mask"}, "('MODELS', 'backbone', 'megatron-bert')": {"filepath": "TEMPLATE_PATH/models/nlp/megatron_bert/backbone.py", "imports": ["torch", "transformers", "math"], "module": "modelscope.models.nlp.megatron_bert.backbone"}, "('MODELS', 'table-question-answering', 'space-T-cn')": {"filepath": "TEMPLATE_PATH/models/nlp/space_T_cn/table_question_answering.py", "imports": ["transformers", "numpy", "torch", "os", "typing"], "module": "modelscope.models.nlp.space_T_cn.table_question_answering"}, "('MODELS', 'fill-mask', 'ponet')": {"filepath": "TEMPLATE_PATH/models/nlp/ponet/fill_mask.py", "imports": ["torch", "transformers"], "module": "modelscope.models.nlp.ponet.fill_mask"}, "('MODELS', 'backbone', 'ponet')": {"filepath": "TEMPLATE_PATH/models/nlp/ponet/backbone.py", "imports": ["distutils", "transformers", "torch", "math", "packaging"], "module": "modelscope.models.nlp.ponet.backbone"}, "('MODELS', 'document-segmentation', 'ponet-for-document-segmentation')": {"filepath": "TEMPLATE_PATH/models/nlp/ponet/document_segmentation.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.ponet.document_segmentation"}, "('MODELS', 'extractive-summarization', 'ponet-for-document-segmentation')": {"filepath": "TEMPLATE_PATH/models/nlp/ponet/document_segmentation.py", 
"imports": ["torch", "typing"], "module": "modelscope.models.nlp.ponet.document_segmentation"}, "('MODELS', 'backbone', 'llama')": {"filepath": "TEMPLATE_PATH/models/nlp/llama/backbone.py", "imports": ["torch", "typing", "transformers", "math"], "module": "modelscope.models.nlp.llama.backbone"}, "('MODELS', 'text-generation', 'llama')": {"filepath": "TEMPLATE_PATH/models/nlp/llama/text_generation.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.llama.text_generation"}, "('MODELS', 'backbone', 'lstm')": {"filepath": "TEMPLATE_PATH/models/nlp/lstm/backbone.py", "imports": ["torch"], "module": "modelscope.models.nlp.lstm.backbone"}, "('MODELS', 'token-classification', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/lstm/token_classification.py", "imports": [], "module": "modelscope.models.nlp.lstm.token_classification"}, "('MODELS', 'named-entity-recognition', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/lstm/token_classification.py", "imports": [], "module": "modelscope.models.nlp.lstm.token_classification"}, "('MODELS', 'part-of-speech', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/lstm/token_classification.py", "imports": [], "module": "modelscope.models.nlp.lstm.token_classification"}, "('MODELS', 'word-segmentation', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/lstm/token_classification.py", "imports": [], "module": "modelscope.models.nlp.lstm.token_classification"}, "('MODELS', 'word-segmentation', 'lstm-crf-for-word-segmentation')": {"filepath": "TEMPLATE_PATH/models/nlp/lstm/token_classification.py", "imports": [], "module": "modelscope.models.nlp.lstm.token_classification"}, "('MODELS', 'image-deblurring', 'nafnet')": {"filepath": "TEMPLATE_PATH/models/cv/image_deblur/nafnet_for_image_deblur.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_deblur.nafnet_for_image_deblur"}, "('MODELS', 'image-segmentation', 'vision-middleware')": {"filepath": "TEMPLATE_PATH/models/cv/vision_middleware/model.py", "imports": ["json", "torch", "typing", "os"], "module": "modelscope.models.cv.vision_middleware.model"}, "('MODELS', 'image-quality-assessment-mos', 'image-quality-assessment-man')": {"filepath": "TEMPLATE_PATH/models/cv/image_quality_assessment_man/image_quality_assessment_man.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_quality_assessment_man.image_quality_assessment_man"}, "('MODELS', 'product-retrieval-embedding', 'product-retrieval-embedding')": {"filepath": "TEMPLATE_PATH/models/cv/product_retrieval_embedding/item_model.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.models.cv.product_retrieval_embedding.item_model"}, "('MODELS', 'body-2d-keypoints', 'body-2d-keypoints')": {"filepath": "TEMPLATE_PATH/models/cv/body_2d_keypoints/hrnet_v2.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.body_2d_keypoints.hrnet_v2"}, "('MODELS', 'indoor-layout-estimation', 'panovit-layout-estimation')": {"filepath": "TEMPLATE_PATH/models/cv/indoor_layout_estimation/panovit.py", "imports": ["torch", "os", "yacs", "numpy"], "module": "modelscope.models.cv.indoor_layout_estimation.panovit"}, "('MODELS', 'semantic-segmentation', 'detection')": {"filepath": "TEMPLATE_PATH/models/cv/salient_detection/salient_model.py", "imports": ["PIL", "torch", "cv2", "torchvision", "os"], "module": "modelscope.models.cv.salient_detection.salient_model"}, "('MODELS', 'image-quality-assessment-degradation', 'image-quality-assessment-degradation')": {"filepath": 
"TEMPLATE_PATH/models/cv/image_quality_assessment_degradation/image_quality_assessment_degradation.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_quality_assessment_degradation.image_quality_assessment_degradation"}, "('MODELS', 'image-portrait-enhancement', 'gpen')": {"filepath": "TEMPLATE_PATH/models/cv/image_portrait_enhancement/image_portrait_enhancement.py", "imports": ["torch", "os", "typing", "math"], "module": "modelscope.models.cv.image_portrait_enhancement.image_portrait_enhancement"}, "('HEADS', 'default', 'MaskScoringNRoIHead')": {"filepath": "TEMPLATE_PATH/models/cv/abnormal_object_detection/mmdet_ms/roi_head/mask_scoring_roi_head.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.abnormal_object_detection.mmdet_ms.roi_head.mask_scoring_roi_head"}, "('ROI_EXTRACTORS', 'default', 'SingleRoINExtractor')": {"filepath": "TEMPLATE_PATH/models/cv/abnormal_object_detection/mmdet_ms/roi_head/roi_extractors/single_level_roi_extractor.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.abnormal_object_detection.mmdet_ms.roi_head.roi_extractors.single_level_roi_extractor"}, "('MODELS', 'image-object-detection', 'MaskScoring')": {"filepath": "TEMPLATE_PATH/models/cv/abnormal_object_detection/mmdet_model.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.abnormal_object_detection.mmdet_model"}, "('MODELS', 'image-classification', 'image-probing-model')": {"filepath": "TEMPLATE_PATH/models/cv/image_probing_model/model.py", "imports": ["json", "torch", "typing", "os"], "module": "modelscope.models.cv.image_probing_model.model"}, "('MODELS', 'video-human-matting', 'video-human-matting')": {"filepath": "TEMPLATE_PATH/models/cv/video_human_matting/model.py", "imports": ["numpy", "torch", "torchvision", "os", "typing"], "module": "modelscope.models.cv.video_human_matting.model"}, "('MODELS', 'language-guided-video-summarization', 'clip-it-language-guided-video-summarization')": {"filepath": "TEMPLATE_PATH/models/cv/language_guided_video_summarization/summarizer.py", "imports": ["numpy", "videofeatures_clipit", "torch", "bmt_clipit", "os", "typing", "argparse"], "module": "modelscope.models.cv.language_guided_video_summarization.summarizer"}, "('MODELS', 'face-2d-keypoints', 'flc')": {"filepath": "TEMPLATE_PATH/models/cv/facial_landmark_confidence/flc/facial_landmark_confidence.py", "imports": ["PIL", "numpy", "torch", "cv2", "os"], "module": "modelscope.models.cv.facial_landmark_confidence.flc.facial_landmark_confidence"}, "('MODELS', 'image-body-reshaping', 'image-body-reshaping')": {"filepath": "TEMPLATE_PATH/models/cv/image_body_reshaping/image_body_reshaping.py", "imports": ["numpy", "cv2", "torch", "os", "typing"], "module": "modelscope.models.cv.image_body_reshaping.image_body_reshaping"}, "('MODELS', 'image-segmentation', 'm2fp')": {"filepath": "TEMPLATE_PATH/models/cv/image_human_parsing/m2fp_net.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_human_parsing.m2fp_net"}, "('PREPROCESSORS', 'cv', 'image-sky-change-preprocessor')": {"filepath": "TEMPLATE_PATH/models/cv/image_skychange/preprocessor.py", "imports": ["numbers", "pdb", "numpy", "cv2", "json", "torch", "torchvision", "typing"], "module": "modelscope.models.cv.image_skychange.preprocessor"}, "('MODELS', 'image-skychange', 'image-skychange')": {"filepath": "TEMPLATE_PATH/models/cv/image_skychange/skychange_model.py", "imports": ["pdb", "cv2", "torch", "json", "math", "collections", "time", "os", 
"typing"], "module": "modelscope.models.cv.image_skychange.skychange_model"}, "('MODELS', 'video-object-segmentation', 'video-object-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/video_object_segmentation/model.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.video_object_segmentation.model"}, "('MODELS', 'face-reconstruction', 'face_reconstruction')": {"filepath": "TEMPLATE_PATH/models/cv/face_reconstruction/models/facerecon_model.py", "imports": ["numpy", "cv2", "torch", "collections", "os"], "module": "modelscope.models.cv.face_reconstruction.models.facerecon_model"}, "('MODELS', 'facial-expression-recognition', 'fer')": {"filepath": "TEMPLATE_PATH/models/cv/facial_expression_recognition/fer/facial_expression_recognition.py", "imports": ["PIL", "numpy", "torch", "cv2", "os"], "module": "modelscope.models.cv.facial_expression_recognition.fer.facial_expression_recognition"}, "('MODELS', 'face-recognition', 'rts-backbone')": {"filepath": "TEMPLATE_PATH/models/cv/face_recognition/torchkit/rts_backbone.py", "imports": ["torch", "os", "math", "collections"], "module": "modelscope.models.cv.face_recognition.torchkit.rts_backbone"}, "('MODELS', 'shop-segmentation', 'shop-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/shop_segmentation/shop_seg_model.py", "imports": ["PIL", "numpy", "torch", "json", "os", "typing"], "module": "modelscope.models.cv.shop_segmentation.shop_seg_model"}, "('MODELS', 'image-segmentation', 'fastinst')": {"filepath": "TEMPLATE_PATH/models/cv/image_instance_segmentation/fastinst_model.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_instance_segmentation.fastinst_model"}, "('MODELS', 'image-segmentation', 'cascade_mask_rcnn_swin')": {"filepath": "TEMPLATE_PATH/models/cv/image_instance_segmentation/model.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_instance_segmentation.model"}, "('MODELS', 'image-segmentation', 'maskdino_swin')": {"filepath": "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino_model.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_instance_segmentation.maskdino_model"}, "('MODELS', 'video-text-retrieval', 'vop-retrieval-model')": {"filepath": "TEMPLATE_PATH/models/cv/vop_retrieval/model.py", "imports": ["torch", "os"], "module": "modelscope.models.cv.vop_retrieval.model"}, "('MODELS', 'video-text-retrieval', 'vop-retrieval-model-se')": {"filepath": "TEMPLATE_PATH/models/cv/vop_retrieval/model_se.py", "imports": ["torch", "os"], "module": "modelscope.models.cv.vop_retrieval.model_se"}, "('HEADS', 'default', 'KernelUpdateHeadVideo')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/track/kernel_update_head.py", "imports": ["torch", "mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.video_instance_segmentation.track.kernel_update_head"}, "('MATCH_COST', 'default', 'MaskCost')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/track/mask_hungarian_assigner.py", "imports": ["torch", "scipy", "mmdet", "numpy"], "module": "modelscope.models.cv.video_instance_segmentation.track.mask_hungarian_assigner"}, "('BBOX_ASSIGNERS', 'default', 'MaskHungarianAssignerVideo')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/track/mask_hungarian_assigner.py", "imports": ["torch", "scipy", "mmdet", "numpy"], "module": "modelscope.models.cv.video_instance_segmentation.track.mask_hungarian_assigner"}, "('MODELS', 'video-instance-segmentation', 
'swinb-video-instance-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/video_knet.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.video_instance_segmentation.video_knet"}, "('TRANSFORMER_LAYER', 'default', 'KernelUpdator')": {"filepath": "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/kernel_updator.py", "imports": ["torch", "mmcv"], "module": "modelscope.models.cv.video_panoptic_segmentation.head.kernel_updator"}, "('HEADS', 'default', 'KernelUpdateHead')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_update_head.py", "imports": ["torch", "mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.video_instance_segmentation.head.kernel_update_head"}, "('HEADS', 'default', 'KernelFrameIterHeadVideo')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_frame_iter_head.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.video_instance_segmentation.head.kernel_frame_iter_head"}, "('HEADS', 'default', 'ConvKernelHeadVideo')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_head.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.video_instance_segmentation.head.kernel_head"}, "('HEADS', 'default', 'KernelIterHeadVideo')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_iter_head.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.video_instance_segmentation.head.kernel_iter_head"}, "('NECKS', 'default', 'MSDeformAttnPixelDecoder')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/neck/msdeformattn_decoder.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.video_instance_segmentation.neck.msdeformattn_decoder"}, "('MODELS', 'image-super-resolution', 'ecbsr')": {"filepath": "TEMPLATE_PATH/models/cv/super_resolution/ecbsr_model.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.super_resolution.ecbsr_model"}, "('PREPROCESSORS', 'cv', 'ocr-detection')": {"filepath": "TEMPLATE_PATH/models/cv/ocr_detection/preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "module": "modelscope.models.cv.ocr_detection.preprocessor"}, "('MODELS', 'ocr-detection', 'OCRDetection')": {"filepath": "TEMPLATE_PATH/models/cv/ocr_detection/model.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.models.cv.ocr_detection.model"}, "('MODELS', 'panorama-depth-estimation', 'unifuse-depth-estimation')": {"filepath": "TEMPLATE_PATH/models/cv/panorama_depth_estimation/unifuse_model.py", "imports": ["torch", "os", "torchvision", "numpy"], "module": "modelscope.models.cv.panorama_depth_estimation.unifuse_model"}, "('MODELS', 'video-object-detection', 'realtime-video-object-detection')": {"filepath": "TEMPLATE_PATH/models/cv/stream_yolo/realtime_video_detector.py", "imports": ["numpy", "cv2", "torch", "logging", "json", "tqdm", "time", "os", "argparse"], "module": "modelscope.models.cv.stream_yolo.realtime_video_detector"}, "('MODELS', 'bad-image-detecting', 'bad-image-detecting')": {"filepath": "TEMPLATE_PATH/models/cv/bad_image_detecting/bad_image_detecting.py", "imports": ["numpy", "torch", "torchvision", "os", "typing"], "module": "modelscope.models.cv.bad_image_detecting.bad_image_detecting"}, "('MODELS', 'human-reconstruction', 'human-reconstruction')": {"filepath": "TEMPLATE_PATH/models/cv/human_reconstruction/Reconstruction.py", "imports": ["PIL", "skimage", 
"numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.models.cv.human_reconstruction.Reconstruction"}, "('PREPROCESSORS', 'cv', 'image-driving-perception-preprocessor')": {"filepath": "TEMPLATE_PATH/models/cv/image_driving_perception/preprocessor.py", "imports": ["cv2", "torch", "typing", "numpy"], "module": "modelscope.models.cv.image_driving_perception.preprocessor"}, "('MODELS', 'image-driving-perception', 'yolopv2')": {"filepath": "TEMPLATE_PATH/models/cv/image_driving_perception/image_driving_percetion_model.py", "imports": ["numpy", "cv2", "torch", "os", "typing"], "module": "modelscope.models.cv.image_driving_perception.image_driving_percetion_model"}, "('MODELS', 'video-object-detection', 'longshortnet')": {"filepath": "TEMPLATE_PATH/models/cv/video_streaming_perception/longshortnet/longshortnet.py", "imports": ["numpy", "cv2", "torch", "logging", "json", "tqdm", "time", "os", "argparse"], "module": "modelscope.models.cv.video_streaming_perception.longshortnet.longshortnet"}, "('MODELS', 'image-paintbyexample', 'Stablediffusion-Paintbyexample')": {"filepath": "TEMPLATE_PATH/models/cv/image_paintbyexample/model.py", "imports": ["torch", "paint_ldm", "omegaconf", "os", "typing"], "module": "modelscope.models.cv.image_paintbyexample.model"}, "('MODELS', 'image-inpainting', 'FFTInpainting')": {"filepath": "TEMPLATE_PATH/models/cv/image_inpainting/model.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_inpainting.model"}, "('BBOX_CODERS', 'default', 'NMSFreeCoder')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.core.bbox.coders.nms_free_coder"}, "('MATCH_COST', 'default', 'BBox3DL1Cost')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/match_costs/match_cost.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.core.bbox.match_costs.match_cost"}, "('BBOX_ASSIGNERS', 'default', 'HungarianAssigner3D')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py", "imports": ["torch", "scipy", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.core.bbox.assigners.hungarian_assigner_3d"}, "('DATASETS', 'default', 'CustomNuScenesDataset')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/nuscenes_dataset.py", "imports": ["mmdet3d", "mmdet", "numpy"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.nuscenes_dataset"}, "('PIPELINES', 'default', 'LoadMultiViewImageFromMultiSweepsFiles')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/loading.py", "imports": ["mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.pipelines.loading"}, "('PIPELINES', 'default', 'PadMultiViewImage')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/transform_3d.py", "imports": ["PIL", "copy", "mmcv", "numpy", "torch", "mmdet3d", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.pipelines.transform_3d"}, "('PIPELINES', 'default', 'NormalizeMultiviewImage')": {"filepath": 
"TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/transform_3d.py", "imports": ["PIL", "copy", "mmcv", "numpy", "torch", "mmdet3d", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.pipelines.transform_3d"}, "('PIPELINES', 'default', 'ResizeCropFlipImage')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/transform_3d.py", "imports": ["PIL", "copy", "mmcv", "numpy", "torch", "mmdet3d", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.pipelines.transform_3d"}, "('HEADS', 'default', 'PETRv2DEDNHead')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/dense_heads/petrv2_dednhead.py", "imports": ["mmcv", "numpy", "torch", "math", "copy", "mmdet3d", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.dense_heads.petrv2_dednhead"}, "('NECKS', 'default', 'CPFPN')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/necks/cp_fpn.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.necks.cp_fpn"}, "('TRANSFORMER', 'default', 'PETRDNTransformer')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/petr_transformer.py", "imports": ["copy", "warnings", "mmcv", "torch", "mmdet", "typing", "math"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.petr_transformer"}, "('TRANSFORMER_LAYER', 'default', 'PETRTransformerDecoderLayer')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/petr_transformer.py", "imports": ["copy", "warnings", "mmcv", "torch", "mmdet", "typing", "math"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.petr_transformer"}, "('ATTENTION', 'default', 'PETRMultiheadAttention')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/petr_transformer.py", "imports": ["copy", "warnings", "mmcv", "torch", "mmdet", "typing", "math"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.petr_transformer"}, "('TRANSFORMER_LAYER_SEQUENCE', 'default', 'PETRTransformerEncoder')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/petr_transformer.py", "imports": ["copy", "warnings", "mmcv", "torch", "mmdet", "typing", "math"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.petr_transformer"}, "('TRANSFORMER_LAYER_SEQUENCE', 'default', 'PETRTransformerDecoder')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/petr_transformer.py", "imports": ["copy", "warnings", "mmcv", "torch", "mmdet", "typing", "math"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.petr_transformer"}, "('POSITIONAL_ENCODING', 'default', 'SinePositionalEncoding3D')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/positional_encoding.py", "imports": ["torch", "mmcv", "math"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.positional_encoding"}, "('BACKBONES', 'default', 'VoVNet')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/backbones/vovnet.py", "imports": ["torch", "mmdet", "mmcv", 
"collections"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.backbones.vovnet"}, "('DETECTORS', 'default', 'Petr3D')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/detectors/petr3d.py", "imports": ["mmcv", "numpy", "torch", "mmdet3d", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.detectors.petr3d"}, "('MODELS', 'object-detection-3d', 'depe')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/depe_detect.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.models.cv.object_detection_3d.depe.depe_detect"}, "('MODELS', 'image-quality-assessment-mos', 'image-quality-assessment-mos')": {"filepath": "TEMPLATE_PATH/models/cv/image_quality_assessment_mos/image_quality_assessment_mos.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_quality_assessment_mos.image_quality_assessment_mos"}, "('MODELS', 'image-debanding', 'rrdb')": {"filepath": "TEMPLATE_PATH/models/cv/image_debanding/rrdb/rrdb_image_debanding.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_debanding.rrdb.rrdb_image_debanding"}, "('MODELS', 'image-demoireing', 'image-restoration')": {"filepath": "TEMPLATE_PATH/models/cv/image_restoration/image_restoration_model.py", "imports": ["cv2", "torch", "os", "numpy"], "module": "modelscope.models.cv.image_restoration.image_restoration_model"}, "('MODELS', 'vision-efficient-tuning', 'vision-efficient-tuning')": {"filepath": "TEMPLATE_PATH/models/cv/vision_efficient_tuning/model.py", "imports": ["torch", "typing"], "module": "modelscope.models.cv.vision_efficient_tuning.model"}, "('MODELS', 'movie-scene-segmentation', 'resnet50-bert')": {"filepath": "TEMPLATE_PATH/models/cv/movie_scene_segmentation/model.py", "imports": ["PIL", "numpy", "torch", "einops", "tqdm", "math", "shotdetect_scenedetect_lgss", "torchvision", "os", "typing"], "module": "modelscope.models.cv.movie_scene_segmentation.model"}, "('MODELS', 'video-summarization', 'pgl-video-summarization')": {"filepath": "TEMPLATE_PATH/models/cv/video_summarization/summarizer.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.models.cv.video_summarization.summarizer"}, "('MODELS', 'lineless-table-recognition', 'LoreModel')": {"filepath": "TEMPLATE_PATH/models/cv/table_recognition/model_lore.py", "imports": ["numpy", "torch", "math", "copy", "os", "typing"], "module": "modelscope.models.cv.table_recognition.model_lore"}, "('MODELS', 'image-matching', 'quadtree-attention-image-matching')": {"filepath": "TEMPLATE_PATH/models/cv/image_matching/quadtree_attention_model.py", "imports": ["numpy", "cv2", "torch", "pathlib", "os"], "module": "modelscope.models.cv.image_matching.quadtree_attention_model"}, "('MODELS', 'image-object-detection', 'tinynas-detection')": {"filepath": "TEMPLATE_PATH/models/cv/tinynas_detection/tinynas_detector.py", "imports": [], "module": "modelscope.models.cv.tinynas_detection.tinynas_detector"}, "('MODELS', 'domain-specific-object-detection', 'tinynas-damoyolo')": {"filepath": "TEMPLATE_PATH/models/cv/tinynas_detection/tinynas_damoyolo.py", "imports": [], "module": "modelscope.models.cv.tinynas_detection.tinynas_damoyolo"}, "('MODELS', 'image-object-detection', 'tinynas-damoyolo')": {"filepath": "TEMPLATE_PATH/models/cv/tinynas_detection/tinynas_damoyolo.py", "imports": [], "module": "modelscope.models.cv.tinynas_detection.tinynas_damoyolo"}, "('MODELS', 'nerf-recon-acc', 
'nerf-recon-acc')": {"filepath": "TEMPLATE_PATH/models/cv/nerf_recon_acc/nerf_recon_acc.py", "imports": ["numpy", "cv2", "torch", "tqdm", "time", "os", "glob"], "module": "modelscope.models.cv.nerf_recon_acc.nerf_recon_acc"}, "('PREPROCESSORS', 'cv', 'nerf-recon-acc-preprocessor')": {"filepath": "TEMPLATE_PATH/models/cv/nerf_recon_acc/nerf_preprocess.py", "imports": ["subprocess", "tensorflow", "numpy", "cv2", "glob", "os", "typing"], "module": "modelscope.models.cv.nerf_recon_acc.nerf_preprocess"}, "('MODELS', 'video-deinterlace', 'video-deinterlace')": {"filepath": "TEMPLATE_PATH/models/cv/video_deinterlace/UNet_for_video_deinterlace.py", "imports": ["torch", "os", "copy", "typing"], "module": "modelscope.models.cv.video_deinterlace.UNet_for_video_deinterlace"}, "('MODELS', 'image-depth-estimation', 'bts-depth-estimation')": {"filepath": "TEMPLATE_PATH/models/cv/image_depth_estimation_bts/depth_estimation_bts_model.py", "imports": ["torch", "os"], "module": "modelscope.models.cv.image_depth_estimation_bts.depth_estimation_bts_model"}, "('MODELS', 'image-fewshot-detection', 'defrcn')": {"filepath": "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/defrcn_for_fewshot.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_defrcn_fewshot.defrcn_for_fewshot"}, "('PREPROCESSORS', 'cv', 'ocr-recognition')": {"filepath": "TEMPLATE_PATH/models/cv/ocr_recognition/preprocessor.py", "imports": ["PIL", "numpy", "torch", "cv2", "os"], "module": "modelscope.models.cv.ocr_recognition.preprocessor"}, "('MODELS', 'ocr-recognition', 'OCRRecognition')": {"filepath": "TEMPLATE_PATH/models/cv/ocr_recognition/model.py", "imports": ["torch", "os"], "module": "modelscope.models.cv.ocr_recognition.model"}, "('TRACKERS', 'default', 'QuasiDenseEmbedTracker')": {"filepath": "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/track/quasi_dense_embed_tracker.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.video_panoptic_segmentation.track.quasi_dense_embed_tracker"}, "('HEADS', 'default', 'VideoKernelUpdateHead')": {"filepath": "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/kernel_update_head.py", "imports": ["torch", "mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.video_panoptic_segmentation.head.kernel_update_head"}, "('NECKS', 'default', 'SemanticFPNWrapper')": {"filepath": "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/semantic_fpn_wrapper.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.video_panoptic_segmentation.head.semantic_fpn_wrapper"}, "('HEADS', 'default', 'VideoKernelIterHead')": {"filepath": "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/kernel_iter_head.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.video_panoptic_segmentation.head.kernel_iter_head"}, "('MODELS', 'video-panoptic-segmentation', 'swinb-video-panoptic-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/video_k_net.py", "imports": ["torch", "mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.video_panoptic_segmentation.video_k_net"}, "('MODELS', 'open-vocabulary-detection', 'open-vocabulary-detection-vild')": {"filepath": "TEMPLATE_PATH/models/cv/open_vocabulary_detection_vild/vild.py", "imports": ["tensorflow", "numpy", "torch", "scipy", "clip", "os", "typing"], "module": "modelscope.models.cv.open_vocabulary_detection_vild.vild"}, "('MODELS', 'image-reid-person', 'passvitb')": {"filepath": "TEMPLATE_PATH/models/cv/image_reid_person/pass_model.py", "imports": 
["torch", "os", "enum"], "module": "modelscope.models.cv.image_reid_person.pass_model"}, "('MODELS', 'image-face-fusion', 'image-face-fusion')": {"filepath": "TEMPLATE_PATH/models/cv/image_face_fusion/image_face_fusion.py", "imports": ["PIL", "numpy", "torch", "cv2", "collections", "torchvision", "os", "typing"], "module": "modelscope.models.cv.image_face_fusion.image_face_fusion"}, "('MODELS', 'product-segmentation', 'product-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/product_segmentation/seg_infer.py", "imports": ["PIL", "torch", "cv2", "numpy"], "module": "modelscope.models.cv.product_segmentation.seg_infer"}, "('MODELS', 'controllable-image-generation', 'controllable-image-generation')": {"filepath": "TEMPLATE_PATH/models/cv/controllable_image_generation/controlnet.py", "imports": ["PIL", "random", "numpy", "cv2", "torch", "einops", "tempfile", "sys", "math", "control_ldm", "os", "typing"], "module": "modelscope.models.cv.controllable_image_generation.controlnet"}, "('MODELS', 'video-inpainting', 'video-inpainting')": {"filepath": "TEMPLATE_PATH/models/cv/video_inpainting/inpainting_model.py", "imports": ["torch", "torchvision", "math", "numpy"], "module": "modelscope.models.cv.video_inpainting.inpainting_model"}, "('MODELS', 'image-multi-view-depth-estimation', 'image-casmvs-depth-estimation')": {"filepath": "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/casmvs_model.py", "imports": ["numpy", "cv2", "torch", "os", "easydict"], "module": "modelscope.models.cv.image_mvs_depth_estimation.casmvs_model"}, "('MODELS', 'image-classification', 'bnext')": {"filepath": "TEMPLATE_PATH/models/cv/image_binary_quant_classification/binary_quant_model.py", "imports": ["torch", "os", "collections"], "module": "modelscope.models.cv.image_binary_quant_classification.binary_quant_model"}, "('MODELS', 'body-3d-keypoints', 'hdformer')": {"filepath": "TEMPLATE_PATH/models/cv/body_3d_keypoints/hdformer/hdformer_detector.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.models.cv.body_3d_keypoints.hdformer.hdformer_detector"}, "('MODELS', 'body-3d-keypoints', 'body-3d-keypoints')": {"filepath": "TEMPLATE_PATH/models/cv/body_3d_keypoints/cannonical_pose/body_3d_pose.py", "imports": ["numpy", "torch", "logging", "os", "typing"], "module": "modelscope.models.cv.body_3d_keypoints.cannonical_pose.body_3d_pose"}, "('MODELS', 'video-frame-interpolation', 'video-frame-interpolation')": {"filepath": "TEMPLATE_PATH/models/cv/video_frame_interpolation/VFINet_for_video_frame_interpolation.py", "imports": ["torch", "os", "copy", "typing"], "module": "modelscope.models.cv.video_frame_interpolation.VFINet_for_video_frame_interpolation"}, "('HEADS', 'default', 'RPNNHead')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/dense_heads/rpn_head.py", "imports": ["torch", "copy", "mmcv", "mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.dense_heads.rpn_head"}, "('HEADS', 'default', 'AnchorNHead')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/dense_heads/anchor_head.py", "imports": ["mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.dense_heads.anchor_head"}, "('NECKS', 'default', 'FPNF')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/necks/fpn.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.necks.fpn"}, "('BACKBONES', 'default', 'ViT')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/backbones/vit.py", "imports": ["timm", "torch", 
"functools", "math", "mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.backbones.vit"}, "('HEADS', 'default', 'ConvFCBBoxNHead')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.roi_heads.bbox_heads.convfc_bbox_head"}, "('HEADS', 'default', 'Shared2FCBBoxNHead')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.roi_heads.bbox_heads.convfc_bbox_head"}, "('HEADS', 'default', 'Shared4Conv1FCBBoxNHead')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.roi_heads.bbox_heads.convfc_bbox_head"}, "('HEADS', 'default', 'FCNMaskNHead')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/fcn_mask_head.py", "imports": ["mmcv", "numpy", "torch", "warnings", "mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.roi_heads.mask_heads.fcn_mask_head"}, "('MODELS', 'human-detection', 'detection')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_model.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.object_detection.mmdet_model"}, "('MODELS', 'image-object-detection', 'detection')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_model.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.object_detection.mmdet_model"}, "('MODELS', 'pedestrian-attribute-recognition', 'pedestrian-attribute-recognition')": {"filepath": "TEMPLATE_PATH/models/cv/pedestrian_attribute_recognition/model.py", "imports": ["torch", "os", "torchvision", "numpy"], "module": "modelscope.models.cv.pedestrian_attribute_recognition.model"}, "('MODELS', 'pointcloud-sceneflow-estimation', 'rcp-sceneflow-estimation')": {"filepath": "TEMPLATE_PATH/models/cv/pointcloud_sceneflow_estimation/rcp_model.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.pointcloud_sceneflow_estimation.rcp_model"}, "('MODELS', 'video-stabilization', 'video-stabilization')": {"filepath": "TEMPLATE_PATH/models/cv/video_stabilization/DUTRAFTStabilizer.py", "imports": ["numpy", "cv2", "torch", "tempfile", "sys", "math", "os", "typing"], "module": "modelscope.models.cv.video_stabilization.DUTRAFTStabilizer"}, "('MODELS', 'video-depth-estimation', 'dro-resnet18-depth-estimation')": {"filepath": "TEMPLATE_PATH/models/cv/video_depth_estimation/dro_model.py", "imports": ["numpy", "cv2", "torch", "tqdm", "os", "glob"], "module": "modelscope.models.cv.video_depth_estimation.dro_model"}, "('MODELS', 'image-object-detection', 'vidt')": {"filepath": "TEMPLATE_PATH/models/cv/vidt/model.py", "imports": ["torch", "os"], "module": "modelscope.models.cv.vidt.model"}, "('MODELS', 'face-human-hand-detection', 'face-human-hand-detection')": {"filepath": "TEMPLATE_PATH/models/cv/face_human_hand_detection/det_infer.py", "imports": ["cv2", "torch", "numpy"], "module": "modelscope.models.cv.face_human_hand_detection.det_infer"}, "('MODELS', 'referring-video-object-segmentation', 'swinT-referring-video-object-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/model.py", "imports": ["torch", "os", "typing"], "module": 
"modelscope.models.cv.referring_video_object_segmentation.model"}, "('MODELS', 'hand-static', 'hand-static')": {"filepath": "TEMPLATE_PATH/models/cv/hand_static/hand_model.py", "imports": ["PIL", "numpy", "torch", "cv2", "sys", "torchvision", "os"], "module": "modelscope.models.cv.hand_static.hand_model"}, "('MODELS', 'image-depth-estimation', 'newcrfs-depth-estimation')": {"filepath": "TEMPLATE_PATH/models/cv/image_depth_estimation/newcrfs_model.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.image_depth_estimation.newcrfs_model"}, "('MODELS', 'image-colorization', 'ddcolor')": {"filepath": "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/ddcolor_for_image_colorization.py", "imports": ["numpy", "torch", "copy", "os", "typing"], "module": "modelscope.models.cv.image_colorization.ddcolor.ddcolor_for_image_colorization"}, "('MODELS', 'face-detection', 'retinaface')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/retinaface/detection.py", "imports": ["cv2", "torch", "numpy"], "module": "modelscope.models.cv.face_detection.retinaface.detection"}, "('MODELS', 'face-detection', 'mtcnn')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/mtcnn/models/detector.py", "imports": ["PIL", "torch", "os", "numpy"], "module": "modelscope.models.cv.face_detection.mtcnn.models.detector"}, "('MODELS', 'face-detection', 'ulfd')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/detection.py", "imports": ["cv2", "torch", "os", "numpy"], "module": "modelscope.models.cv.face_detection.ulfd_slim.detection"}, "('MODELS', 'face-detection', 'scrfd')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/scrfd_detect.py", "imports": ["numpy", "torch", "copy", "os", "typing"], "module": "modelscope.models.cv.face_detection.scrfd.scrfd_detect"}, "('MODELS', 'card-detection', 'scrfd')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/scrfd_detect.py", "imports": ["numpy", "torch", "copy", "os", "typing"], "module": "modelscope.models.cv.face_detection.scrfd.scrfd_detect"}, "('DATASETS', 'default', 'RetinaFaceDataset')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/retinaface.py", "imports": ["mmdet", "numpy"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.retinaface"}, "('PIPELINES', 'default', 'RotateV2')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/auto_augment.py", "imports": ["copy", "mmcv", "numpy", "cv2", "mmdet"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.auto_augment"}, "('PIPELINES', 'default', 'ResizeV2')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/transforms.py", "imports": ["mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.transforms"}, "('PIPELINES', 'default', 'RandomFlipV2')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/transforms.py", "imports": ["mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.transforms"}, "('PIPELINES', 'default', 'RandomSquareCrop')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/transforms.py", "imports": ["mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.transforms"}, "('PIPELINES', 'default', 'LoadAnnotationsV2')": {"filepath": 
"TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/loading.py", "imports": ["os", "mmdet", "numpy", "pycocotools"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.loading"}, "('PIPELINES', 'default', 'DefaultFormatBundleV2')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/formating.py", "imports": ["torch", "mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.formating"}, "('HEADS', 'default', 'SCRFDHead')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/dense_heads/scrfd_head.py", "imports": ["torch", "mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.dense_heads.scrfd_head"}, "('BACKBONES', 'default', 'MasterNet')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/master_net.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.backbones.master_net"}, "('BACKBONES', 'default', 'MobileNetV1')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/mobilenet.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.backbones.mobilenet"}, "('BACKBONES', 'default', 'ResNetV1e')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/resnet.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.backbones.resnet"}, "('DETECTORS', 'default', 'SCRFD')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/scrfd.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors.scrfd"}, "('DETECTORS', 'default', 'CustomSingleStageDetector')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/single_stage.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors.single_stage"}, "('DETECTORS', 'default', 'TinyMog')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/tinymog.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors.tinymog"}, "('MODELS', 'face-detection', 'tinymog')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/tinymog_detect.py", "imports": ["torch", "os", "copy", "typing"], "module": "modelscope.models.cv.face_detection.scrfd.tinymog_detect"}, "('PREPROCESSORS', 'cv', 'object-detection-scrfd')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/preprocessor.py", "imports": ["PIL", "typing", "numpy"], "module": "modelscope.models.cv.face_detection.scrfd.preprocessor"}, "('MODELS', 'face-detection', 'damofd')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/damofd_detect.py", "imports": ["torch", "os", "copy", "typing"], "module": "modelscope.models.cv.face_detection.scrfd.damofd_detect"}, "('MODELS', 'face-detection', 'mogface')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/mogface/models/detectors.py", "imports": ["cv2", "torch", "os", "numpy"], "module": "modelscope.models.cv.face_detection.mogface.models.detectors"}, "('MODELS', 'image-classification', 'EasyRobustModel')": {"filepath": 
"TEMPLATE_PATH/models/cv/robust_image_classification/easyrobust_model.py", "imports": ["torch", "os"], "module": "modelscope.models.cv.robust_image_classification.easyrobust_model"}, "('MODELS', 'semantic-segmentation', 'ddpm')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/ddpm_segmentation_model.py", "imports": ["torch", "os", "typing", "ddpm_guided_diffusion"], "module": "modelscope.models.cv.image_semantic_segmentation.ddpm_segmentation_model"}, "('PIPELINES', 'default', 'ResizeToMultiple')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/utils/data_process_func.py", "imports": ["mmcv", "mmdet"], "module": "modelscope.models.cv.image_semantic_segmentation.vit_adapter.utils.data_process_func"}, "('BACKBONES', 'default', 'BEiTAdapter')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/beit_adapter.py", "imports": ["timm", "torch", "logging", "math", "mmdet"], "module": "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.backbone.beit_adapter"}, "('BACKBONES', 'default', 'BASEBEiT')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/base/beit.py", "imports": ["timm", "mmcv", "torch", "mmdet", "functools", "math"], "module": "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.backbone.base.beit"}, "('DETECTORS', 'default', 'EncoderDecoderMask2Former')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/encoder_decoder_mask2former.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.segmentors.encoder_decoder_mask2former"}, "('HEADS', 'default', 'Mask2FormerHeadFromMMSeg')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/mask2former_head_from_mmseg.py", "imports": ["torch", "copy", "mmcv", "mmdet"], "module": "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.decode_heads.mask2former_head_from_mmseg"}, "('MODELS', 'image-segmentation', 'swinL-semantic-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/semantic_seg_model.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.image_semantic_segmentation.semantic_seg_model"}, "('MODELS', 'image-segmentation', 'vitadapter-semantic-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/semantic_seg_model.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.image_semantic_segmentation.semantic_seg_model"}, "('HEADS', 'default', 'MaskFormerSemanticHead')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/pan_merge/maskformer_semantic_head.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.image_semantic_segmentation.pan_merge.maskformer_semantic_head"}, "('MODELS', 'text-driven-segmentation', 'text-driven-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/text_driven_segmentation/lseg_model.py", "imports": ["PIL", "numpy", "torch", "json", "os", "typing"], "module": "modelscope.models.cv.text_driven_segmentation.lseg_model"}, "('MODELS', 'crowd-counting', 'HRNetCrowdCounting')": {"filepath": "TEMPLATE_PATH/models/cv/crowd_counting/cc_model.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.crowd_counting.cc_model"}, "('MODELS', 'image-segmentation', 'swinL-panoptic-segmentation')": {"filepath": 
"TEMPLATE_PATH/models/cv/image_panoptic_segmentation/panseg_model.py", "imports": ["torch", "os"], "module": "modelscope.models.cv.image_panoptic_segmentation.panseg_model"}, "('MODELS', 'face-emotion', 'face-emotion')": {"filepath": "TEMPLATE_PATH/models/cv/face_emotion/emotion_model.py", "imports": ["torch", "os", "sys"], "module": "modelscope.models.cv.face_emotion.emotion_model"}, "('MODELS', 'video-super-resolution', 'msrresnet-lite')": {"filepath": "TEMPLATE_PATH/models/cv/video_super_resolution/msrresnet_lite_model.py", "imports": ["torch", "os", "functools", "typing"], "module": "modelscope.models.cv.video_super_resolution.msrresnet_lite_model"}, "('MODELS', 'video-super-resolution', 'real-basicvsr')": {"filepath": "TEMPLATE_PATH/models/cv/video_super_resolution/real_basicvsr_for_video_super_resolution.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.video_super_resolution.real_basicvsr_for_video_super_resolution"}, "('MODELS', 'face-attribute-recognition', 'fairface')": {"filepath": "TEMPLATE_PATH/models/cv/face_attribute_recognition/fair_face/face_attribute_recognition.py", "imports": ["PIL", "numpy", "torch", "cv2", "torchvision", "os"], "module": "modelscope.models.cv.face_attribute_recognition.fair_face.face_attribute_recognition"}, "('MODELS', 'image-denoising', 'nafnet')": {"filepath": "TEMPLATE_PATH/models/cv/image_denoise/nafnet_for_image_denoise.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_denoise.nafnet_for_image_denoise"}, "('MODELS', 'image-classification', 'ClassificationModel')": {"filepath": "TEMPLATE_PATH/models/cv/image_classification/mmcls_model.py", "imports": ["os"], "module": "modelscope.models.cv.image_classification.mmcls_model"}, "('BACKBONES', 'default', 'BEiTv2')": {"filepath": "TEMPLATE_PATH/models/cv/image_classification/backbones/beit_v2.py", "imports": ["itertools", "mmcv", "torch", "einops", "functools", "mmcls", "math", "collections", "warnings", "os", "typing"], "module": "modelscope.models.cv.image_classification.backbones.beit_v2"}, "('BACKBONES', 'default', 'NextViT')": {"filepath": "TEMPLATE_PATH/models/cv/image_classification/backbones/nextvit.py", "imports": ["itertools", "mmcv", "torch", "einops", "functools", "mmcls", "math", "collections", "warnings", "os", "typing"], "module": "modelscope.models.cv.image_classification.backbones.nextvit"}, "('MODELS', 'image-classification', 'content-check')": {"filepath": "TEMPLATE_PATH/models/cv/image_classification/resnet50_cc.py", "imports": ["torch", "math", "collections", "torchvision", "os"], "module": "modelscope.models.cv.image_classification.resnet50_cc"}, "('MODELS', 'image-color-enhancement', 'deeplpfnet')": {"filepath": "TEMPLATE_PATH/models/cv/image_color_enhance/deeplpf/deeplpf_image_color_enhance.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_color_enhance.deeplpf.deeplpf_image_color_enhance"}, "('MODELS', 'image-color-enhancement', 'csrnet')": {"filepath": "TEMPLATE_PATH/models/cv/image_color_enhance/image_color_enhance.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_color_enhance.image_color_enhance"}, "('MODELS', 'image-color-enhancement', 'adaint')": {"filepath": "TEMPLATE_PATH/models/cv/image_color_enhance/adaint/adaint.py", "imports": ["numbers", "torch", "torchvision", "os", "typing"], "module": "modelscope.models.cv.image_color_enhance.adaint.adaint"}, "('METRICS', 'default', 'image-quality-assessment-degradation-metric')": {"filepath": 
"TEMPLATE_PATH/metrics/image_quality_assessment_degradation_metric.py", "imports": ["numpy", "cv2", "torch", "scipy", "tempfile", "sys", "collections", "tqdm", "os", "typing"], "module": "modelscope.metrics.image_quality_assessment_degradation_metric"}, "('METRICS', 'default', 'prediction-saving-wrapper')": {"filepath": "TEMPLATE_PATH/metrics/prediction_saving_wrapper.py", "imports": ["typing", "sklearn", "numpy"], "module": "modelscope.metrics.prediction_saving_wrapper"}, "('METRICS', 'default', 'video-stabilization-metric')": {"filepath": "TEMPLATE_PATH/metrics/video_stabilization_metric.py", "imports": ["numpy", "cv2", "tqdm", "tempfile", "sys", "os", "typing"], "module": "modelscope.metrics.video_stabilization_metric"}, "('METRICS', 'default', 'ppl')": {"filepath": "TEMPLATE_PATH/metrics/ppl_metric.py", "imports": ["torch", "typing", "math", "numpy"], "module": "modelscope.metrics.ppl_metric"}, "('METRICS', 'default', 'inbatch_recall')": {"filepath": "TEMPLATE_PATH/metrics/inbatch_recall_metric.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.metrics.inbatch_recall_metric"}, "('METRICS', 'default', 'loss-metric')": {"filepath": "TEMPLATE_PATH/metrics/loss_metric.py", "imports": ["typing", "sklearn", "numpy"], "module": "modelscope.metrics.loss_metric"}, "('METRICS', 'default', 'ocr-recognition-metric')": {"filepath": "TEMPLATE_PATH/metrics/ocr_recognition_metric.py", "imports": ["torch", "edit_distance", "typing", "numpy"], "module": "modelscope.metrics.ocr_recognition_metric"}, "('METRICS', 'default', 'mAP')": {"filepath": "TEMPLATE_PATH/metrics/map_metric.py", "imports": ["typing", "numpy"], "module": "modelscope.metrics.map_metric"}, "('METRICS', 'default', 'image-colorization-metric')": {"filepath": "TEMPLATE_PATH/metrics/image_colorization_metric.py", "imports": ["numpy", "cv2", "torch", "scipy", "torchvision", "typing"], "module": "modelscope.metrics.image_colorization_metric"}, "('METRICS', 'default', 'seq-cls-metric')": {"filepath": "TEMPLATE_PATH/metrics/sequence_classification_metric.py", "imports": ["typing", "sklearn", "numpy"], "module": "modelscope.metrics.sequence_classification_metric"}, "('METRICS', 'default', 'audio-noise-metric')": {"filepath": "TEMPLATE_PATH/metrics/audio_noise_metric.py", "imports": ["typing"], "module": "modelscope.metrics.audio_noise_metric"}, "('METRICS', 'default', 'translation-evaluation-metric')": {"filepath": "TEMPLATE_PATH/metrics/translation_evaluation_metric.py", "imports": ["pandas", "typing", "importlib"], "module": "modelscope.metrics.translation_evaluation_metric"}, "('METRICS', 'default', 'video-frame-interpolation-metric')": {"filepath": "TEMPLATE_PATH/metrics/video_frame_interpolation_metric.py", "imports": ["numpy", "torch", "lpips", "math", "typing"], "module": "modelscope.metrics.video_frame_interpolation_metric"}, "('METRICS', 'default', 'image-inpainting-metric')": {"filepath": "TEMPLATE_PATH/metrics/image_inpainting_metric.py", "imports": ["torch", "scipy", "typing", "numpy"], "module": "modelscope.metrics.image_inpainting_metric"}, "('METRICS', 'default', 'image-denoise-metric')": {"filepath": "TEMPLATE_PATH/metrics/image_denoise_metric.py", "imports": ["cv2", "torch", "typing", "numpy"], "module": "modelscope.metrics.image_denoise_metric"}, "('METRICS', 'default', 'referring-video-object-segmentation-metric')": {"filepath": "TEMPLATE_PATH/metrics/referring_video_object_segmentation_metric.py", "imports": ["numpy", "pycocotools", "torch", "tqdm", "typing"], "module": 
"modelscope.metrics.referring_video_object_segmentation_metric"}, "('METRICS', 'default', 'token-cls-metric')": {"filepath": "TEMPLATE_PATH/metrics/token_classification_metric.py", "imports": ["typing", "numpy", "importlib"], "module": "modelscope.metrics.token_classification_metric"}, "('METRICS', 'default', 'video-summarization-metric')": {"filepath": "TEMPLATE_PATH/metrics/video_summarization_metric.py", "imports": ["typing", "numpy"], "module": "modelscope.metrics.video_summarization_metric"}, "('METRICS', 'default', 'image-quality-assessment-mos-metric')": {"filepath": "TEMPLATE_PATH/metrics/image_quality_assessment_mos_metric.py", "imports": ["numpy", "cv2", "torch", "scipy", "tempfile", "sys", "tqdm", "os", "typing"], "module": "modelscope.metrics.image_quality_assessment_mos_metric"}, "('METRICS', 'default', 'ned')": {"filepath": "TEMPLATE_PATH/metrics/ned_metric.py", "imports": ["typing", "numpy"], "module": "modelscope.metrics.ned_metric"}, "('METRICS', 'default', 'text-ranking-metric')": {"filepath": "TEMPLATE_PATH/metrics/text_ranking_metric.py", "imports": ["typing", "numpy"], "module": "modelscope.metrics.text_ranking_metric"}, "('METRICS', 'default', 'movie-scene-segmentation-metric')": {"filepath": "TEMPLATE_PATH/metrics/movie_scene_segmentation_metric.py", "imports": ["typing", "numpy"], "module": "modelscope.metrics.movie_scene_segmentation_metric"}, "('METRICS', 'default', 'accuracy')": {"filepath": "TEMPLATE_PATH/metrics/accuracy_metric.py", "imports": ["typing", "numpy"], "module": "modelscope.metrics.accuracy_metric"}, "('METRICS', 'default', 'image-ins-seg-coco-metric')": {"filepath": "TEMPLATE_PATH/metrics/image_instance_segmentation_metric.py", "imports": ["numpy", "pycocotools", "tempfile", "collections", "os", "typing"], "module": "modelscope.metrics.image_instance_segmentation_metric"}, "('METRICS', 'default', 'video-super-resolution-metric')": {"filepath": "TEMPLATE_PATH/metrics/video_super_resolution_metric/video_super_resolution_metric.py", "imports": ["typing", "numpy"], "module": "modelscope.metrics.video_super_resolution_metric.video_super_resolution_metric"}, "('METRICS', 'default', 'image-color-enhance-metric')": {"filepath": "TEMPLATE_PATH/metrics/image_color_enhance_metric.py", "imports": ["cv2", "typing", "numpy"], "module": "modelscope.metrics.image_color_enhance_metric"}, "('METRICS', 'default', 'image-portrait-enhancement-metric')": {"filepath": "TEMPLATE_PATH/metrics/image_portrait_enhancement_metric.py", "imports": ["cv2", "typing", "numpy"], "module": "modelscope.metrics.image_portrait_enhancement_metric"}, "('METRICS', 'default', 'bleu')": {"filepath": "TEMPLATE_PATH/metrics/bleu_metric.py", "imports": ["typing", "itertools", "sacrebleu"], "module": "modelscope.metrics.bleu_metric"}, "('METRICS', 'default', 'text-gen-metric')": {"filepath": "TEMPLATE_PATH/metrics/text_generation_metric.py", "imports": ["nltk", "rouge", "typing"], "module": "modelscope.metrics.text_generation_metric"}, "('PIPELINES', 'protein-structure', 'unifold-protein-structure')": {"filepath": "TEMPLATE_PATH/pipelines/science/protein_structure_pipeline.py", "imports": ["numpy", "json", "torch", "unicore", "time", "os", "typing"], "module": "modelscope.pipelines.science.protein_structure_pipeline"}, "('PIPELINES', 'task-template', 'pipeline-template')": {"filepath": "TEMPLATE_PATH/pipelines/pipeline_template.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.pipeline_template"}, "('PIPELINES', 'speech-timestamp', 'speech-timestamp-inference')": {"filepath": 
"TEMPLATE_PATH/pipelines/audio/timestamp_pipeline.py", "imports": ["json", "typing", "yaml", "os", "funasr"], "module": "modelscope.pipelines.audio.timestamp_pipeline"}, "('PIPELINES', 'keyword-spotting', 'speech_dfsmn_kws_char_farfield')": {"filepath": "TEMPLATE_PATH/pipelines/audio/kws_farfield_pipeline.py", "imports": ["numpy", "wave", "soundfile", "io", "typing"], "module": "modelscope.pipelines.audio.kws_farfield_pipeline"}, "('PIPELINES', 'speaker-verification', 'sv-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/speaker_verification_pipeline.py", "imports": ["os", "typing", "shutil", "yaml"], "module": "modelscope.pipelines.audio.speaker_verification_pipeline"}, "('PIPELINES', 'inverse-text-processing', 'itn-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/inverse_text_processing_pipeline.py", "imports": ["os", "typing", "shutil", "yaml"], "module": "modelscope.pipelines.audio.inverse_text_processing_pipeline"}, "('PIPELINES', 'speech-separation', 'speech-separation')": {"filepath": "TEMPLATE_PATH/pipelines/audio/separation_pipeline.py", "imports": ["numpy", "torch", "soundfile", "io", "typing"], "module": "modelscope.pipelines.audio.separation_pipeline"}, "('PIPELINES', 'voice-activity-detection', 'vad-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/voice_activity_detection_pipeline.py", "imports": ["json", "typing", "yaml", "os", "funasr"], "module": "modelscope.pipelines.audio.voice_activity_detection_pipeline"}, "('PIPELINES', 'text-to-speech', 'sambert-hifigan-tts')": {"filepath": "TEMPLATE_PATH/pipelines/audio/text_to_speech_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.audio.text_to_speech_pipeline"}, "('PIPELINES', 'keyword-spotting', 'kws-kwsbp')": {"filepath": "TEMPLATE_PATH/pipelines/audio/kws_kwsbp_pipeline.py", "imports": ["json", "os", "typing"], "module": "modelscope.pipelines.audio.kws_kwsbp_pipeline"}, "('PIPELINES', 'acoustic-echo-cancellation', 'speech-dfsmn-aec-psm-16k')": {"filepath": "TEMPLATE_PATH/pipelines/audio/linear_aec_pipeline.py", "imports": ["numpy", "torch", "scipy", "yaml", "importlib", "os", "typing"], "module": "modelscope.pipelines.audio.linear_aec_pipeline"}, "('PIPELINES', 'acoustic-noise-suppression', 'speech_frcrn_ans_cirm_16k')": {"filepath": "TEMPLATE_PATH/pipelines/audio/ans_pipeline.py", "imports": ["numpy", "torch", "librosa", "soundfile", "io", "typing"], "module": "modelscope.pipelines.audio.ans_pipeline"}, "('PIPELINES', 'speaker-verification', 'speaker-verification-eres2net')": {"filepath": "TEMPLATE_PATH/pipelines/audio/speaker_verification_eres2net_pipeline.py", "imports": ["torch", "io", "typing", "soundfile"], "module": "modelscope.pipelines.audio.speaker_verification_eres2net_pipeline"}, "('PIPELINES', 'language-score-prediction', 'language-score-prediction')": {"filepath": "TEMPLATE_PATH/pipelines/audio/lm_infer_pipeline.py", "imports": ["os", "typing"], "module": "modelscope.pipelines.audio.lm_infer_pipeline"}, "('PIPELINES', 'acoustic-noise-suppression', 'speech_dfsmn_ans_psm_48k_causal')": {"filepath": "TEMPLATE_PATH/pipelines/audio/ans_dfsmn_pipeline.py", "imports": ["numpy", "torch", "sys", "collections", "librosa", "soundfile", "io", "os", "typing"], "module": "modelscope.pipelines.audio.ans_dfsmn_pipeline"}, "('PIPELINES', 'auto-speech-recognition', 'asr-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/asr_inference_pipeline.py", "imports": ["json", "os", "typing", "yaml"], "module": "modelscope.pipelines.audio.asr_inference_pipeline"}, "('PIPELINES', 
'speaker-diarization', 'speaker-diarization-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/speaker_diarization_pipeline.py", "imports": ["shutil", "numpy", "json", "yaml", "os", "typing"], "module": "modelscope.pipelines.audio.speaker_diarization_pipeline"}, "('PIPELINES', 'speaker-verification', 'speaker-verification-rdino')": {"filepath": "TEMPLATE_PATH/pipelines/audio/speaker_verification_rdino_pipeline.py", "imports": ["torch", "io", "typing", "soundfile"], "module": "modelscope.pipelines.audio.speaker_verification_rdino_pipeline"}, "('PIPELINES', 'punctuation', 'punc-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/punctuation_processing_pipeline.py", "imports": ["os", "typing", "shutil", "yaml"], "module": "modelscope.pipelines.audio.punctuation_processing_pipeline"}, "('PIPELINES', 'speaker-verification', 'speaker-verification')": {"filepath": "TEMPLATE_PATH/pipelines/audio/speaker_verification_light_pipeline.py", "imports": ["torch", "io", "typing", "soundfile"], "module": "modelscope.pipelines.audio.speaker_verification_light_pipeline"}, "('PIPELINES', 'speaker-diarization', 'speaker-change-locating')": {"filepath": "TEMPLATE_PATH/pipelines/audio/speaker_change_locating_pipeline.py", "imports": ["numpy", "torch", "soundfile", "io", "typing"], "module": "modelscope.pipelines.audio.speaker_change_locating_pipeline"}, "('PIPELINES', 'auto-speech-recognition', 'asr-wenet-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/asr_wenet_inference_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.audio.asr_wenet_inference_pipeline"}, "('PIPELINES', 'auto-speech-recognition', 'ofa-asr')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/asr_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.asr_pipeline"}, "('PIPELINES', 'image-captioning', 'image-captioning')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/image_captioning_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.multi_modal.image_captioning_pipeline"}, "('PIPELINES', 'text-to-video-synthesis', 'latent-text-to-video-synthesis')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/text_to_video_synthesis_pipeline.py", "imports": ["cv2", "torch", "einops", "tempfile", "os", "typing"], "module": "modelscope.pipelines.multi_modal.text_to_video_synthesis_pipeline"}, "('PIPELINES', 'text-ranking', 'mgeo-ranking')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/mgeo_ranking_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.multi_modal.mgeo_ranking_pipeline"}, "('PIPELINES', 'generative-multi-modal-embedding', 'generative-multi-modal-embedding')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/generative_multi_modal_embedding_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.multi_modal.generative_multi_modal_embedding_pipeline"}, "('PIPELINES', 'multimodal-dialogue', 'multimodal-dialogue')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/multimodal_dialogue_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.multimodal_dialogue_pipeline"}, "('PIPELINES', 'text-to-image-synthesis', 'text-to-image-synthesis')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/text_to_image_synthesis_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.text_to_image_synthesis_pipeline"}, "('PIPELINES', 'text2sql', 'ofa-text2sql')": {"filepath": 
"TEMPLATE_PATH/pipelines/multi_modal/text2sql_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.text2sql_pipeline"}, "('PIPELINES', 'visual-entailment', 'visual-entailment')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/visual_entailment_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.visual_entailment_pipeline"}, "('PIPELINES', 'text-to-image-synthesis', 'disco_guided_diffusion')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/disco_guided_diffusion_pipeline/disco_guided_diffusion.py", "imports": ["PIL", "gc", "numpy", "cv2", "json", "torch", "math", "clip", "importlib", "torchvision", "os"], "module": "modelscope.pipelines.multi_modal.disco_guided_diffusion_pipeline.disco_guided_diffusion"}, "('PIPELINES', 'visual-question-answering', 'visual-question-answering')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/visual_question_answering_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.visual_question_answering_pipeline"}, "('PIPELINES', 'video-question-answering', 'video-question-answering')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/video_question_answering_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.video_question_answering_pipeline"}, "('PIPELINES', 'video-captioning', 'video-captioning')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/video_captioning_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.video_captioning_pipeline"}, "('PIPELINES', 'video-multi-modal-embedding', 'video-multi-modal-embedding')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/video_multi_modal_embedding_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.multi_modal.video_multi_modal_embedding_pipeline"}, "('PIPELINES', 'efficient-diffusion-tuning', 'efficient-diffusion-tuning')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/efficient_diffusion_tuning_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "torchvision", "typing"], "module": "modelscope.pipelines.multi_modal.efficient_diffusion_tuning_pipeline"}, "('PIPELINES', 'multi-modal-similarity', 'multi-modal-similarity')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/team_multi_modal_similarity_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.multi_modal.team_multi_modal_similarity_pipeline"}, "('PIPELINES', 'text-to-image-synthesis', 'diffusers-stable-diffusion')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/stable_diffusion_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "diffusers", "typing"], "module": "modelscope.pipelines.multi_modal.diffusers_wrapped.stable_diffusion.stable_diffusion_pipeline"}, "('PIPELINES', 'text-to-image-synthesis', 'chinese-stable-diffusion')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/chinese_stable_diffusion_pipeline.py", "imports": ["PIL", "transformers", "numpy", "cv2", "torch", "diffusers", "typing"], "module": "modelscope.pipelines.multi_modal.diffusers_wrapped.stable_diffusion.chinese_stable_diffusion_pipeline"}, "('PIPELINES', 'image-text-retrieval', 'multi-modal-embedding')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/multi_modal_embedding_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.multi_modal.multi_modal_embedding_pipeline"}, "('PIPELINES', 'multi-modal-embedding', 'multi-modal-embedding')": {"filepath": 
"TEMPLATE_PATH/pipelines/multi_modal/multi_modal_embedding_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.multi_modal.multi_modal_embedding_pipeline"}, "('PIPELINES', 'ocr-recognition', 'ofa-ocr-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/ocr_recognition_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.ocr_recognition_pipeline"}, "('PIPELINES', 'document-vl-embedding', 'document-vl-embedding')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/document_vl_embedding_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.document_vl_embedding_pipeline"}, "('PIPELINES', 'image-text-retrieval', 'image-text-retrieval')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/image_text_retrieval_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.image_text_retrieval_pipeline"}, "('PIPELINES', 'visual-question-answering', 'gridvlp-multi-modal-classification')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/gridvlp_pipeline.py", "imports": ["PIL", "transformers", "numpy", "json", "torch", "time", "os", "traceback", "typing"], "module": "modelscope.pipelines.multi_modal.gridvlp_pipeline"}, "('PIPELINES', 'multi-modal-embedding', 'gridvlp-multi-modal-embedding')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/gridvlp_pipeline.py", "imports": ["PIL", "transformers", "numpy", "json", "torch", "time", "os", "traceback", "typing"], "module": "modelscope.pipelines.multi_modal.gridvlp_pipeline"}, "('PIPELINES', 'visual-grounding', 'visual-grounding')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/visual_grounding_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.visual_grounding_pipeline"}, "('PIPELINES', 'video-temporal-grounding', 'soonet-video-temporal-grounding')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/soonet_video_temporal_grounding_pipeline.py", "imports": ["numpy", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.multi_modal.soonet_video_temporal_grounding_pipeline"}, "('PIPELINES', 'sudoku', 'ofa-sudoku')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/sudoku_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.sudoku_pipeline"}, "('PIPELINES', 'translation-evaluation', 'translation-evaluation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/translation_evaluation_pipeline.py", "imports": ["numpy", "enum", "torch", "os", "typing"], "module": "modelscope.pipelines.nlp.translation_evaluation_pipeline"}, "('PIPELINES', 'text-generation', 'glm130b-text-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/glm130b_text_generation_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.glm130b_text_generation_pipeline"}, "('PIPELINES', 'faq-question-answering', 'faq-question-answering')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/faq_question_answering_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.faq_question_answering_pipeline"}, "('PIPELINES', 'document-grounded-dialog-generate', 'document-grounded-dialog-generate')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/document_grounded_dialog_generate_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.document_grounded_dialog_generate_pipeline"}, "('PIPELINES', 'translation', 'automatic-post-editing')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/automatic_post_editing_pipeline.py", "imports": ["tensorflow", 
"sacremoses", "numpy", "jieba", "sentencepiece", "os", "typing", "html"], "module": "modelscope.pipelines.nlp.automatic_post_editing_pipeline"}, "('PIPELINES', 'named-entity-recognition', 'named-entity-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/named_entity_recognition_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.named_entity_recognition_pipeline"}, "('PIPELINES', 'named-entity-recognition', 'named-entity-recognition-thai')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/named_entity_recognition_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.named_entity_recognition_pipeline"}, "('PIPELINES', 'named-entity-recognition', 'named-entity-recognition-viet')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/named_entity_recognition_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.named_entity_recognition_pipeline"}, "('PIPELINES', 'translation', 'interactive-translation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/interactive_translation_pipeline.py", "imports": ["tensorflow", "sacremoses", "numpy", "jieba", "subword_nmt", "os", "typing"], "module": "modelscope.pipelines.nlp.interactive_translation_pipeline"}, "('PIPELINES', 'text-summarization', 'text-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/summarization_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.summarization_pipeline"}, "('PIPELINES', 'document-grounded-dialog-retrieval', 'document-grounded-dialog-retrieval')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/document_grounded_dialog_retrieval_pipeline.py", "imports": ["numpy", "json", "faiss", "os", "typing"], "module": "modelscope.pipelines.nlp.document_grounded_dialog_retrieval_pipeline"}, "('PIPELINES', 'text-classification', 'domain-classification')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/fasttext_text_classification_pipeline.py", "imports": ["numpy", "fasttext", "sentencepiece", "os", "typing"], "module": "modelscope.pipelines.nlp.fasttext_text_classification_pipeline"}, "('PIPELINES', 'word-alignment', 'word-alignment')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/word_alignment_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.nlp.word_alignment_pipeline"}, "('PIPELINES', 'feature-extraction', 'feature-extraction')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/feature_extraction_pipeline.py", "imports": ["torch", "os", "typing"], "module": "modelscope.pipelines.nlp.feature_extraction_pipeline"}, "('PIPELINES', 'text-ranking', 'text-ranking')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_ranking_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.nlp.text_ranking_pipeline"}, "('PIPELINES', 'fid-dialogue', 'fid-dialogue')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/fid_dialogue_pipeline.py", "imports": ["torch", "re", "typing"], "module": "modelscope.pipelines.nlp.fid_dialogue_pipeline"}, "('PIPELINES', 'text-classification', 'sentiment-analysis')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.text_classification_pipeline"}, "('PIPELINES', 'nli', 'nli')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.text_classification_pipeline"}, "('PIPELINES', 'sentence-similarity', 'sentence-similarity')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py", "imports": ["torch", 
"typing", "numpy"], "module": "modelscope.pipelines.nlp.text_classification_pipeline"}, "('PIPELINES', 'text-classification', 'text-classification')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.text_classification_pipeline"}, "('PIPELINES', 'text-classification', 'sentiment-classification')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.text_classification_pipeline"}, "('PIPELINES', 'text-classification', 'sentence-similarity')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.text_classification_pipeline"}, "('PIPELINES', 'sentiment-classification', 'sentiment-classification')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.text_classification_pipeline"}, "('PIPELINES', 'code-generation', 'codegeex-code-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/codegeex_code_generation_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.codegeex_code_generation_pipeline"}, "('PIPELINES', 'sentence-similarity', 'translation-quality-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/translation_quality_estimation_pipeline.py", "imports": ["transformers", "torch", "io", "os", "typing"], "module": "modelscope.pipelines.nlp.translation_quality_estimation_pipeline"}, "('PIPELINES', 'fill-mask', 'fill-mask')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/fill_mask_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.nlp.fill_mask_pipeline"}, "('PIPELINES', 'fill-mask', 'fill-mask-ponet')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/fill_mask_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.nlp.fill_mask_pipeline"}, "('PIPELINES', 'text-generation', 'plug-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/distributed_plug_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.distributed_plug_pipeline"}, "('PIPELINES', 'table-question-answering', 'conversational-text-to-sql')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/conversational_text_to_sql_pipeline.py", "imports": ["torch", "typing", "text2sql_lgesql"], "module": "modelscope.pipelines.nlp.conversational_text_to_sql_pipeline"}, "('PIPELINES', 'text-generation', 'gpt3-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/distributed_gpt3_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.distributed_gpt3_pipeline"}, "('PIPELINES', 'information-extraction', 'relation-extraction')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/information_extraction_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.information_extraction_pipeline"}, "('PIPELINES', 'relation-extraction', 'relation-extraction')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/information_extraction_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.information_extraction_pipeline"}, "('PIPELINES', 'table-question-answering', 'table-question-answering-pipeline')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/table_question_answering_pipeline.py", "imports": ["transformers", "json", "torch", "os", "typing"], "module": 
"modelscope.pipelines.nlp.table_question_answering_pipeline"}, "('PIPELINES', 'text-classification', 'user-satisfaction-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/user_satisfaction_estimation_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.user_satisfaction_estimation_pipeline"}, "('PIPELINES', 'task-oriented-conversation', 'dialog-modeling')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/dialog_modeling_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.dialog_modeling_pipeline"}, "('PIPELINES', 'competency-aware-translation', 'canmt-translation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/canmt_translation_pipeline.py", "imports": ["torch", "os", "sacremoses", "typing"], "module": "modelscope.pipelines.nlp.canmt_translation_pipeline"}, "('PIPELINES', 'word-segmentation', 'word-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/word_segmentation_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.word_segmentation_pipeline"}, "('PIPELINES', 'word-segmentation', 'multilingual-word-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/word_segmentation_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.word_segmentation_pipeline"}, "('PIPELINES', 'word-segmentation', 'word-segmentation-thai')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/word_segmentation_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.word_segmentation_pipeline"}, "('PIPELINES', 'document-segmentation', 'document-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/document_segmentation_pipeline.py", "imports": ["datasets", "numpy", "torch", "re", "typing"], "module": "modelscope.pipelines.nlp.document_segmentation_pipeline"}, "('PIPELINES', 'text-generation', 'gpt-moe-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/distributed_gpt_moe_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.distributed_gpt_moe_pipeline"}, "('PIPELINES', 'extractive-summarization', 'extractive-summarization')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/extractive_summarization_pipeline.py", "imports": ["datasets", "numpy", "torch", "re", "typing"], "module": "modelscope.pipelines.nlp.extractive_summarization_pipeline"}, "('PIPELINES', 'text-error-correction', 'text-error-correction')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_error_correction_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.text_error_correction_pipeline"}, "('PIPELINES', 'task-oriented-conversation', 'dialog-state-tracking')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/dialog_state_tracking_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.dialog_state_tracking_pipeline"}, "('PIPELINES', 'text-summarization', 'mglm-text-summarization')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/mglm_text_summarization_pipeline.py", "imports": ["os", "typing"], "module": "modelscope.pipelines.nlp.mglm_text_summarization_pipeline"}, "('PIPELINES', 'translation', 'csanmt-translation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/translation_pipeline.py", "imports": ["tensorflow", "sacremoses", "numpy", "jieba", "subword_nmt", "os", "typing"], "module": "modelscope.pipelines.nlp.translation_pipeline"}, "('PIPELINES', 'siamese-uie', 'siamese-uie')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/siamese_uie_pipeline.py", "imports": ["json", "torch", "logging", "scipy", "tqdm", "math", "copy", "time", "pathlib", "os", 
"typing"], "module": "modelscope.pipelines.nlp.siamese_uie_pipeline"}, "('PIPELINES', 'task-oriented-conversation', 'dialog-intent-prediction')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/dialog_intent_prediction_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.dialog_intent_prediction_pipeline"}, "('PIPELINES', 'sentence-embedding', 'sentence-embedding')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/sentence_embedding_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.sentence_embedding_pipeline"}, "('PIPELINES', 'document-grounded-dialog-rerank', 'document-grounded-dialog-rerank')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/document_grounded_dialog_rerank_pipeline.py", "imports": ["ujson", "transformers", "random", "numpy", "torch", "re", "sys", "collections", "time", "os", "typing", "pprint"], "module": "modelscope.pipelines.nlp.document_grounded_dialog_rerank_pipeline"}, "('PIPELINES', 'zero-shot-classification', 'zero-shot-classification')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/zero_shot_classification_pipeline.py", "imports": ["torch", "scipy", "typing"], "module": "modelscope.pipelines.nlp.zero_shot_classification_pipeline"}, "('PIPELINES', 'text-generation', 'text-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_generation_pipeline.py", "imports": ["torch", "os", "typing"], "module": "modelscope.pipelines.nlp.text_generation_pipeline"}, "('PIPELINES', 'text2text-generation', 'translation_en_to_de')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_generation_pipeline.py", "imports": ["torch", "os", "typing"], "module": "modelscope.pipelines.nlp.text_generation_pipeline"}, "('PIPELINES', 'text2text-generation', 'translation_en_to_ro')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_generation_pipeline.py", "imports": ["torch", "os", "typing"], "module": "modelscope.pipelines.nlp.text_generation_pipeline"}, "('PIPELINES', 'text2text-generation', 'translation_en_to_fr')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_generation_pipeline.py", "imports": ["torch", "os", "typing"], "module": "modelscope.pipelines.nlp.text_generation_pipeline"}, "('PIPELINES', 'text2text-generation', 'text2text-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_generation_pipeline.py", "imports": ["torch", "os", "typing"], "module": "modelscope.pipelines.nlp.text_generation_pipeline"}, "('PIPELINES', 'text-classification', 'language_identification')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/language_identification_pipline.py", "imports": ["tensorflow", "numpy", "re", "os", "typing"], "module": "modelscope.pipelines.nlp.language_identification_pipline"}, "('PIPELINES', 'token-classification', 'token-classification')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/token_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.token_classification_pipeline"}, "('PIPELINES', 'token-classification', 'part-of-speech')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/token_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.token_classification_pipeline"}, "('PIPELINES', 'token-classification', 'word-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/token_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.token_classification_pipeline"}, "('PIPELINES', 'token-classification', 'named-entity-recognition')": {"filepath": 
"TEMPLATE_PATH/pipelines/nlp/token_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.token_classification_pipeline"}, "('PIPELINES', 'part-of-speech', 'part-of-speech')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/token_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.token_classification_pipeline"}, "('PIPELINES', 'code-translation', 'codegeex-code-translation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/codegeex_code_translation_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.codegeex_code_translation_pipeline"}, "('PIPELINES', 'bad-image-detecting', 'bad-image-detecting')": {"filepath": "TEMPLATE_PATH/pipelines/cv/bad_image_detecting_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.bad_image_detecting_pipeline"}, "('PIPELINES', 'image-portrait-stylization', 'unet-person-image-cartoon')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_cartoon_pipeline.py", "imports": ["tensorflow", "numpy", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.image_cartoon_pipeline"}, "('PIPELINES', 'image-to-image-generation', 'image-to-image-generation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_to_image_generate_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.image_to_image_generate_pipeline"}, "('PIPELINES', 'facial-expression-recognition', 'vgg19-facial-expression-recognition-fer')": {"filepath": "TEMPLATE_PATH/pipelines/cv/facial_expression_recognition_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.facial_expression_recognition_pipeline"}, "('PIPELINES', 'face-detection', 'resnet50-face-detection-retinaface')": {"filepath": "TEMPLATE_PATH/pipelines/cv/retina_face_detection_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.retina_face_detection_pipeline"}, "('PIPELINES', 'image-style-transfer', 'AAMS-style-transfer')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_style_transfer_pipeline.py", "imports": ["cv2", "os", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_style_transfer_pipeline"}, "('PIPELINES', 'image-face-fusion', 'image-face-fusion')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_face_fusion_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.cv.image_face_fusion_pipeline"}, "('PIPELINES', 'face-detection', 'manual-face-detection-ulfd')": {"filepath": "TEMPLATE_PATH/pipelines/cv/ulfd_face_detection_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.ulfd_face_detection_pipeline"}, "('PIPELINES', 'pedestrian-attribute-recognition', 'resnet50_pedestrian-attribute-recognition_image')": {"filepath": "TEMPLATE_PATH/pipelines/cv/pedestrian_attribute_recognition_pipeline.py", "imports": ["PIL", "numpy", "cv2", "json", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.pedestrian_attribute_recognition_pipeline"}, "('PIPELINES', 'image-denoising', 'nafnet-image-denoise')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_denoise_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_denoise_pipeline"}, "('PIPELINES', 'video-text-retrieval', 'vop-video-text-retrieval-se')": {"filepath": "TEMPLATE_PATH/pipelines/cv/vop_retrieval_se_pipeline.py", 
"imports": ["numpy", "torch", "gzip", "os", "typing"], "module": "modelscope.pipelines.cv.vop_retrieval_se_pipeline"}, "('PIPELINES', 'portrait-matting', 'unet-image-matting')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_matting_pipeline.py", "imports": ["tensorflow", "numpy", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.image_matting_pipeline"}, "('PIPELINES', 'universal-matting', 'unet-universal-matting')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_matting_pipeline.py", "imports": ["tensorflow", "numpy", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.image_matting_pipeline"}, "('PIPELINES', 'image-deblurring', 'nafnet-image-deblur')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_deblur_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_deblur_pipeline"}, "('PIPELINES', 'video-human-matting', 'video-human-matting')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_human_matting_pipeline.py", "imports": ["numpy", "cv2", "torch", "moviepy", "os", "typing"], "module": "modelscope.pipelines.cv.video_human_matting_pipeline"}, "('PIPELINES', 'live-category', 'live-category')": {"filepath": "TEMPLATE_PATH/pipelines/cv/live_category_pipeline.py", "imports": ["PIL", "numpy", "torch", "decord", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.live_category_pipeline"}, "('PIPELINES', 'image-classification', 'image-structured-model-probing')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_structured_model_probing_pipeline.py", "imports": ["mmcv", "numpy", "torch", "math", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.image_structured_model_probing_pipeline"}, "('PIPELINES', 'face-quality-assessment', 'manual-face-quality-assessment-fqa')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_quality_assessment_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "module": "modelscope.pipelines.cv.face_quality_assessment_pipeline"}, "('PIPELINES', 'image-portrait-enhancement', 'gpen-image-portrait-enhancement')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_portrait_enhancement_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "scipy", "math", "typing"], "module": "modelscope.pipelines.cv.image_portrait_enhancement_pipeline"}, "('PIPELINES', 'image-color-enhancement', 'adaint-image-color-enhance')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_color_enhance_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_color_enhance_pipeline"}, "('PIPELINES', 'image-color-enhancement', 'deeplpf-image-color-enhance')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_color_enhance_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_color_enhance_pipeline"}, "('PIPELINES', 'image-color-enhancement', 'csrnet-image-color-enhance')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_color_enhance_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_color_enhance_pipeline"}, "('PIPELINES', 'vision-efficient-tuning', 'vision-efficient-tuning')": {"filepath": "TEMPLATE_PATH/pipelines/cv/vision_efficient_tuning_pipeline.py", "imports": ["torch", "torchvision", "typing", "numpy"], "module": "modelscope.pipelines.cv.vision_efficient_tuning_pipeline"}, "('PIPELINES', 'video-object-segmentation', 'video-object-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_object_segmentation_pipeline.py", "imports": 
["PIL", "numpy", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.video_object_segmentation_pipeline"}, "('PIPELINES', 'face-detection', 'resnet-face-detection-scrfd10gkps')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_detection_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.face_detection_pipeline"}, "('PIPELINES', 'body-3d-keypoints', 'canonical_body-3d-keypoints_video')": {"filepath": "TEMPLATE_PATH/pipelines/cv/body_3d_keypoints_pipeline.py", "imports": ["numpy", "cv2", "torch", "tempfile", "matplotlib", "datetime", "mpl_toolkits", "os", "typing"], "module": "modelscope.pipelines.cv.body_3d_keypoints_pipeline"}, "('PIPELINES', 'image-paintbyexample', 'stablediffusion-paintbyexample')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_paintbyexample_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "einops", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_paintbyexample_pipeline"}, "('PIPELINES', 'face-recognition', 'ir-face-recognition-rts')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_recognition_ood_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.face_recognition_ood_pipeline"}, "('PIPELINES', 'image-classification', 'image-classification')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'image-classification', 'vit-base_image-classification_ImageNet-labels')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'image-classification', 'vit-base_image-classification_Dailylife-labels')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'image-classification', 'nextvit-small_image-classification_Dailylife-labels')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'image-classification', 'convnext-base_image-classification_garbage')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'image-classification', 'common-image-classification')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'image-classification', 'easyrobust-classification')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'image-classification', 'bnext-small_image-classification_ImageNet-labels')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'card-detection', 'resnet-card-detection-scrfd34gkps')": {"filepath": 
"TEMPLATE_PATH/pipelines/cv/card_detection_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.card_detection_pipeline"}, "('PIPELINES', 'table-recognition', 'dla34-table-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/cv/table_recognition_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "module": "modelscope.pipelines.cv.table_recognition_pipeline"}, "('PIPELINES', 'image-to-image-translation', 'image-to-image-translation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_to_image_translation_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "sys", "io", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.image_to_image_translation_pipeline"}, "('PIPELINES', 'face-attribute-recognition', 'resnet34-face-attribute-recognition-fairface')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_attribute_recognition_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.face_attribute_recognition_pipeline"}, "('PIPELINES', 'image-debanding', 'rrdb-image-debanding')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_debanding_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_debanding_pipeline"}, "('PIPELINES', 'video-instance-segmentation', 'video-instance-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_instance_segmentation_pipeline.py", "imports": ["mmcv", "numpy", "cv2", "torch", "tqdm", "os", "typing"], "module": "modelscope.pipelines.cv.video_instance_segmentation_pipeline"}, "('PIPELINES', 'image-classification', 'tinynas-classification')": {"filepath": "TEMPLATE_PATH/pipelines/cv/tinynas_classification_pipeline.py", "imports": ["torch", "math", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.tinynas_classification_pipeline"}, "('PIPELINES', 'human-reconstruction', 'human-reconstruction')": {"filepath": "TEMPLATE_PATH/pipelines/cv/human_reconstruction_pipeline.py", "imports": ["trimesh", "shutil", "numpy", "torch", "os", "typing"], "module": "modelscope.pipelines.cv.human_reconstruction_pipeline"}, "('PIPELINES', 'video-multi-object-tracking', 'video-multi-object-tracking')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_multi_object_tracking_pipeline.py", "imports": ["torch", "os", "typing"], "module": "modelscope.pipelines.cv.video_multi_object_tracking_pipeline"}, "('PIPELINES', 'controllable-image-generation', 'controllable-image-generation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/controllable_image_generation_pipeline.py", "imports": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "glob", "os", "typing"], "module": "modelscope.pipelines.cv.controllable_image_generation_pipeline"}, "('PIPELINES', 'image-fewshot-detection', 'image-fewshot-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_defrcn_fewshot_pipeline.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_defrcn_fewshot_pipeline"}, "('PIPELINES', 'semantic-segmentation', 'ddpm-image-semantic-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/ddpm_semantic_segmentation_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.ddpm_semantic_segmentation_pipeline"}, "('PIPELINES', 'image-classification', 'resnet50-image-classification-cc')": {"filepath": "TEMPLATE_PATH/pipelines/cv/content_check_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": 
"modelscope.pipelines.cv.content_check_pipeline"}, "('PIPELINES', 'video-text-retrieval', 'vop-video-text-retrieval')": {"filepath": "TEMPLATE_PATH/pipelines/cv/vop_retrieval_pipeline.py", "imports": ["random", "numpy", "torch", "tqdm", "math", "collections", "gzip", "os", "typing", "pickle"], "module": "modelscope.pipelines.cv.vop_retrieval_pipeline"}, "('PIPELINES', 'object-detection-3d', 'object-detection-3d-depe')": {"filepath": "TEMPLATE_PATH/pipelines/cv/object_detection_3d_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "tempfile", "os", "typing"], "module": "modelscope.pipelines.cv.object_detection_3d_pipeline"}, "('PIPELINES', 'lineless-table-recognition', 'lore-lineless-table-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/cv/lineless_table_recognition_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "module": "modelscope.pipelines.cv.lineless_table_recognition_pipeline"}, "('PIPELINES', 'video-embedding', 'cmdssl-r2p1d_video_embedding')": {"filepath": "TEMPLATE_PATH/pipelines/cv/cmdssl_video_embedding_pipeline.py", "imports": ["PIL", "numpy", "torch", "decord", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.cmdssl_video_embedding_pipeline"}, "('PIPELINES', 'domain-specific-object-detection', 'tinynas-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/tinynas_detection_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.tinynas_detection_pipeline"}, "('PIPELINES', 'image-object-detection', 'tinynas-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/tinynas_detection_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.tinynas_detection_pipeline"}, "('PIPELINES', 'video-deinterlace', 'video-deinterlace')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_deinterlace_pipeline.py", "imports": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.video_deinterlace_pipeline"}, "('PIPELINES', 'open-vocabulary-detection', 'open-vocabulary-detection-vild')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_open_vocabulary_detection_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.image_open_vocabulary_detection_pipeline"}, "('PIPELINES', 'language-guided-video-summarization', 'clip-it-video-summarization')": {"filepath": "TEMPLATE_PATH/pipelines/cv/language_guided_video_summarization_pipeline.py", "imports": ["PIL", "shutil", "random", "numpy", "cv2", "torch", "tempfile", "clip", "os", "typing"], "module": "modelscope.pipelines.cv.language_guided_video_summarization_pipeline"}, "('PIPELINES', 'body-2d-keypoints', 'hrnetv2w32_body-2d-keypoints_image')": {"filepath": "TEMPLATE_PATH/pipelines/cv/body_2d_keypoints_pipeline.py", "imports": ["PIL", "numpy", "cv2", "json", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.body_2d_keypoints_pipeline"}, "('PIPELINES', 'face-human-hand-detection', 'face-human-hand-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_human_hand_detection_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.cv.face_human_hand_detection_pipeline"}, "('PIPELINES', 'video-embedding', 'hicossl-s3dg-video_embedding')": {"filepath": "TEMPLATE_PATH/pipelines/cv/hicossl_video_embedding_pipeline.py", "imports": ["torch", "os", "typing", "math"], "module": "modelscope.pipelines.cv.hicossl_video_embedding_pipeline"}, "('PIPELINES', 'face-recognition', 
'ir101-face-recognition-cfglint')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_recognition_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.face_recognition_pipeline"}, "('PIPELINES', 'image-body-reshaping', 'flow-based-body-reshaping')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_body_reshaping_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.image_body_reshaping_pipeline"}, "('PIPELINES', 'image-inpainting', 'fft-inpainting')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_inpainting_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "typing"], "module": "modelscope.pipelines.cv.image_inpainting_pipeline"}, "('PIPELINES', 'face-recognition', 'manual-face-recognition-frfm')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_recognition_onnx_fm_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "module": "modelscope.pipelines.cv.face_recognition_onnx_fm_pipeline"}, "('PIPELINES', 'image-driving-perception', 'yolopv2_image-driving-percetion_bdd100k')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_driving_perception_pipeline.py", "imports": ["cv2", "os", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_driving_perception_pipeline"}, "('PIPELINES', 'video-stabilization', 'video-stabilization')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_stabilization_pipeline.py", "imports": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "glob", "os", "typing"], "module": "modelscope.pipelines.cv.video_stabilization_pipeline"}, "('PIPELINES', 'indoor-layout-estimation', 'indoor-layout-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/indoor_layout_estimation_pipeline.py", "imports": ["cv2", "typing", "numpy"], "module": "modelscope.pipelines.cv.indoor_layout_estimation_pipeline"}, "('PIPELINES', 'image-colorization', 'ddcolor-image-colorization')": {"filepath": "TEMPLATE_PATH/pipelines/cv/ddcolor_image_colorization_pipeline.py", "imports": ["numpy", "cv2", "torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.ddcolor_image_colorization_pipeline"}, "('PIPELINES', 'face-emotion', 'face-emotion')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_emotion_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.cv.face_emotion_pipeline"}, "('PIPELINES', 'face-detection', 'manual-face-detection-mtcnn')": {"filepath": "TEMPLATE_PATH/pipelines/cv/mtcnn_face_detection_pipeline.py", "imports": ["torch", "os", "typing"], "module": "modelscope.pipelines.cv.mtcnn_face_detection_pipeline"}, "('PIPELINES', 'nerf-recon-acc', 'nerf-recon-acc')": {"filepath": "TEMPLATE_PATH/pipelines/cv/nerf_recon_acc_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.nerf_recon_acc_pipeline"}, "('PIPELINES', 'image-depth-estimation', 'image-bts-depth-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_bts_depth_estimation_pipeline.py", "imports": ["albumentations", "numpy", "torch", "cv2", "typing"], "module": "modelscope.pipelines.cv.image_bts_depth_estimation_pipeline"}, "('PIPELINES', 'face-2d-keypoints', 'manual-facial-landmark-confidence-flcm')": {"filepath": "TEMPLATE_PATH/pipelines/cv/facial_landmark_confidence_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.facial_landmark_confidence_pipeline"}, "('PIPELINES', 'face-reconstruction', 'resnet50-face-reconstruction')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_reconstruction_pipeline.py", 
"imports": ["PIL", "tensorflow", "shutil", "numpy", "cv2", "torch", "scipy", "io", "face_alignment", "os", "typing"], "module": "modelscope.pipelines.cv.face_reconstruction_pipeline"}, "('PIPELINES', 'face-detection', 'resnet101-face-detection-cvpr22papermogface')": {"filepath": "TEMPLATE_PATH/pipelines/cv/mog_face_detection_pipeline.py", "imports": ["os", "typing", "numpy"], "module": "modelscope.pipelines.cv.mog_face_detection_pipeline"}, "('PIPELINES', 'skin-retouching', 'unet-skin-retouching')": {"filepath": "TEMPLATE_PATH/pipelines/cv/skin_retouching_pipeline.py", "imports": ["PIL", "tensorflow", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.skin_retouching_pipeline"}, "('PIPELINES', 'image-segmentation', 'vision-middleware-multi-task')": {"filepath": "TEMPLATE_PATH/pipelines/cv/vision_middleware_pipeline.py", "imports": ["mmcv", "numpy", "torch", "math", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.vision_middleware_pipeline"}, "('PIPELINES', 'face-liveness', 'manual-face-liveness-flir')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_liveness_ir_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "module": "modelscope.pipelines.cv.face_liveness_ir_pipeline"}, "('PIPELINES', 'human-detection', 'resnet18-human-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_detection_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.cv.image_detection_pipeline"}, "('PIPELINES', 'image-object-detection', 'vit-object-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_detection_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.cv.image_detection_pipeline"}, "('PIPELINES', 'image-object-detection', 'abnormal-object-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_detection_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.cv.image_detection_pipeline"}, "('PIPELINES', 'video-object-detection', 'cspnet_realtime-video-object-detection_streamyolo')": {"filepath": "TEMPLATE_PATH/pipelines/cv/realtime_video_object_detection_pipeline.py", "imports": ["PIL", "numpy", "cv2", "json", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.realtime_video_object_detection_pipeline"}, "('PIPELINES', 'video-panoptic-segmentation', 'video-panoptic-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_panoptic_segmentation_pipeline.py", "imports": ["mmcv", "numpy", "cv2", "torch", "tqdm", "os", "typing"], "module": "modelscope.pipelines.cv.video_panoptic_segmentation_pipeline"}, "('PIPELINES', 'action-detection', 'ResNetC3D-action-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/action_detection_pipeline.py", "imports": ["os", "typing", "math"], "module": "modelscope.pipelines.cv.action_detection_pipeline"}, "('PIPELINES', 'product-segmentation', 'product-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/product_segmentation_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.cv.product_segmentation_pipeline"}, "('PIPELINES', 'image-object-detection', 'tbs-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/tbs_detection_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "colorsys", "os", "typing"], "module": "modelscope.pipelines.cv.tbs_detection_pipeline"}, "('PIPELINES', 'image-matching', 'image-matching')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_matching_pipeline.py", "imports": ["PIL", "numpy", "torch", 
"cv2", "typing"], "module": "modelscope.pipelines.cv.image_matching_pipeline"}, "('PIPELINES', 'video-category', 'video-category')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_category_pipeline.py", "imports": ["PIL", "numpy", "json", "torch", "decord", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.video_category_pipeline"}, "('PIPELINES', 'hand-static', 'hand-static')": {"filepath": "TEMPLATE_PATH/pipelines/cv/hand_static_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.cv.hand_static_pipeline"}, "('PIPELINES', 'animal-recognition', 'resnet101-animal-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/cv/animal_recognition_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.animal_recognition_pipeline"}, "('PIPELINES', 'pointcloud-sceneflow-estimation', 'pointcloud-sceneflow-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/pointcloud_sceneflow_estimation_pipeline.py", "imports": ["torch", "typing", "plyfile", "numpy"], "module": "modelscope.pipelines.cv.pointcloud_sceneflow_estimation_pipeline"}, "('PIPELINES', 'image-segmentation', 'cascade-mask-rcnn-swin-image-instance-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_instance_segmentation_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.image_instance_segmentation_pipeline"}, "('PIPELINES', 'video-frame-interpolation', 'video-frame-interpolation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_frame_interpolation_pipeline.py", "imports": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "glob", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.video_frame_interpolation_pipeline"}, "('PIPELINES', 'image-quality-assessment-mos', 'image-quality-assessment-mos')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_quality_assessment_mos_pipeline.py", "imports": ["numpy", "cv2", "torch", "tempfile", "math", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_quality_assessment_mos_pipeline"}, "('PIPELINES', 'video-summarization', 'googlenet_pgl_video_summarization')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_summarization_pipeline.py", "imports": ["numpy", "cv2", "torch", "tqdm", "os", "typing"], "module": "modelscope.pipelines.cv.video_summarization_pipeline"}, "('PIPELINES', 'panorama-depth-estimation', 'panorama-depth-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/panorama_depth_estimation_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "typing"], "module": "modelscope.pipelines.cv.panorama_depth_estimation_pipeline"}, "('PIPELINES', 'image-segmentation', 'fast-instance-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/fast_instance_segmentation_pipeline.py", "imports": ["torch", "torchvision", "typing", "numpy"], "module": "modelscope.pipelines.cv.fast_instance_segmentation_pipeline"}, "('PIPELINES', 'image-object-detection', 'vidt')": {"filepath": "TEMPLATE_PATH/pipelines/cv/vidt_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.vidt_pipeline"}, "('PIPELINES', 'image-skychange', 'image-skychange')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_skychange_pipeline.py", "imports": ["PIL", "pdb", "numpy", "cv2", "time", "typing"], "module": "modelscope.pipelines.cv.image_skychange_pipeline"}, "('PIPELINES', 'image-quality-assessment-mos', 'image-quality-assessment-man')": {"filepath": 
"TEMPLATE_PATH/pipelines/cv/image_quality_assessment_man_pipeline.py", "imports": ["numpy", "cv2", "torch", "tempfile", "math", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_quality_assessment_man_pipeline"}, "('PIPELINES', 'image-demoireing', 'uhdm-image-demoireing')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_restoration_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.image_restoration_pipeline"}, "('PIPELINES', 'video-inpainting', 'video-inpainting')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_inpainting_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.video_inpainting_pipeline"}, "('PIPELINES', 'face-image-generation', 'gan-face-image-generation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_image_generation_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.face_image_generation_pipeline"}, "('PIPELINES', 'video-super-resolution', 'realbasicvsr-video-super-resolution')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_super_resolution_pipeline.py", "imports": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.video_super_resolution_pipeline"}, "('PIPELINES', 'referring-video-object-segmentation', 'referring-video-object-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/referring_video_object_segmentation_pipeline.py", "imports": ["PIL", "numpy", "torch", "einops", "tqdm", "tempfile", "moviepy", "torchvision", "typing"], "module": "modelscope.pipelines.cv.referring_video_object_segmentation_pipeline"}, "('PIPELINES', 'virtual-try-on', 'virtual-try-on')": {"filepath": "TEMPLATE_PATH/pipelines/cv/virtual_try_on_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.virtual_try_on_pipeline"}, "('PIPELINES', 'ocr-recognition', 'convnextTiny-ocr-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/cv/ocr_recognition_pipeline.py", "imports": [], "module": "modelscope.pipelines.cv.ocr_recognition_pipeline"}, "('PIPELINES', 'ocr-detection', 'resnet18-ocr-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/ocr_detection_pipeline.py", "imports": ["tensorflow", "tf_slim", "numpy", "cv2", "torch", "math", "os", "typing"], "module": "modelscope.pipelines.cv.ocr_detection_pipeline"}, "('PIPELINES', 'movie-scene-segmentation', 'resnet50-bert-movie-scene-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/movie_scene_segmentation_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.cv.movie_scene_segmentation_pipeline"}, "('PIPELINES', 'image-segmentation', 'maskdino-swin-image-instance-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/maskdino_instance_segmentation_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.maskdino_instance_segmentation_pipeline"}, "('PIPELINES', 'video-colorization', 'video-colorization')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_colorization_pipeline.py", "imports": ["PIL", "subprocess", "numpy", "cv2", "torch", "tempfile", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.video_colorization_pipeline"}, "('PIPELINES', 'image-segmentation', 'm2fp-image-human-parsing')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_human_parsing_pipeline.py", "imports": ["torch", "torchvision", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_human_parsing_pipeline"}, "('PIPELINES', 
'face-liveness', 'manual-face-liveness-flxc')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_liveness_xc_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "module": "modelscope.pipelines.cv.face_liveness_xc_pipeline"}, "('PIPELINES', 'crowd-counting', 'hrnet-crowd-counting')": {"filepath": "TEMPLATE_PATH/pipelines/cv/crowd_counting_pipeline.py", "imports": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "module": "modelscope.pipelines.cv.crowd_counting_pipeline"}, "('PIPELINES', 'video-depth-estimation', 'video-depth-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_depth_estimation_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.video_depth_estimation_pipeline"}, "('PIPELINES', 'image-colorization', 'unet-image-colorization')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_colorization_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_colorization_pipeline"}, "('PIPELINES', 'face-recognition', 'ir50-face-recognition-arcface')": {"filepath": "TEMPLATE_PATH/pipelines/cv/arc_face_recognition_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.arc_face_recognition_pipeline"}, "('PIPELINES', 'image-quality-assessment-degradation', 'image-quality-assessment-degradation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_quality_assessment_degradation_pipeline.py", "imports": ["numpy", "cv2", "torch", "tempfile", "math", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_quality_assessment_degradation_pipeline"}, "('PIPELINES', 'image-inpainting', 'image-inpainting-sdv2')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_inpainting_sdv2_pipeline.py", "imports": ["numpy", "cv2", "torch", "tempfile", "sys", "math", "diffusers", "os", "typing"], "module": "modelscope.pipelines.cv.image_inpainting_sdv2_pipeline"}, "('PIPELINES', 'image-super-resolution', 'rrdb-image-super-resolution')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_super_resolution_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "typing"], "module": "modelscope.pipelines.cv.image_super_resolution_pipeline"}, "('PIPELINES', 'semantic-segmentation', 'u2net-salient-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_salient_detection_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.image_salient_detection_pipeline"}, "('PIPELINES', 'semantic-segmentation', 'res2net-salient-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_salient_detection_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.image_salient_detection_pipeline"}, "('PIPELINES', 'semantic-segmentation', 'res2net-camouflaged-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_salient_detection_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.image_salient_detection_pipeline"}, "('PIPELINES', 'video-single-object-tracking', 'procontext-vitb-video-single-object-tracking')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_single_object_tracking_pipeline.py", "imports": ["cv2", "os", "typing"], "module": "modelscope.pipelines.cv.video_single_object_tracking_pipeline"}, "('PIPELINES', 'video-single-object-tracking', 'ostrack-vitb-video-single-object-tracking')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_single_object_tracking_pipeline.py", "imports": ["cv2", "os", "typing"], "module": 
"modelscope.pipelines.cv.video_single_object_tracking_pipeline"}, "('PIPELINES', 'face-recognition', 'manual-face-recognition-frir')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_recognition_onnx_ir_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "module": "modelscope.pipelines.cv.face_recognition_onnx_ir_pipeline"}, "('PIPELINES', 'product-retrieval-embedding', 'resnet50-product-retrieval-embedding')": {"filepath": "TEMPLATE_PATH/pipelines/cv/product_retrieval_embedding_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.product_retrieval_embedding_pipeline"}, "('PIPELINES', 'face-recognition', 'resnet-face-recognition-facemask')": {"filepath": "TEMPLATE_PATH/pipelines/cv/mask_face_recognition_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "collections", "os", "typing"], "module": "modelscope.pipelines.cv.mask_face_recognition_pipeline"}, "('PIPELINES', 'image-super-resolution', 'mobile-image-super-resolution')": {"filepath": "TEMPLATE_PATH/pipelines/cv/mobile_image_super_resolution_pipeline.py", "imports": ["skimage", "numpy", "torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.mobile_image_super_resolution_pipeline"}, "('PIPELINES', 'license-plate-detection', 'resnet18-license-plate-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/license_plate_detection_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "module": "modelscope.pipelines.cv.license_plate_detection_pipeline"}, "('PIPELINES', 'image-segmentation', 'image-semantic-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_semantic_segmentation_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "typing"], "module": "modelscope.pipelines.cv.image_semantic_segmentation_pipeline"}, "('PIPELINES', 'text-driven-segmentation', 'text-driven-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/text_driven_segmentation_pipleline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.text_driven_segmentation_pipleline"}, "('PIPELINES', 'motion-generation', 'mdm-motion-generation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/motion_generation_pipeline.py", "imports": ["numpy", "torch", "tempfile", "os", "typing"], "module": "modelscope.pipelines.cv.motion_generation_pipeline"}, "('PIPELINES', 'image-multi-view-depth-estimation', 'image-multi-view-depth-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_mvs_depth_estimation_pipeline.py", "imports": ["os", "typing", "tempfile", "shutil"], "module": "modelscope.pipelines.cv.image_mvs_depth_estimation_pipeline"}, "('PIPELINES', 'image-depth-estimation', 'image-depth-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_depth_estimation_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "typing"], "module": "modelscope.pipelines.cv.image_depth_estimation_pipeline"}, "('PIPELINES', 'action-recognition', 'TAdaConv_action-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/cv/action_recognition_pipeline.py", "imports": ["torch", "os", "typing", "math"], "module": "modelscope.pipelines.cv.action_recognition_pipeline"}, "('PIPELINES', 'action-recognition', 'patchshift-action-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/cv/action_recognition_pipeline.py", "imports": ["torch", "os", "typing", "math"], "module": "modelscope.pipelines.cv.action_recognition_pipeline"}, "('PIPELINES', 'image-reid-person', 'passvitb-image-reid-person')": {"filepath": 
"TEMPLATE_PATH/pipelines/cv/image_reid_person_pipeline.py", "imports": ["PIL", "torch", "math", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.image_reid_person_pipeline"}, "('PIPELINES', 'general-recognition', 'resnet101-general-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/cv/general_recognition_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.general_recognition_pipeline"}, "('PIPELINES', 'shop-segmentation', 'shop-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/shop_segmentation_pipleline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.shop_segmentation_pipleline"}, "('PREPROCESSORS', 'audio', 'wav-to-lists')": {"filepath": "TEMPLATE_PATH/preprocessors/kws.py", "imports": ["os", "typing", "yaml"], "module": "modelscope.preprocessors.kws"}, "('PREPROCESSORS', 'multi-modal', 'diffusion-image-generation-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, "('PREPROCESSORS', 'multi-modal', 'ofa-tasks-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, "('PREPROCESSORS', 'multi-modal', 'clip-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, "('PREPROCESSORS', 'multi-modal', 'mplug-tasks-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, "('PREPROCESSORS', 'multi-modal', 'vldoc-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, "('PREPROCESSORS', 'multi-modal', 'hitea-tasks-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, "('PREPROCESSORS', 'multi-modal', 'mplug-owl-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, "('PREPROCESSORS', 'multi-modal', 'image-captioning-clip-interrogator-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, "('PREPROCESSORS', 'science', 'unifold-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/science/uni_fold.py", "imports": ["unittest", "hashlib", "ipdb", "random", "numpy", "torch", "json", "tarfile", "pathlib", "os", "typing", "requests", "logging", "re", "tqdm", "time", "gzip", "pickle"], "module": "modelscope.preprocessors.science.uni_fold"}, "('PREPROCESSORS', 'text-to-speech', 
'kantts-data-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/tts.py", "imports": ["os", "kantts", "typing"], "module": "modelscope.preprocessors.tts"}, "('PREPROCESSORS', 'audio', 'wav-to-scp')": {"filepath": "TEMPLATE_PATH/preprocessors/asr.py", "imports": ["os", "typing"], "module": "modelscope.preprocessors.asr"}, "('PREPROCESSORS', 'default', 'Compose')": {"filepath": "TEMPLATE_PATH/preprocessors/common.py", "imports": ["numpy", "torch", "collections", "time", "typing"], "module": "modelscope.preprocessors.common"}, "('PREPROCESSORS', 'default', 'ToTensor')": {"filepath": "TEMPLATE_PATH/preprocessors/common.py", "imports": ["numpy", "torch", "collections", "time", "typing"], "module": "modelscope.preprocessors.common"}, "('PREPROCESSORS', 'default', 'Filter')": {"filepath": "TEMPLATE_PATH/preprocessors/common.py", "imports": ["numpy", "torch", "collections", "time", "typing"], "module": "modelscope.preprocessors.common"}, "('PREPROCESSORS', 'default', 'ToNumpy')": {"filepath": "TEMPLATE_PATH/preprocessors/common.py", "imports": ["numpy", "torch", "collections", "time", "typing"], "module": "modelscope.preprocessors.common"}, "('PREPROCESSORS', 'default', 'Rename')": {"filepath": "TEMPLATE_PATH/preprocessors/common.py", "imports": ["numpy", "torch", "collections", "time", "typing"], "module": "modelscope.preprocessors.common"}, "('PREPROCESSORS', 'default', 'Identity')": {"filepath": "TEMPLATE_PATH/preprocessors/common.py", "imports": ["numpy", "torch", "collections", "time", "typing"], "module": "modelscope.preprocessors.common"}, "('PREPROCESSORS', 'nlp', 'word-segment-text-to-label-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/token_classification_preprocessor.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.token_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'ner-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/token_classification_preprocessor.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.token_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'token-cls-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/token_classification_preprocessor.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.token_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'sequence-labeling-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/token_classification_preprocessor.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.token_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'siamese-uie-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/siamese_uie_preprocessor.py", "imports": ["typing", "transformers"], "module": "modelscope.preprocessors.nlp.siamese_uie_preprocessor"}, "('PREPROCESSORS', 'nlp', 're-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/relation_extraction_preprocessor.py", "imports": ["typing", "transformers"], "module": "modelscope.preprocessors.nlp.relation_extraction_preprocessor"}, "('PREPROCESSORS', 'nlp', 'viet-ner-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/token_classification_viet_preprocessor.py", "imports": ["torch", "typing"], "module": "modelscope.preprocessors.nlp.token_classification_viet_preprocessor"}, "('PREPROCESSORS', 'nlp', 'translation-evaluation-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/translation_evaluation_preprocessor.py", "imports": ["torch", "typing", "transformers"], 
"module": "modelscope.preprocessors.nlp.translation_evaluation_preprocessor"}, "('PREPROCESSORS', 'nlp', 'nli-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_classification_preprocessor.py", "imports": ["typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'sen-sim-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_classification_preprocessor.py", "imports": ["typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'bert-seq-cls-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_classification_preprocessor.py", "imports": ["typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'sen-cls-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_classification_preprocessor.py", "imports": ["typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'document-grounded-dialog-retrieval')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/document_grounded_dialog_retrieval_preprocessor.py", "imports": ["torch", "os", "typing", "transformers"], "module": "modelscope.preprocessors.nlp.document_grounded_dialog_retrieval_preprocessor"}, "('PREPROCESSORS', 'nlp', 'zero-shot-cls-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/zero_shot_classification_preprocessor.py", "imports": ["typing"], "module": "modelscope.preprocessors.nlp.zero_shot_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'canmt-translation')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/canmt_translation.py", "imports": ["sacremoses", "jieba", "torch", "subword_nmt", "os", "typing"], "module": "modelscope.preprocessors.nlp.canmt_translation"}, "('PREPROCESSORS', 'nlp', 'fill-mask')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/fill_mask_preprocessor.py", "imports": ["numpy", "torch", "abc", "re", "os", "typing"], "module": "modelscope.preprocessors.nlp.fill_mask_preprocessor"}, "('PREPROCESSORS', 'nlp', 'fill-mask-ponet')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/fill_mask_preprocessor.py", "imports": ["numpy", "torch", "abc", "re", "os", "typing"], "module": "modelscope.preprocessors.nlp.fill_mask_preprocessor"}, "('PREPROCESSORS', 'nlp', 'word-alignment')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/word_alignment_preprocessor.py", "imports": ["itertools", "numpy", "torch", "os", "typing"], "module": "modelscope.preprocessors.nlp.word_alignment_preprocessor"}, "('PREPROCESSORS', 'nlp', 'conversational-text-to-sql')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/space_T_en/conversational_text_to_sql_preprocessor.py", "imports": ["json", "torch", "text2sql_lgesql", "os", "typing"], "module": "modelscope.preprocessors.nlp.space_T_en.conversational_text_to_sql_preprocessor"}, "('PREPROCESSORS', 'nlp', 'document-grounded-dialog-generate')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/document_grounded_dialog_generate_preprocessor.py", "imports": ["torch", "os", "typing", "transformers"], "module": "modelscope.preprocessors.nlp.document_grounded_dialog_generate_preprocessor"}, "('PREPROCESSORS', 'nlp', 'text-error-correction')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_error_correction.py", "imports": ["torch", "os", "typing", "transformers"], "module": "modelscope.preprocessors.nlp.text_error_correction"}, "('PREPROCESSORS', 'nlp', 'text-ranking')": {"filepath": 
"TEMPLATE_PATH/preprocessors/nlp/text_ranking_preprocessor.py", "imports": ["typing", "transformers"], "module": "modelscope.preprocessors.nlp.text_ranking_preprocessor"}, "('PREPROCESSORS', 'nlp', 'Tokenize')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/bert_seq_cls_tokenizer.py", "imports": ["typing", "transformers"], "module": "modelscope.preprocessors.nlp.bert_seq_cls_tokenizer"}, "('PREPROCESSORS', 'nlp', 'document-segmentation')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/document_segmentation_preprocessor.py", "imports": ["typing"], "module": "modelscope.preprocessors.nlp.document_segmentation_preprocessor"}, "('PREPROCESSORS', 'nlp', 'sentence-embedding')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/sentence_embedding_preprocessor.py", "imports": ["typing"], "module": "modelscope.preprocessors.nlp.sentence_embedding_preprocessor"}, "('PREPROCESSORS', 'nlp', 'mglm-summarization')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/mglm_summarization_preprocessor.py", "imports": ["os", "re", "typing"], "module": "modelscope.preprocessors.nlp.mglm_summarization_preprocessor"}, "('PREPROCESSORS', 'nlp', 'thai-ner-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/token_classification_thai_preprocessor.py", "imports": ["typing"], "module": "modelscope.preprocessors.nlp.token_classification_thai_preprocessor"}, "('PREPROCESSORS', 'nlp', 'thai-wseg-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/token_classification_thai_preprocessor.py", "imports": ["typing"], "module": "modelscope.preprocessors.nlp.token_classification_thai_preprocessor"}, "('PREPROCESSORS', 'nlp', 'mgeo-ranking')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/mgeo_ranking_preprocessor.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.preprocessors.nlp.mgeo_ranking_preprocessor"}, "('PREPROCESSORS', 'nlp', 'dialog-intent-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/space/dialog_intent_prediction_preprocessor.py", "imports": ["json", "os", "typing"], "module": "modelscope.preprocessors.nlp.space.dialog_intent_prediction_preprocessor"}, "('PREPROCESSORS', 'nlp', 'dialog-state-tracking-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/space/dialog_state_tracking_preprocessor.py", "imports": ["typing"], "module": "modelscope.preprocessors.nlp.space.dialog_state_tracking_preprocessor"}, "('PREPROCESSORS', 'nlp', 'dialog-modeling-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/space/dialog_modeling_preprocessor.py", "imports": ["os", "typing"], "module": "modelscope.preprocessors.nlp.space.dialog_modeling_preprocessor"}, "('PREPROCESSORS', 'nlp', 'dialog-use-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/dialog_classification_use_preprocessor.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.preprocessors.nlp.dialog_classification_use_preprocessor"}, "('PREPROCESSORS', 'nlp', 'text-gen-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_generation_preprocessor.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_generation_preprocessor"}, "('PREPROCESSORS', 'nlp', 'text-gen-jieba-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_generation_preprocessor.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_generation_preprocessor"}, "('PREPROCESSORS', 'nlp', 'sentence-piece')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_generation_preprocessor.py", "imports": ["torch", 
"os", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_generation_preprocessor"}, "('PREPROCESSORS', 'nlp', 'text2text-gen-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_generation_preprocessor.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_generation_preprocessor"}, "('PREPROCESSORS', 'nlp', 'table-question-answering-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/space_T_cn/table_question_answering_preprocessor.py", "imports": ["torch", "os", "typing", "transformers"], "module": "modelscope.preprocessors.nlp.space_T_cn.table_question_answering_preprocessor"}, "('PREPROCESSORS', 'nlp', 'document-grounded-dialog-rerank')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/document_grounded_dialog_rerank_preprocessor.py", "imports": ["transformers", "torch", "copy", "os", "typing"], "module": "modelscope.preprocessors.nlp.document_grounded_dialog_rerank_preprocessor"}, "('PREPROCESSORS', 'nlp', 'feature-extraction')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/feature_extraction_preprocessor.py", "imports": ["typing", "numpy"], "module": "modelscope.preprocessors.nlp.feature_extraction_preprocessor"}, "('PREPROCESSORS', 'nlp', 'faq-question-answering-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/faq_question_answering_preprocessor.py", "imports": ["torch", "typing"], "module": "modelscope.preprocessors.nlp.faq_question_answering_preprocessor"}, "('PREPROCESSORS', 'audio', 'LinearAECAndFbank')": {"filepath": "TEMPLATE_PATH/preprocessors/audio.py", "imports": ["numpy", "torch", "scipy", "io", "os", "typing"], "module": "modelscope.preprocessors.audio"}, "('PREPROCESSORS', 'cv', 'RandomCrop')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'RandomResizedCrop')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'Resize')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'CenterCrop')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'RandomHorizontalFlip')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'Normalize')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'ImageToTensor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", 
"torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'image-classification-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'bad-image-detecting-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/bad_image_detecting_preprocessor.py", "imports": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "module": "modelscope.preprocessors.cv.bad_image_detecting_preprocessor"}, "('PREPROCESSORS', 'cv', 'image-classification-mmcv-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/mmcls_preprocessor.py", "imports": ["os", "typing", "numpy"], "module": "modelscope.preprocessors.cv.mmcls_preprocessor"}, "('PREPROCESSORS', 'cv', 'controllable-image-generation-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/controllable_image_generation.py", "imports": ["PIL", "numpy", "cv2", "torch", "math", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.controllable_image_generation"}, "('PREPROCESSORS', 'cv', 'image-quality_assessment-mos-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_quality_assessment_mos.py", "imports": ["numpy", "cv2", "math", "torchvision", "typing"], "module": "modelscope.preprocessors.cv.image_quality_assessment_mos"}, "('PREPROCESSORS', 'cv', 'image-demoire-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_restoration_preprocessor.py", "imports": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "module": "modelscope.preprocessors.cv.image_restoration_preprocessor"}, "('PREPROCESSORS', 'cv', 'image-quality_assessment-man-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_quality_assessment_man.py", "imports": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "module": "modelscope.preprocessors.cv.image_quality_assessment_man"}, "('PREPROCESSORS', 'cv', 'movie-scene-segmentation-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/video.py", "imports": ["urllib", "numpy", "random", "torch", "decord", "tempfile", "math", "torchvision", "os", "uuid"], "module": "modelscope.preprocessors.video"}, "('PREPROCESSORS', 'cv', 'load-image')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'object-detection-tinynas-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'image-color-enhance-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'image-denoise-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'image-deblur-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'image-portrait-enhancement-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": 
["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'image-instance-segmentation-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'video-summarization-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'image-classification-bypass-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PARALLEL', 'default', 'DistributedDataParallel')": {"filepath": "TEMPLATE_PATH/trainers/parallel/builder.py", "imports": ["torch"], "module": "modelscope.trainers.parallel.builder"}, "('OPTIMIZERS', 'default', 'ChildTuningAdamW')": {"filepath": "TEMPLATE_PATH/trainers/optimizer/child_tuning_adamw_optimizer.py", "imports": ["numpy", "torch", "types", "math", "typing"], "module": "modelscope.trainers.optimizer.child_tuning_adamw_optimizer"}, "('LR_SCHEDULER', 'default', 'ConstantWarmup')": {"filepath": "TEMPLATE_PATH/trainers/lrscheduler/warmup/warmup.py", "imports": [], "module": "modelscope.trainers.lrscheduler.warmup.warmup"}, "('LR_SCHEDULER', 'default', 'LinearWarmup')": {"filepath": "TEMPLATE_PATH/trainers/lrscheduler/warmup/warmup.py", "imports": [], "module": "modelscope.trainers.lrscheduler.warmup.warmup"}, "('LR_SCHEDULER', 'default', 'ExponentialWarmup')": {"filepath": "TEMPLATE_PATH/trainers/lrscheduler/warmup/warmup.py", "imports": [], "module": "modelscope.trainers.lrscheduler.warmup.warmup"}, "('TRAINERS', 'default', 'nlp-base-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp_trainer.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.trainers.nlp_trainer"}, "('TRAINERS', 'default', 'nlp-veco-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp_trainer.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.trainers.nlp_trainer"}, "('TRAINERS', 'default', 'speech_kws_fsmn_char_ctc_nearfield')": {"filepath": "TEMPLATE_PATH/trainers/audio/kws_nearfield_trainer.py", "imports": ["torch", "re", "tensorboardX", "copy", "datetime", "yaml", "os", "typing"], "module": "modelscope.trainers.audio.kws_nearfield_trainer"}, "('TRAINERS', 'default', 'speech_dfsmn_kws_char_farfield')": {"filepath": "TEMPLATE_PATH/trainers/audio/kws_farfield_trainer.py", "imports": ["numpy", "torch", "math", "datetime", "glob", "os", "typing", "pickle"], "module": "modelscope.trainers.audio.kws_farfield_trainer"}, "('TRAINERS', 'default', 'speech-separation')": {"filepath": "TEMPLATE_PATH/trainers/audio/separation_trainer.py", "imports": ["numpy", "torch", "torchaudio", "tqdm", "csv", "os", "speechbrain", "typing"], "module": "modelscope.trainers.audio.separation_trainer"}, "('TRAINERS', 'default', 'speech-asr-trainer')": {"filepath": "TEMPLATE_PATH/trainers/audio/asr_trainer.py", "imports": ["shutil", "json", "typing", "tempfile", "os", "funasr"], "module": "modelscope.trainers.audio.asr_trainer"}, "('TRAINERS', 'default', 'speech-kantts-trainer')": {"filepath": "TEMPLATE_PATH/trainers/audio/tts_trainer.py", "imports": ["shutil", "json", "tempfile", "os", "typing", "zipfile"], "module": "modelscope.trainers.audio.tts_trainer"}, "('TRAINERS', 'default', 'speech_frcrn_ans_cirm_16k')": {"filepath": 
"TEMPLATE_PATH/trainers/audio/ans_trainer.py", "imports": [], "module": "modelscope.trainers.audio.ans_trainer"}, "('HOOKS', 'default', 'CheckpointHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/checkpoint/checkpoint_hook.py", "imports": ["random", "numpy", "torch", "time", "os", "typing"], "module": "modelscope.trainers.hooks.checkpoint.checkpoint_hook"}, "('HOOKS', 'default', 'BestCkptSaverHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/checkpoint/checkpoint_hook.py", "imports": ["random", "numpy", "torch", "time", "os", "typing"], "module": "modelscope.trainers.hooks.checkpoint.checkpoint_hook"}, "('HOOKS', 'default', 'LoadCheckpointHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/checkpoint/load_checkpoint_hook.py", "imports": ["random", "numpy", "torch", "packaging", "typing"], "module": "modelscope.trainers.hooks.checkpoint.load_checkpoint_hook"}, "('HOOKS', 'default', 'TextLoggerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/logger/text_logger_hook.py", "imports": ["json", "torch", "collections", "datetime", "os"], "module": "modelscope.trainers.hooks.logger.text_logger_hook"}, "('HOOKS', 'default', 'TensorboardHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/logger/tensorboard_hook.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.trainers.hooks.logger.tensorboard_hook"}, "('HOOKS', 'default', 'ApexAMPOptimizerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/optimizer/apex_optimizer_hook.py", "imports": ["torch", "logging", "packaging"], "module": "modelscope.trainers.hooks.optimizer.apex_optimizer_hook"}, "('HOOKS', 'default', 'TorchAMPOptimizerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/optimizer/torch_optimizer_hook.py", "imports": ["logging"], "module": "modelscope.trainers.hooks.optimizer.torch_optimizer_hook"}, "('HOOKS', 'default', 'OptimizerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/optimizer/base.py", "imports": ["torch", "logging"], "module": "modelscope.trainers.hooks.optimizer.base"}, "('HOOKS', 'default', 'NoneOptimizerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/optimizer/base.py", "imports": ["torch", "logging"], "module": "modelscope.trainers.hooks.optimizer.base"}, "('HOOKS', 'default', 'MegatronHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/distributed/megatron_hook.py", "imports": ["torch", "os", "shutil", "megatron_util"], "module": "modelscope.trainers.hooks.distributed.megatron_hook"}, "('HOOKS', 'default', 'DeepspeedHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/distributed/deepspeed_hook.py", "imports": ["shutil", "torch", "megatron_util", "deepspeed", "os"], "module": "modelscope.trainers.hooks.distributed.deepspeed_hook"}, "('HOOKS', 'default', 'DDPHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/distributed/ddp_hook.py", "imports": [], "module": "modelscope.trainers.hooks.distributed.ddp_hook"}, "('HOOKS', 'default', 'LrSchedulerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/lr_scheduler_hook.py", "imports": [], "module": "modelscope.trainers.hooks.lr_scheduler_hook"}, "('HOOKS', 'default', 'PlateauLrSchedulerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/lr_scheduler_hook.py", "imports": [], "module": "modelscope.trainers.hooks.lr_scheduler_hook"}, "('HOOKS', 'default', 'NoneLrSchedulerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/lr_scheduler_hook.py", "imports": [], "module": "modelscope.trainers.hooks.lr_scheduler_hook"}, "('HOOKS', 'default', 'EarlyStopHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/early_stop_hook.py", "imports": ["numpy"], "module": 
"modelscope.trainers.hooks.early_stop_hook"}, "('HOOKS', 'default', 'ClipClampLogitScaleHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/clip_clamp_logit_scale_hook.py", "imports": ["torch"], "module": "modelscope.trainers.hooks.clip_clamp_logit_scale_hook"}, "('HOOKS', 'default', 'SparsityHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/compression/sparsity_hook.py", "imports": ["os"], "module": "modelscope.trainers.hooks.compression.sparsity_hook"}, "('HOOKS', 'default', 'IterTimerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/iter_timer_hook.py", "imports": ["time"], "module": "modelscope.trainers.hooks.iter_timer_hook"}, "('HOOKS', 'default', 'EvaluationHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/evaluation_hook.py", "imports": ["typing", "collections"], "module": "modelscope.trainers.hooks.evaluation_hook"}, "('TRAINERS', 'default', 'clip-multi-modal-embedding')": {"filepath": "TEMPLATE_PATH/trainers/multi_modal/clip/clip_trainer.py", "imports": ["torch", "os", "typing", "math"], "module": "modelscope.trainers.multi_modal.clip.clip_trainer"}, "('TRAINERS', 'default', 'efficient-diffusion-tuning')": {"filepath": "TEMPLATE_PATH/trainers/multi_modal/efficient_diffusion_tuning/efficient_diffusion_tuning_trainer.py", "imports": ["torch", "typing"], "module": "modelscope.trainers.multi_modal.efficient_diffusion_tuning.efficient_diffusion_tuning_trainer"}, "('TRAINERS', 'default', 'mplug')": {"filepath": "TEMPLATE_PATH/trainers/multi_modal/mplug/mplug_trainer.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.trainers.multi_modal.mplug.mplug_trainer"}, "('TRAINERS', 'default', 'image-classification-team')": {"filepath": "TEMPLATE_PATH/trainers/multi_modal/team/team_trainer.py", "imports": ["numpy", "torch", "collections", "sklearn", "os", "typing"], "module": "modelscope.trainers.multi_modal.team.team_trainer"}, "('TRAINERS', 'default', 'mgeo-ranking-trainer')": {"filepath": "TEMPLATE_PATH/trainers/multi_modal/mgeo_ranking_trainer.py", "imports": ["torch", "dataclasses", "typing"], "module": "modelscope.trainers.multi_modal.mgeo_ranking_trainer"}, "('TRAINERS', 'default', 'ofa')": {"filepath": "TEMPLATE_PATH/trainers/multi_modal/ofa/ofa_trainer.py", "imports": ["shutil", "json", "torch", "functools", "tempfile", "math", "os", "typing"], "module": "modelscope.trainers.multi_modal.ofa.ofa_trainer"}, "('TRAINERS', 'default', 'nlp-gpt-moe-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/gpt_moe_trainer.py", "imports": ["torch", "collections", "megatron_util", "os", "typing"], "module": "modelscope.trainers.nlp.gpt_moe_trainer"}, "('TRAINERS', 'default', 'nlp-plug-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/plug_trainer.py", "imports": ["torch", "megatron_util", "deepspeed", "os", "typing"], "module": "modelscope.trainers.nlp.plug_trainer"}, "('TRAINERS', 'default', 'text-generation-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/text_generation_trainer.py", "imports": ["torch", "collections"], "module": "modelscope.trainers.nlp.text_generation_trainer"}, "('TRAINERS', 'default', 'document-grounded-dialog-rerank-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/document_grounded_dialog_rerank_trainer.py", "imports": ["transformers", "numpy", "random", "torch", "time", "os", "typing"], "module": "modelscope.trainers.nlp.document_grounded_dialog_rerank_trainer"}, "('TRAINERS', 'default', 'csanmt-translation')": {"filepath": "TEMPLATE_PATH/trainers/nlp/csanmt_translation_trainer.py", "imports": ["os", "tensorflow", "typing", "time"], 
"module": "modelscope.trainers.nlp.csanmt_translation_trainer"}, "('TRAINERS', 'default', 'translation-evaluation-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/translation_evaluation_trainer.py", "imports": ["transformers", "random", "torch", "tqdm", "math", "pandas", "os", "typing"], "module": "modelscope.trainers.nlp.translation_evaluation_trainer"}, "('TRAINERS', 'default', 'faq-question-answering-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/faq_question_answering_trainer.py", "imports": ["distutils", "contextlib", "numpy", "torch", "functools", "collections", "dataclasses", "typing"], "module": "modelscope.trainers.nlp.faq_question_answering_trainer"}, "('TRAINERS', 'default', 'table-question-answering-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/table_question_answering_trainer.py", "imports": ["numpy", "json", "torch", "tqdm", "time", "os", "typing"], "module": "modelscope.trainers.nlp.table_question_answering_trainer"}, "('TRAINERS', 'default', 'bert-sentiment-analysis')": {"filepath": "TEMPLATE_PATH/trainers/nlp/sequence_classification_trainer.py", "imports": ["time", "typing", "numpy"], "module": "modelscope.trainers.nlp.sequence_classification_trainer"}, "('TRAINERS', 'default', 'nlp-sentence-embedding-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/sentence_embedding_trainer.py", "imports": ["transformers", "numpy", "torch", "tqdm", "time", "dataclasses", "typing"], "module": "modelscope.trainers.nlp.sentence_embedding_trainer"}, "('TRAINERS', 'default', 'nlp-gpt3-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/gpt3_trainer.py", "imports": ["torch", "os", "copy", "typing"], "module": "modelscope.trainers.nlp.gpt3_trainer"}, "('TRAINERS', 'default', 'nlp-text-ranking-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/text_ranking_trainer.py", "imports": ["numpy", "torch", "tqdm", "time", "dataclasses", "typing"], "module": "modelscope.trainers.nlp.text_ranking_trainer"}, "('TRAINERS', 'default', 'siamese-uie-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/siamese_uie_trainer.py", "imports": ["random", "numpy", "json", "torch", "collections", "math", "time", "os", "typing"], "module": "modelscope.trainers.nlp.siamese_uie_trainer"}, "('TRAINERS', 'default', 'dialog-intent-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/space/dialog_intent_trainer.py", "imports": ["os", "typing", "numpy"], "module": "modelscope.trainers.nlp.space.dialog_intent_trainer"}, "('TRAINERS', 'default', 'dialog-modeling-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/space/dialog_modeling_trainer.py", "imports": ["os", "time", "typing", "numpy"], "module": "modelscope.trainers.nlp.space.dialog_modeling_trainer"}, "('TRAINERS', 'default', 'document-grounded-dialog-retrieval-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/document_grounded_dialog_retrieval_trainer.py", "imports": ["transformers", "numpy", "json", "torch", "tqdm", "faiss", "os"], "module": "modelscope.trainers.nlp.document_grounded_dialog_retrieval_trainer"}, "('TRAINERS', 'default', 'document-grounded-dialog-generate-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/document_grounded_dialog_generate_trainer.py", "imports": ["string", "transformers", "json", "torch", "rouge", "re", "tqdm", "collections", "os", "sacrebleu"], "module": "modelscope.trainers.nlp.document_grounded_dialog_generate_trainer"}, "('TRAINERS', 'default', 'ocr-recognition')": {"filepath": "TEMPLATE_PATH/trainers/cv/ocr_recognition_trainer.py", "imports": ["torch", "time", "collections"], "module": 
"modelscope.trainers.cv.ocr_recognition_trainer"}, "('TRAINERS', 'default', 'image-instance-segmentation')": {"filepath": "TEMPLATE_PATH/trainers/cv/image_instance_segmentation_trainer.py", "imports": [], "module": "modelscope.trainers.cv.image_instance_segmentation_trainer"}, "('TRAINERS', 'default', 'referring-video-object-segmentation')": {"filepath": "TEMPLATE_PATH/trainers/cv/referring_video_object_segmentation_trainer.py", "imports": ["torch", "os"], "module": "modelscope.trainers.cv.referring_video_object_segmentation_trainer"}, "('TRAINERS', 'default', 'vision-efficient-tuning')": {"filepath": "TEMPLATE_PATH/trainers/cv/vision_efficient_tuning_trainer.py", "imports": ["torch", "typing"], "module": "modelscope.trainers.cv.vision_efficient_tuning_trainer"}, "('TRAINERS', 'default', 'movie-scene-segmentation')": {"filepath": "TEMPLATE_PATH/trainers/cv/movie_scene_segmentation_trainer.py", "imports": [], "module": "modelscope.trainers.cv.movie_scene_segmentation_trainer"}, "('TRAINERS', 'default', 'nerf-recon-acc')": {"filepath": "TEMPLATE_PATH/trainers/cv/nerf_recon_acc_trainer.py", "imports": ["random", "numpy", "cv2", "torch", "tqdm", "time", "datetime", "glob", "os", "typing"], "module": "modelscope.trainers.cv.nerf_recon_acc_trainer"}, "('TRAINERS', 'default', 'tinynas-damoyolo')": {"filepath": "TEMPLATE_PATH/trainers/cv/image_detection_damoyolo_trainer.py", "imports": ["torch", "math", "datetime", "time", "os", "easydict", "typing"], "module": "modelscope.trainers.cv.image_detection_damoyolo_trainer"}, "('TRAINERS', 'default', 'image-classification')": {"filepath": "TEMPLATE_PATH/trainers/cv/image_classifition_trainer.py", "imports": ["numpy", "torch", "copy", "time", "os", "typing"], "module": "modelscope.trainers.cv.image_classifition_trainer"}, "('TRAINERS', 'default', 'cartoon-translation')": {"filepath": "TEMPLATE_PATH/trainers/cv/cartoon_translation_trainer.py", "imports": ["tensorflow", "numpy", "tqdm", "packaging", "os", "typing"], "module": "modelscope.trainers.cv.cartoon_translation_trainer"}, "('TRAINERS', 'default', 'ocr-detection-db')": {"filepath": "TEMPLATE_PATH/trainers/cv/ocr_detection_db_trainer.py", "imports": ["numpy", "torch", "tqdm", "math", "copy", "datetime", "time", "os", "easydict", "typing"], "module": "modelscope.trainers.cv.ocr_detection_db_trainer"}, "('TRAINERS', 'default', 'card-detection-scrfd')": {"filepath": "TEMPLATE_PATH/trainers/cv/card_detection_scrfd_trainer.py", "imports": [], "module": "modelscope.trainers.cv.card_detection_scrfd_trainer"}, "('TRAINERS', 'default', 'face-detection-scrfd')": {"filepath": "TEMPLATE_PATH/trainers/cv/face_detection_scrfd_trainer.py", "imports": ["copy", "time", "typing", "os"], "module": "modelscope.trainers.cv.face_detection_scrfd_trainer"}, "('TRAINERS', 'default', 'image-inpainting')": {"filepath": "TEMPLATE_PATH/trainers/cv/image_inpainting_trainer.py", "imports": ["torch", "time", "collections"], "module": "modelscope.trainers.cv.image_inpainting_trainer"}, "('TRAINERS', 'default', 'image-portrait-enhancement')": {"filepath": "TEMPLATE_PATH/trainers/cv/image_portrait_enhancement_trainer.py", "imports": ["torch", "collections"], "module": "modelscope.trainers.cv.image_portrait_enhancement_trainer"}, "('TRAINERS', 'default', 'action-detection')": {"filepath": "TEMPLATE_PATH/trainers/cv/action_detection_trainer.py", "imports": ["torch", "fvcore", "os", "typing", "detectron2"], "module": "modelscope.trainers.cv.action_detection_trainer"}, "('TRAINERS', 'default', 'image-fewshot-detection')": {"filepath": 
"TEMPLATE_PATH/trainers/cv/image_defrcn_fewshot_detection_trainer.py", "imports": ["torch", "collections", "os", "typing", "detectron2"], "module": "modelscope.trainers.cv.image_defrcn_fewshot_detection_trainer"}, "('TRAINERS', 'default', 'trainer')": {"filepath": "TEMPLATE_PATH/trainers/trainer.py", "imports": ["distutils", "json", "torch", "functools", "collections", "copy", "inspect", "os", "typing"], "module": "modelscope.trainers.trainer"}, "('TRAINERS', 'default', 'dummy')": {"filepath": "TEMPLATE_PATH/trainers/base.py", "imports": ["os", "abc", "typing", "time"], "module": "modelscope.trainers.base"}, "('CUSTOM_DATASETS', 'image-quality-assessment-degradation', 'image-quality-assessment-degradation')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_quality_assessment_degradation/image_quality_assessment_degradation_dataset.py", "imports": ["torchvision"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.image_quality_assessment_degradation.image_quality_assessment_degradation_dataset"}, "('CUSTOM_DATASETS', 'image-portrait-enhancement', 'PairedDataset')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_portrait_enhancement/image_portrait_enhancement_dataset.py", "imports": ["cv2", "numpy"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.image_portrait_enhancement.image_portrait_enhancement_dataset"}, "('CUSTOM_DATASETS', 'nli', 'veco')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/veco_dataset.py", "imports": ["datasets", "typing", "numpy"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.veco_dataset"}, "('CUSTOM_DATASETS', 'image-segmentation', 'cascade_mask_rcnn_swin')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_instance_segmentation_coco_dataset.py", "imports": ["os", "numpy", "pycocotools"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.image_instance_segmentation_coco_dataset"}, "('CUSTOM_DATASETS', 'ocr-recognition', 'OCRRecognition')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_recognition_dataset.py", "imports": ["PIL", "numpy", "cv2", "json", "torch", "six", "lmdb", "os"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_recognition_dataset"}, "('CUSTOM_DATASETS', 'bad-image-detecting', 'bad-image-detecting')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/bad_image_detecting/bad_image_detecting_dataset.py", "imports": [], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.bad_image_detecting.bad_image_detecting_dataset"}, "('CUSTOM_DATASETS', 'image-inpainting', 'FFTInpainting')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_inpainting/image_inpainting_dataset.py", "imports": ["albumentations", "numpy", "enum", "cv2", "os", "glob"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.image_inpainting.image_inpainting_dataset"}, "('CUSTOM_DATASETS', 'language-guided-video-summarization', 'clip-it-language-guided-video-summarization')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/language_guided_video_summarization_dataset.py", "imports": ["numpy", "json", "torch", "h5py", "os"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.language_guided_video_summarization_dataset"}, "('CUSTOM_DATASETS', 'movie-scene-segmentation', 'resnet50-bert')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py", 
"imports": ["random", "json", "torch", "copy", "torchvision", "os"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.movie_scene_segmentation.movie_scene_segmentation_dataset"}, "('CUSTOM_DATASETS', 'text-ranking', 'bert')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/text_ranking_dataset.py", "imports": ["torch", "typing", "random"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.text_ranking_dataset"}, "('CUSTOM_DATASETS', 'sentence-embedding', 'bert')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/text_ranking_dataset.py", "imports": ["torch", "typing", "random"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.text_ranking_dataset"}, "('CUSTOM_DATASETS', 'image-denoising', 'SiddDataset')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/sidd_image_denoising/sidd_image_denoising_dataset.py", "imports": ["cv2", "numpy"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.sidd_image_denoising.sidd_image_denoising_dataset"}, "('CUSTOM_DATASETS', 'image-deblurring', 'RedsDataset')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/reds_image_deblurring_dataset.py", "imports": ["cv2", "numpy"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.reds_image_deblurring_dataset"}, "('CUSTOM_DATASETS', 'video-frame-interpolation', 'video-frame-interpolation')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_frame_interpolation/video_frame_interpolation_dataset.py", "imports": ["cv2", "torch", "numpy"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.video_frame_interpolation.video_frame_interpolation_dataset"}, "('CUSTOM_DATASETS', 'image-quality-assessment-mos', 'image-quality-assessment-mos')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_quality_assmessment_mos/image_quality_assessment_mos_dataset.py", "imports": [], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.image_quality_assmessment_mos.image_quality_assessment_mos_dataset"}, "('CUSTOM_DATASETS', 'text-ranking', 'mgeo')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/mgeo_ranking_dataset.py", "imports": ["json", "torch", "typing", "random"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.mgeo_ranking_dataset"}, "('CUSTOM_DATASETS', 'video-stabilization', 'video-stabilization')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_stabilization/video_stabilization_dataset.py", "imports": [], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.video_stabilization.video_stabilization_dataset"}, "('CUSTOM_DATASETS', 'image-deblurring', 'GoproDataset')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/gopro_image_deblurring_dataset.py", "imports": ["cv2", "numpy"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.gopro_image_deblurring_dataset"}, "('CUSTOM_DATASETS', 'referring-video-object-segmentation', 'swinT-referring-video-object-segmentation')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py", "imports": ["numpy", "pycocotools", "json", "torch", "tqdm", "h5py", "glob", "torchvision", "pandas", "os"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.referring_video_object_segmentation.referring_video_object_segmentation_dataset"}, "('CUSTOM_DATASETS', 'image-colorization', 'ddcolor')": {"filepath": 
"TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_colorization/image_colorization_dataset.py", "imports": ["cv2", "torch", "numpy"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.image_colorization.image_colorization_dataset"}, "('CUSTOM_DATASETS', 'video-super-resolution', 'real-basicvsr')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_super_resolution/video_super_resolution_dataset.py", "imports": ["cv2", "torch", "collections", "numpy"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.video_super_resolution.video_super_resolution_dataset"}, "('EXPORTERS', 'acoustic-noise-suppression', 'speech_dfsmn_ans')": {"filepath": "TEMPLATE_PATH/exporters/audio/ans_dfsmn_exporter.py", "imports": ["torch", "os"], "module": "modelscope.exporters.audio.ans_dfsmn_exporter"}, "('EXPORTERS', 'translation', 'csanmt-translation')": {"filepath": "TEMPLATE_PATH/exporters/nlp/csanmt_for_translation_exporter.py", "imports": ["os", "typing", "tensorflow"], "module": "modelscope.exporters.nlp.csanmt_for_translation_exporter"}, "('EXPORTERS', 'transformer-crf', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/exporters/nlp/model_for_token_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.model_for_token_classification_exporter"}, "('EXPORTERS', 'token-classification', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/exporters/nlp/model_for_token_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.model_for_token_classification_exporter"}, "('EXPORTERS', 'named-entity-recognition', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/exporters/nlp/model_for_token_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.model_for_token_classification_exporter"}, "('EXPORTERS', 'part-of-speech', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/exporters/nlp/model_for_token_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.model_for_token_classification_exporter"}, "('EXPORTERS', 'word-segmentation', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/exporters/nlp/model_for_token_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.model_for_token_classification_exporter"}, "('EXPORTERS', 'text-classification', 'bert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'text-classification', 'structbert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'sentence-similarity', 'bert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'sentiment-classification', 'bert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'nli', 'bert')": 
{"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'sentence-similarity', 'structbert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'sentiment-classification', 'structbert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'nli', 'structbert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'zero-shot-classification', 'bert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_zero_shot_classification_exporter.py", "imports": ["typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_zero_shot_classification_exporter"}, "('EXPORTERS', 'zero-shot-classification', 'structbert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_zero_shot_classification_exporter.py", "imports": ["typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_zero_shot_classification_exporter"}, "('EXPORTERS', 'image-object-detection', 'tinynas-damoyolo')": {"filepath": "TEMPLATE_PATH/exporters/cv/object_detection_damoyolo_exporter.py", "imports": ["numpy", "torch", "functools", "onnx", "os", "typing"], "module": "modelscope.exporters.cv.object_detection_damoyolo_exporter"}, "('EXPORTERS', 'face-detection', 'scrfd')": {"filepath": "TEMPLATE_PATH/exporters/cv/face_detection_scrfd_exporter.py", "imports": ["numpy", "torch", "functools", "onnx", "os", "typing"], "module": "modelscope.exporters.cv.face_detection_scrfd_exporter"}, "('EXPORTERS', 'default', 'cartoon-translation')": {"filepath": "TEMPLATE_PATH/exporters/cv/cartoon_translation_exporter.py", "imports": ["os", "tensorflow", "typing", "packaging"], "module": "modelscope.exporters.cv.cartoon_translation_exporter"}}, "requirements": {"modelscope.models.science.unifold.config": ["copy", "typing", "ml_collections"], "modelscope.models.science.unifold.msa.tools.hmmsearch": ["os", "subprocess", "absl", "typing"], "modelscope.models.science.unifold.msa.tools.hhblits": ["subprocess", "absl", "glob", "os", "typing"], "modelscope.models.science.unifold.msa.tools.kalign": ["os", "subprocess", "absl", "typing"], "modelscope.models.science.unifold.msa.tools.utils": ["time", "contextlib", "shutil", "absl", "typing", "tempfile"], "modelscope.models.science.unifold.msa.tools.hmmbuild": ["os", "subprocess", "absl", "re"], "modelscope.models.science.unifold.msa.tools.jackhmmer": ["urllib", "subprocess", "absl", "glob", "os", "concurrent", "typing"], "modelscope.models.science.unifold.msa.tools.hhsearch": ["subprocess", "absl", "glob", "os", "typing"], "modelscope.models.science.unifold.msa.mmcif": ["io", "dataclasses", "absl", "functools", "typing", "collections", "Bio"], "modelscope.models.science.unifold.msa.msa_identifiers": ["dataclasses", "re", "typing"], "modelscope.models.science.unifold.msa.parsers": ["string", "itertools", "dataclasses", "re", "typing", "collections"], 
"modelscope.models.science.unifold.msa.templates": ["numpy", "abc", "absl", "functools", "re", "datetime", "glob", "os", "dataclasses", "typing"], "modelscope.models.science.unifold.msa.utils": ["json", "os", "absl", "typing"], "modelscope.models.science.unifold.msa.pipeline": ["os", "absl", "typing", "numpy"], "modelscope.models.science.unifold.model": ["torch", "os", "typing", "argparse"], "modelscope.models.science.unifold.dataset": ["numpy", "json", "torch", "logging", "unicore", "copy", "ml_collections", "os", "typing"], "modelscope.models.science.unifold.modules.confidence": ["torch", "typing"], "modelscope.models.science.unifold.modules.alphafold": ["torch", "unicore"], "modelscope.models.science.unifold.modules.evoformer": ["torch", "functools", "typing", "unicore"], "modelscope.models.science.unifold.modules.auxillary_heads": ["torch", "typing", "unicore"], "modelscope.models.science.unifold.modules.attentions": ["torch", "functools", "typing", "unicore"], "modelscope.models.science.unifold.modules.embedders": ["torch", "typing", "unicore"], "modelscope.models.science.unifold.modules.structure_module": ["torch", "typing", "math", "unicore"], "modelscope.models.science.unifold.modules.common": ["torch", "functools", "typing", "unicore"], "modelscope.models.science.unifold.modules.frame": ["torch", "__future__", "typing", "numpy"], "modelscope.models.science.unifold.modules.template": ["torch", "functools", "math", "unicore", "typing"], "modelscope.models.science.unifold.modules.triangle_multiplication": ["torch", "functools", "typing", "unicore"], "modelscope.models.science.unifold.modules.featurization": ["torch", "typing", "unicore"], "modelscope.models.science.unifold.data.process_multimer": ["typing", "collections", "numpy"], "modelscope.models.science.unifold.data.protein": ["numpy", "Bio", "io", "dataclasses", "typing"], "modelscope.models.science.unifold.data.residue_constants": ["numpy", "os", "functools", "typing", "collections", "unicore"], "modelscope.models.science.unifold.data.utils": ["numpy", "json", "functools", "scipy", "copy", "gzip", "typing", "pickle"], "modelscope.models.science.unifold.data.process": ["torch", "typing", "numpy"], "modelscope.models.science.unifold.data.msa_pairing": ["numpy", "scipy", "collections", "pandas", "typing"], "modelscope.models.science.unifold.data.data_ops": ["itertools", "numpy", "torch", "functools", "operator", "unicore", "typing"], "modelscope.models.builder": [], "modelscope.models.audio.ans.layers.activations": ["torch"], "modelscope.models.audio.ans.layers.layer_base": ["six", "abc", "torch", "numpy"], "modelscope.models.audio.ans.layers.affine_transform": ["torch"], "modelscope.models.audio.ans.layers.uni_deep_fsmn": ["torch", "numpy"], "modelscope.models.audio.ans.unet": ["torch"], "modelscope.models.audio.ans.conv_stft": ["torch", "scipy", "numpy"], "modelscope.models.audio.ans.denoise_net": ["torch"], "modelscope.models.audio.ans.complex_nn": ["torch"], "modelscope.models.audio.ans.se_module_complex": ["torch"], "modelscope.models.audio.ans.frcrn": ["torch", "os", "typing"], "modelscope.models.audio.sv.DTDNN_layers": ["torch"], "modelscope.models.audio.sv.ecapa_tdnn": ["torch", "torchaudio", "math", "os", "typing"], "modelscope.models.audio.sv.ERes2Net": ["torch", "torchaudio", "math", "os", "typing"], "modelscope.models.audio.sv.pooling_layers": ["torch"], "modelscope.models.audio.sv.DTDNN": ["torch", "torchaudio", "collections", "os", "typing"], "modelscope.models.audio.sv.fusion": ["torch"], 
"modelscope.models.audio.sv.generic_speaker_verification": ["os", "typing"], "modelscope.models.audio.sv.speaker_change_locator": ["numpy", "torch", "torchaudio", "collections", "os", "typing"], "modelscope.models.audio.sv.rdino": ["torch", "torchaudio", "math", "os", "typing"], "modelscope.models.audio.itn.generic_inverse_text_processing": ["os", "typing"], "modelscope.models.audio.aec.layers.activations": ["torch"], "modelscope.models.audio.aec.layers.layer_base": ["torch", "abc", "re", "numpy"], "modelscope.models.audio.aec.layers.deep_fsmn": ["torch", "numpy"], "modelscope.models.audio.aec.layers.affine_transform": ["torch", "numpy"], "modelscope.models.audio.aec.layers.uni_deep_fsmn": ["torch", "numpy"], "modelscope.models.audio.aec.network.se_net": ["torch"], "modelscope.models.audio.aec.network.loss": ["torch"], "modelscope.models.audio.aec.network.modulation_loss": ["torch", "torchaudio", "math"], "modelscope.models.audio.asr.wenet_automatic_speech_recognition": ["json", "os", "wenetruntime", "typing"], "modelscope.models.audio.asr.generic_automatic_speech_recognition": ["os", "typing"], "modelscope.models.audio.punc.generic_punctuation": ["os", "typing"], "modelscope.models.audio.tts.voice": ["numpy", "json", "torch", "kantts", "collections", "time", "yaml", "os", "threading", "pickle"], "modelscope.models.audio.tts.sambert_hifi": ["shutil", "numpy", "json", "__future__", "wave", "matplotlib", "datetime", "yaml", "os", "zipfile"], "modelscope.models.audio.separation.mossformer": ["torch", "os", "copy", "typing"], "modelscope.models.audio.separation.mossformer_conv_module": ["torch"], "modelscope.models.audio.separation.mossformer_block": ["torch"], "modelscope.models.audio.separation.layer_norm": ["torch", "__future__"], "modelscope.models.audio.kws.farfield.fsmn": ["torch", "numpy"], "modelscope.models.audio.kws.farfield.fsmn_sele_v2": ["torch"], "modelscope.models.audio.kws.farfield.fsmn_sele_v3": ["torch"], "modelscope.models.audio.kws.farfield.model_def": ["math", "struct", "enum"], "modelscope.models.audio.kws.farfield.model": ["os", "typing", "tempfile"], "modelscope.models.audio.kws.generic_key_word_spotting": ["os", "typing"], "modelscope.models.audio.kws.nearfield.fsmn": ["torch", "typing", "numpy"], "modelscope.models.audio.kws.nearfield.model": ["torch", "tempfile", "sys", "os", "typing"], "modelscope.models.audio.kws.nearfield.cmvn": ["torch", "re", "numpy"], "modelscope.models.multi_modal.ofa_for_all_tasks": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "modelscope.models.multi_modal.clip.configuration_bert": ["__future__", "logging"], "modelscope.models.multi_modal.clip.bert_tokenizer": ["six", "unicodedata", "__future__", "re", "os", "collections"], "modelscope.models.multi_modal.clip.model": ["numpy", "json", "torch", "collections", "os", "typing"], "modelscope.models.multi_modal.clip.modeling_bert": ["json", "torch", "logging", "__future__", "sys", "math", "io", "os"], "modelscope.models.multi_modal.mplug_for_all_tasks": ["os", "typing"], "modelscope.models.multi_modal.multi_stage_diffusion.decoder": ["torch", "math"], "modelscope.models.multi_modal.multi_stage_diffusion.prior": ["torch", "math"], "modelscope.models.multi_modal.multi_stage_diffusion.upsampler": ["torch", "math"], "modelscope.models.multi_modal.multi_stage_diffusion.model": ["PIL", "numpy", "json", "torch", "math", "os", "typing"], "modelscope.models.multi_modal.multi_stage_diffusion.tokenizer": ["transformers", "gzip", "torch", "regex", "functools", "ftfy", "html"], 
"modelscope.models.multi_modal.multi_stage_diffusion.xglm": ["torch", "math"], "modelscope.models.multi_modal.multi_stage_diffusion.gaussian_diffusion": ["torch", "math"], "modelscope.models.multi_modal.multi_stage_diffusion.clip": ["torch", "math"], "modelscope.models.multi_modal.diffusion.structbert": ["copy", "six", "numpy", "json", "torch", "__future__", "math"], "modelscope.models.multi_modal.diffusion.diffusion": ["torch", "math"], "modelscope.models.multi_modal.diffusion.unet_generator": ["torch", "math"], "modelscope.models.multi_modal.diffusion.model": ["numpy", "json", "torch", "os", "typing"], "modelscope.models.multi_modal.diffusion.tokenizer": ["unicodedata", "six", "__future__", "collections"], "modelscope.models.multi_modal.diffusion.unet_upsampler_256": ["torch", "functools", "math"], "modelscope.models.multi_modal.diffusion.unet_upsampler_1024": ["torch", "math"], "modelscope.models.multi_modal.efficient_diffusion_tuning.efficient_stable_diffusion": ["transformers", "torch", "functools", "diffusers", "os", "typing"], "modelscope.models.multi_modal.gemm.gemm_base": ["numpy", "json", "torch", "collections", "os", "typing"], "modelscope.models.multi_modal.gemm.gemm_model": ["PIL", "numpy", "json", "torch", "torchvision", "os", "typing"], "modelscope.models.multi_modal.gemm.tokenizer": ["gzip", "torch", "os", "regex", "functools", "ftfy", "html"], "modelscope.models.multi_modal.mmr.dataloaders.rawvideo_util": ["PIL", "numpy", "torch", "cv2", "torchvision"], "modelscope.models.multi_modal.mmr.models.module_clip": ["urllib", "hashlib", "torch", "tqdm", "collections", "warnings", "os", "typing"], "modelscope.models.multi_modal.mmr.models.clip_for_mm_video_embedding": ["urllib", "PIL", "random", "numpy", "json", "torch", "decord", "tempfile", "os", "typing", "uuid"], "modelscope.models.multi_modal.mmr.models.module_cross": ["json", "torch", "logging", "collections", "__future__"], "modelscope.models.multi_modal.mmr.models.until_module": ["torch", "logging", "math", "numpy"], "modelscope.models.multi_modal.mmr.models.tokenization_clip": ["gzip", "os", "regex", "functools", "ftfy", "html"], "modelscope.models.multi_modal.mmr.models.modeling": ["torch", "types", "collections", "platform", "os"], "modelscope.models.multi_modal.mmr.models.dynamic_inverted_softmax": ["numpy"], "modelscope.models.multi_modal.mplug.predictor": ["torch", "__future__"], "modelscope.models.multi_modal.mplug.clip.clip": ["torch", "typing", "collections"], "modelscope.models.multi_modal.mplug.modeling_mplug": ["transformers", "torch", "math", "os", "typing"], "modelscope.models.multi_modal.mplug.mvit": ["timm", "numpy", "torch", "functools", "fairscale", "collections"], "modelscope.models.multi_modal.mplug.configuration_mplug": ["os", "typing", "transformers", "yaml"], "modelscope.models.multi_modal.team.team_model": ["PIL", "numpy", "torch", "cv2", "tokenizers", "torchvision", "typing"], "modelscope.models.multi_modal.team.utils": ["transformers", "numpy", "torch", "typing", "collections"], "modelscope.models.multi_modal.guided_diffusion.respace": ["torch", "numpy"], "modelscope.models.multi_modal.guided_diffusion.unet": ["transformers", "numpy", "torch", "abc", "math"], "modelscope.models.multi_modal.guided_diffusion.gaussian_diffusion": ["torch", "math", "numpy", "enum"], "modelscope.models.multi_modal.guided_diffusion.script": [], "modelscope.models.multi_modal.vldoc.tokenization": ["os", "transformers"], "modelscope.models.multi_modal.vldoc.model": ["json", "torch", "logging", "re", "math", "sys", "copy", 
"torchvision", "os"], "modelscope.models.multi_modal.vldoc.conv_fpn_trans": ["timm", "random", "torch", "collections", "apex"], "modelscope.models.multi_modal.vldoc.transformer_local": ["copy", "torch"], "modelscope.models.multi_modal.vldoc.modeling_layout_roberta": ["transformers", "packaging", "torch", "os", "math"], "modelscope.models.multi_modal.vldoc.processing": ["PIL", "timm", "numpy", "cv2", "torch", "collections", "torchvision", "typing"], "modelscope.models.multi_modal.vldoc.convnext": ["torch", "os", "timm"], "modelscope.models.multi_modal.soonet.model": ["torch", "os"], "modelscope.models.multi_modal.soonet.tokenizer": ["gzip", "torch", "regex", "functools", "ftfy", "html"], "modelscope.models.multi_modal.soonet.utils": ["copy", "decord", "numpy", "tqdm"], "modelscope.models.multi_modal.soonet.blocks": ["torch", "math"], "modelscope.models.multi_modal.soonet.swin_transformer": ["torch", "numpy"], "modelscope.models.multi_modal.soonet.clip": ["warnings", "numpy", "torch", "typing", "collections"], "modelscope.models.multi_modal.mgeo.text_ranking": ["torch"], "modelscope.models.multi_modal.mgeo.backbone": ["dataclasses", "transformers", "random", "torch", "math", "warnings", "os", "typing"], "modelscope.models.multi_modal.mgeo.text_classification": ["torch"], "modelscope.models.multi_modal.mgeo.token_classification": ["torch"], "modelscope.models.multi_modal.mplug_owl.configuration_mplug_owl": ["copy", "os", "typing", "transformers"], "modelscope.models.multi_modal.mplug_owl.modeling_mplug_owl": ["dataclasses", "transformers", "random", "torch", "logging", "math", "copy", "io", "os", "typing"], "modelscope.models.multi_modal.ofa_for_text_to_image_synthesis_model": ["PIL", "pkg_resources", "numpy", "json", "torch", "taming", "torchvision", "os", "typing"], "modelscope.models.multi_modal.video_synthesis.diffusion": ["torch"], "modelscope.models.multi_modal.video_synthesis.text_to_video_synthesis_model": ["open_clip", "torch", "einops", "os", "typing"], "modelscope.models.multi_modal.video_synthesis.autoencoder": ["torch", "numpy"], "modelscope.models.multi_modal.video_synthesis.unet_sd": ["torch", "einops", "math"], "modelscope.models.multi_modal.clip_interrogator.model": ["PIL", "hashlib", "numpy", "open_clip", "torch", "dataclasses", "os", "typing", "requests", "transformers", "safetensors", "tqdm", "math", "time", "torchvision"], "modelscope.models.multi_modal.rleg.model": ["json", "os", "torch"], "modelscope.models.multi_modal.rleg.rleg": ["torch", "torchvision", "typing"], "modelscope.models.multi_modal.dpm_solver_pytorch": ["torch", "math"], "modelscope.models.multi_modal.ofa.modeling_ofa": ["transformers", "random", "torch", "math", "packaging", "apex", "dataclasses", "typing"], "modelscope.models.multi_modal.ofa.utils.utils": ["torch", "typing"], "modelscope.models.multi_modal.ofa.utils.constant": [], "modelscope.models.multi_modal.ofa.vit": ["torch", "collections", "fairseq"], "modelscope.models.multi_modal.ofa.modeling_mmspeech": ["transformers", "numpy", "torch", "math", "fairseq", "packaging", "apex", "dataclasses", "typing"], "modelscope.models.multi_modal.ofa.resnet": ["torch"], "modelscope.models.multi_modal.ofa.tokenization_ofa": ["os", "typing", "transformers", "collections"], "modelscope.models.multi_modal.ofa.generate.multihead_attention": ["torch", "typing", "math", "fairseq"], "modelscope.models.multi_modal.ofa.generate.ngram_repeat_block": ["warnings", "torch", "typing", "math", "fairseq"], "modelscope.models.multi_modal.ofa.generate.sequence_generator": 
["torch", "math", "typing", "sys"], "modelscope.models.multi_modal.ofa.generate.incremental_decoding_utils": ["torch", "typing", "uuid"], "modelscope.models.multi_modal.ofa.generate.utils": ["amp_C", "itertools", "torch_xla", "torch", "collections"], "modelscope.models.multi_modal.ofa.generate.search": ["torch", "typing", "math"], "modelscope.models.multi_modal.ofa.generate.token_generation_constraints": ["torch", "typing", "collections"], "modelscope.models.multi_modal.ofa.tokenization_ofa_fast": ["json", "typing", "transformers", "tokenizers"], "modelscope.models.multi_modal.ofa.configuration_mmspeech": ["warnings", "transformers"], "modelscope.models.multi_modal.ofa.configuration_ofa": ["warnings", "transformers"], "modelscope.models.nlp.unite.configuration": ["enum"], "modelscope.models.nlp.unite.translation_evaluation": ["transformers", "numpy", "torch", "math", "warnings", "packaging", "dataclasses", "typing"], "modelscope.models.nlp.palm_v2.configuration": ["transformers"], "modelscope.models.nlp.palm_v2.dureader_eval": ["zipfile", "numpy", "json", "rouge", "re", "sys", "math", "collections", "copy", "argparse"], "modelscope.models.nlp.palm_v2.text_generation": ["dataclasses", "subprocess", "codecs", "transformers", "numpy", "json", "torch", "math", "copy", "os", "typing"], "modelscope.models.nlp.structbert.configuration": ["transformers"], "modelscope.models.nlp.structbert.fill_mask": ["torch", "transformers"], "modelscope.models.nlp.structbert.backbone": ["transformers", "torch", "math", "packaging", "dataclasses", "typing"], "modelscope.models.nlp.structbert.faq_question_answering": ["torch", "math", "collections", "os", "typing"], "modelscope.models.nlp.structbert.adv_utils": ["torch"], "modelscope.models.nlp.structbert.text_classification": ["torch"], "modelscope.models.nlp.structbert.token_classification": ["torch"], "modelscope.models.nlp.hf_transformers.backbone": ["transformers"], "modelscope.models.nlp.task_models.fill_mask": ["torch", "typing", "numpy"], "modelscope.models.nlp.task_models.text_ranking": ["typing", "numpy"], "modelscope.models.nlp.task_models.feature_extraction": ["typing", "numpy"], "modelscope.models.nlp.task_models.text_classification": ["typing", "numpy"], "modelscope.models.nlp.task_models.task_model": ["torch", "abc", "re", "collections", "os", "typing"], "modelscope.models.nlp.task_models.text_generation": ["torch", "typing", "transformers", "numpy"], "modelscope.models.nlp.task_models.information_extraction": ["typing", "numpy"], "modelscope.models.nlp.task_models.token_classification": ["torch", "typing"], "modelscope.models.nlp.veco.configuration": ["transformers"], "modelscope.models.nlp.veco.fill_mask": ["transformers"], "modelscope.models.nlp.veco.backbone": ["transformers"], "modelscope.models.nlp.veco.text_classification": ["transformers"], "modelscope.models.nlp.veco.token_classification": ["torch", "transformers"], "modelscope.models.nlp.glm_130b.initialize": ["torch", "time", "argparse", "SwissArmyTransformer"], "modelscope.models.nlp.glm_130b.quantization.functional": ["torch"], "modelscope.models.nlp.glm_130b.quantization.layers": ["torch", "SwissArmyTransformer"], "modelscope.models.nlp.glm_130b.text_generation": ["random", "stat", "torch", "SwissArmyTransformer", "re", "functools", "sys", "copy", "time", "os", "typing"], "modelscope.models.nlp.glm_130b.generation.strategies": ["torch", "numpy", "SwissArmyTransformer"], "modelscope.models.nlp.mglm.tasks.superglue.pvp": ["string", "tasks", "random", "numpy", "abc", "utils", "math", 
"collections", "copy", "typing"], "modelscope.models.nlp.mglm.tasks.superglue.dataset": ["random", "numpy", "json", "abc", "torch", "collections", "os", "typing", "re", "tqdm", "utils", "csv", "copy", "glob", "pandas", "data_utils"], "modelscope.models.nlp.mglm.tasks.superglue.evaluate": ["string", "tasks", "__future__", "functools", "typing", "re", "collections"], "modelscope.models.nlp.mglm.tasks.superglue.finetune": ["tasks", "collections", "finetune_glm"], "modelscope.models.nlp.mglm.tasks.data_utils": ["numpy", "json", "torch", "re", "copy", "megatron_util", "typing", "pickle"], "modelscope.models.nlp.mglm.tasks.seq2seq.dataset": ["tasks", "random", "numpy", "json", "torch", "tqdm", "utils", "os", "data_utils"], "modelscope.models.nlp.mglm.tasks.seq2seq.evaluate": ["string", "rouge_score", "datetime", "random", "megatron_util", "torch", "generation_utils"], "modelscope.models.nlp.mglm.tasks.seq2seq.finetune": ["tasks", "pretrain_glm", "megatron_util", "torch", "functools", "collections", "finetune_glm"], "modelscope.models.nlp.mglm.tasks.language_model.detokenizer": ["re"], "modelscope.models.nlp.mglm.tasks.language_model.dataset": ["tasks", "itertools", "numpy", "json", "torch", "utils", "math", "bisect"], "modelscope.models.nlp.mglm.tasks.language_model.finetune": ["tasks", "pretrain_glm", "megatron_util", "torch", "functools", "math", "finetune_glm"], "modelscope.models.nlp.mglm.tasks.eval_utils": ["tasks", "random", "torch", "utils", "collections", "finetune_glm", "datetime", "time", "sklearn", "megatron_util", "os", "typing"], "modelscope.models.nlp.mglm.blocklm_utils": ["copy", "numpy", "random", "torch", "megatron_util", "scipy", "math"], "modelscope.models.nlp.mglm.train_utils": ["torch", "apex", "deepspeed", "megatron_util"], "modelscope.models.nlp.mglm.test.test_block": ["numpy", "argparse", "blocklm_utils", "random"], "modelscope.models.nlp.mglm.test.test_rel_shift": ["torch", "learning_rates", "numpy", "matplotlib"], "modelscope.models.nlp.mglm.arguments": ["json", "torch", "deepspeed", "os", "argparse"], "modelscope.models.nlp.mglm.data_utils.tokenization_gpt2": ["json", "logging", "__future__", "sys", "functools", "io", "os", "regex"], "modelscope.models.nlp.mglm.data_utils.lazy_loader": ["time", "itertools", "mmap", "numpy", "torch", "os", "pickle"], "modelscope.models.nlp.mglm.data_utils.wordpiece": ["logging", "collections", "io", "unicodedata", "__future__", "os"], "modelscope.models.nlp.mglm.data_utils.datasets": ["random", "numpy", "json", "torch", "operator", "nltk", "bisect", "os", "itertools", "tqdm", "math", "csv", "time", "pandas"], "modelscope.models.nlp.mglm.data_utils.tokenization": ["itertools", "random", "torch", "collections", "csv", "sentencepiece", "nltk", "os", "regex"], "modelscope.models.nlp.mglm.data_utils.extraction": ["os", "glob", "json", "nltk"], "modelscope.models.nlp.mglm.data_utils.file_utils": ["urllib", "hashlib", "json", "botocore", "sys", "io", "pathlib", "os", "requests", "shutil", "logging", "functools", "tempfile", "tqdm", "boto3", "__future__"], "modelscope.models.nlp.mglm.data_utils.sp_tokenizer": ["os"], "modelscope.models.nlp.mglm.data_utils.corpora": ["multiprocessing", "random", "json", "torch", "tqdm", "collections", "queue", "os"], "modelscope.models.nlp.mglm.data_utils.samplers": ["numpy", "torch", "math", "os", "sys"], "modelscope.models.nlp.mglm.mglm_for_text_summarization": ["random", "numpy", "torch", "megatron_util", "os", "typing"], "modelscope.models.nlp.mglm.process_grid": ["os", "json", "glob", "statistics", "sys"], 
"modelscope.models.nlp.mglm.generation_utils": ["torch", "abc", "typing", "collections"], "modelscope.models.nlp.mglm.utils": ["subprocess", "random", "numpy", "json", "torch", "time", "megatron_util", "os"], "modelscope.models.nlp.mglm.configure_data": ["itertools", "random", "numpy", "torch", "copy", "bisect", "megatron_util", "os"], "modelscope.models.nlp.mglm.model.distributed": ["torch", "megatron_util"], "modelscope.models.nlp.mglm.model.transformer": ["apex", "deepspeed", "megatron_util", "torch", "math"], "modelscope.models.nlp.mglm.model.modeling_bert": ["shutil", "json", "torch", "logging", "__future__", "tempfile", "math", "copy", "apex", "megatron_util", "tarfile", "os", "data_utils"], "modelscope.models.nlp.mglm.model.prompt": ["torch", "random"], "modelscope.models.nlp.mglm.model.modeling_glm": ["torch", "megatron_util"], "modelscope.models.nlp.mglm.model.downstream": ["torch"], "modelscope.models.nlp.mglm.run_test": ["sys", "test"], "modelscope.models.nlp.plug_mental.configuration": ["transformers"], "modelscope.models.nlp.plug_mental.backbone": ["transformers", "torch", "math", "packaging", "dataclasses", "typing"], "modelscope.models.nlp.plug_mental.adv_utils": ["torch"], "modelscope.models.nlp.plug_mental.text_classification": ["torch"], "modelscope.models.nlp.gpt_moe.configuration": ["torch", "transformers"], "modelscope.models.nlp.gpt_moe.backbone": ["transformers", "torch", "typing", "math", "os", "addict"], "modelscope.models.nlp.gpt_moe.tokenizer": ["tokenizers"], "modelscope.models.nlp.gpt_moe.distributed_gpt_moe": ["torch", "transformers", "math", "megatron_util"], "modelscope.models.nlp.gpt_moe.text_generation": ["typing", "transformers"], "modelscope.models.nlp.gpt_moe.moe.sharded_moe": ["tutel", "torch", "scipy", "math", "apex", "megatron_util", "typing"], "modelscope.models.nlp.gpt_moe.moe.utils": ["torch", "typing"], "modelscope.models.nlp.gpt_moe.moe.layer": ["torch", "typing", "megatron_util"], "modelscope.models.nlp.gpt_moe.moe.experts": ["copy", "torch"], "modelscope.models.nlp.gpt_moe.moe.mappings": ["torch", "megatron_util"], "modelscope.models.nlp.gpt_moe.checkpointing": ["torch", "os", "megatron_util"], "modelscope.models.nlp.csanmt.translation": ["tensorflow", "typing", "math", "collections"], "modelscope.models.nlp.T5.text2text_generation": ["transformers", "torch", "copy", "warnings", "typing"], "modelscope.models.nlp.T5.configuration": ["typing", "transformers"], "modelscope.models.nlp.T5.backbone": ["transformers", "torch", "math", "copy", "warnings", "os", "typing"], "modelscope.models.nlp.heads.text_classification_head": ["torch", "typing"], "modelscope.models.nlp.heads.infromation_extraction_head": ["torch"], "modelscope.models.nlp.heads.token_classification_head": ["torch", "typing"], "modelscope.models.nlp.heads.text_generation_head": ["torch", "typing"], "modelscope.models.nlp.heads.crf_head": ["torch", "typing", "transformers"], "modelscope.models.nlp.heads.torch_pretrain_head": ["torch", "typing", "transformers"], "modelscope.models.nlp.heads.fill_mask_head": ["torch", "typing", "transformers"], "modelscope.models.nlp.heads.text_ranking_head": ["torch", "typing"], "modelscope.models.nlp.bloom.backbone": ["transformers"], "modelscope.models.nlp.xlm_roberta.configuration": ["typing", "transformers", "collections"], "modelscope.models.nlp.xlm_roberta.backbone": ["torch", "transformers", "math", "packaging"], "modelscope.models.nlp.peer.configuration": ["transformers"], "modelscope.models.nlp.peer.sas_utils": ["numpy", "nltk", "torch", 
"random"], "modelscope.models.nlp.peer.backbone": ["transformers", "torch", "math", "dataclasses", "typing"], "modelscope.models.nlp.peer.text_classification": ["copy", "torch"], "modelscope.models.nlp.fid_T5.text_generation": ["torch", "os", "io", "transformers"], "modelscope.models.nlp.space_T_en.text_to_sql": ["torch", "os", "typing", "text2sql_lgesql"], "modelscope.models.nlp.canmt.sequence_generator": ["numpy", "torch", "math", "typing", "sys", "fairseq"], "modelscope.models.nlp.canmt.canmt_translation": ["numpy", "torch", "math", "os", "typing"], "modelscope.models.nlp.canmt.canmt_model": ["numpy", "torch", "typing", "math", "fairseq"], "modelscope.models.nlp.bart.text_error_correction": ["torch", "os", "typing"], "modelscope.models.nlp.use.transformer": ["torch", "math"], "modelscope.models.nlp.use.user_satisfaction_estimation": ["transformers", "numpy", "torch", "os", "typing"], "modelscope.models.nlp.gpt_neo.backbone": ["transformers"], "modelscope.models.nlp.bert.configuration": ["typing", "transformers", "collections"], "modelscope.models.nlp.bert.siamese_uie": ["torch", "copy"], "modelscope.models.nlp.bert.fill_mask": [], "modelscope.models.nlp.bert.word_alignment": ["torch"], "modelscope.models.nlp.bert.text_ranking": [], "modelscope.models.nlp.bert.backbone": ["torch", "transformers", "math", "packaging"], "modelscope.models.nlp.bert.text_classification": [], "modelscope.models.nlp.bert.sentence_embedding": ["torch"], "modelscope.models.nlp.bert.document_segmentation": ["torch", "typing"], "modelscope.models.nlp.bert.token_classification": [], "modelscope.models.nlp.dgds.backbone": ["torch", "__future__", "os", "transformers"], "modelscope.models.nlp.dgds.document_grounded_dialog_rerank": ["torch", "os", "typing"], "modelscope.models.nlp.dgds.document_grounded_dialog_generate": ["torch", "os", "typing"], "modelscope.models.nlp.dgds.document_grounded_dialog_retrieval": ["torch", "os", "typing"], "modelscope.models.nlp.gpt3.configuration": ["torch", "transformers"], "modelscope.models.nlp.gpt3.backbone": ["transformers", "torch", "typing", "math", "os", "addict"], "modelscope.models.nlp.gpt3.tokenizer": ["typing", "tokenizers"], "modelscope.models.nlp.gpt3.distributed_gpt3": ["transformers", "torch", "math", "collections", "megatron_util", "os", "typing"], "modelscope.models.nlp.gpt3.text_generation": ["torch", "typing", "transformers", "collections"], "modelscope.models.nlp.deberta_v2.configuration": ["transformers"], "modelscope.models.nlp.deberta_v2.fill_mask": ["torch", "typing", "transformers"], "modelscope.models.nlp.deberta_v2.backbone": ["torch", "typing", "transformers", "collections"], "modelscope.models.nlp.deberta_v2.tokenization": ["transformers", "unicodedata", "sentencepiece", "typing", "os"], "modelscope.models.nlp.deberta_v2.tokenization_fast": ["os", "typing", "transformers", "shutil"], "modelscope.models.nlp.codegeex.codegeex_for_code_translation": ["torch", "copy", "typing"], "modelscope.models.nlp.codegeex.tokenizer": ["torch", "typing", "transformers"], "modelscope.models.nlp.codegeex.codegeex_for_code_generation": ["torch", "copy", "typing"], "modelscope.models.nlp.codegeex.inference": ["torch", "typing"], "modelscope.models.nlp.codegeex.codegeex": ["torch", "math"], "modelscope.models.nlp.space.configuration": [], "modelscope.models.nlp.space.dialog_modeling": ["os", "typing"], "modelscope.models.nlp.space.dialog_state_tracking": ["torch", "typing", "transformers"], "modelscope.models.nlp.space.model.intent_unified_transformer": ["torch"], 
"modelscope.models.nlp.space.model.tokenization_space": ["transformers"], "modelscope.models.nlp.space.model.unified_transformer": ["torch", "numpy"], "modelscope.models.nlp.space.model.model_base": ["torch", "os"], "modelscope.models.nlp.space.model.generator": ["torch", "math", "numpy"], "modelscope.models.nlp.space.model.gen_unified_transformer": ["torch"], "modelscope.models.nlp.space.dialog_intent_prediction": ["os", "typing"], "modelscope.models.nlp.space.modules.transformer_block": ["torch"], "modelscope.models.nlp.space.modules.functions": ["torch", "numpy"], "modelscope.models.nlp.space.modules.multihead_attention": ["torch"], "modelscope.models.nlp.space.modules.feedforward": ["torch"], "modelscope.models.nlp.space.modules.embedder": ["torch"], "modelscope.models.nlp.fid_plug.configuration": ["transformers"], "modelscope.models.nlp.fid_plug.backbone": ["dataclasses", "transformers", "numpy", "torch", "math", "copy", "os", "typing"], "modelscope.models.nlp.fid_plug.text_generation": ["torch", "os", "io", "transformers"], "modelscope.models.nlp.gpt2.backbone": ["transformers"], "modelscope.models.nlp.plug.distributed_plug": ["torch", "typing", "megatron_util"], "modelscope.models.nlp.plug.configuration": ["copy", "json", "transformers"], "modelscope.models.nlp.plug.backbone": ["torch", "logging", "math", "megatron_util", "__future__"], "modelscope.models.nlp.plug.AnnealingLR": ["torch", "math"], "modelscope.models.nlp.plug.generator": ["torch"], "modelscope.models.nlp.megatron_bert.configuration": ["typing", "transformers", "collections"], "modelscope.models.nlp.megatron_bert.fill_mask": ["torch", "transformers"], "modelscope.models.nlp.megatron_bert.backbone": ["torch", "transformers", "math"], "modelscope.models.nlp.space_T_cn.configuration": ["copy", "__future__", "logging", "json"], "modelscope.models.nlp.space_T_cn.backbone": ["shutil", "numpy", "torch", "__future__", "tempfile", "math", "copy", "tarfile", "os"], "modelscope.models.nlp.space_T_cn.table_question_answering": ["transformers", "numpy", "torch", "os", "typing"], "modelscope.models.nlp.ponet.configuration": ["transformers"], "modelscope.models.nlp.ponet.fill_mask": ["torch", "transformers"], "modelscope.models.nlp.ponet.backbone": ["distutils", "transformers", "torch", "math", "packaging"], "modelscope.models.nlp.ponet.tokenization": ["typing", "transformers"], "modelscope.models.nlp.ponet.document_segmentation": ["torch", "typing"], "modelscope.models.nlp.llama.configuration": ["transformers"], "modelscope.models.nlp.llama.convert_llama_weights_to_hf": ["shutil", "gc", "json", "torch", "math", "os", "argparse"], "modelscope.models.nlp.llama.backbone": ["torch", "typing", "transformers", "math"], "modelscope.models.nlp.llama.tokenization": ["transformers", "shutil", "sentencepiece", "os", "typing"], "modelscope.models.nlp.llama.tokenization_fast": ["os", "typing", "transformers", "shutil"], "modelscope.models.nlp.llama.text_generation": ["torch", "typing"], "modelscope.models.nlp.lstm.backbone": ["torch"], "modelscope.models.nlp.lstm.token_classification": [], "modelscope.models.cv.image_deblur.nafnet_for_image_deblur": ["torch", "os", "typing"], "modelscope.models.cv.vision_middleware.backbone": ["numpy", "torch", "math", "collections", "os", "typing"], "modelscope.models.cv.vision_middleware.model": ["json", "torch", "typing", "os"], "modelscope.models.cv.vision_middleware.head": ["torch", "abc", "mmcv", "numpy"], "modelscope.models.cv.vision_middleware.vim": ["torch", "einops", "math"], 
"modelscope.models.cv.image_quality_assessment_man.swin": ["warnings", "itertools", "torch", "einops", "math", "collections"], "modelscope.models.cv.image_quality_assessment_man.maniqa": ["timm", "torch", "einops"], "modelscope.models.cv.image_quality_assessment_man.image_quality_assessment_man": ["torch", "os", "typing"], "modelscope.models.cv.product_retrieval_embedding.item_detection": ["cv2", "numpy"], "modelscope.models.cv.product_retrieval_embedding.item_model": ["torch", "os", "typing", "numpy"], "modelscope.models.cv.product_retrieval_embedding.item_embedding": ["cv2", "torch", "numpy"], "modelscope.models.cv.body_2d_keypoints.w48": [], "modelscope.models.cv.body_2d_keypoints.hrnet_v2": ["torch", "os", "numpy"], "modelscope.models.cv.body_2d_keypoints.hrnet_basic_modules": ["torch"], "modelscope.models.cv.indoor_layout_estimation.panovit": ["torch", "os", "yacs", "numpy"], "modelscope.models.cv.indoor_layout_estimation.networks.modality.layout": ["numpy", "torch", "scipy", "math", "shapely"], "modelscope.models.cv.indoor_layout_estimation.networks.misc.panostretch": ["functools", "scipy", "numpy"], "modelscope.models.cv.indoor_layout_estimation.networks.misc.fourier": ["PIL", "scipy", "numpy"], "modelscope.models.cv.indoor_layout_estimation.networks.misc.post_proc": ["scipy", "sklearn", "numpy"], "modelscope.models.cv.indoor_layout_estimation.networks.panovit": ["torch", "numpy"], "modelscope.models.cv.indoor_layout_estimation.networks.utils": ["torch", "numpy"], "modelscope.models.cv.indoor_layout_estimation.networks.backbone.vit_horizon_pry_image": ["timm", "torch", "numpy"], "modelscope.models.cv.indoor_layout_estimation.networks.backbone.resnet_DA": ["torch", "torchvision"], "modelscope.models.cv.salient_detection.salient_model": ["PIL", "torch", "cv2", "torchvision", "os"], "modelscope.models.cv.salient_detection.models.senet": ["torch"], "modelscope.models.cv.salient_detection.models.utils": ["torch"], "modelscope.models.cv.salient_detection.models.modules": ["torch"], "modelscope.models.cv.salient_detection.models.u2net": ["torch"], "modelscope.models.cv.salient_detection.models.backbone.Res2Net_v1b": ["torch", "math"], "modelscope.models.cv.image_quality_assessment_degradation.degradation_model": ["time", "torchvision", "json", "numpy", "cv2", "torch", "collections"], "modelscope.models.cv.image_quality_assessment_degradation.image_quality_assessment_degradation": ["torch", "os", "typing"], "modelscope.models.cv.image_portrait_enhancement.losses.model_irse": ["torch"], "modelscope.models.cv.image_portrait_enhancement.losses.losses": ["torch"], "modelscope.models.cv.image_portrait_enhancement.losses.helpers": ["torch", "collections"], "modelscope.models.cv.image_portrait_enhancement.retinaface.detection": ["cv2", "torch", "os", "numpy"], "modelscope.models.cv.image_portrait_enhancement.retinaface.models.retinaface": ["torch", "torchvision", "collections"], "modelscope.models.cv.image_portrait_enhancement.retinaface.models.net": ["torch", "time", "torchvision"], "modelscope.models.cv.image_portrait_enhancement.retinaface.utils": ["torch", "itertools", "math", "numpy"], "modelscope.models.cv.image_portrait_enhancement.gpen": ["itertools", "random", "torch", "functools", "operator", "math"], "modelscope.models.cv.image_portrait_enhancement.image_portrait_enhancement": ["torch", "os", "typing", "math"], "modelscope.models.cv.image_portrait_enhancement.align_faces": ["cv2", "skimage", "numpy"], "modelscope.models.cv.image_portrait_enhancement.eqface.fqa": ["cv2", "torch", "os", 
"numpy"], "modelscope.models.cv.image_portrait_enhancement.eqface.model_resnet": ["torch"], "modelscope.models.cv.abnormal_object_detection.mmdet_ms.roi_head.mask_scoring_roi_head": ["torch", "mmdet"], "modelscope.models.cv.abnormal_object_detection.mmdet_ms.roi_head.roi_extractors.single_level_roi_extractor": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.abnormal_object_detection.mmdet_model": ["torch", "os", "numpy"], "modelscope.models.cv.image_probing_model.backbone": ["PIL", "numpy", "torch", "functools", "operator", "sys", "math", "collections", "torchvision"], "modelscope.models.cv.image_probing_model.model": ["json", "torch", "typing", "os"], "modelscope.models.cv.image_probing_model.utils": ["torch", "re"], "modelscope.models.cv.tinynas_classfication.super_res_kxkx": ["torch", "uuid"], "modelscope.models.cv.tinynas_classfication.super_res_k1kxk1": ["torch", "uuid"], "modelscope.models.cv.tinynas_classfication.model_zoo": [], "modelscope.models.cv.tinynas_classfication.super_blocks": ["torch", "uuid"], "modelscope.models.cv.tinynas_classfication.basic_blocks": ["torch", "uuid", "numpy"], "modelscope.models.cv.tinynas_classfication.master_net": ["torch"], "modelscope.models.cv.tinynas_classfication.plain_net_utils": ["torch"], "modelscope.models.cv.tinynas_classfication.super_res_idwexkx": ["torch", "uuid"], "modelscope.models.cv.tinynas_classfication.global_utils": [], "modelscope.models.cv.image_to_image_translation.model_translation": ["torch", "math"], "modelscope.models.cv.image_to_image_translation.models.autoencoder": ["torch", "math"], "modelscope.models.cv.image_to_image_translation.models.clip": ["torch", "math"], "modelscope.models.cv.image_to_image_translation.ops.metrics": ["torch", "scipy", "numpy"], "modelscope.models.cv.image_to_image_translation.ops.diffusion": ["torch", "math"], "modelscope.models.cv.image_to_image_translation.ops.apps": ["PIL", "numpy", "torch", "artist", "torchvision", "os"], "modelscope.models.cv.image_to_image_translation.ops.svd": ["torch"], "modelscope.models.cv.image_to_image_translation.ops.random_mask": ["cv2", "numpy"], "modelscope.models.cv.image_to_image_translation.ops.degradation": ["random", "numpy", "cv2", "torch", "scipy", "os", "math"], "modelscope.models.cv.image_to_image_translation.ops.random_color": ["colorsys", "random"], "modelscope.models.cv.image_to_image_translation.ops.utils": ["PIL", "hashlib", "multiprocessing", "base64", "numpy", "cv2", "json", "torch", "math", "io", "binascii", "os", "zipfile"], "modelscope.models.cv.image_to_image_translation.ops.losses": ["torch", "math"], "modelscope.models.cv.image_to_image_translation.data.transforms": ["torchvision", "PIL", "math", "random"], "modelscope.models.cv.video_human_matting.models.decoder": ["torch", "typing"], "modelscope.models.cv.video_human_matting.models.effv2": ["torch"], "modelscope.models.cv.video_human_matting.models.lraspp": ["torch"], "modelscope.models.cv.video_human_matting.models.matting": ["torch", "typing"], "modelscope.models.cv.video_human_matting.models.deep_guided_filter": ["torch"], "modelscope.models.cv.video_human_matting.model": ["numpy", "torch", "torchvision", "os", "typing"], "modelscope.models.cv.language_guided_video_summarization.transformer.models": ["torch", "numpy"], "modelscope.models.cv.language_guided_video_summarization.transformer.modules": ["torch"], "modelscope.models.cv.language_guided_video_summarization.transformer.sub_layers": ["torch", "numpy"], 
"modelscope.models.cv.language_guided_video_summarization.transformer.layers": ["torch"], "modelscope.models.cv.language_guided_video_summarization.summarizer": ["numpy", "videofeatures_clipit", "torch", "bmt_clipit", "os", "typing", "argparse"], "modelscope.models.cv.facial_landmark_confidence.flc.facial_landmark_confidence": ["PIL", "numpy", "torch", "cv2", "os"], "modelscope.models.cv.facial_landmark_confidence.flc.manual_landmark_net": ["torch", "math"], "modelscope.models.cv.image_to_image_generation.models.autoencoder": ["torch", "math"], "modelscope.models.cv.image_to_image_generation.models.clip": ["torch", "math"], "modelscope.models.cv.image_to_image_generation.model": ["torch", "math"], "modelscope.models.cv.image_to_image_generation.ops.diffusion": ["torch", "math"], "modelscope.models.cv.image_to_image_generation.ops.losses": ["torch", "math"], "modelscope.models.cv.image_to_image_generation.data.transforms": ["torchvision", "PIL", "math", "random"], "modelscope.models.cv.image_body_reshaping.person_info": ["torch", "cv2", "copy", "numpy"], "modelscope.models.cv.image_body_reshaping.model": ["torch"], "modelscope.models.cv.image_body_reshaping.slim_utils": ["random", "numpy", "cv2", "torch", "os", "math", "numba"], "modelscope.models.cv.image_body_reshaping.pose_estimator.body": ["numpy", "cv2", "torch", "scipy", "math"], "modelscope.models.cv.image_body_reshaping.pose_estimator.util": ["numpy"], "modelscope.models.cv.image_body_reshaping.pose_estimator.model": ["torch", "collections"], "modelscope.models.cv.image_body_reshaping.image_body_reshaping": ["numpy", "cv2", "torch", "os", "typing"], "modelscope.models.cv.image_human_parsing.m2fp_net": ["torch", "os", "typing"], "modelscope.models.cv.image_human_parsing.m2fp.m2fp_decoder": ["torch"], "modelscope.models.cv.image_human_parsing.m2fp.m2fp_encoder": ["torch", "typing", "numpy"], "modelscope.models.cv.image_human_parsing.parsing_utils": ["copy", "torch", "PIL", "numpy"], "modelscope.models.cv.image_human_parsing.backbone.deeplab_resnet": ["torch", "numpy"], "modelscope.models.cv.image_skychange.ptsemseg.hrnet_super_and_ocr": ["torch", "__future__", "numpy"], "modelscope.models.cv.image_skychange.ptsemseg.BlockModules": ["torch"], "modelscope.models.cv.image_skychange.ptsemseg.unet": ["torch"], "modelscope.models.cv.image_skychange.ptsemseg.hrnet_backnone": ["torch", "os", "logging", "numpy"], "modelscope.models.cv.image_skychange.skychange": ["numbers", "PIL", "pdb", "numpy", "cv2", "json", "torch", "collections", "torchvision", "os"], "modelscope.models.cv.image_skychange.preprocessor": ["numbers", "pdb", "numpy", "cv2", "json", "torch", "torchvision", "typing"], "modelscope.models.cv.image_skychange.skychange_model": ["pdb", "cv2", "torch", "json", "math", "collections", "time", "os", "typing"], "modelscope.models.cv.video_object_segmentation.aggregate": ["torch"], "modelscope.models.cv.video_object_segmentation.inference_memory_bank": ["torch", "math"], "modelscope.models.cv.video_object_segmentation.inference_core": ["torch"], "modelscope.models.cv.video_object_segmentation.model": ["torch", "os", "typing"], "modelscope.models.cv.video_object_segmentation.eval_network": ["torch"], "modelscope.models.cv.video_object_segmentation.mod_resnet": ["torch", "math", "collections"], "modelscope.models.cv.video_object_segmentation.network": ["torch", "math"], "modelscope.models.cv.video_object_segmentation.modules": ["torch", "torchvision"], "modelscope.models.cv.video_object_segmentation.cbam": ["torch"], 
"modelscope.models.cv.face_reconstruction.models.nv_diffrast": ["nvdiffrast", "numpy", "torch", "warnings", "typing"], "modelscope.models.cv.face_reconstruction.models.renderer": ["torch", "imageio", "skimage", "numpy"], "modelscope.models.cv.face_reconstruction.models.unet": ["torch", "warnings"], "modelscope.models.cv.face_reconstruction.models.bfm": ["torch", "os", "scipy", "numpy"], "modelscope.models.cv.face_reconstruction.models.opt": [], "modelscope.models.cv.face_reconstruction.models.networks": ["torch", "os", "typing", "kornia"], "modelscope.models.cv.face_reconstruction.models.de_retouching_module": ["torch"], "modelscope.models.cv.face_reconstruction.models.losses": ["torch", "numpy", "kornia"], "modelscope.models.cv.face_reconstruction.models.pix2pix.pix2pix_options": [], "modelscope.models.cv.face_reconstruction.models.pix2pix.pix2pix_model": ["torch"], "modelscope.models.cv.face_reconstruction.models.pix2pix.networks": ["torch", "functools"], "modelscope.models.cv.face_reconstruction.models.facelandmark.nets.large_eyeball_net": ["torch"], "modelscope.models.cv.face_reconstruction.models.facelandmark.nets.large_base_lmks_net": ["torch"], "modelscope.models.cv.face_reconstruction.models.facelandmark.large_base_lmks_infer": ["torch", "numpy"], "modelscope.models.cv.face_reconstruction.models.facerecon_model": ["numpy", "cv2", "torch", "collections", "os"], "modelscope.models.cv.face_reconstruction.utils": ["PIL", "numpy", "array", "cv2", "torch", "scipy", "math", "numba", "os", "argparse"], "modelscope.models.cv.facial_expression_recognition.fer.transforms": ["numbers", "PIL", "numpy", "torch", "types"], "modelscope.models.cv.facial_expression_recognition.fer.vgg": ["torch"], "modelscope.models.cv.facial_expression_recognition.fer.facial_expression_recognition": ["PIL", "numpy", "torch", "cv2", "os"], "modelscope.models.cv.face_recognition.align_face": ["cv2", "skimage", "numpy"], "modelscope.models.cv.face_recognition.torchkit.rts_backbone": ["torch", "os", "math", "collections"], "modelscope.models.cv.face_recognition.torchkit.backbone.facemask_backbone": ["torch", "collections"], "modelscope.models.cv.face_recognition.torchkit.backbone.model_irse": ["torch", "collections"], "modelscope.models.cv.face_recognition.torchkit.backbone.model_resnet": ["torch"], "modelscope.models.cv.face_recognition.torchkit.backbone.common": ["torch"], "modelscope.models.cv.face_recognition.torchkit.backbone.arcface_backbone": ["torch"], "modelscope.models.cv.face_generation.stylegan2": ["random", "torch", "functools", "operator", "math"], "modelscope.models.cv.face_generation.op.fused_act": ["torch", "os"], "modelscope.models.cv.face_generation.op.upfirdn2d": ["torch", "os", "collections"], "modelscope.models.cv.face_generation.op.conv2d_gradfix": ["torch", "warnings", "contextlib"], "modelscope.models.cv.shop_segmentation.head_fpn": ["timm", "torch", "mmcv", "numpy"], "modelscope.models.cv.shop_segmentation.models": ["torch", "timm", "math", "collections"], "modelscope.models.cv.shop_segmentation.common": ["torch", "warnings"], "modelscope.models.cv.shop_segmentation.utils": ["torch", "functools", "ftfy", "gzip", "os", "regex", "typing", "html"], "modelscope.models.cv.shop_segmentation.shop_seg_base": ["torch"], "modelscope.models.cv.shop_segmentation.neck_fpn": ["torch", "mmcv", "timm"], "modelscope.models.cv.shop_segmentation.shop_seg_model": ["PIL", "numpy", "torch", "json", "os", "typing"], "modelscope.models.cv.image_instance_segmentation.maskdino.ms_deform_attn": ["warnings", "mmcv", 
"torch", "__future__", "math"], "modelscope.models.cv.image_instance_segmentation.maskdino.position_encoding": ["torch", "math"], "modelscope.models.cv.image_instance_segmentation.maskdino.dino_decoder": ["torch", "typing"], "modelscope.models.cv.image_instance_segmentation.maskdino.maskdino_encoder": ["torch", "typing", "numpy"], "modelscope.models.cv.image_instance_segmentation.maskdino.utils": ["copy", "torch", "math"], "modelscope.models.cv.image_instance_segmentation.maskdino.maskdino_decoder": ["torch"], "modelscope.models.cv.image_instance_segmentation.maskdino_swin": ["torch", "os"], "modelscope.models.cv.image_instance_segmentation.datasets.transforms": ["os", "numpy"], "modelscope.models.cv.image_instance_segmentation.fastinst.fastinst_encoder": ["torch", "logging", "typing"], "modelscope.models.cv.image_instance_segmentation.fastinst.fastinst_decoder": ["torch", "math"], "modelscope.models.cv.image_instance_segmentation.cascade_mask_rcnn_swin": ["torch", "os", "collections"], "modelscope.models.cv.image_instance_segmentation.fastinst_model": ["torch", "os", "typing"], "modelscope.models.cv.image_instance_segmentation.model": ["torch", "os", "typing"], "modelscope.models.cv.image_instance_segmentation.postprocess_utils": ["itertools", "numpy", "pycocotools", "cv2", "torch"], "modelscope.models.cv.image_instance_segmentation.backbones.resnet": ["torch"], "modelscope.models.cv.image_instance_segmentation.backbones.swin_transformer": ["torch", "timm", "numpy"], "modelscope.models.cv.image_instance_segmentation.maskdino_model": ["torch", "os", "typing"], "modelscope.models.cv.action_detection.modules.resnet": ["torch", "detectron2"], "modelscope.models.cv.action_detection.modules.action_detection_pytorch": ["torch", "fvcore", "logging", "typing", "detectron2"], "modelscope.models.cv.action_detection.action_detection_onnx": ["urllib", "subprocess", "shutil", "numpy", "cv2", "tempfile", "onnxruntime", "os", "uuid"], "modelscope.models.cv.vop_retrieval.backbone": ["urllib", "hashlib", "numpy", "torch", "tqdm", "collections", "warnings", "os", "typing"], "modelscope.models.cv.vop_retrieval.basic_utils": ["PIL", "ujson", "shutil", "random", "numpy", "cv2", "torch", "collections", "torchvision", "os", "pickle", "zipfile"], "modelscope.models.cv.vop_retrieval.model": ["torch", "os"], "modelscope.models.cv.vop_retrieval.tokenization_clip": ["gzip", "torch", "os", "regex", "functools", "ftfy", "html"], "modelscope.models.cv.vop_retrieval.model_se": ["torch", "os"], "modelscope.models.cv.video_instance_segmentation.track.kernel_update_head": ["torch", "mmcv", "mmdet", "numpy"], "modelscope.models.cv.video_instance_segmentation.track.mask_hungarian_assigner": ["torch", "scipy", "mmdet", "numpy"], "modelscope.models.cv.video_instance_segmentation.video_knet": ["torch", "mmdet"], "modelscope.models.cv.video_instance_segmentation.head.kernel_updator": ["torch", "mmcv"], "modelscope.models.cv.video_instance_segmentation.head.kernel_update_head": ["torch", "mmcv", "mmdet", "numpy"], "modelscope.models.cv.video_instance_segmentation.head.kernel_frame_iter_head": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.video_instance_segmentation.head.kernel_head": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.video_instance_segmentation.head.kernel_iter_head": ["torch", "mmdet"], "modelscope.models.cv.video_instance_segmentation.utils": ["torch", "mmdet", "numpy"], "modelscope.models.cv.video_instance_segmentation.neck.msdeformattn_decoder": ["torch", "mmcv", "mmdet"], 
"modelscope.models.cv.super_resolution.ecb": ["torch"], "modelscope.models.cv.super_resolution.ecbsr_model": ["torch", "os", "typing"], "modelscope.models.cv.super_resolution.rrdbnet_arch": ["torch"], "modelscope.models.cv.super_resolution.arch_util": ["torchvision", "warnings", "itertools", "torch", "math", "collections"], "modelscope.models.cv.ocr_detection.preprocessor": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "modelscope.models.cv.ocr_detection.model": ["torch", "os", "typing", "numpy"], "modelscope.models.cv.ocr_detection.utils": ["cv2", "pyclipper", "shapely", "numpy"], "modelscope.models.cv.ocr_detection.modules.dbnet": ["torch", "math", "os", "sys", "collections"], "modelscope.models.cv.ocr_detection.modules.seg_detector_loss": ["torch", "sys"], "modelscope.models.cv.panorama_depth_estimation.networks.util": ["cv2", "scipy", "numpy"], "modelscope.models.cv.panorama_depth_estimation.networks.mobilenet": ["torch"], "modelscope.models.cv.panorama_depth_estimation.networks.equi": ["torch", "__future__", "collections", "numpy"], "modelscope.models.cv.panorama_depth_estimation.networks.resnet": ["torch"], "modelscope.models.cv.panorama_depth_estimation.networks.unifuse": ["torch", "__future__", "collections", "numpy"], "modelscope.models.cv.panorama_depth_estimation.networks.layers": ["torch", "numpy"], "modelscope.models.cv.panorama_depth_estimation.unifuse_model": ["torch", "os", "torchvision", "numpy"], "modelscope.models.cv.stream_yolo.utils.format": ["math"], "modelscope.models.cv.stream_yolo.utils.boxes": ["torch", "torchvision"], "modelscope.models.cv.stream_yolo.models.tal_head": ["torch"], "modelscope.models.cv.stream_yolo.models.dfp_pafpn": ["torch"], "modelscope.models.cv.stream_yolo.models.streamyolo": ["torch"], "modelscope.models.cv.stream_yolo.models.network_blocks": ["torch"], "modelscope.models.cv.stream_yolo.models.darknet": ["torch"], "modelscope.models.cv.stream_yolo.realtime_video_detector": ["numpy", "cv2", "torch", "logging", "json", "tqdm", "time", "os", "argparse"], "modelscope.models.cv.stream_yolo.exp.build": ["os", "sys"], "modelscope.models.cv.stream_yolo.exp.base_exp": ["torch", "abc"], "modelscope.models.cv.stream_yolo.exp.default.streamyolo": ["torch", "os", "sys"], "modelscope.models.cv.stream_yolo.exp.yolox_base": ["torch", "os", "random"], "modelscope.models.cv.stream_yolo.data.data_augment": ["cv2", "math", "random", "numpy"], "modelscope.models.cv.virual_tryon.sdafnet": ["torch", "random", "numpy"], "modelscope.models.cv.bad_image_detecting.bad_image_detecting": ["numpy", "torch", "torchvision", "os", "typing"], "modelscope.models.cv.human_reconstruction.Reconstruction": ["PIL", "skimage", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "modelscope.models.cv.human_reconstruction.models.Surface_head": ["torch"], "modelscope.models.cv.human_reconstruction.models.Res_backbone": ["torch", "numpy"], "modelscope.models.cv.human_reconstruction.models.Embedding": ["torch"], "modelscope.models.cv.human_reconstruction.models.PixToMesh": ["torch"], "modelscope.models.cv.human_reconstruction.models.networks": ["torch", "functools", "numpy"], "modelscope.models.cv.human_reconstruction.models.human_segmenter": ["cv2", "tensorflow", "numpy"], "modelscope.models.cv.human_reconstruction.models.geometry": ["torch"], "modelscope.models.cv.human_reconstruction.models.detectors": ["torch", "numpy"], "modelscope.models.cv.human_reconstruction.utils": ["mcubes", "os", "torch", "numpy"], "modelscope.models.cv.image_driving_perception.preprocessor": 
["cv2", "torch", "typing", "numpy"], "modelscope.models.cv.image_driving_perception.utils": ["torch", "time", "torchvision", "numpy"], "modelscope.models.cv.image_driving_perception.image_driving_percetion_model": ["numpy", "cv2", "torch", "os", "typing"], "modelscope.models.cv.video_streaming_perception.longshortnet.longshortnet": ["numpy", "cv2", "torch", "logging", "json", "tqdm", "time", "os", "argparse"], "modelscope.models.cv.video_streaming_perception.longshortnet.models.longshort_backbone_neck": ["torch"], "modelscope.models.cv.video_streaming_perception.longshortnet.models.longshort": ["torch"], "modelscope.models.cv.video_streaming_perception.longshortnet.models.dfp_pafpn_short": ["torch", "collections"], "modelscope.models.cv.video_streaming_perception.longshortnet.models.dfp_pafpn_long": ["torch", "collections"], "modelscope.models.cv.video_streaming_perception.longshortnet.exp.longshortnet_base": [], "modelscope.models.cv.image_paintbyexample.model": ["torch", "paint_ldm", "omegaconf", "os", "typing"], "modelscope.models.cv.image_inpainting.refinement": ["numpy", "cv2", "torch", "tqdm", "kornia"], "modelscope.models.cv.image_inpainting.model": ["torch", "os", "typing"], "modelscope.models.cv.image_inpainting.default": ["torch", "bisect"], "modelscope.models.cv.image_inpainting.modules.ade20k.resnet": ["torch", "os", "math"], "modelscope.models.cv.image_inpainting.modules.ade20k.base": ["torch", "os"], "modelscope.models.cv.image_inpainting.modules.adversarial": ["torch", "typing"], "modelscope.models.cv.image_inpainting.modules.perceptual": ["torch", "torchvision"], "modelscope.models.cv.image_inpainting.modules.inception": ["torch", "torchvision"], "modelscope.models.cv.image_inpainting.modules.ffc": ["torch", "numpy", "kornia"], "modelscope.models.cv.image_inpainting.modules.pix2pixhd": ["numpy", "torch", "logging", "functools", "collections"], "modelscope.models.cv.image_inpainting.modules.feature_matching": ["torch", "typing"], "modelscope.models.cv.image_inpainting.base": ["torch", "typing"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.core.bbox.coders.nms_free_coder": ["torch", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.core.bbox.util": ["torch", "mmdet3d", "numpy"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.core.bbox.match_costs.match_cost": ["torch", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.core.bbox.assigners.hungarian_assigner_3d": ["torch", "scipy", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.nuscenes_dataset": ["mmdet3d", "mmdet", "numpy"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.pipelines.loading": ["mmcv", "mmdet", "numpy"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.pipelines.transform_3d": ["PIL", "copy", "mmcv", "numpy", "torch", "mmdet3d", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.dense_heads.petrv2_dednhead": ["mmcv", "numpy", "torch", "math", "copy", "mmdet3d", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.dense_heads.depth_net": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.necks.cp_fpn": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.petr_transformer": ["copy", "warnings", "mmcv", "torch", "mmdet", "typing", "math"], 
"modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.positional_encoding": ["torch", "mmcv", "math"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.backbones.vovnet": ["torch", "mmdet", "mmcv", "collections"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.detectors.petr3d": ["mmcv", "numpy", "torch", "mmdet3d", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.result_vis": ["numpy", "cv2", "json", "pyquaternion", "os", "mmdet3d", "pickle", "argparse"], "modelscope.models.cv.object_detection_3d.depe.depe_detect": ["torch", "os", "typing", "numpy"], "modelscope.models.cv.image_quality_assessment_mos.image_quality_assessment_mos": ["torch", "os", "typing"], "modelscope.models.cv.image_quality_assessment_mos.heads.simple_head": ["torch"], "modelscope.models.cv.image_quality_assessment_mos.backbones.resnet": ["torch", "os"], "modelscope.models.cv.image_quality_assessment_mos.censeo_ivqa_model": ["torch"], "modelscope.models.cv.image_debanding.rrdb.rrdb_image_debanding": ["torch", "os", "typing"], "modelscope.models.cv.image_restoration.demoire_models.nets": ["torch"], "modelscope.models.cv.image_restoration.image_restoration_model": ["cv2", "torch", "os", "numpy"], "modelscope.models.cv.cartoon.model_tf": ["tensorflow", "typing"], "modelscope.models.cv.cartoon.facelib.facer": ["cv2", "time", "numpy"], "modelscope.models.cv.cartoon.facelib.config": ["os", "easydict", "numpy"], "modelscope.models.cv.cartoon.facelib.LK.lk": ["numpy"], "modelscope.models.cv.cartoon.facelib.face_detector": ["cv2", "tensorflow", "numpy", "time"], "modelscope.models.cv.cartoon.facelib.face_landmark": ["cv2", "tensorflow", "numpy"], "modelscope.models.cv.cartoon.loss": ["tensorflow", "joblib", "skimage", "numpy", "scipy", "os"], "modelscope.models.cv.cartoon.utils": ["tensorflow", "random", "numpy", "cv2", "os"], "modelscope.models.cv.cartoon.mtcnn_pytorch.src.align_trans": ["cv2", "numpy"], "modelscope.models.cv.cartoon.mtcnn_pytorch.src.matlab_cp2tform": ["numpy"], "modelscope.models.cv.cartoon.network": ["tensorflow"], "modelscope.models.cv.vision_efficient_tuning.vision_efficient_tuning": ["torch", "os", "collections"], "modelscope.models.cv.vision_efficient_tuning.timm_vision_transformer": ["itertools", "torch", "logging", "functools", "math", "collections"], "modelscope.models.cv.vision_efficient_tuning.backbone": ["torch", "functools"], "modelscope.models.cv.vision_efficient_tuning.timm_weight_init": ["torch", "warnings", "math"], "modelscope.models.cv.vision_efficient_tuning.timm_helpers": ["torch", "typing", "itertools", "math"], "modelscope.models.cv.vision_efficient_tuning.petl": ["torch", "torchvision", "math", "collections"], "modelscope.models.cv.vision_efficient_tuning.model": ["torch", "typing"], "modelscope.models.cv.vision_efficient_tuning.head": ["torch"], "modelscope.models.cv.movie_scene_segmentation.utils.save_op": ["subprocess", "numpy", "cv2", "os", "tqdm"], "modelscope.models.cv.movie_scene_segmentation.utils.shot_encoder": ["torch", "typing"], "modelscope.models.cv.movie_scene_segmentation.utils.trn": ["torch", "transformers"], "modelscope.models.cv.movie_scene_segmentation.utils.head": ["torch"], "modelscope.models.cv.movie_scene_segmentation.model": ["PIL", "numpy", "torch", "einops", "tqdm", "math", "shotdetect_scenedetect_lgss", "torchvision", "os", "typing"], "modelscope.models.cv.movie_scene_segmentation.get_model": [], "modelscope.models.cv.video_summarization.pgl_sum": ["torch", "math"], 
"modelscope.models.cv.video_summarization.base_model": ["cv2", "torch", "numpy"], "modelscope.models.cv.video_summarization.summarizer": ["torch", "os", "typing", "numpy"], "modelscope.models.cv.video_summarization.kts.cpd_auto": ["numpy"], "modelscope.models.cv.video_summarization.kts.cpd_nonlin": ["numpy"], "modelscope.models.cv.table_recognition.lineless_table_process": ["cv2", "torch", "shapely", "numpy"], "modelscope.models.cv.table_recognition.model_lore": ["numpy", "torch", "math", "copy", "os", "typing"], "modelscope.models.cv.table_recognition.modules.lore_processor": ["copy", "numpy", "torch", "os", "math"], "modelscope.models.cv.table_recognition.modules.lore_detector": ["copy", "numpy", "torch", "os", "math"], "modelscope.models.cv.image_matching.quadtree_attention_model": ["numpy", "cv2", "torch", "pathlib", "os"], "modelscope.models.cv.image_matching.config.default": ["yacs"], "modelscope.models.cv.image_matching.utils.misc": ["yacs"], "modelscope.models.cv.image_matching.loftr_quadtree.loftr": ["torch", "einops"], "modelscope.models.cv.image_matching.loftr_quadtree.utils.position_encoding": ["torch", "math"], "modelscope.models.cv.image_matching.loftr_quadtree.utils.coarse_matching": ["torch", "einops"], "modelscope.models.cv.image_matching.loftr_quadtree.utils.fine_matching": ["torch", "math", "kornia"], "modelscope.models.cv.image_matching.loftr_quadtree.loftr_module.quadtree_attention": ["torch", "timm"], "modelscope.models.cv.image_matching.loftr_quadtree.loftr_module.fine_preprocess": ["torch", "einops"], "modelscope.models.cv.image_matching.loftr_quadtree.loftr_module.transformer": ["timm", "torch", "einops", "math", "copy"], "modelscope.models.cv.image_matching.loftr_quadtree.loftr_module.linear_attention": ["torch"], "modelscope.models.cv.image_matching.loftr_quadtree.backbone.resnet_fpn": ["torch"], "modelscope.models.cv.tinynas_detection.detector": ["torch", "os", "torchvision", "pickle"], "modelscope.models.cv.tinynas_detection.tinynas_detector": [], "modelscope.models.cv.tinynas_detection.damo.apis.detector_evaluater": ["torch", "os"], "modelscope.models.cv.tinynas_detection.damo.apis.detector_inference": ["torch", "os", "tqdm"], "modelscope.models.cv.tinynas_detection.damo.structures.boxlist_ops": ["torch"], "modelscope.models.cv.tinynas_detection.damo.structures.bounding_box": ["torch"], "modelscope.models.cv.tinynas_detection.damo.structures.image_list": ["torch", "__future__"], "modelscope.models.cv.tinynas_detection.damo.utils.model_utils": ["copy", "time", "torch", "thop", "math"], "modelscope.models.cv.tinynas_detection.damo.utils.boxes": ["torch", "torchvision", "numpy"], "modelscope.models.cv.tinynas_detection.damo.utils.scheduler": ["math"], "modelscope.models.cv.tinynas_detection.damo.augmentations.box_level_augs.box_level_augs": ["random", "numpy"], "modelscope.models.cv.tinynas_detection.damo.augmentations.box_level_augs.gaussian_maps": ["torch", "math"], "modelscope.models.cv.tinynas_detection.damo.augmentations.box_level_augs.color_augs": ["torch", "random"], "modelscope.models.cv.tinynas_detection.damo.augmentations.box_level_augs.geometric_augs": ["torch", "torchvision", "copy", "random"], "modelscope.models.cv.tinynas_detection.damo.augmentations.scale_aware_aug": ["copy"], "modelscope.models.cv.tinynas_detection.damo.detectors.detector": ["torch"], "modelscope.models.cv.tinynas_detection.damo.base_models.losses.distill_loss": ["torch"], "modelscope.models.cv.tinynas_detection.damo.base_models.losses.gfocal_loss": ["torch", "functools"], 
"modelscope.models.cv.tinynas_detection.damo.base_models.core.ota_assigner": ["torch", "warnings"], "modelscope.models.cv.tinynas_detection.damo.base_models.core.base_ops": ["torch", "math"], "modelscope.models.cv.tinynas_detection.damo.base_models.core.repvgg_block": ["torch", "numpy"], "modelscope.models.cv.tinynas_detection.damo.base_models.core.neck_ops": ["torch", "numpy"], "modelscope.models.cv.tinynas_detection.damo.base_models.core.ops": ["torch", "numpy"], "modelscope.models.cv.tinynas_detection.damo.base_models.core.utils": ["torch", "functools"], "modelscope.models.cv.tinynas_detection.damo.base_models.core.weight_init": ["torch", "numpy"], "modelscope.models.cv.tinynas_detection.damo.base_models.necks.giraffe_config": ["collections", "networkx"], "modelscope.models.cv.tinynas_detection.damo.base_models.necks.giraffe_fpn": ["timm", "numpy", "torch", "functools", "math", "collections", "typing"], "modelscope.models.cv.tinynas_detection.damo.base_models.necks.giraffe_fpn_btn": ["torch"], "modelscope.models.cv.tinynas_detection.damo.base_models.heads.gfocal_v2_tiny": ["torch", "functools", "numpy"], "modelscope.models.cv.tinynas_detection.damo.base_models.heads.zero_head": ["torch"], "modelscope.models.cv.tinynas_detection.damo.base_models.backbones.tinynas_csp": ["torch"], "modelscope.models.cv.tinynas_detection.damo.base_models.backbones.tinynas_res": ["torch"], "modelscope.models.cv.tinynas_detection.damo.base_models.backbones.darknet": ["torch"], "modelscope.models.cv.tinynas_detection.utils": ["shutil", "importlib", "os", "easydict", "tempfile", "sys"], "modelscope.models.cv.tinynas_detection.tinynas_damoyolo": [], "modelscope.models.cv.video_multi_object_tracking.utils.visualization": ["cv2", "numpy"], "modelscope.models.cv.video_multi_object_tracking.utils.utils": ["cv2", "torch", "numpy"], "modelscope.models.cv.video_multi_object_tracking.utils.kalman_filter": ["scipy", "numpy"], "modelscope.models.cv.video_multi_object_tracking.utils.image": ["cv2", "numpy"], "modelscope.models.cv.video_multi_object_tracking.tracker.matching": ["scipy", "lap", "numpy"], "modelscope.models.cv.video_multi_object_tracking.tracker.multitracker": ["torch", "collections", "numpy"], "modelscope.models.cv.video_multi_object_tracking.tracker.basetrack": ["collections", "numpy"], "modelscope.models.cv.video_multi_object_tracking.models.model": ["torch"], "modelscope.models.cv.video_multi_object_tracking.models.common": ["torch"], "modelscope.models.cv.video_multi_object_tracking.models.yolo": ["torch", "copy", "math"], "modelscope.models.cv.video_multi_object_tracking.models.decode": ["torch"], "modelscope.models.cv.nerf_recon_acc.nerf_recon_acc": ["numpy", "cv2", "torch", "tqdm", "time", "os", "glob"], "modelscope.models.cv.nerf_recon_acc.network.nerf": ["torch", "nerfacc", "numpy", "tinycudann"], "modelscope.models.cv.nerf_recon_acc.network.utils": ["mcubes", "numpy", "gc", "torch", "collections", "tinycudann"], "modelscope.models.cv.nerf_recon_acc.network.segmenter": ["tensorflow", "numpy"], "modelscope.models.cv.nerf_recon_acc.nerf_preprocess": ["subprocess", "tensorflow", "numpy", "cv2", "glob", "os", "typing"], "modelscope.models.cv.nerf_recon_acc.dataloader.nerf_dataset": ["PIL", "numpy", "json", "torch", "math", "torchvision", "os"], "modelscope.models.cv.nerf_recon_acc.dataloader.read_write_model": ["struct", "numpy", "os", "collections", "argparse"], "modelscope.models.cv.video_deinterlace.UNet_for_video_deinterlace": ["torch", "os", "copy", "typing"], 
"modelscope.models.cv.video_deinterlace.deinterlace_arch": ["torch"], "modelscope.models.cv.video_deinterlace.models.deep_fourier_upsampling": ["torch", "numpy"], "modelscope.models.cv.video_deinterlace.models.fre": ["torch"], "modelscope.models.cv.video_deinterlace.models.utils": ["torch"], "modelscope.models.cv.video_deinterlace.models.archs": ["torch", "numpy"], "modelscope.models.cv.video_deinterlace.models.enh": ["torch"], "modelscope.models.cv.cmdssl_video_embedding.resnet3d": ["torch"], "modelscope.models.cv.cmdssl_video_embedding.resnet2p1d": ["torch"], "modelscope.models.cv.cmdssl_video_embedding.c3d": ["torch"], "modelscope.models.cv.image_depth_estimation_bts.depth_estimation_bts_model": ["torch", "os"], "modelscope.models.cv.image_depth_estimation_bts.networks.decoder": ["torch"], "modelscope.models.cv.image_depth_estimation_bts.networks.bts_model": ["torch"], "modelscope.models.cv.image_depth_estimation_bts.networks.encoder": ["torch", "torchvision"], "modelscope.models.cv.image_depth_estimation_bts.networks.utils": ["torch", "math"], "modelscope.models.cv.motion_generation.model": [], "modelscope.models.cv.motion_generation.modules.rotation2xyz": ["torch"], "modelscope.models.cv.motion_generation.modules.respace": ["torch", "numpy"], "modelscope.models.cv.motion_generation.modules.smpl": ["contextlib", "numpy", "torch", "os", "smplx"], "modelscope.models.cv.motion_generation.modules.mdm": ["torch", "numpy", "clip"], "modelscope.models.cv.motion_generation.modules.gaussian_diffusion": ["copy", "numpy", "enum", "torch", "math"], "modelscope.models.cv.motion_generation.modules.cfg_sampler": ["torch", "copy"], "modelscope.models.cv.image_defrcn_fewshot.utils.requirements_check": ["importlib_metadata", "sys", "collections", "packaging", "importlib"], "modelscope.models.cv.image_defrcn_fewshot.utils.voc_register": ["numpy", "os", "fvcore", "xml", "detectron2"], "modelscope.models.cv.image_defrcn_fewshot.utils.configuration_mapper": ["detectron2"], "modelscope.models.cv.image_defrcn_fewshot.utils.model_surgery_op": ["torch", "os", "argparse"], "modelscope.models.cv.image_defrcn_fewshot.utils.coco_register": ["io", "contextlib", "pycocotools", "os", "fvcore", "detectron2"], "modelscope.models.cv.image_defrcn_fewshot.utils.register_data": [], "modelscope.models.cv.image_defrcn_fewshot.models.fast_rcnn": ["torch", "fvcore", "detectron2", "numpy"], "modelscope.models.cv.image_defrcn_fewshot.models.defrcn": ["torch", "os", "typing", "detectron2"], "modelscope.models.cv.image_defrcn_fewshot.models.resnet": ["torch", "torchvision"], "modelscope.models.cv.image_defrcn_fewshot.models.calibration_layer": ["cv2", "torch", "sklearn", "detectron2"], "modelscope.models.cv.image_defrcn_fewshot.models.gdl": ["torch"], "modelscope.models.cv.image_defrcn_fewshot.models.roi_heads": ["torch", "detectron2"], "modelscope.models.cv.image_defrcn_fewshot.defrcn_for_fewshot": ["torch", "os", "typing"], "modelscope.models.cv.image_defrcn_fewshot.evaluation.coco_evaluation": ["contextlib", "itertools", "numpy", "pycocotools", "json", "torch", "logging", "fvcore", "tabulate", "collections", "copy", "io", "os", "detectron2"], "modelscope.models.cv.image_defrcn_fewshot.evaluation.pascal_voc_evaluation": ["detectron2", "numpy", "os", "tempfile", "collections"], "modelscope.models.cv.image_defrcn_fewshot.evaluation.evaluator": ["torch", "logging", "datetime", "time", "detectron2"], "modelscope.models.cv.ocr_recognition.preprocessor": ["PIL", "numpy", "torch", "cv2", "os"], 
"modelscope.models.cv.ocr_recognition.model": ["torch", "os"], "modelscope.models.cv.ocr_recognition.modules.convnextvit": ["torch"], "modelscope.models.cv.ocr_recognition.modules.crnn": ["torch"], "modelscope.models.cv.ocr_recognition.modules.vitstr": ["torch", "logging", "functools", "copy", "__future__"], "modelscope.models.cv.ocr_recognition.modules.timm_tinyc": ["copy", "itertools", "torch", "logging", "functools", "math", "collections"], "modelscope.models.cv.ocr_recognition.modules.convnext": ["torch"], "modelscope.models.cv.video_panoptic_segmentation.track.quasi_dense_embed_tracker": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.video_panoptic_segmentation.head.mask": ["numpy", "pycocotools", "cv2", "torch", "__future__"], "modelscope.models.cv.video_panoptic_segmentation.head.kernel_updator": ["torch", "mmcv"], "modelscope.models.cv.video_panoptic_segmentation.head.kernel_update_head": ["torch", "mmcv", "mmdet", "numpy"], "modelscope.models.cv.video_panoptic_segmentation.head.semantic_fpn_wrapper": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.video_panoptic_segmentation.head.kernel_head": ["torch", "mmcv"], "modelscope.models.cv.video_panoptic_segmentation.head.kernel_iter_head": ["torch", "mmdet"], "modelscope.models.cv.video_panoptic_segmentation.head.track_heads": ["torch", "mmcv", "numpy"], "modelscope.models.cv.video_panoptic_segmentation.neck.fpn": ["torch", "mmcv"], "modelscope.models.cv.video_panoptic_segmentation.video_k_net": ["torch", "mmcv", "mmdet", "numpy"], "modelscope.models.cv.video_panoptic_segmentation.backbone.swin_checkpoint": ["pkgutil", "torchvision", "importlib", "torch", "os", "collections"], "modelscope.models.cv.video_panoptic_segmentation.backbone.swin_transformer": ["timm", "torch", "mmdet", "numpy"], "modelscope.models.cv.video_panoptic_segmentation.visualizer": ["cv2", "hashlib", "numpy"], "modelscope.models.cv.open_vocabulary_detection_vild.vild": ["tensorflow", "numpy", "torch", "scipy", "clip", "os", "typing"], "modelscope.models.cv.image_reid_person.pass_model": ["torch", "os", "enum"], "modelscope.models.cv.image_reid_person.transreid_model": ["torch", "functools", "itertools", "collections"], "modelscope.models.cv.image_face_fusion.facelib.align_trans": ["cv2", "numpy"], "modelscope.models.cv.image_face_fusion.facelib.matlab_cp2tform": ["numpy"], "modelscope.models.cv.image_face_fusion.network.aad_layer": ["torch"], "modelscope.models.cv.image_face_fusion.network.dense_motion": ["torch"], "modelscope.models.cv.image_face_fusion.network.model_irse": ["torch", "collections"], "modelscope.models.cv.image_face_fusion.network.bfm": ["torch", "os", "scipy", "numpy"], "modelscope.models.cv.image_face_fusion.network.ops": ["torch"], "modelscope.models.cv.image_face_fusion.network.aei_flow_net": ["torch"], "modelscope.models.cv.image_face_fusion.network.facerecon_model": ["torch", "os", "typing"], "modelscope.models.cv.image_face_fusion.image_face_fusion": ["PIL", "numpy", "torch", "cv2", "collections", "torchvision", "os", "typing"], "modelscope.models.cv.image_face_fusion.facegan.gan_wrap": ["PIL", "numpy", "torch", "cv2", "torchvision", "os"], "modelscope.models.cv.image_face_fusion.facegan.op.fused_act": ["torch"], "modelscope.models.cv.image_face_fusion.facegan.op.upfirdn2d": ["torch", "collections"], "modelscope.models.cv.image_face_fusion.facegan.op.conv2d_gradfix": ["torch", "warnings", "contextlib"], "modelscope.models.cv.image_face_fusion.facegan.model": ["torch", "math", "random"], "modelscope.models.cv.product_segmentation.net": 
["torch"], "modelscope.models.cv.product_segmentation.seg_infer": ["PIL", "torch", "cv2", "numpy"], "modelscope.models.cv.controllable_image_generation.controlnet": ["PIL", "random", "numpy", "cv2", "torch", "einops", "tempfile", "sys", "math", "control_ldm", "os", "typing"], "modelscope.models.cv.controllable_image_generation.annotator.openpose.body": ["numpy", "cv2", "torch", "scipy", "math", "matplotlib", "time", "torchvision"], "modelscope.models.cv.controllable_image_generation.annotator.openpose.util": ["cv2", "math", "numpy", "matplotlib"], "modelscope.models.cv.controllable_image_generation.annotator.openpose.model": ["torch", "collections"], "modelscope.models.cv.controllable_image_generation.annotator.openpose.hand": ["skimage", "numpy", "cv2", "json", "torch", "scipy", "math", "matplotlib", "time"], "modelscope.models.cv.controllable_image_generation.annotator.annotator": ["mmcv", "mmseg", "numpy", "cv2", "torch", "einops", "os"], "modelscope.models.cv.controllable_image_generation.annotator.midas.api": ["cv2", "torch", "torchvision", "os"], "modelscope.models.cv.controllable_image_generation.annotator.midas.midas.midas_net_custom": ["torch"], "modelscope.models.cv.controllable_image_generation.annotator.midas.midas.transforms": ["cv2", "math", "numpy"], "modelscope.models.cv.controllable_image_generation.annotator.midas.midas.midas_net": ["torch"], "modelscope.models.cv.controllable_image_generation.annotator.midas.midas.dpt_depth": ["torch"], "modelscope.models.cv.controllable_image_generation.annotator.midas.midas.base_model": ["torch"], "modelscope.models.cv.controllable_image_generation.annotator.midas.midas.vit": ["timm", "torch", "types", "math"], "modelscope.models.cv.controllable_image_generation.annotator.midas.midas.blocks": ["torch"], "modelscope.models.cv.controllable_image_generation.annotator.midas.utils": ["numpy", "cv2", "torch", "re", "sys"], "modelscope.models.cv.controllable_image_generation.annotator.mlsd.utils": ["cv2", "os", "torch", "numpy"], "modelscope.models.cv.controllable_image_generation.annotator.mlsd.mbv2_mlsd_large": ["torch", "os", "sys"], "modelscope.models.cv.video_inpainting.inpainting": ["PIL", "time", "torchvision", "numpy", "cv2", "torch", "os"], "modelscope.models.cv.video_inpainting.inpainting_model": ["torch", "torchvision", "math", "numpy"], "modelscope.models.cv.image_mvs_depth_estimation.casmvs_model": ["numpy", "cv2", "torch", "os", "easydict"], "modelscope.models.cv.image_mvs_depth_estimation.colmap2mvsnet": ["multiprocessing", "struct", "shutil", "numpy", "cv2", "__future__", "functools", "collections", "os"], "modelscope.models.cv.image_mvs_depth_estimation.cas_mvsnet": ["torch"], "modelscope.models.cv.image_mvs_depth_estimation.utils": ["torch", "numpy", "torchvision", "random"], "modelscope.models.cv.image_mvs_depth_estimation.depth_filter": ["PIL", "numpy", "cv2", "plyfile", "os"], "modelscope.models.cv.image_mvs_depth_estimation.module": ["torch"], "modelscope.models.cv.image_mvs_depth_estimation.general_eval_dataset": ["PIL", "numpy", "cv2", "torch", "re", "os", "sys"], "modelscope.models.cv.image_binary_quant_classification.binary_quant_model": ["torch", "os", "collections"], "modelscope.models.cv.image_binary_quant_classification.bnext": ["torch", "numpy"], "modelscope.models.cv.skin_retouching.detection_model.detection_unet_in": ["torch"], "modelscope.models.cv.skin_retouching.detection_model.detection_module": ["torch"], "modelscope.models.cv.skin_retouching.retinaface.net": ["torch", "typing"], 
"modelscope.models.cv.skin_retouching.retinaface.prior_box": ["torch", "itertools", "math"], "modelscope.models.cv.skin_retouching.retinaface.box_utils": ["torch", "typing", "numpy"], "modelscope.models.cv.skin_retouching.retinaface.utils": ["pathlib", "numpy", "cv2", "torch", "re", "typing"], "modelscope.models.cv.skin_retouching.retinaface.network": ["torch", "torchvision", "typing"], "modelscope.models.cv.skin_retouching.retinaface.predict_single": ["albumentations", "numpy", "torch", "torchvision", "typing"], "modelscope.models.cv.skin_retouching.unet_deploy": ["torch", "warnings"], "modelscope.models.cv.skin_retouching.weights_init": ["torch"], "modelscope.models.cv.skin_retouching.utils": ["time", "numpy", "cv2", "torch", "einops", "typing"], "modelscope.models.cv.skin_retouching.inpainting_model.gconv": ["torch"], "modelscope.models.cv.skin_retouching.inpainting_model.inpainting_unet": ["torch"], "modelscope.models.cv.body_3d_keypoints.hdformer.directed_graph": ["typing", "sys", "numpy"], "modelscope.models.cv.body_3d_keypoints.hdformer.hdformer_detector": ["torch", "os", "typing", "numpy"], "modelscope.models.cv.body_3d_keypoints.hdformer.skeleton": ["numpy"], "modelscope.models.cv.body_3d_keypoints.hdformer.backbone": ["torch"], "modelscope.models.cv.body_3d_keypoints.hdformer.hdformer": ["torch"], "modelscope.models.cv.body_3d_keypoints.hdformer.block": ["torch", "einops", "math"], "modelscope.models.cv.body_3d_keypoints.cannonical_pose.canonical_pose_modules": ["torch"], "modelscope.models.cv.body_3d_keypoints.cannonical_pose.body_3d_pose": ["numpy", "torch", "logging", "os", "typing"], "modelscope.models.cv.action_recognition.models": ["torch"], "modelscope.models.cv.action_recognition.s3dg": ["torch"], "modelscope.models.cv.action_recognition.tada_convnext": ["torch", "math"], "modelscope.models.cv.action_recognition.temporal_patch_shift_transformer": ["timm", "numpy", "torch", "abc", "einops", "functools", "operator", "torchvision"], "modelscope.models.cv.video_frame_interpolation.interp_model.flow_reversal": ["torch"], "modelscope.models.cv.video_frame_interpolation.interp_model.UNet": ["torch"], "modelscope.models.cv.video_frame_interpolation.interp_model.IFNet_swin": ["torch", "timm", "numpy"], "modelscope.models.cv.video_frame_interpolation.interp_model.refinenet_arch": ["torch", "numpy"], "modelscope.models.cv.video_frame_interpolation.interp_model.transformer_layers": ["timm", "torch", "math", "functools", "sys"], "modelscope.models.cv.video_frame_interpolation.utils.utils": ["torch", "scipy", "numpy"], "modelscope.models.cv.video_frame_interpolation.utils.scene_change_detection": ["torch", "numpy"], "modelscope.models.cv.video_frame_interpolation.VFINet_for_video_frame_interpolation": ["torch", "os", "copy", "typing"], "modelscope.models.cv.video_frame_interpolation.VFINet_arch": ["torch"], "modelscope.models.cv.video_frame_interpolation.flow_model.update": ["torch"], "modelscope.models.cv.video_frame_interpolation.flow_model.corr": ["torch"], "modelscope.models.cv.video_frame_interpolation.flow_model.extractor": ["torch"], "modelscope.models.cv.video_frame_interpolation.flow_model.raft": ["torch", "numpy"], "modelscope.models.cv.object_detection.mmdet_ms.dense_heads.rpn_head": ["torch", "copy", "mmcv", "mmdet"], "modelscope.models.cv.object_detection.mmdet_ms.dense_heads.anchor_head": ["mmdet"], "modelscope.models.cv.object_detection.mmdet_ms.necks.fpn": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.object_detection.mmdet_ms.utils.convModule_norm": ["mmcv"], 
"modelscope.models.cv.object_detection.mmdet_ms.utils.checkpoint": ["mmcv", "torch", "tempfile", "collections", "time", "pkgutil", "io", "warnings", "importlib", "torchvision", "os"], "modelscope.models.cv.object_detection.mmdet_ms.backbones.vit": ["timm", "torch", "functools", "math", "mmdet"], "modelscope.models.cv.object_detection.mmdet_ms.roi_heads.bbox_heads.convfc_bbox_head": ["torch", "mmdet"], "modelscope.models.cv.object_detection.mmdet_ms.roi_heads.mask_heads.fcn_mask_head": ["mmcv", "numpy", "torch", "warnings", "mmdet"], "modelscope.models.cv.object_detection.mmdet_model": ["torch", "os", "numpy"], "modelscope.models.cv.pedestrian_attribute_recognition.model": ["torch", "os", "torchvision", "numpy"], "modelscope.models.cv.pointcloud_sceneflow_estimation.sf_rcp": ["torch"], "modelscope.models.cv.pointcloud_sceneflow_estimation.rcp_model": ["torch", "os", "numpy"], "modelscope.models.cv.pointcloud_sceneflow_estimation.common": ["torch", "math"], "modelscope.models.cv.pointcloud_sceneflow_estimation.pointnet2_utils": ["torch", "typing", "pointnet2_cuda"], "modelscope.models.cv.animal_recognition.splat": ["torch"], "modelscope.models.cv.animal_recognition.resnet": ["torch", "math"], "modelscope.models.cv.video_stabilization.utils.image_utils": ["torch", "skimage"], "modelscope.models.cv.video_stabilization.utils.RAFTUtils": ["torch", "scipy", "numpy"], "modelscope.models.cv.video_stabilization.utils.math_utils": ["torch", "numpy"], "modelscope.models.cv.video_stabilization.utils.ProjectionUtils": ["cv2", "torch", "math", "numpy"], "modelscope.models.cv.video_stabilization.utils.WarpUtils": ["torch", "tqdm", "numpy"], "modelscope.models.cv.video_stabilization.utils.MedianFilter": ["cv2", "torch", "math", "numpy"], "modelscope.models.cv.video_stabilization.utils.IterativeSmooth": ["torch", "os", "math", "numpy"], "modelscope.models.cv.video_stabilization.DUTRAFTStabilizer": ["numpy", "cv2", "torch", "tempfile", "sys", "math", "os", "typing"], "modelscope.models.cv.video_stabilization.DUT.config": ["__future__", "easydict"], "modelscope.models.cv.video_stabilization.DUT.rf_det_so": ["torch"], "modelscope.models.cv.video_stabilization.DUT.Smoother": ["torch", "math", "numpy"], "modelscope.models.cv.video_stabilization.DUT.DUT_raft": ["cv2", "torch", "sys", "numpy"], "modelscope.models.cv.video_stabilization.DUT.MotionPro": ["numpy", "cv2", "torch", "math", "os"], "modelscope.models.cv.video_stabilization.DUT.RAFT.update": ["torch"], "modelscope.models.cv.video_stabilization.DUT.RAFT.corr": ["torch", "alt_cuda_corr"], "modelscope.models.cv.video_stabilization.DUT.RAFT.extractor": ["torch"], "modelscope.models.cv.video_stabilization.DUT.RAFT.raft": ["torch", "numpy"], "modelscope.models.cv.video_stabilization.DUT.rf_det_module": ["torch"], "modelscope.models.cv.video_depth_estimation.dro_model": ["numpy", "cv2", "torch", "tqdm", "os", "glob"], "modelscope.models.cv.video_depth_estimation.utils.misc": ["termcolor"], "modelscope.models.cv.video_depth_estimation.utils.config": ["torch", "datetime", "yacs", "os"], "modelscope.models.cv.video_depth_estimation.utils.horovod": ["horovod"], "modelscope.models.cv.video_depth_estimation.utils.image_gt": ["PIL", "torch", "cv2", "functools"], "modelscope.models.cv.video_depth_estimation.utils.types": ["torch", "yacs", "numpy"], "modelscope.models.cv.video_depth_estimation.utils.depth": ["torch", "torchvision", "numpy", "matplotlib"], "modelscope.models.cv.video_depth_estimation.utils.load": ["torch", "logging", "collections", "inspect", "warnings", 
"importlib", "os"], "modelscope.models.cv.video_depth_estimation.utils.image": ["PIL", "numpy", "torch", "cv2", "functools", "os"], "modelscope.models.cv.video_depth_estimation.utils.augmentations": ["PIL", "random", "numpy", "cv2", "torchvision"], "modelscope.models.cv.video_depth_estimation.models.model_utils": [], "modelscope.models.cv.video_depth_estimation.models.sfm_model_mf": ["torch", "random"], "modelscope.models.cv.video_depth_estimation.models.model_checkpoint": ["torch", "os", "re", "numpy"], "modelscope.models.cv.video_depth_estimation.models.model_wrapper": ["random", "numpy", "torch", "collections", "importlib"], "modelscope.models.cv.video_depth_estimation.models.sup_model_mf": [], "modelscope.models.cv.video_depth_estimation.networks.layers.resnet.pose_decoder": ["torch", "__future__", "collections"], "modelscope.models.cv.video_depth_estimation.networks.layers.resnet.resnet_encoder": ["torch", "__future__", "torchvision", "numpy"], "modelscope.models.cv.video_depth_estimation.networks.layers.resnet.layers": ["torch", "__future__"], "modelscope.models.cv.video_depth_estimation.networks.layers.resnet.depth_decoder": ["torch", "__future__", "collections", "numpy"], "modelscope.models.cv.video_depth_estimation.networks.optim.update": ["torch"], "modelscope.models.cv.video_depth_estimation.networks.optim.extractor": ["torch", "torchvision"], "modelscope.models.cv.video_depth_estimation.networks.depth_pose.depth_pose_net": ["torch", "functools"], "modelscope.models.cv.video_depth_estimation.configs.default_config": ["os", "yacs"], "modelscope.models.cv.video_depth_estimation.geometry.pose_utils": ["torch", "numpy"], "modelscope.models.cv.video_depth_estimation.geometry.camera_utils": ["torch"], "modelscope.models.cv.video_depth_estimation.geometry.camera": ["torch", "functools"], "modelscope.models.cv.video_depth_estimation.geometry.pose": ["torch"], "modelscope.models.cv.vidt.backbone": ["timm", "numpy", "torch", "os", "math"], "modelscope.models.cv.vidt.model": ["torch", "os"], "modelscope.models.cv.vidt.head": ["copy", "torch", "math"], "modelscope.models.cv.vidt.fpn_fusion": ["torch"], "modelscope.models.cv.vidt.deformable_transformer": ["timm", "copy", "warnings", "torch", "math"], "modelscope.models.cv.face_human_hand_detection.shufflenetv2": ["torch"], "modelscope.models.cv.face_human_hand_detection.one_stage_detector": ["torch"], "modelscope.models.cv.face_human_hand_detection.nanodet_plus_head": ["numpy", "cv2", "torch", "math", "torchvision"], "modelscope.models.cv.face_human_hand_detection.det_infer": ["cv2", "torch", "numpy"], "modelscope.models.cv.face_human_hand_detection.ghost_pan": ["torch", "math"], "modelscope.models.cv.face_human_hand_detection.utils": ["torch"], "modelscope.models.cv.referring_video_object_segmentation.utils.misc": ["torch", "torchvision", "typing", "pickle"], "modelscope.models.cv.referring_video_object_segmentation.utils.mttr": ["torch", "einops"], "modelscope.models.cv.referring_video_object_segmentation.utils.multimodal_transformer": ["transformers", "torch", "einops", "copy", "os", "typing"], "modelscope.models.cv.referring_video_object_segmentation.utils.matcher": ["torch", "scipy"], "modelscope.models.cv.referring_video_object_segmentation.utils.backbone": ["torch", "torchvision", "einops"], "modelscope.models.cv.referring_video_object_segmentation.utils.position_encoding_2d": ["torch", "math"], "modelscope.models.cv.referring_video_object_segmentation.utils.postprocessing": ["torch", "einops", "numpy", "pycocotools"], 
"modelscope.models.cv.referring_video_object_segmentation.utils.criterion": ["torch"], "modelscope.models.cv.referring_video_object_segmentation.utils.swin_transformer": ["timm", "numpy", "torch", "einops", "functools", "operator"], "modelscope.models.cv.referring_video_object_segmentation.utils.segmentation": ["torch", "typing"], "modelscope.models.cv.referring_video_object_segmentation.model": ["torch", "os", "typing"], "modelscope.models.cv.hand_static.networks": ["torch", "os", "torchvision"], "modelscope.models.cv.hand_static.hand_model": ["PIL", "numpy", "torch", "cv2", "sys", "torchvision", "os"], "modelscope.models.cv.image_depth_estimation.newcrfs_model": ["torch", "os", "numpy"], "modelscope.models.cv.image_depth_estimation.networks.uper_crf_head": ["torch", "mmcv"], "modelscope.models.cv.image_depth_estimation.networks.newcrf_layers": ["torch", "timm", "numpy"], "modelscope.models.cv.image_depth_estimation.networks.newcrf_depth": ["torch"], "modelscope.models.cv.image_depth_estimation.networks.newcrf_utils": ["pkgutil", "warnings", "torchvision", "importlib", "torch", "os", "collections"], "modelscope.models.cv.image_depth_estimation.networks.swin_transformer": ["torch", "timm", "numpy"], "modelscope.models.cv.image_colorization.unet.unet": ["torch", "numpy"], "modelscope.models.cv.image_colorization.unet.utils": ["torch", "functools", "enum"], "modelscope.models.cv.image_colorization.ddcolor.ddcolor_for_image_colorization": ["numpy", "torch", "copy", "os", "typing"], "modelscope.models.cv.image_colorization.ddcolor.ddcolor": ["torch"], "modelscope.models.cv.image_colorization.ddcolor.utils.vgg": ["torch", "os", "torchvision", "collections"], "modelscope.models.cv.image_colorization.ddcolor.utils.unet": ["torch", "collections", "enum"], "modelscope.models.cv.image_colorization.ddcolor.utils.transformer_utils": ["torch", "typing"], "modelscope.models.cv.image_colorization.ddcolor.utils.position_encoding": ["torch", "math"], "modelscope.models.cv.image_colorization.ddcolor.utils.convnext": ["torch", "timm"], "modelscope.models.cv.image_colorization.ddcolor.loss": ["torch"], "modelscope.models.cv.face_detection.retinaface.detection": ["cv2", "torch", "numpy"], "modelscope.models.cv.face_detection.retinaface.models.retinaface": ["torch", "torchvision", "collections"], "modelscope.models.cv.face_detection.retinaface.models.net": ["torch", "time", "torchvision"], "modelscope.models.cv.face_detection.retinaface.utils": ["torch", "itertools", "math", "numpy"], "modelscope.models.cv.face_detection.mtcnn.models.detector": ["PIL", "torch", "os", "numpy"], "modelscope.models.cv.face_detection.mtcnn.models.get_nets": ["torch", "collections", "numpy"], "modelscope.models.cv.face_detection.mtcnn.models.box_utils": ["PIL", "numpy"], "modelscope.models.cv.face_detection.mtcnn.models.first_stage": ["PIL", "torch", "math", "numpy"], "modelscope.models.cv.face_detection.ulfd_slim.detection": ["cv2", "torch", "os", "numpy"], "modelscope.models.cv.face_detection.ulfd_slim.vision.ssd.predictor": ["torch"], "modelscope.models.cv.face_detection.ulfd_slim.vision.ssd.data_preprocessing": [], "modelscope.models.cv.face_detection.ulfd_slim.vision.ssd.ssd": ["torch", "typing", "collections", "numpy"], "modelscope.models.cv.face_detection.ulfd_slim.vision.ssd.mb_tiny_fd": ["torch"], "modelscope.models.cv.face_detection.ulfd_slim.vision.ssd.fd_config": ["numpy"], "modelscope.models.cv.face_detection.ulfd_slim.vision.transforms": ["cv2", "torch", "types", "numpy"], 
"modelscope.models.cv.face_detection.ulfd_slim.vision.box_utils": ["torch", "math"], "modelscope.models.cv.face_detection.ulfd_slim.vision.mb_tiny": ["torch"], "modelscope.models.cv.face_detection.peppa_pig_face.facer": ["cv2", "numpy"], "modelscope.models.cv.face_detection.peppa_pig_face.LK.lk": ["numpy"], "modelscope.models.cv.face_detection.peppa_pig_face.face_detector": ["cv2", "tensorflow", "numpy"], "modelscope.models.cv.face_detection.peppa_pig_face.face_landmark": ["cv2", "tensorflow", "numpy"], "modelscope.models.cv.face_detection.scrfd.scrfd_detect": ["numpy", "torch", "copy", "os", "typing"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.core.post_processing.bbox_nms": ["torch"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.core.bbox.transforms": ["torch", "numpy"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.retinaface": ["mmdet", "numpy"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.auto_augment": ["copy", "mmcv", "numpy", "cv2", "mmdet"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.transforms": ["mmcv", "mmdet", "numpy"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.loading": ["os", "mmdet", "numpy", "pycocotools"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.formating": ["torch", "mmcv", "mmdet", "numpy"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.dense_heads.scrfd_head": ["torch", "mmcv", "mmdet", "numpy"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.backbones.master_net": ["torch", "mmdet"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.backbones.mobilenet": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.backbones.resnet": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors.scrfd": ["torch", "mmdet"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors.single_stage": ["torch", "mmdet"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors.base": ["mmcv", "numpy", "torch", "abc", "mmdet", "collections"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors.tinymog": ["torch", "mmdet"], "modelscope.models.cv.face_detection.scrfd.tinymog_detect": ["torch", "os", "copy", "typing"], "modelscope.models.cv.face_detection.scrfd.preprocessor": ["PIL", "typing", "numpy"], "modelscope.models.cv.face_detection.scrfd.damofd_detect": ["torch", "os", "copy", "typing"], "modelscope.models.cv.face_detection.mogface.models.mogprednet": ["torch", "math"], "modelscope.models.cv.face_detection.mogface.models.resnet": ["torch"], "modelscope.models.cv.face_detection.mogface.models.utils": ["torch", "itertools", "math", "numpy"], "modelscope.models.cv.face_detection.mogface.models.detectors": ["cv2", "torch", "os", "numpy"], "modelscope.models.cv.face_detection.mogface.models.mogface": ["torch"], "modelscope.models.cv.robust_image_classification.easyrobust_model": ["torch", "os"], "modelscope.models.cv.image_semantic_segmentation.ddpm_segmentation_model": ["torch", "os", "typing", "ddpm_guided_diffusion"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.utils.data_process_func": ["mmcv", "mmdet"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.utils.builder": ["mmcv"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.utils.seg_func": ["torch", "warnings"], 
"modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.backbone.beit_adapter": ["timm", "torch", "logging", "math", "mmdet"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.backbone.adapter_modules": ["timm", "torch", "logging", "functools", "mmdet"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.backbone.base.beit": ["timm", "mmcv", "torch", "mmdet", "functools", "math"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.segmentors.encoder_decoder_mask2former": ["torch", "mmdet"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.segmentors.base_segmentor": ["warnings", "mmcv", "numpy", "torch", "abc", "collections"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.decode_heads.mask2former_head_from_mmseg": ["torch", "copy", "mmcv", "mmdet"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.decode_heads.base_decode_head": ["torch", "abc", "mmcv", "mmdet"], "modelscope.models.cv.image_semantic_segmentation.semantic_seg_model": ["torch", "os", "numpy"], "modelscope.models.cv.image_semantic_segmentation.pan_merge.base_panoptic_fusion_head": ["abc", "mmcv", "mmdet"], "modelscope.models.cv.image_semantic_segmentation.pan_merge.maskformer_semantic_head": ["torch", "mmdet"], "modelscope.models.cv.image_semantic_segmentation.ddpm_seg.data_util": [], "modelscope.models.cv.image_semantic_segmentation.ddpm_seg.utils": ["torch", "numpy", "PIL", "random"], "modelscope.models.cv.image_semantic_segmentation.ddpm_seg.feature_extractors": ["torch", "typing"], "modelscope.models.cv.image_semantic_segmentation.ddpm_seg.pixel_classifier": ["PIL", "numpy", "torch", "collections", "os"], "modelscope.models.cv.video_single_object_tracking.config.ostrack": ["easydict"], "modelscope.models.cv.video_single_object_tracking.utils.utils": ["numpy", "cv2", "torch", "typing", "math"], "modelscope.models.cv.video_single_object_tracking.tracker.procontext": ["torch", "copy"], "modelscope.models.cv.video_single_object_tracking.tracker.ostrack": ["torch"], "modelscope.models.cv.video_single_object_tracking.models.layers.attn_blocks": ["torch", "math", "timm"], "modelscope.models.cv.video_single_object_tracking.models.layers.head": ["torch"], "modelscope.models.cv.video_single_object_tracking.models.layers.patch_embed": ["torch", "timm"], "modelscope.models.cv.video_single_object_tracking.models.layers.attn": ["torch"], "modelscope.models.cv.video_single_object_tracking.models.ostrack.base_backbone": ["torch", "timm"], "modelscope.models.cv.video_single_object_tracking.models.ostrack.ostrack": ["torch"], "modelscope.models.cv.video_single_object_tracking.models.ostrack.utils": ["torch"], "modelscope.models.cv.video_single_object_tracking.models.ostrack.vit_ce": ["timm", "torch", "functools"], "modelscope.models.cv.video_single_object_tracking.models.procontext.procontext": ["torch"], "modelscope.models.cv.video_single_object_tracking.models.procontext.utils": ["torch"], "modelscope.models.cv.video_single_object_tracking.models.procontext.vit_ce": ["timm", "torch", "functools"], "modelscope.models.cv.text_driven_segmentation.lseg_net": ["torch", "numpy"], "modelscope.models.cv.text_driven_segmentation.lseg_blocks": ["torch"], "modelscope.models.cv.text_driven_segmentation.lseg_model": ["PIL", "numpy", "torch", "json", "os", "typing"], "modelscope.models.cv.text_driven_segmentation.model": ["torch", "typing", "collections", "numpy"], "modelscope.models.cv.text_driven_segmentation.lseg_vit": 
["timm", "torch", "types", "math"], "modelscope.models.cv.text_driven_segmentation.clip": ["urllib", "hashlib", "PIL", "pkg_resources", "torch", "tqdm", "warnings", "torchvision", "os", "typing"], "modelscope.models.cv.text_driven_segmentation.simple_tokenizer": ["gzip", "os", "regex", "functools", "ftfy", "html"], "modelscope.models.cv.text_driven_segmentation.lseg_base": ["torch"], "modelscope.models.cv.crowd_counting.hrnet_aspp_relu": ["numpy", "torch", "logging", "functools", "os"], "modelscope.models.cv.crowd_counting.cc_model": ["torch", "os", "typing"], "modelscope.models.cv.image_panoptic_segmentation.panseg_model": ["torch", "os"], "modelscope.models.cv.face_emotion.emotion_model": ["torch", "os", "sys"], "modelscope.models.cv.face_emotion.emotion_infer": ["PIL", "torch", "torchvision"], "modelscope.models.cv.face_emotion.face_alignment.face_align": ["PIL", "numpy", "cv2", "sys", "os"], "modelscope.models.cv.face_emotion.face_alignment.face": ["cv2", "os", "numpy", "tensorflow"], "modelscope.models.cv.face_emotion.efficient.model": ["torch"], "modelscope.models.cv.face_emotion.efficient.utils": ["functools", "torch", "re", "math", "collections"], "modelscope.models.cv.video_super_resolution.real_basicvsr_net": ["torch"], "modelscope.models.cv.video_super_resolution.msrresnet_lite_model": ["torch", "os", "functools", "typing"], "modelscope.models.cv.video_super_resolution.common": ["torch"], "modelscope.models.cv.video_super_resolution.real_basicvsr_for_video_super_resolution": ["torch", "os", "typing"], "modelscope.models.cv.video_super_resolution.basicvsr_net": ["torch"], "modelscope.models.cv.face_attribute_recognition.fair_face.face_attribute_recognition": ["PIL", "numpy", "torch", "cv2", "torchvision", "os"], "modelscope.models.cv.image_denoise.nafnet.NAFNet_arch": ["torch", "numpy"], "modelscope.models.cv.image_denoise.nafnet.arch_util": ["torch"], "modelscope.models.cv.image_denoise.nafnet_for_image_denoise": ["torch", "os", "typing"], "modelscope.models.cv.image_classification.mmcls_model": ["os"], "modelscope.models.cv.image_classification.utils": ["itertools", "numpy", "torch", "os", "mmcls", "math", "collections"], "modelscope.models.cv.image_classification.backbones.beit_v2": ["itertools", "mmcv", "torch", "einops", "functools", "mmcls", "math", "collections", "warnings", "os", "typing"], "modelscope.models.cv.image_classification.backbones.nextvit": ["itertools", "mmcv", "torch", "einops", "functools", "mmcls", "math", "collections", "warnings", "os", "typing"], "modelscope.models.cv.image_classification.resnet50_cc": ["torch", "math", "collections", "torchvision", "os"], "modelscope.models.cv.image_color_enhance.csrnet": ["torch", "functools", "math"], "modelscope.models.cv.image_color_enhance.deeplpf.deeplpfnet": ["torch", "math", "matplotlib"], "modelscope.models.cv.image_color_enhance.deeplpf.deeplpf_image_color_enhance": ["torch", "os", "typing"], "modelscope.models.cv.image_color_enhance.image_color_enhance": ["torch", "os", "typing"], "modelscope.models.cv.image_color_enhance.adaint.adaint": ["numbers", "torch", "torchvision", "os", "typing"], "modelscope.models.base.base_torch_head": ["torch", "typing"], "modelscope.models.base.base_model": ["os", "abc", "typing"], "modelscope.models.base.base_torch_model": ["torch", "functools", "copy", "packaging", "os", "typing"], "modelscope.models.base.base_head": ["abc", "typing"], "modelscope.metrics.image_quality_assessment_degradation_metric": ["numpy", "cv2", "torch", "scipy", "tempfile", "sys", "collections", "tqdm", 
"os", "typing"], "modelscope.metrics.prediction_saving_wrapper": ["typing", "sklearn", "numpy"], "modelscope.metrics.video_stabilization_metric": ["numpy", "cv2", "tqdm", "tempfile", "sys", "os", "typing"], "modelscope.metrics.ppl_metric": ["torch", "typing", "math", "numpy"], "modelscope.metrics.inbatch_recall_metric": ["torch", "typing", "numpy"], "modelscope.metrics.loss_metric": ["typing", "sklearn", "numpy"], "modelscope.metrics.ocr_recognition_metric": ["torch", "edit_distance", "typing", "numpy"], "modelscope.metrics.map_metric": ["typing", "numpy"], "modelscope.metrics.image_colorization_metric": ["numpy", "cv2", "torch", "scipy", "torchvision", "typing"], "modelscope.metrics.sequence_classification_metric": ["typing", "sklearn", "numpy"], "modelscope.metrics.audio_noise_metric": ["typing"], "modelscope.metrics.translation_evaluation_metric": ["pandas", "typing", "importlib"], "modelscope.metrics.video_frame_interpolation_metric": ["numpy", "torch", "lpips", "math", "typing"], "modelscope.metrics.image_inpainting_metric": ["torch", "scipy", "typing", "numpy"], "modelscope.metrics.image_denoise_metric": ["cv2", "torch", "typing", "numpy"], "modelscope.metrics.referring_video_object_segmentation_metric": ["numpy", "pycocotools", "torch", "tqdm", "typing"], "modelscope.metrics.token_classification_metric": ["typing", "numpy", "importlib"], "modelscope.metrics.video_summarization_metric": ["typing", "numpy"], "modelscope.metrics.builder": ["typing"], "modelscope.metrics.image_quality_assessment_mos_metric": ["numpy", "cv2", "torch", "scipy", "tempfile", "sys", "tqdm", "os", "typing"], "modelscope.metrics.ned_metric": ["typing", "numpy"], "modelscope.metrics.text_ranking_metric": ["typing", "numpy"], "modelscope.metrics.movie_scene_segmentation_metric": ["typing", "numpy"], "modelscope.metrics.accuracy_metric": ["typing", "numpy"], "modelscope.metrics.image_instance_segmentation_metric": ["numpy", "pycocotools", "tempfile", "collections", "os", "typing"], "modelscope.metrics.video_super_resolution_metric.metric_util": ["numpy"], "modelscope.metrics.video_super_resolution_metric.video_super_resolution_metric": ["typing", "numpy"], "modelscope.metrics.video_super_resolution_metric.niqe": ["cv2", "scipy", "math", "numpy"], "modelscope.metrics.video_super_resolution_metric.matlab_functions": ["torch", "math", "numpy"], "modelscope.metrics.ciderD.ciderD": ["__future__"], "modelscope.metrics.ciderD.ciderD_scorer": ["pdb", "numpy", "__future__", "math", "collections", "copy", "six", "os"], "modelscope.metrics.action_detection_evaluator": ["numpy", "logging", "scipy", "pandas", "collections", "copy", "os", "detectron2"], "modelscope.metrics.image_color_enhance_metric": ["cv2", "typing", "numpy"], "modelscope.metrics.image_portrait_enhancement_metric": ["cv2", "typing", "numpy"], "modelscope.metrics.bleu_metric": ["typing", "itertools", "sacrebleu"], "modelscope.metrics.text_generation_metric": ["nltk", "rouge", "typing"], "modelscope.metrics.base": ["abc", "typing"], "modelscope.pipelines.util": ["os", "typing"], "modelscope.pipelines.science.protein_structure_pipeline": ["numpy", "json", "torch", "unicore", "time", "os", "typing"], "modelscope.pipelines.builder": ["os", "typing"], "modelscope.pipelines.pipeline_template": ["typing", "numpy"], "modelscope.pipelines.audio.timestamp_pipeline": ["json", "typing", "yaml", "os", "funasr"], "modelscope.pipelines.audio.kws_farfield_pipeline": ["numpy", "wave", "soundfile", "io", "typing"], "modelscope.pipelines.audio.speaker_verification_pipeline": 
["os", "typing", "shutil", "yaml"], "modelscope.pipelines.audio.inverse_text_processing_pipeline": ["os", "typing", "shutil", "yaml"], "modelscope.pipelines.audio.separation_pipeline": ["numpy", "torch", "soundfile", "io", "typing"], "modelscope.pipelines.audio.voice_activity_detection_pipeline": ["json", "typing", "yaml", "os", "funasr"], "modelscope.pipelines.audio.text_to_speech_pipeline": ["typing", "numpy"], "modelscope.pipelines.audio.kws_kwsbp_pipeline": ["json", "os", "typing"], "modelscope.pipelines.audio.linear_aec_pipeline": ["numpy", "torch", "scipy", "yaml", "importlib", "os", "typing"], "modelscope.pipelines.audio.ans_pipeline": ["numpy", "torch", "librosa", "soundfile", "io", "typing"], "modelscope.pipelines.audio.speaker_verification_eres2net_pipeline": ["torch", "io", "typing", "soundfile"], "modelscope.pipelines.audio.lm_infer_pipeline": ["os", "typing"], "modelscope.pipelines.audio.ans_dfsmn_pipeline": ["numpy", "torch", "sys", "collections", "librosa", "soundfile", "io", "os", "typing"], "modelscope.pipelines.audio.asr_inference_pipeline": ["json", "os", "typing", "yaml"], "modelscope.pipelines.audio.speaker_diarization_pipeline": ["shutil", "numpy", "json", "yaml", "os", "typing"], "modelscope.pipelines.audio.speaker_verification_rdino_pipeline": ["torch", "io", "typing", "soundfile"], "modelscope.pipelines.audio.punctuation_processing_pipeline": ["os", "typing", "shutil", "yaml"], "modelscope.pipelines.audio.speaker_verification_light_pipeline": ["torch", "io", "typing", "soundfile"], "modelscope.pipelines.audio.speaker_change_locating_pipeline": ["numpy", "torch", "soundfile", "io", "typing"], "modelscope.pipelines.audio.asr_wenet_inference_pipeline": ["typing"], "modelscope.pipelines.multi_modal.asr_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.image_captioning_pipeline": ["torch", "typing", "numpy"], "modelscope.pipelines.multi_modal.text_to_video_synthesis_pipeline": ["cv2", "torch", "einops", "tempfile", "os", "typing"], "modelscope.pipelines.multi_modal.mgeo_ranking_pipeline": ["torch", "typing", "numpy"], "modelscope.pipelines.multi_modal.generative_multi_modal_embedding_pipeline": ["typing"], "modelscope.pipelines.multi_modal.multimodal_dialogue_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.text_to_image_synthesis_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.text2sql_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.visual_entailment_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.disco_guided_diffusion_pipeline.disco_guided_diffusion": ["PIL", "gc", "numpy", "cv2", "json", "torch", "math", "clip", "importlib", "torchvision", "os"], "modelscope.pipelines.multi_modal.disco_guided_diffusion_pipeline.utils": ["fractions", "warnings", "numpy", "torch", "math"], "modelscope.pipelines.multi_modal.visual_question_answering_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.video_question_answering_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.video_captioning_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.video_multi_modal_embedding_pipeline": ["typing"], "modelscope.pipelines.multi_modal.efficient_diffusion_tuning_pipeline": ["PIL", "numpy", "torch", "cv2", "torchvision", "typing"], "modelscope.pipelines.multi_modal.team_multi_modal_similarity_pipeline": ["typing"], "modelscope.pipelines.multi_modal.diffusers_wrapped.diffusers_pipeline": ["os", "typing"], 
"modelscope.pipelines.multi_modal.diffusers_wrapped.stable_diffusion.stable_diffusion_pipeline": ["PIL", "numpy", "torch", "cv2", "diffusers", "typing"], "modelscope.pipelines.multi_modal.diffusers_wrapped.stable_diffusion.chinese_stable_diffusion_pipeline": ["PIL", "transformers", "numpy", "cv2", "torch", "diffusers", "typing"], "modelscope.pipelines.multi_modal.multi_modal_embedding_pipeline": ["typing"], "modelscope.pipelines.multi_modal.ocr_recognition_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.document_vl_embedding_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.image_text_retrieval_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.gridvlp_pipeline": ["PIL", "transformers", "numpy", "json", "torch", "time", "os", "traceback", "typing"], "modelscope.pipelines.multi_modal.visual_grounding_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.soonet_video_temporal_grounding_pipeline": ["numpy", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.multi_modal.sudoku_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.translation_evaluation_pipeline": ["numpy", "enum", "torch", "os", "typing"], "modelscope.pipelines.nlp.glm130b_text_generation_pipeline": ["typing"], "modelscope.pipelines.nlp.faq_question_answering_pipeline": ["typing"], "modelscope.pipelines.nlp.document_grounded_dialog_generate_pipeline": ["typing"], "modelscope.pipelines.nlp.automatic_post_editing_pipeline": ["tensorflow", "sacremoses", "numpy", "jieba", "sentencepiece", "os", "typing", "html"], "modelscope.pipelines.nlp.named_entity_recognition_pipeline": ["typing"], "modelscope.pipelines.nlp.interactive_translation_pipeline": ["tensorflow", "sacremoses", "numpy", "jieba", "subword_nmt", "os", "typing"], "modelscope.pipelines.nlp.summarization_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.document_grounded_dialog_retrieval_pipeline": ["numpy", "json", "faiss", "os", "typing"], "modelscope.pipelines.nlp.fasttext_text_classification_pipeline": ["numpy", "fasttext", "sentencepiece", "os", "typing"], "modelscope.pipelines.nlp.word_alignment_pipeline": ["typing", "numpy"], "modelscope.pipelines.nlp.feature_extraction_pipeline": ["torch", "os", "typing"], "modelscope.pipelines.nlp.text_ranking_pipeline": ["typing", "numpy"], "modelscope.pipelines.nlp.fid_dialogue_pipeline": ["torch", "re", "typing"], "modelscope.pipelines.nlp.text_classification_pipeline": ["torch", "typing", "numpy"], "modelscope.pipelines.nlp.codegeex_code_generation_pipeline": ["typing"], "modelscope.pipelines.nlp.translation_quality_estimation_pipeline": ["transformers", "torch", "io", "os", "typing"], "modelscope.pipelines.nlp.fill_mask_pipeline": ["typing", "numpy"], "modelscope.pipelines.nlp.distributed_plug_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.conversational_text_to_sql_pipeline": ["torch", "typing", "text2sql_lgesql"], "modelscope.pipelines.nlp.distributed_gpt3_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.information_extraction_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.table_question_answering_pipeline": ["transformers", "json", "torch", "os", "typing"], "modelscope.pipelines.nlp.user_satisfaction_estimation_pipeline": ["torch", "typing", "numpy"], "modelscope.pipelines.nlp.dialog_modeling_pipeline": ["typing"], "modelscope.pipelines.nlp.canmt_translation_pipeline": ["torch", "os", "sacremoses", "typing"], "modelscope.pipelines.nlp.word_segmentation_pipeline": ["torch", "typing"], 
"modelscope.pipelines.nlp.document_segmentation_pipeline": ["datasets", "numpy", "torch", "re", "typing"], "modelscope.pipelines.nlp.distributed_gpt_moe_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.extractive_summarization_pipeline": ["datasets", "numpy", "torch", "re", "typing"], "modelscope.pipelines.nlp.text_error_correction_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.dialog_state_tracking_pipeline": ["typing"], "modelscope.pipelines.nlp.mglm_text_summarization_pipeline": ["os", "typing"], "modelscope.pipelines.nlp.translation_pipeline": ["tensorflow", "sacremoses", "numpy", "jieba", "subword_nmt", "os", "typing"], "modelscope.pipelines.nlp.siamese_uie_pipeline": ["json", "torch", "logging", "scipy", "tqdm", "math", "copy", "time", "pathlib", "os", "typing"], "modelscope.pipelines.nlp.dialog_intent_prediction_pipeline": ["typing"], "modelscope.pipelines.nlp.sentence_embedding_pipeline": ["torch", "typing", "numpy"], "modelscope.pipelines.nlp.document_grounded_dialog_rerank_pipeline": ["ujson", "transformers", "random", "numpy", "torch", "re", "sys", "collections", "time", "os", "typing", "pprint"], "modelscope.pipelines.nlp.zero_shot_classification_pipeline": ["torch", "scipy", "typing"], "modelscope.pipelines.nlp.text_generation_pipeline": ["torch", "os", "typing"], "modelscope.pipelines.nlp.language_identification_pipline": ["tensorflow", "numpy", "re", "os", "typing"], "modelscope.pipelines.nlp.token_classification_pipeline": ["torch", "typing", "numpy"], "modelscope.pipelines.nlp.codegeex_code_translation_pipeline": ["typing"], "modelscope.pipelines.cv.bad_image_detecting_pipeline": ["torch", "typing", "numpy"], "modelscope.pipelines.cv.image_cartoon_pipeline": ["tensorflow", "numpy", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_to_image_generate_pipeline": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.facial_expression_recognition_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.retina_face_detection_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_style_transfer_pipeline": ["cv2", "os", "typing", "numpy"], "modelscope.pipelines.cv.image_face_fusion_pipeline": ["typing", "numpy"], "modelscope.pipelines.cv.ulfd_face_detection_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.pedestrian_attribute_recognition_pipeline": ["PIL", "numpy", "cv2", "json", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.image_denoise_pipeline": ["torch", "torchvision", "typing"], "modelscope.pipelines.cv.vop_retrieval_se_pipeline": ["numpy", "torch", "gzip", "os", "typing"], "modelscope.pipelines.cv.image_matting_pipeline": ["tensorflow", "numpy", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_deblur_pipeline": ["torch", "torchvision", "typing"], "modelscope.pipelines.cv.video_human_matting_pipeline": ["numpy", "cv2", "torch", "moviepy", "os", "typing"], "modelscope.pipelines.cv.live_category_pipeline": ["PIL", "numpy", "torch", "decord", "torchvision", "os", "typing"], "modelscope.pipelines.cv.image_structured_model_probing_pipeline": ["mmcv", "numpy", "torch", "math", "torchvision", "os", "typing"], "modelscope.pipelines.cv.face_quality_assessment_pipeline": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "modelscope.pipelines.cv.face_processing_base_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], 
"modelscope.pipelines.cv.image_portrait_enhancement_pipeline": ["PIL", "numpy", "cv2", "torch", "scipy", "math", "typing"], "modelscope.pipelines.cv.image_color_enhance_pipeline": ["torch", "torchvision", "typing"], "modelscope.pipelines.cv.vision_efficient_tuning_pipeline": ["torch", "torchvision", "typing", "numpy"], "modelscope.pipelines.cv.tbs_detection_utils.utils": ["PIL", "numpy", "torch", "__future__", "colorsys", "pandas", "matplotlib", "torchvision", "os"], "modelscope.pipelines.cv.video_object_segmentation_pipeline": ["PIL", "numpy", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.face_detection_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.body_3d_keypoints_pipeline": ["numpy", "cv2", "torch", "tempfile", "matplotlib", "datetime", "mpl_toolkits", "os", "typing"], "modelscope.pipelines.cv.image_paintbyexample_pipeline": ["PIL", "numpy", "cv2", "torch", "einops", "torchvision", "typing"], "modelscope.pipelines.cv.face_recognition_ood_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_classification_pipeline": ["torch", "typing", "numpy"], "modelscope.pipelines.cv.card_detection_pipeline": ["typing"], "modelscope.pipelines.cv.table_recognition_pipeline": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "modelscope.pipelines.cv.image_to_image_translation_pipeline": ["PIL", "numpy", "cv2", "torch", "sys", "io", "torchvision", "os", "typing"], "modelscope.pipelines.cv.face_attribute_recognition_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_debanding_pipeline": ["torch", "torchvision", "typing"], "modelscope.pipelines.cv.video_instance_segmentation_pipeline": ["mmcv", "numpy", "cv2", "torch", "tqdm", "os", "typing"], "modelscope.pipelines.cv.tinynas_classification_pipeline": ["torch", "math", "torchvision", "os", "typing"], "modelscope.pipelines.cv.human_reconstruction_pipeline": ["trimesh", "shutil", "numpy", "torch", "os", "typing"], "modelscope.pipelines.cv.video_multi_object_tracking_pipeline": ["torch", "os", "typing"], "modelscope.pipelines.cv.controllable_image_generation_pipeline": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "glob", "os", "typing"], "modelscope.pipelines.cv.image_defrcn_fewshot_pipeline": ["torch", "os", "typing", "numpy"], "modelscope.pipelines.cv.ddpm_semantic_segmentation_pipeline": ["torch", "torchvision", "typing"], "modelscope.pipelines.cv.content_check_pipeline": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.vop_retrieval_pipeline": ["random", "numpy", "torch", "tqdm", "math", "collections", "gzip", "os", "typing", "pickle"], "modelscope.pipelines.cv.object_detection_3d_pipeline": ["PIL", "numpy", "cv2", "torch", "tempfile", "os", "typing"], "modelscope.pipelines.cv.lineless_table_recognition_pipeline": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "modelscope.pipelines.cv.cmdssl_video_embedding_pipeline": ["PIL", "numpy", "torch", "decord", "torchvision", "os", "typing"], "modelscope.pipelines.cv.tinynas_detection_pipeline": ["typing"], "modelscope.pipelines.cv.video_deinterlace_pipeline": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "torchvision", "os", "typing"], "modelscope.pipelines.cv.image_open_vocabulary_detection_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.language_guided_video_summarization_pipeline": ["PIL", "shutil", "random", "numpy", "cv2", "torch", "tempfile", 
"clip", "os", "typing"], "modelscope.pipelines.cv.body_2d_keypoints_pipeline": ["PIL", "numpy", "cv2", "json", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.face_human_hand_detection_pipeline": ["typing", "numpy"], "modelscope.pipelines.cv.hicossl_video_embedding_pipeline": ["torch", "os", "typing", "math"], "modelscope.pipelines.cv.face_recognition_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_body_reshaping_pipeline": ["typing"], "modelscope.pipelines.cv.image_inpainting_pipeline": ["PIL", "numpy", "torch", "cv2", "typing"], "modelscope.pipelines.cv.face_recognition_onnx_fm_pipeline": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "modelscope.pipelines.cv.image_driving_perception_pipeline": ["cv2", "os", "typing", "numpy"], "modelscope.pipelines.cv.video_stabilization_pipeline": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "glob", "os", "typing"], "modelscope.pipelines.cv.indoor_layout_estimation_pipeline": ["cv2", "typing", "numpy"], "modelscope.pipelines.cv.ddcolor_image_colorization_pipeline": ["numpy", "cv2", "torch", "torchvision", "typing"], "modelscope.pipelines.cv.face_emotion_pipeline": ["typing", "numpy"], "modelscope.pipelines.cv.mtcnn_face_detection_pipeline": ["torch", "os", "typing"], "modelscope.pipelines.cv.nerf_recon_acc_pipeline": ["typing"], "modelscope.pipelines.cv.image_bts_depth_estimation_pipeline": ["albumentations", "numpy", "torch", "cv2", "typing"], "modelscope.pipelines.cv.facial_landmark_confidence_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.face_reconstruction_pipeline": ["PIL", "tensorflow", "shutil", "numpy", "cv2", "torch", "scipy", "io", "face_alignment", "os", "typing"], "modelscope.pipelines.cv.mog_face_detection_pipeline": ["os", "typing", "numpy"], "modelscope.pipelines.cv.skin_retouching_pipeline": ["PIL", "tensorflow", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.vision_middleware_pipeline": ["mmcv", "numpy", "torch", "math", "torchvision", "os", "typing"], "modelscope.pipelines.cv.face_liveness_ir_pipeline": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "modelscope.pipelines.cv.image_detection_pipeline": ["typing", "numpy"], "modelscope.pipelines.cv.realtime_video_object_detection_pipeline": ["PIL", "numpy", "cv2", "json", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.video_panoptic_segmentation_pipeline": ["mmcv", "numpy", "cv2", "torch", "tqdm", "os", "typing"], "modelscope.pipelines.cv.action_detection_pipeline": ["os", "typing", "math"], "modelscope.pipelines.cv.product_segmentation_pipeline": ["typing", "numpy"], "modelscope.pipelines.cv.tbs_detection_pipeline": ["PIL", "numpy", "cv2", "torch", "colorsys", "os", "typing"], "modelscope.pipelines.cv.image_matching_pipeline": ["PIL", "numpy", "torch", "cv2", "typing"], "modelscope.pipelines.cv.video_category_pipeline": ["PIL", "numpy", "json", "torch", "decord", "torchvision", "os", "typing"], "modelscope.pipelines.cv.hand_static_pipeline": ["typing", "numpy"], "modelscope.pipelines.cv.animal_recognition_pipeline": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.pointcloud_sceneflow_estimation_pipeline": ["torch", "typing", "plyfile", "numpy"], "modelscope.pipelines.cv.image_instance_segmentation_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.video_frame_interpolation_pipeline": ["subprocess", "numpy", 
"cv2", "torch", "tempfile", "math", "glob", "torchvision", "os", "typing"], "modelscope.pipelines.cv.image_quality_assessment_mos_pipeline": ["numpy", "cv2", "torch", "tempfile", "math", "torchvision", "typing"], "modelscope.pipelines.cv.video_summarization_pipeline": ["numpy", "cv2", "torch", "tqdm", "os", "typing"], "modelscope.pipelines.cv.panorama_depth_estimation_pipeline": ["PIL", "numpy", "torch", "cv2", "typing"], "modelscope.pipelines.cv.fast_instance_segmentation_pipeline": ["torch", "torchvision", "typing", "numpy"], "modelscope.pipelines.cv.vidt_pipeline": ["torch", "torchvision", "typing"], "modelscope.pipelines.cv.image_skychange_pipeline": ["PIL", "pdb", "numpy", "cv2", "time", "typing"], "modelscope.pipelines.cv.image_quality_assessment_man_pipeline": ["numpy", "cv2", "torch", "tempfile", "math", "torchvision", "typing"], "modelscope.pipelines.cv.image_restoration_pipeline": ["typing"], "modelscope.pipelines.cv.video_inpainting_pipeline": ["typing"], "modelscope.pipelines.cv.face_image_generation_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.video_super_resolution_pipeline": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "torchvision", "os", "typing"], "modelscope.pipelines.cv.referring_video_object_segmentation_pipeline": ["PIL", "numpy", "torch", "einops", "tqdm", "tempfile", "moviepy", "torchvision", "typing"], "modelscope.pipelines.cv.virtual_try_on_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.ocr_recognition_pipeline": [], "modelscope.pipelines.cv.ocr_detection_pipeline": ["tensorflow", "tf_slim", "numpy", "cv2", "torch", "math", "os", "typing"], "modelscope.pipelines.cv.movie_scene_segmentation_pipeline": ["torch", "typing"], "modelscope.pipelines.cv.maskdino_instance_segmentation_pipeline": ["torch", "torchvision", "typing"], "modelscope.pipelines.cv.video_colorization_pipeline": ["PIL", "subprocess", "numpy", "cv2", "torch", "tempfile", "torchvision", "os", "typing"], "modelscope.pipelines.cv.image_human_parsing_pipeline": ["torch", "torchvision", "typing", "numpy"], "modelscope.pipelines.cv.face_liveness_xc_pipeline": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "modelscope.pipelines.cv.crowd_counting_pipeline": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "modelscope.pipelines.cv.video_depth_estimation_pipeline": ["typing"], "modelscope.pipelines.cv.image_colorization_pipeline": ["PIL", "numpy", "torch", "cv2", "torchvision", "typing"], "modelscope.pipelines.cv.arc_face_recognition_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_quality_assessment_degradation_pipeline": ["numpy", "cv2", "torch", "tempfile", "math", "torchvision", "typing"], "modelscope.pipelines.cv.ocr_utils.model_convnext_transformer": ["torch"], "modelscope.pipelines.cv.ocr_utils.model_resnet18_half": ["torch", "os"], "modelscope.pipelines.cv.ocr_utils.resnet18_v1": ["tensorflow", "tf_slim"], "modelscope.pipelines.cv.ocr_utils.model_dla34": ["torch", "os", "math", "numpy"], "modelscope.pipelines.cv.ocr_utils.ocr_modules.vitstr": ["torch", "logging", "functools", "copy", "__future__"], "modelscope.pipelines.cv.ocr_utils.ocr_modules.timm_tinyc": ["copy", "itertools", "torch", "logging", "functools", "math", "collections"], "modelscope.pipelines.cv.ocr_utils.ocr_modules.convnext": ["torch"], "modelscope.pipelines.cv.ocr_utils.table_process": ["copy", "numpy", "random", "cv2", "torch", "math"], 
"modelscope.pipelines.cv.ocr_utils.resnet_utils": ["tensorflow", "collections", "tf_slim"], "modelscope.pipelines.cv.ocr_utils.ops": ["tensorflow", "shutil", "numpy", "cv2", "absl", "sys", "math", "os", "uuid"], "modelscope.pipelines.cv.ocr_utils.utils": ["cv2", "pyclipper", "shapely", "numpy"], "modelscope.pipelines.cv.ocr_utils.model_vlpt": ["torch", "os", "sys", "math"], "modelscope.pipelines.cv.ocr_utils.model_resnet_mutex_v4_linewithchar": ["tensorflow", "tf_slim"], "modelscope.pipelines.cv.image_inpainting_sdv2_pipeline": ["numpy", "cv2", "torch", "tempfile", "sys", "math", "diffusers", "os", "typing"], "modelscope.pipelines.cv.image_super_resolution_pipeline": ["PIL", "numpy", "torch", "cv2", "typing"], "modelscope.pipelines.cv.image_salient_detection_pipeline": ["typing"], "modelscope.pipelines.cv.video_single_object_tracking_pipeline": ["cv2", "os", "typing"], "modelscope.pipelines.cv.face_recognition_onnx_ir_pipeline": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "modelscope.pipelines.cv.product_retrieval_embedding_pipeline": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.mask_face_recognition_pipeline": ["PIL", "numpy", "torch", "cv2", "collections", "os", "typing"], "modelscope.pipelines.cv.mobile_image_super_resolution_pipeline": ["skimage", "numpy", "torch", "torchvision", "typing"], "modelscope.pipelines.cv.license_plate_detection_pipeline": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "modelscope.pipelines.cv.image_semantic_segmentation_pipeline": ["PIL", "numpy", "torch", "cv2", "typing"], "modelscope.pipelines.cv.text_driven_segmentation_pipleline": ["typing"], "modelscope.pipelines.cv.motion_generation_pipeline": ["numpy", "torch", "tempfile", "os", "typing"], "modelscope.pipelines.cv.image_mvs_depth_estimation_pipeline": ["os", "typing", "tempfile", "shutil"], "modelscope.pipelines.cv.image_depth_estimation_pipeline": ["PIL", "numpy", "torch", "cv2", "typing"], "modelscope.pipelines.cv.action_recognition_pipeline": ["torch", "os", "typing", "math"], "modelscope.pipelines.cv.image_reid_person_pipeline": ["PIL", "torch", "math", "torchvision", "os", "typing"], "modelscope.pipelines.cv.general_recognition_pipeline": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.shop_segmentation_pipleline": ["typing"], "modelscope.pipelines.base": ["multiprocessing", "numpy", "random", "torch", "abc", "functools", "packaging", "os", "threading", "typing"], "modelscope.preprocessors.kws": ["os", "typing", "yaml"], "modelscope.preprocessors.multi_modal": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "modelscope.preprocessors.science.uni_fold": ["unittest", "hashlib", "ipdb", "random", "numpy", "torch", "json", "tarfile", "pathlib", "os", "typing", "requests", "logging", "re", "tqdm", "time", "gzip", "pickle"], "modelscope.preprocessors.tts": ["os", "kantts", "typing"], "modelscope.preprocessors.asr": ["os", "typing"], "modelscope.preprocessors.builder": [], "modelscope.preprocessors.movie_scene_segmentation.transforms": ["numbers", "PIL", "random", "numpy", "torch", "torchvision", "os", "typing"], "modelscope.preprocessors.common": ["numpy", "torch", "collections", "time", "typing"], "modelscope.preprocessors.nlp.token_classification_preprocessor": ["torch", "typing", "numpy"], "modelscope.preprocessors.nlp.siamese_uie_preprocessor": ["typing", "transformers"], "modelscope.preprocessors.nlp.relation_extraction_preprocessor": 
["typing", "transformers"], "modelscope.preprocessors.nlp.token_classification_viet_preprocessor": ["torch", "typing"], "modelscope.preprocessors.nlp.translation_evaluation_preprocessor": ["torch", "typing", "transformers"], "modelscope.preprocessors.nlp.text_classification_preprocessor": ["typing", "numpy"], "modelscope.preprocessors.nlp.document_grounded_dialog_retrieval_preprocessor": ["torch", "os", "typing", "transformers"], "modelscope.preprocessors.nlp.zero_shot_classification_preprocessor": ["typing"], "modelscope.preprocessors.nlp.canmt_translation": ["sacremoses", "jieba", "torch", "subword_nmt", "os", "typing"], "modelscope.preprocessors.nlp.fill_mask_preprocessor": ["numpy", "torch", "abc", "re", "os", "typing"], "modelscope.preprocessors.nlp.word_alignment_preprocessor": ["itertools", "numpy", "torch", "os", "typing"], "modelscope.preprocessors.nlp.space_T_en.fields.preprocess_dataset": ["text2sql_lgesql"], "modelscope.preprocessors.nlp.space_T_en.fields.parse": [], "modelscope.preprocessors.nlp.space_T_en.fields.common_utils": ["itertools", "numpy", "nltk", "sqlite3", "text2sql_lgesql", "os"], "modelscope.preprocessors.nlp.space_T_en.fields.process_dataset": ["os", "sys", "pickle", "text2sql_lgesql"], "modelscope.preprocessors.nlp.space_T_en.conversational_text_to_sql_preprocessor": ["json", "torch", "text2sql_lgesql", "os", "typing"], "modelscope.preprocessors.nlp.document_grounded_dialog_generate_preprocessor": ["torch", "os", "typing", "transformers"], "modelscope.preprocessors.nlp.text_error_correction": ["torch", "os", "typing", "transformers"], "modelscope.preprocessors.nlp.text_ranking_preprocessor": ["typing", "transformers"], "modelscope.preprocessors.nlp.transformers_tokenizer": ["json", "os", "transformers", "collections"], "modelscope.preprocessors.nlp.bert_seq_cls_tokenizer": ["typing", "transformers"], "modelscope.preprocessors.nlp.text_clean": ["re", "codecs", "sys"], "modelscope.preprocessors.nlp.utils": ["transformers", "numpy", "json", "collections", "os", "typing"], "modelscope.preprocessors.nlp.document_segmentation_preprocessor": ["typing"], "modelscope.preprocessors.nlp.sentence_embedding_preprocessor": ["typing"], "modelscope.preprocessors.nlp.mglm_summarization_preprocessor": ["os", "re", "typing"], "modelscope.preprocessors.nlp.token_classification_thai_preprocessor": ["typing"], "modelscope.preprocessors.nlp.mgeo_ranking_preprocessor": ["torch", "typing", "transformers"], "modelscope.preprocessors.nlp.space.dialog_intent_prediction_preprocessor": ["json", "os", "typing"], "modelscope.preprocessors.nlp.space.lazy_dataset": ["json"], "modelscope.preprocessors.nlp.space.dialog_state_tracking_preprocessor": ["typing"], "modelscope.preprocessors.nlp.space.preprocess": ["os", "glob"], "modelscope.preprocessors.nlp.space.data_loader": ["os", "math", "numpy"], "modelscope.preprocessors.nlp.space.batch": [], "modelscope.preprocessors.nlp.space.dialog_modeling_preprocessor": ["os", "typing"], "modelscope.preprocessors.nlp.space.tokenizer": ["json", "logging", "__future__", "functools", "sys", "collections", "unicodedata", "os", "regex"], "modelscope.preprocessors.nlp.space.dst_processors": ["six", "numpy", "json", "logging", "re", "tqdm"], "modelscope.preprocessors.nlp.space.args": ["json", "argparse"], "modelscope.preprocessors.nlp.space.fields.gen_field": ["itertools", "random", "numpy", "json", "collections", "asyncio", "os"], "modelscope.preprocessors.nlp.space.fields.intent_field": ["multiprocessing", "itertools", "random", "numpy", "json", "re", "tqdm", 
"collections", "time", "glob", "os"], "modelscope.preprocessors.nlp.space.sampler": ["numpy"], "modelscope.preprocessors.nlp.space.tensorlistdataset": ["torch"], "modelscope.preprocessors.nlp.dialog_classification_use_preprocessor": ["torch", "typing", "transformers"], "modelscope.preprocessors.nlp.text_generation_preprocessor": ["torch", "os", "typing", "numpy"], "modelscope.preprocessors.nlp.space_T_cn.table_question_answering_preprocessor": ["torch", "os", "typing", "transformers"], "modelscope.preprocessors.nlp.space_T_cn.fields.database": ["json", "sqlite3", "tqdm"], "modelscope.preprocessors.nlp.space_T_cn.fields.schema_link": ["re"], "modelscope.preprocessors.nlp.space_T_cn.fields.struct": [], "modelscope.preprocessors.nlp.document_grounded_dialog_rerank_preprocessor": ["transformers", "torch", "copy", "os", "typing"], "modelscope.preprocessors.nlp.feature_extraction_preprocessor": ["typing", "numpy"], "modelscope.preprocessors.nlp.faq_question_answering_preprocessor": ["torch", "typing"], "modelscope.preprocessors.audio": ["numpy", "torch", "scipy", "io", "os", "typing"], "modelscope.preprocessors.cv.image_classification_preprocessor": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "modelscope.preprocessors.cv.util": ["os", "sys", "shutil", "collections"], "modelscope.preprocessors.cv.timer": ["time"], "modelscope.preprocessors.cv.bad_image_detecting_preprocessor": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "modelscope.preprocessors.cv.mmcls_preprocessor": ["os", "typing", "numpy"], "modelscope.preprocessors.cv.controllable_image_generation": ["PIL", "numpy", "cv2", "torch", "math", "torchvision", "os", "typing"], "modelscope.preprocessors.cv.image_quality_assessment_mos": ["numpy", "cv2", "math", "torchvision", "typing"], "modelscope.preprocessors.cv.image_restoration_preprocessor": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "modelscope.preprocessors.cv.cv2_transforms": ["numbers", "random", "numpy", "cv2", "torch", "math", "collections"], "modelscope.preprocessors.cv.video_super_resolution": ["cv2", "os", "collections"], "modelscope.preprocessors.cv.image_quality_assessment_man": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "modelscope.preprocessors.cv.action_detection_mapper": ["copy", "numpy", "random", "torch", "decord", "scipy", "detectron2"], "modelscope.preprocessors.cv.video_stabilization": ["cv2", "torch", "numpy"], "modelscope.preprocessors.video": ["urllib", "numpy", "random", "torch", "decord", "tempfile", "math", "torchvision", "os", "uuid"], "modelscope.preprocessors.image": ["PIL", "numpy", "cv2", "io", "typing"], "modelscope.preprocessors.base": ["os", "abc", "typing"], "modelscope.preprocessors.ofa.visual_question_answering": ["PIL", "torch", "torchvision", "typing"], "modelscope.preprocessors.ofa.image_classification": ["PIL", "timm", "torch", "functools", "torchvision", "typing"], "modelscope.preprocessors.ofa.utils.transforms": ["PIL", "torchvision", "numpy", "random", "torch"], "modelscope.preprocessors.ofa.utils.bridge_content_encoder": ["sqlite3", "difflib", "rapidfuzz", "functools", "typing"], "modelscope.preprocessors.ofa.utils.collate": ["torch", "typing", "numpy"], "modelscope.preprocessors.ofa.utils.get_tables": ["traceback", "sqlite3", "sys"], "modelscope.preprocessors.ofa.utils.text2phone": [], "modelscope.preprocessors.ofa.utils.audio_helper": ["torch", "typing", "numpy"], "modelscope.preprocessors.ofa.utils.random_help": ["torch", "torch_xla"], 
"modelscope.preprocessors.ofa.utils.vision_helper": ["cv2", "numpy"], "modelscope.preprocessors.ofa.utils.constant": [], "modelscope.preprocessors.ofa.asr": ["random", "torch", "librosa", "fairseq", "soundfile", "pathlib", "os", "typing"], "modelscope.preprocessors.ofa.text2sql": ["random", "torch", "re", "os", "typing"], "modelscope.preprocessors.ofa.text_classification": ["torch", "typing"], "modelscope.preprocessors.ofa.image_captioning": ["torch", "torchvision", "typing"], "modelscope.preprocessors.ofa.ocr_recognition": ["torch", "unicodedata2", "torchvision", "typing", "zhconv"], "modelscope.preprocessors.ofa.visual_entailment": ["PIL", "torch", "torchvision", "typing"], "modelscope.preprocessors.ofa.visual_grounding": ["PIL", "numpy", "torch", "torchvision", "typing"], "modelscope.preprocessors.ofa.summarization": ["torch", "typing"], "modelscope.preprocessors.ofa.text_to_image_synthesis": ["torch", "typing"], "modelscope.preprocessors.ofa.sudoku": ["torch", "typing", "numpy"], "modelscope.preprocessors.ofa.base": ["PIL", "string", "numpy", "json", "torch", "torchaudio", "re", "io", "os"], "modelscope.trainers.parallel.builder": ["torch"], "modelscope.trainers.parallel.utils": [], "modelscope.trainers.optimizer.builder": ["torch", "inspect", "typing"], "modelscope.trainers.optimizer.child_tuning_adamw_optimizer": ["numpy", "torch", "types", "math", "typing"], "modelscope.trainers.lrscheduler.builder": ["torch", "inspect", "packaging"], "modelscope.trainers.lrscheduler.warmup.warmup": [], "modelscope.trainers.lrscheduler.warmup.base": ["torch"], "modelscope.trainers.nlp_trainer": ["torch", "os", "typing", "numpy"], "modelscope.trainers.utils.inference": ["shutil", "torch", "logging", "tqdm", "collections", "os", "pickle"], "modelscope.trainers.utils.log_buffer": ["collections", "numpy"], "modelscope.trainers.training_args": ["json", "re", "addict", "copy", "dataclasses", "typing"], "modelscope.trainers.builder": [], "modelscope.trainers.audio.kws_nearfield_trainer": ["torch", "re", "tensorboardX", "copy", "datetime", "yaml", "os", "typing"], "modelscope.trainers.audio.kws_utils.model_utils": ["shutil", "numpy", "torch", "re", "glob", "yaml", "os"], "modelscope.trainers.audio.kws_utils.runtime_utils": ["codecs", "shutil", "json", "re", "sys", "collections", "os", "stat"], "modelscope.trainers.audio.kws_utils.det_utils": ["kaldiio", "numpy", "json", "torch", "matplotlib", "os", "glob", "threading"], "modelscope.trainers.audio.kws_utils.batch_utils": ["numpy", "torch", "sys", "math", "collections", "datetime", "os", "typing"], "modelscope.trainers.audio.kws_utils.file_utils": ["re"], "modelscope.trainers.audio.kws_farfield_trainer": ["numpy", "torch", "math", "datetime", "glob", "os", "typing", "pickle"], "modelscope.trainers.audio.separation_trainer": ["numpy", "torch", "torchaudio", "tqdm", "csv", "os", "speechbrain", "typing"], "modelscope.trainers.audio.asr_trainer": ["shutil", "json", "typing", "tempfile", "os", "funasr"], "modelscope.trainers.audio.tts_trainer": ["shutil", "json", "tempfile", "os", "typing", "zipfile"], "modelscope.trainers.audio.ans_trainer": [], "modelscope.trainers.hooks.checkpoint.checkpoint_hook": ["random", "numpy", "torch", "time", "os", "typing"], "modelscope.trainers.hooks.checkpoint.checkpoint_processor": ["os", "re", "shutil"], "modelscope.trainers.hooks.checkpoint.load_checkpoint_hook": ["random", "numpy", "torch", "packaging", "typing"], "modelscope.trainers.hooks.logger.text_logger_hook": ["json", "torch", "collections", "datetime", "os"], 
"modelscope.trainers.hooks.logger.tensorboard_hook": ["torch", "os", "numpy"], "modelscope.trainers.hooks.logger.base": ["numbers", "torch", "abc", "numpy"], "modelscope.trainers.hooks.optimizer.apex_optimizer_hook": ["torch", "logging", "packaging"], "modelscope.trainers.hooks.optimizer.torch_optimizer_hook": ["logging"], "modelscope.trainers.hooks.optimizer.base": ["torch", "logging"], "modelscope.trainers.hooks.distributed.megatron_hook": ["torch", "os", "shutil", "megatron_util"], "modelscope.trainers.hooks.distributed.deepspeed_hook": ["shutil", "torch", "megatron_util", "deepspeed", "os"], "modelscope.trainers.hooks.distributed.ddp_hook": [], "modelscope.trainers.hooks.lr_scheduler_hook": [], "modelscope.trainers.hooks.early_stop_hook": ["numpy"], "modelscope.trainers.hooks.hook": ["functools"], "modelscope.trainers.hooks.priority": ["typing", "enum"], "modelscope.trainers.hooks.builder": [], "modelscope.trainers.hooks.clip_clamp_logit_scale_hook": ["torch"], "modelscope.trainers.hooks.compression.sparsity_hook": ["os"], "modelscope.trainers.hooks.compression.utils": ["torch"], "modelscope.trainers.hooks.iter_timer_hook": ["time"], "modelscope.trainers.hooks.evaluation_hook": ["typing", "collections"], "modelscope.trainers.multi_modal.clip.clip_trainer": ["torch", "os", "typing", "math"], "modelscope.trainers.multi_modal.clip.clip_trainer_utils": ["torch", "functools", "math", "inspect", "os"], "modelscope.trainers.multi_modal.efficient_diffusion_tuning.efficient_diffusion_tuning_trainer": ["torch", "typing"], "modelscope.trainers.multi_modal.mplug.mplug_trainer": ["torch", "typing", "collections"], "modelscope.trainers.multi_modal.team.team_trainer": ["numpy", "torch", "collections", "sklearn", "os", "typing"], "modelscope.trainers.multi_modal.team.team_trainer_utils": ["torch", "torchvision", "PIL"], "modelscope.trainers.multi_modal.mgeo_ranking_trainer": ["torch", "dataclasses", "typing"], "modelscope.trainers.multi_modal.ofa.ofa_trainer": ["shutil", "json", "torch", "functools", "tempfile", "math", "os", "typing"], "modelscope.trainers.multi_modal.ofa.ofa_trainer_utils": ["transformers", "shutil", "numpy", "torch", "os", "math"], "modelscope.trainers.default_config": ["typing"], "modelscope.trainers.nlp.gpt_moe_trainer": ["torch", "collections", "megatron_util", "os", "typing"], "modelscope.trainers.nlp.plug_trainer": ["torch", "megatron_util", "deepspeed", "os", "typing"], "modelscope.trainers.nlp.text_generation_trainer": ["torch", "collections"], "modelscope.trainers.nlp.document_grounded_dialog_rerank_trainer": ["transformers", "numpy", "random", "torch", "time", "os", "typing"], "modelscope.trainers.nlp.csanmt_translation_trainer": ["os", "tensorflow", "typing", "time"], "modelscope.trainers.nlp.translation_evaluation_trainer": ["transformers", "random", "torch", "tqdm", "math", "pandas", "os", "typing"], "modelscope.trainers.nlp.faq_question_answering_trainer": ["distutils", "contextlib", "numpy", "torch", "functools", "collections", "dataclasses", "typing"], "modelscope.trainers.nlp.table_question_answering_trainer": ["numpy", "json", "torch", "tqdm", "time", "os", "typing"], "modelscope.trainers.nlp.sequence_classification_trainer": ["time", "typing", "numpy"], "modelscope.trainers.nlp.sentence_embedding_trainer": ["transformers", "numpy", "torch", "tqdm", "time", "dataclasses", "typing"], "modelscope.trainers.nlp.gpt3_trainer": ["torch", "os", "copy", "typing"], "modelscope.trainers.nlp.text_ranking_trainer": ["numpy", "torch", "tqdm", "time", "dataclasses", "typing"], 
"modelscope.trainers.nlp.siamese_uie_trainer": ["random", "numpy", "json", "torch", "collections", "math", "time", "os", "typing"], "modelscope.trainers.nlp.space.metrics.metrics_tracker": ["math", "collections"], "modelscope.trainers.nlp.space.dialog_intent_trainer": ["os", "typing", "numpy"], "modelscope.trainers.nlp.space.eval": ["numpy", "json", "math", "collections", "nltk", "sklearn"], "modelscope.trainers.nlp.space.trainer.intent_trainer": ["transformers", "numpy", "json", "torch", "tqdm", "collections", "time", "os"], "modelscope.trainers.nlp.space.trainer.gen_trainer": ["transformers", "numpy", "json", "torch", "tqdm", "collections", "time", "os"], "modelscope.trainers.nlp.space.dialog_modeling_trainer": ["os", "time", "typing", "numpy"], "modelscope.trainers.nlp.document_grounded_dialog_retrieval_trainer": ["transformers", "numpy", "json", "torch", "tqdm", "faiss", "os"], "modelscope.trainers.nlp.document_grounded_dialog_generate_trainer": ["string", "transformers", "json", "torch", "rouge", "re", "tqdm", "collections", "os", "sacrebleu"], "modelscope.trainers.cli_argument_parser": ["dataclasses", "typing", "argparse"], "modelscope.trainers.cv.ocr_recognition_trainer": ["torch", "time", "collections"], "modelscope.trainers.cv.image_instance_segmentation_trainer": [], "modelscope.trainers.cv.referring_video_object_segmentation_trainer": ["torch", "os"], "modelscope.trainers.cv.vision_efficient_tuning_trainer": ["torch", "typing"], "modelscope.trainers.cv.movie_scene_segmentation_trainer": [], "modelscope.trainers.cv.nerf_recon_acc_trainer": ["random", "numpy", "cv2", "torch", "tqdm", "time", "datetime", "glob", "os", "typing"], "modelscope.trainers.cv.image_detection_damoyolo_trainer": ["torch", "math", "datetime", "time", "os", "easydict", "typing"], "modelscope.trainers.cv.image_classifition_trainer": ["numpy", "torch", "copy", "time", "os", "typing"], "modelscope.trainers.cv.cartoon_translation_trainer": ["tensorflow", "numpy", "tqdm", "packaging", "os", "typing"], "modelscope.trainers.cv.ocr_detection_db_trainer": ["numpy", "torch", "tqdm", "math", "copy", "datetime", "time", "os", "easydict", "typing"], "modelscope.trainers.cv.card_detection_scrfd_trainer": [], "modelscope.trainers.cv.face_detection_scrfd_trainer": ["copy", "time", "typing", "os"], "modelscope.trainers.cv.image_inpainting_trainer": ["torch", "time", "collections"], "modelscope.trainers.cv.image_portrait_enhancement_trainer": ["torch", "collections"], "modelscope.trainers.cv.action_detection_trainer": ["torch", "fvcore", "os", "typing", "detectron2"], "modelscope.trainers.cv.image_defrcn_fewshot_detection_trainer": ["torch", "collections", "os", "typing", "detectron2"], "modelscope.trainers.trainer": ["distutils", "json", "torch", "functools", "collections", "copy", "inspect", "os", "typing"], "modelscope.trainers.base": ["os", "abc", "typing", "time"], "modelscope.msdatasets.ms_dataset": ["datasets", "numpy", "warnings", "os", "typing"], "modelscope.msdatasets.context.dataset_context_config": ["typing"], "modelscope.msdatasets.auth.auth_config": ["http", "typing"], "modelscope.msdatasets.meta.data_meta_config": [], "modelscope.msdatasets.meta.data_meta_manager": ["datasets", "shutil", "json", "collections", "os"], "modelscope.msdatasets.utils.oss_utils": ["multiprocessing", "datasets", "__future__", "oss2", "os"], "modelscope.msdatasets.utils.maxcompute_utils": ["pandas", "math"], "modelscope.msdatasets.utils.dataset_utils": ["os", "typing", "collections"], "modelscope.msdatasets.utils.delete_utils": [], 
"modelscope.msdatasets.utils.upload_utils": ["os", "tqdm", "multiprocessing"], "modelscope.msdatasets.task_datasets.video_summarization_dataset": [], "modelscope.msdatasets.task_datasets.sidd_image_denoising": [], "modelscope.msdatasets.task_datasets.torch_base_dataset": [], "modelscope.msdatasets.task_datasets.reds_image_deblurring_dataset": [], "modelscope.msdatasets.task_datasets.gopro_image_deblurring_dataset": [], "modelscope.msdatasets.data_files.data_files_manager": ["os", "datasets", "typing"], "modelscope.msdatasets.audio.asr_dataset": [], "modelscope.msdatasets.download.download_config": ["datasets", "typing"], "modelscope.msdatasets.download.download_manager": ["datasets"], "modelscope.msdatasets.download.dataset_builder": ["datasets", "pandas", "pyarrow", "os", "typing"], "modelscope.msdatasets.dataset_cls.dataset": ["copy", "pandas", "datasets", "os"], "modelscope.msdatasets.dataset_cls.custom_datasets.image_quality_assessment_degradation.image_quality_assessment_degradation_dataset": ["torchvision"], "modelscope.msdatasets.dataset_cls.custom_datasets.image_portrait_enhancement.data_utils": ["cv2", "torch"], "modelscope.msdatasets.dataset_cls.custom_datasets.image_portrait_enhancement.image_portrait_enhancement_dataset": ["cv2", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.veco_dataset": ["datasets", "typing", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.image_instance_segmentation_coco_dataset": ["os", "numpy", "pycocotools"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_recognition_dataset": ["PIL", "numpy", "cv2", "json", "torch", "six", "lmdb", "os"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.image_dataset": ["numpy", "cv2", "torch", "logging", "functools", "math", "bisect", "os", "glob"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.processes.make_border_map": ["cv2", "pyclipper", "shapely", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.processes.normalize_image": ["torch", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.processes.make_icdar_data": ["cv2", "torch", "collections", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.processes.make_seg_detection_data": ["cv2", "pyclipper", "shapely", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.processes.data_process": [], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.processes.augment_data": ["cv2", "imgaug", "math", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.processes.random_crop_data": ["cv2", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.data_loader": ["numpy", "torch", "math", "imgaug", "bisect"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.measures.quad_measurer": ["numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.measures.iou_evaluator": ["numpy", "shapely", "collections"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.augmenter": ["imgaug"], "modelscope.msdatasets.dataset_cls.custom_datasets.bad_image_detecting.bad_image_detecting_dataset": [], "modelscope.msdatasets.dataset_cls.custom_datasets.video_summarization_dataset": ["numpy", "json", "torch", "h5py", "os"], "modelscope.msdatasets.dataset_cls.custom_datasets.image_inpainting.image_inpainting_dataset": ["albumentations", "numpy", "enum", "cv2", "os", "glob"], 
"modelscope.msdatasets.dataset_cls.custom_datasets.image_inpainting.aug": ["albumentations", "imgaug"], "modelscope.msdatasets.dataset_cls.custom_datasets.language_guided_video_summarization_dataset": ["numpy", "json", "torch", "h5py", "os"], "modelscope.msdatasets.dataset_cls.custom_datasets.builder": [], "modelscope.msdatasets.dataset_cls.custom_datasets.movie_scene_segmentation.movie_scene_segmentation_dataset": ["random", "json", "torch", "copy", "torchvision", "os"], "modelscope.msdatasets.dataset_cls.custom_datasets.movie_scene_segmentation.sampler": ["random", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.text_ranking_dataset": ["torch", "typing", "random"], "modelscope.msdatasets.dataset_cls.custom_datasets.audio.kws_nearfield_dataset": ["torch", "random"], "modelscope.msdatasets.dataset_cls.custom_datasets.audio.kws_farfield_dataset": ["numpy", "torch", "math", "queue", "os", "threading"], "modelscope.msdatasets.dataset_cls.custom_datasets.audio.kws_nearfield_processor": ["kaldiio", "numpy", "random", "json", "torch", "torchaudio"], "modelscope.msdatasets.dataset_cls.custom_datasets.audio.asr_dataset": ["os"], "modelscope.msdatasets.dataset_cls.custom_datasets.sidd_image_denoising.transforms": ["random"], "modelscope.msdatasets.dataset_cls.custom_datasets.sidd_image_denoising.sidd_image_denoising_dataset": ["cv2", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.sidd_image_denoising.data_utils": ["cv2", "torch"], "modelscope.msdatasets.dataset_cls.custom_datasets.reds_image_deblurring_dataset": ["cv2", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.video_frame_interpolation.data_utils": ["cv2", "torch"], "modelscope.msdatasets.dataset_cls.custom_datasets.video_frame_interpolation.video_frame_interpolation_dataset": ["cv2", "torch", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.image_quality_assmessment_mos.image_quality_assessment_mos_dataset": [], "modelscope.msdatasets.dataset_cls.custom_datasets.mgeo_ranking_dataset": ["json", "torch", "typing", "random"], "modelscope.msdatasets.dataset_cls.custom_datasets.video_stabilization.video_stabilization_dataset": [], "modelscope.msdatasets.dataset_cls.custom_datasets.gopro_image_deblurring_dataset": ["cv2", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.referring_video_object_segmentation.transformers": ["PIL", "torch", "torchvision", "random"], "modelscope.msdatasets.dataset_cls.custom_datasets.referring_video_object_segmentation.referring_video_object_segmentation_dataset": ["numpy", "pycocotools", "json", "torch", "tqdm", "h5py", "glob", "torchvision", "pandas", "os"], "modelscope.msdatasets.dataset_cls.custom_datasets.image_colorization.image_colorization_dataset": ["cv2", "torch", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.build": ["torch", "copy", "bisect", "math"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.datasets.coco": ["cv2", "torch", "torchvision", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.datasets.mosaic_wrapper": ["random", "numpy", "cv2", "torch", "math"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.transforms.build": [], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.transforms.transforms": ["torchvision", "numpy", "random", "cv2", "torch"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.collate_batch": [], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.samplers.grouped_batch_sampler": ["torch", "itertools"], 
"modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.samplers.distributed": ["torch", "math"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.samplers.iteration_based_batch_sampler": ["torch"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.evaluation.coco.coco_eval": ["torch", "os", "tempfile", "collections"], "modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base": ["os"], "modelscope.msdatasets.dataset_cls.custom_datasets.torch_custom_dataset": ["torch", "typing"], "modelscope.msdatasets.dataset_cls.custom_datasets.video_super_resolution.video_super_resolution_dataset": ["cv2", "torch", "collections", "numpy"], "modelscope.msdatasets.data_loader.data_loader_manager": ["os", "abc", "datasets", "enum"], "modelscope.msdatasets.data_loader.data_loader": ["os", "abc", "datasets", "typing"], "modelscope.exporters.torch_model_exporter": ["itertools", "contextlib", "torch", "os", "typing"], "modelscope.exporters.builder": [], "modelscope.exporters.audio.ans_dfsmn_exporter": ["torch", "os"], "modelscope.exporters.nlp.csanmt_for_translation_exporter": ["os", "typing", "tensorflow"], "modelscope.exporters.nlp.model_for_token_classification_exporter": ["torch", "typing", "collections"], "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter": ["torch", "typing", "collections"], "modelscope.exporters.nlp.sbert_for_zero_shot_classification_exporter": ["typing", "collections"], "modelscope.exporters.cv.object_detection_damoyolo_exporter": ["numpy", "torch", "functools", "onnx", "os", "typing"], "modelscope.exporters.cv.face_detection_scrfd_exporter": ["numpy", "torch", "functools", "onnx", "os", "typing"], "modelscope.exporters.cv.cartoon_translation_exporter": ["os", "tensorflow", "typing", "packaging"], "modelscope.exporters.tf_model_exporter": ["os", "tensorflow", "typing"], "modelscope.exporters.base": ["os", "abc", "typing"]}, "version": "1.6.0", "md5": "5e46ad1c70848d28c7aeafd9db9c3aac", "files_mtime": {"TEMPLATE_PATH/models/science/unifold/config.py": 1666778289.6766584, "TEMPLATE_PATH/models/science/unifold/msa/tools/hmmsearch.py": 1666778289.6888485, "TEMPLATE_PATH/models/science/unifold/msa/tools/hhblits.py": 1666778289.6881094, "TEMPLATE_PATH/models/science/unifold/msa/tools/kalign.py": 1666778289.689356, "TEMPLATE_PATH/models/science/unifold/msa/tools/utils.py": 1666778289.6895845, "TEMPLATE_PATH/models/science/unifold/msa/tools/hmmbuild.py": 1666778289.6885293, "TEMPLATE_PATH/models/science/unifold/msa/tools/jackhmmer.py": 1666778289.6891205, "TEMPLATE_PATH/models/science/unifold/msa/tools/hhsearch.py": 1666778289.6883202, "TEMPLATE_PATH/models/science/unifold/msa/mmcif.py": 1666778289.6854372, "TEMPLATE_PATH/models/science/unifold/msa/msa_identifiers.py": 1666778289.6857276, "TEMPLATE_PATH/models/science/unifold/msa/parsers.py": 1666778289.6860957, "TEMPLATE_PATH/models/science/unifold/msa/templates.py": 1684246001.5188344, "TEMPLATE_PATH/models/science/unifold/msa/utils.py": 1666778289.6898172, "TEMPLATE_PATH/models/science/unifold/msa/pipeline.py": 1669108798.6335008, "TEMPLATE_PATH/models/science/unifold/model.py": 1669108798.6326127, "TEMPLATE_PATH/models/science/unifold/dataset.py": 1669108798.63184, "TEMPLATE_PATH/models/science/unifold/modules/confidence.py": 1666778289.6826582, "TEMPLATE_PATH/models/science/unifold/modules/alphafold.py": 1666778289.6816177, "TEMPLATE_PATH/models/science/unifold/modules/evoformer.py": 1666778289.683275, "TEMPLATE_PATH/models/science/unifold/modules/auxillary_heads.py": 1666778289.682163, 
"TEMPLATE_PATH/models/science/unifold/modules/attentions.py": 1678345974.7664688, "TEMPLATE_PATH/models/science/unifold/modules/embedders.py": 1666778289.6829705, "TEMPLATE_PATH/models/science/unifold/modules/structure_module.py": 1669108798.6331663, "TEMPLATE_PATH/models/science/unifold/modules/common.py": 1666778289.6823854, "TEMPLATE_PATH/models/science/unifold/modules/frame.py": 1666778289.683827, "TEMPLATE_PATH/models/science/unifold/modules/template.py": 1666778289.6844184, "TEMPLATE_PATH/models/science/unifold/modules/triangle_multiplication.py": 1666778289.6846595, "TEMPLATE_PATH/models/science/unifold/modules/featurization.py": 1666778289.6835535, "TEMPLATE_PATH/models/science/unifold/data/process_multimer.py": 1666778289.6789792, "TEMPLATE_PATH/models/science/unifold/data/protein.py": 1666778289.6792727, "TEMPLATE_PATH/models/science/unifold/data/residue_constants.py": 1669108798.6314445, "TEMPLATE_PATH/models/science/unifold/data/utils.py": 1666778289.6802187, "TEMPLATE_PATH/models/science/unifold/data/process.py": 1666778289.6786027, "TEMPLATE_PATH/models/science/unifold/data/msa_pairing.py": 1684246001.5181975, "TEMPLATE_PATH/models/science/unifold/data/data_ops.py": 1678345974.7659872, "TEMPLATE_PATH/models/builder.py": 1678695526.2830884, "TEMPLATE_PATH/models/audio/ans/layers/activations.py": 1678695526.2749481, "TEMPLATE_PATH/models/audio/ans/layers/layer_base.py": 1678695526.2758405, "TEMPLATE_PATH/models/audio/ans/layers/affine_transform.py": 1678695526.2755635, "TEMPLATE_PATH/models/audio/ans/layers/uni_deep_fsmn.py": 1678695526.2761767, "TEMPLATE_PATH/models/audio/ans/unet.py": 1666757257.138642, "TEMPLATE_PATH/models/audio/ans/conv_stft.py": 1684246001.4631696, "TEMPLATE_PATH/models/audio/ans/denoise_net.py": 1678695526.2738411, "TEMPLATE_PATH/models/audio/ans/complex_nn.py": 1678695526.273521, "TEMPLATE_PATH/models/audio/ans/se_module_complex.py": 1666757257.1383736, "TEMPLATE_PATH/models/audio/ans/frcrn.py": 1678695526.2743342, "TEMPLATE_PATH/models/audio/sv/DTDNN_layers.py": 1683889954.4686143, "TEMPLATE_PATH/models/audio/sv/ecapa_tdnn.py": 1678345974.1721325, "TEMPLATE_PATH/models/audio/sv/ERes2Net.py": 1684247769.663397, "TEMPLATE_PATH/models/audio/sv/pooling_layers.py": 1684247769.6642458, "TEMPLATE_PATH/models/audio/sv/DTDNN.py": 1684246001.4648209, "TEMPLATE_PATH/models/audio/sv/fusion.py": 1684247769.6637704, "TEMPLATE_PATH/models/audio/sv/generic_speaker_verification.py": 1678695526.2780309, "TEMPLATE_PATH/models/audio/sv/speaker_change_locator.py": 1684246001.4659781, "TEMPLATE_PATH/models/audio/sv/rdino.py": 1684246001.4655278, "TEMPLATE_PATH/models/audio/itn/generic_inverse_text_processing.py": 1678345974.1680963, "TEMPLATE_PATH/models/audio/aec/layers/activations.py": 1666757257.1350431, "TEMPLATE_PATH/models/audio/aec/layers/layer_base.py": 1666757257.1358142, "TEMPLATE_PATH/models/audio/aec/layers/deep_fsmn.py": 1666757257.1355417, "TEMPLATE_PATH/models/audio/aec/layers/affine_transform.py": 1666757257.1352675, "TEMPLATE_PATH/models/audio/aec/layers/uni_deep_fsmn.py": 1666757257.1360576, "TEMPLATE_PATH/models/audio/aec/network/se_net.py": 1666757257.1370454, "TEMPLATE_PATH/models/audio/aec/network/loss.py": 1666757257.1365721, "TEMPLATE_PATH/models/audio/aec/network/modulation_loss.py": 1666757257.136794, "TEMPLATE_PATH/models/audio/asr/wenet_automatic_speech_recognition.py": 1678345974.1674347, "TEMPLATE_PATH/models/audio/asr/generic_automatic_speech_recognition.py": 1684246001.463599, "TEMPLATE_PATH/models/audio/punc/generic_punctuation.py": 
1678345974.1698205, "TEMPLATE_PATH/models/audio/tts/voice.py": 1684246001.466414, "TEMPLATE_PATH/models/audio/tts/sambert_hifi.py": 1678695526.2786689, "TEMPLATE_PATH/models/audio/separation/mossformer.py": 1678345974.1705601, "TEMPLATE_PATH/models/audio/separation/mossformer_conv_module.py": 1678345974.1713047, "TEMPLATE_PATH/models/audio/separation/mossformer_block.py": 1678345974.1711044, "TEMPLATE_PATH/models/audio/separation/layer_norm.py": 1678345974.1702788, "TEMPLATE_PATH/models/audio/kws/farfield/fsmn.py": 1666757257.1401393, "TEMPLATE_PATH/models/audio/kws/farfield/fsmn_sele_v2.py": 1683889954.4661622, "TEMPLATE_PATH/models/audio/kws/farfield/fsmn_sele_v3.py": 1684246001.4639575, "TEMPLATE_PATH/models/audio/kws/farfield/model_def.py": 1666757257.140835, "TEMPLATE_PATH/models/audio/kws/farfield/model.py": 1684246001.4643233, "TEMPLATE_PATH/models/audio/kws/generic_key_word_spotting.py": 1666757257.1410184, "TEMPLATE_PATH/models/audio/kws/nearfield/fsmn.py": 1683889954.4674246, "TEMPLATE_PATH/models/audio/kws/nearfield/model.py": 1683889954.4677804, "TEMPLATE_PATH/models/audio/kws/nearfield/cmvn.py": 1678345974.1689863, "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py": 1678345974.6520555, "TEMPLATE_PATH/models/multi_modal/clip/configuration_bert.py": 1666757257.302656, "TEMPLATE_PATH/models/multi_modal/clip/bert_tokenizer.py": 1669108798.597482, "TEMPLATE_PATH/models/multi_modal/clip/model.py": 1678345974.6126437, "TEMPLATE_PATH/models/multi_modal/clip/modeling_bert.py": 1678345974.6139398, "TEMPLATE_PATH/models/multi_modal/mplug_for_all_tasks.py": 1678345974.6332867, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/decoder.py": 1666757257.3277674, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/prior.py": 1666757257.3294334, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/upsampler.py": 1666757257.3308744, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/model.py": 1678345974.6361222, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/tokenizer.py": 1678695526.5035207, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/xglm.py": 1678695526.5041978, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/gaussian_diffusion.py": 1678695526.502133, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/clip.py": 1678695526.5006785, "TEMPLATE_PATH/models/multi_modal/diffusion/structbert.py": 1678345974.617392, "TEMPLATE_PATH/models/multi_modal/diffusion/diffusion.py": 1681714768.8946908, "TEMPLATE_PATH/models/multi_modal/diffusion/unet_generator.py": 1678695526.4923015, "TEMPLATE_PATH/models/multi_modal/diffusion/model.py": 1678345974.6162271, "TEMPLATE_PATH/models/multi_modal/diffusion/tokenizer.py": 1678345974.6183596, "TEMPLATE_PATH/models/multi_modal/diffusion/unet_upsampler_256.py": 1678695526.494478, "TEMPLATE_PATH/models/multi_modal/diffusion/unet_upsampler_1024.py": 1678695526.4934785, "TEMPLATE_PATH/models/multi_modal/efficient_diffusion_tuning/efficient_stable_diffusion.py": 1683889954.5002546, "TEMPLATE_PATH/models/multi_modal/gemm/gemm_base.py": 1669108798.5997014, "TEMPLATE_PATH/models/multi_modal/gemm/gemm_model.py": 1666757257.312342, "TEMPLATE_PATH/models/multi_modal/gemm/tokenizer.py": 1666757257.3128963, "TEMPLATE_PATH/models/multi_modal/mmr/dataloaders/rawvideo_util.py": 1666757257.3151526, "TEMPLATE_PATH/models/multi_modal/mmr/models/module_clip.py": 1666757257.3185143, "TEMPLATE_PATH/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py": 1684246001.5114832, 
"TEMPLATE_PATH/models/multi_modal/mmr/models/module_cross.py": 1666757257.319208, "TEMPLATE_PATH/models/multi_modal/mmr/models/until_module.py": 1666757257.3205154, "TEMPLATE_PATH/models/multi_modal/mmr/models/tokenization_clip.py": 1666757257.3197618, "TEMPLATE_PATH/models/multi_modal/mmr/models/modeling.py": 1666757257.3177187, "TEMPLATE_PATH/models/multi_modal/mmr/models/dynamic_inverted_softmax.py": 1666757257.3171651, "TEMPLATE_PATH/models/multi_modal/mplug/predictor.py": 1666757257.3251338, "TEMPLATE_PATH/models/multi_modal/mplug/clip/clip.py": 1666757257.322875, "TEMPLATE_PATH/models/multi_modal/mplug/modeling_mplug.py": 1678345974.631151, "TEMPLATE_PATH/models/multi_modal/mplug/mvit.py": 1678345974.632255, "TEMPLATE_PATH/models/multi_modal/mplug/configuration_mplug.py": 1678345974.629229, "TEMPLATE_PATH/models/multi_modal/team/team_model.py": 1666757257.344364, "TEMPLATE_PATH/models/multi_modal/team/utils.py": 1666757257.3448434, "TEMPLATE_PATH/models/multi_modal/guided_diffusion/respace.py": 1681714768.8972325, "TEMPLATE_PATH/models/multi_modal/guided_diffusion/unet.py": 1681714768.8985677, "TEMPLATE_PATH/models/multi_modal/guided_diffusion/gaussian_diffusion.py": 1681714768.896804, "TEMPLATE_PATH/models/multi_modal/guided_diffusion/script.py": 1681714768.8976767, "TEMPLATE_PATH/models/multi_modal/vldoc/tokenization.py": 1678345974.6881084, "TEMPLATE_PATH/models/multi_modal/vldoc/model.py": 1678345974.6853945, "TEMPLATE_PATH/models/multi_modal/vldoc/conv_fpn_trans.py": 1678345974.6839283, "TEMPLATE_PATH/models/multi_modal/vldoc/transformer_local.py": 1678345974.6888812, "TEMPLATE_PATH/models/multi_modal/vldoc/modeling_layout_roberta.py": 1678345974.6863377, "TEMPLATE_PATH/models/multi_modal/vldoc/processing.py": 1678345974.6873274, "TEMPLATE_PATH/models/multi_modal/vldoc/convnext.py": 1678345974.6846218, "TEMPLATE_PATH/models/multi_modal/soonet/model.py": 1681714768.901114, "TEMPLATE_PATH/models/multi_modal/soonet/tokenizer.py": 1681714768.9022171, "TEMPLATE_PATH/models/multi_modal/soonet/utils.py": 1681714768.9025855, "TEMPLATE_PATH/models/multi_modal/soonet/blocks.py": 1681714768.9001248, "TEMPLATE_PATH/models/multi_modal/soonet/swin_transformer.py": 1681714768.90172, "TEMPLATE_PATH/models/multi_modal/soonet/clip.py": 1681714768.90064, "TEMPLATE_PATH/models/multi_modal/mgeo/text_ranking.py": 1678345974.626834, "TEMPLATE_PATH/models/multi_modal/mgeo/backbone.py": 1678345974.6254547, "TEMPLATE_PATH/models/multi_modal/mgeo/text_classification.py": 1678345974.6262727, "TEMPLATE_PATH/models/multi_modal/mgeo/token_classification.py": 1678345974.6274276, "TEMPLATE_PATH/models/multi_modal/mplug_owl/configuration_mplug_owl.py": 1684246001.513214, "TEMPLATE_PATH/models/multi_modal/mplug_owl/modeling_mplug_owl.py": 1684246001.5142066, "TEMPLATE_PATH/models/multi_modal/ofa_for_text_to_image_synthesis_model.py": 1678345974.6531072, "TEMPLATE_PATH/models/multi_modal/video_synthesis/diffusion.py": 1681714768.9037023, "TEMPLATE_PATH/models/multi_modal/video_synthesis/text_to_video_synthesis_model.py": 1678695526.5113559, "TEMPLATE_PATH/models/multi_modal/video_synthesis/autoencoder.py": 1678695526.510036, "TEMPLATE_PATH/models/multi_modal/video_synthesis/unet_sd.py": 1678695526.5119526, "TEMPLATE_PATH/models/multi_modal/clip_interrogator/model.py": 1684246001.5105355, "TEMPLATE_PATH/models/multi_modal/rleg/model.py": 1678695526.5053334, "TEMPLATE_PATH/models/multi_modal/rleg/rleg.py": 1678695526.5057476, "TEMPLATE_PATH/models/multi_modal/dpm_solver_pytorch.py": 1678695526.4955242, 
"TEMPLATE_PATH/models/multi_modal/ofa/modeling_ofa.py": 1678345974.6454003, "TEMPLATE_PATH/models/multi_modal/ofa/utils/utils.py": 1678345974.6500447, "TEMPLATE_PATH/models/multi_modal/ofa/utils/constant.py": 1678345974.649251, "TEMPLATE_PATH/models/multi_modal/ofa/vit.py": 1678345974.6508958, "TEMPLATE_PATH/models/multi_modal/ofa/modeling_mmspeech.py": 1678345974.6442, "TEMPLATE_PATH/models/multi_modal/ofa/resnet.py": 1678345974.6463652, "TEMPLATE_PATH/models/multi_modal/ofa/tokenization_ofa.py": 1678345974.6473439, "TEMPLATE_PATH/models/multi_modal/ofa/generate/multihead_attention.py": 1666757257.335432, "TEMPLATE_PATH/models/multi_modal/ofa/generate/ngram_repeat_block.py": 1666757257.335963, "TEMPLATE_PATH/models/multi_modal/ofa/generate/sequence_generator.py": 1678345974.642128, "TEMPLATE_PATH/models/multi_modal/ofa/generate/incremental_decoding_utils.py": 1666757257.3349085, "TEMPLATE_PATH/models/multi_modal/ofa/generate/utils.py": 1678345974.6431253, "TEMPLATE_PATH/models/multi_modal/ofa/generate/search.py": 1678345974.6410236, "TEMPLATE_PATH/models/multi_modal/ofa/generate/token_generation_constraints.py": 1666757257.3377285, "TEMPLATE_PATH/models/multi_modal/ofa/tokenization_ofa_fast.py": 1678345974.6482744, "TEMPLATE_PATH/models/multi_modal/ofa/configuration_mmspeech.py": 1678345974.6392608, "TEMPLATE_PATH/models/multi_modal/ofa/configuration_ofa.py": 1678345974.640075, "TEMPLATE_PATH/models/nlp/unite/configuration.py": 1684246001.5170493, "TEMPLATE_PATH/models/nlp/unite/translation_evaluation.py": 1684246001.5173905, "TEMPLATE_PATH/models/nlp/palm_v2/configuration.py": 1678345974.7403622, "TEMPLATE_PATH/models/nlp/palm_v2/dureader_eval.py": 1666757257.3743646, "TEMPLATE_PATH/models/nlp/palm_v2/text_generation.py": 1681714768.9220717, "TEMPLATE_PATH/models/nlp/structbert/configuration.py": 1678345974.7552435, "TEMPLATE_PATH/models/nlp/structbert/fill_mask.py": 1678345974.7563565, "TEMPLATE_PATH/models/nlp/structbert/backbone.py": 1678345974.7548847, "TEMPLATE_PATH/models/nlp/structbert/faq_question_answering.py": 1678345974.7559564, "TEMPLATE_PATH/models/nlp/structbert/adv_utils.py": 1678695526.531147, "TEMPLATE_PATH/models/nlp/structbert/text_classification.py": 1678345974.7566974, "TEMPLATE_PATH/models/nlp/structbert/token_classification.py": 1678345974.7570403, "TEMPLATE_PATH/models/nlp/hf_transformers/backbone.py": 1678695526.5259144, "TEMPLATE_PATH/models/nlp/task_models/fill_mask.py": 1678345974.7579868, "TEMPLATE_PATH/models/nlp/task_models/text_ranking.py": 1678345974.7599752, "TEMPLATE_PATH/models/nlp/task_models/feature_extraction.py": 1678345974.7576537, "TEMPLATE_PATH/models/nlp/task_models/text_classification.py": 1678345974.7593715, "TEMPLATE_PATH/models/nlp/task_models/task_model.py": 1683889954.520566, "TEMPLATE_PATH/models/nlp/task_models/text_generation.py": 1683889954.521497, "TEMPLATE_PATH/models/nlp/task_models/information_extraction.py": 1678345974.758443, "TEMPLATE_PATH/models/nlp/task_models/token_classification.py": 1678345974.7602658, "TEMPLATE_PATH/models/nlp/veco/configuration.py": 1678345974.76297, "TEMPLATE_PATH/models/nlp/veco/fill_mask.py": 1678345974.7633657, "TEMPLATE_PATH/models/nlp/veco/backbone.py": 1678345974.762673, "TEMPLATE_PATH/models/nlp/veco/text_classification.py": 1678345974.7637107, "TEMPLATE_PATH/models/nlp/veco/token_classification.py": 1678345974.7641091, "TEMPLATE_PATH/models/nlp/glm_130b/initialize.py": 1683889954.5107641, "TEMPLATE_PATH/models/nlp/glm_130b/quantization/functional.py": 1683889954.512782, 
"TEMPLATE_PATH/models/nlp/glm_130b/quantization/layers.py": 1683889954.5129745, "TEMPLATE_PATH/models/nlp/glm_130b/text_generation.py": 1683889954.5132122, "TEMPLATE_PATH/models/nlp/glm_130b/generation/strategies.py": 1683889954.5105143, "TEMPLATE_PATH/models/nlp/mglm/tasks/superglue/pvp.py": 1678345974.739084, "TEMPLATE_PATH/models/nlp/mglm/tasks/superglue/dataset.py": 1669108798.6253061, "TEMPLATE_PATH/models/nlp/mglm/tasks/superglue/evaluate.py": 1669108798.6255116, "TEMPLATE_PATH/models/nlp/mglm/tasks/superglue/finetune.py": 1669108798.6256893, "TEMPLATE_PATH/models/nlp/mglm/tasks/data_utils.py": 1678345974.737032, "TEMPLATE_PATH/models/nlp/mglm/tasks/seq2seq/dataset.py": 1669108798.6240597, "TEMPLATE_PATH/models/nlp/mglm/tasks/seq2seq/evaluate.py": 1678345974.738028, "TEMPLATE_PATH/models/nlp/mglm/tasks/seq2seq/finetune.py": 1678345974.7383432, "TEMPLATE_PATH/models/nlp/mglm/tasks/language_model/detokenizer.py": 1669108798.6234415, "TEMPLATE_PATH/models/nlp/mglm/tasks/language_model/dataset.py": 1669108798.6232784, "TEMPLATE_PATH/models/nlp/mglm/tasks/language_model/finetune.py": 1678345974.7376661, "TEMPLATE_PATH/models/nlp/mglm/tasks/eval_utils.py": 1678345974.7373278, "TEMPLATE_PATH/models/nlp/mglm/blocklm_utils.py": 1684246001.5148673, "TEMPLATE_PATH/models/nlp/mglm/train_utils.py": 1678345974.7394702, "TEMPLATE_PATH/models/nlp/mglm/test/test_block.py": 1684246001.515939, "TEMPLATE_PATH/models/nlp/mglm/test/test_rel_shift.py": 1684246001.5163302, "TEMPLATE_PATH/models/nlp/mglm/arguments.py": 1669108798.609255, "TEMPLATE_PATH/models/nlp/mglm/data_utils/tokenization_gpt2.py": 1669108798.6142075, "TEMPLATE_PATH/models/nlp/mglm/data_utils/lazy_loader.py": 1678345974.7336698, "TEMPLATE_PATH/models/nlp/mglm/data_utils/wordpiece.py": 1678345974.7343767, "TEMPLATE_PATH/models/nlp/mglm/data_utils/datasets.py": 1684246001.515576, "TEMPLATE_PATH/models/nlp/mglm/data_utils/tokenization.py": 1669108798.613975, "TEMPLATE_PATH/models/nlp/mglm/data_utils/extraction.py": 1678345974.733312, "TEMPLATE_PATH/models/nlp/mglm/data_utils/file_utils.py": 1669108798.6124434, "TEMPLATE_PATH/models/nlp/mglm/data_utils/sp_tokenizer.py": 1669108798.6136456, "TEMPLATE_PATH/models/nlp/mglm/data_utils/corpora.py": 1669108798.6114604, "TEMPLATE_PATH/models/nlp/mglm/data_utils/samplers.py": 1669108798.6129339, "TEMPLATE_PATH/models/nlp/mglm/mglm_for_text_summarization.py": 1678345974.7347617, "TEMPLATE_PATH/models/nlp/mglm/process_grid.py": 1669108798.6219385, "TEMPLATE_PATH/models/nlp/mglm/generation_utils.py": 1669108798.6156476, "TEMPLATE_PATH/models/nlp/mglm/utils.py": 1678345974.739819, "TEMPLATE_PATH/models/nlp/mglm/configure_data.py": 1678345974.7326682, "TEMPLATE_PATH/models/nlp/mglm/model/distributed.py": 1678345974.735162, "TEMPLATE_PATH/models/nlp/mglm/model/transformer.py": 1678345974.7363741, "TEMPLATE_PATH/models/nlp/mglm/model/modeling_bert.py": 1678345974.735637, "TEMPLATE_PATH/models/nlp/mglm/model/prompt.py": 1669108798.617736, "TEMPLATE_PATH/models/nlp/mglm/model/modeling_glm.py": 1678345974.7359483, "TEMPLATE_PATH/models/nlp/mglm/model/downstream.py": 1669108798.6170213, "TEMPLATE_PATH/models/nlp/mglm/run_test.py": 1669108798.6222408, "TEMPLATE_PATH/models/nlp/plug_mental/configuration.py": 1678345974.7478015, "TEMPLATE_PATH/models/nlp/plug_mental/backbone.py": 1678345974.7475746, "TEMPLATE_PATH/models/nlp/plug_mental/adv_utils.py": 1678345974.7465599, "TEMPLATE_PATH/models/nlp/plug_mental/text_classification.py": 1678345974.747977, "TEMPLATE_PATH/models/nlp/gpt_moe/configuration.py": 
1678345974.7217705, "TEMPLATE_PATH/models/nlp/gpt_moe/backbone.py": 1678345974.7208388, "TEMPLATE_PATH/models/nlp/gpt_moe/tokenizer.py": 1678345974.725533, "TEMPLATE_PATH/models/nlp/gpt_moe/distributed_gpt_moe.py": 1678695526.523395, "TEMPLATE_PATH/models/nlp/gpt_moe/text_generation.py": 1678345974.7252653, "TEMPLATE_PATH/models/nlp/gpt_moe/moe/sharded_moe.py": 1678345974.7245455, "TEMPLATE_PATH/models/nlp/gpt_moe/moe/utils.py": 1678345974.7249217, "TEMPLATE_PATH/models/nlp/gpt_moe/moe/layer.py": 1678345974.7238333, "TEMPLATE_PATH/models/nlp/gpt_moe/moe/experts.py": 1678345974.7235267, "TEMPLATE_PATH/models/nlp/gpt_moe/moe/mappings.py": 1678345974.7241268, "TEMPLATE_PATH/models/nlp/gpt_moe/checkpointing.py": 1678695526.5199594, "TEMPLATE_PATH/models/nlp/csanmt/translation.py": 1678345974.710362, "TEMPLATE_PATH/models/nlp/T5/text2text_generation.py": 1678345974.6919267, "TEMPLATE_PATH/models/nlp/T5/configuration.py": 1678345974.6909628, "TEMPLATE_PATH/models/nlp/T5/backbone.py": 1683889954.5021315, "TEMPLATE_PATH/models/nlp/heads/text_classification_head.py": 1678345974.727904, "TEMPLATE_PATH/models/nlp/heads/infromation_extraction_head.py": 1678345974.7273557, "TEMPLATE_PATH/models/nlp/heads/token_classification_head.py": 1678345974.728869, "TEMPLATE_PATH/models/nlp/heads/text_generation_head.py": 1678345974.7283216, "TEMPLATE_PATH/models/nlp/heads/crf_head.py": 1678695526.5250702, "TEMPLATE_PATH/models/nlp/heads/torch_pretrain_head.py": 1666757257.3713884, "TEMPLATE_PATH/models/nlp/heads/fill_mask_head.py": 1683889954.5144427, "TEMPLATE_PATH/models/nlp/heads/text_ranking_head.py": 1678345974.7285597, "TEMPLATE_PATH/models/nlp/bloom/backbone.py": 1669108798.6061795, "TEMPLATE_PATH/models/nlp/xlm_roberta/configuration.py": 1678345974.7653904, "TEMPLATE_PATH/models/nlp/xlm_roberta/backbone.py": 1678345974.7651584, "TEMPLATE_PATH/models/nlp/peer/configuration.py": 1678695526.529261, "TEMPLATE_PATH/models/nlp/peer/sas_utils.py": 1678695526.5296216, "TEMPLATE_PATH/models/nlp/peer/backbone.py": 1678695526.5284507, "TEMPLATE_PATH/models/nlp/peer/text_classification.py": 1678695526.5302649, "TEMPLATE_PATH/models/nlp/fid_T5/text_generation.py": 1683889954.5068686, "TEMPLATE_PATH/models/nlp/space_T_en/text_to_sql.py": 1666757257.3954694, "TEMPLATE_PATH/models/nlp/canmt/sequence_generator.py": 1683889954.5052524, "TEMPLATE_PATH/models/nlp/canmt/canmt_translation.py": 1683889954.5044076, "TEMPLATE_PATH/models/nlp/canmt/canmt_model.py": 1683889954.5040576, "TEMPLATE_PATH/models/nlp/bart/text_error_correction.py": 1678345974.693962, "TEMPLATE_PATH/models/nlp/use/transformer.py": 1678345974.7618728, "TEMPLATE_PATH/models/nlp/use/user_satisfaction_estimation.py": 1678345974.7620804, "TEMPLATE_PATH/models/nlp/gpt_neo/backbone.py": 1666757257.3668969, "TEMPLATE_PATH/models/nlp/bert/configuration.py": 1678345974.6969304, "TEMPLATE_PATH/models/nlp/bert/siamese_uie.py": 1678695526.5135634, "TEMPLATE_PATH/models/nlp/bert/fill_mask.py": 1678345974.6990001, "TEMPLATE_PATH/models/nlp/bert/word_alignment.py": 1678695526.5139036, "TEMPLATE_PATH/models/nlp/bert/text_ranking.py": 1678345974.703262, "TEMPLATE_PATH/models/nlp/bert/backbone.py": 1678345974.6959348, "TEMPLATE_PATH/models/nlp/bert/text_classification.py": 1678345974.7023563, "TEMPLATE_PATH/models/nlp/bert/sentence_embedding.py": 1678345974.7002544, "TEMPLATE_PATH/models/nlp/bert/document_segmentation.py": 1678345974.6980228, "TEMPLATE_PATH/models/nlp/bert/token_classification.py": 1678345974.7041605, "TEMPLATE_PATH/models/nlp/dgds/backbone.py": 
1683889954.5060863, "TEMPLATE_PATH/models/nlp/dgds/document_grounded_dialog_rerank.py": 1678345974.7150524, "TEMPLATE_PATH/models/nlp/dgds/document_grounded_dialog_generate.py": 1678345974.71487, "TEMPLATE_PATH/models/nlp/dgds/document_grounded_dialog_retrieval.py": 1678345974.715238, "TEMPLATE_PATH/models/nlp/gpt3/configuration.py": 1678695526.5156965, "TEMPLATE_PATH/models/nlp/gpt3/backbone.py": 1681714768.9173203, "TEMPLATE_PATH/models/nlp/gpt3/tokenizer.py": 1678695526.5179377, "TEMPLATE_PATH/models/nlp/gpt3/distributed_gpt3.py": 1683889954.51408, "TEMPLATE_PATH/models/nlp/gpt3/text_generation.py": 1681714768.9190643, "TEMPLATE_PATH/models/nlp/deberta_v2/configuration.py": 1678345974.7123609, "TEMPLATE_PATH/models/nlp/deberta_v2/fill_mask.py": 1678345974.7131743, "TEMPLATE_PATH/models/nlp/deberta_v2/backbone.py": 1678345974.7115374, "TEMPLATE_PATH/models/nlp/deberta_v2/tokenization.py": 1666757257.361269, "TEMPLATE_PATH/models/nlp/deberta_v2/tokenization_fast.py": 1678345974.7137625, "TEMPLATE_PATH/models/nlp/codegeex/codegeex_for_code_translation.py": 1678345974.707734, "TEMPLATE_PATH/models/nlp/codegeex/tokenizer.py": 1678345974.7089796, "TEMPLATE_PATH/models/nlp/codegeex/codegeex_for_code_generation.py": 1678345974.7071722, "TEMPLATE_PATH/models/nlp/codegeex/inference.py": 1678345974.7083764, "TEMPLATE_PATH/models/nlp/codegeex/codegeex.py": 1678345974.706545, "TEMPLATE_PATH/models/nlp/space/configuration.py": 1678345974.7504852, "TEMPLATE_PATH/models/nlp/space/dialog_modeling.py": 1678345974.7508473, "TEMPLATE_PATH/models/nlp/space/dialog_state_tracking.py": 1666757257.3844292, "TEMPLATE_PATH/models/nlp/space/model/intent_unified_transformer.py": 1666757257.386785, "TEMPLATE_PATH/models/nlp/space/model/tokenization_space.py": 1678345974.7516365, "TEMPLATE_PATH/models/nlp/space/model/unified_transformer.py": 1678345974.7521238, "TEMPLATE_PATH/models/nlp/space/model/model_base.py": 1678345974.7511904, "TEMPLATE_PATH/models/nlp/space/model/generator.py": 1666757257.3862689, "TEMPLATE_PATH/models/nlp/space/model/gen_unified_transformer.py": 1666757257.3857656, "TEMPLATE_PATH/models/nlp/space/dialog_intent_prediction.py": 1666757257.3833244, "TEMPLATE_PATH/models/nlp/space/modules/transformer_block.py": 1666757257.391351, "TEMPLATE_PATH/models/nlp/space/modules/functions.py": 1666757257.3904216, "TEMPLATE_PATH/models/nlp/space/modules/multihead_attention.py": 1666757257.3908985, "TEMPLATE_PATH/models/nlp/space/modules/feedforward.py": 1666757257.3899465, "TEMPLATE_PATH/models/nlp/space/modules/embedder.py": 1666757257.3894768, "TEMPLATE_PATH/models/nlp/fid_plug/configuration.py": 1683889954.50833, "TEMPLATE_PATH/models/nlp/fid_plug/backbone.py": 1683889954.507869, "TEMPLATE_PATH/models/nlp/fid_plug/text_generation.py": 1683889954.5088115, "TEMPLATE_PATH/models/nlp/gpt2/backbone.py": 1678345974.7169101, "TEMPLATE_PATH/models/nlp/plug/distributed_plug.py": 1678345974.7456992, "TEMPLATE_PATH/models/nlp/plug/configuration.py": 1678345974.7445607, "TEMPLATE_PATH/models/nlp/plug/backbone.py": 1678345974.7441673, "TEMPLATE_PATH/models/nlp/plug/AnnealingLR.py": 1678345974.7434573, "TEMPLATE_PATH/models/nlp/plug/generator.py": 1678345974.7459483, "TEMPLATE_PATH/models/nlp/megatron_bert/configuration.py": 1678345974.7317162, "TEMPLATE_PATH/models/nlp/megatron_bert/fill_mask.py": 1678345974.7319267, "TEMPLATE_PATH/models/nlp/megatron_bert/backbone.py": 1678345974.731479, "TEMPLATE_PATH/models/nlp/space_T_cn/configuration.py": 1666757257.3935158, "TEMPLATE_PATH/models/nlp/space_T_cn/backbone.py": 
1678345974.752695, "TEMPLATE_PATH/models/nlp/space_T_cn/table_question_answering.py": 1678345974.7536259, "TEMPLATE_PATH/models/nlp/ponet/configuration.py": 1678345974.7491364, "TEMPLATE_PATH/models/nlp/ponet/fill_mask.py": 1678345974.7497096, "TEMPLATE_PATH/models/nlp/ponet/backbone.py": 1678345974.7488022, "TEMPLATE_PATH/models/nlp/ponet/tokenization.py": 1678345974.7501063, "TEMPLATE_PATH/models/nlp/ponet/document_segmentation.py": 1678345974.749312, "TEMPLATE_PATH/models/nlp/llama/configuration.py": 1683889954.5161562, "TEMPLATE_PATH/models/nlp/llama/convert_llama_weights_to_hf.py": 1683889954.5163944, "TEMPLATE_PATH/models/nlp/llama/backbone.py": 1683889954.5156515, "TEMPLATE_PATH/models/nlp/llama/tokenization.py": 1683889954.517054, "TEMPLATE_PATH/models/nlp/llama/tokenization_fast.py": 1683889954.5174031, "TEMPLATE_PATH/models/nlp/llama/text_generation.py": 1683889954.5166035, "TEMPLATE_PATH/models/nlp/lstm/backbone.py": 1678345974.7302816, "TEMPLATE_PATH/models/nlp/lstm/token_classification.py": 1678345974.7304647, "TEMPLATE_PATH/models/cv/image_deblur/nafnet_for_image_deblur.py": 1678345974.289103, "TEMPLATE_PATH/models/cv/vision_middleware/backbone.py": 1678345974.6052146, "TEMPLATE_PATH/models/cv/vision_middleware/model.py": 1678345974.6064956, "TEMPLATE_PATH/models/cv/vision_middleware/head.py": 1678345974.605873, "TEMPLATE_PATH/models/cv/vision_middleware/vim.py": 1678345974.607082, "TEMPLATE_PATH/models/cv/image_quality_assessment_man/swin.py": 1678695526.3478003, "TEMPLATE_PATH/models/cv/image_quality_assessment_man/maniqa.py": 1678695526.3473833, "TEMPLATE_PATH/models/cv/image_quality_assessment_man/image_quality_assessment_man.py": 1678695526.3470078, "TEMPLATE_PATH/models/cv/product_retrieval_embedding/item_detection.py": 1666757257.2308764, "TEMPLATE_PATH/models/cv/product_retrieval_embedding/item_model.py": 1666757257.231389, "TEMPLATE_PATH/models/cv/product_retrieval_embedding/item_embedding.py": 1666757257.2311432, "TEMPLATE_PATH/models/cv/body_2d_keypoints/w48.py": 1666757257.1529067, "TEMPLATE_PATH/models/cv/body_2d_keypoints/hrnet_v2.py": 1684246001.4672918, "TEMPLATE_PATH/models/cv/body_2d_keypoints/hrnet_basic_modules.py": 1666757257.1524448, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/panovit.py": 1678345974.3350315, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/modality/layout.py": 1678345974.3345408, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/misc/panostretch.py": 1678345974.3337135, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/misc/fourier.py": 1678345974.3334966, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/misc/post_proc.py": 1678345974.3339539, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/panovit.py": 1678345974.3347096, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/utils.py": 1678345974.3348787, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/backbone/vit_horizon_pry_image.py": 1678345974.3330128, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/backbone/resnet_DA.py": 1678345974.332792, "TEMPLATE_PATH/models/cv/salient_detection/salient_model.py": 1678345974.3975854, "TEMPLATE_PATH/models/cv/salient_detection/models/senet.py": 1678345974.3712454, "TEMPLATE_PATH/models/cv/salient_detection/models/utils.py": 1678695526.4316845, "TEMPLATE_PATH/models/cv/salient_detection/models/modules.py": 1678345974.3710551, "TEMPLATE_PATH/models/cv/salient_detection/models/u2net.py": 1666757257.241171, 
"TEMPLATE_PATH/models/cv/salient_detection/models/backbone/Res2Net_v1b.py": 1678695526.4297223, "TEMPLATE_PATH/models/cv/image_quality_assessment_degradation/degradation_model.py": 1678345974.321454, "TEMPLATE_PATH/models/cv/image_quality_assessment_degradation/image_quality_assessment_degradation.py": 1678345974.3216996, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/losses/model_irse.py": 1666757257.2036955, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/losses/losses.py": 1666757257.203465, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/losses/helpers.py": 1666757257.203164, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/retinaface/detection.py": 1673508904.826248, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/retinaface/models/retinaface.py": 1666757257.2049234, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/retinaface/models/net.py": 1666757257.2047052, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/retinaface/utils.py": 1666757257.2051783, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/gpen.py": 1666757257.2019858, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/image_portrait_enhancement.py": 1678345974.3197925, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/align_faces.py": 1666757257.2006574, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/eqface/fqa.py": 1666757257.201287, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/eqface/model_resnet.py": 1666757257.2015626, "TEMPLATE_PATH/models/cv/abnormal_object_detection/mmdet_ms/roi_head/mask_scoring_roi_head.py": 1678695526.2852845, "TEMPLATE_PATH/models/cv/abnormal_object_detection/mmdet_ms/roi_head/roi_extractors/single_level_roi_extractor.py": 1678695526.2864377, "TEMPLATE_PATH/models/cv/abnormal_object_detection/mmdet_model.py": 1678345974.1796575, "TEMPLATE_PATH/models/cv/image_probing_model/backbone.py": 1678345974.3205512, "TEMPLATE_PATH/models/cv/image_probing_model/model.py": 1678345974.320754, "TEMPLATE_PATH/models/cv/image_probing_model/utils.py": 1678345974.320998, "TEMPLATE_PATH/models/cv/tinynas_classfication/super_res_kxkx.py": 1666757257.268841, "TEMPLATE_PATH/models/cv/tinynas_classfication/super_res_k1kxk1.py": 1666757257.2682607, "TEMPLATE_PATH/models/cv/tinynas_classfication/model_zoo.py": 1666757257.265972, "TEMPLATE_PATH/models/cv/tinynas_classfication/super_blocks.py": 1666757257.267099, "TEMPLATE_PATH/models/cv/tinynas_classfication/basic_blocks.py": 1666757257.2643123, "TEMPLATE_PATH/models/cv/tinynas_classfication/master_net.py": 1666757257.2654593, "TEMPLATE_PATH/models/cv/tinynas_classfication/plain_net_utils.py": 1669108798.596394, "TEMPLATE_PATH/models/cv/tinynas_classfication/super_res_idwexkx.py": 1666757257.267718, "TEMPLATE_PATH/models/cv/tinynas_classfication/global_utils.py": 1666757257.264953, "TEMPLATE_PATH/models/cv/image_to_image_translation/model_translation.py": 1666757257.2173638, "TEMPLATE_PATH/models/cv/image_to_image_translation/models/autoencoder.py": 1666757257.2180924, "TEMPLATE_PATH/models/cv/image_to_image_translation/models/clip.py": 1678695526.3520553, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/metrics.py": 1666757257.2199914, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/diffusion.py": 1678695526.3534672, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/apps.py": 1666757257.2189667, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/svd.py": 1666757257.2207708, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/random_mask.py": 1666757257.2205741, 
"TEMPLATE_PATH/models/cv/image_to_image_translation/ops/degradation.py": 1666757257.2193289, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/random_color.py": 1666757257.2203503, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/utils.py": 1666757257.2211437, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/losses.py": 1666757257.2197845, "TEMPLATE_PATH/models/cv/image_to_image_translation/data/transforms.py": 1666757257.217096, "TEMPLATE_PATH/models/cv/video_human_matting/models/decoder.py": 1678345974.4896257, "TEMPLATE_PATH/models/cv/video_human_matting/models/effv2.py": 1678345974.4909832, "TEMPLATE_PATH/models/cv/video_human_matting/models/lraspp.py": 1678345974.4915452, "TEMPLATE_PATH/models/cv/video_human_matting/models/matting.py": 1678345974.4921389, "TEMPLATE_PATH/models/cv/video_human_matting/models/deep_guided_filter.py": 1678345974.4903216, "TEMPLATE_PATH/models/cv/video_human_matting/model.py": 1678345974.488256, "TEMPLATE_PATH/models/cv/language_guided_video_summarization/transformer/models.py": 1673508904.8344479, "TEMPLATE_PATH/models/cv/language_guided_video_summarization/transformer/modules.py": 1673508904.8346016, "TEMPLATE_PATH/models/cv/language_guided_video_summarization/transformer/sub_layers.py": 1673508904.8347619, "TEMPLATE_PATH/models/cv/language_guided_video_summarization/transformer/layers.py": 1673508904.8342712, "TEMPLATE_PATH/models/cv/language_guided_video_summarization/summarizer.py": 1678345974.3353753, "TEMPLATE_PATH/models/cv/facial_landmark_confidence/flc/facial_landmark_confidence.py": 1678345974.2760296, "TEMPLATE_PATH/models/cv/facial_landmark_confidence/flc/manual_landmark_net.py": 1678345974.276284, "TEMPLATE_PATH/models/cv/image_to_image_generation/models/autoencoder.py": 1666757257.2149377, "TEMPLATE_PATH/models/cv/image_to_image_generation/models/clip.py": 1678695526.3506653, "TEMPLATE_PATH/models/cv/image_to_image_generation/model.py": 1666757257.2143965, "TEMPLATE_PATH/models/cv/image_to_image_generation/ops/diffusion.py": 1666757257.215869, "TEMPLATE_PATH/models/cv/image_to_image_generation/ops/losses.py": 1666757257.2161045, "TEMPLATE_PATH/models/cv/image_to_image_generation/data/transforms.py": 1666757257.2141688, "TEMPLATE_PATH/models/cv/image_body_reshaping/person_info.py": 1666757257.1893692, "TEMPLATE_PATH/models/cv/image_body_reshaping/model.py": 1666757257.1891172, "TEMPLATE_PATH/models/cv/image_body_reshaping/slim_utils.py": 1666757257.1903415, "TEMPLATE_PATH/models/cv/image_body_reshaping/pose_estimator/body.py": 1666757257.1897807, "TEMPLATE_PATH/models/cv/image_body_reshaping/pose_estimator/util.py": 1666757257.1901324, "TEMPLATE_PATH/models/cv/image_body_reshaping/pose_estimator/model.py": 1666757257.1899562, "TEMPLATE_PATH/models/cv/image_body_reshaping/image_body_reshaping.py": 1666757257.188921, "TEMPLATE_PATH/models/cv/image_human_parsing/m2fp_net.py": 1678345974.3071952, "TEMPLATE_PATH/models/cv/image_human_parsing/m2fp/m2fp_decoder.py": 1678345974.3068166, "TEMPLATE_PATH/models/cv/image_human_parsing/m2fp/m2fp_encoder.py": 1678345974.3069928, "TEMPLATE_PATH/models/cv/image_human_parsing/parsing_utils.py": 1678345974.307423, "TEMPLATE_PATH/models/cv/image_human_parsing/backbone/deeplab_resnet.py": 1678345974.3061016, "TEMPLATE_PATH/models/cv/image_skychange/ptsemseg/hrnet_super_and_ocr.py": 1684246001.4751763, "TEMPLATE_PATH/models/cv/image_skychange/ptsemseg/BlockModules.py": 1678345974.3284485, "TEMPLATE_PATH/models/cv/image_skychange/ptsemseg/unet.py": 1678345974.3294759, 
"TEMPLATE_PATH/models/cv/image_skychange/ptsemseg/hrnet_backnone.py": 1684246001.4747965, "TEMPLATE_PATH/models/cv/image_skychange/skychange.py": 1678345974.330122, "TEMPLATE_PATH/models/cv/image_skychange/preprocessor.py": 1678345974.3279777, "TEMPLATE_PATH/models/cv/image_skychange/skychange_model.py": 1678345974.3304164, "TEMPLATE_PATH/models/cv/video_object_segmentation/aggregate.py": 1678345974.5072932, "TEMPLATE_PATH/models/cv/video_object_segmentation/inference_memory_bank.py": 1678345974.5102427, "TEMPLATE_PATH/models/cv/video_object_segmentation/inference_core.py": 1678345974.5094788, "TEMPLATE_PATH/models/cv/video_object_segmentation/model.py": 1678345974.51162, "TEMPLATE_PATH/models/cv/video_object_segmentation/eval_network.py": 1678345974.5086596, "TEMPLATE_PATH/models/cv/video_object_segmentation/mod_resnet.py": 1678345974.5108964, "TEMPLATE_PATH/models/cv/video_object_segmentation/network.py": 1678345974.5134938, "TEMPLATE_PATH/models/cv/video_object_segmentation/modules.py": 1678345974.5123272, "TEMPLATE_PATH/models/cv/video_object_segmentation/cbam.py": 1678345974.5079415, "TEMPLATE_PATH/models/cv/face_reconstruction/models/nv_diffrast.py": 1681714768.8716514, "TEMPLATE_PATH/models/cv/face_reconstruction/models/renderer.py": 1681714768.8736632, "TEMPLATE_PATH/models/cv/face_reconstruction/models/unet.py": 1681714768.873916, "TEMPLATE_PATH/models/cv/face_reconstruction/models/bfm.py": 1681714768.8695195, "TEMPLATE_PATH/models/cv/face_reconstruction/models/opt.py": 1681714768.8720403, "TEMPLATE_PATH/models/cv/face_reconstruction/models/networks.py": 1678345974.2734904, "TEMPLATE_PATH/models/cv/face_reconstruction/models/de_retouching_module.py": 1681714768.8699348, "TEMPLATE_PATH/models/cv/face_reconstruction/models/losses.py": 1681714768.8712077, "TEMPLATE_PATH/models/cv/face_reconstruction/models/pix2pix/pix2pix_options.py": 1681714768.8733847, "TEMPLATE_PATH/models/cv/face_reconstruction/models/pix2pix/pix2pix_model.py": 1681714768.873153, "TEMPLATE_PATH/models/cv/face_reconstruction/models/pix2pix/networks.py": 1681714768.8728101, "TEMPLATE_PATH/models/cv/face_reconstruction/models/facelandmark/nets/large_eyeball_net.py": 1678345974.2724826, "TEMPLATE_PATH/models/cv/face_reconstruction/models/facelandmark/nets/large_base_lmks_net.py": 1678345974.2721982, "TEMPLATE_PATH/models/cv/face_reconstruction/models/facelandmark/large_base_lmks_infer.py": 1678345974.2711725, "TEMPLATE_PATH/models/cv/face_reconstruction/models/facerecon_model.py": 1681714768.870774, "TEMPLATE_PATH/models/cv/face_reconstruction/utils.py": 1681714768.8743782, "TEMPLATE_PATH/models/cv/facial_expression_recognition/fer/transforms.py": 1666757257.186491, "TEMPLATE_PATH/models/cv/facial_expression_recognition/fer/vgg.py": 1666757257.1866848, "TEMPLATE_PATH/models/cv/facial_expression_recognition/fer/facial_expression_recognition.py": 1673508904.8041663, "TEMPLATE_PATH/models/cv/face_recognition/align_face.py": 1678695526.3292472, "TEMPLATE_PATH/models/cv/face_recognition/torchkit/rts_backbone.py": 1678345974.2696226, "TEMPLATE_PATH/models/cv/face_recognition/torchkit/backbone/facemask_backbone.py": 1678345974.2694073, "TEMPLATE_PATH/models/cv/face_recognition/torchkit/backbone/model_irse.py": 1666757257.1852279, "TEMPLATE_PATH/models/cv/face_recognition/torchkit/backbone/model_resnet.py": 1666757257.1854684, "TEMPLATE_PATH/models/cv/face_recognition/torchkit/backbone/common.py": 1666757257.1850357, "TEMPLATE_PATH/models/cv/face_recognition/torchkit/backbone/arcface_backbone.py": 1678345974.269119, 
"TEMPLATE_PATH/models/cv/face_generation/stylegan2.py": 1666757257.1819198, "TEMPLATE_PATH/models/cv/face_generation/op/fused_act.py": 1666757257.181432, "TEMPLATE_PATH/models/cv/face_generation/op/upfirdn2d.py": 1666757257.18165, "TEMPLATE_PATH/models/cv/face_generation/op/conv2d_gradfix.py": 1666757257.1812036, "TEMPLATE_PATH/models/cv/shop_segmentation/head_fpn.py": 1666757257.242077, "TEMPLATE_PATH/models/cv/shop_segmentation/models.py": 1666757257.2425845, "TEMPLATE_PATH/models/cv/shop_segmentation/common.py": 1666757257.241814, "TEMPLATE_PATH/models/cv/shop_segmentation/utils.py": 1666757257.2446902, "TEMPLATE_PATH/models/cv/shop_segmentation/shop_seg_base.py": 1666757257.243692, "TEMPLATE_PATH/models/cv/shop_segmentation/neck_fpn.py": 1666757257.2431688, "TEMPLATE_PATH/models/cv/shop_segmentation/shop_seg_model.py": 1666757257.2441843, "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino/ms_deform_attn.py": 1678345974.3103385, "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino/position_encoding.py": 1678345974.310542, "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino/dino_decoder.py": 1678345974.3097205, "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino/maskdino_encoder.py": 1678345974.3101413, "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino/utils.py": 1678345974.3107386, "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino/maskdino_decoder.py": 1678345974.3099248, "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino_swin.py": 1678345974.3111138, "TEMPLATE_PATH/models/cv/image_instance_segmentation/datasets/transforms.py": 1666757257.1984863, "TEMPLATE_PATH/models/cv/image_instance_segmentation/fastinst/fastinst_encoder.py": 1684246001.4722662, "TEMPLATE_PATH/models/cv/image_instance_segmentation/fastinst/fastinst_decoder.py": 1684246001.4720163, "TEMPLATE_PATH/models/cv/image_instance_segmentation/cascade_mask_rcnn_swin.py": 1678345974.3089857, "TEMPLATE_PATH/models/cv/image_instance_segmentation/fastinst_model.py": 1684246001.472576, "TEMPLATE_PATH/models/cv/image_instance_segmentation/model.py": 1666757257.198652, "TEMPLATE_PATH/models/cv/image_instance_segmentation/postprocess_utils.py": 1684246001.4729404, "TEMPLATE_PATH/models/cv/image_instance_segmentation/backbones/resnet.py": 1684246001.4712603, "TEMPLATE_PATH/models/cv/image_instance_segmentation/backbones/swin_transformer.py": 1678345974.3086588, "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino_model.py": 1678345974.3109038, "TEMPLATE_PATH/models/cv/action_detection/modules/resnet.py": 1678695526.2903874, "TEMPLATE_PATH/models/cv/action_detection/modules/action_detection_pytorch.py": 1678695526.288069, "TEMPLATE_PATH/models/cv/action_detection/action_detection_onnx.py": 1678345974.1868067, "TEMPLATE_PATH/models/cv/vop_retrieval/backbone.py": 1678695526.4892921, "TEMPLATE_PATH/models/cv/vop_retrieval/basic_utils.py": 1678345974.6089652, "TEMPLATE_PATH/models/cv/vop_retrieval/model.py": 1678345974.6095595, "TEMPLATE_PATH/models/cv/vop_retrieval/tokenization_clip.py": 1678695526.4906054, "TEMPLATE_PATH/models/cv/vop_retrieval/model_se.py": 1678695526.489979, "TEMPLATE_PATH/models/cv/video_instance_segmentation/track/kernel_update_head.py": 1681714768.8891828, "TEMPLATE_PATH/models/cv/video_instance_segmentation/track/mask_hungarian_assigner.py": 1681714768.8895793, "TEMPLATE_PATH/models/cv/video_instance_segmentation/video_knet.py": 1681714768.8901427, "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_updator.py": 
1681714768.8875823, "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_update_head.py": 1681714768.887322, "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_frame_iter_head.py": 1681714768.8861332, "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_head.py": 1681714768.8865519, "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_iter_head.py": 1681714768.88694, "TEMPLATE_PATH/models/cv/video_instance_segmentation/utils.py": 1681714768.8898368, "TEMPLATE_PATH/models/cv/video_instance_segmentation/neck/msdeformattn_decoder.py": 1681714768.888164, "TEMPLATE_PATH/models/cv/super_resolution/ecb.py": 1678345974.436123, "TEMPLATE_PATH/models/cv/super_resolution/ecbsr_model.py": 1678345974.4364467, "TEMPLATE_PATH/models/cv/super_resolution/rrdbnet_arch.py": 1666757257.2570488, "TEMPLATE_PATH/models/cv/super_resolution/arch_util.py": 1666757257.2563787, "TEMPLATE_PATH/models/cv/ocr_detection/preprocessor.py": 1684246001.5023808, "TEMPLATE_PATH/models/cv/ocr_detection/model.py": 1678695526.3969364, "TEMPLATE_PATH/models/cv/ocr_detection/utils.py": 1678695526.3993652, "TEMPLATE_PATH/models/cv/ocr_detection/modules/dbnet.py": 1678695526.3981876, "TEMPLATE_PATH/models/cv/ocr_detection/modules/seg_detector_loss.py": 1678695526.3986294, "TEMPLATE_PATH/models/cv/panorama_depth_estimation/networks/util.py": 1678345974.3670025, "TEMPLATE_PATH/models/cv/panorama_depth_estimation/networks/mobilenet.py": 1678345974.3609436, "TEMPLATE_PATH/models/cv/panorama_depth_estimation/networks/equi.py": 1678345974.360484, "TEMPLATE_PATH/models/cv/panorama_depth_estimation/networks/resnet.py": 1678345974.3611743, "TEMPLATE_PATH/models/cv/panorama_depth_estimation/networks/unifuse.py": 1678345974.3667643, "TEMPLATE_PATH/models/cv/panorama_depth_estimation/networks/layers.py": 1684246001.5053837, "TEMPLATE_PATH/models/cv/panorama_depth_estimation/unifuse_model.py": 1678345974.3672006, "TEMPLATE_PATH/models/cv/stream_yolo/utils/format.py": 1678345974.4351218, "TEMPLATE_PATH/models/cv/stream_yolo/utils/boxes.py": 1678345974.4347887, "TEMPLATE_PATH/models/cv/stream_yolo/models/tal_head.py": 1678345974.4333599, "TEMPLATE_PATH/models/cv/stream_yolo/models/dfp_pafpn.py": 1678345974.4322102, "TEMPLATE_PATH/models/cv/stream_yolo/models/streamyolo.py": 1678345974.4329953, "TEMPLATE_PATH/models/cv/stream_yolo/models/network_blocks.py": 1678345974.4326873, "TEMPLATE_PATH/models/cv/stream_yolo/models/darknet.py": 1678345974.4318306, "TEMPLATE_PATH/models/cv/stream_yolo/realtime_video_detector.py": 1678345974.433779, "TEMPLATE_PATH/models/cv/stream_yolo/exp/build.py": 1678345974.4007049, "TEMPLATE_PATH/models/cv/stream_yolo/exp/base_exp.py": 1678345974.4003, "TEMPLATE_PATH/models/cv/stream_yolo/exp/default/streamyolo.py": 1678345974.4303985, "TEMPLATE_PATH/models/cv/stream_yolo/exp/yolox_base.py": 1678345974.4308836, "TEMPLATE_PATH/models/cv/stream_yolo/data/data_augment.py": 1678345974.3993874, "TEMPLATE_PATH/models/cv/virual_tryon/sdafnet.py": 1666757257.299963, "TEMPLATE_PATH/models/cv/bad_image_detecting/bad_image_detecting.py": 1678695526.2924836, "TEMPLATE_PATH/models/cv/human_reconstruction/Reconstruction.py": 1681714768.874672, "TEMPLATE_PATH/models/cv/human_reconstruction/models/Surface_head.py": 1681714768.8764422, "TEMPLATE_PATH/models/cv/human_reconstruction/models/Res_backbone.py": 1681714768.87622, "TEMPLATE_PATH/models/cv/human_reconstruction/models/Embedding.py": 1681714768.8756416, "TEMPLATE_PATH/models/cv/human_reconstruction/models/PixToMesh.py": 
1681714768.875951, "TEMPLATE_PATH/models/cv/human_reconstruction/models/networks.py": 1681714768.877559, "TEMPLATE_PATH/models/cv/human_reconstruction/models/human_segmenter.py": 1684246001.4695294, "TEMPLATE_PATH/models/cv/human_reconstruction/models/geometry.py": 1681714768.8770833, "TEMPLATE_PATH/models/cv/human_reconstruction/models/detectors.py": 1681714768.876841, "TEMPLATE_PATH/models/cv/human_reconstruction/utils.py": 1684246001.4699862, "TEMPLATE_PATH/models/cv/image_driving_perception/preprocessor.py": 1678695526.3451977, "TEMPLATE_PATH/models/cv/image_driving_perception/utils.py": 1678695526.3456447, "TEMPLATE_PATH/models/cv/image_driving_perception/image_driving_percetion_model.py": 1678695526.3447573, "TEMPLATE_PATH/models/cv/video_streaming_perception/longshortnet/longshortnet.py": 1678695526.4507868, "TEMPLATE_PATH/models/cv/video_streaming_perception/longshortnet/models/longshort_backbone_neck.py": 1678695526.4542763, "TEMPLATE_PATH/models/cv/video_streaming_perception/longshortnet/models/longshort.py": 1678695526.4533079, "TEMPLATE_PATH/models/cv/video_streaming_perception/longshortnet/models/dfp_pafpn_short.py": 1678695526.45279, "TEMPLATE_PATH/models/cv/video_streaming_perception/longshortnet/models/dfp_pafpn_long.py": 1678695526.452267, "TEMPLATE_PATH/models/cv/video_streaming_perception/longshortnet/exp/longshortnet_base.py": 1678695526.4501612, "TEMPLATE_PATH/models/cv/image_paintbyexample/model.py": 1678345974.3189397, "TEMPLATE_PATH/models/cv/image_inpainting/refinement.py": 1666757257.1970024, "TEMPLATE_PATH/models/cv/image_inpainting/model.py": 1666757257.1947935, "TEMPLATE_PATH/models/cv/image_inpainting/default.py": 1666757257.1946204, "TEMPLATE_PATH/models/cv/image_inpainting/modules/ade20k/resnet.py": 1666757257.1956348, "TEMPLATE_PATH/models/cv/image_inpainting/modules/ade20k/base.py": 1666757257.1954472, "TEMPLATE_PATH/models/cv/image_inpainting/modules/adversarial.py": 1666757257.1958177, "TEMPLATE_PATH/models/cv/image_inpainting/modules/perceptual.py": 1666757257.196634, "TEMPLATE_PATH/models/cv/image_inpainting/modules/inception.py": 1666757257.1964645, "TEMPLATE_PATH/models/cv/image_inpainting/modules/ffc.py": 1666757257.1962402, "TEMPLATE_PATH/models/cv/image_inpainting/modules/pix2pixhd.py": 1666757257.1967993, "TEMPLATE_PATH/models/cv/image_inpainting/modules/feature_matching.py": 1666757257.196007, "TEMPLATE_PATH/models/cv/image_inpainting/base.py": 1666757257.1944175, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py": 1678695526.3792994, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/util.py": 1678695526.381083, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/match_costs/match_cost.py": 1678695526.3804727, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py": 1678695526.37819, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/nuscenes_dataset.py": 1678695526.3820806, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/loading.py": 1678695526.383117, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/transform_3d.py": 1678695526.3837686, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/dense_heads/petrv2_dednhead.py": 1678695526.3906348, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/dense_heads/depth_net.py": 1678695526.3890011, 
"TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/necks/cp_fpn.py": 1678695526.3925595, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/petr_transformer.py": 1678695526.393985, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/positional_encoding.py": 1678695526.3945107, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/backbones/vovnet.py": 1678695526.3856297, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/detectors/petr3d.py": 1678695526.3916428, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/result_vis.py": 1684246001.4759786, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/depe_detect.py": 1678345974.347357, "TEMPLATE_PATH/models/cv/image_quality_assessment_mos/image_quality_assessment_mos.py": 1678345974.3244548, "TEMPLATE_PATH/models/cv/image_quality_assessment_mos/heads/simple_head.py": 1678345974.3242753, "TEMPLATE_PATH/models/cv/image_quality_assessment_mos/backbones/resnet.py": 1678345974.3235202, "TEMPLATE_PATH/models/cv/image_quality_assessment_mos/censeo_ivqa_model.py": 1678345974.3237197, "TEMPLATE_PATH/models/cv/image_debanding/rrdb/rrdb_image_debanding.py": 1678345974.2884033, "TEMPLATE_PATH/models/cv/image_restoration/demoire_models/nets.py": 1678345974.3251178, "TEMPLATE_PATH/models/cv/image_restoration/image_restoration_model.py": 1678345974.325296, "TEMPLATE_PATH/models/cv/cartoon/model_tf.py": 1678695526.2941835, "TEMPLATE_PATH/models/cv/cartoon/facelib/facer.py": 1683889954.472153, "TEMPLATE_PATH/models/cv/cartoon/facelib/config.py": 1666757257.1560297, "TEMPLATE_PATH/models/cv/cartoon/facelib/LK/lk.py": 1666757257.1556334, "TEMPLATE_PATH/models/cv/cartoon/facelib/face_detector.py": 1666757257.1562476, "TEMPLATE_PATH/models/cv/cartoon/facelib/face_landmark.py": 1684246001.4677038, "TEMPLATE_PATH/models/cv/cartoon/loss.py": 1678695526.2937913, "TEMPLATE_PATH/models/cv/cartoon/utils.py": 1678695526.295007, "TEMPLATE_PATH/models/cv/cartoon/mtcnn_pytorch/src/align_trans.py": 1666757257.1581075, "TEMPLATE_PATH/models/cv/cartoon/mtcnn_pytorch/src/matlab_cp2tform.py": 1666757257.158323, "TEMPLATE_PATH/models/cv/cartoon/network.py": 1678695526.2945373, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/vision_efficient_tuning.py": 1678695526.463801, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/timm_vision_transformer.py": 1678345974.6026883, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/backbone.py": 1678695526.4603705, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/timm_weight_init.py": 1678345974.6033437, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/timm_helpers.py": 1678345974.601856, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/petl.py": 1678695526.4632218, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/model.py": 1678695526.4610043, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/head.py": 1678345974.5989482, "TEMPLATE_PATH/models/cv/movie_scene_segmentation/utils/save_op.py": 1684121077.52684, "TEMPLATE_PATH/models/cv/movie_scene_segmentation/utils/shot_encoder.py": 1666757257.2231948, "TEMPLATE_PATH/models/cv/movie_scene_segmentation/utils/trn.py": 1666757257.2234836, "TEMPLATE_PATH/models/cv/movie_scene_segmentation/utils/head.py": 1678695526.3568585, "TEMPLATE_PATH/models/cv/movie_scene_segmentation/model.py": 1684121077.525873, "TEMPLATE_PATH/models/cv/movie_scene_segmentation/get_model.py": 1666757257.2217636, "TEMPLATE_PATH/models/cv/video_summarization/pgl_sum.py": 1666757257.297918, 
"TEMPLATE_PATH/models/cv/video_summarization/base_model.py": 1666757257.2957783, "TEMPLATE_PATH/models/cv/video_summarization/summarizer.py": 1678345974.5919068, "TEMPLATE_PATH/models/cv/video_summarization/kts/cpd_auto.py": 1666757257.2969224, "TEMPLATE_PATH/models/cv/video_summarization/kts/cpd_nonlin.py": 1666757257.2974133, "TEMPLATE_PATH/models/cv/table_recognition/lineless_table_process.py": 1678695526.4324372, "TEMPLATE_PATH/models/cv/table_recognition/model_lore.py": 1678695526.4331207, "TEMPLATE_PATH/models/cv/table_recognition/modules/lore_processor.py": 1678695526.434709, "TEMPLATE_PATH/models/cv/table_recognition/modules/lore_detector.py": 1678695526.4340818, "TEMPLATE_PATH/models/cv/image_matching/quadtree_attention_model.py": 1678345974.3155432, "TEMPLATE_PATH/models/cv/image_matching/config/default.py": 1678345974.3125448, "TEMPLATE_PATH/models/cv/image_matching/utils/misc.py": 1678345974.315888, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/loftr.py": 1678345974.3134868, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/utils/position_encoding.py": 1678345974.3152256, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/utils/coarse_matching.py": 1678345974.3148923, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/utils/fine_matching.py": 1678345974.3150685, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/loftr_module/quadtree_attention.py": 1678345974.3143134, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/loftr_module/fine_preprocess.py": 1678345974.3139389, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/loftr_module/transformer.py": 1678345974.3145041, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/loftr_module/linear_attention.py": 1678345974.3141232, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/backbone/resnet_fpn.py": 1678345974.3133032, "TEMPLATE_PATH/models/cv/tinynas_detection/detector.py": 1678695526.4374578, "TEMPLATE_PATH/models/cv/tinynas_detection/tinynas_detector.py": 1678345974.460455, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/apis/detector_evaluater.py": 1681714768.8838654, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/apis/detector_inference.py": 1681714768.8841915, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/structures/boxlist_ops.py": 1678345974.4569457, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/structures/bounding_box.py": 1678345974.4566479, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/structures/image_list.py": 1678345974.4573236, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/utils/model_utils.py": 1678345974.4585514, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/utils/boxes.py": 1678345974.4581728, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/utils/scheduler.py": 1678345974.4589145, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/augmentations/box_level_augs/box_level_augs.py": 1678345974.4441965, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/augmentations/box_level_augs/gaussian_maps.py": 1678345974.4447448, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/augmentations/box_level_augs/color_augs.py": 1678345974.4444985, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/augmentations/box_level_augs/geometric_augs.py": 1683889954.4839153, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/augmentations/scale_aware_aug.py": 1678345974.4453552, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/detectors/detector.py": 1678345974.4558744, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/losses/distill_loss.py": 1678345974.4532282, 
"TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/losses/gfocal_loss.py": 1678345974.4535718, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/core/ota_assigner.py": 1678345974.4496946, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/core/base_ops.py": 1678345974.4481623, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/core/repvgg_block.py": 1678345974.4501693, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/core/neck_ops.py": 1678345974.4485013, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/core/ops.py": 1678345974.4491763, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/core/utils.py": 1678345974.450523, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/core/weight_init.py": 1678345974.4508731, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/necks/giraffe_config.py": 1678345974.4543374, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/necks/giraffe_fpn.py": 1678345974.4547024, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/necks/giraffe_fpn_btn.py": 1678345974.4552062, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/heads/gfocal_v2_tiny.py": 1678345974.4517708, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/heads/zero_head.py": 1678345974.45238, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/backbones/tinynas_csp.py": 1678345974.4469912, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/backbones/tinynas_res.py": 1678345974.447312, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/backbones/darknet.py": 1678345974.4466953, "TEMPLATE_PATH/models/cv/tinynas_detection/utils.py": 1678345974.4609265, "TEMPLATE_PATH/models/cv/tinynas_detection/tinynas_damoyolo.py": 1678345974.4599845, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/utils/visualization.py": 1678345974.5058522, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/utils/utils.py": 1678345974.5049293, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/utils/kalman_filter.py": 1678345974.5041819, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/utils/image.py": 1678345974.503496, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/tracker/matching.py": 1684246001.5073156, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/tracker/multitracker.py": 1684246001.507944, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/tracker/basetrack.py": 1678345974.501055, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/models/model.py": 1678345974.4990714, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/models/common.py": 1678345974.497754, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/models/yolo.py": 1678345974.4996593, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/models/decode.py": 1678345974.4984286, "TEMPLATE_PATH/models/cv/nerf_recon_acc/nerf_recon_acc.py": 1678695526.3603125, "TEMPLATE_PATH/models/cv/nerf_recon_acc/network/nerf.py": 1678345974.340633, "TEMPLATE_PATH/models/cv/nerf_recon_acc/network/utils.py": 1678345974.340971, "TEMPLATE_PATH/models/cv/nerf_recon_acc/network/segmenter.py": 1684246001.4756348, "TEMPLATE_PATH/models/cv/nerf_recon_acc/nerf_preprocess.py": 1678695526.359052, "TEMPLATE_PATH/models/cv/nerf_recon_acc/dataloader/nerf_dataset.py": 1678345974.3388634, "TEMPLATE_PATH/models/cv/nerf_recon_acc/dataloader/read_write_model.py": 1678345974.3391125, "TEMPLATE_PATH/models/cv/video_deinterlace/UNet_for_video_deinterlace.py": 1678345974.461295, 
"TEMPLATE_PATH/models/cv/video_deinterlace/deinterlace_arch.py": 1678345974.4619946, "TEMPLATE_PATH/models/cv/video_deinterlace/models/deep_fourier_upsampling.py": 1678345974.4627986, "TEMPLATE_PATH/models/cv/video_deinterlace/models/fre.py": 1678345974.4634838, "TEMPLATE_PATH/models/cv/video_deinterlace/models/utils.py": 1678345974.464179, "TEMPLATE_PATH/models/cv/video_deinterlace/models/archs.py": 1678345974.4625406, "TEMPLATE_PATH/models/cv/video_deinterlace/models/enh.py": 1678345974.4631467, "TEMPLATE_PATH/models/cv/cmdssl_video_embedding/resnet3d.py": 1666757257.1593952, "TEMPLATE_PATH/models/cv/cmdssl_video_embedding/resnet2p1d.py": 1666757257.1591942, "TEMPLATE_PATH/models/cv/cmdssl_video_embedding/c3d.py": 1666757257.1590006, "TEMPLATE_PATH/models/cv/image_depth_estimation_bts/depth_estimation_bts_model.py": 1678695526.3416724, "TEMPLATE_PATH/models/cv/image_depth_estimation_bts/networks/decoder.py": 1678695526.3428533, "TEMPLATE_PATH/models/cv/image_depth_estimation_bts/networks/bts_model.py": 1678695526.3423235, "TEMPLATE_PATH/models/cv/image_depth_estimation_bts/networks/encoder.py": 1678695526.3433921, "TEMPLATE_PATH/models/cv/image_depth_estimation_bts/networks/utils.py": 1678695526.343739, "TEMPLATE_PATH/models/cv/motion_generation/model.py": 1678345974.3359873, "TEMPLATE_PATH/models/cv/motion_generation/modules/rotation2xyz.py": 1678345974.3373065, "TEMPLATE_PATH/models/cv/motion_generation/modules/respace.py": 1678345974.3370926, "TEMPLATE_PATH/models/cv/motion_generation/modules/smpl.py": 1678345974.3374798, "TEMPLATE_PATH/models/cv/motion_generation/modules/mdm.py": 1678345974.33691, "TEMPLATE_PATH/models/cv/motion_generation/modules/gaussian_diffusion.py": 1678345974.3366945, "TEMPLATE_PATH/models/cv/motion_generation/modules/cfg_sampler.py": 1678345974.3364377, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/utils/requirements_check.py": 1678345974.2937963, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/utils/voc_register.py": 1684246001.4703872, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/utils/configuration_mapper.py": 1678345974.2932599, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/utils/model_surgery_op.py": 1678345974.293452, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/utils/coco_register.py": 1678345974.2930408, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/utils/register_data.py": 1678345974.293619, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/models/fast_rcnn.py": 1678345974.2918143, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/models/defrcn.py": 1678345974.2915351, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/models/resnet.py": 1678345974.2922988, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/models/calibration_layer.py": 1678345974.2913256, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/models/gdl.py": 1678345974.2920313, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/models/roi_heads.py": 1678345974.2925265, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/defrcn_for_fewshot.py": 1678345974.289662, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/evaluation/coco_evaluation.py": 1678345974.2904465, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/evaluation/pascal_voc_evaluation.py": 1678345974.2908285, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/evaluation/evaluator.py": 1678345974.2906368, "TEMPLATE_PATH/models/cv/ocr_recognition/preprocessor.py": 1681714768.8827155, "TEMPLATE_PATH/models/cv/ocr_recognition/model.py": 1684246001.5035024, "TEMPLATE_PATH/models/cv/ocr_recognition/modules/convnextvit.py": 1681714768.881167, 
"TEMPLATE_PATH/models/cv/ocr_recognition/modules/crnn.py": 1681714768.8814888, "TEMPLATE_PATH/models/cv/ocr_recognition/modules/vitstr.py": 1681714768.8823054, "TEMPLATE_PATH/models/cv/ocr_recognition/modules/timm_tinyc.py": 1678345974.3579545, "TEMPLATE_PATH/models/cv/ocr_recognition/modules/convnext.py": 1678345974.3574538, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/track/quasi_dense_embed_tracker.py": 1678345974.5580919, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/mask.py": 1678345974.5271971, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/kernel_updator.py": 1678345974.5264003, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/kernel_update_head.py": 1678345974.5256743, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/semantic_fpn_wrapper.py": 1681714768.8905349, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/kernel_head.py": 1678345974.519709, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/kernel_iter_head.py": 1678345974.5233328, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/track_heads.py": 1678345974.5286357, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/neck/fpn.py": 1678345974.5311077, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/video_k_net.py": 1678345974.5597517, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/backbone/swin_checkpoint.py": 1678345974.516286, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/backbone/swin_transformer.py": 1678345974.517262, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/visualizer.py": 1678345974.5604084, "TEMPLATE_PATH/models/cv/open_vocabulary_detection_vild/vild.py": 1684246001.5045104, "TEMPLATE_PATH/models/cv/image_reid_person/pass_model.py": 1666757257.2059593, "TEMPLATE_PATH/models/cv/image_reid_person/transreid_model.py": 1666757257.2062182, "TEMPLATE_PATH/models/cv/image_face_fusion/facelib/align_trans.py": 1678345974.3027532, "TEMPLATE_PATH/models/cv/image_face_fusion/facelib/matlab_cp2tform.py": 1678345974.3029947, "TEMPLATE_PATH/models/cv/image_face_fusion/network/aad_layer.py": 1678345974.3037808, "TEMPLATE_PATH/models/cv/image_face_fusion/network/dense_motion.py": 1678345974.3045554, "TEMPLATE_PATH/models/cv/image_face_fusion/network/model_irse.py": 1678345974.3051307, "TEMPLATE_PATH/models/cv/image_face_fusion/network/bfm.py": 1678345974.3042998, "TEMPLATE_PATH/models/cv/image_face_fusion/network/ops.py": 1678345974.3053207, "TEMPLATE_PATH/models/cv/image_face_fusion/network/aei_flow_net.py": 1678345974.3040216, "TEMPLATE_PATH/models/cv/image_face_fusion/network/facerecon_model.py": 1678345974.3048775, "TEMPLATE_PATH/models/cv/image_face_fusion/image_face_fusion.py": 1678345974.3033106, "TEMPLATE_PATH/models/cv/image_face_fusion/facegan/gan_wrap.py": 1678345974.3008904, "TEMPLATE_PATH/models/cv/image_face_fusion/facegan/op/fused_act.py": 1678345974.3021884, "TEMPLATE_PATH/models/cv/image_face_fusion/facegan/op/upfirdn2d.py": 1678345974.3023663, "TEMPLATE_PATH/models/cv/image_face_fusion/facegan/op/conv2d_gradfix.py": 1678345974.3019848, "TEMPLATE_PATH/models/cv/image_face_fusion/facegan/model.py": 1678345974.3014028, "TEMPLATE_PATH/models/cv/product_segmentation/net.py": 1678695526.4043183, "TEMPLATE_PATH/models/cv/product_segmentation/seg_infer.py": 1666778289.670906, "TEMPLATE_PATH/models/cv/controllable_image_generation/controlnet.py": 1678695526.3069751, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/openpose/body.py": 1678695526.3047397, 
"TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/openpose/util.py": 1678695526.3063028, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/openpose/model.py": 1678695526.3059084, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/openpose/hand.py": 1678695526.305337, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/annotator.py": 1678695526.296671, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/api.py": 1678695526.2974072, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/midas/midas_net_custom.py": 1678695526.299506, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/midas/transforms.py": 1678695526.2998872, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/midas/midas_net.py": 1678695526.2992017, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/midas/dpt_depth.py": 1678695526.298864, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/midas/base_model.py": 1678695526.2981143, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/midas/vit.py": 1678695526.300227, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/midas/blocks.py": 1678695526.298546, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/utils.py": 1678695526.3005583, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/mlsd/utils.py": 1678695526.3033025, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/mlsd/mbv2_mlsd_large.py": 1678695526.3022327, "TEMPLATE_PATH/models/cv/video_inpainting/inpainting.py": 1678695526.438486, "TEMPLATE_PATH/models/cv/video_inpainting/inpainting_model.py": 1678695526.438962, "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/casmvs_model.py": 1678345974.3164253, "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/colmap2mvsnet.py": 1684246001.4733398, "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/cas_mvsnet.py": 1678345974.3162477, "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/utils.py": 1678345974.317991, "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/depth_filter.py": 1684246001.4736886, "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/module.py": 1678345974.31774, "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/general_eval_dataset.py": 1678345974.3174586, "TEMPLATE_PATH/models/cv/image_binary_quant_classification/binary_quant_model.py": 1678345974.2778409, "TEMPLATE_PATH/models/cv/image_binary_quant_classification/bnext.py": 1678345974.2784865, "TEMPLATE_PATH/models/cv/skin_retouching/detection_model/detection_unet_in.py": 1666757257.24693, "TEMPLATE_PATH/models/cv/skin_retouching/detection_model/detection_module.py": 1666757257.2464738, "TEMPLATE_PATH/models/cv/skin_retouching/retinaface/net.py": 1666757257.2504349, "TEMPLATE_PATH/models/cv/skin_retouching/retinaface/prior_box.py": 1666757257.2523744, "TEMPLATE_PATH/models/cv/skin_retouching/retinaface/box_utils.py": 1666757257.249882, "TEMPLATE_PATH/models/cv/skin_retouching/retinaface/utils.py": 1666757257.2532027, "TEMPLATE_PATH/models/cv/skin_retouching/retinaface/network.py": 1666757257.2511058, "TEMPLATE_PATH/models/cv/skin_retouching/retinaface/predict_single.py": 1666757257.251693, "TEMPLATE_PATH/models/cv/skin_retouching/unet_deploy.py": 1666757257.2537475, "TEMPLATE_PATH/models/cv/skin_retouching/weights_init.py": 1666757257.2549121, "TEMPLATE_PATH/models/cv/skin_retouching/utils.py": 1666757257.2543528, 
"TEMPLATE_PATH/models/cv/skin_retouching/inpainting_model/gconv.py": 1666757257.2480178, "TEMPLATE_PATH/models/cv/skin_retouching/inpainting_model/inpainting_unet.py": 1666757257.248478, "TEMPLATE_PATH/models/cv/body_3d_keypoints/hdformer/directed_graph.py": 1683889954.471591, "TEMPLATE_PATH/models/cv/body_3d_keypoints/hdformer/hdformer_detector.py": 1678345974.191, "TEMPLATE_PATH/models/cv/body_3d_keypoints/hdformer/skeleton.py": 1678345974.1912234, "TEMPLATE_PATH/models/cv/body_3d_keypoints/hdformer/backbone.py": 1678345974.190157, "TEMPLATE_PATH/models/cv/body_3d_keypoints/hdformer/hdformer.py": 1678345974.19077, "TEMPLATE_PATH/models/cv/body_3d_keypoints/hdformer/block.py": 1678345974.1903841, "TEMPLATE_PATH/models/cv/body_3d_keypoints/cannonical_pose/canonical_pose_modules.py": 1678345974.189541, "TEMPLATE_PATH/models/cv/body_3d_keypoints/cannonical_pose/body_3d_pose.py": 1683889954.4713006, "TEMPLATE_PATH/models/cv/action_recognition/models.py": 1666757257.1497922, "TEMPLATE_PATH/models/cv/action_recognition/s3dg.py": 1666757257.1501145, "TEMPLATE_PATH/models/cv/action_recognition/tada_convnext.py": 1666757257.1504557, "TEMPLATE_PATH/models/cv/action_recognition/temporal_patch_shift_transformer.py": 1683889954.4706383, "TEMPLATE_PATH/models/cv/video_frame_interpolation/interp_model/flow_reversal.py": 1678345974.4836097, "TEMPLATE_PATH/models/cv/video_frame_interpolation/interp_model/UNet.py": 1678345974.4825158, "TEMPLATE_PATH/models/cv/video_frame_interpolation/interp_model/IFNet_swin.py": 1678345974.481858, "TEMPLATE_PATH/models/cv/video_frame_interpolation/interp_model/refinenet_arch.py": 1678345974.4842384, "TEMPLATE_PATH/models/cv/video_frame_interpolation/interp_model/transformer_layers.py": 1678345974.4849417, "TEMPLATE_PATH/models/cv/video_frame_interpolation/utils/utils.py": 1678345974.4869297, "TEMPLATE_PATH/models/cv/video_frame_interpolation/utils/scene_change_detection.py": 1684246001.5067093, "TEMPLATE_PATH/models/cv/video_frame_interpolation/VFINet_for_video_frame_interpolation.py": 1678345974.4789994, "TEMPLATE_PATH/models/cv/video_frame_interpolation/VFINet_arch.py": 1678345974.4787207, "TEMPLATE_PATH/models/cv/video_frame_interpolation/flow_model/update.py": 1678345974.4809961, "TEMPLATE_PATH/models/cv/video_frame_interpolation/flow_model/corr.py": 1678345974.4801412, "TEMPLATE_PATH/models/cv/video_frame_interpolation/flow_model/extractor.py": 1678345974.480411, "TEMPLATE_PATH/models/cv/video_frame_interpolation/flow_model/raft.py": 1678345974.4806812, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/dense_heads/rpn_head.py": 1678695526.3648705, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/dense_heads/anchor_head.py": 1678695526.3625498, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/necks/fpn.py": 1678695526.3662295, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/utils/convModule_norm.py": 1678695526.3763406, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/utils/checkpoint.py": 1678695526.375762, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/backbones/vit.py": 1666757257.2256925, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py": 1678695526.3729281, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/fcn_mask_head.py": 1678695526.3743196, "TEMPLATE_PATH/models/cv/object_detection/mmdet_model.py": 1666757257.2243414, "TEMPLATE_PATH/models/cv/pedestrian_attribute_recognition/model.py": 1683889954.4834628, "TEMPLATE_PATH/models/cv/pointcloud_sceneflow_estimation/sf_rcp.py": 
1678345974.3682785, "TEMPLATE_PATH/models/cv/pointcloud_sceneflow_estimation/rcp_model.py": 1678345974.3680415, "TEMPLATE_PATH/models/cv/pointcloud_sceneflow_estimation/common.py": 1678345974.367603, "TEMPLATE_PATH/models/cv/pointcloud_sceneflow_estimation/pointnet2_utils.py": 1678345974.36784, "TEMPLATE_PATH/models/cv/animal_recognition/splat.py": 1666757257.151845, "TEMPLATE_PATH/models/cv/animal_recognition/resnet.py": 1666757257.1516247, "TEMPLATE_PATH/models/cv/video_stabilization/utils/image_utils.py": 1678345974.5841804, "TEMPLATE_PATH/models/cv/video_stabilization/utils/RAFTUtils.py": 1678345974.5826185, "TEMPLATE_PATH/models/cv/video_stabilization/utils/math_utils.py": 1678345974.5846765, "TEMPLATE_PATH/models/cv/video_stabilization/utils/ProjectionUtils.py": 1678345974.5819445, "TEMPLATE_PATH/models/cv/video_stabilization/utils/WarpUtils.py": 1678345974.5831873, "TEMPLATE_PATH/models/cv/video_stabilization/utils/MedianFilter.py": 1678345974.5813267, "TEMPLATE_PATH/models/cv/video_stabilization/utils/IterativeSmooth.py": 1678345974.5807827, "TEMPLATE_PATH/models/cv/video_stabilization/DUTRAFTStabilizer.py": 1678345974.5794287, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/config.py": 1678345974.5730486, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/rf_det_so.py": 1678345974.5783482, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/Smoother.py": 1678345974.572002, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/DUT_raft.py": 1678345974.5681, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/MotionPro.py": 1678345974.568633, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/RAFT/update.py": 1678345974.5714862, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/RAFT/corr.py": 1678345974.5699692, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/RAFT/extractor.py": 1678345974.5704808, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/RAFT/raft.py": 1678345974.5709665, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/rf_det_module.py": 1678345974.573552, "TEMPLATE_PATH/models/cv/video_depth_estimation/dro_model.py": 1678345974.4664078, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/misc.py": 1678345974.4781265, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/config.py": 1678345974.4763255, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/horovod.py": 1678345974.4769518, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/image_gt.py": 1678345974.477618, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/types.py": 1678345974.4784274, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/depth.py": 1684246001.5061839, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/load.py": 1678345974.4778461, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/image.py": 1678345974.4773502, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/augmentations.py": 1678345974.4760456, "TEMPLATE_PATH/models/cv/video_depth_estimation/models/model_utils.py": 1678345974.4694006, "TEMPLATE_PATH/models/cv/video_depth_estimation/models/sfm_model_mf.py": 1678345974.4700265, "TEMPLATE_PATH/models/cv/video_depth_estimation/models/model_checkpoint.py": 1678345974.4691477, "TEMPLATE_PATH/models/cv/video_depth_estimation/models/model_wrapper.py": 1678345974.469756, "TEMPLATE_PATH/models/cv/video_depth_estimation/models/sup_model_mf.py": 1678345974.4702911, "TEMPLATE_PATH/models/cv/video_depth_estimation/networks/layers/resnet/pose_decoder.py": 1678345974.4731765, "TEMPLATE_PATH/models/cv/video_depth_estimation/networks/layers/resnet/resnet_encoder.py": 1678345974.473565, 
"TEMPLATE_PATH/models/cv/video_depth_estimation/networks/layers/resnet/layers.py": 1678345974.4729247, "TEMPLATE_PATH/models/cv/video_depth_estimation/networks/layers/resnet/depth_decoder.py": 1678345974.4725597, "TEMPLATE_PATH/models/cv/video_depth_estimation/networks/optim/update.py": 1678345974.4748883, "TEMPLATE_PATH/models/cv/video_depth_estimation/networks/optim/extractor.py": 1678345974.4745526, "TEMPLATE_PATH/models/cv/video_depth_estimation/networks/depth_pose/depth_pose_net.py": 1678345974.4713385, "TEMPLATE_PATH/models/cv/video_depth_estimation/configs/default_config.py": 1678345974.4657435, "TEMPLATE_PATH/models/cv/video_depth_estimation/geometry/pose_utils.py": 1678345974.4684362, "TEMPLATE_PATH/models/cv/video_depth_estimation/geometry/camera_utils.py": 1678345974.467637, "TEMPLATE_PATH/models/cv/video_depth_estimation/geometry/camera.py": 1678345974.4673057, "TEMPLATE_PATH/models/cv/video_depth_estimation/geometry/pose.py": 1678345974.4680007, "TEMPLATE_PATH/models/cv/vidt/backbone.py": 1681714768.8921459, "TEMPLATE_PATH/models/cv/vidt/model.py": 1681714768.8937347, "TEMPLATE_PATH/models/cv/vidt/head.py": 1681714768.8931575, "TEMPLATE_PATH/models/cv/vidt/fpn_fusion.py": 1681714768.8928485, "TEMPLATE_PATH/models/cv/vidt/deformable_transformer.py": 1681714768.8925443, "TEMPLATE_PATH/models/cv/face_human_hand_detection/shufflenetv2.py": 1678695526.3277714, "TEMPLATE_PATH/models/cv/face_human_hand_detection/one_stage_detector.py": 1678695526.3271508, "TEMPLATE_PATH/models/cv/face_human_hand_detection/nanodet_plus_head.py": 1678695526.326374, "TEMPLATE_PATH/models/cv/face_human_hand_detection/det_infer.py": 1666778289.6696548, "TEMPLATE_PATH/models/cv/face_human_hand_detection/ghost_pan.py": 1678695526.3257587, "TEMPLATE_PATH/models/cv/face_human_hand_detection/utils.py": 1678695526.328504, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/misc.py": 1666757257.2392309, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/mttr.py": 1673508904.8399704, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/multimodal_transformer.py": 1673508904.8402708, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/matcher.py": 1669108798.5943944, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/backbone.py": 1666757257.2390404, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/position_encoding_2d.py": 1666757257.239805, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/postprocessing.py": 1666757257.239986, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/criterion.py": 1669108798.5941, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/swin_transformer.py": 1669108798.5960565, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/segmentation.py": 1666757257.240216, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/model.py": 1673508904.8396866, "TEMPLATE_PATH/models/cv/hand_static/networks.py": 1678695526.3304467, "TEMPLATE_PATH/models/cv/hand_static/hand_model.py": 1666778289.6702523, "TEMPLATE_PATH/models/cv/image_depth_estimation/newcrfs_model.py": 1678345974.297565, "TEMPLATE_PATH/models/cv/image_depth_estimation/networks/uper_crf_head.py": 1678345974.2973852, "TEMPLATE_PATH/models/cv/image_depth_estimation/networks/newcrf_layers.py": 1678345974.2962215, "TEMPLATE_PATH/models/cv/image_depth_estimation/networks/newcrf_depth.py": 1678345974.2958264, 
"TEMPLATE_PATH/models/cv/image_depth_estimation/networks/newcrf_utils.py": 1678345974.2965019, "TEMPLATE_PATH/models/cv/image_depth_estimation/networks/swin_transformer.py": 1678345974.2970595, "TEMPLATE_PATH/models/cv/image_colorization/unet/unet.py": 1678345974.287222, "TEMPLATE_PATH/models/cv/image_colorization/unet/utils.py": 1678345974.287506, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/ddcolor_for_image_colorization.py": 1681714768.8788333, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/ddcolor.py": 1678345974.284877, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/utils/vgg.py": 1681714768.8797908, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/utils/unet.py": 1678345974.2865462, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/utils/transformer_utils.py": 1678345974.2863536, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/utils/position_encoding.py": 1678345974.2861621, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/utils/convnext.py": 1678345974.285967, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/loss.py": 1681714768.879511, "TEMPLATE_PATH/models/cv/face_detection/retinaface/detection.py": 1673508904.7995956, "TEMPLATE_PATH/models/cv/face_detection/retinaface/models/retinaface.py": 1666757257.1662319, "TEMPLATE_PATH/models/cv/face_detection/retinaface/models/net.py": 1666757257.1660082, "TEMPLATE_PATH/models/cv/face_detection/retinaface/utils.py": 1666757257.166439, "TEMPLATE_PATH/models/cv/face_detection/mtcnn/models/detector.py": 1673508904.7983325, "TEMPLATE_PATH/models/cv/face_detection/mtcnn/models/get_nets.py": 1666757257.1649437, "TEMPLATE_PATH/models/cv/face_detection/mtcnn/models/box_utils.py": 1666757257.1642718, "TEMPLATE_PATH/models/cv/face_detection/mtcnn/models/first_stage.py": 1666757257.1647036, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/detection.py": 1673508904.8020747, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/ssd/predictor.py": 1678345974.263985, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/ssd/data_preprocessing.py": 1666757257.1760805, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/ssd/ssd.py": 1666757257.1772814, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/ssd/mb_tiny_fd.py": 1666757257.1766155, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/ssd/fd_config.py": 1666757257.1763618, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/transforms.py": 1666757257.1775296, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/box_utils.py": 1666757257.1750665, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/mb_tiny.py": 1666757257.1753407, "TEMPLATE_PATH/models/cv/face_detection/peppa_pig_face/facer.py": 1678345974.2030537, "TEMPLATE_PATH/models/cv/face_detection/peppa_pig_face/LK/lk.py": 1678345974.202424, "TEMPLATE_PATH/models/cv/face_detection/peppa_pig_face/face_detector.py": 1678345974.2027018, "TEMPLATE_PATH/models/cv/face_detection/peppa_pig_face/face_landmark.py": 1684246001.4691453, "TEMPLATE_PATH/models/cv/face_detection/scrfd/scrfd_detect.py": 1678345974.262096, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/core/post_processing/bbox_nms.py": 1678695526.308986, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/core/bbox/transforms.py": 1678695526.3082, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/retinaface.py": 1678695526.3129826, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/auto_augment.py": 1678695526.3106256, 
"TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/transforms.py": 1678695526.312393, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/loading.py": 1678695526.3117406, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/formating.py": 1678695526.311245, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/dense_heads/scrfd_head.py": 1678695526.316167, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/master_net.py": 1683889954.473481, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/mobilenet.py": 1678695526.3142238, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/resnet.py": 1678695526.3147054, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/scrfd.py": 1678695526.31966, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/single_stage.py": 1678695526.3215094, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/base.py": 1678695526.3180547, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/tinymog.py": 1678695526.3232183, "TEMPLATE_PATH/models/cv/face_detection/scrfd/tinymog_detect.py": 1678345974.263417, "TEMPLATE_PATH/models/cv/face_detection/scrfd/preprocessor.py": 1678345974.2381473, "TEMPLATE_PATH/models/cv/face_detection/scrfd/damofd_detect.py": 1683889954.4730425, "TEMPLATE_PATH/models/cv/face_detection/mogface/models/mogprednet.py": 1666757257.1628885, "TEMPLATE_PATH/models/cv/face_detection/mogface/models/resnet.py": 1666757257.1630945, "TEMPLATE_PATH/models/cv/face_detection/mogface/models/utils.py": 1666757257.1633208, "TEMPLATE_PATH/models/cv/face_detection/mogface/models/detectors.py": 1673508904.7980537, "TEMPLATE_PATH/models/cv/face_detection/mogface/models/mogface.py": 1666757257.162678, "TEMPLATE_PATH/models/cv/robust_image_classification/easyrobust_model.py": 1678345974.369159, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/ddpm_segmentation_model.py": 1678695526.3499827, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/utils/data_process_func.py": 1666757257.213139, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/utils/builder.py": 1666757257.212899, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/utils/seg_func.py": 1678345974.3271163, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/beit_adapter.py": 1666757257.2102795, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/adapter_modules.py": 1666757257.2093182, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/base/beit.py": 1666757257.210009, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/encoder_decoder_mask2former.py": 1666757257.2121763, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/base_segmentor.py": 1666757257.2117958, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/mask2former_head_from_mmseg.py": 1666757257.2112045, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/base_decode_head.py": 1666757257.2109008, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/semantic_seg_model.py": 1684246001.4743931, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/pan_merge/base_panoptic_fusion_head.py": 1666757257.2071388, 
"TEMPLATE_PATH/models/cv/image_semantic_segmentation/pan_merge/maskformer_semantic_head.py": 1666757257.2074032, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/ddpm_seg/data_util.py": 1678695526.3485208, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/ddpm_seg/utils.py": 1678345974.3266795, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/ddpm_seg/feature_extractors.py": 1678695526.3490577, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/ddpm_seg/pixel_classifier.py": 1678695526.3495007, "TEMPLATE_PATH/models/cv/video_single_object_tracking/config/ostrack.py": 1666757257.2861888, "TEMPLATE_PATH/models/cv/video_single_object_tracking/utils/utils.py": 1666757257.294515, "TEMPLATE_PATH/models/cv/video_single_object_tracking/tracker/procontext.py": 1678695526.4486487, "TEMPLATE_PATH/models/cv/video_single_object_tracking/tracker/ostrack.py": 1666757257.2933815, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/layers/attn_blocks.py": 1678695526.4463873, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/layers/head.py": 1678695526.446728, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/layers/patch_embed.py": 1666757257.2896674, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/layers/attn.py": 1666757257.2881665, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/ostrack/base_backbone.py": 1666757257.290771, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/ostrack/ostrack.py": 1678695526.44714, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/ostrack/utils.py": 1666757257.291744, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/ostrack/vit_ce.py": 1666757257.292233, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/procontext/procontext.py": 1678695526.4476662, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/procontext/utils.py": 1678695526.447871, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/procontext/vit_ce.py": 1678695526.4480832, "TEMPLATE_PATH/models/cv/text_driven_segmentation/lseg_net.py": 1666757257.2610106, "TEMPLATE_PATH/models/cv/text_driven_segmentation/lseg_blocks.py": 1666757257.2598405, "TEMPLATE_PATH/models/cv/text_driven_segmentation/lseg_model.py": 1666757257.2603793, "TEMPLATE_PATH/models/cv/text_driven_segmentation/model.py": 1666757257.2622228, "TEMPLATE_PATH/models/cv/text_driven_segmentation/lseg_vit.py": 1666757257.2616358, "TEMPLATE_PATH/models/cv/text_driven_segmentation/clip.py": 1666757257.258649, "TEMPLATE_PATH/models/cv/text_driven_segmentation/simple_tokenizer.py": 1666757257.2628324, "TEMPLATE_PATH/models/cv/text_driven_segmentation/lseg_base.py": 1666757257.2592633, "TEMPLATE_PATH/models/cv/crowd_counting/hrnet_aspp_relu.py": 1684246001.4687607, "TEMPLATE_PATH/models/cv/crowd_counting/cc_model.py": 1666757257.1599863, "TEMPLATE_PATH/models/cv/image_panoptic_segmentation/panseg_model.py": 1666757257.1997347, "TEMPLATE_PATH/models/cv/face_emotion/emotion_model.py": 1666757257.1793659, "TEMPLATE_PATH/models/cv/face_emotion/emotion_infer.py": 1678345974.266066, "TEMPLATE_PATH/models/cv/face_emotion/face_alignment/face_align.py": 1666757257.1801705, "TEMPLATE_PATH/models/cv/face_emotion/face_alignment/face.py": 1666757257.1799781, "TEMPLATE_PATH/models/cv/face_emotion/efficient/model.py": 1678695526.3245037, "TEMPLATE_PATH/models/cv/face_emotion/efficient/utils.py": 1678695526.3249586, "TEMPLATE_PATH/models/cv/video_super_resolution/real_basicvsr_net.py": 1678345974.5962114, 
"TEMPLATE_PATH/models/cv/video_super_resolution/msrresnet_lite_model.py": 1678345974.5949175, "TEMPLATE_PATH/models/cv/video_super_resolution/common.py": 1678345974.5942144, "TEMPLATE_PATH/models/cv/video_super_resolution/real_basicvsr_for_video_super_resolution.py": 1678345974.5955362, "TEMPLATE_PATH/models/cv/video_super_resolution/basicvsr_net.py": 1678345974.5935404, "TEMPLATE_PATH/models/cv/face_attribute_recognition/fair_face/face_attribute_recognition.py": 1678345974.201238, "TEMPLATE_PATH/models/cv/image_denoise/nafnet/NAFNet_arch.py": 1666757257.1934595, "TEMPLATE_PATH/models/cv/image_denoise/nafnet/arch_util.py": 1666757257.1938126, "TEMPLATE_PATH/models/cv/image_denoise/nafnet_for_image_denoise.py": 1678345974.2943788, "TEMPLATE_PATH/models/cv/image_classification/mmcls_model.py": 1678345974.2806082, "TEMPLATE_PATH/models/cv/image_classification/utils.py": 1678345974.2812235, "TEMPLATE_PATH/models/cv/image_classification/backbones/beit_v2.py": 1678345974.279836, "TEMPLATE_PATH/models/cv/image_classification/backbones/nextvit.py": 1678345974.2801979, "TEMPLATE_PATH/models/cv/image_classification/resnet50_cc.py": 1678345974.2809508, "TEMPLATE_PATH/models/cv/image_color_enhance/csrnet.py": 1666757257.1914177, "TEMPLATE_PATH/models/cv/image_color_enhance/deeplpf/deeplpfnet.py": 1678345974.2835712, "TEMPLATE_PATH/models/cv/image_color_enhance/deeplpf/deeplpf_image_color_enhance.py": 1678345974.2831166, "TEMPLATE_PATH/models/cv/image_color_enhance/image_color_enhance.py": 1678345974.283899, "TEMPLATE_PATH/models/cv/image_color_enhance/adaint/adaint.py": 1678345974.2824776, "TEMPLATE_PATH/models/base/base_torch_head.py": 1678345974.176039, "TEMPLATE_PATH/models/base/base_model.py": 1681714768.8640296, "TEMPLATE_PATH/models/base/base_torch_model.py": 1681714768.8644474, "TEMPLATE_PATH/models/base/base_head.py": 1678695526.2807148, "TEMPLATE_PATH/metrics/image_quality_assessment_degradation_metric.py": 1678345974.1558983, "TEMPLATE_PATH/metrics/prediction_saving_wrapper.py": 1678345974.1587963, "TEMPLATE_PATH/metrics/video_stabilization_metric.py": 1678345974.161685, "TEMPLATE_PATH/metrics/ppl_metric.py": 1678345974.158113, "TEMPLATE_PATH/metrics/inbatch_recall_metric.py": 1678345974.1564841, "TEMPLATE_PATH/metrics/loss_metric.py": 1678345974.1567907, "TEMPLATE_PATH/metrics/ocr_recognition_metric.py": 1681714768.859318, "TEMPLATE_PATH/metrics/map_metric.py": 1678695526.2701354, "TEMPLATE_PATH/metrics/image_colorization_metric.py": 1681714768.8586574, "TEMPLATE_PATH/metrics/sequence_classification_metric.py": 1678345974.159533, "TEMPLATE_PATH/metrics/audio_noise_metric.py": 1678345974.1516218, "TEMPLATE_PATH/metrics/translation_evaluation_metric.py": 1684246001.462436, "TEMPLATE_PATH/metrics/video_frame_interpolation_metric.py": 1678345974.1614027, "TEMPLATE_PATH/metrics/image_inpainting_metric.py": 1678345974.1546395, "TEMPLATE_PATH/metrics/image_denoise_metric.py": 1678345974.1542509, "TEMPLATE_PATH/metrics/referring_video_object_segmentation_metric.py": 1678345974.1591957, "TEMPLATE_PATH/metrics/token_classification_metric.py": 1678345974.1608303, "TEMPLATE_PATH/metrics/video_summarization_metric.py": 1678345974.1620147, "TEMPLATE_PATH/metrics/builder.py": 1684246001.462111, "TEMPLATE_PATH/metrics/image_quality_assessment_mos_metric.py": 1678345974.1561337, "TEMPLATE_PATH/metrics/ned_metric.py": 1678345974.1578484, "TEMPLATE_PATH/metrics/text_ranking_metric.py": 1678345974.1604652, "TEMPLATE_PATH/metrics/movie_scene_segmentation_metric.py": 1678345974.1574643, 
"TEMPLATE_PATH/metrics/accuracy_metric.py": 1678345974.151063, "TEMPLATE_PATH/metrics/image_instance_segmentation_metric.py": 1678345974.1552966, "TEMPLATE_PATH/metrics/video_super_resolution_metric/metric_util.py": 1678345974.1631625, "TEMPLATE_PATH/metrics/video_super_resolution_metric/video_super_resolution_metric.py": 1678345974.163586, "TEMPLATE_PATH/metrics/video_super_resolution_metric/niqe.py": 1678695526.272421, "TEMPLATE_PATH/metrics/video_super_resolution_metric/matlab_functions.py": 1678695526.2713144, "TEMPLATE_PATH/metrics/ciderD/ciderD.py": 1666757257.1302783, "TEMPLATE_PATH/metrics/ciderD/ciderD_scorer.py": 1678695526.2677228, "TEMPLATE_PATH/metrics/action_detection_evaluator.py": 1678695526.2662494, "TEMPLATE_PATH/metrics/image_color_enhance_metric.py": 1678345974.153906, "TEMPLATE_PATH/metrics/image_portrait_enhancement_metric.py": 1678345974.1556726, "TEMPLATE_PATH/metrics/bleu_metric.py": 1678345974.1524482, "TEMPLATE_PATH/metrics/text_generation_metric.py": 1678345974.1598558, "TEMPLATE_PATH/metrics/base.py": 1678345974.152117, "TEMPLATE_PATH/pipelines/util.py": 1678345974.9337575, "TEMPLATE_PATH/pipelines/science/protein_structure_pipeline.py": 1678345974.9334872, "TEMPLATE_PATH/pipelines/builder.py": 1681714768.9746857, "TEMPLATE_PATH/pipelines/pipeline_template.py": 1684246001.5603435, "TEMPLATE_PATH/pipelines/audio/timestamp_pipeline.py": 1684246001.539448, "TEMPLATE_PATH/pipelines/audio/kws_farfield_pipeline.py": 1678695526.592166, "TEMPLATE_PATH/pipelines/audio/speaker_verification_pipeline.py": 1684246001.538074, "TEMPLATE_PATH/pipelines/audio/inverse_text_processing_pipeline.py": 1678345974.833208, "TEMPLATE_PATH/pipelines/audio/separation_pipeline.py": 1678345974.835587, "TEMPLATE_PATH/pipelines/audio/voice_activity_detection_pipeline.py": 1684246001.5403378, "TEMPLATE_PATH/pipelines/audio/text_to_speech_pipeline.py": 1678345974.837081, "TEMPLATE_PATH/pipelines/audio/kws_kwsbp_pipeline.py": 1678345974.8338838, "TEMPLATE_PATH/pipelines/audio/linear_aec_pipeline.py": 1678345974.8341885, "TEMPLATE_PATH/pipelines/audio/ans_pipeline.py": 1678695526.5817752, "TEMPLATE_PATH/pipelines/audio/speaker_verification_eres2net_pipeline.py": 1684247769.6647675, "TEMPLATE_PATH/pipelines/audio/lm_infer_pipeline.py": 1684246001.5343251, "TEMPLATE_PATH/pipelines/audio/ans_dfsmn_pipeline.py": 1678695526.5813322, "TEMPLATE_PATH/pipelines/audio/asr_inference_pipeline.py": 1684246001.5326667, "TEMPLATE_PATH/pipelines/audio/speaker_diarization_pipeline.py": 1684246001.537162, "TEMPLATE_PATH/pipelines/audio/speaker_verification_rdino_pipeline.py": 1684246001.5384402, "TEMPLATE_PATH/pipelines/audio/punctuation_processing_pipeline.py": 1684246001.5355213, "TEMPLATE_PATH/pipelines/audio/speaker_verification_light_pipeline.py": 1678345974.8364737, "TEMPLATE_PATH/pipelines/audio/speaker_change_locating_pipeline.py": 1684246001.5362113, "TEMPLATE_PATH/pipelines/audio/asr_wenet_inference_pipeline.py": 1678345974.8329349, "TEMPLATE_PATH/pipelines/multi_modal/asr_pipeline.py": 1678345974.9102848, "TEMPLATE_PATH/pipelines/multi_modal/image_captioning_pipeline.py": 1684246001.5449712, "TEMPLATE_PATH/pipelines/multi_modal/text_to_video_synthesis_pipeline.py": 1684246001.54651, "TEMPLATE_PATH/pipelines/multi_modal/mgeo_ranking_pipeline.py": 1678345974.913822, "TEMPLATE_PATH/pipelines/multi_modal/generative_multi_modal_embedding_pipeline.py": 1666757257.5339417, "TEMPLATE_PATH/pipelines/multi_modal/multimodal_dialogue_pipeline.py": 1684246001.5457838, 
"TEMPLATE_PATH/pipelines/multi_modal/text_to_image_synthesis_pipeline.py": 1683891255.6625693, "TEMPLATE_PATH/pipelines/multi_modal/text2sql_pipeline.py": 1678345974.9150336, "TEMPLATE_PATH/pipelines/multi_modal/visual_entailment_pipeline.py": 1678345974.916273, "TEMPLATE_PATH/pipelines/multi_modal/disco_guided_diffusion_pipeline/disco_guided_diffusion.py": 1681714768.987968, "TEMPLATE_PATH/pipelines/multi_modal/disco_guided_diffusion_pipeline/utils.py": 1681714768.988303, "TEMPLATE_PATH/pipelines/multi_modal/visual_question_answering_pipeline.py": 1678345974.916901, "TEMPLATE_PATH/pipelines/multi_modal/video_question_answering_pipeline.py": 1678345974.9160104, "TEMPLATE_PATH/pipelines/multi_modal/video_captioning_pipeline.py": 1678345974.915723, "TEMPLATE_PATH/pipelines/multi_modal/video_multi_modal_embedding_pipeline.py": 1666757257.5376425, "TEMPLATE_PATH/pipelines/multi_modal/efficient_diffusion_tuning_pipeline.py": 1683889954.550607, "TEMPLATE_PATH/pipelines/multi_modal/team_multi_modal_similarity_pipeline.py": 1666757257.5365796, "TEMPLATE_PATH/pipelines/multi_modal/diffusers_wrapped/diffusers_pipeline.py": 1684121077.5650501, "TEMPLATE_PATH/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/stable_diffusion_pipeline.py": 1684121077.566238, "TEMPLATE_PATH/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/chinese_stable_diffusion_pipeline.py": 1684121077.5659308, "TEMPLATE_PATH/pipelines/multi_modal/multi_modal_embedding_pipeline.py": 1678345974.9142408, "TEMPLATE_PATH/pipelines/multi_modal/ocr_recognition_pipeline.py": 1678345974.914581, "TEMPLATE_PATH/pipelines/multi_modal/document_vl_embedding_pipeline.py": 1678345974.9124298, "TEMPLATE_PATH/pipelines/multi_modal/image_text_retrieval_pipeline.py": 1678345974.913492, "TEMPLATE_PATH/pipelines/multi_modal/gridvlp_pipeline.py": 1678345974.9127157, "TEMPLATE_PATH/pipelines/multi_modal/visual_grounding_pipeline.py": 1678345974.9165354, "TEMPLATE_PATH/pipelines/multi_modal/soonet_video_temporal_grounding_pipeline.py": 1681714768.9888954, "TEMPLATE_PATH/pipelines/multi_modal/sudoku_pipeline.py": 1678345974.9148157, "TEMPLATE_PATH/pipelines/nlp/translation_evaluation_pipeline.py": 1684246001.5580392, "TEMPLATE_PATH/pipelines/nlp/glm130b_text_generation_pipeline.py": 1683889954.5535533, "TEMPLATE_PATH/pipelines/nlp/faq_question_answering_pipeline.py": 1678345974.9225557, "TEMPLATE_PATH/pipelines/nlp/document_grounded_dialog_generate_pipeline.py": 1684246001.5480056, "TEMPLATE_PATH/pipelines/nlp/automatic_post_editing_pipeline.py": 1666757257.5406618, "TEMPLATE_PATH/pipelines/nlp/named_entity_recognition_pipeline.py": 1684246001.5530941, "TEMPLATE_PATH/pipelines/nlp/interactive_translation_pipeline.py": 1678345974.9250765, "TEMPLATE_PATH/pipelines/nlp/summarization_pipeline.py": 1678345974.9273708, "TEMPLATE_PATH/pipelines/nlp/document_grounded_dialog_retrieval_pipeline.py": 1684246001.5499012, "TEMPLATE_PATH/pipelines/nlp/fasttext_text_classification_pipeline.py": 1678345974.9229462, "TEMPLATE_PATH/pipelines/nlp/word_alignment_pipeline.py": 1678695526.6479418, "TEMPLATE_PATH/pipelines/nlp/feature_extraction_pipeline.py": 1684246001.5515616, "TEMPLATE_PATH/pipelines/nlp/text_ranking_pipeline.py": 1684246001.5570047, "TEMPLATE_PATH/pipelines/nlp/fid_dialogue_pipeline.py": 1684246001.552004, "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py": 1684246001.5557013, "TEMPLATE_PATH/pipelines/nlp/codegeex_code_generation_pipeline.py": 1678345974.9174054, "TEMPLATE_PATH/pipelines/nlp/translation_quality_estimation_pipeline.py": 
1678345974.931919, "TEMPLATE_PATH/pipelines/nlp/fill_mask_pipeline.py": 1684246001.5525877, "TEMPLATE_PATH/pipelines/nlp/distributed_plug_pipeline.py": 1678345974.91977, "TEMPLATE_PATH/pipelines/nlp/conversational_text_to_sql_pipeline.py": 1678345974.9181793, "TEMPLATE_PATH/pipelines/nlp/distributed_gpt3_pipeline.py": 1681714768.9907482, "TEMPLATE_PATH/pipelines/nlp/information_extraction_pipeline.py": 1678345974.9244976, "TEMPLATE_PATH/pipelines/nlp/table_question_answering_pipeline.py": 1684246001.555219, "TEMPLATE_PATH/pipelines/nlp/user_satisfaction_estimation_pipeline.py": 1684246001.5591247, "TEMPLATE_PATH/pipelines/nlp/dialog_modeling_pipeline.py": 1678345974.9186983, "TEMPLATE_PATH/pipelines/nlp/canmt_translation_pipeline.py": 1683889954.5525997, "TEMPLATE_PATH/pipelines/nlp/word_segmentation_pipeline.py": 1678345974.9326284, "TEMPLATE_PATH/pipelines/nlp/document_segmentation_pipeline.py": 1684246001.5505779, "TEMPLATE_PATH/pipelines/nlp/distributed_gpt_moe_pipeline.py": 1678345974.9194465, "TEMPLATE_PATH/pipelines/nlp/extractive_summarization_pipeline.py": 1684246001.5509684, "TEMPLATE_PATH/pipelines/nlp/text_error_correction_pipeline.py": 1678695526.6476424, "TEMPLATE_PATH/pipelines/nlp/dialog_state_tracking_pipeline.py": 1684246001.5474644, "TEMPLATE_PATH/pipelines/nlp/mglm_text_summarization_pipeline.py": 1678695526.6446507, "TEMPLATE_PATH/pipelines/nlp/translation_pipeline.py": 1678345974.9313443, "TEMPLATE_PATH/pipelines/nlp/siamese_uie_pipeline.py": 1684246001.5545502, "TEMPLATE_PATH/pipelines/nlp/dialog_intent_prediction_pipeline.py": 1684246001.5471377, "TEMPLATE_PATH/pipelines/nlp/sentence_embedding_pipeline.py": 1684246001.5536666, "TEMPLATE_PATH/pipelines/nlp/document_grounded_dialog_rerank_pipeline.py": 1684246001.5493042, "TEMPLATE_PATH/pipelines/nlp/zero_shot_classification_pipeline.py": 1684246001.559703, "TEMPLATE_PATH/pipelines/nlp/text_generation_pipeline.py": 1684246001.5563328, "TEMPLATE_PATH/pipelines/nlp/language_identification_pipline.py": 1678695526.6442416, "TEMPLATE_PATH/pipelines/nlp/token_classification_pipeline.py": 1684246001.5576875, "TEMPLATE_PATH/pipelines/nlp/codegeex_code_translation_pipeline.py": 1678345974.9175882, "TEMPLATE_PATH/pipelines/cv/bad_image_detecting_pipeline.py": 1678345974.84121, "TEMPLATE_PATH/pipelines/cv/image_cartoon_pipeline.py": 1666757257.498452, "TEMPLATE_PATH/pipelines/cv/image_to_image_generate_pipeline.py": 1666757257.5083926, "TEMPLATE_PATH/pipelines/cv/facial_expression_recognition_pipeline.py": 1683889954.5460215, "TEMPLATE_PATH/pipelines/cv/retina_face_detection_pipeline.py": 1666757257.525595, "TEMPLATE_PATH/pipelines/cv/image_style_transfer_pipeline.py": 1684246001.5433357, "TEMPLATE_PATH/pipelines/cv/image_face_fusion_pipeline.py": 1678345974.8556206, "TEMPLATE_PATH/pipelines/cv/ulfd_face_detection_pipeline.py": 1666757257.5294175, "TEMPLATE_PATH/pipelines/cv/pedestrian_attribute_recognition_pipeline.py": 1683889954.5479586, "TEMPLATE_PATH/pipelines/cv/image_denoise_pipeline.py": 1678345974.8544917, "TEMPLATE_PATH/pipelines/cv/vop_retrieval_se_pipeline.py": 1678695526.6399703, "TEMPLATE_PATH/pipelines/cv/image_matting_pipeline.py": 1684246001.542891, "TEMPLATE_PATH/pipelines/cv/image_deblur_pipeline.py": 1678345974.8534613, "TEMPLATE_PATH/pipelines/cv/video_human_matting_pipeline.py": 1678345974.9057999, "TEMPLATE_PATH/pipelines/cv/live_category_pipeline.py": 1666757257.5096319, "TEMPLATE_PATH/pipelines/cv/image_structured_model_probing_pipeline.py": 1678345974.890545, 
"TEMPLATE_PATH/pipelines/cv/face_quality_assessment_pipeline.py": 1683889954.5438397, "TEMPLATE_PATH/pipelines/cv/face_processing_base_pipeline.py": 1683889954.5433195, "TEMPLATE_PATH/pipelines/cv/image_portrait_enhancement_pipeline.py": 1678345974.8593307, "TEMPLATE_PATH/pipelines/cv/image_color_enhance_pipeline.py": 1678345974.852745, "TEMPLATE_PATH/pipelines/cv/vision_efficient_tuning_pipeline.py": 1678695526.6396506, "TEMPLATE_PATH/pipelines/cv/tbs_detection_utils/utils.py": 1681714768.9856553, "TEMPLATE_PATH/pipelines/cv/video_object_segmentation_pipeline.py": 1678345974.9070742, "TEMPLATE_PATH/pipelines/cv/face_detection_pipeline.py": 1678345974.8464031, "TEMPLATE_PATH/pipelines/cv/body_3d_keypoints_pipeline.py": 1678345974.8419, "TEMPLATE_PATH/pipelines/cv/image_paintbyexample_pipeline.py": 1678695526.6043956, "TEMPLATE_PATH/pipelines/cv/face_recognition_ood_pipeline.py": 1683889954.5451627, "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py": 1678345974.8523827, "TEMPLATE_PATH/pipelines/cv/card_detection_pipeline.py": 1678345974.8422294, "TEMPLATE_PATH/pipelines/cv/table_recognition_pipeline.py": 1678345974.9024644, "TEMPLATE_PATH/pipelines/cv/image_to_image_translation_pipeline.py": 1666757257.5090609, "TEMPLATE_PATH/pipelines/cv/face_attribute_recognition_pipeline.py": 1683889954.5421839, "TEMPLATE_PATH/pipelines/cv/image_debanding_pipeline.py": 1678345974.8531418, "TEMPLATE_PATH/pipelines/cv/video_instance_segmentation_pipeline.py": 1681714768.9862943, "TEMPLATE_PATH/pipelines/cv/tinynas_classification_pipeline.py": 1669108798.6442235, "TEMPLATE_PATH/pipelines/cv/human_reconstruction_pipeline.py": 1681714768.9829588, "TEMPLATE_PATH/pipelines/cv/video_multi_object_tracking_pipeline.py": 1678345974.906372, "TEMPLATE_PATH/pipelines/cv/controllable_image_generation_pipeline.py": 1678695526.600098, "TEMPLATE_PATH/pipelines/cv/image_defrcn_fewshot_pipeline.py": 1678345974.8541288, "TEMPLATE_PATH/pipelines/cv/ddpm_semantic_segmentation_pipeline.py": 1678345974.8439617, "TEMPLATE_PATH/pipelines/cv/content_check_pipeline.py": 1678345974.8424017, "TEMPLATE_PATH/pipelines/cv/vop_retrieval_pipeline.py": 1678345974.9095361, "TEMPLATE_PATH/pipelines/cv/object_detection_3d_pipeline.py": 1678695526.6065028, "TEMPLATE_PATH/pipelines/cv/lineless_table_recognition_pipeline.py": 1678695526.6051717, "TEMPLATE_PATH/pipelines/cv/cmdssl_video_embedding_pipeline.py": 1666757257.487139, "TEMPLATE_PATH/pipelines/cv/tinynas_detection_pipeline.py": 1678345974.9044118, "TEMPLATE_PATH/pipelines/cv/video_deinterlace_pipeline.py": 1678695526.6368866, "TEMPLATE_PATH/pipelines/cv/image_open_vocabulary_detection_pipeline.py": 1678345974.8585114, "TEMPLATE_PATH/pipelines/cv/language_guided_video_summarization_pipeline.py": 1678345974.891131, "TEMPLATE_PATH/pipelines/cv/body_2d_keypoints_pipeline.py": 1666757257.4853406, "TEMPLATE_PATH/pipelines/cv/face_human_hand_detection_pipeline.py": 1666778289.6917272, "TEMPLATE_PATH/pipelines/cv/hicossl_video_embedding_pipeline.py": 1666757257.4973748, "TEMPLATE_PATH/pipelines/cv/face_recognition_pipeline.py": 1678345974.8498085, "TEMPLATE_PATH/pipelines/cv/image_body_reshaping_pipeline.py": 1666757257.497916, "TEMPLATE_PATH/pipelines/cv/image_inpainting_pipeline.py": 1666757257.5020847, "TEMPLATE_PATH/pipelines/cv/face_recognition_onnx_fm_pipeline.py": 1683889954.5441782, "TEMPLATE_PATH/pipelines/cv/image_driving_perception_pipeline.py": 1678695526.6034508, "TEMPLATE_PATH/pipelines/cv/video_stabilization_pipeline.py": 1678345974.9080534, 
"TEMPLATE_PATH/pipelines/cv/indoor_layout_estimation_pipeline.py": 1678345974.8907528, "TEMPLATE_PATH/pipelines/cv/ddcolor_image_colorization_pipeline.py": 1678345974.8437521, "TEMPLATE_PATH/pipelines/cv/face_emotion_pipeline.py": 1666778289.691363, "TEMPLATE_PATH/pipelines/cv/mtcnn_face_detection_pipeline.py": 1666757257.5116644, "TEMPLATE_PATH/pipelines/cv/nerf_recon_acc_pipeline.py": 1678695526.6060696, "TEMPLATE_PATH/pipelines/cv/image_bts_depth_estimation_pipeline.py": 1681714768.983773, "TEMPLATE_PATH/pipelines/cv/facial_landmark_confidence_pipeline.py": 1683889954.5463324, "TEMPLATE_PATH/pipelines/cv/face_reconstruction_pipeline.py": 1684246001.5418012, "TEMPLATE_PATH/pipelines/cv/mog_face_detection_pipeline.py": 1666757257.5102239, "TEMPLATE_PATH/pipelines/cv/skin_retouching_pipeline.py": 1684246001.5436969, "TEMPLATE_PATH/pipelines/cv/vision_middleware_pipeline.py": 1678345974.9092615, "TEMPLATE_PATH/pipelines/cv/face_liveness_ir_pipeline.py": 1683889954.542443, "TEMPLATE_PATH/pipelines/cv/image_detection_pipeline.py": 1678345974.8551383, "TEMPLATE_PATH/pipelines/cv/realtime_video_object_detection_pipeline.py": 1678695526.631697, "TEMPLATE_PATH/pipelines/cv/video_panoptic_segmentation_pipeline.py": 1678345974.9074109, "TEMPLATE_PATH/pipelines/cv/action_detection_pipeline.py": 1678345974.8401477, "TEMPLATE_PATH/pipelines/cv/product_segmentation_pipeline.py": 1666778289.692797, "TEMPLATE_PATH/pipelines/cv/tbs_detection_pipeline.py": 1684246001.544016, "TEMPLATE_PATH/pipelines/cv/image_matching_pipeline.py": 1678345974.857486, "TEMPLATE_PATH/pipelines/cv/video_category_pipeline.py": 1669108798.6445787, "TEMPLATE_PATH/pipelines/cv/hand_static_pipeline.py": 1666778289.6920865, "TEMPLATE_PATH/pipelines/cv/animal_recognition_pipeline.py": 1678345974.840479, "TEMPLATE_PATH/pipelines/cv/pointcloud_sceneflow_estimation_pipeline.py": 1678345974.9002383, "TEMPLATE_PATH/pipelines/cv/image_instance_segmentation_pipeline.py": 1678345974.8571947, "TEMPLATE_PATH/pipelines/cv/video_frame_interpolation_pipeline.py": 1678345974.9055316, "TEMPLATE_PATH/pipelines/cv/image_quality_assessment_mos_pipeline.py": 1678345974.8893554, "TEMPLATE_PATH/pipelines/cv/video_summarization_pipeline.py": 1666757257.5319543, "TEMPLATE_PATH/pipelines/cv/panorama_depth_estimation_pipeline.py": 1678345974.8999748, "TEMPLATE_PATH/pipelines/cv/fast_instance_segmentation_pipeline.py": 1684246001.5421734, "TEMPLATE_PATH/pipelines/cv/vidt_pipeline.py": 1681714768.9865973, "TEMPLATE_PATH/pipelines/cv/image_skychange_pipeline.py": 1678345974.8903258, "TEMPLATE_PATH/pipelines/cv/image_quality_assessment_man_pipeline.py": 1678695526.6047776, "TEMPLATE_PATH/pipelines/cv/image_restoration_pipeline.py": 1678345974.8897073, "TEMPLATE_PATH/pipelines/cv/video_inpainting_pipeline.py": 1666757257.530707, "TEMPLATE_PATH/pipelines/cv/face_image_generation_pipeline.py": 1666757257.4936557, "TEMPLATE_PATH/pipelines/cv/video_super_resolution_pipeline.py": 1678695526.6385, "TEMPLATE_PATH/pipelines/cv/referring_video_object_segmentation_pipeline.py": 1678695526.633713, "TEMPLATE_PATH/pipelines/cv/virtual_try_on_pipeline.py": 1678345974.9087186, "TEMPLATE_PATH/pipelines/cv/ocr_recognition_pipeline.py": 1681714768.98453, "TEMPLATE_PATH/pipelines/cv/ocr_detection_pipeline.py": 1678695526.607303, "TEMPLATE_PATH/pipelines/cv/movie_scene_segmentation_pipeline.py": 1684121077.5641353, "TEMPLATE_PATH/pipelines/cv/maskdino_instance_segmentation_pipeline.py": 1678345974.892828, "TEMPLATE_PATH/pipelines/cv/video_colorization_pipeline.py": 
1678345974.904686, "TEMPLATE_PATH/pipelines/cv/image_human_parsing_pipeline.py": 1678345974.8562174, "TEMPLATE_PATH/pipelines/cv/face_liveness_xc_pipeline.py": 1683889954.5426972, "TEMPLATE_PATH/pipelines/cv/crowd_counting_pipeline.py": 1666757257.4877608, "TEMPLATE_PATH/pipelines/cv/video_depth_estimation_pipeline.py": 1678345974.9052026, "TEMPLATE_PATH/pipelines/cv/image_colorization_pipeline.py": 1666757257.5002234, "TEMPLATE_PATH/pipelines/cv/arc_face_recognition_pipeline.py": 1683889954.5418775, "TEMPLATE_PATH/pipelines/cv/image_quality_assessment_degradation_pipeline.py": 1678345974.8601525, "TEMPLATE_PATH/pipelines/cv/ocr_utils/model_convnext_transformer.py": 1666757257.5147195, "TEMPLATE_PATH/pipelines/cv/ocr_utils/model_resnet18_half.py": 1678345974.8975644, "TEMPLATE_PATH/pipelines/cv/ocr_utils/resnet18_v1.py": 1666757257.5203307, "TEMPLATE_PATH/pipelines/cv/ocr_utils/model_dla34.py": 1678345974.897257, "TEMPLATE_PATH/pipelines/cv/ocr_utils/ocr_modules/vitstr.py": 1666757257.5185978, "TEMPLATE_PATH/pipelines/cv/ocr_utils/ocr_modules/timm_tinyc.py": 1666757257.517882, "TEMPLATE_PATH/pipelines/cv/ocr_utils/ocr_modules/convnext.py": 1666757257.5171049, "TEMPLATE_PATH/pipelines/cv/ocr_utils/table_process.py": 1678345974.8992608, "TEMPLATE_PATH/pipelines/cv/ocr_utils/resnet_utils.py": 1666757257.520979, "TEMPLATE_PATH/pipelines/cv/ocr_utils/ops.py": 1678345974.898596, "TEMPLATE_PATH/pipelines/cv/ocr_utils/utils.py": 1678345974.8997033, "TEMPLATE_PATH/pipelines/cv/ocr_utils/model_vlpt.py": 1678345974.8979936, "TEMPLATE_PATH/pipelines/cv/ocr_utils/model_resnet_mutex_v4_linewithchar.py": 1666757257.5153735, "TEMPLATE_PATH/pipelines/cv/image_inpainting_sdv2_pipeline.py": 1678345974.8565032, "TEMPLATE_PATH/pipelines/cv/image_super_resolution_pipeline.py": 1666757257.5076354, "TEMPLATE_PATH/pipelines/cv/image_salient_detection_pipeline.py": 1678345974.890104, "TEMPLATE_PATH/pipelines/cv/video_single_object_tracking_pipeline.py": 1678695526.637953, "TEMPLATE_PATH/pipelines/cv/face_recognition_onnx_ir_pipeline.py": 1683889954.5448508, "TEMPLATE_PATH/pipelines/cv/product_retrieval_embedding_pipeline.py": 1666757257.5221663, "TEMPLATE_PATH/pipelines/cv/mask_face_recognition_pipeline.py": 1678345974.8923888, "TEMPLATE_PATH/pipelines/cv/mobile_image_super_resolution_pipeline.py": 1678345974.893134, "TEMPLATE_PATH/pipelines/cv/license_plate_detection_pipeline.py": 1678345974.8913991, "TEMPLATE_PATH/pipelines/cv/image_semantic_segmentation_pipeline.py": 1666757257.5062222, "TEMPLATE_PATH/pipelines/cv/text_driven_segmentation_pipleline.py": 1666757257.5275502, "TEMPLATE_PATH/pipelines/cv/motion_generation_pipeline.py": 1678345974.8933938, "TEMPLATE_PATH/pipelines/cv/image_mvs_depth_estimation_pipeline.py": 1678345974.8579566, "TEMPLATE_PATH/pipelines/cv/image_depth_estimation_pipeline.py": 1678345974.854762, "TEMPLATE_PATH/pipelines/cv/action_recognition_pipeline.py": 1666757257.4842403, "TEMPLATE_PATH/pipelines/cv/image_reid_person_pipeline.py": 1666757257.5051024, "TEMPLATE_PATH/pipelines/cv/general_recognition_pipeline.py": 1678345974.851133, "TEMPLATE_PATH/pipelines/cv/shop_segmentation_pipleline.py": 1666757257.5262067, "TEMPLATE_PATH/pipelines/base.py": 1684246001.5408666, "TEMPLATE_PATH/preprocessors/kws.py": 1669108798.6509876, "TEMPLATE_PATH/preprocessors/multi_modal.py": 1684246001.5620222, "TEMPLATE_PATH/preprocessors/science/uni_fold.py": 1678345974.9713385, "TEMPLATE_PATH/preprocessors/tts.py": 1678695526.6618354, "TEMPLATE_PATH/preprocessors/asr.py": 1684246001.5611215, 
"TEMPLATE_PATH/preprocessors/builder.py": 1666757257.563367, "TEMPLATE_PATH/preprocessors/movie_scene_segmentation/transforms.py": 1678695526.6574507, "TEMPLATE_PATH/preprocessors/common.py": 1678695526.6514163, "TEMPLATE_PATH/preprocessors/nlp/token_classification_preprocessor.py": 1684246001.562926, "TEMPLATE_PATH/preprocessors/nlp/siamese_uie_preprocessor.py": 1678695526.6586974, "TEMPLATE_PATH/preprocessors/nlp/relation_extraction_preprocessor.py": 1678345974.9533129, "TEMPLATE_PATH/preprocessors/nlp/token_classification_viet_preprocessor.py": 1678345974.962513, "TEMPLATE_PATH/preprocessors/nlp/translation_evaluation_preprocessor.py": 1684246001.5633366, "TEMPLATE_PATH/preprocessors/nlp/text_classification_preprocessor.py": 1678345974.957994, "TEMPLATE_PATH/preprocessors/nlp/document_grounded_dialog_retrieval_preprocessor.py": 1678345974.9482706, "TEMPLATE_PATH/preprocessors/nlp/zero_shot_classification_preprocessor.py": 1678345974.9639843, "TEMPLATE_PATH/preprocessors/nlp/canmt_translation.py": 1683889954.5585697, "TEMPLATE_PATH/preprocessors/nlp/fill_mask_preprocessor.py": 1678345974.952453, "TEMPLATE_PATH/preprocessors/nlp/word_alignment_preprocessor.py": 1678695526.6615062, "TEMPLATE_PATH/preprocessors/nlp/space_T_en/fields/preprocess_dataset.py": 1666757257.5921733, "TEMPLATE_PATH/preprocessors/nlp/space_T_en/fields/parse.py": 1666757257.5916936, "TEMPLATE_PATH/preprocessors/nlp/space_T_en/fields/common_utils.py": 1666757257.5911734, "TEMPLATE_PATH/preprocessors/nlp/space_T_en/fields/process_dataset.py": 1666757257.5927129, "TEMPLATE_PATH/preprocessors/nlp/space_T_en/conversational_text_to_sql_preprocessor.py": 1669108798.6572416, "TEMPLATE_PATH/preprocessors/nlp/document_grounded_dialog_generate_preprocessor.py": 1678345974.946995, "TEMPLATE_PATH/preprocessors/nlp/text_error_correction.py": 1678695526.6594443, "TEMPLATE_PATH/preprocessors/nlp/text_ranking_preprocessor.py": 1678345974.9603443, "TEMPLATE_PATH/preprocessors/nlp/transformers_tokenizer.py": 1683889954.5599382, "TEMPLATE_PATH/preprocessors/nlp/bert_seq_cls_tokenizer.py": 1666757257.5692148, "TEMPLATE_PATH/preprocessors/nlp/text_clean.py": 1683889954.5587656, "TEMPLATE_PATH/preprocessors/nlp/utils.py": 1678345974.9635713, "TEMPLATE_PATH/preprocessors/nlp/document_segmentation_preprocessor.py": 1678345974.94956, "TEMPLATE_PATH/preprocessors/nlp/sentence_embedding_preprocessor.py": 1678345974.9542353, "TEMPLATE_PATH/preprocessors/nlp/mglm_summarization_preprocessor.py": 1669108798.653473, "TEMPLATE_PATH/preprocessors/nlp/token_classification_thai_preprocessor.py": 1678345974.9620914, "TEMPLATE_PATH/preprocessors/nlp/mgeo_ranking_preprocessor.py": 1678345974.9527726, "TEMPLATE_PATH/preprocessors/nlp/space/dialog_intent_prediction_preprocessor.py": 1669108798.6542664, "TEMPLATE_PATH/preprocessors/nlp/space/lazy_dataset.py": 1666757257.582808, "TEMPLATE_PATH/preprocessors/nlp/space/dialog_state_tracking_preprocessor.py": 1666757257.579098, "TEMPLATE_PATH/preprocessors/nlp/space/preprocess.py": 1666757257.583339, "TEMPLATE_PATH/preprocessors/nlp/space/data_loader.py": 1666757257.5771036, "TEMPLATE_PATH/preprocessors/nlp/space/batch.py": 1678345974.9554622, "TEMPLATE_PATH/preprocessors/nlp/space/dialog_modeling_preprocessor.py": 1666757257.5782604, "TEMPLATE_PATH/preprocessors/nlp/space/tokenizer.py": 1678345974.9572399, "TEMPLATE_PATH/preprocessors/nlp/space/dst_processors.py": 1669108798.6548202, "TEMPLATE_PATH/preprocessors/nlp/space/args.py": 1666757257.5759423, "TEMPLATE_PATH/preprocessors/nlp/space/fields/gen_field.py": 
1678345974.95627, "TEMPLATE_PATH/preprocessors/nlp/space/fields/intent_field.py": 1666757257.5822835, "TEMPLATE_PATH/preprocessors/nlp/space/sampler.py": 1666757257.5839186, "TEMPLATE_PATH/preprocessors/nlp/space/tensorlistdataset.py": 1666757257.5844374, "TEMPLATE_PATH/preprocessors/nlp/dialog_classification_use_preprocessor.py": 1678345974.9462962, "TEMPLATE_PATH/preprocessors/nlp/text_generation_preprocessor.py": 1681714768.996753, "TEMPLATE_PATH/preprocessors/nlp/space_T_cn/table_question_answering_preprocessor.py": 1666757257.5885906, "TEMPLATE_PATH/preprocessors/nlp/space_T_cn/fields/database.py": 1669108798.6561291, "TEMPLATE_PATH/preprocessors/nlp/space_T_cn/fields/schema_link.py": 1666757257.587568, "TEMPLATE_PATH/preprocessors/nlp/space_T_cn/fields/struct.py": 1678345974.9575932, "TEMPLATE_PATH/preprocessors/nlp/document_grounded_dialog_rerank_preprocessor.py": 1678345974.9476607, "TEMPLATE_PATH/preprocessors/nlp/feature_extraction_preprocessor.py": 1678345974.9510474, "TEMPLATE_PATH/preprocessors/nlp/faq_question_answering_preprocessor.py": 1678345974.950317, "TEMPLATE_PATH/preprocessors/audio.py": 1678345974.9349864, "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py": 1678695526.6556287, "TEMPLATE_PATH/preprocessors/cv/util.py": 1678345974.9410372, "TEMPLATE_PATH/preprocessors/cv/timer.py": 1678345974.940564, "TEMPLATE_PATH/preprocessors/cv/bad_image_detecting_preprocessor.py": 1678345974.9370506, "TEMPLATE_PATH/preprocessors/cv/mmcls_preprocessor.py": 1678345974.9400585, "TEMPLATE_PATH/preprocessors/cv/controllable_image_generation.py": 1678695526.6533623, "TEMPLATE_PATH/preprocessors/cv/image_quality_assessment_mos.py": 1678345974.939592, "TEMPLATE_PATH/preprocessors/cv/image_restoration_preprocessor.py": 1678345974.939753, "TEMPLATE_PATH/preprocessors/cv/cv2_transforms.py": 1678695526.6544669, "TEMPLATE_PATH/preprocessors/cv/video_super_resolution.py": 1678345974.941883, "TEMPLATE_PATH/preprocessors/cv/image_quality_assessment_man.py": 1678695526.6563368, "TEMPLATE_PATH/preprocessors/cv/action_detection_mapper.py": 1678695526.6529279, "TEMPLATE_PATH/preprocessors/cv/video_stabilization.py": 1678345974.9413676, "TEMPLATE_PATH/preprocessors/video.py": 1678345974.971809, "TEMPLATE_PATH/preprocessors/image.py": 1678695526.6569033, "TEMPLATE_PATH/preprocessors/base.py": 1683889954.557044, "TEMPLATE_PATH/preprocessors/ofa/visual_question_answering.py": 1678345974.97092, "TEMPLATE_PATH/preprocessors/ofa/image_classification.py": 1678345974.9656992, "TEMPLATE_PATH/preprocessors/ofa/utils/transforms.py": 1666757257.6044796, "TEMPLATE_PATH/preprocessors/ofa/utils/bridge_content_encoder.py": 1678345974.9681842, "TEMPLATE_PATH/preprocessors/ofa/utils/collate.py": 1678345974.968443, "TEMPLATE_PATH/preprocessors/ofa/utils/get_tables.py": 1678345974.9691057, "TEMPLATE_PATH/preprocessors/ofa/utils/text2phone.py": 1678345974.9699347, "TEMPLATE_PATH/preprocessors/ofa/utils/audio_helper.py": 1678345974.967928, "TEMPLATE_PATH/preprocessors/ofa/utils/random_help.py": 1678345974.969357, "TEMPLATE_PATH/preprocessors/ofa/utils/vision_helper.py": 1666757257.6050525, "TEMPLATE_PATH/preprocessors/ofa/utils/constant.py": 1678345974.9688995, "TEMPLATE_PATH/preprocessors/ofa/asr.py": 1678345974.964469, "TEMPLATE_PATH/preprocessors/ofa/text2sql.py": 1678345974.9668753, "TEMPLATE_PATH/preprocessors/ofa/text_classification.py": 1678345974.9671476, "TEMPLATE_PATH/preprocessors/ofa/image_captioning.py": 1678345974.9651005, "TEMPLATE_PATH/preprocessors/ofa/ocr_recognition.py": 
1678345974.9659903, "TEMPLATE_PATH/preprocessors/ofa/visual_entailment.py": 1678345974.9702795, "TEMPLATE_PATH/preprocessors/ofa/visual_grounding.py": 1678345974.970591, "TEMPLATE_PATH/preprocessors/ofa/summarization.py": 1678345974.96643, "TEMPLATE_PATH/preprocessors/ofa/text_to_image_synthesis.py": 1678345974.967411, "TEMPLATE_PATH/preprocessors/ofa/sudoku.py": 1678345974.966176, "TEMPLATE_PATH/preprocessors/ofa/base.py": 1678345974.9648006, "TEMPLATE_PATH/trainers/parallel/builder.py": 1666757257.6517034, "TEMPLATE_PATH/trainers/parallel/utils.py": 1666757257.652214, "TEMPLATE_PATH/trainers/optimizer/builder.py": 1678345975.0087109, "TEMPLATE_PATH/trainers/optimizer/child_tuning_adamw_optimizer.py": 1678345975.0091202, "TEMPLATE_PATH/trainers/lrscheduler/builder.py": 1681714769.013421, "TEMPLATE_PATH/trainers/lrscheduler/warmup/warmup.py": 1666757257.6361334, "TEMPLATE_PATH/trainers/lrscheduler/warmup/base.py": 1666757257.635629, "TEMPLATE_PATH/trainers/nlp_trainer.py": 1681714769.015515, "TEMPLATE_PATH/trainers/utils/inference.py": 1681714769.069791, "TEMPLATE_PATH/trainers/utils/log_buffer.py": 1666757257.6546545, "TEMPLATE_PATH/trainers/training_args.py": 1684246001.5734115, "TEMPLATE_PATH/trainers/builder.py": 1683889954.5614784, "TEMPLATE_PATH/trainers/audio/kws_nearfield_trainer.py": 1683889954.5606887, "TEMPLATE_PATH/trainers/audio/kws_utils/model_utils.py": 1678345974.9777398, "TEMPLATE_PATH/trainers/audio/kws_utils/runtime_utils.py": 1678345974.9782813, "TEMPLATE_PATH/trainers/audio/kws_utils/det_utils.py": 1681714769.0015252, "TEMPLATE_PATH/trainers/audio/kws_utils/batch_utils.py": 1683889954.5611897, "TEMPLATE_PATH/trainers/audio/kws_utils/file_utils.py": 1681714769.0070894, "TEMPLATE_PATH/trainers/audio/kws_farfield_trainer.py": 1681714768.9980917, "TEMPLATE_PATH/trainers/audio/separation_trainer.py": 1678345974.9789073, "TEMPLATE_PATH/trainers/audio/asr_trainer.py": 1678345974.974677, "TEMPLATE_PATH/trainers/audio/tts_trainer.py": 1678695526.6909325, "TEMPLATE_PATH/trainers/audio/ans_trainer.py": 1666757257.6118267, "TEMPLATE_PATH/trainers/hooks/checkpoint/checkpoint_hook.py": 1684246001.5663064, "TEMPLATE_PATH/trainers/hooks/checkpoint/checkpoint_processor.py": 1684246001.5666258, "TEMPLATE_PATH/trainers/hooks/checkpoint/load_checkpoint_hook.py": 1684246001.5668476, "TEMPLATE_PATH/trainers/hooks/logger/text_logger_hook.py": 1683889954.5632632, "TEMPLATE_PATH/trainers/hooks/logger/tensorboard_hook.py": 1678695526.7020135, "TEMPLATE_PATH/trainers/hooks/logger/base.py": 1666757257.6279666, "TEMPLATE_PATH/trainers/hooks/optimizer/apex_optimizer_hook.py": 1684246001.5697649, "TEMPLATE_PATH/trainers/hooks/optimizer/torch_optimizer_hook.py": 1684246001.5703554, "TEMPLATE_PATH/trainers/hooks/optimizer/base.py": 1684246001.5700371, "TEMPLATE_PATH/trainers/hooks/distributed/megatron_hook.py": 1684246001.5681868, "TEMPLATE_PATH/trainers/hooks/distributed/deepspeed_hook.py": 1684246001.5679266, "TEMPLATE_PATH/trainers/hooks/distributed/ddp_hook.py": 1684246001.567703, "TEMPLATE_PATH/trainers/hooks/lr_scheduler_hook.py": 1684246001.569458, "TEMPLATE_PATH/trainers/hooks/early_stop_hook.py": 1684246001.5685089, "TEMPLATE_PATH/trainers/hooks/hook.py": 1684246001.5691583, "TEMPLATE_PATH/trainers/hooks/priority.py": 1666757257.6328363, "TEMPLATE_PATH/trainers/hooks/builder.py": 1666757257.6225636, "TEMPLATE_PATH/trainers/hooks/clip_clamp_logit_scale_hook.py": 1669108798.683138, "TEMPLATE_PATH/trainers/hooks/compression/sparsity_hook.py": 1684246001.567191, 
"TEMPLATE_PATH/trainers/hooks/compression/utils.py": 1678345974.9935489, "TEMPLATE_PATH/trainers/hooks/iter_timer_hook.py": 1666757257.6266162, "TEMPLATE_PATH/trainers/hooks/evaluation_hook.py": 1684246001.5688426, "TEMPLATE_PATH/trainers/multi_modal/clip/clip_trainer.py": 1684246001.571492, "TEMPLATE_PATH/trainers/multi_modal/clip/clip_trainer_utils.py": 1669108798.6861904, "TEMPLATE_PATH/trainers/multi_modal/efficient_diffusion_tuning/efficient_diffusion_tuning_trainer.py": 1683889954.5644114, "TEMPLATE_PATH/trainers/multi_modal/mplug/mplug_trainer.py": 1678345975.001256, "TEMPLATE_PATH/trainers/multi_modal/team/team_trainer.py": 1678345975.0027127, "TEMPLATE_PATH/trainers/multi_modal/team/team_trainer_utils.py": 1669108798.690418, "TEMPLATE_PATH/trainers/multi_modal/mgeo_ranking_trainer.py": 1678345975.0009506, "TEMPLATE_PATH/trainers/multi_modal/ofa/ofa_trainer.py": 1678345975.0016596, "TEMPLATE_PATH/trainers/multi_modal/ofa/ofa_trainer_utils.py": 1678345975.002343, "TEMPLATE_PATH/trainers/default_config.py": 1684246001.5650253, "TEMPLATE_PATH/trainers/nlp/gpt_moe_trainer.py": 1678345975.0055368, "TEMPLATE_PATH/trainers/nlp/plug_trainer.py": 1678695526.7082524, "TEMPLATE_PATH/trainers/nlp/text_generation_trainer.py": 1681714769.0152323, "TEMPLATE_PATH/trainers/nlp/document_grounded_dialog_rerank_trainer.py": 1678345975.004579, "TEMPLATE_PATH/trainers/nlp/csanmt_translation_trainer.py": 1678345975.0036387, "TEMPLATE_PATH/trainers/nlp/translation_evaluation_trainer.py": 1684246001.5721004, "TEMPLATE_PATH/trainers/nlp/faq_question_answering_trainer.py": 1678345975.0051053, "TEMPLATE_PATH/trainers/nlp/table_question_answering_trainer.py": 1678345975.0076075, "TEMPLATE_PATH/trainers/nlp/sequence_classification_trainer.py": 1678345975.0066545, "TEMPLATE_PATH/trainers/nlp/sentence_embedding_trainer.py": 1678695526.7085762, "TEMPLATE_PATH/trainers/nlp/gpt3_trainer.py": 1681714769.014518, "TEMPLATE_PATH/trainers/nlp/text_ranking_trainer.py": 1666757257.6478848, "TEMPLATE_PATH/trainers/nlp/siamese_uie_trainer.py": 1681714769.014841, "TEMPLATE_PATH/trainers/nlp/space/metrics/metrics_tracker.py": 1666757257.645518, "TEMPLATE_PATH/trainers/nlp/space/dialog_intent_trainer.py": 1666757257.6433034, "TEMPLATE_PATH/trainers/nlp/space/eval.py": 1669108798.6920927, "TEMPLATE_PATH/trainers/nlp/space/trainer/intent_trainer.py": 1666757257.6473625, "TEMPLATE_PATH/trainers/nlp/space/trainer/gen_trainer.py": 1666757257.6467648, "TEMPLATE_PATH/trainers/nlp/space/dialog_modeling_trainer.py": 1666757257.64378, "TEMPLATE_PATH/trainers/nlp/document_grounded_dialog_retrieval_trainer.py": 1678345975.004814, "TEMPLATE_PATH/trainers/nlp/document_grounded_dialog_generate_trainer.py": 1678345975.0042856, "TEMPLATE_PATH/trainers/cli_argument_parser.py": 1684246001.5641959, "TEMPLATE_PATH/trainers/cv/ocr_recognition_trainer.py": 1681714769.0097866, "TEMPLATE_PATH/trainers/cv/image_instance_segmentation_trainer.py": 1666757257.6163688, "TEMPLATE_PATH/trainers/cv/referring_video_object_segmentation_trainer.py": 1678695526.6955135, "TEMPLATE_PATH/trainers/cv/vision_efficient_tuning_trainer.py": 1678695526.696203, "TEMPLATE_PATH/trainers/cv/movie_scene_segmentation_trainer.py": 1666757257.617418, "TEMPLATE_PATH/trainers/cv/nerf_recon_acc_trainer.py": 1678695526.694501, "TEMPLATE_PATH/trainers/cv/image_detection_damoyolo_trainer.py": 1681714769.008786, "TEMPLATE_PATH/trainers/cv/image_classifition_trainer.py": 1684246001.5647185, "TEMPLATE_PATH/trainers/cv/cartoon_translation_trainer.py": 1678695526.6927238, 
"TEMPLATE_PATH/trainers/cv/ocr_detection_db_trainer.py": 1681714769.0095627, "TEMPLATE_PATH/trainers/cv/card_detection_scrfd_trainer.py": 1666757257.6147146, "TEMPLATE_PATH/trainers/cv/face_detection_scrfd_trainer.py": 1666757257.6152842, "TEMPLATE_PATH/trainers/cv/image_inpainting_trainer.py": 1666757257.6158333, "TEMPLATE_PATH/trainers/cv/image_portrait_enhancement_trainer.py": 1666757257.6168902, "TEMPLATE_PATH/trainers/cv/action_detection_trainer.py": 1678695526.6920478, "TEMPLATE_PATH/trainers/cv/image_defrcn_fewshot_detection_trainer.py": 1678345974.9814935, "TEMPLATE_PATH/trainers/trainer.py": 1684246001.5725896, "TEMPLATE_PATH/trainers/base.py": 1681714769.0076036, "TEMPLATE_PATH/msdatasets/ms_dataset.py": 1684317777.8670049, "TEMPLATE_PATH/msdatasets/context/dataset_context_config.py": 1684246001.5200734, "TEMPLATE_PATH/msdatasets/auth/auth_config.py": 1684121077.5437593, "TEMPLATE_PATH/msdatasets/meta/data_meta_config.py": 1681714768.9622037, "TEMPLATE_PATH/msdatasets/meta/data_meta_manager.py": 1684246001.5257208, "TEMPLATE_PATH/msdatasets/utils/oss_utils.py": 1678345974.7962904, "TEMPLATE_PATH/msdatasets/utils/maxcompute_utils.py": 1684246001.5273504, "TEMPLATE_PATH/msdatasets/utils/dataset_utils.py": 1681789723.9124653, "TEMPLATE_PATH/msdatasets/utils/delete_utils.py": 1669108798.6375175, "TEMPLATE_PATH/msdatasets/utils/upload_utils.py": 1678345974.7967587, "TEMPLATE_PATH/msdatasets/task_datasets/video_summarization_dataset.py": 1681714768.9676905, "TEMPLATE_PATH/msdatasets/task_datasets/sidd_image_denoising.py": 1681714768.966313, "TEMPLATE_PATH/msdatasets/task_datasets/torch_base_dataset.py": 1681714768.9668994, "TEMPLATE_PATH/msdatasets/task_datasets/reds_image_deblurring_dataset.py": 1681714768.9657562, "TEMPLATE_PATH/msdatasets/task_datasets/gopro_image_deblurring_dataset.py": 1681714768.9650407, "TEMPLATE_PATH/msdatasets/data_files/data_files_manager.py": 1681714541.6009839, "TEMPLATE_PATH/msdatasets/audio/asr_dataset.py": 1681714768.9251826, "TEMPLATE_PATH/msdatasets/download/download_config.py": 1678345974.7719202, "TEMPLATE_PATH/msdatasets/download/download_manager.py": 1678345974.772169, "TEMPLATE_PATH/msdatasets/download/dataset_builder.py": 1684246001.5244808, "TEMPLATE_PATH/msdatasets/dataset_cls/dataset.py": 1684246001.5233805, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_quality_assessment_degradation/image_quality_assessment_degradation_dataset.py": 1681714769.3278096, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_portrait_enhancement/data_utils.py": 1681714769.329037, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_portrait_enhancement/image_portrait_enhancement_dataset.py": 1681714769.3286672, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/veco_dataset.py": 1681714769.3091025, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_instance_segmentation_coco_dataset.py": 1681714769.3300066, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_recognition_dataset.py": 1684246001.5225265, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/image_dataset.py": 1681714769.3208869, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/make_border_map.py": 1681714769.316306, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/normalize_image.py": 1681714769.3145473, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/make_icdar_data.py": 1681714769.3156052, 
"TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/make_seg_detection_data.py": 1681714769.3150744, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/data_process.py": 1681714769.3170214, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/augment_data.py": 1681714769.3176525, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/random_crop_data.py": 1681714769.3139958, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/data_loader.py": 1681714769.3214602, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/measures/quad_measurer.py": 1681714769.319075, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/measures/iou_evaluator.py": 1681714769.3196485, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/augmenter.py": 1681714769.3220074, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/bad_image_detecting/bad_image_detecting_dataset.py": 1681714769.3407733, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_summarization_dataset.py": 1681714768.9606102, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_inpainting/image_inpainting_dataset.py": 1681714769.3304625, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_inpainting/aug.py": 1681714769.33086, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/language_guided_video_summarization_dataset.py": 1681714769.3258283, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/builder.py": 1681714769.3403647, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py": 1681714769.324932, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/movie_scene_segmentation/sampler.py": 1681714769.32447, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/text_ranking_dataset.py": 1681714769.3096716, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/audio/kws_nearfield_dataset.py": 1681714769.3418102, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/audio/kws_farfield_dataset.py": 1681714769.3421595, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/audio/kws_nearfield_processor.py": 1681714769.3414555, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/audio/asr_dataset.py": 1681714768.928494, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/sidd_image_denoising/transforms.py": 1681714769.3102627, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/sidd_image_denoising/sidd_image_denoising_dataset.py": 1681714769.3109276, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/sidd_image_denoising/data_utils.py": 1681714769.3115368, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/reds_image_deblurring_dataset.py": 1681714768.9551075, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_frame_interpolation/data_utils.py": 1681714769.3079662, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_frame_interpolation/video_frame_interpolation_dataset.py": 1681714769.3073726, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_quality_assmessment_mos/image_quality_assessment_mos_dataset.py": 1681714769.3270853, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/mgeo_ranking_dataset.py": 1681714769.32538, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_stabilization/video_stabilization_dataset.py": 1681714769.3063674, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/gopro_image_deblurring_dataset.py": 1681714768.9389687, 
"TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/referring_video_object_segmentation/transformers.py": 1681714769.3127193, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py": 1681714769.3133628, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_colorization/image_colorization_dataset.py": 1681714768.9425967, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/build.py": 1681714769.3396943, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/datasets/coco.py": 1681714769.3387377, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/datasets/mosaic_wrapper.py": 1681714769.338384, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/transforms/build.py": 1681714769.3349338, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/transforms/transforms.py": 1681714769.334567, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/collate_batch.py": 1681714769.3393688, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/samplers/grouped_batch_sampler.py": 1681714769.3360593, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/samplers/distributed.py": 1681714769.336487, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/samplers/iteration_based_batch_sampler.py": 1681714769.3356428, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/evaluation/coco/coco_eval.py": 1681714769.3372462, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/easycv_base.py": 1681714769.3340495, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/torch_custom_dataset.py": 1681714768.9585028, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_super_resolution/video_super_resolution_dataset.py": 1681714769.305487, "TEMPLATE_PATH/msdatasets/data_loader/data_loader_manager.py": 1684121077.5480983, "TEMPLATE_PATH/msdatasets/data_loader/data_loader.py": 1684246001.5212934, "TEMPLATE_PATH/exporters/torch_model_exporter.py": 1678695526.18393, "TEMPLATE_PATH/exporters/builder.py": 1666757257.1189609, "TEMPLATE_PATH/exporters/audio/ans_dfsmn_exporter.py": 1684246001.4573822, "TEMPLATE_PATH/exporters/nlp/csanmt_for_translation_exporter.py": 1681714768.8512428, "TEMPLATE_PATH/exporters/nlp/model_for_token_classification_exporter.py": 1683889954.460512, "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py": 1678345974.1415546, "TEMPLATE_PATH/exporters/nlp/sbert_for_zero_shot_classification_exporter.py": 1678345974.1418796, "TEMPLATE_PATH/exporters/cv/object_detection_damoyolo_exporter.py": 1678695526.1809118, "TEMPLATE_PATH/exporters/cv/face_detection_scrfd_exporter.py": 1678695526.1807334, "TEMPLATE_PATH/exporters/cv/cartoon_translation_exporter.py": 1678695526.1803331, "TEMPLATE_PATH/exporters/tf_model_exporter.py": 1678695526.1826663, "TEMPLATE_PATH/exporters/base.py": 1678345974.1376836}, "modelscope_path": "TEMPLATE_PATH"}
\ No newline at end of file
diff --git a/modelscope/utils/ast_utils.py b/modelscope/utils/ast_utils.py
index 374ada20..5cee374d 100644
--- a/modelscope/utils/ast_utils.py
+++ b/modelscope/utils/ast_utils.py
@@ -1,7 +1,6 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import ast
-import contextlib
import hashlib
import os
import os.path as osp
@@ -9,12 +8,11 @@ import time
import traceback
from functools import reduce
from pathlib import Path
-from typing import Generator, Union
+from typing import Union
import gast
import json
-from modelscope import __version__
from modelscope.fileio.file import LocalStorage
from modelscope.metainfo import (CustomDatasets, Heads, Hooks, LR_Schedulers,
Metrics, Models, Optimizers, Pipelines,
@@ -574,6 +572,7 @@ file_scanner = FilesAstScanning()
def _save_index(index, file_path, file_list=None, with_template=False):
# convert tuple key to str key
index[INDEX_KEY] = {str(k): v for k, v in index[INDEX_KEY].items()}
+ from modelscope.version import __version__
index[VERSION_KEY] = __version__
index[MD5_KEY], index[FILES_MTIME_KEY] = file_scanner.files_mtime_md5(
file_list=file_list)
@@ -682,6 +681,7 @@ def load_index(
if not force_rebuild and os.path.exists(file_path):
wrapped_index = _load_index(file_path)
md5, files_mtime = file_scanner.files_mtime_md5(file_list=file_list)
+ from modelscope.version import __version__
if (wrapped_index[VERSION_KEY] == __version__):
index = wrapped_index
if (wrapped_index[MD5_KEY] != md5):
diff --git a/modelscope/utils/audio/audio_utils.py b/modelscope/utils/audio/audio_utils.py
index 3336af06..562769b8 100644
--- a/modelscope/utils/audio/audio_utils.py
+++ b/modelscope/utils/audio/audio_utils.py
@@ -11,6 +11,7 @@ from urllib.parse import urlparse
import numpy as np
from modelscope.fileio.file import HTTPStorage
+from modelscope.hub.utils.utils import get_cache_dir
from modelscope.utils.hub import snapshot_download
from modelscope.utils.logger import get_logger
@@ -323,34 +324,31 @@ def generate_sd_scp_from_url(urls: Union[tuple, list]):
def update_local_model(model_config, model_path, extra_args):
+ if 'update_model' in extra_args and not extra_args['update_model']:
+ return
+ model_revision = None
if 'update_model' in extra_args:
if extra_args['update_model'] == 'latest':
model_revision = None
else:
model_revision = extra_args['update_model']
- if model_config.__contains__('model'):
- model_name = model_config['model']
- if isinstance(model_path, str) and os.path.exists(model_path):
- try:
- logger.info(
- 'Download the model to local path {0} ...'.format(
- model_path))
- src_path = snapshot_download(
- model_name, revision=model_revision)
- # cp to model_path
- if src_path == model_path:
- logger.warning('src_path is the same with model_path')
- return
- for filename in os.listdir(src_path):
- src_file = os.path.join(src_path, filename)
- dst_file = os.path.join(model_path, filename)
- if os.path.isfile(src_file):
- shutil.copy2(src_file, model_path)
- elif os.path.isdir(src_file):
- if os.path.exists(dst_file):
- shutil.rmtree(dst_file)
- shutil.copytree(src_file, dst_file)
- except Exception as e:
- logger.warning(str(e))
- else:
- logger.warning('Can not find model name in configuration')
+ if model_config.__contains__('model'):
+ model_name = model_config['model']
+ dst_dir_root = get_cache_dir()
+ if isinstance(model_path, str) and os.path.exists(
+ model_path) and not model_path.startswith(dst_dir_root):
+ try:
+ dst = os.path.join(dst_dir_root, '.cache/' + model_name)
+ dst_dir = os.path.dirname(dst)
+ os.makedirs(dst_dir, exist_ok=True)
+ if not os.path.exists(dst):
+ os.symlink(os.path.abspath(model_path), dst)
+
+ snapshot_download(
+ model_name,
+ cache_dir=dst_dir_root,
+ revision=model_revision)
+ except Exception as e:
+ logger.warning(str(e))
+ else:
+ logger.warning('Can not find model name in configuration')
diff --git a/modelscope/utils/checkpoint.py b/modelscope/utils/checkpoint.py
index 64681db4..bbde6034 100644
--- a/modelscope/utils/checkpoint.py
+++ b/modelscope/utils/checkpoint.py
@@ -5,7 +5,6 @@ import os
import re
import time
from collections import OrderedDict
-from functools import partial
from shutil import copytree, ignore_patterns, rmtree
from typing import Callable, Dict, Optional, Union
@@ -15,7 +14,6 @@ from torch import nn
from torch.optim import Optimizer
from torch.optim.lr_scheduler import _LRScheduler
-from modelscope import __version__
from modelscope.fileio import File, LocalStorage
from modelscope.utils.config import Config, JSONIteratorEncoder
from modelscope.utils.constant import ConfigFields, ModelFile
@@ -76,6 +74,7 @@ def save_checkpoint(model: torch.nn.Module,
elif not isinstance(meta, dict):
raise TypeError(
f'meta must be a dict or None, but got {type(meta)}')
+ from modelscope import __version__
meta.update(modelscope=__version__, time=time.asctime())
if isinstance(model, torch.nn.parallel.DistributedDataParallel):
diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py
index 2382825a..1f44fc01 100644
--- a/modelscope/utils/constant.py
+++ b/modelscope/utils/constant.py
@@ -247,6 +247,7 @@ class MultiModalTasks(object):
video_temporal_grounding = 'video-temporal-grounding'
text_to_video_synthesis = 'text-to-video-synthesis'
efficient_diffusion_tuning = 'efficient-diffusion-tuning'
+ multimodal_dialogue = 'multimodal-dialogue'
class ScienceTasks(object):
@@ -277,6 +278,7 @@ class Tasks(CVTasks, NLPTasks, AudioTasks, MultiModalTasks, ScienceTasks):
This should be used to register models, pipelines, trainers.
"""
reverse_field_index = {}
+ task_template = 'task-template'
@staticmethod
def find_field_by_task(task_name):
@@ -327,6 +329,7 @@ class Hubs(enum.Enum):
"""
modelscope = 'modelscope'
huggingface = 'huggingface'
+ virgo = 'virgo'
class DownloadMode(enum.Enum):
@@ -539,3 +542,37 @@ class DistributedParallelType(object):
class DatasetTensorflowConfig:
BATCH_SIZE = 'batch_size'
DEFAULT_BATCH_SIZE_VALUE = 5
+
+
+class VirgoDatasetConfig:
+
+ default_virgo_namespace = 'default_namespace'
+
+ default_dataset_version = '1'
+
+ env_virgo_endpoint = 'VIRGO_ENDPOINT'
+
+ # Columns for meta request
+ meta_content = 'metaContent'
+ sampling_type = 'samplingType'
+
+ # Columns for meta content
+ col_id = 'id'
+ col_meta_info = 'meta_info'
+ col_analysis_result = 'analysis_result'
+ col_external_info = 'external_info'
+ col_cache_file = 'cache_file'
+
+
+DEFAULT_MAXCOMPUTE_ENDPOINT = 'http://service-corp.odps.aliyun-inc.com/api'
+
+
+class MaxComputeEnvs:
+
+ ACCESS_ID = 'ODPS_ACCESS_ID'
+
+ ACCESS_SECRET_KEY = 'ODPS_ACCESS_SECRET_KEY'
+
+ PROJECT_NAME = 'ODPS_PROJECT_NAME'
+
+ ENDPOINT = 'ODPS_ENDPOINT'
diff --git a/modelscope/utils/demo_utils.py b/modelscope/utils/demo_utils.py
deleted file mode 100644
index 99e61d45..00000000
--- a/modelscope/utils/demo_utils.py
+++ /dev/null
@@ -1,275 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-
-import io
-
-import json
-
-from modelscope.outputs import OutputKeys
-from modelscope.pipelines import pipeline
-from modelscope.utils.constant import Tasks, TasksIODescriptions
-from modelscope.utils.service_utils import NumpyEncoder
-
-TASKS_INPUT_TEMPLATES = {
- # vision tasks
- Tasks.image_portrait_stylization: TasksIODescriptions.image_to_image,
- Tasks.portrait_matting: TasksIODescriptions.image_to_image,
- Tasks.skin_retouching: TasksIODescriptions.image_to_image,
- Tasks.image_captioning: TasksIODescriptions.image_to_text,
- Tasks.image_denoising: TasksIODescriptions.image_to_image,
- Tasks.image_portrait_enhancement: TasksIODescriptions.image_to_image,
- Tasks.image_super_resolution: TasksIODescriptions.image_to_image,
- Tasks.image_colorization: TasksIODescriptions.image_to_image,
- Tasks.image_color_enhancement: TasksIODescriptions.image_to_image,
- Tasks.face_image_generation: TasksIODescriptions.seed_to_image,
- Tasks.image_style_transfer: TasksIODescriptions.images_to_image,
- Tasks.image_segmentation: TasksIODescriptions.image_to_text,
- Tasks.image_object_detection: TasksIODescriptions.image_to_text,
-
- # not tested
- Tasks.image_classification: TasksIODescriptions.image_to_text,
- Tasks.ocr_detection: TasksIODescriptions.image_to_text,
- Tasks.ocr_recognition: TasksIODescriptions.image_to_text,
- Tasks.body_2d_keypoints: TasksIODescriptions.image_to_text,
- Tasks.vision_efficient_tuning: TasksIODescriptions.image_to_text,
-
- # nlp tasks
- Tasks.text_classification: TasksIODescriptions.text_to_text,
- Tasks.text_generation: TasksIODescriptions.text_to_text,
- Tasks.word_segmentation: TasksIODescriptions.text_to_text,
- Tasks.text_error_correction: TasksIODescriptions.text_to_text,
- Tasks.named_entity_recognition: TasksIODescriptions.text_to_text,
- Tasks.sentiment_classification: TasksIODescriptions.text_to_text,
-
- # audio tasks
- Tasks.text_to_speech: TasksIODescriptions.text_to_speech,
- Tasks.auto_speech_recognition: TasksIODescriptions.speech_to_text,
- Tasks.keyword_spotting: TasksIODescriptions.speech_to_text,
- Tasks.acoustic_noise_suppression: TasksIODescriptions.speech_to_speech,
- Tasks.acoustic_echo_cancellation: TasksIODescriptions.speeches_to_speech,
-
- # multi-modal
- Tasks.visual_grounding: TasksIODescriptions.visual_grounding,
- Tasks.visual_question_answering:
- TasksIODescriptions.visual_question_answering,
- Tasks.visual_entailment: TasksIODescriptions.visual_entailment,
- Tasks.generative_multi_modal_embedding:
- TasksIODescriptions.generative_multi_modal_embedding,
-
- # new tasks
- Tasks.virtual_try_on: TasksIODescriptions.images_to_image,
-
- # TODO(lingcai.wl): support more tasks and implement corresponding example
-}
-
-INPUT_EXAMPLES = {
- # Must align with task schema defined in the Widget section of model card=
- # cv
- TasksIODescriptions.image_to_image: {
- 'inputs': [
- 'https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/image_cartoon.png'
- ],
- 'urlPaths': {
- 'outUrls': [{
- 'outputKey': OutputKeys.OUTPUT_IMG,
- 'fileType': 'png'
- }]
- }
- },
- TasksIODescriptions.images_to_image: {
- 'inputs': [
- 'https://modelscope.oss-cn-beijing.aliyuncs.com/demo/image-style-transfer/style_transfer_content.jpg',
- 'https://modelscope.oss-cn-beijing.aliyuncs.com/demo/image-style-transfer/style_transfer_style.jpg'
- ],
- 'urlPaths': {
- 'outUrls': [{
- 'outputKey': OutputKeys.OUTPUT_IMG,
- 'fileType': 'png'
- }]
- }
- },
- TasksIODescriptions.image_to_text: {
- 'inputs': [
- 'https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/image_cartoon.png'
- ],
- 'urlPaths': {}
- },
- # nlp
- TasksIODescriptions.text_to_text: {
- 'inputs': ['test'],
- 'urlPaths': {}
- },
-
- # audio
- TasksIODescriptions.speech_to_text: {
- 'inputs': [
- 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example.wav'
- ],
- 'urlPaths': {}
- },
- TasksIODescriptions.text_to_speech: {
- 'inputs': ['北京今天天气怎么样'],
- 'urlPaths': {
- 'outUrls': [{
- 'outputKey': OutputKeys.OUTPUT_PCM,
- 'fileType': 'pcm'
- }]
- }
- },
- TasksIODescriptions.speeches_to_speech: {
- 'inputs': [
- 'http://225252-file.oss-cn-hangzhou-zmf.aliyuncs.com/maas_demo/nearend_mic.wav',
- 'http://225252-file.oss-cn-hangzhou-zmf.aliyuncs.com/maas_demo/nearend_speech.wav'
- ],
- 'urlPaths': {
- 'outUrls': [{
- 'outputKey': OutputKeys.OUTPUT_PCM,
- 'fileType': 'pcm'
- }]
- }
- },
- TasksIODescriptions.speech_to_speech: {
- 'inputs': [
- 'http://225252-file.oss-cn-hangzhou-zmf.aliyuncs.com/maas_demo/speech_with_noise.wav'
- ],
- 'urlPaths': {
- 'outUrls': [{
- 'outputKey': OutputKeys.OUTPUT_PCM,
- 'fileType': 'pcm'
- }]
- }
- },
-
- # multi modal
- TasksIODescriptions.visual_grounding: {
- 'task':
- Tasks.visual_grounding,
- 'inputs': [
- 'http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/visual-grounding/visual_grounding.png',
- 'a blue turtle-like pokemon with round head'
- ],
- 'urlPaths': {
- 'inUrls': [{
- 'name': 'image'
- }, {
- 'name': 'text'
- }]
- }
- },
- TasksIODescriptions.visual_question_answering: {
- 'task':
- Tasks.visual_question_answering,
- 'inputs': [
- 'http://225252-file.oss-cn-hangzhou-zmf.aliyuncs.com/maas_demo/visual_question_answering.png',
- 'what is grown on the plant?'
- ],
- 'urlPaths': {
- 'inUrls': [{
- 'name': 'image'
- }, {
- 'name': 'text'
- }],
- 'outUrls': [{
- 'outputKey': 'text'
- }]
- }
- },
- TasksIODescriptions.visual_entailment: {
- 'task':
- Tasks.visual_entailment,
- 'inputs': [
- 'http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/visual-entailment/visual_entailment.jpg',
- 'there are two birds.', 'test'
- ],
- 'urlPaths': {
- 'inUrls': [{
- 'name': 'image'
- }, {
- 'name': 'text'
- }],
- 'outUrls': [{}]
- }
- },
- TasksIODescriptions.generative_multi_modal_embedding: {
- 'task':
- Tasks.generative_multi_modal_embedding,
- 'inputs': [
- 'http://clip-multimodal.oss-cn-beijing.aliyuncs.com/lingchen/demo/dogs.jpg',
- 'dogs playing in the grass'
- ],
- 'urlPaths': {
- 'inUrls': [{
- 'name': 'image'
- }, {
- 'name': 'text'
- }],
- 'outUrls': [{}]
- }
- },
-}
-
-
-class DemoCompatibilityCheck(object):
-
- def compatibility_check(self):
- if self.task not in TASKS_INPUT_TEMPLATES:
- print('task is not supported in demo service so far')
- return False
- if TASKS_INPUT_TEMPLATES[self.task] not in INPUT_EXAMPLES:
- print('no example input for this task')
- return False
-
- print('testing demo: ', self.task, self.model_id)
- test_pipline = pipeline(self.task, self.model_id)
- req = INPUT_EXAMPLES[TASKS_INPUT_TEMPLATES[self.task]]
- inputs = preprocess(req)
- params = req.get('parameters', {})
- # modelscope inference
- if params != {}:
- output = test_pipline(inputs, **params)
- else:
- output = test_pipline(inputs)
- json.dumps(output, cls=NumpyEncoder)
- result = postprocess(req, output)
- print(result)
- return True
-
-
-def preprocess(req):
- in_urls = req.get('urlPaths').get('inUrls')
- if len(req['inputs']) == 1:
- inputs = req['inputs'][0]
- else:
- inputs = tuple(req['inputs'])
- if in_urls is None or len(in_urls) == 0:
- return inputs
-
- inputs_dict = {}
- for i, in_url in enumerate(in_urls):
- input_name = in_url.get('name')
- if input_name is None or input_name == '':
- return inputs
- inputs_dict[input_name] = req['inputs'][i]
- return inputs_dict
-
-
-def postprocess(req, resp):
- out_urls = req.get('urlPaths').get('outUrls')
- if out_urls is None or len(out_urls) == 0:
- return resp
- new_resp = resp
- if isinstance(resp, str):
- new_resp = json.loads(resp)
- for out_url in out_urls:
- output_key = out_url['outputKey']
- file_type = out_url['fileType']
- new_resp.get(output_key)
- if file_type == 'png' or file_type == 'jpg':
- content = new_resp.get(output_key)
- import cv2
- _, img_encode = cv2.imencode('.' + file_type, content)
- img_bytes = img_encode.tobytes()
- return type(img_bytes)
- else:
- out_mem_file = io.BytesIO()
- out_mem_file.write(new_resp.get(output_key))
- return type(out_mem_file)
diff --git a/modelscope/utils/input_output.py b/modelscope/utils/input_output.py
new file mode 100644
index 00000000..b2c9cd5b
--- /dev/null
+++ b/modelscope/utils/input_output.py
@@ -0,0 +1,756 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import ast
+import base64
+import importlib
+import inspect
+from io import BytesIO
+from typing import Any
+from urllib.parse import urlparse
+
+import numpy as np
+
+from modelscope.hub.api import HubApi
+from modelscope.hub.errors import NotExistError
+from modelscope.hub.file_download import model_file_download
+from modelscope.outputs.outputs import (TASK_OUTPUTS, OutputKeys, OutputTypes,
+ OutputTypeSchema)
+from modelscope.pipeline_inputs import (INPUT_TYPE, INPUT_TYPE_SCHEMA,
+ TASK_INPUTS, InputType)
+from modelscope.pipelines import pipeline
+from modelscope.pipelines.base import Pipeline
+from modelscope.utils.config import Config
+from modelscope.utils.constant import ModelFile, Tasks
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+"""Support webservice integration pipeline。
+
+This module provides a support library when webservice uses pipeline,
+converts webservice input into pipeline input, and converts pipeline
+output into webservice output, which automatically encodes and
+decodes relevant fields.
+
+Example:
+    # create pipeline instance and pipeline information, save them to app
+ pipeline_instance = create_pipeline('damo/cv_gpen_image-portrait-enhancement', 'v1.0.0')
+ pipeline_info = get_pipeline_information_by_pipeline(pipeline_instance)
+ app.state.pipeline = pipeline_instance
+ app.state.pipeline_info = pipeline_info
+
+ # for service schema request.
+ pipeline_info = request.app.state.pipeline_info
+ return pipeline_info.schema
+
+ # for service call request.
+    async def inference(request: Request):
+ pipeline_service = request.app.state.pipeline
+ pipeline_info = request.app.state.pipeline_info
+ request_json = await request.json()
+ result = call_pipeline_with_json(pipeline_info,
+ pipeline_service,
+ request_json)
+    # convert output to json; binary fields need to be base64-encoded.
+ output = pipeline_output_to_service_base64_output(pipeline_info.task_name, result)
+ return output
+Todo:
+    * Support more service input types, such as form.
+
+"""
+
+
+def create_pipeline(model_id: str, revision: str):
+ model_configuration_file = model_file_download(
+ model_id=model_id,
+ file_path=ModelFile.CONFIGURATION,
+ revision=revision)
+ cfg = Config.from_file(model_configuration_file)
+ return pipeline(task=cfg.task, model=model_id, model_revision=revision)
+
+
+def get_class_user_attributes(cls):
+ attributes = inspect.getmembers(cls, lambda a: not (inspect.isroutine(a)))
+ user_attributes = [
+ a for a in attributes
+ if (not (a[0].startswith('__') and a[0].endswith('__')))
+ ]
+ return user_attributes
+
+
+def get_input_type(task_inputs: Any):
+ """Get task input schema.
+
+ Args:
+ task_name (str): The task name.
+ """
+ if isinstance(task_inputs, str): # no input key
+ input_type = INPUT_TYPE[task_inputs]
+ return input_type
+ elif isinstance(task_inputs, tuple) or isinstance(task_inputs, list):
+ for item in task_inputs:
+ if isinstance(item,
+                          dict):  # for lists, the server only supports the dict format.
+ return get_input_type(item)
+ else:
+ continue
+ elif isinstance(task_inputs, dict):
+ input_info = {} # key input key, value input type
+ for k, v in task_inputs.items():
+ input_info[k] = get_input_type(v)
+ return input_info
+ else:
+ raise ValueError(f'invalid input_type definition {task_inputs}')
+
+
+def get_input_schema(task_name: str, input_type: type):
+ """Get task input schema.
+
+ Args:
+ task_name (str): The task name.
+ input_type (type): The input type
+ """
+ if input_type is None:
+ task_inputs = TASK_INPUTS[task_name]
+ if isinstance(task_inputs,
+ str): # only one input field, key is task_inputs
+ return {
+ 'type': 'object',
+ 'properties': {
+ task_inputs: INPUT_TYPE_SCHEMA[task_inputs]
+ }
+ }
+ else:
+ task_inputs = input_type
+
+ if isinstance(task_inputs, str): # no input key
+ return INPUT_TYPE_SCHEMA[task_inputs]
+ elif input_type is None and isinstance(task_inputs, list):
+ for item in task_inputs:
+            # for lists, the server only supports the dict format.
+ if isinstance(item, dict):
+ return get_input_schema(None, item)
+ elif isinstance(task_inputs, tuple) or isinstance(task_inputs, list):
+ input_schema = {'type': 'array', 'items': {}}
+ for item in task_inputs:
+ if isinstance(item, dict):
+ item_schema = get_input_schema(None, item)
+ input_schema['items']['type'] = item_schema
+ return input_schema
+ else:
+ input_schema['items'] = INPUT_TYPE_SCHEMA[item]
+ return input_schema
+
+ elif isinstance(task_inputs, dict):
+ input_schema = {
+ 'type': 'object',
+ 'properties': {}
+ } # key input key, value input type
+ for k, v in task_inputs.items():
+ input_schema['properties'][k] = get_input_schema(None, v)
+ return input_schema
+ else:
+ raise ValueError(f'invalid input_type definition {task_inputs}')
+
+
+def get_output_schema(task_name: str):
+ """Get task output schema.
+
+ Args:
+ task_name (str): The task name.
+ """
+ task_outputs = TASK_OUTPUTS[task_name]
+ output_schema = {'type': 'object', 'properties': {}}
+ if not isinstance(task_outputs, list):
+ raise ValueError('TASK_OUTPUTS for %s is not list.' % task_name)
+ else:
+ for output_key in task_outputs:
+ output_schema['properties'][output_key] = OutputTypeSchema[
+ output_key]
+ return output_schema
+
+
+def get_input_info(task_name: str):
+ task_inputs = TASK_INPUTS[task_name]
+    if isinstance(task_inputs, str):  # no input key; the default input key is 'input'
+ input_type = INPUT_TYPE[task_inputs]
+ return input_type
+ elif isinstance(task_inputs, tuple):
+ return task_inputs
+ elif isinstance(task_inputs, list):
+ for item in task_inputs:
+ if isinstance(item,
+                          dict):  # for lists, the server only supports the dict format.
+ return {'input': get_input_type(item)}
+ else:
+ continue
+ elif isinstance(task_inputs, dict):
+ input_info = {} # key input key, value input type
+ for k, v in task_inputs.items():
+ input_info[k] = get_input_type(v)
+ return {'input': input_info}
+ else:
+ raise ValueError(f'invalid input_type definition {task_inputs}')
+
+
+def get_output_info(task_name: str):
+ output_keys = TASK_OUTPUTS[task_name]
+ output_type = {}
+ if not isinstance(output_keys, list):
+ raise ValueError('TASK_OUTPUTS for %s is not list.' % task_name)
+ else:
+ for output_key in output_keys:
+ output_type[output_key] = OutputTypes[output_key]
+ return output_type
+
+
+def get_task_io_info(task_name: str):
+ """Get task input output schema.
+
+ Args:
+ task_name (str): The task name.
+ """
+ tasks = get_class_user_attributes(Tasks)
+ task_exist = False
+ for key, value in tasks:
+ if key == task_name or value == task_name:
+ task_exist = True
+ break
+ if not task_exist:
+ return None, None
+
+ task_inputs = get_input_info(task_name)
+ task_outputs = get_output_info(task_name)
+
+ return task_inputs, task_outputs
+
+
+def process_arg_type_annotation(arg, default_value):
+ if arg.annotation is not None:
+ if isinstance(arg.annotation, ast.Subscript):
+ return arg.arg, arg.annotation.value.id
+ elif isinstance(arg.annotation, ast.Name):
+ return arg.arg, arg.annotation.id
+ elif isinstance(arg.annotation, ast.Attribute):
+ return arg.arg, arg.annotation.attr
+ else:
+ raise Exception('Invalid annotation: %s' % arg.annotation)
+ else:
+ if default_value is not None:
+ return arg.arg, type(default_value).__name__
+    # Irregular case: no type hint and no default value, so assume the type is object
+ logger.warning('arg: %s has no data type annotation, use default!' %
+ (arg.arg))
+ return arg.arg, 'object'
+
+
+def process_args(args):
+ arguments = []
+ # name, type, has_default, default
+ n_args = len(args.args)
+ n_args_default = len(args.defaults)
+ # no default
+ for arg in args.args[0:n_args - n_args_default]:
+ if arg.arg == 'self':
+ continue
+ else:
+ arg_name, arg_type = process_arg_type_annotation(arg, None)
+ arguments.append((arg_name, arg_type, False, None))
+
+ # process defaults arg.
+ for arg, dft in zip(args.args[n_args - n_args_default:], args.defaults):
+        # compatible with python3.7, where ast.Num has no 'value' attribute.
+ value = dft.value if hasattr(dft, 'value') else dft.n
+ arg_name, arg_type = process_arg_type_annotation(arg, value)
+ arguments.append((arg_name, arg_type, True, value))
+
+ # kwargs
+ n_kwargs = len(args.kwonlyargs)
+ n_kwargs_default = len(args.kw_defaults)
+    for kwarg in args.kwonlyargs[0:n_kwargs - n_kwargs_default]:
+        arg_name, arg_type = process_arg_type_annotation(kwarg, None)
+        arguments.append((arg_name, arg_type, False, None))
+
+    for kwarg, dft in zip(args.kwonlyargs[n_kwargs - n_kwargs_default:],
+                          args.kw_defaults):
+        value = None if dft is None else (
+            dft.value if hasattr(dft, 'value') else dft.n)
+        arg_name, arg_type = process_arg_type_annotation(kwarg, value)
+        arguments.append((arg_name, arg_type, True, value))
+ return arguments
+
+
+class PipelineClassAnalyzer(ast.NodeVisitor):
+ """Analysis pipeline class define get inputs and parameters.
+ """
+
+ def __init__(self) -> None:
+ super().__init__()
+ self.parameters = []
+ self.has_call = False
+ self.preprocess_parameters = []
+ self.has_preprocess = False
+ self.has_postprocess = False
+ self.has_forward = False
+ self.forward_parameters = []
+ self.postprocess_parameters = []
+ self.lineno = 0
+ self.end_lineno = 0
+
+ def visit_FunctionDef(self, node: ast.FunctionDef) -> Any:
+ if node.name == '__call__':
+ self.parameters = process_args(node.args)
+ self.has_call = True
+ if node.name == 'preprocess':
+ self.preprocess_parameters = process_args(node.args)
+ self.has_preprocess = True
+ elif node.name == 'postprocess':
+ self.postprocess_parameters = process_args(node.args)
+ self.has_postprocess = True
+ elif node.name == 'forward':
+ self.forward_parameters = process_args(node.args)
+ self.has_forward = True
+
+ def get_input_parameters(self):
+ if self.has_call:
+            # A custom __call__ controls its own inputs and parameters,
+            # so all of its parameters are treated as input.
+ return self.parameters, None
+ parameters = []
+ if self.has_preprocess:
+ parameters.extend(self.preprocess_parameters[1:])
+ if self.has_forward:
+ parameters.extend(self.forward_parameters[1:])
+ if self.has_postprocess:
+ parameters.extend(self.postprocess_parameters[1:])
+
+ if len(parameters) > 0:
+ return None, parameters
+ else:
+ return None, []
+
+
+class AnalysisSourceFileRegisterModules(ast.NodeVisitor):
+ """Get register_module call of the python source file.
+
+
+ Args:
+ ast (NodeVisitor): The ast node.
+
+ Examples:
+ >>> with open(source_file_path, "rb") as f:
+ >>> src = f.read()
+        >>> analyzer = AnalysisSourceFileRegisterModules(source_file_path, class_name)
+ >>> analyzer.visit(ast.parse(src, filename=source_file_path))
+ """
+
+ def __init__(self, source_file_path, class_name) -> None:
+ super().__init__()
+ self.source_file_path = source_file_path
+ self.class_name = class_name
+ self.class_define = None
+
+ def visit_ClassDef(self, node: ast.ClassDef):
+ if node.name == self.class_name:
+ self.class_define = node
+
+
+def get_pipeline_input_parameters(
+ source_file_path: str,
+ class_name: str,
+):
+ """Get pipeline input and parameter
+
+ Args:
+ source_file_path (str): The pipeline source code path
+ class_name (str): The pipeline class name
+ """
+ with open(source_file_path, 'rb') as f:
+ src = f.read()
+ analyzer = AnalysisSourceFileRegisterModules(source_file_path,
+ class_name)
+ analyzer.visit(
+ ast.parse(
+ src,
+ filename=source_file_path,
+ # python3.7 no type_comments parameter .
+ # type_comments=True
+ ))
+ clz = PipelineClassAnalyzer()
+ clz.visit(analyzer.class_define)
+ input, pipeline_parameters = clz.get_input_parameters()
+ # remove the first input parameter, the input is defined by task.
+ return input, pipeline_parameters
+
+
+meta_type_schema_map = {
+    # For parameters, only these types are currently supported.
+ 'str': 'string',
+ 'int': 'integer',
+ 'float': 'number',
+ 'bool': 'boolean',
+ 'Dict': 'object',
+ 'dict': 'object',
+ 'list': 'array',
+ 'List': 'array',
+ 'Union': 'object',
+ 'Input': 'object',
+ 'object': 'object',
+}
+
+
+def generate_pipeline_parameters_schema(parameters):
+ parameters_schema = {'type': 'object', 'properties': {}}
+ if len(parameters) == 0:
+ return {}
+ for param in parameters:
+ name, param_type, has_default, default_value = param
+ # 'max_length': ('int', True, 1024)
+ prop = {'type': meta_type_schema_map[param_type]}
+ if has_default:
+ prop['default'] = default_value
+ parameters_schema['properties'][name] = prop
+ return parameters_schema
+
+
+def get_pipeline_information_by_pipeline(pipeline: Pipeline, ):
+ """Get pipeline input output schema.
+
+ Args:
+ pipeline (Pipeline): The pipeline object.
+ """
+ task_name = pipeline.group_key
+ pipeline_class = pipeline.__class__.__name__
+ spec = importlib.util.find_spec(pipeline.__module__)
+ pipeline_file_path = spec.origin
+ info = PipelineInfomation(task_name, pipeline_class, pipeline_file_path)
+ return info
+
+
+class PipelineInfomation():
+ """Analyze pipeline information, task_name, schema.
+ """
+
+ def __init__(self, task_name: str, class_name, source_path):
+ self._task_name = task_name
+ self._class_name = class_name
+ self._source_path = source_path
+ self._is_custom_call_method = False
+ self._analyze()
+
+ def _analyze(self):
+ input, parameters = get_pipeline_input_parameters(
+ self._source_path, self._class_name)
+        if input is not None:  # custom pipeline __call__, e.g. asr_inference_pipeline
+ self._is_custom_call_method = True
+ self._input_schema = generate_pipeline_parameters_schema(input)
+ self._input_schema[
+                'description'] = 'For binary input such as image, audio and video, only url is supported.'
+ self._parameters_schema = {}
+ self._output_schema = {
+ 'type': 'object',
+ }
+ if self._task_name in TASK_OUTPUTS:
+ self._output_schema = get_output_schema(self._task_name)
+ else:
+ # use base pipeline __call__
+ if self._task_name in TASK_INPUTS and self._task_name in TASK_OUTPUTS:
+ # delete the first default input which is defined by task.
+ self._parameters_schema = generate_pipeline_parameters_schema(
+ parameters)
+ self._input_schema = get_input_schema(self._task_name, None)
+ self._output_schema = get_output_schema(self._task_name)
+ else:
+ logger.warning(
+                    'Task: %s, input defined: %s, output defined: %s, which is incomplete'
+ % (self._task_name, self._task_name
+ in TASK_INPUTS, self._task_name in TASK_OUTPUTS))
+ self._input_schema = None
+ self._output_schema = None
+ if self._task_name in TASK_INPUTS:
+ self._input_schema = get_input_schema(
+ self._task_name, None)
+ if self._task_name in TASK_OUTPUTS:
+ self._output_schema = get_output_schema(self._task_name)
+ self._parameters_schema = generate_pipeline_parameters_schema(
+ parameters)
+
+ @property
+ def task_name(self):
+ return self._task_name
+
+ @property
+ def is_custom_call(self):
+ return self._is_custom_call_method
+
+ @property
+ def input_schema(self):
+ return self._input_schema
+
+ @property
+ def output_schema(self):
+ return self._output_schema
+
+ @property
+ def parameters_schema(self):
+ return self._parameters_schema
+
+ @property
+ def schema(self):
+ return {
+ 'input': self._input_schema if self._input_schema else
+ self._parameters_schema, # all parameter is input
+ 'parameters':
+ self._parameters_schema if self._input_schema else {},
+ 'output': self._output_schema if self._output_schema else {
+ 'type': 'object',
+ },
+ }
+
+
+def is_url(url: str):
+ """Check the input url is valid url.
+
+ Args:
+ url (str): The url
+
+ Returns:
+ bool: If is url return True, otherwise False.
+ """
+ url_parsed = urlparse(url)
+ if url_parsed.scheme in ('http', 'https', 'oss'):
+ return True
+ else:
+ return False
+
+
+def decode_base64_to_image(content):
+ if content.startswith('http') or content.startswith('oss'):
+ return content
+
+ from PIL import Image
+ image_file_content = base64.b64decode(content)
+ return Image.open(BytesIO(image_file_content))
+
+
+def decode_base64_to_audio(content):
+ if content.startswith('http') or content.startswith('oss'):
+ return content
+
+ file_content = base64.b64decode(content)
+ return file_content
+
+
+def decode_base64_to_video(content):
+ if content.startswith('http') or content.startswith('oss'):
+ return content
+
+ file_content = base64.b64decode(content)
+ return file_content
+
+
+def return_origin(content):
+ return content
+
+
+def decode_box(content):
+ pass
+
+
+def service_multipart_input_to_pipeline_input(body):
+ """Convert multipart data to pipeline input.
+
+ Args:
+ body (dict): The multipart data body
+ """
+ pass
+
+
+def pipeline_output_to_service_multipart_output(output):
+ """Convert multipart data to service multipart output.
+
+ Args:
+ output (dict): Multipart body.
+ """
+ pass
+
+
+base64_decoder_map = {
+ InputType.IMAGE: decode_base64_to_image,
+ InputType.TEXT: return_origin,
+ InputType.AUDIO: decode_base64_to_audio,
+ InputType.VIDEO: decode_base64_to_video,
+ InputType.BOX: decode_box,
+ InputType.DICT: return_origin,
+ InputType.LIST: return_origin,
+ InputType.NUMBER: return_origin,
+}
+
+
+def call_pipeline_with_json(pipeline_info: PipelineInfomation,
+ pipeline: Pipeline, body: str):
+ """Call pipeline with json input.
+
+ Args:
+ pipeline_info (PipelineInfomation): The pipeline information object.
+ pipeline (Pipeline): The pipeline object.
+ body (Dict): The input object, include input and parameters
+ """
+ if pipeline_info.is_custom_call:
+ pipeline_inputs = body['input']
+ result = pipeline(**pipeline_inputs)
+ else:
+ pipeline_inputs, parameters = service_base64_input_to_pipeline_input(
+ pipeline_info.task_name, body)
+ result = pipeline(pipeline_inputs, **parameters)
+
+ return result
+
+
+def service_base64_input_to_pipeline_input(task_name, body):
+ """Convert service base64 input to pipeline input and parameters
+
+ Args:
+ task_name (str): The task name.
+ body (Dict): The input object, include input and parameters
+ """
+ if 'input' not in body:
+ raise ValueError('No input data!')
+ service_input = body['input']
+ if 'parameters' in body:
+ parameters = body['parameters']
+ else:
+ parameters = {}
+ pipeline_input = {}
+
+ task_input_info = TASK_INPUTS[task_name]
+    if isinstance(task_input_info, str):  # no input key, use the default input
+ return base64_decoder_map[task_input_info](list(
+ service_input.values())[0]), parameters
+ elif isinstance(task_input_info, tuple):
+ pipeline_input = tuple(service_input)
+ return pipeline_input, parameters
+ elif isinstance(task_input_info, dict):
+ for key, value in service_input.items(
+        ):  # task input has no nested fields.
+            # get the input field type
+ input_type = task_input_info[key]
+ # TODO recursion for list, dict if need.
+ if not isinstance(input_type, str):
+ pipeline_input[key] = value
+ continue
+ if input_type not in INPUT_TYPE:
+ raise ValueError('Invalid input field: %s' % input_type)
+ pipeline_input[key] = base64_decoder_map[input_type](value)
+ return pipeline_input, parameters
+ elif isinstance(task_input_info,
+                    list):  # a list means one of several input formats; the server uses the dict form.
+ for item in task_input_info:
+ if isinstance(item, dict):
+ for key, value in service_input.items(
+                ):  # task input has no nested fields.
+                    # get the input field type
+ input_type = item[key]
+ if input_type not in INPUT_TYPE:
+ raise ValueError('Invalid input field: %s'
+ % input_type)
+ pipeline_input[key] = base64_decoder_map[input_type](value)
+ return pipeline_input, parameters
+ else:
+ raise IndexError('Task %s input invalid: %s' %
+ (task_name, task_input_info))
+
+
+def encode_numpy_image_to_base64(image):
+ from PIL import Image
+ with BytesIO() as output_bytes:
+ pil_image = Image.fromarray(image.astype(np.uint8))
+ pil_image.save(output_bytes, 'PNG')
+ bytes_data = output_bytes.getvalue()
+ base64_str = str(base64.b64encode(bytes_data), 'utf-8')
+ return base64_str
+
+
+def encode_video_to_base64(video):
+ return str(base64.b64encode(video), 'utf-8')
+
+
+def encode_pcm_to_base64(pcm):
+ return str(base64.b64encode(pcm), 'utf-8')
+
+
+def encode_wav_to_base64(wav):
+ return str(base64.b64encode(wav), 'utf-8')
+
+
+def encode_bytes_to_base64(bts):
+ return str(base64.b64encode(bts), 'utf-8')
+
+
+base64_encoder_map = {
+ 'image': encode_numpy_image_to_base64,
+ 'video': encode_video_to_base64,
+ 'pcm': encode_pcm_to_base64,
+ 'wav': encode_wav_to_base64,
+ 'bytes': encode_bytes_to_base64,
+}
+
+# convert numpy etc type to python type.
+type_to_python_type = {
+ np.int64: int,
+}
+
+
+def _convert_to_python_type(inputs):
+ if isinstance(inputs, (list, tuple)):
+ res = []
+ for item in inputs:
+ res.append(_convert_to_python_type(item))
+ return res
+ elif isinstance(inputs, dict):
+ res = {}
+ for k, v in inputs.items():
+ if type(v) in type_to_python_type:
+ res[k] = type_to_python_type[type(v)](v)
+ else:
+ res[k] = _convert_to_python_type(v)
+ return res
+ else:
+ return inputs
+
+
+def pipeline_output_to_service_base64_output(task_name, pipeline_output):
+ """Convert pipeline output to service output,
+ convert binary fields to base64 encoding。
+
+ Args:
+ task_name (str): The output task name.
+ pipeline_output (object): The pipeline output.
+ """
+ json_serializable_output = {}
+ task_outputs = []
+ if task_name in TASK_OUTPUTS:
+ task_outputs = TASK_OUTPUTS[task_name]
+ for key, value in pipeline_output.items():
+ if key not in task_outputs:
+ continue # skip the output not defined.
+ if key in [
+ OutputKeys.OUTPUT_IMG, OutputKeys.OUTPUT_IMGS,
+ OutputKeys.OUTPUT_VIDEO, OutputKeys.OUTPUT_PCM,
+ OutputKeys.OUTPUT_PCM_LIST, OutputKeys.OUTPUT_WAV
+ ]:
+ if isinstance(value, list):
+ items = []
+ if key == OutputKeys.OUTPUT_IMGS:
+ output_item_type = OutputKeys.OUTPUT_IMG
+ else:
+ output_item_type = OutputKeys.OUTPUT_PCM
+ for item in value:
+ items.append(base64_encoder_map[output_item_type](item))
+ json_serializable_output[key] = items
+ else:
+ json_serializable_output[key] = base64_encoder_map[
+ OutputTypes[key]](
+ value)
+ elif OutputTypes[key] in [np.ndarray]:
+ json_serializable_output[key] = value.tolist()
+ else:
+ json_serializable_output[key] = value
+
+ return _convert_to_python_type(json_serializable_output)
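
The new modelscope/utils/input_output.py is meant to be consumed by a webservice layer. As a rough sketch only (the FastAPI app is an assumption, and the model id is the illustrative one from the module docstring, not part of this change), the helpers are expected to be wired together like so:

from fastapi import FastAPI, Request

from modelscope.utils.input_output import (
    call_pipeline_with_json, create_pipeline,
    get_pipeline_information_by_pipeline,
    pipeline_output_to_service_base64_output)

app = FastAPI()
# illustrative model id and revision, as used in the module docstring
app.state.pipeline = create_pipeline(
    'damo/cv_gpen_image-portrait-enhancement', 'v1.0.0')
app.state.pipeline_info = get_pipeline_information_by_pipeline(
    app.state.pipeline)


@app.post('/inference')
async def inference(request: Request):
    # decode the JSON body into pipeline input, run the pipeline,
    # then base64-encode any binary fields of the output
    body = await request.json()
    result = call_pipeline_with_json(request.app.state.pipeline_info,
                                     request.app.state.pipeline, body)
    return pipeline_output_to_service_base64_output(
        request.app.state.pipeline_info.task_name, result)

A schema endpoint would simply return request.app.state.pipeline_info.schema.
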
diff --git a/modelscope/utils/megatron_utils.py b/modelscope/utils/megatron_utils.py
index 922cb53d..53b5aacb 100644
--- a/modelscope/utils/megatron_utils.py
+++ b/modelscope/utils/megatron_utils.py
@@ -96,15 +96,16 @@ def convert_megatron_checkpoint(
log_master(
f'origin_num_partitions: {origin_num_partitions}, target_num_partitions: {target_num_partitions}'
)
- os.makedirs(target_dir, exist_ok=True)
if origin_num_partitions < target_num_partitions:
+ os.makedirs(target_dir, exist_ok=True)
state_dict = _split_checkpoint(
model, checkpoint_dir,
target_num_partitions // origin_num_partitions)
_save_converted_checkpoint(state_dict, target_dir)
log_master('Split checkpoints succeeded.')
elif origin_num_partitions > target_num_partitions:
+ os.makedirs(target_dir, exist_ok=True)
state_dict = _merge_checkpoint(
model, checkpoint_dir,
origin_num_partitions // target_num_partitions)
diff --git a/modelscope/utils/plugins.py b/modelscope/utils/plugins.py
index a83ca03c..9d238e7d 100644
--- a/modelscope/utils/plugins.py
+++ b/modelscope/utils/plugins.py
@@ -263,12 +263,11 @@ def import_module_and_submodules(package_name: str,
def install_module_from_requirements(requirement_path, ):
- """
+ """ install module from requirements
Args:
requirement_path: The path of requirement file
- Returns:
-
+    No return value; raises an error on failure.
"""
install_list = []
@@ -292,6 +291,15 @@ def install_module_from_requirements(requirement_path, ):
def import_module_from_file(module_name, file_path):
+ """ install module by name with file path
+
+ Args:
+ module_name: the module name need to be import
+ file_path: the related file path that matched with the module name
+
+ Returns: return the module class
+
+ """
spec = importlib.util.spec_from_file_location(module_name, file_path)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
@@ -299,6 +307,14 @@ def import_module_from_file(module_name, file_path):
def import_module_from_model_dir(model_dir):
+ """ import all the necessary module from a model dir
+
+ Args:
+ model_dir: model file location
+
+ No returns, raise error if failed
+
+ """
from pathlib import Path
file_scanner = FilesAstScanning()
file_scanner.traversal_files(model_dir)
@@ -317,6 +333,14 @@ def import_module_from_model_dir(model_dir):
def install_requirements_by_names(plugins: List[str]):
+ """ install the requirements by names
+
+ Args:
+ plugins: name of plugins (pai-easyscv, transformers)
+
+ No returns, raise error if failed
+
+ """
plugins_manager = PluginsManager()
uninstalled_plugins = []
for plugin in plugins:
@@ -333,6 +357,14 @@ def install_requirements_by_names(plugins: List[str]):
def install_requirements_by_files(requirements: List[str]):
+ """ install the requriements by files
+
+ Args:
+ requirements: a list of files including requirements info (requirements.txt)
+
+ No returns, raise error if failed
+
+ """
for requirement in requirements:
install_module_from_requirements(requirement)
@@ -343,7 +375,8 @@ def register_plugins_repo(plugins: List[str]) -> None:
install_requirements_by_names(plugins)
modules = []
for plugin in plugins:
- modules.extend(get_modules_from_package(plugin))
+ module_name, module_version, _ = get_modules_from_package(plugin)
+ modules.extend(module_name)
import_plugins(modules)
@@ -362,12 +395,15 @@ DEFAULT_INDEX = 'https://pypi.org/simple/'
def get_modules_from_package(package):
- """ to get the modules from a installed package
+ """ to get the modules from an installed package
Args:
package: The distribution name or package name
Returns:
+        import_names: The modules contained in the package distribution
+        import_version: The version of those modules (identical for all of them)
+        package_name: The package name; when installing from a whl file the name is unknown beforehand, so it is returned here
"""
from zipfile import ZipFile
@@ -378,8 +414,6 @@ def get_modules_from_package(package):
from urllib.parse import urlparse
from urllib import request as urllib2
from pip._internal.utils.packaging import get_requirement
- req = get_requirement(package)
- package = req.name
def urlretrieve(url, filename, data=None, auth=None):
if auth is not None:
@@ -591,24 +625,58 @@ def get_modules_from_package(package):
return result
def discover_import_names(whl_file):
+ import re
logger.debug('finding import names')
zipfile = ZipFile(file=whl_file)
namelist = zipfile.namelist()
[top_level_fname
] = [x for x in namelist if x.endswith('top_level.txt')]
+ [metadata_fname
+ ] = [x for x in namelist if x.endswith('.dist-info/METADATA')]
all_names = zipfile.read(top_level_fname).decode(
'utf-8').strip().splitlines()
+ metadata = zipfile.read(metadata_fname).decode('utf-8')
public_names = [n for n in all_names if not n.startswith('_')]
- return public_names
+
+ version_pattern = re.compile(r'^Version: (?P.+)$',
+ re.MULTILINE)
+ name_pattern = re.compile(r'^Name: (?P.+)$', re.MULTILINE)
+
+ version_match = version_pattern.search(metadata)
+ name_match = name_pattern.search(metadata)
+
+ module_version = version_match.group('version')
+ module_name = name_match.group('name')
+
+ return public_names, module_version, module_name
tmpdir = mkdtemp()
- data = get(package, tmpdir=tmpdir)
- import_names = discover_import_names(data['path'])
+ if package.endswith('.whl'):
+ """if user using .whl file then parse the whl to get the module name"""
+ if not os.path.isfile(package):
+ file_name = os.path.basename(package)
+ file_path = os.path.join(tmpdir, file_name)
+ whl_file, _ = _download_dist(package, file_path, None, None)
+ else:
+ whl_file = package
+ else:
+ """if user using package name then generate whl file and parse the file to get the module name by
+ the discover_import_names method
+ """
+ req = get_requirement(package)
+ package = req.name
+ data = get(package, tmpdir=tmpdir)
+ whl_file = data['path']
+ import_names, import_version, package_name = discover_import_names(
+ whl_file)
shutil.rmtree(tmpdir)
- return import_names
+ return import_names, import_version, package_name
class PluginsManager(object):
+ """
+ plugins manager class
+ """
def __init__(self,
cache_dir=MODELSCOPE_FILE_DIR,
@@ -633,12 +701,26 @@ class PluginsManager(object):
package: the package name need to be installed
Returns:
+            if_installed: True if installed
+            version: the installed version, or an empty string if not installed
"""
if package.split('.')[-1] == 'whl':
- return False, ''
+            # installing from a whl: check the package name instead of the module name
+ _, module_version, package_name = get_modules_from_package(package)
+ local_installed, version = PluginsManager._check_plugin_installed(
+ package_name)
+ if local_installed and module_version != version:
+ return False, version
+ elif not local_installed:
+ return False, version
+ return True, module_version
+ else:
+ return PluginsManager._check_plugin_installed(package)
+ @staticmethod
+ def _check_plugin_installed(package, verified_version=None):
from pip._internal.utils.packaging import get_requirement, specifiers
req = get_requirement(package)
@@ -656,11 +738,15 @@ class PluginsManager(object):
if not installed_valid_version:
installed = False
break
+
except KeyError:
version = ''
installed = False
- return installed, version
+ if installed and verified_version is not None and verified_version != version:
+ return False, verified_version
+ else:
+ return installed, version
@staticmethod
def pip_command(
@@ -675,6 +761,9 @@ class PluginsManager(object):
such as ['-r', 'requirements']
Returns:
+            status_code: The pip command status code, 0 on success, non-zero on failure
+            options: the options parsed from the system args by the pip command
+            args: the remaining args returned by the pip command parser
"""
from pip._internal.commands import create_command
@@ -702,6 +791,7 @@ class PluginsManager(object):
Args:
install_args (list): List of arguments passed to `pip install`.
index_url (str, optional): The pypi index url.
+            force_update: Whether to force the update
"""
if len(install_args) == 0:
@@ -730,6 +820,16 @@ class PluginsManager(object):
return status_code, install_args
def parse_args_info(self, args: List[str], options):
+ """
+        Parse the pip arguments into installed-package info.
+        Args:
+            args: the list of args from the pip command output
+            options: the options parsed from the system args by the pip command method
+
+        Returns:
+            installed_package: the installed package info to be stored in the file,
+            including the name, url and desc of each package
+ """
installed_package = []
# the case of install with requirements
@@ -781,6 +881,15 @@ class PluginsManager(object):
def uninstall_plugins(self,
uninstall_args: Union[str, List],
is_yes=False):
+ """
+        Uninstall plugins.
+        Args:
+            uninstall_args: the args passed to the pip uninstall command
+            is_yes: assume yes, do not prompt for confirmation
+
+        Returns: the status code and the uninstall args
+
+ """
if is_yes is not None:
uninstall_args += ['-y']
@@ -862,6 +971,7 @@ class PluginsManager(object):
show_all: show installed and official supported if True, else only those installed
Returns:
+            local_plugins_info: the list of plugins info
"""
local_plugins_info = self._get_plugins_from_file()
@@ -901,6 +1011,7 @@ class PluginsManager(object):
override: Override the file by the list if True, else only update.
Returns:
+ local_plugins_info_json: the json version of updated plugins info
"""
local_plugins_info = self._get_plugins_from_file()
@@ -921,12 +1032,12 @@ class PluginsManager(object):
self,
package_names: Union[str, list],
):
- """
-
+ """remove the plugins from file
Args:
package_names: package name
Returns:
+ local_plugins_info_json: the json version of updated plugins info
"""
local_plugins_info = self._get_plugins_from_file()
@@ -1012,4 +1123,5 @@ class EnvsManager(object):
if __name__ == '__main__':
install_requirements_by_files(['adaseq'])
- import_name = get_modules_from_package('pai-easycv')
+ import_name, import_version, package_name = get_modules_from_package(
+ 'pai-easycv')
diff --git a/modelscope/utils/regress_test_utils.py b/modelscope/utils/regress_test_utils.py
index 0f10c1ce..e03b3a7c 100644
--- a/modelscope/utils/regress_test_utils.py
+++ b/modelscope/utils/regress_test_utils.py
@@ -483,9 +483,9 @@ def numpify_tensor_nested(tensors, reduction=None, clip_value=10000):
t = np.where(t > clip_value, clip_value, t)
t = np.where(t < -clip_value, -clip_value, t)
if reduction == 'sum':
- return t.sum(dtype=np.float)
+ return t.sum(dtype=float)
elif reduction == 'mean':
- return t.mean(dtype=np.float)
+ return t.mean(dtype=float)
return t
return tensors
diff --git a/modelscope/utils/service_utils.py b/modelscope/utils/service_utils.py
index 6e7c0fc1..8f7ca42d 100644
--- a/modelscope/utils/service_utils.py
+++ b/modelscope/utils/service_utils.py
@@ -8,6 +8,7 @@ import requests
from modelscope.outputs import TASK_OUTPUTS, OutputKeys
from modelscope.pipeline_inputs import TASK_INPUTS, InputType
+from modelscope.utils.url_utils import valid_url
# service data decoder func decodes data from network and convert it to pipeline's input
@@ -82,12 +83,16 @@ def get_mimetype(filename):
def decode_base64_to_binary(encoding):
+ if valid_url(encoding):
+ return encoding, ''
extension = get_extension(encoding)
data = encoding.split(',')[1]
return base64.b64decode(data), extension
def decode_base64_to_image(encoding):
+ if valid_url(encoding):
+ return encoding
from PIL import Image
content = encoding.split(';')[1]
image_encoded = content.split(',')[1]
@@ -151,6 +156,7 @@ def service_data_decoder(task, data):
return input_data
elif isinstance(input_type, dict):
input_data = {}
+ data = json.loads(data)
for key, val in input_type.items():
if val == InputType.IMAGE:
input_data[key] = decode_base64_to_image(data[key])
@@ -158,6 +164,8 @@ def service_data_decoder(task, data):
input_data[key] = decode_base64_to_binary(data[key])[0]
elif val == InputType.TEXT:
input_data[key] = data[key]
+ else:
+ return data
return input_data
diff --git a/modelscope/utils/test_utils.py b/modelscope/utils/test_utils.py
index b4ce7299..03d293ec 100644
--- a/modelscope/utils/test_utils.py
+++ b/modelscope/utils/test_utils.py
@@ -150,7 +150,7 @@ def compare_arguments_nested(print_content,
if arg1 is None:
return True
- elif isinstance(arg1, (int, str, bool, np.bool, np.integer, np.str)):
+ elif isinstance(arg1, (int, str, bool, np.bool_, np.integer, np.str_)):
if arg1 != arg2:
if print_content is not None:
print(f'{print_content}, arg1:{arg1}, arg2:{arg2}')
@@ -201,10 +201,8 @@ def compare_arguments_nested(print_content,
return False
return True
elif isinstance(arg1, np.ndarray):
- arg1 = np.where(np.equal(arg1, None), np.NaN,
- arg1).astype(dtype=np.float)
- arg2 = np.where(np.equal(arg2, None), np.NaN,
- arg2).astype(dtype=np.float)
+ arg1 = np.where(np.equal(arg1, None), np.NaN, arg1).astype(dtype=float)
+ arg2 = np.where(np.equal(arg2, None), np.NaN, arg2).astype(dtype=float)
if not all(
np.isclose(arg1, arg2, rtol=rtol, atol=atol,
equal_nan=True).flatten()):
diff --git a/modelscope/utils/url_utils.py b/modelscope/utils/url_utils.py
new file mode 100644
index 00000000..59cc2efd
--- /dev/null
+++ b/modelscope/utils/url_utils.py
@@ -0,0 +1,36 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+from urllib.parse import urlparse
+
+import pandas as pd
+
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+
+def valid_url(url) -> bool:
+ try:
+ result = urlparse(url)
+ return all([result.scheme, result.netloc])
+ except ValueError as e:
+ logger.warning(e)
+ return False
+
+
+def fetch_csv_with_url(csv_url: str) -> pd.DataFrame:
+ """Fetch the csv content from url.
+
+ Args:
+ csv_url (str): The input url of csv data.
+
+ Returns:
+ A pandas DataFrame object which contains the csv content.
+ """
+ try:
+ df = pd.read_csv(csv_url)
+ except Exception as e:
+ logger.error(f'Failed to fetch csv from url: {csv_url}')
+ raise e
+
+ return df
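Note (not part of the patch): a minimal usage sketch for the new url_utils helpers; the CSV URL below is hypothetical:

from modelscope.utils.url_utils import fetch_csv_with_url, valid_url

assert valid_url('https://modelscope.cn/models')   # scheme and netloc present -> True
assert not valid_url('not-a-url')                  # no scheme/netloc -> False

df = fetch_csv_with_url('https://example.com/data.csv')  # thin wrapper around pandas.read_csv
print(df.shape)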
diff --git a/modelscope/version.py b/modelscope/version.py
index 81c35379..cf9bbe98 100644
--- a/modelscope/version.py
+++ b/modelscope/version.py
@@ -1,5 +1,5 @@
# Make sure to modify __release_datetime__ to release time when making official release.
-__version__ = '1.5.0'
+__version__ = '1.6.0'
# default release datetime for branches under active development is set
# to be a time far-far-away-into-the-future
-__release_datetime__ = '2099-10-13 08:56:12'
+__release_datetime__ = '2023-05-18 23:59:00'
diff --git a/requirements/audio/audio_asr.txt b/requirements/audio/audio_asr.txt
index 1ecbe421..7725a0dd 100644
--- a/requirements/audio/audio_asr.txt
+++ b/requirements/audio/audio_asr.txt
@@ -1,2 +1,2 @@
easyasr>=0.0.2
-funasr>=0.4.0
+funasr>=0.5.0
diff --git a/requirements/audio/audio_signal.txt b/requirements/audio/audio_signal.txt
index 61e688f3..16a18e67 100644
--- a/requirements/audio/audio_signal.txt
+++ b/requirements/audio/audio_signal.txt
@@ -1,11 +1,11 @@
hyperpyyaml
-librosa<=0.9.2
+librosa==0.9.2
MinDAEC
mir_eval>=0.7
numpy
rotary_embedding_torch>=0.1.5
scipy
SoundFile>0.10
-speechbrain>=0.5.7
+speechbrain>=0.5.12
torchaudio
tqdm
diff --git a/requirements/audio/audio_tts.txt b/requirements/audio/audio_tts.txt
index b1a85faf..81a5c6f4 100644
--- a/requirements/audio/audio_tts.txt
+++ b/requirements/audio/audio_tts.txt
@@ -3,7 +3,7 @@ greenlet>=1.1.2
inflect
jedi>=0.18.1
kantts
-librosa<=0.9.2
+librosa==0.9.2
lxml
matplotlib
msgpack>=1.0.4
diff --git a/requirements/cv.txt b/requirements/cv.txt
index 7d09a60b..0cec3659 100644
--- a/requirements/cv.txt
+++ b/requirements/cv.txt
@@ -6,7 +6,7 @@ chumpy
clip>=1.0
control_ldm
ddpm_guided_diffusion
-diffusers
+diffusers>=0.13.1,<0.15.0
easydict
easyrobust
edit_distance
@@ -25,7 +25,7 @@ lmdb
lpips
ml_collections
mmcls>=0.21.0
-mmdet>=2.25.0
+mmdet>=2.25.0,<=2.28.2
# mmdet3d-1.0.0rc6 remove networkx and numba version restriction
mmdet3d==1.0.0a1
mmsegmentation<=0.30.0
@@ -39,7 +39,6 @@ onnxruntime>=1.10
onnxsim
open-clip-torch>=2.7.0
opencv-python
-pai-easycv>=0.8,<0.10.0
paint_ldm
pandas
panopticapi
@@ -51,7 +50,7 @@ regex
scikit-image>=0.19.3
scikit-learn>=0.20.1
shapely
-shotdetect_scenedetect_lgss
+shotdetect_scenedetect_lgss>=0.0.4
smplx
tensorflow-estimator>=1.15.1
tf_slim
diff --git a/requirements/framework.txt b/requirements/framework.txt
index e15e95eb..e763ae63 100644
--- a/requirements/framework.txt
+++ b/requirements/framework.txt
@@ -4,9 +4,11 @@ datasets>=2.7.0,<=2.8.0
einops
filelock>=3.3.0
gast>=0.2.2
-mmdet<=2.28.2
-numpy<1.24.0
+# for python3.7 and python3.8 compatibility
+numpy<=1.22.0
oss2
+# for datasets compatibility
+pandas<=1.5.3
Pillow>=6.2.0
# pyarrow 9.0.0 introduced event_loop core dump
pyarrow>=6.0.0,!=9.0.0
@@ -14,7 +16,7 @@ python-dateutil>=2.1
pyyaml
requests
scipy
-setuptools==59.8.0
+setuptools
simplejson>=3.3.0
sortedcontainers>=1.5.9
tqdm>=4.64.0
diff --git a/requirements/multi-modal.txt b/requirements/multi-modal.txt
index 3d75f9b6..9d2c3448 100644
--- a/requirements/multi-modal.txt
+++ b/requirements/multi-modal.txt
@@ -1,7 +1,7 @@
accelerate
diffusers>=0.13.1,<0.15.0
ftfy>=6.0.3
-librosa<=0.9.2
+librosa==0.9.2
opencv-python
pycocoevalcap>=1.2
pycocotools>=2.0.4
@@ -12,13 +12,14 @@ rapidfuzz
# which introduced compatability issues that are being investigated
rouge_score<=0.0.4
sacrebleu
+safetensors
# scikit-video
soundfile
taming-transformers-rom1504
timm
tokenizers
torchvision
-transformers>=4.12.0
+transformers>=4.27.1
# triton==2.0.0.dev20221120
unicodedata2
zhconv
diff --git a/setup.py b/setup.py
index 9affe028..98b12888 100644
--- a/setup.py
+++ b/setup.py
@@ -197,11 +197,12 @@ if __name__ == '__main__':
setup(
name='modelscope',
version=get_version(),
- description='',
+ description=
+ 'ModelScope: bring the notion of Model-as-a-Service to life.',
long_description=readme(),
long_description_content_type='text/markdown',
- author='Alibaba ModelScope team',
- author_email='modelscope@list.alibaba-inc.com',
+ author='ModelScope team',
+ author_email='contact@modelscope.cn',
keywords='python,nlp,science,cv,speech,multi-modal',
url='https://github.com/modelscope/modelscope',
packages=find_packages(exclude=('configs', 'demo')),
diff --git a/tests/cli/test_download_cmd.py b/tests/cli/test_download_cmd.py
index 53cfdadd..6059fa12 100644
--- a/tests/cli/test_download_cmd.py
+++ b/tests/cli/test_download_cmd.py
@@ -17,7 +17,6 @@ DEFAULT_GIT_PATH = 'git'
download_model_file_name = 'test.bin'
-@unittest.skip('temporarily skip')
class DownloadCMDTest(unittest.TestCase):
def setUp(self):
diff --git a/tests/export/test_export_speech_signal_process.py b/tests/export/test_export_speech_signal_process.py
new file mode 100644
index 00000000..d3f6fe14
--- /dev/null
+++ b/tests/export/test_export_speech_signal_process.py
@@ -0,0 +1,83 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import os
+import pickle
+import shutil
+import tempfile
+import unittest
+
+import torch
+
+from modelscope.exporters import Exporter
+from modelscope.models import Model
+from modelscope.utils.logger import get_logger
+from modelscope.utils.regress_test_utils import (compare_arguments_nested,
+ numpify_tensor_nested)
+from modelscope.utils.test_utils import test_level
+
+INPUT_PKL = 'data/test/audios/input.pkl'
+
+INPUT_NAME = 'input'
+OUTPUT_NAME = 'output'
+
+logger = get_logger()
+
+
+class ExportSpeechSignalProcessTest(unittest.TestCase):
+
+ def setUp(self):
+ print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
+ self.tmp_dir = tempfile.TemporaryDirectory().name
+ if not os.path.exists(self.tmp_dir):
+ os.makedirs(self.tmp_dir)
+
+ def tearDown(self):
+ shutil.rmtree(self.tmp_dir)
+ super().tearDown()
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_export_ans_dfsmn(self):
+ model_id = 'damo/speech_dfsmn_ans_psm_48k_causal'
+ model = Model.from_pretrained(model_id)
+ onnx_info = Exporter.from_model(model).export_onnx(
+ output_dir=self.tmp_dir)
+
+ with open(os.path.join(os.getcwd(), INPUT_PKL), 'rb') as f:
+ fbank_input = pickle.load(f).cpu()
+ self.assertTrue(
+ self._validate_onnx_model(fbank_input, model, onnx_info['model']),
+ 'export onnx failed because of validation error.')
+
+ @staticmethod
+ def _validate_onnx_model(dummy_inputs, model, output):
+ try:
+ import onnx
+ import onnxruntime as ort
+ except ImportError:
+ logger.warning(
+ 'Cannot validate the exported onnx file, because '
+ 'the installation of onnx or onnxruntime cannot be found')
+ return
+ onnx_model = onnx.load(output)
+ onnx.checker.check_model(onnx_model)
+ ort_session = ort.InferenceSession(output)
+ with torch.no_grad():
+ model.eval()
+ outputs_origin = model.forward(dummy_inputs)
+ outputs_origin = numpify_tensor_nested(outputs_origin)
+
+ input_feed = {INPUT_NAME: dummy_inputs.numpy()}
+ outputs = ort_session.run(
+ None,
+ input_feed,
+ )
+ outputs = numpify_tensor_nested(outputs[0])
+
+ print(outputs)
+ print(outputs_origin)
+ return compare_arguments_nested('Onnx model output match failed',
+ outputs, outputs_origin)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/hub/test_hub_operation.py b/tests/hub/test_hub_operation.py
index 82752869..a22aaa64 100644
--- a/tests/hub/test_hub_operation.py
+++ b/tests/hub/test_hub_operation.py
@@ -22,7 +22,6 @@ DEFAULT_GIT_PATH = 'git'
download_model_file_name = 'test.bin'
-@unittest.skip('temporarily skip')
class HubOperationTest(unittest.TestCase):
def setUp(self):
diff --git a/tests/hub/test_hub_private_files.py b/tests/hub/test_hub_private_files.py
index b79c11cd..a343808f 100644
--- a/tests/hub/test_hub_private_files.py
+++ b/tests/hub/test_hub_private_files.py
@@ -21,7 +21,6 @@ from modelscope.utils.test_utils import (TEST_ACCESS_TOKEN1,
download_model_file_name = 'test.bin'
-@unittest.skip('temporarily skip')
class HubPrivateFileDownloadTest(unittest.TestCase):
def setUp(self):
diff --git a/tests/hub/test_hub_private_repository.py b/tests/hub/test_hub_private_repository.py
index bd2984cf..1b7c41cd 100644
--- a/tests/hub/test_hub_private_repository.py
+++ b/tests/hub/test_hub_private_repository.py
@@ -17,7 +17,6 @@ from modelscope.utils.test_utils import (TEST_ACCESS_TOKEN1,
DEFAULT_GIT_PATH = 'git'
-@unittest.skip('temporarily skip')
class HubPrivateRepositoryTest(unittest.TestCase):
def setUp(self):
diff --git a/tests/hub/test_hub_repository.py b/tests/hub/test_hub_repository.py
index a006d7c2..7631f5db 100644
--- a/tests/hub/test_hub_repository.py
+++ b/tests/hub/test_hub_repository.py
@@ -26,7 +26,6 @@ DEFAULT_GIT_PATH = 'git'
download_model_file_name = 'test.bin'
-@unittest.skip('temporarily skip')
class HubRepositoryTest(unittest.TestCase):
def setUp(self):
@@ -81,6 +80,20 @@ class HubRepositoryTest(unittest.TestCase):
assert lfs_file1 in lfs_files
assert lfs_file2 in lfs_files
+ def test_add_lfs_file_type(self):
+ repo = Repository(self.model_dir, clone_from=self.model_id)
+ assert os.path.exists(os.path.join(self.model_dir, ModelFile.README))
+ os.chdir(self.model_dir)
+ lfs_file = 'test.safetensors'
+ os.system("echo 'safttensor'>%s"
+ % os.path.join(self.model_dir, lfs_file))
+ repo.add_lfs_type('*.safetensors')
+ repo.push('test')
+ # check lfs files.
+ git_wrapper = GitCommandWrapper()
+ lfs_files = git_wrapper.list_lfs_files(self.model_dir)
+ assert lfs_file in lfs_files
+
if __name__ == '__main__':
unittest.main()
diff --git a/tests/hub/test_hub_retry.py b/tests/hub/test_hub_retry.py
index 7f47f119..e294cb68 100644
--- a/tests/hub/test_hub_retry.py
+++ b/tests/hub/test_hub_retry.py
@@ -12,7 +12,6 @@ from modelscope.hub.api import HubApi
from modelscope.hub.file_download import http_get_file
-@unittest.skip('temporarily skip')
class HubOperationTest(unittest.TestCase):
def setUp(self):
diff --git a/tests/hub/test_hub_revision.py b/tests/hub/test_hub_revision.py
index e97422ad..00d5d53d 100644
--- a/tests/hub/test_hub_revision.py
+++ b/tests/hub/test_hub_revision.py
@@ -23,7 +23,6 @@ download_model_file_name = 'test.bin'
download_model_file_name2 = 'test2.bin'
-@unittest.skip('temporarily skip')
class HubRevisionTest(unittest.TestCase):
def setUp(self):
diff --git a/tests/hub/test_hub_revision_release_mode.py b/tests/hub/test_hub_revision_release_mode.py
index 49a83371..3b8416db 100644
--- a/tests/hub/test_hub_revision_release_mode.py
+++ b/tests/hub/test_hub_revision_release_mode.py
@@ -26,7 +26,6 @@ download_model_file_name = 'test.bin'
download_model_file_name2 = 'test2.bin'
-@unittest.skip('temporarily skip')
class HubRevisionTest(unittest.TestCase):
def setUp(self):
diff --git a/tests/hub/test_hub_upload.py b/tests/hub/test_hub_upload.py
index 8e439aeb..2a66cb8b 100644
--- a/tests/hub/test_hub_upload.py
+++ b/tests/hub/test_hub_upload.py
@@ -19,7 +19,6 @@ from modelscope.utils.test_utils import (TEST_ACCESS_TOKEN1, TEST_MODEL_ORG,
logger = get_logger()
-@unittest.skip('temporarily skip')
class HubUploadTest(unittest.TestCase):
def setUp(self):
@@ -38,6 +37,7 @@ class HubUploadTest(unittest.TestCase):
os.mkdir(self.finetune_path)
os.system("echo '{}'>%s"
% os.path.join(self.finetune_path, ModelFile.CONFIGURATION))
+ os.environ['MODELSCOPE_TRAIN_ID'] = 'test-id'
def tearDown(self):
logger.info('TearDown')
diff --git a/tests/metrics/test_translation_evaluation_metrics.py b/tests/metrics/test_translation_evaluation_metrics.py
new file mode 100644
index 00000000..801f742b
--- /dev/null
+++ b/tests/metrics/test_translation_evaluation_metrics.py
@@ -0,0 +1,30 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import unittest
+
+from modelscope.metrics.translation_evaluation_metric import \
+ TranslationEvaluationMetric
+from modelscope.models.nlp.unite.configuration import InputFormat
+from modelscope.utils.test_utils import test_level
+
+
+class TestTranslationEvaluationMetrics(unittest.TestCase):
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_value(self):
+ metric = TranslationEvaluationMetric(gap_threshold=25.0)
+
+ outputs = {'score': [0.25, 0.22, 0.30, 0.78, 1.11, 0.95, 1.00, 0.86]}
+ inputs = {
+ 'lp': ['zh-en'] * 8,
+ 'segment_id': [0, 0, 0, 1, 1, 2, 2, 2],
+ 'raw_score': [94.0, 60.0, 25.0, 59.5, 90.0, 100.0, 80.0, 60.0],
+ 'input_format': [InputFormat.SRC_REF] * 8,
+ }
+ metric.add(outputs, inputs)
+ result = metric.evaluate()
+ print(result)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/msdatasets/test_ms_dataset.py b/tests/msdatasets/test_ms_dataset.py
index 8ded9a46..ddb84b45 100644
--- a/tests/msdatasets/test_ms_dataset.py
+++ b/tests/msdatasets/test_ms_dataset.py
@@ -195,18 +195,7 @@ class MsDatasetTest(unittest.TestCase):
)
print(next(iter(tf_dataset)))
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_streaming_load_coco(self):
- small_coco_for_test = MsDataset.load(
- dataset_name='EasyCV/small_coco_for_test',
- split='train',
- use_streaming=True,
- download_mode=DownloadMode.FORCE_REDOWNLOAD)
- dataset_sample_dict = next(iter(small_coco_for_test))
- print(dataset_sample_dict)
- assert dataset_sample_dict.values()
-
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_streaming_load_uni_fold(self):
"""Test case for loading large scale datasets."""
dataset = MsDataset.load(
@@ -269,7 +258,7 @@ class MsDatasetTest(unittest.TestCase):
def test_to_custom_dataset_movie_scene_toydata(self):
from modelscope.msdatasets.dataset_cls.custom_datasets.movie_scene_segmentation import \
MovieSceneSegmentationDataset
- from modelscope.msdatasets.dataset_cls.dataset import ExternalDataset
+ from modelscope.msdatasets.dataset_cls import ExternalDataset
model_id = 'damo/cv_resnet50-bert_video-scene-segmentation_movienet'
cache_path = snapshot_download(model_id)
diff --git a/tests/msdatasets/test_virgo_dataset.py b/tests/msdatasets/test_virgo_dataset.py
new file mode 100644
index 00000000..96f7f25b
--- /dev/null
+++ b/tests/msdatasets/test_virgo_dataset.py
@@ -0,0 +1,96 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import os
+import unittest
+
+from modelscope.hub.api import HubApi
+from modelscope.msdatasets import MsDataset
+from modelscope.msdatasets.dataset_cls.dataset import VirgoDataset
+from modelscope.utils.constant import DownloadMode, Hubs, VirgoDatasetConfig
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+# Please use your own access token for buc account.
+YOUR_ACCESS_TOKEN = 'your_access_token'
+# Please use your own virgo dataset id and ensure you have access to it.
+VIRGO_DATASET_ID = 'your_virgo_dataset_id'
+
+
+class TestVirgoDataset(unittest.TestCase):
+
+ def setUp(self):
+ self.api = HubApi()
+ self.api.login(YOUR_ACCESS_TOKEN)
+
+ @unittest.skip('to be used for local test only')
+ def test_download_virgo_dataset_meta(self):
+ ds = MsDataset.load(dataset_name=VIRGO_DATASET_ID, hub=Hubs.virgo)
+ ds_one = next(iter(ds))
+ logger.info(ds_one)
+
+ self.assertTrue(ds_one)
+ self.assertIsInstance(ds, VirgoDataset)
+ self.assertIn(VirgoDatasetConfig.col_id, ds_one)
+ self.assertIn(VirgoDatasetConfig.col_meta_info, ds_one)
+ self.assertIn(VirgoDatasetConfig.col_analysis_result, ds_one)
+ self.assertIn(VirgoDatasetConfig.col_external_info, ds_one)
+
+ @unittest.skip('to be used for local test only')
+ def test_download_virgo_dataset_files(self):
+ ds = MsDataset.load(
+ dataset_name=VIRGO_DATASET_ID,
+ hub=Hubs.virgo,
+ download_virgo_files=True)
+
+ ds_one = next(iter(ds))
+ logger.info(ds_one)
+
+ self.assertTrue(ds_one)
+ self.assertIsInstance(ds, VirgoDataset)
+ self.assertTrue(ds.download_virgo_files)
+ self.assertIn(VirgoDatasetConfig.col_cache_file, ds_one)
+ cache_file_path = ds_one[VirgoDatasetConfig.col_cache_file]
+ self.assertTrue(os.path.exists(cache_file_path))
+
+ @unittest.skip('to be used for local test only')
+ def test_force_download_virgo_dataset_files(self):
+ ds = MsDataset.load(
+ dataset_name=VIRGO_DATASET_ID,
+ hub=Hubs.virgo,
+ download_mode=DownloadMode.FORCE_REDOWNLOAD,
+ download_virgo_files=True)
+
+ ds_one = next(iter(ds))
+ logger.info(ds_one)
+
+ self.assertTrue(ds_one)
+ self.assertIsInstance(ds, VirgoDataset)
+ self.assertTrue(ds.download_virgo_files)
+ self.assertIn(VirgoDatasetConfig.col_cache_file, ds_one)
+ cache_file_path = ds_one[VirgoDatasetConfig.col_cache_file]
+ self.assertTrue(os.path.exists(cache_file_path))
+
+ @unittest.skip('to be used for local test only')
+ def test_download_virgo_dataset_odps(self):
+ # Note: the samplingType must be 1, which means the dataset is fetched from MaxCompute (ODPS).
+ import pandas as pd
+
+ ds = MsDataset.load(
+ dataset_name=VIRGO_DATASET_ID,
+ hub=Hubs.virgo,
+ odps_batch_size=100,
+ odps_limit=2000,
+ odps_drop_last=True)
+
+ ds_one = next(iter(ds))
+ logger.info(ds_one)
+
+ self.assertTrue(ds_one)
+ self.assertIsInstance(ds, VirgoDataset)
+ self.assertIsInstance(ds_one, pd.DataFrame)
+ logger.info(f'The shape of sample: {ds_one.shape}')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/pipelines/easycv_pipelines/test_panoptic_segmentation_pipeline.py b/tests/pipelines/easycv_pipelines/test_panoptic_segmentation_pipeline.py
deleted file mode 100644
index 49e01251..00000000
--- a/tests/pipelines/easycv_pipelines/test_panoptic_segmentation_pipeline.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-import unittest
-
-import cv2
-
-from modelscope.outputs import OutputKeys
-from modelscope.pipelines import pipeline
-from modelscope.utils.constant import Tasks
-from modelscope.utils.cv.image_utils import panoptic_seg_masks_to_image
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
-from modelscope.utils.test_utils import test_level
-
-
-class EasyCVPanopticSegmentationPipelineTest(unittest.TestCase,
- DemoCompatibilityCheck):
- img_path = 'data/test/images/image_semantic_segmentation.jpg'
-
- def setUp(self) -> None:
- self.task = Tasks.image_segmentation
- self.model_id = 'damo/cv_r50_panoptic-segmentation_cocopan'
-
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_r50(self):
- segmentor = pipeline(task=self.task, model=self.model_id)
- outputs = segmentor(self.img_path)
- draw_img = panoptic_seg_masks_to_image(outputs[OutputKeys.MASKS])
- cv2.imwrite('result.jpg', draw_img)
- print('print ' + self.model_id + ' success')
-
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/tests/pipelines/easycv_pipelines/test_segmentation_pipeline.py b/tests/pipelines/easycv_pipelines/test_segmentation_pipeline.py
deleted file mode 100644
index 5f6dac4b..00000000
--- a/tests/pipelines/easycv_pipelines/test_segmentation_pipeline.py
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-import unittest
-from distutils.version import LooseVersion
-
-import cv2
-import easycv
-import numpy as np
-from PIL import Image
-
-from modelscope.outputs import OutputKeys
-from modelscope.pipelines import pipeline
-from modelscope.utils.constant import Tasks
-from modelscope.utils.cv.image_utils import semantic_seg_masks_to_image
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
-from modelscope.utils.test_utils import test_level
-
-
-class EasyCVSegmentationPipelineTest(unittest.TestCase,
- DemoCompatibilityCheck):
- img_path = 'data/test/images/image_segmentation.jpg'
-
- def setUp(self) -> None:
- self.task = Tasks.image_segmentation
- self.model_id = 'damo/cv_segformer-b0_image_semantic-segmentation_coco-stuff164k'
-
- def _internal_test_(self, model_id):
- semantic_seg = pipeline(task=Tasks.image_segmentation, model=model_id)
- outputs = semantic_seg(self.img_path)
-
- draw_img = semantic_seg_masks_to_image(outputs[OutputKeys.MASKS])
- cv2.imwrite('result.jpg', draw_img)
- print('test ' + model_id + ' DONE')
-
- def _internal_test_batch_(self, model_id, num_samples=2, batch_size=2):
- # TODO: support in the future
- img = np.asarray(Image.open(self.img_path))
- num_samples = num_samples
- batch_size = batch_size
- semantic_seg = pipeline(
- task=Tasks.image_segmentation,
- model=model_id,
- batch_size=batch_size)
- outputs = semantic_seg([self.img_path] * num_samples)
-
- self.assertEqual(semantic_seg.predict_op.batch_size, batch_size)
- self.assertEqual(len(outputs), num_samples)
-
- for output in outputs:
- self.assertListEqual(
- list(img.shape)[:2], list(output['seg_pred'].shape))
-
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_segformer_b0(self):
- model_id = 'damo/cv_segformer-b0_image_semantic-segmentation_coco-stuff164k'
- self._internal_test_(model_id)
-
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_segformer_b1(self):
- model_id = 'damo/cv_segformer-b1_image_semantic-segmentation_coco-stuff164k'
- self._internal_test_(model_id)
-
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_segformer_b2(self):
- model_id = 'damo/cv_segformer-b2_image_semantic-segmentation_coco-stuff164k'
- self._internal_test_(model_id)
-
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_segformer_b3(self):
- model_id = 'damo/cv_segformer-b3_image_semantic-segmentation_coco-stuff164k'
- self._internal_test_(model_id)
-
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_segformer_b4(self):
- model_id = 'damo/cv_segformer-b4_image_semantic-segmentation_coco-stuff164k'
- self._internal_test_(model_id)
-
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_segformer_b5(self):
- model_id = 'damo/cv_segformer-b5_image_semantic-segmentation_coco-stuff164k'
- self._internal_test_(model_id)
-
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/tests/pipelines/plugin_remote_pipelines/test_plugin_model.py b/tests/pipelines/plugin_remote_pipelines/test_plugin_model.py
index 43d840ea..71b9e64f 100644
--- a/tests/pipelines/plugin_remote_pipelines/test_plugin_model.py
+++ b/tests/pipelines/plugin_remote_pipelines/test_plugin_model.py
@@ -3,12 +3,11 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.plugins import PluginsManager
from modelscope.utils.test_utils import test_level
-class PluginModelTest(unittest.TestCase, DemoCompatibilityCheck):
+class PluginModelTest(unittest.TestCase):
def setUp(self):
self.package = 'adaseq'
diff --git a/tests/pipelines/test_abnormal_object_detection.py b/tests/pipelines/test_abnormal_object_detection.py
index fbce51c6..c6264069 100644
--- a/tests/pipelines/test_abnormal_object_detection.py
+++ b/tests/pipelines/test_abnormal_object_detection.py
@@ -3,11 +3,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
+class ObjectDetectionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_object_detection
@@ -20,10 +19,6 @@ class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
result = object_detect(input_location)
print(result)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_action_detection.py b/tests/pipelines/test_action_detection.py
index ae7e60b1..d724c81a 100644
--- a/tests/pipelines/test_action_detection.py
+++ b/tests/pipelines/test_action_detection.py
@@ -3,11 +3,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ActionDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
+class ActionDetectionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.action_detection
@@ -20,10 +19,6 @@ class ActionDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
'data/test/videos/action_detection_test_video.mp4')
print('action detection results:', result)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_action_recognition.py b/tests/pipelines/test_action_recognition.py
index 292eb238..9d0c6175 100644
--- a/tests/pipelines/test_action_recognition.py
+++ b/tests/pipelines/test_action_recognition.py
@@ -3,11 +3,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ActionRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
+class ActionRecognitionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.action_recognition
@@ -37,10 +36,6 @@ class ActionRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
'data/test/videos/action_recognition_test_video.mp4')
print('pst recognition results:', result)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_adaint_image_color_enhance.py b/tests/pipelines/test_adaint_image_color_enhance.py
index e36a85ec..f0efef5f 100644
--- a/tests/pipelines/test_adaint_image_color_enhance.py
+++ b/tests/pipelines/test_adaint_image_color_enhance.py
@@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class AdaIntImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck):
+class AdaIntImageColorEnhanceTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_adaint_image-color-enhance-models'
@@ -40,11 +39,6 @@ class AdaIntImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck):
self.pipeline_inference(img_color_enhance,
'data/test/images/image_color_enhance.png')
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- @unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest only')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_addr_mgeo.py b/tests/pipelines/test_addr_mgeo.py
index d630b857..e678d285 100644
--- a/tests/pipelines/test_addr_mgeo.py
+++ b/tests/pipelines/test_addr_mgeo.py
@@ -8,12 +8,11 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import TextClassificationPipeline
from modelscope.preprocessors import TextClassificationTransformersPreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool
from modelscope.utils.test_utils import test_level
-class MGeoTest(unittest.TestCase, DemoCompatibilityCheck):
+class MGeoTest(unittest.TestCase):
multi_modal_inputs = {
'source_sentence': ['杭州余杭东方未来学校附近世纪华联商场(金家渡北苑店)'],
@@ -117,10 +116,6 @@ class MGeoTest(unittest.TestCase, DemoCompatibilityCheck):
pipeline_ins = pipeline(task=task, model=model)
print(pipeline_ins(input=inputs))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_addr_similarity.py b/tests/pipelines/test_addr_similarity.py
index 8c1f93c9..ecc879eb 100644
--- a/tests/pipelines/test_addr_similarity.py
+++ b/tests/pipelines/test_addr_similarity.py
@@ -8,12 +8,11 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import TextClassificationPipeline
from modelscope.preprocessors import TextClassificationTransformersPreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool
from modelscope.utils.test_utils import test_level
-class AddrSimilarityTest(unittest.TestCase, DemoCompatibilityCheck):
+class AddrSimilarityTest(unittest.TestCase):
sentence1 = '阿里巴巴西溪园区'
sentence2 = '文一西路阿里巴巴'
@@ -37,10 +36,6 @@ class AddrSimilarityTest(unittest.TestCase, DemoCompatibilityCheck):
task=Tasks.text_classification, model=self.model_id)
print(pipeline_ins(input=(self.sentence1, self.sentence2)))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_animal_recognition.py b/tests/pipelines/test_animal_recognition.py
index eb9f92e6..57937770 100644
--- a/tests/pipelines/test_animal_recognition.py
+++ b/tests/pipelines/test_animal_recognition.py
@@ -4,11 +4,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class AnimalRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
+class AnimalRecognitionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.animal_recognition
@@ -21,10 +20,6 @@ class AnimalRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
result = animal_recognition('data/test/images/dogs.jpg')
print(result)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_arc_face_recognition.py b/tests/pipelines/test_arc_face_recognition.py
index fa17dd91..481b9f33 100644
--- a/tests/pipelines/test_arc_face_recognition.py
+++ b/tests/pipelines/test_arc_face_recognition.py
@@ -6,11 +6,10 @@ import numpy as np
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class FaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
+class FaceRecognitionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.face_recognition
@@ -31,10 +30,6 @@ class FaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
sim = np.dot(emb1[0], emb2[0])
print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}')
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_automatic_post_editing.py b/tests/pipelines/test_automatic_post_editing.py
index da09851c..190ff788 100644
--- a/tests/pipelines/test_automatic_post_editing.py
+++ b/tests/pipelines/test_automatic_post_editing.py
@@ -3,11 +3,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class AutomaticPostEditingTest(unittest.TestCase, DemoCompatibilityCheck):
+class AutomaticPostEditingTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.translation
@@ -21,10 +20,6 @@ class AutomaticPostEditingTest(unittest.TestCase, DemoCompatibilityCheck):
pipeline_ins = pipeline(self.task, model=self.model_id)
print(pipeline_ins(input=inputs))
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_automatic_speech_recognition.py b/tests/pipelines/test_automatic_speech_recognition.py
index dc624f29..6014438e 100644
--- a/tests/pipelines/test_automatic_speech_recognition.py
+++ b/tests/pipelines/test_automatic_speech_recognition.py
@@ -10,7 +10,6 @@ import soundfile
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import ColorCodes, Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import download_and_untar, test_level
@@ -26,8 +25,7 @@ TFRECORD_TESTSETS_FILE = 'tfrecord.tar.gz'
TFRECORD_TESTSETS_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/datasets/tfrecord.tar.gz'
-class AutomaticSpeechRecognitionTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class AutomaticSpeechRecognitionTest(unittest.TestCase):
action_info = {
'test_run_with_wav_pytorch': {
'checking_item': OutputKeys.TEXT,
@@ -457,10 +455,6 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase,
logger.info(ColorCodes.MAGENTA + str(rec_result)
+ ColorCodes.END)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_bad_image_detecting.py b/tests/pipelines/test_bad_image_detecting.py
index 728da8d1..05954f7b 100644
--- a/tests/pipelines/test_bad_image_detecting.py
+++ b/tests/pipelines/test_bad_image_detecting.py
@@ -7,11 +7,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.cv import BadImageDetecingPipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class BadImageDetectingTest(unittest.TestCase, DemoCompatibilityCheck):
+class BadImageDetectingTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.bad_image_detecting
@@ -58,10 +57,6 @@ class BadImageDetectingTest(unittest.TestCase, DemoCompatibilityCheck):
print('pipeline: the out_label is {}'.format(labels))
print('pipeline: the out_score is {}'.format(scores))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_body_2d_keypoints.py b/tests/pipelines/test_body_2d_keypoints.py
index 5d90cbf0..25d8fa55 100644
--- a/tests/pipelines/test_body_2d_keypoints.py
+++ b/tests/pipelines/test_body_2d_keypoints.py
@@ -8,11 +8,10 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.cv.image_utils import draw_keypoints
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class Body2DKeypointsTest(unittest.TestCase, DemoCompatibilityCheck):
+class Body2DKeypointsTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.body_2d_keypoints
@@ -34,10 +33,6 @@ class Body2DKeypointsTest(unittest.TestCase, DemoCompatibilityCheck):
body_2d_keypoints = pipeline(self.task, model=self.model_id)
self.pipeline_inference(body_2d_keypoints, Image.open(self.test_image))
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_body_3d_keypoints.py b/tests/pipelines/test_body_3d_keypoints.py
index 6f73a243..33228022 100644
--- a/tests/pipelines/test_body_3d_keypoints.py
+++ b/tests/pipelines/test_body_3d_keypoints.py
@@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class Body3DKeypointsTest(unittest.TestCase, DemoCompatibilityCheck):
+class Body3DKeypointsTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_canonical_body-3d-keypoints_video'
@@ -41,10 +40,6 @@ class Body3DKeypointsTest(unittest.TestCase, DemoCompatibilityCheck):
% (self.test_video))
self.pipeline_inference(body_3d_keypoints, pipeline_input=cap)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_body_3d_keypoints_hdformer.py b/tests/pipelines/test_body_3d_keypoints_hdformer.py
index 2ebbc95b..e86f247f 100644
--- a/tests/pipelines/test_body_3d_keypoints_hdformer.py
+++ b/tests/pipelines/test_body_3d_keypoints_hdformer.py
@@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class Body3DKeypointsHDFormerTest(unittest.TestCase, DemoCompatibilityCheck):
+class Body3DKeypointsHDFormerTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_hdformer_body-3d-keypoints_video'
@@ -41,10 +40,6 @@ class Body3DKeypointsHDFormerTest(unittest.TestCase, DemoCompatibilityCheck):
% (self.test_video))
self.pipeline_inference(body_3d_keypoints, pipeline_input=cap)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_canmt_translation.py b/tests/pipelines/test_canmt_translation.py
index e3bce5d9..31e57040 100644
--- a/tests/pipelines/test_canmt_translation.py
+++ b/tests/pipelines/test_canmt_translation.py
@@ -8,11 +8,10 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import CanmtTranslationPipeline
from modelscope.preprocessors import CanmtTranslationPreprocessor, Preprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class CanmtTranslationTest(unittest.TestCase, DemoCompatibilityCheck):
+class CanmtTranslationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.competency_aware_translation
@@ -59,10 +58,6 @@ class CanmtTranslationTest(unittest.TestCase, DemoCompatibilityCheck):
pipeline_ins = pipeline(task=self.task)
print(pipeline_ins(self.input))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_card_detection.py b/tests/pipelines/test_card_detection.py
index d913f494..676fb786 100644
--- a/tests/pipelines/test_card_detection.py
+++ b/tests/pipelines/test_card_detection.py
@@ -8,11 +8,10 @@ from modelscope.msdatasets import MsDataset
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.cv.image_utils import draw_card_detection_result
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class CardDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
+class CardDetectionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.card_detection
@@ -57,10 +56,6 @@ class CardDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
result = card_detection(img_path)
self.show_result(img_path, result)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_cartoon_stable_diffusion.py b/tests/pipelines/test_cartoon_stable_diffusion.py
index 751c7ea8..6a91ed44 100644
--- a/tests/pipelines/test_cartoon_stable_diffusion.py
+++ b/tests/pipelines/test_cartoon_stable_diffusion.py
@@ -6,11 +6,10 @@ import cv2
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class CartoonStableDiffusionTest(unittest.TestCase, DemoCompatibilityCheck):
+class CartoonStableDiffusionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.text_to_image_synthesis
diff --git a/tests/pipelines/test_chinese_stable_diffusion.py b/tests/pipelines/test_chinese_stable_diffusion.py
index bd6d74aa..05207ddb 100644
--- a/tests/pipelines/test_chinese_stable_diffusion.py
+++ b/tests/pipelines/test_chinese_stable_diffusion.py
@@ -6,11 +6,10 @@ import cv2
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ChineseStableDiffusionTest(unittest.TestCase, DemoCompatibilityCheck):
+class ChineseStableDiffusionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.text_to_image_synthesis
diff --git a/tests/pipelines/test_clip_interrogator.py b/tests/pipelines/test_clip_interrogator.py
new file mode 100644
index 00000000..615aef3c
--- /dev/null
+++ b/tests/pipelines/test_clip_interrogator.py
@@ -0,0 +1,34 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest
+
+from modelscope.models import Model
+from modelscope.outputs import OutputKeys
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+from modelscope.utils.test_utils import test_level
+
+
+class CLIPInterrogatorTest(unittest.TestCase):
+
+ @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+ def test_run_with_image_captioning_with_model(self):
+ model = Model.from_pretrained('damo/cv_clip-interrogator')
+ pipeline_caption = pipeline(
+ task=Tasks.image_captioning,
+ model=model,
+ )
+ image = 'data/test/images/image_mplug_vqa.jpg'
+ result = pipeline_caption(image)
+ print(result[OutputKeys.CAPTION])
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_image_captioning_with_name(self):
+ pipeline_caption = pipeline(
+ Tasks.image_captioning, model='damo/cv_clip-interrogator')
+ image = 'data/test/images/image_mplug_vqa.jpg'
+ result = pipeline_caption(image)
+ print(result[OutputKeys.CAPTION])
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/pipelines/test_cmdssl_video_embedding.py b/tests/pipelines/test_cmdssl_video_embedding.py
index 5807c075..9e176cf2 100644
--- a/tests/pipelines/test_cmdssl_video_embedding.py
+++ b/tests/pipelines/test_cmdssl_video_embedding.py
@@ -4,11 +4,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class CMDSSLVideoEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck):
+class CMDSSLVideoEmbeddingTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.video_embedding
@@ -22,10 +21,6 @@ class CMDSSLVideoEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck):
print(f'video embedding output: {result}.')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_content_check.py b/tests/pipelines/test_content_check.py
index c68af257..39a791a0 100644
--- a/tests/pipelines/test_content_check.py
+++ b/tests/pipelines/test_content_check.py
@@ -4,11 +4,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ContentCheckTest(unittest.TestCase, DemoCompatibilityCheck):
+class ContentCheckTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_classification
@@ -20,10 +19,6 @@ class ContentCheckTest(unittest.TestCase, DemoCompatibilityCheck):
result = content_check_func('data/test/images/content_check.jpg')
print(result)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_controllable_image_generation.py b/tests/pipelines/test_controllable_image_generation.py
index a5cff66c..7d6b03ce 100644
--- a/tests/pipelines/test_controllable_image_generation.py
+++ b/tests/pipelines/test_controllable_image_generation.py
@@ -10,12 +10,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.cv import ControllableImageGenerationPipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ControllableImageGenerationTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class ControllableImageGenerationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.controllable_image_generation
@@ -68,10 +66,6 @@ class ControllableImageGenerationTest(unittest.TestCase,
print(
'pipeline: the output image path is {}'.format(output_image_path))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_conversational_text_to_sql.py b/tests/pipelines/test_conversational_text_to_sql.py
index 17fffcaf..a7e15dcc 100644
--- a/tests/pipelines/test_conversational_text_to_sql.py
+++ b/tests/pipelines/test_conversational_text_to_sql.py
@@ -8,13 +8,12 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import ConversationalTextToSqlPipeline
from modelscope.preprocessors import ConversationalTextToSqlPreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.nlp.space_T_en.utils import \
text2sql_tracking_and_print_results
from modelscope.utils.test_utils import test_level
-class ConversationalTextToSql(unittest.TestCase, DemoCompatibilityCheck):
+class ConversationalTextToSql(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.table_question_answering
@@ -67,10 +66,6 @@ class ConversationalTextToSql(unittest.TestCase, DemoCompatibilityCheck):
pipelines = [pipeline(task=self.task, model=self.model_id)]
text2sql_tracking_and_print_results(self.test_case, pipelines)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_crowd_counting.py b/tests/pipelines/test_crowd_counting.py
index 4e15cfca..be14f29e 100644
--- a/tests/pipelines/test_crowd_counting.py
+++ b/tests/pipelines/test_crowd_counting.py
@@ -8,14 +8,13 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.cv.image_utils import numpy_to_cv2img
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import test_level
logger = get_logger()
-class CrowdCountingTest(unittest.TestCase, DemoCompatibilityCheck):
+class CrowdCountingTest(unittest.TestCase):
def setUp(self) -> None:
self.input_location = 'data/test/images/crowd_counting.jpg'
@@ -56,10 +55,6 @@ class CrowdCountingTest(unittest.TestCase, DemoCompatibilityCheck):
else:
raise ValueError('process error')
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_csanmt_translation.py b/tests/pipelines/test_csanmt_translation.py
index d989a6c4..03545fc5 100644
--- a/tests/pipelines/test_csanmt_translation.py
+++ b/tests/pipelines/test_csanmt_translation.py
@@ -3,11 +3,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class TranslationTest(unittest.TestCase, DemoCompatibilityCheck):
+class TranslationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.translation
@@ -90,10 +89,6 @@ class TranslationTest(unittest.TestCase, DemoCompatibilityCheck):
pipeline_ins = pipeline(self.task)
print(pipeline_ins(input=inputs))
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_damo_face_detection.py b/tests/pipelines/test_damo_face_detection.py
index 8bd1e009..44578c3e 100644
--- a/tests/pipelines/test_damo_face_detection.py
+++ b/tests/pipelines/test_damo_face_detection.py
@@ -7,11 +7,10 @@ import cv2
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.cv.image_utils import draw_face_detection_result
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class FaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
+class FaceDetectionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.face_detection
@@ -36,10 +35,6 @@ class FaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
result = face_detection(img_path)
self.show_result(img_path, result)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_ddcolor_image_colorization.py b/tests/pipelines/test_ddcolor_image_colorization.py
index e1876329..5d752452 100644
--- a/tests/pipelines/test_ddcolor_image_colorization.py
+++ b/tests/pipelines/test_ddcolor_image_colorization.py
@@ -11,11 +11,10 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.pipelines.cv import DDColorImageColorizationPipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class DDColorImageColorizationTest(unittest.TestCase, DemoCompatibilityCheck):
+class DDColorImageColorizationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_colorization
@@ -52,10 +51,6 @@ class DDColorImageColorizationTest(unittest.TestCase, DemoCompatibilityCheck):
image_colorization = pipeline(Tasks.image_colorization)
self.pipeline_inference(image_colorization, self.test_image)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_ddpm_semantic_segmentation.py b/tests/pipelines/test_ddpm_semantic_segmentation.py
index a5303098..ad35e069 100644
--- a/tests/pipelines/test_ddpm_semantic_segmentation.py
+++ b/tests/pipelines/test_ddpm_semantic_segmentation.py
@@ -5,12 +5,10 @@ import torch
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class DDPMImageSemanticSegmentationTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class DDPMImageSemanticSegmentationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_segmentation
@@ -28,10 +26,6 @@ class DDPMImageSemanticSegmentationTest(unittest.TestCase,
else:
raise ValueError('process error')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_deeplpf_image_color_enhance.py b/tests/pipelines/test_deeplpf_image_color_enhance.py
index 08b1a357..87b709fe 100644
--- a/tests/pipelines/test_deeplpf_image_color_enhance.py
+++ b/tests/pipelines/test_deeplpf_image_color_enhance.py
@@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class DeepLPFImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck):
+class DeepLPFImageColorEnhanceTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_deeplpfnet_image-color-enhance-models'
@@ -37,10 +36,6 @@ class DeepLPFImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck):
self.pipeline_inference(img_color_enhance,
'data/test/images/image_color_enhance.png')
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_dialog_intent_prediction.py b/tests/pipelines/test_dialog_intent_prediction.py
index 2ee46388..5a1729b8 100644
--- a/tests/pipelines/test_dialog_intent_prediction.py
+++ b/tests/pipelines/test_dialog_intent_prediction.py
@@ -8,11 +8,10 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import DialogIntentPredictionPipeline
from modelscope.preprocessors import DialogIntentPredictionPreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class DialogIntentPredictionTest(unittest.TestCase, DemoCompatibilityCheck):
+class DialogIntentPredictionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.task_oriented_conversation
@@ -68,10 +67,6 @@ class DialogIntentPredictionTest(unittest.TestCase, DemoCompatibilityCheck):
for my_pipeline, item in list(zip(pipelines, self.test_case)):
print(my_pipeline(item))
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_dialog_modeling.py b/tests/pipelines/test_dialog_modeling.py
index 6b6259ce..202951a5 100644
--- a/tests/pipelines/test_dialog_modeling.py
+++ b/tests/pipelines/test_dialog_modeling.py
@@ -10,11 +10,10 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import DialogModelingPipeline
from modelscope.preprocessors import DialogModelingPreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class DialogModelingTest(unittest.TestCase, DemoCompatibilityCheck):
+class DialogModelingTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.task_oriented_conversation
@@ -148,10 +147,6 @@ class DialogModelingTest(unittest.TestCase, DemoCompatibilityCheck):
pipelines = [pipeline(task=self.task)]
self.generate_and_print_dialog_response(pipelines)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_dialog_state_tracking.py b/tests/pipelines/test_dialog_state_tracking.py
index 6cdd5ee7..e7f72b83 100644
--- a/tests/pipelines/test_dialog_state_tracking.py
+++ b/tests/pipelines/test_dialog_state_tracking.py
@@ -8,13 +8,12 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import DialogStateTrackingPipeline
from modelscope.preprocessors import DialogStateTrackingPreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.nlp.space.utils_dst import \
tracking_and_print_dialog_states
from modelscope.utils.test_utils import test_level
-class DialogStateTrackingTest(unittest.TestCase, DemoCompatibilityCheck):
+class DialogStateTrackingTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.task_oriented_conversation
@@ -119,10 +118,6 @@ class DialogStateTrackingTest(unittest.TestCase, DemoCompatibilityCheck):
pipelines = [pipeline(task=self.task, model=self.model_id)]
tracking_and_print_dialog_states(self.test_case, pipelines)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_diffusers_stable_diffusion.py b/tests/pipelines/test_diffusers_stable_diffusion.py
index 98c4862a..eef677fc 100644
--- a/tests/pipelines/test_diffusers_stable_diffusion.py
+++ b/tests/pipelines/test_diffusers_stable_diffusion.py
@@ -6,11 +6,10 @@ import cv2
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class DiffusersStableDiffusionTest(unittest.TestCase, DemoCompatibilityCheck):
+class DiffusersStableDiffusionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.text_to_image_synthesis
diff --git a/tests/pipelines/test_disco_guided_diffusion.py b/tests/pipelines/test_disco_guided_diffusion.py
index d7be7292..f3fd668b 100644
--- a/tests/pipelines/test_disco_guided_diffusion.py
+++ b/tests/pipelines/test_disco_guided_diffusion.py
@@ -5,11 +5,10 @@ import cv2
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class DiscoGuidedDiffusionTest(unittest.TestCase, DemoCompatibilityCheck):
+class DiscoGuidedDiffusionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.text_to_image_synthesis
diff --git a/tests/pipelines/test_document_grounded_dialog_generate.py b/tests/pipelines/test_document_grounded_dialog_generate.py
index da23fe19..b08a07fa 100644
--- a/tests/pipelines/test_document_grounded_dialog_generate.py
+++ b/tests/pipelines/test_document_grounded_dialog_generate.py
@@ -9,12 +9,10 @@ from modelscope.pipelines import pipeline
from modelscope.preprocessors.nlp import \
DocumentGroundedDialogGeneratePreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class DocumentGroundedDialogGenerateTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class DocumentGroundedDialogGenerateTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.document_grounded_dialog_generate
diff --git a/tests/pipelines/test_document_grounded_dialog_retrieval.py b/tests/pipelines/test_document_grounded_dialog_retrieval.py
index 6bcca369..48a63087 100644
--- a/tests/pipelines/test_document_grounded_dialog_retrieval.py
+++ b/tests/pipelines/test_document_grounded_dialog_retrieval.py
@@ -9,12 +9,10 @@ from modelscope.pipelines import pipeline
from modelscope.preprocessors.nlp import \
DocumentGroundedDialogRetrievalPreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class DocumentGroundedDialogRetrievalTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class DocumentGroundedDialogRetrievalTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.document_grounded_dialog_retrieval
diff --git a/tests/pipelines/test_document_segmentation.py b/tests/pipelines/test_document_segmentation.py
index 41c490d2..09ce5756 100644
--- a/tests/pipelines/test_document_segmentation.py
+++ b/tests/pipelines/test_document_segmentation.py
@@ -6,14 +6,13 @@ from typing import Any, Dict
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import test_level
logger = get_logger()
-class DocumentSegmentationTest(unittest.TestCase, DemoCompatibilityCheck):
+class DocumentSegmentationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.document_segmentation
@@ -64,10 +63,6 @@ class DocumentSegmentationTest(unittest.TestCase, DemoCompatibilityCheck):
for document in documents_list:
print(document)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_document_vl_embedding.py b/tests/pipelines/test_document_vl_embedding.py
index f8d2d5a3..349547d1 100644
--- a/tests/pipelines/test_document_vl_embedding.py
+++ b/tests/pipelines/test_document_vl_embedding.py
@@ -10,11 +10,10 @@ from modelscope.models import Model
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class DocumentVLEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck):
+class DocumentVLEmbeddingTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/multi-modal_convnext-roberta-base_vldoc-embedding'
@@ -51,10 +50,6 @@ class DocumentVLEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck):
vldoc_doc_VL_emb_pipeline = pipeline(self.task)
self.pipeline_inference(vldoc_doc_VL_emb_pipeline)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_domain_classification.py b/tests/pipelines/test_domain_classification.py
index 8e5bfa7f..006daa65 100644
--- a/tests/pipelines/test_domain_classification.py
+++ b/tests/pipelines/test_domain_classification.py
@@ -3,11 +3,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class DomainClassificationTest(unittest.TestCase, DemoCompatibilityCheck):
+class DomainClassificationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.text_classification
@@ -36,10 +35,6 @@ class DomainClassificationTest(unittest.TestCase, DemoCompatibilityCheck):
pipeline_ins = pipeline(self.task, model=model_id)
print(pipeline_ins(input=inputs))
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_efficient_diffusion_tuning.py b/tests/pipelines/test_efficient_diffusion_tuning.py
index 9dc5e412..e33b2bf2 100644
--- a/tests/pipelines/test_efficient_diffusion_tuning.py
+++ b/tests/pipelines/test_efficient_diffusion_tuning.py
@@ -5,11 +5,10 @@ from modelscope.models import Model
from modelscope.models.multi_modal import EfficientStableDiffusion
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class EfficientDiffusionTuningTest(unittest.TestCase, DemoCompatibilityCheck):
+class EfficientDiffusionTuningTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.efficient_diffusion_tuning
@@ -28,13 +27,9 @@ class EfficientDiffusionTuningTest(unittest.TestCase, DemoCompatibilityCheck):
model = Model.from_pretrained(model_id)
self.assertTrue(model.__class__ == EfficientStableDiffusion)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_efficient_diffusion_tuning_lora_demo_compatibility(self):
- self.model_id = 'damo/multi-modal_efficient-diffusion-tuning-lora'
- self.compatibility_check()
-
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_efficient_diffusion_tuning_control_lora_run_pipeline(self):
+ # TODO: to be fixed in the future
model_id = 'damo/multi-modal_efficient-diffusion-tuning-control-lora'
inputs = {
'prompt':
@@ -53,11 +48,6 @@ class EfficientDiffusionTuningTest(unittest.TestCase, DemoCompatibilityCheck):
model = Model.from_pretrained(model_id)
self.assertTrue(model.__class__ == EfficientStableDiffusion)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_efficient_diffusion_tuning_control_lora_demo_compatibility(self):
- self.model_id = 'damo/multi-modal_efficient-diffusion-tuning-control-lora'
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_extractive_summarization.py b/tests/pipelines/test_extractive_summarization.py
index 26ac508c..a7f12d14 100644
--- a/tests/pipelines/test_extractive_summarization.py
+++ b/tests/pipelines/test_extractive_summarization.py
@@ -6,14 +6,13 @@ from typing import Any, Dict
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import test_level
logger = get_logger()
-class ExtractiveSummarizationTest(unittest.TestCase, DemoCompatibilityCheck):
+class ExtractiveSummarizationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.extractive_summarization
@@ -46,10 +45,6 @@ class ExtractiveSummarizationTest(unittest.TestCase, DemoCompatibilityCheck):
model_id=self.ponet_topic_model_id, documents=self.sentences)
print(result[OutputKeys.TEXT])
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_face_2d_keypoints.py b/tests/pipelines/test_face_2d_keypoints.py
index 7ccc8a59..875a0e11 100644
--- a/tests/pipelines/test_face_2d_keypoints.py
+++ b/tests/pipelines/test_face_2d_keypoints.py
@@ -10,7 +10,7 @@ from modelscope.utils.test_utils import test_level
class EasyCVFace2DKeypointsPipelineTest(unittest.TestCase):
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ @unittest.skip('skip easycv related cases')
def test_face_2d_keypoints(self):
img_path = 'data/test/images/face_detection.png'
model_id = 'damo/cv_mobilenet_face-2d-keypoints_alignment'
diff --git a/tests/pipelines/test_face_detection.py b/tests/pipelines/test_face_detection.py
index 7e35cdbb..86e0f702 100644
--- a/tests/pipelines/test_face_detection.py
+++ b/tests/pipelines/test_face_detection.py
@@ -8,11 +8,10 @@ from modelscope.msdatasets import MsDataset
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.cv.image_utils import draw_face_detection_result
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class FaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
+class FaceDetectionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.face_detection
@@ -42,10 +41,6 @@ class FaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
result = face_detection(img_path)
self.show_result(img_path, result)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_face_image_generation.py b/tests/pipelines/test_face_image_generation.py
index 21d8e835..fbd7e3b5 100644
--- a/tests/pipelines/test_face_image_generation.py
+++ b/tests/pipelines/test_face_image_generation.py
@@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class FaceGenerationTest(unittest.TestCase, DemoCompatibilityCheck):
+class FaceGenerationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.face_image_generation
@@ -39,10 +38,6 @@ class FaceGenerationTest(unittest.TestCase, DemoCompatibilityCheck):
face_generation = pipeline(self.task)
self.pipeline_inference(face_generation, seed)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_face_recognition.py b/tests/pipelines/test_face_recognition.py
index d3451f5d..7b84590c 100644
--- a/tests/pipelines/test_face_recognition.py
+++ b/tests/pipelines/test_face_recognition.py
@@ -6,11 +6,10 @@ import numpy as np
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class FaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
+class FaceRecognitionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.face_recognition
@@ -28,10 +27,6 @@ class FaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
sim = np.dot(emb1[0], emb2[0])
print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}')
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_face_recognition_onnx_fm.py b/tests/pipelines/test_face_recognition_onnx_fm.py
index 8478b3bf..b60bec93 100644
--- a/tests/pipelines/test_face_recognition_onnx_fm.py
+++ b/tests/pipelines/test_face_recognition_onnx_fm.py
@@ -6,11 +6,10 @@ import numpy as np
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class FmFaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
+class FmFaceRecognitionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.face_recognition
@@ -31,10 +30,6 @@ class FmFaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
sim = np.dot(emb1[0], emb2[0])
print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}')
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_face_recognition_onnx_ir.py b/tests/pipelines/test_face_recognition_onnx_ir.py
index c45042be..a7cf008c 100644
--- a/tests/pipelines/test_face_recognition_onnx_ir.py
+++ b/tests/pipelines/test_face_recognition_onnx_ir.py
@@ -6,11 +6,10 @@ import numpy as np
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class IrFaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
+class IrFaceRecognitionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.face_recognition
@@ -31,10 +30,6 @@ class IrFaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
sim = np.dot(emb1[0], emb2[0])
print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}')
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_face_recognition_onnx_transface.py b/tests/pipelines/test_face_recognition_onnx_transface.py
new file mode 100644
index 00000000..183257f0
--- /dev/null
+++ b/tests/pipelines/test_face_recognition_onnx_transface.py
@@ -0,0 +1,35 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest
+
+import numpy as np
+
+from modelscope.outputs import OutputKeys
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+from modelscope.utils.test_utils import test_level
+
+
+class TransFaceRecognitionTest(unittest.TestCase):
+
+ def setUp(self) -> None:
+ self.task = Tasks.face_recognition
+ self.model_id = 'damo/cv_vit_face-recognition'
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_face_compare(self):
+ img1 = 'data/test/images/face_recognition_1.png'
+ img2 = 'data/test/images/face_recognition_2.png'
+
+ face_recognition = pipeline(
+ Tasks.face_recognition, model=self.model_id)
+ emb1 = face_recognition(img1)[OutputKeys.IMG_EMBEDDING]
+ emb2 = face_recognition(img2)[OutputKeys.IMG_EMBEDDING]
+ if emb1 is None or emb2 is None:
+ print('No Detected Face.')
+ else:
+ sim = np.dot(emb1[0], emb2[0])
+ print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/pipelines/test_face_recognition_ood.py b/tests/pipelines/test_face_recognition_ood.py
index 8a6fb444..68cf1f0b 100644
--- a/tests/pipelines/test_face_recognition_ood.py
+++ b/tests/pipelines/test_face_recognition_ood.py
@@ -6,11 +6,10 @@ import numpy as np
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class FaceRecognitionOodTest(unittest.TestCase, DemoCompatibilityCheck):
+class FaceRecognitionOodTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.face_recognition
@@ -39,10 +38,6 @@ class FaceRecognitionOodTest(unittest.TestCase, DemoCompatibilityCheck):
print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}')
print(f'OOD score: img1:{score1:.3f} img2:{score2:.3f}')
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_face_reconstruction.py b/tests/pipelines/test_face_reconstruction.py
index b35482fb..06950487 100644
--- a/tests/pipelines/test_face_reconstruction.py
+++ b/tests/pipelines/test_face_reconstruction.py
@@ -14,13 +14,12 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
sys.path.append('.')
-class FaceReconstructionTest(unittest.TestCase, DemoCompatibilityCheck):
+class FaceReconstructionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.face_reconstruction
@@ -60,7 +59,7 @@ class FaceReconstructionTest(unittest.TestCase, DemoCompatibilityCheck):
Tasks.face_reconstruction, model=model_dir)
self.pipeline_inference(face_reconstruction, self.test_image)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_modelhub(self):
face_reconstruction = pipeline(
Tasks.face_reconstruction,
@@ -68,10 +67,6 @@ class FaceReconstructionTest(unittest.TestCase, DemoCompatibilityCheck):
model_revision='v2.0.0-HRN')
self.pipeline_inference(face_reconstruction, self.test_image)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_faq_question_answering.py b/tests/pipelines/test_faq_question_answering.py
index 31680095..89f95162 100644
--- a/tests/pipelines/test_faq_question_answering.py
+++ b/tests/pipelines/test_faq_question_answering.py
@@ -12,11 +12,10 @@ from modelscope.pipelines.nlp import FaqQuestionAnsweringPipeline
from modelscope.preprocessors import \
FaqQuestionAnsweringTransformersPreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class FaqQuestionAnsweringTest(unittest.TestCase, DemoCompatibilityCheck):
+class FaqQuestionAnsweringTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.faq_question_answering
@@ -103,10 +102,6 @@ class FaqQuestionAnsweringTest(unittest.TestCase, DemoCompatibilityCheck):
['今天星期六', '明天星期几明天星期几'])
print(np.shape(sentence_vec))
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_fast_instance_segmentation.py b/tests/pipelines/test_fast_instance_segmentation.py
new file mode 100644
index 00000000..d5789150
--- /dev/null
+++ b/tests/pipelines/test_fast_instance_segmentation.py
@@ -0,0 +1,34 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest
+
+from modelscope.models import Model
+from modelscope.outputs import OutputKeys
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+from modelscope.utils.test_utils import test_level
+
+
+class FastInstanceSegmentationTest(unittest.TestCase):
+
+ def setUp(self) -> None:
+ self.task = Tasks.image_segmentation
+ self.model_id = 'damo/cv_resnet50_fast-instance-segmentation_coco'
+
+ image = 'data/test/images/image_instance_segmentation.jpg'
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_model_name(self):
+ pipeline_parsing = pipeline(
+ task=Tasks.image_segmentation, model=self.model_id)
+ print(pipeline_parsing(input=self.image)[OutputKeys.LABELS])
+
+ @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+ def test_run_with_model_from_modelhub(self):
+ model = Model.from_pretrained(self.model_id)
+ pipeline_parsing = pipeline(
+ task=Tasks.image_segmentation, model=model, preprocessor=None)
+ print(pipeline_parsing(input=self.image)[OutputKeys.LABELS])
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/pipelines/test_feature_extraction.py b/tests/pipelines/test_feature_extraction.py
index da6be1c0..8e365eab 100644
--- a/tests/pipelines/test_feature_extraction.py
+++ b/tests/pipelines/test_feature_extraction.py
@@ -11,12 +11,10 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import FeatureExtractionPipeline
from modelscope.preprocessors import FillMaskTransformersPreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class FeatureExtractionTaskModelTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class FeatureExtractionTaskModelTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.feature_extraction
diff --git a/tests/pipelines/test_fid_dialogue.py b/tests/pipelines/test_fid_dialogue.py
index c69823ce..96d7c919 100644
--- a/tests/pipelines/test_fid_dialogue.py
+++ b/tests/pipelines/test_fid_dialogue.py
@@ -3,11 +3,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class FidDialogueTest(unittest.TestCase, DemoCompatibilityCheck):
+class FidDialogueTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.fid_dialogue
@@ -49,8 +48,7 @@ class FidDialogueTest(unittest.TestCase, DemoCompatibilityCheck):
'forward_params': forward_params
}
- # @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
- @unittest.skip('temporarily skip')
+ @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_240m_pipeline(self):
pipeline_ins = pipeline(
task=self.task,
@@ -59,8 +57,7 @@ class FidDialogueTest(unittest.TestCase, DemoCompatibilityCheck):
result = pipeline_ins(self.input, **self.kwargs)
print(result)
- # @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
- @unittest.skip('temporarily skip')
+ @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_3_7b_pipeline(self):
pipeline_ins = pipeline(
task=self.task,
@@ -69,10 +66,6 @@ class FidDialogueTest(unittest.TestCase, DemoCompatibilityCheck):
result = pipeline_ins(self.input, **self.kwargs)
print(result)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_fill_mask.py b/tests/pipelines/test_fill_mask.py
index 0e427464..450ada15 100644
--- a/tests/pipelines/test_fill_mask.py
+++ b/tests/pipelines/test_fill_mask.py
@@ -10,12 +10,11 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import FillMaskPipeline
from modelscope.preprocessors import FillMaskTransformersPreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool
from modelscope.utils.test_utils import test_level
-class FillMaskTest(unittest.TestCase, DemoCompatibilityCheck):
+class FillMaskTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.fill_mask
@@ -134,8 +133,9 @@ class FillMaskTest(unittest.TestCase, DemoCompatibilityCheck):
f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: '
f'{pipeline_ins(test_input)}\n')
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_run_with_model_name(self):
+ # TODO: to be fixed in the future
# veco
pipeline_ins = pipeline(task=Tasks.fill_mask, model=self.model_id_veco)
for language in ['zh', 'en']:
@@ -176,10 +176,6 @@ class FillMaskTest(unittest.TestCase, DemoCompatibilityCheck):
print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: '
f'{pipeline_ins(test_input)}\n')
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_general_image_classification.py b/tests/pipelines/test_general_image_classification.py
index 978c474a..df036fa1 100644
--- a/tests/pipelines/test_general_image_classification.py
+++ b/tests/pipelines/test_general_image_classification.py
@@ -4,13 +4,11 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.regress_test_utils import MsRegressTool
from modelscope.utils.test_utils import test_level
-class GeneralImageClassificationTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class GeneralImageClassificationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_classification
@@ -83,10 +81,6 @@ class GeneralImageClassificationTest(unittest.TestCase,
result = general_image_classification('data/test/images/bird.JPEG')
print(result)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_general_recognition.py b/tests/pipelines/test_general_recognition.py
index ba713bbe..873aaa02 100644
--- a/tests/pipelines/test_general_recognition.py
+++ b/tests/pipelines/test_general_recognition.py
@@ -4,11 +4,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class GeneralRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
+class GeneralRecognitionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.general_recognition
@@ -22,10 +21,6 @@ class GeneralRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
result = general_recognition('data/test/images/dogs.jpg')
print(result)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_generative_multi_modal_embedding.py b/tests/pipelines/test_generative_multi_modal_embedding.py
index 18b96f65..3a853725 100644
--- a/tests/pipelines/test_generative_multi_modal_embedding.py
+++ b/tests/pipelines/test_generative_multi_modal_embedding.py
@@ -5,11 +5,10 @@ import unittest
from modelscope.models import Model
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class GEMMMultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck):
+class GEMMMultiModalEmbeddingTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.generative_multi_modal_embedding
@@ -68,10 +67,6 @@ class GEMMMultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck):
output = generative_multi_modal_embedding_pipeline(test_input)
print(output)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_gridvlp_classification.py b/tests/pipelines/test_gridvlp_classification.py
index 18c6c582..7479d0fa 100644
--- a/tests/pipelines/test_gridvlp_classification.py
+++ b/tests/pipelines/test_gridvlp_classification.py
@@ -3,11 +3,10 @@ import unittest
from modelscope.pipelines.multi_modal.gridvlp_pipeline import (
GridVlpClassificationPipeline, GridVlpEmbeddingPipeline)
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class GridVlpClassificationTest(unittest.TestCase, DemoCompatibilityCheck):
+class GridVlpClassificationTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'rgtjf1/multi-modal_gridvlp_classification_chinese-base-ecom-cate'
@@ -62,10 +61,6 @@ class GridVlpClassificationTest(unittest.TestCase, DemoCompatibilityCheck):
print(f'text: {self.text}\nimage: {self.image}\n'
f'outputs shape: {outputs.shape}')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_hand_2d_keypoints.py b/tests/pipelines/test_hand_2d_keypoints.py
index 43b569d0..a243a478 100644
--- a/tests/pipelines/test_hand_2d_keypoints.py
+++ b/tests/pipelines/test_hand_2d_keypoints.py
@@ -23,7 +23,7 @@ class Hand2DKeypointsPipelineTest(unittest.TestCase):
self.assertEqual(results[OutputKeys.KEYPOINTS].shape[2], 3)
self.assertEqual(results[OutputKeys.BOXES].shape[1], 4)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ @unittest.skip('skip test in current test level: no pipeline implemented')
def test_hand_2d_keypoints_with_default_model(self):
img_path = 'data/test/images/hand_keypoints.jpg'
diff --git a/tests/pipelines/test_hand_detection.py b/tests/pipelines/test_hand_detection.py
index 8a6bbd5a..9ea192a1 100644
--- a/tests/pipelines/test_hand_detection.py
+++ b/tests/pipelines/test_hand_detection.py
@@ -3,17 +3,16 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
+class ObjectDetectionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.domain_specific_object_detection
self.model_id = 'damo/cv_yolox-pai_hand-detection'
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ @unittest.skip('skip test in current test level: no pipeline implemented')
def test_hand_detection_pipeline(self):
test_image = 'data/test/images/hand_detection.jpg'
diff --git a/tests/pipelines/test_hicossl_video_embedding.py b/tests/pipelines/test_hicossl_video_embedding.py
index 8a7de1fa..a367457f 100644
--- a/tests/pipelines/test_hicossl_video_embedding.py
+++ b/tests/pipelines/test_hicossl_video_embedding.py
@@ -4,11 +4,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class HICOSSLVideoEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck):
+class HICOSSLVideoEmbeddingTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.video_embedding
@@ -23,10 +22,6 @@ class HICOSSLVideoEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck):
print(f'video embedding output: {result}.')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_hitea_tasks.py b/tests/pipelines/test_hitea_tasks.py
index 50efdfbd..60cd206d 100644
--- a/tests/pipelines/test_hitea_tasks.py
+++ b/tests/pipelines/test_hitea_tasks.py
@@ -5,11 +5,10 @@ from modelscope.models import Model
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class HiTeATasksTest(unittest.TestCase, DemoCompatibilityCheck):
+class HiTeATasksTest(unittest.TestCase):
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_run_with_video_captioning_with_model(self):
@@ -55,10 +54,6 @@ class HiTeATasksTest(unittest.TestCase, DemoCompatibilityCheck):
result = pipeline_vqa(input)
print(result)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_human_wholebody_keypoint.py b/tests/pipelines/test_human_wholebody_keypoint.py
index 7c5946cc..e0052f77 100644
--- a/tests/pipelines/test_human_wholebody_keypoint.py
+++ b/tests/pipelines/test_human_wholebody_keypoint.py
@@ -11,7 +11,7 @@ from modelscope.utils.test_utils import test_level
class EasyCVFace2DKeypointsPipelineTest(unittest.TestCase):
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ @unittest.skip('skip easycv related cases')
def test_human_wholebody_keypoint(self):
img_path = 'data/test/images/keypoints_detect/img_test_wholebody.jpg'
model_id = 'damo/cv_hrnetw48_human-wholebody-keypoint_image'
diff --git a/tests/pipelines/test_image_body_reshaping.py b/tests/pipelines/test_image_body_reshaping.py
index e1955e94..5a0ec0e7 100644
--- a/tests/pipelines/test_image_body_reshaping.py
+++ b/tests/pipelines/test_image_body_reshaping.py
@@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageBodyReshapingTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageBodyReshapingTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_body_reshaping
@@ -49,10 +48,6 @@ class ImageBodyReshapingTest(unittest.TestCase, DemoCompatibilityCheck):
image_body_reshaping = pipeline(Tasks.image_body_reshaping)
self.pipeline_inference(image_body_reshaping, self.test_image)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_color_enhance.py b/tests/pipelines/test_image_color_enhance.py
index 7c3ae8c0..5e222776 100644
--- a/tests/pipelines/test_image_color_enhance.py
+++ b/tests/pipelines/test_image_color_enhance.py
@@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageColorEnhanceTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_csrnet_image-color-enhance-models'
@@ -37,10 +36,6 @@ class ImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck):
self.pipeline_inference(img_color_enhance,
'data/test/images/image_color_enhance.png')
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_colorization.py b/tests/pipelines/test_image_colorization.py
index 547fce89..15ea314a 100644
--- a/tests/pipelines/test_image_colorization.py
+++ b/tests/pipelines/test_image_colorization.py
@@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageColorizationTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageColorizationTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_unet_image-colorization'
@@ -37,10 +36,6 @@ class ImageColorizationTest(unittest.TestCase, DemoCompatibilityCheck):
image_colorization = pipeline(Tasks.image_colorization)
self.pipeline_inference(image_colorization, self.test_image)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_debanding.py b/tests/pipelines/test_image_debanding.py
index 105d1f45..da784596 100644
--- a/tests/pipelines/test_image_debanding.py
+++ b/tests/pipelines/test_image_debanding.py
@@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageColorEnhanceTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_rrdb_image-debanding'
@@ -36,10 +35,6 @@ class ImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck):
self.pipeline_inference(img_debanding,
'data/test/images/image_debanding.png')
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_deblur.py b/tests/pipelines/test_image_deblur.py
index fc9d0101..529ae96c 100644
--- a/tests/pipelines/test_image_deblur.py
+++ b/tests/pipelines/test_image_deblur.py
@@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.cv import ImageDeblurPipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageDenoiseTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageDenoiseTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_deblurring
@@ -56,10 +55,6 @@ class ImageDenoiseTest(unittest.TestCase, DemoCompatibilityCheck):
h, w = deblur_img.shape[:2]
print('pipeline: the shape of output_img is {}x{}'.format(h, w))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_defrcn_fewshot.py b/tests/pipelines/test_image_defrcn_fewshot.py
index d2ecde13..1771d7b8 100644
--- a/tests/pipelines/test_image_defrcn_fewshot.py
+++ b/tests/pipelines/test_image_defrcn_fewshot.py
@@ -8,14 +8,13 @@ from modelscope.models import Model
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import test_level
logger = get_logger()
-class ImageDefrcnFewShotTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageDefrcnFewShotTest(unittest.TestCase):
def setUp(self) -> None:
logger.info('start install detectron2-0.3')
@@ -58,10 +57,6 @@ class ImageDefrcnFewShotTest(unittest.TestCase, DemoCompatibilityCheck):
self.task, model=cache_path, model_revision=self.revision)
print(pipeline_defrcn(input=self.image)[OutputKeys.LABELS])
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_denoise.py b/tests/pipelines/test_image_denoise.py
index d95dd343..891e703e 100644
--- a/tests/pipelines/test_image_denoise.py
+++ b/tests/pipelines/test_image_denoise.py
@@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.cv import ImageDenoisePipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageDenoiseTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageDenoiseTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_denoising
@@ -56,10 +55,6 @@ class ImageDenoiseTest(unittest.TestCase, DemoCompatibilityCheck):
h, w = denoise_img.shape[:2]
print('pipeline: the shape of output_img is {}x{}'.format(h, w))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_depth_estimation.py b/tests/pipelines/test_image_depth_estimation.py
index 6ec16a64..7f9b3bb9 100644
--- a/tests/pipelines/test_image_depth_estimation.py
+++ b/tests/pipelines/test_image_depth_estimation.py
@@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.cv.image_utils import depth_to_color
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageDepthEstimationTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageDepthEstimationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = 'image-depth-estimation'
diff --git a/tests/pipelines/test_image_depth_estimation_bts.py b/tests/pipelines/test_image_depth_estimation_bts.py
index bda7a41f..e952da30 100644
--- a/tests/pipelines/test_image_depth_estimation_bts.py
+++ b/tests/pipelines/test_image_depth_estimation_bts.py
@@ -8,11 +8,10 @@ from modelscope.models import Model
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageDepthEstimationBtsTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageDepthEstimationBtsTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_depth_estimation
@@ -45,10 +44,6 @@ class ImageDepthEstimationBtsTest(unittest.TestCase, DemoCompatibilityCheck):
cv2.imwrite('result_snapshot.jpg', depth_vis)
print('Test run with snapshot ok.')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_driving_perception.py b/tests/pipelines/test_image_driving_perception.py
index 2f28b7d3..a6ad902d 100644
--- a/tests/pipelines/test_image_driving_perception.py
+++ b/tests/pipelines/test_image_driving_perception.py
@@ -17,11 +17,10 @@ from modelscope.preprocessors.image import LoadImage
from modelscope.utils.constant import Tasks
from modelscope.utils.cv.image_utils import \
show_image_driving_perception_result
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageDrivingPerceptionTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageDrivingPerceptionTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_yolopv2_image-driving-perception_bdd100k'
@@ -59,10 +58,6 @@ class ImageDrivingPerceptionTest(unittest.TestCase, DemoCompatibilityCheck):
self.pipeline_inference(image_driving_perception_pipeline,
self.img_path)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_face_fusion.py b/tests/pipelines/test_image_face_fusion.py
index fde15edf..54d2c3a4 100644
--- a/tests/pipelines/test_image_face_fusion.py
+++ b/tests/pipelines/test_image_face_fusion.py
@@ -7,11 +7,10 @@ from modelscope.hub.snapshot_download import snapshot_download
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageFaceFusionTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageFaceFusionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_face_fusion
@@ -50,10 +49,6 @@ class ImageFaceFusionTest(unittest.TestCase, DemoCompatibilityCheck):
cv2.imwrite('result_facefusion.png', result[OutputKeys.OUTPUT_IMG])
print('facefusion.test_run_modelhub_default_model done')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_human_parsing.py b/tests/pipelines/test_image_human_parsing.py
index 77d75862..f9263ea8 100644
--- a/tests/pipelines/test_image_human_parsing.py
+++ b/tests/pipelines/test_image_human_parsing.py
@@ -5,11 +5,10 @@ from modelscope.models import Model
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageHumanParsingTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageHumanParsingTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_segmentation
@@ -39,10 +38,6 @@ class ImageHumanParsingTest(unittest.TestCase, DemoCompatibilityCheck):
task=Tasks.image_segmentation, model=model, preprocessor=None)
print(pipeline_parsing(input=self.image_multiple)[OutputKeys.LABELS])
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_inpainting_sdv2.py b/tests/pipelines/test_image_inpainting_sdv2.py
index 81002ce8..b21ac69d 100644
--- a/tests/pipelines/test_image_inpainting_sdv2.py
+++ b/tests/pipelines/test_image_inpainting_sdv2.py
@@ -10,11 +10,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.cv import ImageInpaintingSDV2Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageInpaintingSDV2Test(unittest.TestCase, DemoCompatibilityCheck):
+class ImageInpaintingSDV2Test(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_inpainting
@@ -50,10 +49,6 @@ class ImageInpaintingSDV2Test(unittest.TestCase, DemoCompatibilityCheck):
print(
'pipeline: the output image path is {}'.format(output_image_path))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_instance_segmentation.py b/tests/pipelines/test_image_instance_segmentation.py
index 2ba0724a..c305a7c0 100644
--- a/tests/pipelines/test_image_instance_segmentation.py
+++ b/tests/pipelines/test_image_instance_segmentation.py
@@ -12,11 +12,10 @@ from modelscope.pipelines.cv import ImageInstanceSegmentationPipeline
from modelscope.preprocessors import build_preprocessor
from modelscope.utils.config import Config
from modelscope.utils.constant import Fields, ModelFile, Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageInstanceSegmentationTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageInstanceSegmentationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_segmentation
@@ -61,10 +60,6 @@ class ImageInstanceSegmentationTest(unittest.TestCase, DemoCompatibilityCheck):
print(f'pipeline1:{pipeline1(input=self.image)[OutputKeys.LABELS]}')
print(f'pipeline2: {pipeline2(input=self.image)[OutputKeys.LABELS]}')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_layout_estimation.py b/tests/pipelines/test_image_layout_estimation.py
index b312e8c2..4c93fa30 100644
--- a/tests/pipelines/test_image_layout_estimation.py
+++ b/tests/pipelines/test_image_layout_estimation.py
@@ -7,11 +7,10 @@ import cv2
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageLayoutEstimationTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageLayoutEstimationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.indoor_layout_estimation
diff --git a/tests/pipelines/test_image_matching.py b/tests/pipelines/test_image_matching.py
index 55fd56df..6007ea31 100644
--- a/tests/pipelines/test_image_matching.py
+++ b/tests/pipelines/test_image_matching.py
@@ -11,11 +11,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.cv.image_utils import match_pair_visualization
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageMatchingTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageMatchingTest(unittest.TestCase):
def setUp(self) -> None:
self.task = 'image-matching'
diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py
index a3edb705..d6d87a0c 100644
--- a/tests/pipelines/test_image_matting.py
+++ b/tests/pipelines/test_image_matting.py
@@ -8,11 +8,10 @@ from modelscope.msdatasets import MsDataset
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import ModelFile, Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageMattingTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageMattingTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_unet_image-matting'
@@ -61,10 +60,6 @@ class ImageMattingTest(unittest.TestCase, DemoCompatibilityCheck):
f'Output written to dir: {osp.dirname(osp.abspath("result_0.png"))}'
)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_mvs_depth_estimation.py b/tests/pipelines/test_image_mvs_depth_estimation.py
index a7e327e3..b158623b 100644
--- a/tests/pipelines/test_image_mvs_depth_estimation.py
+++ b/tests/pipelines/test_image_mvs_depth_estimation.py
@@ -6,11 +6,10 @@ from modelscope.hub.snapshot_download import snapshot_download
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageMVSDepthEstimationTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageMVSDepthEstimationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = 'image-multi-view-depth-estimation'
diff --git a/tests/pipelines/test_image_open_vocabulary_detection.py b/tests/pipelines/test_image_open_vocabulary_detection.py
index 52dc1d11..923e1efe 100644
--- a/tests/pipelines/test_image_open_vocabulary_detection.py
+++ b/tests/pipelines/test_image_open_vocabulary_detection.py
@@ -10,15 +10,13 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.cv.image_utils import draw_box
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import test_level
logger = get_logger()
-class ImageOpenVocabularyDetectionTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class ImageOpenVocabularyDetectionTest(unittest.TestCase):
def setUp(self) -> None:
os.system(
@@ -74,10 +72,6 @@ class ImageOpenVocabularyDetectionTest(unittest.TestCase,
cv2.imwrite('result_snapshot.jpg', image)
print('Test run with snapshot ok.')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_panoptic_segmentation.py b/tests/pipelines/test_image_panoptic_segmentation.py
index 4f12e6af..38c66055 100644
--- a/tests/pipelines/test_image_panoptic_segmentation.py
+++ b/tests/pipelines/test_image_panoptic_segmentation.py
@@ -9,17 +9,16 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.cv.image_utils import panoptic_seg_masks_to_image
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImagePanopticSegmentationTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImagePanopticSegmentationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_segmentation
self.model_id = 'damo/cv_swinL_panoptic-segmentation_cocopan'
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skip('skip test: no pipeline implemented')
def test_image_panoptic_segmentation(self):
input_location = 'data/test/images/image_panoptic_segmentation.jpg'
pan_segmentor = pipeline(Tasks.image_segmentation, model=self.model_id)
@@ -29,7 +28,7 @@ class ImagePanopticSegmentationTest(unittest.TestCase, DemoCompatibilityCheck):
cv2.imwrite('result.jpg', draw_img)
print('print test_image_panoptic_segmentation return success')
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skip('skip test: no pipeline implemented')
def test_image_panoptic_segmentation_from_PIL(self):
input_location = 'data/test/images/image_panoptic_segmentation.jpg'
pan_segmentor = pipeline(Tasks.image_segmentation, model=self.model_id)
@@ -40,10 +39,6 @@ class ImagePanopticSegmentationTest(unittest.TestCase, DemoCompatibilityCheck):
cv2.imwrite('result.jpg', draw_img)
print('print test_image_panoptic_segmentation from PIL return success')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_portrait_enhancement.py b/tests/pipelines/test_image_portrait_enhancement.py
index f0814c07..43978fd2 100644
--- a/tests/pipelines/test_image_portrait_enhancement.py
+++ b/tests/pipelines/test_image_portrait_enhancement.py
@@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImagePortraitEnhancementTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImagePortraitEnhancementTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_portrait_enhancement
@@ -46,10 +45,6 @@ class ImagePortraitEnhancementTest(unittest.TestCase, DemoCompatibilityCheck):
face_enhancement = pipeline(Tasks.image_portrait_enhancement)
self.pipeline_inference(face_enhancement, self.test_image)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_quality_assessment_degradation.py b/tests/pipelines/test_image_quality_assessment_degradation.py
index cb0f24c7..1acf8163 100644
--- a/tests/pipelines/test_image_quality_assessment_degradation.py
+++ b/tests/pipelines/test_image_quality_assessment_degradation.py
@@ -9,14 +9,12 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.cv import ImageQualityAssessmentDegradationPipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
sys.path.insert(0, '.')
-class ImageQualityAssessmentDegradationTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class ImageQualityAssessmentDegradationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_quality_assessment_degradation
@@ -54,10 +52,6 @@ class ImageQualityAssessmentDegradationTest(unittest.TestCase,
out_path = pipeline_ins(input=self.test_img)[OutputKeys.SCORES]
print('pipeline: the out_path is {}'.format(out_path))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_quality_assessment_man.py b/tests/pipelines/test_image_quality_assessment_man.py
index 2668d45d..f36f8b3c 100644
--- a/tests/pipelines/test_image_quality_assessment_man.py
+++ b/tests/pipelines/test_image_quality_assessment_man.py
@@ -7,11 +7,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.cv import ImageQualityAssessmentMANPipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageQualityAssessmentMANTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageQualityAssessmentMANTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_quality_assessment_mos
@@ -47,10 +46,6 @@ class ImageQualityAssessmentMANTest(unittest.TestCase, DemoCompatibilityCheck):
out_path = pipeline_ins(input=self.test_img)[OutputKeys.SCORE]
print('pipeline: the out_path is {}'.format(out_path))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_quality_assessment_mos.py b/tests/pipelines/test_image_quality_assessment_mos.py
index 608be8f8..3ca26b0a 100644
--- a/tests/pipelines/test_image_quality_assessment_mos.py
+++ b/tests/pipelines/test_image_quality_assessment_mos.py
@@ -7,11 +7,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.cv import ImageQualityAssessmentMosPipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageQualityAssessmentMosTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageQualityAssessmentMosTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_quality_assessment_mos
@@ -47,10 +46,6 @@ class ImageQualityAssessmentMosTest(unittest.TestCase, DemoCompatibilityCheck):
out_path = pipeline_ins(input=self.test_img)[OutputKeys.SCORE]
print('pipeline: the out_path is {}'.format(out_path))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_reid_person.py b/tests/pipelines/test_image_reid_person.py
index 310cdd66..e107d5ee 100644
--- a/tests/pipelines/test_image_reid_person.py
+++ b/tests/pipelines/test_image_reid_person.py
@@ -6,11 +6,10 @@ from PIL import Image
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageReidPersonTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageReidPersonTest(unittest.TestCase):
def setUp(self) -> None:
self.input_location = 'data/test/images/image_reid_person.jpg'
@@ -50,10 +49,6 @@ class ImageReidPersonTest(unittest.TestCase, DemoCompatibilityCheck):
)
print(f'The img embedding is: {result[OutputKeys.IMG_EMBEDDING]}')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_restoration.py b/tests/pipelines/test_image_restoration.py
index baffa7d5..b9c600b2 100644
--- a/tests/pipelines/test_image_restoration.py
+++ b/tests/pipelines/test_image_restoration.py
@@ -4,11 +4,10 @@ import unittest
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageRestorationTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageRestorationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_demoireing
@@ -24,10 +23,6 @@ class ImageRestorationTest(unittest.TestCase, DemoCompatibilityCheck):
Image.fromarray(result[OutputKeys.OUTPUT_IMG]).save(input_location
+ '_demoire.jpg')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_semantic_segmentation.py b/tests/pipelines/test_image_semantic_segmentation.py
index 2e8d7522..5bc89bd1 100644
--- a/tests/pipelines/test_image_semantic_segmentation.py
+++ b/tests/pipelines/test_image_semantic_segmentation.py
@@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.cv.image_utils import semantic_seg_masks_to_image
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageSemanticSegmentationTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageSemanticSegmentationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = 'image-segmentation'
@@ -54,10 +53,6 @@ class ImageSemanticSegmentationTest(unittest.TestCase, DemoCompatibilityCheck):
cv2.imwrite('result.jpg', draw_img)
print('test_image_semantic_segmentation_vitadapter_from_PIL DONE')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_structured_model_probing.py b/tests/pipelines/test_image_structured_model_probing.py
index 1befcf98..f4d46d92 100644
--- a/tests/pipelines/test_image_structured_model_probing.py
+++ b/tests/pipelines/test_image_structured_model_probing.py
@@ -4,12 +4,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageStructuredModelProbingTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class ImageStructuredModelProbingTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_classification
diff --git a/tests/pipelines/test_image_style_transfer.py b/tests/pipelines/test_image_style_transfer.py
index 5f37f204..223ec757 100644
--- a/tests/pipelines/test_image_style_transfer.py
+++ b/tests/pipelines/test_image_style_transfer.py
@@ -7,11 +7,10 @@ from modelscope.hub.snapshot_download import snapshot_download
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageStyleTransferTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageStyleTransferTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_style_transfer
@@ -53,10 +52,6 @@ class ImageStyleTransferTest(unittest.TestCase, DemoCompatibilityCheck):
cv2.imwrite('result_styletransfer3.png', result[OutputKeys.OUTPUT_IMG])
print('style_transfer.test_run_modelhub_default_model done')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_image_super_resolution.py b/tests/pipelines/test_image_super_resolution.py
index d5cbebe8..45066300 100644
--- a/tests/pipelines/test_image_super_resolution.py
+++ b/tests/pipelines/test_image_super_resolution.py
@@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageSuperResolutionTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageSuperResolutionTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_rrdb_image-super-resolution'
@@ -37,10 +36,6 @@ class ImageSuperResolutionTest(unittest.TestCase, DemoCompatibilityCheck):
super_resolution = pipeline(Tasks.image_super_resolution)
self.pipeline_inference(super_resolution, self.img)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_interactive_translation_pipeline.py b/tests/pipelines/test_interactive_translation_pipeline.py
index b973250a..c240ba70 100644
--- a/tests/pipelines/test_interactive_translation_pipeline.py
+++ b/tests/pipelines/test_interactive_translation_pipeline.py
@@ -3,11 +3,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class InteractiveTranslationTest(unittest.TestCase, DemoCompatibilityCheck):
+class InteractiveTranslationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.translation
@@ -28,10 +27,6 @@ class InteractiveTranslationTest(unittest.TestCase, DemoCompatibilityCheck):
pipeline_ins = pipeline(self.task, model=model_id)
print(pipeline_ins(inputs + '' + prefix))
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_inverse_text_processing.py b/tests/pipelines/test_inverse_text_processing.py
index dc7fb1e0..a1d5a712 100644
--- a/tests/pipelines/test_inverse_text_processing.py
+++ b/tests/pipelines/test_inverse_text_processing.py
@@ -3,11 +3,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class InverseTextProcessingTest(unittest.TestCase, DemoCompatibilityCheck):
+class InverseTextProcessingTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.inverse_text_processing,
@@ -61,10 +60,6 @@ class InverseTextProcessingTest(unittest.TestCase, DemoCompatibilityCheck):
itn_result = itn_inference_pipline(text_in=lang_text_in)
print(itn_result)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_key_word_spotting.py b/tests/pipelines/test_key_word_spotting.py
index 13f7a308..f22bc845 100644
--- a/tests/pipelines/test_key_word_spotting.py
+++ b/tests/pipelines/test_key_word_spotting.py
@@ -10,7 +10,6 @@ import soundfile
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import ColorCodes, Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import download_and_untar, test_level
@@ -27,7 +26,7 @@ NEG_TESTSETS_FILE = 'neg_testsets.tar.gz'
NEG_TESTSETS_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/KWS/neg_testsets.tar.gz'
-class KeyWordSpottingTest(unittest.TestCase, DemoCompatibilityCheck):
+class KeyWordSpottingTest(unittest.TestCase):
action_info = {
'test_run_with_wav': {
'checking_item': [OutputKeys.KWS_LIST, 0, 'keyword'],
@@ -344,10 +343,6 @@ class KeyWordSpottingTest(unittest.TestCase, DemoCompatibilityCheck):
model_id=model_id, audio_in=wav_path, keywords=keywords)
logger.info(ColorCodes.YELLOW + str(kws_result) + ColorCodes.END)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_key_word_spotting_farfield.py b/tests/pipelines/test_key_word_spotting_farfield.py
index e736f48b..3193149c 100644
--- a/tests/pipelines/test_key_word_spotting_farfield.py
+++ b/tests/pipelines/test_key_word_spotting_farfield.py
@@ -19,6 +19,7 @@ class KWSFarfieldTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/speech_dfsmn_kws_char_farfield_16k_nihaomiya'
+ self.model_id_iot = 'damo/speech_dfsmn_kws_char_farfield_iot_16k_nihaomiya'
if os.path.isfile(OUTPUT_WAV):
os.remove(OUTPUT_WAV)
@@ -29,6 +30,13 @@ class KWSFarfieldTest(unittest.TestCase):
self.assertEqual(len(result['kws_list']), 5)
print(result['kws_list'][-1])
+ @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+ def test_normal_iot(self):
+ kws = pipeline(Tasks.keyword_spotting, model=self.model_id_iot)
+ result = kws(os.path.join(os.getcwd(), TEST_SPEECH_FILE))
+ self.assertEqual(len(result['kws_list']), 5)
+ print(result['kws_list'][-1])
+
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_output(self):
kws = pipeline(Tasks.keyword_spotting, model=self.model_id)
diff --git a/tests/pipelines/test_language_guided_video_summarization.py b/tests/pipelines/test_language_guided_video_summarization.py
index 0f06d4f2..01d88b55 100755
--- a/tests/pipelines/test_language_guided_video_summarization.py
+++ b/tests/pipelines/test_language_guided_video_summarization.py
@@ -9,12 +9,10 @@ import torch
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class LanguageGuidedVideoSummarizationTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class LanguageGuidedVideoSummarizationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.language_guided_video_summarization
@@ -40,10 +38,6 @@ class LanguageGuidedVideoSummarizationTest(unittest.TestCase,
print(f'video summarization output:\n {result}.')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_language_identification.py b/tests/pipelines/test_language_identification.py
index ddd91e69..ccfa1a7d 100644
--- a/tests/pipelines/test_language_identification.py
+++ b/tests/pipelines/test_language_identification.py
@@ -3,11 +3,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class LanguageIdentificationTest(unittest.TestCase, DemoCompatibilityCheck):
+class LanguageIdentificationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.text_classification
@@ -22,11 +21,6 @@ class LanguageIdentificationTest(unittest.TestCase, DemoCompatibilityCheck):
pipeline_ins = pipeline(self.task, model=self.model_id)
print(pipeline_ins(input=inputs))
- @unittest.skipUnless(test_level() >= 0,
- 'skip test case in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_license_plate_detection.py b/tests/pipelines/test_license_plate_detection.py
index 70cdb820..3c30618d 100644
--- a/tests/pipelines/test_license_plate_detection.py
+++ b/tests/pipelines/test_license_plate_detection.py
@@ -5,11 +5,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class LicensePlateDectionTest(unittest.TestCase, DemoCompatibilityCheck):
+class LicensePlateDetectionTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_resnet18_license-plate-detection_damo'
@@ -32,10 +31,6 @@ class LicensePlateDectionTest(unittest.TestCase, DemoCompatibilityCheck):
license_plate_detection = pipeline(Tasks.license_plate_detection)
self.pipeline_inference(license_plate_detection, self.test_image)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_lineless_table_recognition.py b/tests/pipelines/test_lineless_table_recognition.py
index 53fde8a1..59e173f4 100644
--- a/tests/pipelines/test_lineless_table_recognition.py
+++ b/tests/pipelines/test_lineless_table_recognition.py
@@ -8,11 +8,10 @@ import numpy as np
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class TableRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
+class TableRecognitionTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_resnet-transformer_table-structure-recognition_lore'
@@ -35,10 +34,6 @@ class TableRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
lineless_table_recognition = pipeline(Tasks.lineless_table_recognition)
self.pipeline_inference(lineless_table_recognition, self.test_image)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_live_category.py b/tests/pipelines/test_live_category.py
index 391ed283..88b94b69 100644
--- a/tests/pipelines/test_live_category.py
+++ b/tests/pipelines/test_live_category.py
@@ -3,11 +3,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class LiveCategoryTest(unittest.TestCase, DemoCompatibilityCheck):
+class LiveCategoryTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.live_category
@@ -21,10 +20,6 @@ class LiveCategoryTest(unittest.TestCase, DemoCompatibilityCheck):
print(f'live category output: {result}.')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_mask_face_recognition.py b/tests/pipelines/test_mask_face_recognition.py
index 550e80e4..2a7e8ede 100644
--- a/tests/pipelines/test_mask_face_recognition.py
+++ b/tests/pipelines/test_mask_face_recognition.py
@@ -6,11 +6,10 @@ import numpy as np
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class MaskFaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
+class MaskFaceRecognitionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.face_recognition
@@ -28,10 +27,6 @@ class MaskFaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
sim = np.dot(emb1[0], emb2[0])
print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}')
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_maskdino_instance_segmentation.py b/tests/pipelines/test_maskdino_instance_segmentation.py
index 14e0887d..88c46de1 100644
--- a/tests/pipelines/test_maskdino_instance_segmentation.py
+++ b/tests/pipelines/test_maskdino_instance_segmentation.py
@@ -8,12 +8,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.cv import MaskDINOInstanceSegmentationPipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class MaskDINOInstanceSegmentationTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class MaskDINOInstanceSegmentationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_segmentation
@@ -45,10 +43,6 @@ class MaskDINOInstanceSegmentationTest(unittest.TestCase,
print(f'pipeline1:{pipeline1(input=self.image)[OutputKeys.LABELS]}')
print(f'pipeline2: {pipeline2(input=self.image)[OutputKeys.LABELS]}')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_mglm_text_summarization.py b/tests/pipelines/test_mglm_text_summarization.py
index 47abc741..703e9bbe 100644
--- a/tests/pipelines/test_mglm_text_summarization.py
+++ b/tests/pipelines/test_mglm_text_summarization.py
@@ -6,11 +6,10 @@ from modelscope.models import Model
from modelscope.pipelines import pipeline
from modelscope.preprocessors import MGLMSummarizationPreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class mGLMTest(unittest.TestCase, DemoCompatibilityCheck):
+class mGLMTest(unittest.TestCase):
def setUp(self) -> None:
self.output_dir = 'unittest_output'
diff --git a/tests/pipelines/test_mobile_image_super_resolution.py b/tests/pipelines/test_mobile_image_super_resolution.py
index 2cc7adf0..a486d244 100644
--- a/tests/pipelines/test_mobile_image_super_resolution.py
+++ b/tests/pipelines/test_mobile_image_super_resolution.py
@@ -8,12 +8,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class MobileImageSuperResolutionTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class MobileImageSuperResolutionTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_ecbsr_image-super-resolution_mobile'
@@ -38,10 +36,6 @@ class MobileImageSuperResolutionTest(unittest.TestCase,
super_resolution = pipeline(Tasks.image_super_resolution)
self.pipeline_inference(super_resolution, self.img)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_motion_generation.py b/tests/pipelines/test_motion_generation.py
index 7938611c..43903eb8 100644
--- a/tests/pipelines/test_motion_generation.py
+++ b/tests/pipelines/test_motion_generation.py
@@ -4,11 +4,10 @@ import unittest
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class MDMMotionGenerationTest(unittest.TestCase, DemoCompatibilityCheck):
+class MDMMotionGenerationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.motion_generation
@@ -23,10 +22,6 @@ class MDMMotionGenerationTest(unittest.TestCase, DemoCompatibilityCheck):
result[OutputKeys.KEYPOINTS].shape)
print('motion generation video file:', result[OutputKeys.OUTPUT_VIDEO])
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_movie_scene_segmentation.py b/tests/pipelines/test_movie_scene_segmentation.py
index 0ac8b716..c6498a6c 100644
--- a/tests/pipelines/test_movie_scene_segmentation.py
+++ b/tests/pipelines/test_movie_scene_segmentation.py
@@ -10,11 +10,10 @@ from modelscope.pipelines import pipeline
from modelscope.trainers import build_trainer
from modelscope.utils.config import Config, ConfigDict
from modelscope.utils.constant import ModelFile, Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class MovieSceneSegmentationTest(unittest.TestCase, DemoCompatibilityCheck):
+class MovieSceneSegmentationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.movie_scene_segmentation
@@ -123,10 +122,6 @@ class MovieSceneSegmentationTest(unittest.TestCase, DemoCompatibilityCheck):
else:
raise ValueError('process error')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_mplug_owl_multimodal_dialogue.py b/tests/pipelines/test_mplug_owl_multimodal_dialogue.py
new file mode 100644
index 00000000..57bce67e
--- /dev/null
+++ b/tests/pipelines/test_mplug_owl_multimodal_dialogue.py
@@ -0,0 +1,100 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest
+
+from PIL import Image
+
+from modelscope.models import Model
+from modelscope.outputs import OutputKeys
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+from modelscope.utils.test_utils import test_level
+
+
+class MplugOwlMultimodalDialogueTest(unittest.TestCase):
+
+ @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+ def test_run_with_multimodal_dialogue_with_model(self):
+ model = Model.from_pretrained(
+ 'damo/multi-modal_mplug_owl_multimodal-dialogue_7b')
+ pipeline_multimodal_dialogue = pipeline(
+ task=Tasks.multimodal_dialogue,
+ model=model,
+ )
+ image = 'data/resource/portrait_input.png'
+ system_prompt_1 = 'The following is a conversation between a curious human and AI assistant.'
+ system_prompt_2 = "The assistant gives helpful, detailed, and polite answers to the user's questions."
+ messages = {
+ 'messages': [
+ {
+ 'role': 'system',
+ 'content': system_prompt_1 + ' ' + system_prompt_2
+ },
+ {
+ 'role': 'user',
+ 'content': [{
+ 'image': image
+ }]
+ },
+ {
+ 'role': 'user',
+ 'content': 'Describe the facial expression of the man.'
+ },
+ ]
+ }
+ result = pipeline_multimodal_dialogue(messages)
+ print(result[OutputKeys.TEXT])
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_multimodal_dialogue_with_name(self):
+ pipeline_multimodal_dialogue = pipeline(
+ Tasks.multimodal_dialogue,
+ model='damo/multi-modal_mplug_owl_multimodal-dialogue_7b')
+ image = 'data/resource/portrait_input.png'
+ system_prompt_1 = 'The following is a conversation between a curious human and AI assistant.'
+ system_prompt_2 = "The assistant gives helpful, detailed, and polite answers to the user's questions."
+ messages = {
+ 'messages': [
+ {
+ 'role': 'system',
+ 'content': system_prompt_1 + ' ' + system_prompt_2
+ },
+ {
+ 'role': 'user',
+ 'content': [{
+ 'image': image
+ }]
+ },
+ {
+ 'role': 'user',
+ 'content': 'Describe the facial expression of the man.'
+ },
+ ]
+ }
+ result = pipeline_multimodal_dialogue(messages)
+ print(result[OutputKeys.TEXT])
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_multimodal_dialogue_with_text(self):
+ pipeline_multimodal_dialogue = pipeline(
+ Tasks.multimodal_dialogue,
+ model='damo/multi-modal_mplug_owl_multimodal-dialogue_7b')
+ system_prompt_1 = 'The following is a conversation between a curious human and AI assistant.'
+ system_prompt_2 = "The assistant gives helpful, detailed, and polite answers to the user's questions."
+ messages = {
+ 'messages': [
+ {
+ 'role': 'system',
+ 'content': system_prompt_1 + ' ' + system_prompt_2
+ },
+ {
+ 'role': 'user',
+                    'content': 'Where is the capital of China?'
+ },
+ ]
+ }
+ result = pipeline_multimodal_dialogue(messages)
+ print(result[OutputKeys.TEXT])
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/pipelines/test_mplug_tasks.py b/tests/pipelines/test_mplug_tasks.py
index 21439ce2..cff998b4 100644
--- a/tests/pipelines/test_mplug_tasks.py
+++ b/tests/pipelines/test_mplug_tasks.py
@@ -7,11 +7,10 @@ from modelscope.models import Model
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck):
+class MplugTasksTest(unittest.TestCase):
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_run_with_image_captioning_with_model(self):
@@ -95,10 +94,6 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck):
result = pipeline_vqa(input)
print(result)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_msrresnetlite_video_super_resolution.py b/tests/pipelines/test_msrresnetlite_video_super_resolution.py
index d79e9702..d44cbd34 100644
--- a/tests/pipelines/test_msrresnetlite_video_super_resolution.py
+++ b/tests/pipelines/test_msrresnetlite_video_super_resolution.py
@@ -7,11 +7,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.cv import VideoSuperResolutionPipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class MSRResNetLiteVSRTest(unittest.TestCase, DemoCompatibilityCheck):
+class MSRResNetLiteVSRTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.video_super_resolution
@@ -50,10 +49,6 @@ class MSRResNetLiteVSRTest(unittest.TestCase, DemoCompatibilityCheck):
input=self.test_video)[OutputKeys.OUTPUT_VIDEO]
print('pipeline: the output video path is {}'.format(out_video_path))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_multi_modal_embedding.py b/tests/pipelines/test_multi_modal_embedding.py
index 7eddc690..486adc94 100644
--- a/tests/pipelines/test_multi_modal_embedding.py
+++ b/tests/pipelines/test_multi_modal_embedding.py
@@ -8,11 +8,10 @@ from modelscope.models import Model
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class MultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck):
+class MultiModalEmbeddingTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.multi_modal_embedding
@@ -54,10 +53,6 @@ class MultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck):
print('l2-norm: {}'.format(torch.norm(text_embedding,
dim=-1).item())) # should be 1.0
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_multilingual_named_entity_recognition.py b/tests/pipelines/test_multilingual_named_entity_recognition.py
index ec134023..a31adf1f 100644
--- a/tests/pipelines/test_multilingual_named_entity_recognition.py
+++ b/tests/pipelines/test_multilingual_named_entity_recognition.py
@@ -8,12 +8,10 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import NamedEntityRecognitionPipeline
from modelscope.preprocessors import NERPreprocessorThai, NERPreprocessorViet
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class MultilingualNamedEntityRecognitionTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class MultilingualNamedEntityRecognitionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.named_entity_recognition
@@ -123,10 +121,6 @@ class MultilingualNamedEntityRecognitionTest(unittest.TestCase,
self.viet_sentence[5:]
]))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_multilingual_word_segmentation.py b/tests/pipelines/test_multilingual_word_segmentation.py
index f10e6d98..878af0d3 100644
--- a/tests/pipelines/test_multilingual_word_segmentation.py
+++ b/tests/pipelines/test_multilingual_word_segmentation.py
@@ -8,12 +8,11 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import WordSegmentationThaiPipeline
from modelscope.preprocessors import WordSegmentationPreprocessorThai
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.regress_test_utils import MsRegressTool
from modelscope.utils.test_utils import test_level
-class WordSegmentationTest(unittest.TestCase, DemoCompatibilityCheck):
+class WordSegmentationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.word_segmentation
@@ -65,10 +64,6 @@ class WordSegmentationTest(unittest.TestCase, DemoCompatibilityCheck):
pipeline_ins(
input=[self.sentence, self.sentence[:10], self.sentence[6:]]))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_named_entity_recognition.py b/tests/pipelines/test_named_entity_recognition.py
index 175e9261..8b7424f4 100644
--- a/tests/pipelines/test_named_entity_recognition.py
+++ b/tests/pipelines/test_named_entity_recognition.py
@@ -10,11 +10,10 @@ from modelscope.pipelines.nlp import NamedEntityRecognitionPipeline
from modelscope.preprocessors import \
TokenClassificationTransformersPreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class NamedEntityRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
+class NamedEntityRecognitionTest(unittest.TestCase):
language_examples = {
'zh':
'新华社北京二月十一日电(记者唐虹)',
@@ -470,10 +469,6 @@ class NamedEntityRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
model_id)
print(pipeline_ins(input=sentence))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_nerf_recon_acc.py b/tests/pipelines/test_nerf_recon_acc.py
index 95d879fb..7ca0fa44 100644
--- a/tests/pipelines/test_nerf_recon_acc.py
+++ b/tests/pipelines/test_nerf_recon_acc.py
@@ -9,11 +9,10 @@ from modelscope.msdatasets import MsDataset
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import DownloadMode, Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class NeRFReconAccTest(unittest.TestCase, DemoCompatibilityCheck):
+class NeRFReconAccTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_nerf-3d-reconstruction-accelerate_damo'
@@ -63,11 +62,6 @@ class NeRFReconAccTest(unittest.TestCase, DemoCompatibilityCheck):
dict(data_dir=self.data_dir, render_dir=self.render_dir))
print('facefusion.test_run_modelhub_default_model done')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- @unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest only')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_nli.py b/tests/pipelines/test_nli.py
index a7d2a236..5bbe353b 100644
--- a/tests/pipelines/test_nli.py
+++ b/tests/pipelines/test_nli.py
@@ -7,12 +7,11 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import TextClassificationPipeline
from modelscope.preprocessors import TextClassificationTransformersPreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool
from modelscope.utils.test_utils import test_level
-class NLITest(unittest.TestCase, DemoCompatibilityCheck):
+class NLITest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.nli
@@ -78,10 +77,6 @@ class NLITest(unittest.TestCase, DemoCompatibilityCheck):
pipeline_ins = pipeline(task=Tasks.nli)
print(pipeline_ins(input=(self.sentence1, self.sentence2)))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_object_detecion_3d.py b/tests/pipelines/test_object_detecion_3d.py
index bb0eebda..69b75b39 100644
--- a/tests/pipelines/test_object_detecion_3d.py
+++ b/tests/pipelines/test_object_detecion_3d.py
@@ -10,11 +10,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ObjectDetection3DTest(unittest.TestCase, DemoCompatibilityCheck):
+class ObjectDetection3DTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.object_detection_3d
@@ -48,10 +47,6 @@ class ObjectDetection3DTest(unittest.TestCase, DemoCompatibilityCheck):
detect = pipeline(self.task)
self.pipeline_inference(detect, idx)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_object_detection.py b/tests/pipelines/test_object_detection.py
index e4bf6b54..f06d954b 100644
--- a/tests/pipelines/test_object_detection.py
+++ b/tests/pipelines/test_object_detection.py
@@ -3,11 +3,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
+class ObjectDetectionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.human_detection
@@ -43,12 +42,9 @@ class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
result = human_detect(input_location)
print(result)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_image_object_detection_auto_pipeline(self):
+ # TODO: to be fixed in the future
model_id = 'damo/cv_yolox_image-object-detection-auto'
test_image = 'data/test/images/auto_demo.jpg'
@@ -59,7 +55,7 @@ class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
image_object_detection_auto.show_result(test_image, result,
'auto_demo_ret.jpg')
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skip('skip test: no pipeline implemented')
def test_image_object_detection_dino_pipeline(self):
model_id = 'damo/cv_swinl_image-object-detection_dino'
test_image = 'data/test/images/image_detection.jpg'
diff --git a/tests/pipelines/test_ocr_detection.py b/tests/pipelines/test_ocr_detection.py
index 243e274b..0ed2e59c 100644
--- a/tests/pipelines/test_ocr_detection.py
+++ b/tests/pipelines/test_ocr_detection.py
@@ -4,11 +4,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class OCRDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
+class OCRDetectionTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_resnet18_ocr-detection-line-level_damo'
@@ -43,10 +42,6 @@ class OCRDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
ocr_detection = pipeline(Tasks.ocr_detection)
self.pipeline_inference(ocr_detection, self.test_image)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_ocr_recognition.py b/tests/pipelines/test_ocr_recognition.py
index 145ae22a..94ee521f 100644
--- a/tests/pipelines/test_ocr_recognition.py
+++ b/tests/pipelines/test_ocr_recognition.py
@@ -6,14 +6,13 @@ import PIL
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class OCRRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
+class OCRRecognitionTest(unittest.TestCase):
def setUp(self) -> None:
- self.model_id = 'damo/cv_crnn_ocr-recognition-general_damo'
+ self.model_id = 'damo/cv_convnextTiny_ocr-recognition-general_damo'
self.test_image = 'data/test/images/ocr_recognition.jpg'
self.task = Tasks.ocr_recognition
@@ -26,7 +25,47 @@ class OCRRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
ocr_recognition = pipeline(
Tasks.ocr_recognition,
model=self.model_id,
- model_revision='v2.2.1')
+ model_revision='v2.3.0')
+ self.pipeline_inference(ocr_recognition, self.test_image)
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_model_from_modelhub_handwritten(self):
+ ocr_recognition = pipeline(
+ Tasks.ocr_recognition,
+ model='damo/cv_convnextTiny_ocr-recognition-handwritten_damo',
+ model_revision='v2.3.0')
+ self.pipeline_inference(ocr_recognition, self.test_image)
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_model_from_modelhub_scene(self):
+ ocr_recognition = pipeline(
+ Tasks.ocr_recognition,
+ model='damo/cv_convnextTiny_ocr-recognition-scene_damo',
+ model_revision='v2.3.0')
+ self.pipeline_inference(ocr_recognition, self.test_image)
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_model_from_modelhub_document(self):
+ ocr_recognition = pipeline(
+ Tasks.ocr_recognition,
+ model='damo/cv_convnextTiny_ocr-recognition-document_damo',
+ model_revision='v2.3.0')
+ self.pipeline_inference(ocr_recognition, self.test_image)
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_model_from_modelhub_licenseplate(self):
+ ocr_recognition = pipeline(
+ Tasks.ocr_recognition,
+ model='damo/cv_convnextTiny_ocr-recognition-licenseplate_damo',
+ model_revision='v2.3.0')
+ self.pipeline_inference(ocr_recognition, self.test_image)
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_model_from_modelhub_crnn(self):
+ ocr_recognition = pipeline(
+ Tasks.ocr_recognition,
+ model='damo/cv_crnn_ocr-recognition-general_damo',
+ model_revision='v2.2.2')
self.pipeline_inference(ocr_recognition, self.test_image)
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
@@ -34,7 +73,7 @@ class OCRRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
ocr_recognition = pipeline(
Tasks.ocr_recognition,
model=self.model_id,
- model_revision='v2.2.1')
+ model_revision='v2.3.0')
imagePIL = PIL.Image.open(self.test_image)
self.pipeline_inference(ocr_recognition, imagePIL)
@@ -44,9 +83,75 @@ class OCRRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
Tasks.ocr_recognition, model_revision='v2.3.0')
self.pipeline_inference(ocr_recognition, self.test_image)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_model_from_modelhub_cpu(self):
+ ocr_recognition = pipeline(
+ Tasks.ocr_recognition,
+ model=self.model_id,
+ model_revision='v2.3.0',
+ device='cpu')
+ self.pipeline_inference(ocr_recognition, self.test_image)
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_model_from_modelhub_handwritten_cpu(self):
+ ocr_recognition = pipeline(
+ Tasks.ocr_recognition,
+ model='damo/cv_convnextTiny_ocr-recognition-handwritten_damo',
+ model_revision='v2.3.0',
+ device='cpu')
+ self.pipeline_inference(ocr_recognition, self.test_image)
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_model_from_modelhub_scene_cpu(self):
+ ocr_recognition = pipeline(
+ Tasks.ocr_recognition,
+ model='damo/cv_convnextTiny_ocr-recognition-scene_damo',
+ model_revision='v2.3.0',
+ device='cpu')
+ self.pipeline_inference(ocr_recognition, self.test_image)
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_model_from_modelhub_document_cpu(self):
+ ocr_recognition = pipeline(
+ Tasks.ocr_recognition,
+ model='damo/cv_convnextTiny_ocr-recognition-document_damo',
+ model_revision='v2.3.0',
+ device='cpu')
+ self.pipeline_inference(ocr_recognition, self.test_image)
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_model_from_modelhub_licenseplate_cpu(self):
+ ocr_recognition = pipeline(
+ Tasks.ocr_recognition,
+ model='damo/cv_convnextTiny_ocr-recognition-licenseplate_damo',
+ model_revision='v2.3.0',
+ device='cpu')
+ self.pipeline_inference(ocr_recognition, self.test_image)
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_model_from_modelhub_crnn_cpu(self):
+ ocr_recognition = pipeline(
+ Tasks.ocr_recognition,
+ model='damo/cv_crnn_ocr-recognition-general_damo',
+ model_revision='v2.2.2',
+ device='cpu')
+ self.pipeline_inference(ocr_recognition, self.test_image)
+
+ @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+ def test_run_with_model_from_modelhub_PILinput_cpu(self):
+ ocr_recognition = pipeline(
+ Tasks.ocr_recognition,
+ model=self.model_id,
+ model_revision='v2.3.0',
+ device='cpu')
+ imagePIL = PIL.Image.open(self.test_image)
+ self.pipeline_inference(ocr_recognition, imagePIL)
+
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+ def test_run_modelhub_default_model_cpu(self):
+ ocr_recognition = pipeline(
+ Tasks.ocr_recognition, model_revision='v2.3.0', device='cpu')
+ self.pipeline_inference(ocr_recognition, self.test_image)
if __name__ == '__main__':
diff --git a/tests/pipelines/test_ofa_tasks.py b/tests/pipelines/test_ofa_tasks.py
index df1b5647..55c3ae65 100644
--- a/tests/pipelines/test_ofa_tasks.py
+++ b/tests/pipelines/test_ofa_tasks.py
@@ -11,11 +11,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.cv.image_utils import created_boxed_image
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck):
+class OfaTasksTest(unittest.TestCase):
def setUp(self) -> None:
self.output_dir = 'unittest_output'
@@ -366,10 +365,6 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck):
for r in result:
print(r[OutputKeys.TEXT])
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_panorama_depth_estimation.py b/tests/pipelines/test_panorama_depth_estimation.py
index 99e575e3..23552274 100644
--- a/tests/pipelines/test_panorama_depth_estimation.py
+++ b/tests/pipelines/test_panorama_depth_estimation.py
@@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.cv.image_utils import depth_to_color
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class PanoramaDepthEstimationTest(unittest.TestCase, DemoCompatibilityCheck):
+class PanoramaDepthEstimationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = 'panorama-depth-estimation'
diff --git a/tests/pipelines/test_pedestrian_attribute_recognition.py b/tests/pipelines/test_pedestrian_attribute_recognition.py
index c0ace43c..7d58ce12 100644
--- a/tests/pipelines/test_pedestrian_attribute_recognition.py
+++ b/tests/pipelines/test_pedestrian_attribute_recognition.py
@@ -8,12 +8,10 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.cv.image_utils import draw_pedestrian_attribute
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class PedestrianAttributeRecognitionTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class PedestrianAttributeRecognitionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.pedestrian_attribute_recognition
@@ -39,10 +37,6 @@ class PedestrianAttributeRecognitionTest(unittest.TestCase,
self.pipeline_inference(pedestrian_attribute_recognition,
Image.open(self.test_image))
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_person_image_cartoon.py b/tests/pipelines/test_person_image_cartoon.py
index 1dfaf519..40d26c86 100644
--- a/tests/pipelines/test_person_image_cartoon.py
+++ b/tests/pipelines/test_person_image_cartoon.py
@@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ImageCartoonTest(unittest.TestCase, DemoCompatibilityCheck):
+class ImageCartoonTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_unet_person-image-cartoon_compound-models'
@@ -83,10 +82,6 @@ class ImageCartoonTest(unittest.TestCase, DemoCompatibilityCheck):
img_cartoon = pipeline(Tasks.image_portrait_stylization)
self.pipeline_inference(img_cartoon, self.test_image)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_pointcloud_sceneflow_estimation.py b/tests/pipelines/test_pointcloud_sceneflow_estimation.py
index 34d87f09..4d4bf7f2 100644
--- a/tests/pipelines/test_pointcloud_sceneflow_estimation.py
+++ b/tests/pipelines/test_pointcloud_sceneflow_estimation.py
@@ -7,12 +7,10 @@ import numpy as np
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class PointCloudSceneFlowEstimationTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class PointCloudSceneFlowEstimationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = 'pointcloud-sceneflow-estimation'
diff --git a/tests/pipelines/test_product_retrieval_embedding.py b/tests/pipelines/test_product_retrieval_embedding.py
index 2483d53a..f194bb7b 100644
--- a/tests/pipelines/test_product_retrieval_embedding.py
+++ b/tests/pipelines/test_product_retrieval_embedding.py
@@ -8,11 +8,10 @@ from modelscope.models import Model
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ProductRetrievalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck):
+class ProductRetrievalEmbeddingTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.product_retrieval_embedding
@@ -41,10 +40,6 @@ class ProductRetrievalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck):
result = product_embed(self.img_input)[OutputKeys.IMG_EMBEDDING]
print('abs sum value is: {}'.format(np.sum(np.abs(result))))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_realtime_object_detection.py b/tests/pipelines/test_realtime_object_detection.py
index 498c09d8..21450619 100644
--- a/tests/pipelines/test_realtime_object_detection.py
+++ b/tests/pipelines/test_realtime_object_detection.py
@@ -7,14 +7,13 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.cv.image_utils import realtime_object_detection_bbox_vis
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import test_level
logger = get_logger()
-class RealtimeObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
+class RealtimeObjectDetectionTest(unittest.TestCase):
def setUp(self) -> None:
self.easycv_small_model_id = 'damo/cv_cspnet_image-object-detection_yolox'
@@ -22,7 +21,7 @@ class RealtimeObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
self.test_image = 'data/test/images/keypoints_detect/000000438862.jpg'
self.task = Tasks.image_object_detection
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skip('skip test: no pipeline implemented for this model')
def test_run_easycv_yolox(self):
realtime_object_detection = pipeline(
Tasks.image_object_detection, model=self.easycv_small_model_id)
@@ -34,7 +33,7 @@ class RealtimeObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
else:
raise ValueError('process error')
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skip('skip test: no pipeline implemented for this model')
def test_run_easycv_yolox_nano(self):
realtime_object_detection = pipeline(
Tasks.image_object_detection, model=self.easycv_nano_model_id)
@@ -46,10 +45,6 @@ class RealtimeObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
else:
raise ValueError('process error')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_realtime_video_object_detection.py b/tests/pipelines/test_realtime_video_object_detection.py
index 716c9260..d42bda67 100644
--- a/tests/pipelines/test_realtime_video_object_detection.py
+++ b/tests/pipelines/test_realtime_video_object_detection.py
@@ -9,15 +9,13 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.cv.image_utils import show_video_object_detection_result
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import test_level
logger = get_logger()
-class RealtimeVideoObjectDetectionTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class RealtimeVideoObjectDetectionTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_cspnet_video-object-detection_streamyolo'
@@ -53,10 +51,6 @@ class RealtimeVideoObjectDetectionTest(unittest.TestCase,
else:
raise ValueError('process error')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_referring_video_object_segmentation.py b/tests/pipelines/test_referring_video_object_segmentation.py
index 509e9317..2b7de41c 100644
--- a/tests/pipelines/test_referring_video_object_segmentation.py
+++ b/tests/pipelines/test_referring_video_object_segmentation.py
@@ -3,12 +3,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ReferringVideoObjectSegmentationTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class ReferringVideoObjectSegmentationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.referring_video_object_segmentation
@@ -45,10 +43,6 @@ class ReferringVideoObjectSegmentationTest(unittest.TestCase,
else:
raise ValueError('process error')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_relation_extraction.py b/tests/pipelines/test_relation_extraction.py
index 17ab61fc..44c0b9ad 100644
--- a/tests/pipelines/test_relation_extraction.py
+++ b/tests/pipelines/test_relation_extraction.py
@@ -8,11 +8,10 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import InformationExtractionPipeline
from modelscope.preprocessors import RelationExtractionTransformersPreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class RelationExtractionTest(unittest.TestCase, DemoCompatibilityCheck):
+class RelationExtractionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.relation_extraction
@@ -55,10 +54,6 @@ class RelationExtractionTest(unittest.TestCase, DemoCompatibilityCheck):
pipeline_ins = pipeline(task=Tasks.relation_extraction)
print(pipeline_ins(input=self.sentence))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_salient_detection.py b/tests/pipelines/test_salient_detection.py
index 3101213c..78ae94db 100644
--- a/tests/pipelines/test_salient_detection.py
+++ b/tests/pipelines/test_salient_detection.py
@@ -4,11 +4,10 @@ import unittest
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class SalientDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
+class SalientDetectionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.semantic_segmentation
@@ -44,10 +43,6 @@ class SalientDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
cv2.imwrite(input_location + '_camouflag.jpg',
result[OutputKeys.MASKS])
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_sentence_similarity.py b/tests/pipelines/test_sentence_similarity.py
index 233bd3a1..e411158f 100644
--- a/tests/pipelines/test_sentence_similarity.py
+++ b/tests/pipelines/test_sentence_similarity.py
@@ -11,12 +11,11 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import TextClassificationPipeline
from modelscope.preprocessors import TextClassificationTransformersPreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool
from modelscope.utils.test_utils import test_level
-class SentenceSimilarityTest(unittest.TestCase, DemoCompatibilityCheck):
+class SentenceSimilarityTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.sentence_similarity
@@ -110,10 +109,6 @@ class SentenceSimilarityTest(unittest.TestCase, DemoCompatibilityCheck):
pipeline_ins = pipeline(task=Tasks.sentence_similarity)
print(pipeline_ins(input=(self.sentence1, self.sentence2)))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_sentiment_classification.py b/tests/pipelines/test_sentiment_classification.py
index 278f34a8..bb0311ff 100644
--- a/tests/pipelines/test_sentiment_classification.py
+++ b/tests/pipelines/test_sentiment_classification.py
@@ -9,12 +9,10 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import TextClassificationPipeline
from modelscope.preprocessors import TextClassificationTransformersPreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class SentimentClassificationTaskModelTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class SentimentClassificationTaskModelTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.text_classification
@@ -63,10 +61,6 @@ class SentimentClassificationTaskModelTest(unittest.TestCase,
self.assertTrue(
isinstance(pipeline_ins.model, ModelForTextClassification))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_siamese_uie.py b/tests/pipelines/test_siamese_uie.py
index 30b38d2e..c5008573 100644
--- a/tests/pipelines/test_siamese_uie.py
+++ b/tests/pipelines/test_siamese_uie.py
@@ -10,12 +10,11 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import SiameseUiePipeline
from modelscope.preprocessors import SiameseUiePreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool
from modelscope.utils.test_utils import test_level
-class ZeroShotClassificationTest(unittest.TestCase, DemoCompatibilityCheck):
+class ZeroShotClassificationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.siamese_uie
@@ -67,10 +66,6 @@ class ZeroShotClassificationTest(unittest.TestCase, DemoCompatibilityCheck):
pipeline_ins = pipeline(task=Tasks.siamese_uie, model_revision='v1.1')
print(pipeline_ins(input=self.sentence, schema=self.schema))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_skin_retouching.py b/tests/pipelines/test_skin_retouching.py
index db8d89ed..aa1e0c59 100644
--- a/tests/pipelines/test_skin_retouching.py
+++ b/tests/pipelines/test_skin_retouching.py
@@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class SkinRetouchingTest(unittest.TestCase, DemoCompatibilityCheck):
+class SkinRetouchingTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.skin_retouching
@@ -41,10 +40,6 @@ class SkinRetouchingTest(unittest.TestCase, DemoCompatibilityCheck):
skin_retouching = pipeline(Tasks.skin_retouching)
self.pipeline_inference(skin_retouching, self.test_image)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_soonet_video_temporal_grounding.py b/tests/pipelines/test_soonet_video_temporal_grounding.py
index 21f8027c..4fafeb31 100644
--- a/tests/pipelines/test_soonet_video_temporal_grounding.py
+++ b/tests/pipelines/test_soonet_video_temporal_grounding.py
@@ -5,12 +5,10 @@ from modelscope.models import Model
from modelscope.models.multi_modal.soonet import SOONet
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class SOONetVideoTemporalGroundingTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class SOONetVideoTemporalGroundingTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.video_temporal_grounding
diff --git a/tests/pipelines/test_speaker_verification.py b/tests/pipelines/test_speaker_verification.py
index 83d8aff3..2b90c66e 100644
--- a/tests/pipelines/test_speaker_verification.py
+++ b/tests/pipelines/test_speaker_verification.py
@@ -1,13 +1,11 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
-import os.path
import unittest
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Union
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import test_level
@@ -16,17 +14,28 @@ logger = get_logger()
SPEAKER1_A_EN_16K_WAV = 'data/test/audios/speaker1_a_en_16k.wav'
SPEAKER1_B_EN_16K_WAV = 'data/test/audios/speaker1_b_en_16k.wav'
SPEAKER2_A_EN_16K_WAV = 'data/test/audios/speaker2_a_en_16k.wav'
+SCL_EXAMPLE_WAV = 'data/test/audios/scl_example1.wav'
-class SpeakerVerificationTest(unittest.TestCase, DemoCompatibilityCheck):
+class SpeakerVerificationTest(unittest.TestCase):
ecapatdnn_voxceleb_16k_model_id = 'damo/speech_ecapa-tdnn_sv_en_voxceleb_16k'
campplus_voxceleb_16k_model_id = 'damo/speech_campplus_sv_en_voxceleb_16k'
+ rdino_voxceleb_16k_model_id = 'damo/speech_rdino_ecapa_tdnn_sv_en_voxceleb_16k'
+ speaker_change_locating_cn_model_id = 'damo/speech_campplus-transformer_scl_zh-cn_16k-common'
+ eres2net_voxceleb_16k_model_id = 'damo/speech_eres2net_sv_en_voxceleb_16k'
def setUp(self) -> None:
self.task = Tasks.speaker_verification
- def run_pipeline(self, model_id: str, audios: List[str]) -> Dict[str, Any]:
- p = pipeline(task=self.task, model=model_id)
+ def run_pipeline(self,
+ model_id: str,
+ audios: Union[List[str], str],
+ task: str = None,
+ model_revision=None) -> Dict[str, Any]:
+ if task is not None:
+ self.task = task
+ p = pipeline(
+ task=self.task, model=model_id, model_revision=model_revision)
result = p(audios)
return result
@@ -51,9 +60,36 @@ class SpeakerVerificationTest(unittest.TestCase, DemoCompatibilityCheck):
print(result)
self.assertTrue(OutputKeys.SCORE in result)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_speaker_verification_rdino_voxceleb_16k(self):
+ logger.info('Run speaker verification for rdino_voxceleb_16k model')
+ result = self.run_pipeline(
+ model_id=self.rdino_voxceleb_16k_model_id,
+ audios=[SPEAKER1_A_EN_16K_WAV, SPEAKER1_B_EN_16K_WAV],
+ model_revision='v1.0.1')
+ print(result)
+ self.assertTrue(OutputKeys.SCORE in result)
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_speaker_change_locating_cn_16k(self):
+ logger.info(
+ 'Run speaker change locating for campplus-transformer model')
+ result = self.run_pipeline(
+ model_id=self.speaker_change_locating_cn_model_id,
+ task=Tasks.speaker_diarization,
+ audios=SCL_EXAMPLE_WAV)
+ print(result)
+ self.assertTrue(OutputKeys.TEXT in result)
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_speaker_verification_eres2net_voxceleb_16k(self):
+ logger.info('Run speaker verification for eres2net_voxceleb_16k model')
+ result = self.run_pipeline(
+ model_id=self.eres2net_voxceleb_16k_model_id,
+ audios=[SPEAKER1_A_EN_16K_WAV, SPEAKER1_B_EN_16K_WAV],
+ model_revision='v1.0.2')
+ print(result)
+ self.assertTrue(OutputKeys.SCORE in result)
if __name__ == '__main__':
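The speaker_verification hunk widens the run_pipeline helper (a single audio path or a list, an optional task override and model_revision) and adds coverage for the RDINO, ERes2Net and speaker-change-locating models. A hedged sketch of the equivalent standalone pipeline calls, using only the ids, revisions and wav paths that appear in the hunk:

```python
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Speaker verification: two utterances in, a similarity score out.
sv = pipeline(
    task=Tasks.speaker_verification,
    model='damo/speech_eres2net_sv_en_voxceleb_16k',
    model_revision='v1.0.2')
result = sv([
    'data/test/audios/speaker1_a_en_16k.wav',
    'data/test/audios/speaker1_b_en_16k.wav',
])
print(result[OutputKeys.SCORE])

# Speaker change locating: one recording in, a textual change-point result out.
scl = pipeline(
    task=Tasks.speaker_diarization,
    model='damo/speech_campplus-transformer_scl_zh-cn_16k-common')
print(scl('data/test/audios/scl_example1.wav')[OutputKeys.TEXT])
```

The tests funnel both cases through run_pipeline so that a single path or a list of paths passes through unchanged.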
diff --git a/tests/pipelines/test_speech_separation.py b/tests/pipelines/test_speech_separation.py
index 194f84a8..4edb3b43 100644
--- a/tests/pipelines/test_speech_separation.py
+++ b/tests/pipelines/test_speech_separation.py
@@ -8,13 +8,12 @@ import numpy
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
MIX_SPEECH_FILE = 'data/test/audios/mix_speech.wav'
-class SpeechSeparationTest(unittest.TestCase, DemoCompatibilityCheck):
+class SpeechSeparationTest(unittest.TestCase):
def setUp(self) -> None:
pass
@@ -32,10 +31,6 @@ class SpeechSeparationTest(unittest.TestCase, DemoCompatibilityCheck):
sf.write(save_file, numpy.frombuffer(signal, dtype=numpy.int16),
8000)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_speech_signal_process.py b/tests/pipelines/test_speech_signal_process.py
index 2c26cee6..104bf88a 100644
--- a/tests/pipelines/test_speech_signal_process.py
+++ b/tests/pipelines/test_speech_signal_process.py
@@ -7,7 +7,6 @@ from modelscope.metainfo import Pipelines
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
NEAREND_MIC_FILE = 'data/test/audios/nearend_mic.wav'
@@ -24,7 +23,7 @@ NOISE_SPEECH_URL = 'https://modelscope.oss-cn-beijing.aliyuncs.com/' \
'test/audios/speech_with_noise.wav'
-class SpeechSignalProcessTest(unittest.TestCase, DemoCompatibilityCheck):
+class SpeechSignalProcessTest(unittest.TestCase):
def setUp(self) -> None:
pass
@@ -150,10 +149,6 @@ class SpeechSignalProcessTest(unittest.TestCase, DemoCompatibilityCheck):
w.write(pcm)
audio = f.read(block_size)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_table_question_answering.py b/tests/pipelines/test_table_question_answering.py
index 4d6eff24..d688ef23 100644
--- a/tests/pipelines/test_table_question_answering.py
+++ b/tests/pipelines/test_table_question_answering.py
@@ -40,7 +40,7 @@ def tableqa_tracking_and_print_results_with_history(
print('question', question)
print('sql text:', output_dict[OutputKeys.SQL_STRING])
print('sql query:', output_dict[OutputKeys.SQL_QUERY])
- print('query result:', output_dict[OutputKeys.QUERT_RESULT])
+ print('query result:', output_dict[OutputKeys.QUERY_RESULT])
print('json dumps', json.dumps(output_dict, ensure_ascii=False))
print()
historical_queries = output_dict[OutputKeys.HISTORY]
@@ -66,7 +66,7 @@ def tableqa_tracking_and_print_results_without_history(
print('question', question)
print('sql text:', output_dict[OutputKeys.SQL_STRING])
print('sql query:', output_dict[OutputKeys.SQL_QUERY])
- print('query result:', output_dict[OutputKeys.QUERT_RESULT])
+ print('query result:', output_dict[OutputKeys.QUERY_RESULT])
print('json dumps', json.dumps(output_dict, ensure_ascii=False))
print()
@@ -99,7 +99,7 @@ def tableqa_tracking_and_print_results_with_tableid(
print('question', question)
print('sql text:', output_dict[OutputKeys.SQL_STRING])
print('sql query:', output_dict[OutputKeys.SQL_QUERY])
- print('query result:', output_dict[OutputKeys.QUERT_RESULT])
+ print('query result:', output_dict[OutputKeys.QUERY_RESULT])
print('json dumps', json.dumps(output_dict, ensure_ascii=False))
print()
historical_queries = output_dict[OutputKeys.HISTORY]
@@ -135,7 +135,7 @@ class TableQuestionAnswering(unittest.TestCase):
'history_sql': None
})
print(i, result[OutputKeys.OUTPUT][OutputKeys.SQL_QUERY],
- result[OutputKeys.OUTPUT][OutputKeys.QUERT_RESULT],
+ result[OutputKeys.OUTPUT][OutputKeys.QUERY_RESULT],
json.dumps(result))
procs = []
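The table_question_answering hunks only track the OutputKeys rename from QUERT_RESULT to QUERY_RESULT. A tiny illustrative helper (hypothetical name) that prints the same fields as the tests above, using the corrected constant:

```python
from modelscope.outputs import OutputKeys


def print_tableqa_output(output_dict: dict) -> None:
    """Dump the fields the tests print, using the renamed QUERY_RESULT key."""
    print('sql text:', output_dict[OutputKeys.SQL_STRING])
    print('sql query:', output_dict[OutputKeys.SQL_QUERY])
    print('query result:', output_dict[OutputKeys.QUERY_RESULT])
```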
diff --git a/tests/pipelines/test_table_recognition.py b/tests/pipelines/test_table_recognition.py
index 3c6ee74a..6b81fc62 100644
--- a/tests/pipelines/test_table_recognition.py
+++ b/tests/pipelines/test_table_recognition.py
@@ -5,11 +5,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class TableRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
+class TableRecognitionTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_dla34_table-structure-recognition_cycle-centernet'
@@ -32,10 +31,6 @@ class TableRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
table_recognition = pipeline(Tasks.table_recognition)
self.pipeline_inference(table_recognition, self.test_image)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_tbs_detection.py b/tests/pipelines/test_tbs_detection.py
index ac0dd550..0d5a1283 100644
--- a/tests/pipelines/test_tbs_detection.py
+++ b/tests/pipelines/test_tbs_detection.py
@@ -2,11 +2,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
+class ObjectDetectionTest(unittest.TestCase):
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_run_with_model_name(self):
diff --git a/tests/pipelines/test_text2text_generation.py b/tests/pipelines/test_text2text_generation.py
index 40576a29..d439e033 100644
--- a/tests/pipelines/test_text2text_generation.py
+++ b/tests/pipelines/test_text2text_generation.py
@@ -8,11 +8,10 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import TextGenerationT5Pipeline
from modelscope.preprocessors import TextGenerationT5Preprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class Text2TextGenerationTest(unittest.TestCase, DemoCompatibilityCheck):
+class Text2TextGenerationTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id_generate = 'damo/t5-cn-base-test'
@@ -86,10 +85,6 @@ class Text2TextGenerationTest(unittest.TestCase, DemoCompatibilityCheck):
pipeline_ins = pipeline(task=Tasks.text2text_generation)
print(pipeline_ins(self.input_generate))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_text_classification.py b/tests/pipelines/test_text_classification.py
index d07ddbb8..128f86af 100644
--- a/tests/pipelines/test_text_classification.py
+++ b/tests/pipelines/test_text_classification.py
@@ -7,11 +7,10 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import TextClassificationPipeline
from modelscope.preprocessors import TextClassificationTransformersPreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class SequenceClassificationTest(unittest.TestCase, DemoCompatibilityCheck):
+class SequenceClassificationTest(unittest.TestCase):
sentence1 = 'i like this wonderful place'
def setUp(self) -> None:
@@ -91,10 +90,6 @@ class SequenceClassificationTest(unittest.TestCase, DemoCompatibilityCheck):
result = text_classification(dataset)
self.printDataset(result)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_text_driven_segmentation.py b/tests/pipelines/test_text_driven_segmentation.py
index a67729ff..741787d9 100644
--- a/tests/pipelines/test_text_driven_segmentation.py
+++ b/tests/pipelines/test_text_driven_segmentation.py
@@ -23,10 +23,6 @@ class TextDrivenSegmentationTest(unittest.TestCase):
        # result[OutputKeys.MASKS] is the segmentation map result; other keys are not used
cv2.imwrite(input_location + '_lseg.jpg', result[OutputKeys.MASKS])
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.test_demo()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_text_error_correction.py b/tests/pipelines/test_text_error_correction.py
index 171f3ab2..b4bf5be9 100644
--- a/tests/pipelines/test_text_error_correction.py
+++ b/tests/pipelines/test_text_error_correction.py
@@ -9,11 +9,10 @@ from modelscope.pipelines.nlp import TextErrorCorrectionPipeline
from modelscope.preprocessors import (Preprocessor,
TextErrorCorrectionPreprocessor)
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class TextErrorCorrectionTest(unittest.TestCase, DemoCompatibilityCheck):
+class TextErrorCorrectionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.text_error_correction
@@ -81,10 +80,6 @@ class TextErrorCorrectionTest(unittest.TestCase, DemoCompatibilityCheck):
task=Tasks.text_error_correction, model=self.law_model_id)
print(pipeline_ins(self.input_law))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_text_generation.py b/tests/pipelines/test_text_generation.py
index 998cbd18..378b1bbc 100644
--- a/tests/pipelines/test_text_generation.py
+++ b/tests/pipelines/test_text_generation.py
@@ -8,11 +8,10 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import TextGenerationPipeline
from modelscope.preprocessors import TextGenerationTransformersPreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class TextGenerationTest(unittest.TestCase, DemoCompatibilityCheck):
+class TextGenerationTest(unittest.TestCase):
def setUp(self) -> None:
self.palm_model_id_zh_base = 'damo/nlp_palm2.0_text-generation_chinese-base'
@@ -261,10 +260,6 @@ class TextGenerationTest(unittest.TestCase, DemoCompatibilityCheck):
model='damo/nlp_gpt2_text-generation_english-base')
print(pipe('My name is Teven and I am'))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_text_to_image_synthesis.py b/tests/pipelines/test_text_to_image_synthesis.py
index 5e28282b..63c38571 100644
--- a/tests/pipelines/test_text_to_image_synthesis.py
+++ b/tests/pipelines/test_text_to_image_synthesis.py
@@ -8,11 +8,10 @@ from modelscope.models import Model
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class TextToImageSynthesisTest(unittest.TestCase, DemoCompatibilityCheck):
+class TextToImageSynthesisTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.text_to_image_synthesis
@@ -61,10 +60,6 @@ class TextToImageSynthesisTest(unittest.TestCase, DemoCompatibilityCheck):
self.test_text)[OutputKeys.OUTPUT_IMGS][0]
print(np.sum(np.abs(img)))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_text_to_speech.py b/tests/pipelines/test_text_to_speech.py
index f746dfbe..528977ce 100644
--- a/tests/pipelines/test_text_to_speech.py
+++ b/tests/pipelines/test_text_to_speech.py
@@ -11,7 +11,6 @@ import torch
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import test_level
@@ -20,8 +19,7 @@ import tensorflow as tf # isort:skip
logger = get_logger()
-class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.text_to_speech
@@ -109,10 +107,6 @@ class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase,
with open(f'output_{self.test_model_name[i]}', 'wb') as f:
f.write(wav)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_text_to_video_synthesis.py b/tests/pipelines/test_text_to_video_synthesis.py
index 6463c155..97ef6089 100644
--- a/tests/pipelines/test_text_to_video_synthesis.py
+++ b/tests/pipelines/test_text_to_video_synthesis.py
@@ -5,11 +5,10 @@ import unittest
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class TextToVideoSynthesisTest(unittest.TestCase, DemoCompatibilityCheck):
+class TextToVideoSynthesisTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.text_to_video_synthesis
@@ -27,10 +26,6 @@ class TextToVideoSynthesisTest(unittest.TestCase, DemoCompatibilityCheck):
self.test_text)[OutputKeys.OUTPUT_VIDEO]
print(output_video_path)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_tinymog_face_detection.py b/tests/pipelines/test_tinymog_face_detection.py
index e80fa482..48e74f44 100644
--- a/tests/pipelines/test_tinymog_face_detection.py
+++ b/tests/pipelines/test_tinymog_face_detection.py
@@ -8,11 +8,10 @@ from modelscope.msdatasets import MsDataset
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.cv.image_utils import draw_face_detection_result
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class TinyMogFaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
+class TinyMogFaceDetectionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.face_detection
@@ -48,10 +47,6 @@ class TinyMogFaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
result = face_detection(self.img_path)
self.show_result(self.img_path, result)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_tinynas_classification.py b/tests/pipelines/test_tinynas_classification.py
index ebc6b722..300bd2b1 100644
--- a/tests/pipelines/test_tinynas_classification.py
+++ b/tests/pipelines/test_tinynas_classification.py
@@ -4,11 +4,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class TinyNASClassificationTest(unittest.TestCase, DemoCompatibilityCheck):
+class TinyNASClassificationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_classification
@@ -21,10 +20,6 @@ class TinyNASClassificationTest(unittest.TestCase, DemoCompatibilityCheck):
result = tinynas_classification('data/test/images/image_wolf.jpeg')
print(result)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_tinynas_detection.py b/tests/pipelines/test_tinynas_detection.py
index f7c513ff..08c746ea 100644
--- a/tests/pipelines/test_tinynas_detection.py
+++ b/tests/pipelines/test_tinynas_detection.py
@@ -7,11 +7,10 @@ from PIL import Image
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class TinynasObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
+class TinynasObjectDetectionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_object_detection
@@ -52,10 +51,6 @@ class TinynasObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
'data/test/images/image_detection.jpg')
print('damoyolo-t', result)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_image_object_detection_auto_pipeline(self):
test_image = 'data/test/images/image_detection.jpg'
diff --git a/tests/pipelines/test_traffic_sign_detection.py b/tests/pipelines/test_traffic_sign_detection.py
index 5404649d..efedec14 100644
--- a/tests/pipelines/test_traffic_sign_detection.py
+++ b/tests/pipelines/test_traffic_sign_detection.py
@@ -7,20 +7,15 @@ from PIL import Image
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class TrafficSignDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
+class TrafficSignDetectionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.domain_specific_object_detection
self.model_id = 'damo/cv_tinynas_object-detection_damoyolo_traffic_sign'
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_traffic_sign_detection_damoyolo(self):
tinynas_object_detection = pipeline(
diff --git a/tests/pipelines/test_translation_evaluation.py b/tests/pipelines/test_translation_evaluation.py
index 53524fdc..e936f41a 100644
--- a/tests/pipelines/test_translation_evaluation.py
+++ b/tests/pipelines/test_translation_evaluation.py
@@ -2,14 +2,13 @@
import unittest
-from modelscope.models.nlp.unite.configuration_unite import EvaluationMode
+from modelscope.models.nlp.unite.configuration import InputFormat
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class TranslationEvaluationTest(unittest.TestCase, DemoCompatibilityCheck):
+class TranslationEvaluationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.translation_evaluation
@@ -18,7 +17,7 @@ class TranslationEvaluationTest(unittest.TestCase, DemoCompatibilityCheck):
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_run_with_model_name_for_unite_large(self):
- input = {
+ input_dict = {
'hyp': [
'This is a sentence.',
'This is another sentence.',
@@ -34,27 +33,27 @@ class TranslationEvaluationTest(unittest.TestCase, DemoCompatibilityCheck):
}
pipeline_ins = pipeline(self.task, model=self.model_id_large)
- print(pipeline_ins(input=input))
+ print(pipeline_ins(input_dict)['score'])
- pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.SRC)
- print(pipeline_ins(input=input))
+ pipeline_ins.change_input_format(input_format=InputFormat.SRC)
+ print(pipeline_ins(input_dict)['score'])
- pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.REF)
- print(pipeline_ins(input=input))
+ pipeline_ins.change_input_format(input_format=InputFormat.REF)
+ print(pipeline_ins(input_dict)['score'])
pipeline_ins = pipeline(
self.task, model=self.model_id_large, device='cpu')
- print(pipeline_ins(input=input))
+ print(pipeline_ins(input_dict)['score'])
- pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.SRC)
- print(pipeline_ins(input=input))
+ pipeline_ins.change_input_format(input_format=InputFormat.SRC)
+ print(pipeline_ins(input_dict)['score'])
- pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.REF)
- print(pipeline_ins(input=input))
+ pipeline_ins.change_input_format(input_format=InputFormat.REF)
+ print(pipeline_ins(input_dict)['score'])
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_run_with_model_name_for_unite_base(self):
- input = {
+ input_dict = {
'hyp': [
'This is a sentence.',
'This is another sentence.',
@@ -70,23 +69,23 @@ class TranslationEvaluationTest(unittest.TestCase, DemoCompatibilityCheck):
}
pipeline_ins = pipeline(self.task, model=self.model_id_base)
- print(pipeline_ins(input=input))
+ print(pipeline_ins(input_dict)['score'])
- pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.SRC)
- print(pipeline_ins(input=input))
+ pipeline_ins.change_input_format(input_format=InputFormat.SRC)
+ print(pipeline_ins(input_dict)['score'])
- pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.REF)
- print(pipeline_ins(input=input))
+ pipeline_ins.change_input_format(input_format=InputFormat.REF)
+ print(pipeline_ins(input_dict)['score'])
pipeline_ins = pipeline(
self.task, model=self.model_id_base, device='cpu')
- print(pipeline_ins(input=input))
+ print(pipeline_ins(input_dict)['score'])
- pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.SRC)
- print(pipeline_ins(input=input))
+ pipeline_ins.change_input_format(input_format=InputFormat.SRC)
+ print(pipeline_ins(input_dict)['score'])
- pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.REF)
- print(pipeline_ins(input=input))
+ pipeline_ins.change_input_format(input_format=InputFormat.REF)
+ print(pipeline_ins(input_dict)['score'])
if __name__ == '__main__':
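The translation_evaluation hunk tracks an API rename: EvaluationMode/change_eval_mode become InputFormat/change_input_format, and the pipeline result is read through its 'score' key. A minimal sketch of the new usage, assuming an illustrative UniTE model id and input sentences (the tests use self.model_id_base / self.model_id_large from setUp, which these hunks do not show):

```python
from modelscope.models.nlp.unite.configuration import InputFormat
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# assumed model id for illustration; substitute the id configured in setUp
model_id = 'damo/nlp_unite_mup_translation_evaluation_multilingual_base'

input_dict = {
    'hyp': ['This is a sentence.'],
    'src': ['这是一个句子。'],  # illustrative source-side text
    'ref': ['This is a sentence.'],
}

pipeline_ins = pipeline(Tasks.translation_evaluation, model=model_id)
print(pipeline_ins(input_dict)['score'])  # default: hypothesis + source + reference

pipeline_ins.change_input_format(input_format=InputFormat.SRC)  # source-only scoring
print(pipeline_ins(input_dict)['score'])

pipeline_ins.change_input_format(input_format=InputFormat.REF)  # reference-only scoring
print(pipeline_ins(input_dict)['score'])
```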
diff --git a/tests/pipelines/test_translation_quality_estimation.py b/tests/pipelines/test_translation_quality_estimation.py
index 315fa72b..0890f31b 100644
--- a/tests/pipelines/test_translation_quality_estimation.py
+++ b/tests/pipelines/test_translation_quality_estimation.py
@@ -3,12 +3,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class TranslationQualityEstimationTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class TranslationQualityEstimationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.sentence_similarity
@@ -23,10 +21,6 @@ class TranslationQualityEstimationTest(unittest.TestCase,
pipeline_ins = pipeline(self.task, model=self.model_id)
print(pipeline_ins(input=inputs))
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_unifold.py b/tests/pipelines/test_unifold.py
index cf67929d..98d2c1ce 100644
--- a/tests/pipelines/test_unifold.py
+++ b/tests/pipelines/test_unifold.py
@@ -4,11 +4,10 @@ import unittest
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class UnifoldProteinStructureTest(unittest.TestCase, DemoCompatibilityCheck):
+class UnifoldProteinStructureTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.protein_structure
diff --git a/tests/pipelines/test_universal_matting.py b/tests/pipelines/test_universal_matting.py
index 5868cf36..1450d938 100644
--- a/tests/pipelines/test_universal_matting.py
+++ b/tests/pipelines/test_universal_matting.py
@@ -8,11 +8,10 @@ from modelscope.msdatasets import MsDataset
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import ModelFile, Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class UniversalMattingTest(unittest.TestCase, DemoCompatibilityCheck):
+class UniversalMattingTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_unet_universal-matting'
@@ -35,10 +34,6 @@ class UniversalMattingTest(unittest.TestCase, DemoCompatibilityCheck):
cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG])
print(f'Output written to {osp.abspath("result.png")}')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_user_satisfaction_estimation.py b/tests/pipelines/test_user_satisfaction_estimation.py
index 2bbfd5d7..2904ea30 100644
--- a/tests/pipelines/test_user_satisfaction_estimation.py
+++ b/tests/pipelines/test_user_satisfaction_estimation.py
@@ -6,12 +6,10 @@ from modelscope.models import Model
from modelscope.pipelines import pipeline
from modelscope.preprocessors import DialogueClassificationUsePreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class UserSatisfactionEstimationTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class UserSatisfactionEstimationTest(unittest.TestCase):
model_id = 'damo/nlp_user-satisfaction-estimation_chinese'
input_dialogue = [('返修退换货咨询|||', '手机有质量问题怎么办|||稍等,我看下', '开不开机了|||',
@@ -33,10 +31,6 @@ class UserSatisfactionEstimationTest(unittest.TestCase,
task=Tasks.text_classification, model=self.model_id)
print(pipeline_ins(input=self.input_dialogue))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- print(self.compatibility_check())
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_video_category.py b/tests/pipelines/test_video_category.py
index 660196b8..61ee72b0 100644
--- a/tests/pipelines/test_video_category.py
+++ b/tests/pipelines/test_video_category.py
@@ -3,11 +3,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class VideoCategoryTest(unittest.TestCase, DemoCompatibilityCheck):
+class VideoCategoryTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.video_category
@@ -21,10 +20,6 @@ class VideoCategoryTest(unittest.TestCase, DemoCompatibilityCheck):
print(f'video category output: {result}.')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_video_colorization.py b/tests/pipelines/test_video_colorization.py
index c35577a4..fe6c0f87 100644
--- a/tests/pipelines/test_video_colorization.py
+++ b/tests/pipelines/test_video_colorization.py
@@ -11,11 +11,10 @@ from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.pipelines.cv import VideoColorizationPipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class VideoColorizationTest(unittest.TestCase, DemoCompatibilityCheck):
+class VideoColorizationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.video_colorization
@@ -44,10 +43,6 @@ class VideoColorizationTest(unittest.TestCase, DemoCompatibilityCheck):
video_colorization = pipeline(Tasks.video_colorization)
self.pipeline_inference(video_colorization, self.test_video)
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_video_deinterlace.py b/tests/pipelines/test_video_deinterlace.py
index bcb36cc3..267d4664 100644
--- a/tests/pipelines/test_video_deinterlace.py
+++ b/tests/pipelines/test_video_deinterlace.py
@@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.cv import VideoDeinterlacePipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class VideoDeinterlaceTest(unittest.TestCase, DemoCompatibilityCheck):
+class VideoDeinterlaceTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.video_deinterlace
@@ -52,10 +51,6 @@ class VideoDeinterlaceTest(unittest.TestCase, DemoCompatibilityCheck):
input=self.test_video)[OutputKeys.OUTPUT_VIDEO]
print('pipeline: the output video path is {}'.format(out_video_path))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_video_depth_estimation.py b/tests/pipelines/test_video_depth_estimation.py
index 30ca3b33..6a054b2b 100644
--- a/tests/pipelines/test_video_depth_estimation.py
+++ b/tests/pipelines/test_video_depth_estimation.py
@@ -5,11 +5,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.cv.image_utils import show_video_depth_estimation_result
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class VideoDepthEstimationTest(unittest.TestCase, DemoCompatibilityCheck):
+class VideoDepthEstimationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = 'video-depth-estimation'
diff --git a/tests/pipelines/test_video_frame_interpolation.py b/tests/pipelines/test_video_frame_interpolation.py
index c23aa46a..11a4f568 100644
--- a/tests/pipelines/test_video_frame_interpolation.py
+++ b/tests/pipelines/test_video_frame_interpolation.py
@@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.cv import VideoFrameInterpolationPipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class VideoFrameInterpolationTest(unittest.TestCase, DemoCompatibilityCheck):
+class VideoFrameInterpolationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.video_frame_interpolation
@@ -58,10 +57,6 @@ class VideoFrameInterpolationTest(unittest.TestCase, DemoCompatibilityCheck):
input=self.test_video)[OutputKeys.OUTPUT_VIDEO]
print('pipeline: the output video path is {}'.format(out_video_path))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_video_instance_segmentation.py b/tests/pipelines/test_video_instance_segmentation.py
index 0a76d260..465cf26f 100644
--- a/tests/pipelines/test_video_instance_segmentation.py
+++ b/tests/pipelines/test_video_instance_segmentation.py
@@ -3,11 +3,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class VideoInstanceSegmentationTest(unittest.TestCase, DemoCompatibilityCheck):
+class VideoInstanceSegmentationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.video_panoptic_segmentation
@@ -33,10 +32,6 @@ class VideoInstanceSegmentationTest(unittest.TestCase, DemoCompatibilityCheck):
print(f'video instance segmentation output:\n {result}.')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_video_multi_modal_embedding.py b/tests/pipelines/test_video_multi_modal_embedding.py
index afe5940d..fe87b089 100644
--- a/tests/pipelines/test_video_multi_modal_embedding.py
+++ b/tests/pipelines/test_video_multi_modal_embedding.py
@@ -4,14 +4,13 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import test_level
logger = get_logger()
-class VideoMultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck):
+class VideoMultiModalEmbeddingTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.video_multi_modal_embedding
@@ -41,10 +40,6 @@ class VideoMultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck):
logger.info('video feature: {}'.format(
output['video_embedding'][0][0][0]))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_video_multi_object_tracking.py b/tests/pipelines/test_video_multi_object_tracking.py
index 97f1e705..f63fd8b1 100644
--- a/tests/pipelines/test_video_multi_object_tracking.py
+++ b/tests/pipelines/test_video_multi_object_tracking.py
@@ -4,11 +4,10 @@ import unittest
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class MultiObjectTracking(unittest.TestCase, DemoCompatibilityCheck):
+class MultiObjectTracking(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.video_multi_object_tracking
@@ -34,10 +33,6 @@ class MultiObjectTracking(unittest.TestCase, DemoCompatibilityCheck):
in result)
assert len(result[OutputKeys.LABELS]) == len(result[OutputKeys.BOXES])
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_video_object_segmentation.py b/tests/pipelines/test_video_object_segmentation.py
index e4adeb26..6f0e7c2a 100644
--- a/tests/pipelines/test_video_object_segmentation.py
+++ b/tests/pipelines/test_video_object_segmentation.py
@@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.cv.image_utils import masks_visualization
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class VideoObjectSegmentationTest(unittest.TestCase, DemoCompatibilityCheck):
+class VideoObjectSegmentationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = 'video-object-segmentation'
diff --git a/tests/pipelines/test_video_panoptic_segmentation.py b/tests/pipelines/test_video_panoptic_segmentation.py
index ad038135..cc805812 100644
--- a/tests/pipelines/test_video_panoptic_segmentation.py
+++ b/tests/pipelines/test_video_panoptic_segmentation.py
@@ -3,11 +3,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class VideoPanopticSegmentationTest(unittest.TestCase, DemoCompatibilityCheck):
+class VideoPanopticSegmentationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.video_panoptic_segmentation
@@ -32,10 +31,6 @@ class VideoPanopticSegmentationTest(unittest.TestCase, DemoCompatibilityCheck):
print(f'video summarization output:\n {result}.')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_video_single_object_tracking.py b/tests/pipelines/test_video_single_object_tracking.py
index e75ccbb0..c8331649 100644
--- a/tests/pipelines/test_video_single_object_tracking.py
+++ b/tests/pipelines/test_video_single_object_tracking.py
@@ -5,11 +5,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.cv.image_utils import show_video_tracking_result
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class SingleObjectTracking(unittest.TestCase, DemoCompatibilityCheck):
+class SingleObjectTracking(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.video_single_object_tracking
@@ -46,10 +45,6 @@ class SingleObjectTracking(unittest.TestCase, DemoCompatibilityCheck):
result = video_single_object_tracking((video_path, init_bbox))
print('result is : ', result[OutputKeys.BOXES])
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_video_stabilization.py b/tests/pipelines/test_video_stabilization.py
index d102f3e1..26501c2d 100644
--- a/tests/pipelines/test_video_stabilization.py
+++ b/tests/pipelines/test_video_stabilization.py
@@ -7,11 +7,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.cv import VideoStabilizationPipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class VideoStabilizationTest(unittest.TestCase, DemoCompatibilityCheck):
+class VideoStabilizationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.video_stabilization
@@ -42,10 +41,6 @@ class VideoStabilizationTest(unittest.TestCase, DemoCompatibilityCheck):
input=self.test_video)[OutputKeys.OUTPUT_VIDEO]
print('pipeline: the output video path is {}'.format(out_video_path))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_video_summarization.py b/tests/pipelines/test_video_summarization.py
index 1f965c53..dc6a3a80 100644
--- a/tests/pipelines/test_video_summarization.py
+++ b/tests/pipelines/test_video_summarization.py
@@ -3,11 +3,10 @@ import unittest
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class VideoSummarizationTest(unittest.TestCase, DemoCompatibilityCheck):
+class VideoSummarizationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.video_summarization
@@ -30,10 +29,6 @@ class VideoSummarizationTest(unittest.TestCase, DemoCompatibilityCheck):
print(f'video summarization output:\n {result}.')
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_video_super_resolution.py b/tests/pipelines/test_video_super_resolution.py
index 0da18dd7..2e207887 100644
--- a/tests/pipelines/test_video_super_resolution.py
+++ b/tests/pipelines/test_video_super_resolution.py
@@ -7,11 +7,10 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.cv import VideoSuperResolutionPipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class VideoSuperResolutionTest(unittest.TestCase, DemoCompatibilityCheck):
+class VideoSuperResolutionTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.video_super_resolution
@@ -50,10 +49,6 @@ class VideoSuperResolutionTest(unittest.TestCase, DemoCompatibilityCheck):
input=self.test_video)[OutputKeys.OUTPUT_VIDEO]
print('pipeline: the output video path is {}'.format(out_video_path))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_vidt_face.py b/tests/pipelines/test_vidt_face.py
index 8640d128..e49d9de9 100644
--- a/tests/pipelines/test_vidt_face.py
+++ b/tests/pipelines/test_vidt_face.py
@@ -5,11 +5,10 @@ from modelscope.models import Model
from modelscope.models.cv.vidt import VidtModel
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class VidtTest(unittest.TestCase, DemoCompatibilityCheck):
+class VidtTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_object_detection
diff --git a/tests/pipelines/test_vidt_logo.py b/tests/pipelines/test_vidt_logo.py
index 143eb205..fce6fe48 100644
--- a/tests/pipelines/test_vidt_logo.py
+++ b/tests/pipelines/test_vidt_logo.py
@@ -5,11 +5,10 @@ from modelscope.models import Model
from modelscope.models.cv.vidt import VidtModel
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class VidtTest(unittest.TestCase, DemoCompatibilityCheck):
+class VidtTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_object_detection
diff --git a/tests/pipelines/test_virtual_try_on.py b/tests/pipelines/test_virtual_try_on.py
index 5c18dcc4..c8a55f79 100644
--- a/tests/pipelines/test_virtual_try_on.py
+++ b/tests/pipelines/test_virtual_try_on.py
@@ -8,11 +8,10 @@ from PIL import Image
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class VirtualTryonTest(unittest.TestCase, DemoCompatibilityCheck):
+class VirtualTryonTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.virtual_try_on
@@ -36,10 +35,6 @@ class VirtualTryonTest(unittest.TestCase, DemoCompatibilityCheck):
img = pipeline_virtual_tryon(self.input_imgs)[OutputKeys.OUTPUT_IMG]
cv2.imwrite('demo.jpg', img[:, :, ::-1])
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_vision_efficient_tuning.py b/tests/pipelines/test_vision_efficient_tuning.py
index c88ed478..acfbb235 100644
--- a/tests/pipelines/test_vision_efficient_tuning.py
+++ b/tests/pipelines/test_vision_efficient_tuning.py
@@ -6,11 +6,10 @@ from modelscope.models.cv.vision_efficient_tuning.model import \
VisionEfficientTuningModel
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck):
+class VisionEfficientTuningTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.vision_efficient_tuning
@@ -29,11 +28,6 @@ class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck):
model = Model.from_pretrained(model_id)
self.assertTrue(model.__class__ == VisionEfficientTuningModel)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_vision_efficient_tuning_adapter_demo_compatibility(self):
- self.model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-adapter'
- self.compatibility_check()
-
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_vision_efficient_tuning_lora_run_pipeline(self):
model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-lora'
@@ -48,11 +42,6 @@ class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck):
model = Model.from_pretrained(model_id)
self.assertTrue(model.__class__ == VisionEfficientTuningModel)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_vision_efficient_tuning_lora_demo_compatibility(self):
- self.model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-lora'
- self.compatibility_check()
-
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_vision_efficient_tuning_prefix_run_pipeline(self):
model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-prefix'
@@ -67,11 +56,6 @@ class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck):
model = Model.from_pretrained(model_id)
self.assertTrue(model.__class__ == VisionEfficientTuningModel)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_vision_efficient_tuning_prefix_demo_compatibility(self):
- self.model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-prefix'
- self.compatibility_check()
-
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_vision_efficient_tuning_prompt_run_pipeline(self):
model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-prompt'
@@ -86,11 +70,6 @@ class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck):
model = Model.from_pretrained(model_id)
self.assertTrue(model.__class__ == VisionEfficientTuningModel)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_vision_efficient_tuning_prompt_demo_compatibility(self):
- self.model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-prompt'
- self.compatibility_check()
-
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_vision_efficient_tuning_bitfit_run_pipeline(self):
model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-bitfit'
@@ -105,11 +84,6 @@ class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck):
model = Model.from_pretrained(model_id)
self.assertTrue(model.__class__ == VisionEfficientTuningModel)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_vision_efficient_tuning_bitfit_demo_compatibility(self):
- self.model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-bitfit'
- self.compatibility_check()
-
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_vision_efficient_tuning_sidetuning_run_pipeline(self):
model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-sidetuning'
@@ -125,11 +99,6 @@ class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck):
model = Model.from_pretrained(model_id)
self.assertTrue(model.__class__ == VisionEfficientTuningModel)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_vision_efficient_tuning_sidetuning_demo_compatibility(self):
- self.model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-sidetuning'
- self.compatibility_check()
-
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_vision_efficient_tuning_utuning_run_pipeline(self):
model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-utuning'
@@ -144,11 +113,6 @@ class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck):
model = Model.from_pretrained(model_id)
self.assertTrue(model.__class__ == VisionEfficientTuningModel)
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_vision_efficient_tuning_utuning_demo_compatibility(self):
- self.model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-utuning'
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_vision_middleware.py b/tests/pipelines/test_vision_middleware.py
index b3531154..e8c1218b 100644
--- a/tests/pipelines/test_vision_middleware.py
+++ b/tests/pipelines/test_vision_middleware.py
@@ -5,11 +5,10 @@ from modelscope.models import Model
from modelscope.models.cv.vision_middleware import VisionMiddlewareModel
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class VisionMiddlewareTest(unittest.TestCase, DemoCompatibilityCheck):
+class VisionMiddlewareTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.image_segmentation
diff --git a/tests/pipelines/test_vop_retrieval.py b/tests/pipelines/test_vop_retrieval.py
index c9c356c5..78e7eecc 100644
--- a/tests/pipelines/test_vop_retrieval.py
+++ b/tests/pipelines/test_vop_retrieval.py
@@ -5,11 +5,10 @@ from modelscope.models import Model
from modelscope.models.cv.vop_retrieval import VoP
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class VopRetrievalTest(unittest.TestCase, DemoCompatibilityCheck):
+class VopRetrievalTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.vop_retrieval
diff --git a/tests/pipelines/test_vop_retrieval_sebias.py b/tests/pipelines/test_vop_retrieval_sebias.py
index bea1bc45..a129f7f0 100644
--- a/tests/pipelines/test_vop_retrieval_sebias.py
+++ b/tests/pipelines/test_vop_retrieval_sebias.py
@@ -5,11 +5,10 @@ from modelscope.models import Model
from modelscope.models.cv.vop_retrieval import VideoTextRetrievalModelSeries
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class VopRetrievalTest(unittest.TestCase, DemoCompatibilityCheck):
+class VopRetrievalTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.vop_retrieval
diff --git a/tests/pipelines/test_vop_retrieval_separtial.py b/tests/pipelines/test_vop_retrieval_separtial.py
index 942fbd3b..c5832aaa 100644
--- a/tests/pipelines/test_vop_retrieval_separtial.py
+++ b/tests/pipelines/test_vop_retrieval_separtial.py
@@ -5,11 +5,10 @@ from modelscope.models import Model
from modelscope.models.cv.vop_retrieval import VideoTextRetrievalModelSeries
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class VopRetrievalTest(unittest.TestCase, DemoCompatibilityCheck):
+class VopRetrievalTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.vop_retrieval
diff --git a/tests/pipelines/test_vop_retrieval_seproj.py b/tests/pipelines/test_vop_retrieval_seproj.py
index a371ac36..2fceb2e7 100644
--- a/tests/pipelines/test_vop_retrieval_seproj.py
+++ b/tests/pipelines/test_vop_retrieval_seproj.py
@@ -5,11 +5,10 @@ from modelscope.models import Model
from modelscope.models.cv.vop_retrieval import VideoTextRetrievalModelSeries
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
-class VopRetrievalTest(unittest.TestCase, DemoCompatibilityCheck):
+class VopRetrievalTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.vop_retrieval
diff --git a/tests/pipelines/test_wenet_automatic_speech_recognition.py b/tests/pipelines/test_wenet_automatic_speech_recognition.py
index 4adf8119..ac47cea7 100644
--- a/tests/pipelines/test_wenet_automatic_speech_recognition.py
+++ b/tests/pipelines/test_wenet_automatic_speech_recognition.py
@@ -10,7 +10,6 @@ import soundfile
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import ColorCodes, Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import download_and_untar, test_level
@@ -20,8 +19,7 @@ WAV_FILE = 'data/test/audios/asr_example.wav'
URL_FILE = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example.wav'
-class WeNetAutomaticSpeechRecognitionTest(unittest.TestCase,
- DemoCompatibilityCheck):
+class WeNetAutomaticSpeechRecognitionTest(unittest.TestCase):
action_info = {
'test_run_with_pcm': {
'checking_item': OutputKeys.TEXT,
diff --git a/tests/pipelines/test_word_segmentation.py b/tests/pipelines/test_word_segmentation.py
index f8bdaef7..f8c9e078 100644
--- a/tests/pipelines/test_word_segmentation.py
+++ b/tests/pipelines/test_word_segmentation.py
@@ -10,12 +10,11 @@ from modelscope.pipelines.nlp import WordSegmentationPipeline
from modelscope.preprocessors import \
TokenClassificationTransformersPreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool
from modelscope.utils.test_utils import test_level
-class WordSegmentationTest(unittest.TestCase, DemoCompatibilityCheck):
+class WordSegmentationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.word_segmentation
@@ -164,10 +163,6 @@ class WordSegmentationTest(unittest.TestCase, DemoCompatibilityCheck):
pipeline_ins = pipeline(task=Tasks.word_segmentation)
print(pipeline_ins(input=self.sentence))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_zero_shot_classification.py b/tests/pipelines/test_zero_shot_classification.py
index f9a52b42..89832d18 100644
--- a/tests/pipelines/test_zero_shot_classification.py
+++ b/tests/pipelines/test_zero_shot_classification.py
@@ -9,12 +9,11 @@ from modelscope.pipelines.nlp import ZeroShotClassificationPipeline
from modelscope.preprocessors import \
ZeroShotClassificationTransformersPreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool
from modelscope.utils.test_utils import test_level
-class ZeroShotClassificationTest(unittest.TestCase, DemoCompatibilityCheck):
+class ZeroShotClassificationTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.zero_shot_classification
@@ -79,10 +78,6 @@ class ZeroShotClassificationTest(unittest.TestCase, DemoCompatibilityCheck):
pipeline_ins = pipeline(task=Tasks.zero_shot_classification)
print(pipeline_ins(input=self.sentence, candidate_labels=self.labels))
- @unittest.skip('demo compatibility test is only enabled on a needed-basis')
- def test_demo_compatibility(self):
- self.compatibility_check()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/run_config.yaml b/tests/run_config.yaml
index 773c6397..ba678468 100644
--- a/tests/run_config.yaml
+++ b/tests/run_config.yaml
@@ -21,6 +21,7 @@ isolated: # test cases that may require excessive amount of GPU memory or run
- test_image_instance_segmentation_trainer.py
- test_image_portrait_enhancement_trainer.py
- test_translation_trainer.py
+ - test_translation_evaluation_trainer.py
- test_unifold.py
- test_automatic_post_editing.py
- test_mplug_tasks.py
@@ -66,7 +67,7 @@ isolated: # test cases that may require excessive amount of GPU memory or run
envs:
default: # default env, case not in other env will in default, pytorch.
dependencies: # requirement packages,pip install before test case run.
- - numpy>=1.20
+ - numpy>=1.20,<=1.21.0
- protobuf<4,>=3.20.2
tensorflow1x: # cases excuted tensorflow1.x framework.
requirements: # requirements files run before test case run.
@@ -77,6 +78,7 @@ envs:
- test_text_to_speech.py
- test_csanmt_translation.py
- test_translation_trainer.py
+ - test_translation_evaluation_trainer.py
- test_ocr_detection.py
- test_automatic_speech_recognition.py
- test_image_matting.py
@@ -85,3 +87,21 @@ envs:
- test_image_style_transfer.py
- test_image_portrait_stylization_trainer.py
- test_language_identification.py
+ - test_language_guided_video_summarization_trainer.py
+ - test_motion_generation.py
+ - test_universal_matting.py
+ - test_dialog_modeling.py
+ - test_trainer.py
+ - test_abnormal_object_detection.py
+ - test_image_face_fusion.py
+ - test_ocr_detection_db_trainer.py
+ - test_language_guided_video_summarization.py
+ - test_interactive_translation_pipeline.py
+ - test_image_defrcn_fewshot_trainer.py
+ - test_automatic_post_editing.py
+ - test_human_reconstruction.py
+ - test_nerf_recon_acc_trainer.py
+ - test_nerf_recon_acc.py
+ - test_speech_signal_process.py
+ - test_tensorboard_hook.py
+ - test_efficient_diffusion_tuning_trainer.py
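The run_config.yaml changes above pin numpy and add test_translation_evaluation_trainer.py both to the isolated set and to an environment-specific case list (apparently the tensorflow1x one, which also gains a batch of further cases). A small sanity-check sketch in Python, assuming PyYAML and the layout visible in the hunks (a top-level isolated list plus envs.default.dependencies):

import yaml

# Hedged sketch: inspect tests/run_config.yaml after the edits above.
with open('tests/run_config.yaml') as f:
    run_cfg = yaml.safe_load(f)

assert 'test_translation_evaluation_trainer.py' in run_cfg['isolated']
print(run_cfg['envs']['default']['dependencies'])  # ['numpy>=1.20,<=1.21.0', 'protobuf<4,>=3.20.2']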
diff --git a/tests/trainers/audio/test_kws_farfield_trainer.py b/tests/trainers/audio/test_kws_farfield_trainer.py
index cc2b38f6..9bf65e04 100644
--- a/tests/trainers/audio/test_kws_farfield_trainer.py
+++ b/tests/trainers/audio/test_kws_farfield_trainer.py
@@ -23,6 +23,7 @@ class TestKwsFarfieldTrainer(unittest.TestCase):
if not os.path.exists(self.tmp_dir):
os.makedirs(self.tmp_dir)
self.model_id = 'damo/speech_dfsmn_kws_char_farfield_16k_nihaomiya'
+ self.model_id_iot = 'damo/speech_dfsmn_kws_char_farfield_iot_16k_nihaomiya'
train_pos_list = self.create_list('pos.list', POS_FILE)
train_neg_list = self.create_list('neg.list', NEG_FILE)
@@ -83,3 +84,23 @@ class TestKwsFarfieldTrainer(unittest.TestCase):
f'work_dir:{self.tmp_dir}')
self.assertIn('val_dataset.bin', results_files,
f'work_dir:{self.tmp_dir}')
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_normal_iot(self):
+ kwargs = dict(
+ model=self.model_id_iot,
+ work_dir=self.tmp_dir,
+ workers=2,
+ max_epochs=2,
+ train_iters_per_epoch=2,
+ val_iters_per_epoch=1,
+ custom_conf=self.custom_conf)
+
+ trainer = build_trainer(
+ Trainers.speech_dfsmn_kws_char_farfield, default_args=kwargs)
+ trainer.train()
+ results_files = os.listdir(self.tmp_dir)
+ self.assertIn(f'{trainer.timestamp}.log.json', results_files,
+ f'work_dir:{self.tmp_dir}')
+ self.assertIn('val_dataset.bin', results_files,
+ f'work_dir:{self.tmp_dir}')
diff --git a/tests/trainers/easycv/__init__.py b/tests/trainers/cli/__init__.py
similarity index 100%
rename from tests/trainers/easycv/__init__.py
rename to tests/trainers/cli/__init__.py
diff --git a/tests/trainers/cli/test_cli.py b/tests/trainers/cli/test_cli.py
new file mode 100644
index 00000000..b9fb7539
--- /dev/null
+++ b/tests/trainers/cli/test_cli.py
@@ -0,0 +1,52 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest
+
+import json
+
+from modelscope import MsDataset, TrainingArgs, build_dataset_from_file
+from modelscope.utils.test_utils import test_level
+
+
+class TestCli(unittest.TestCase):
+
+ def setUp(self) -> None:
+ content = [{
+ 'dataset': {
+ 'dataset_name': 'clue',
+ 'subset_name': 'cmnli',
+ 'split': 'train',
+ },
+ 'column_mapping': {
+ 'sentence1': 'sentence1',
+ 'sentence2': 'sentence2',
+ 'label': 'label',
+ },
+ 'split': 0.8,
+ }, {
+ 'dataset': {
+ 'dataset_name': 'glue',
+ 'subset_name': 'mnli',
+ 'split': 'validation_matched',
+ },
+ 'column_mapping': {
+ 'premise': 'sentence1',
+ 'hypothesis': 'sentence2',
+ 'label': 'label',
+ },
+ 'split': 'val',
+ }]
+ with open('./dataset.json', 'w') as f:
+ json.dump(content, f)
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_merge_dataset_from_file(self):
+ dataset = MsDataset.load('clue', subset_name='cmnli', split='train')
+ dataset2 = MsDataset.load(
+ 'glue', subset_name='mnli', split='validation_matched')
+ training_args = TrainingArgs(dataset_json_file='./dataset.json')
+ train, test = build_dataset_from_file(training_args.dataset_json_file)
+ self.assertEqual(len(train) + len(test), len(dataset) + len(dataset2))
+
+
+if __name__ == '__main__':
+ unittest.main()
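The new CLI test writes a two-entry dataset.json and checks that build_dataset_from_file reproduces the combined row count of the two source datasets. The 'split': 0.8 field on the first entry appears to send 80% of the clue/cmnli rows to training and the remainder to validation, while 'split': 'val' routes the glue/mnli rows entirely to validation; only the total length is asserted. Reusing the same file from a training script would look roughly like this (a sketch, not part of the test):

from modelscope import TrainingArgs, build_dataset_from_file

# Hedged sketch: consume the dataset.json written in setUp above.
training_args = TrainingArgs(dataset_json_file='./dataset.json')
train_dataset, val_dataset = build_dataset_from_file(training_args.dataset_json_file)
print(len(train_dataset), len(val_dataset))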
diff --git a/tests/trainers/easycv/test_easycv_trainer.py b/tests/trainers/easycv/test_easycv_trainer.py
deleted file mode 100644
index 11f9a739..00000000
--- a/tests/trainers/easycv/test_easycv_trainer.py
+++ /dev/null
@@ -1,238 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-import glob
-import os
-import shutil
-import tempfile
-import unittest
-
-import json
-import torch
-
-from modelscope.metainfo import Models, Pipelines, Trainers
-from modelscope.msdatasets import MsDataset
-from modelscope.trainers import build_trainer
-from modelscope.utils.config import Config
-from modelscope.utils.constant import LogKeys, ModeKeys, Tasks
-from modelscope.utils.logger import get_logger
-from modelscope.utils.test_utils import DistributedTestCase, test_level
-from modelscope.utils.torch_utils import is_master
-
-
-def train_func(work_dir, dist=False, log_interval=3, imgs_per_gpu=4):
- import easycv
- config_path = os.path.join(
- os.path.dirname(easycv.__file__),
- 'configs/detection/yolox/yolox_s_8xb16_300e_coco.py')
-
- cfg = Config.from_file(config_path)
-
- cfg.log_config.update(
- dict(hooks=[
- dict(type='TextLoggerHook'),
- dict(type='TensorboardLoggerHook')
- ])) # not support TensorboardLoggerHookV2
-
- ms_cfg_file = os.path.join(work_dir, 'ms_yolox_s_8xb16_300e_coco.json')
- from easycv.utils.ms_utils import to_ms_config
-
- if is_master():
- to_ms_config(
- cfg,
- dump=True,
- task=Tasks.image_object_detection,
- ms_model_name=Models.yolox,
- pipeline_name=Pipelines.easycv_detection,
- save_path=ms_cfg_file)
-
- trainer_name = Trainers.easycv
- train_dataset = MsDataset.load(
- dataset_name='small_coco_for_test', namespace='EasyCV', split='train')
- eval_dataset = MsDataset.load(
- dataset_name='small_coco_for_test',
- namespace='EasyCV',
- split='validation')
-
- cfg_options = {
- 'train.max_epochs':
- 2,
- 'train.dataloader.batch_size_per_gpu':
- imgs_per_gpu,
- 'evaluation.dataloader.batch_size_per_gpu':
- 2,
- 'train.hooks': [
- {
- 'type': 'CheckpointHook',
- 'interval': 1
- },
- {
- 'type': 'EvaluationHook',
- 'interval': 1
- },
- {
- 'type': 'TextLoggerHook',
- 'ignore_rounding_keys': None,
- 'interval': log_interval
- },
- ]
- }
- kwargs = dict(
- cfg_file=ms_cfg_file,
- train_dataset=train_dataset,
- eval_dataset=eval_dataset,
- work_dir=work_dir,
- cfg_options=cfg_options,
- launcher='pytorch' if dist else None)
-
- trainer = build_trainer(trainer_name, kwargs)
- trainer.train()
-
-
-@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest')
-class EasyCVTrainerTestSingleGpu(unittest.TestCase):
-
- def setUp(self):
- self.logger = get_logger()
- self.logger.info(('Testing %s.%s' %
- (type(self).__name__, self._testMethodName)))
- self.tmp_dir = tempfile.TemporaryDirectory().name
- if not os.path.exists(self.tmp_dir):
- os.makedirs(self.tmp_dir)
-
- def tearDown(self):
- super().tearDown()
- shutil.rmtree(self.tmp_dir, ignore_errors=True)
-
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_single_gpu(self):
- train_func(self.tmp_dir)
-
- results_files = os.listdir(self.tmp_dir)
- json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
- self.assertEqual(len(json_files), 1)
-
- with open(json_files[0], 'r', encoding='utf-8') as f:
- lines = [i.strip() for i in f.readlines()]
-
- self.assertDictContainsSubset(
- {
- LogKeys.MODE: ModeKeys.TRAIN,
- LogKeys.EPOCH: 1,
- LogKeys.ITER: 3,
- LogKeys.LR: 0.00029
- }, json.loads(lines[0]))
- self.assertDictContainsSubset(
- {
- LogKeys.MODE: ModeKeys.EVAL,
- LogKeys.EPOCH: 1,
- LogKeys.ITER: 10
- }, json.loads(lines[1]))
- self.assertDictContainsSubset(
- {
- LogKeys.MODE: ModeKeys.TRAIN,
- LogKeys.EPOCH: 2,
- LogKeys.ITER: 3,
- LogKeys.LR: 0.00205
- }, json.loads(lines[2]))
- self.assertDictContainsSubset(
- {
- LogKeys.MODE: ModeKeys.EVAL,
- LogKeys.EPOCH: 2,
- LogKeys.ITER: 10
- }, json.loads(lines[3]))
- self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files)
- self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files)
- for i in [0, 2]:
- self.assertIn(LogKeys.DATA_LOAD_TIME, lines[i])
- self.assertIn(LogKeys.ITER_TIME, lines[i])
- self.assertIn(LogKeys.MEMORY, lines[i])
- self.assertIn('total_loss', lines[i])
- for i in [1, 3]:
- self.assertIn(
- 'CocoDetectionEvaluator_DetectionBoxes_Precision/mAP',
- lines[i])
- self.assertIn('DetectionBoxes_Precision/mAP', lines[i])
- self.assertIn('DetectionBoxes_Precision/mAP@.50IOU', lines[i])
- self.assertIn('DetectionBoxes_Precision/mAP@.75IOU', lines[i])
- self.assertIn('DetectionBoxes_Precision/mAP (small)', lines[i])
-
-
-@unittest.skipIf(not torch.cuda.is_available()
- or torch.cuda.device_count() <= 1, 'distributed unittest')
-class EasyCVTrainerTestMultiGpus(DistributedTestCase):
-
- def setUp(self):
- self.logger = get_logger()
- self.logger.info(('Testing %s.%s' %
- (type(self).__name__, self._testMethodName)))
- self.tmp_dir = tempfile.TemporaryDirectory().name
- if not os.path.exists(self.tmp_dir):
- os.makedirs(self.tmp_dir)
-
- def tearDown(self):
- super().tearDown()
- shutil.rmtree(self.tmp_dir, ignore_errors=True)
-
- @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
- def test_multi_gpus(self):
- self.start(
- train_func,
- num_gpus=2,
- work_dir=self.tmp_dir,
- dist=True,
- log_interval=2,
- imgs_per_gpu=5)
-
- results_files = os.listdir(self.tmp_dir)
- json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
- self.assertEqual(len(json_files), 1)
-
- with open(json_files[0], 'r', encoding='utf-8') as f:
- lines = [i.strip() for i in f.readlines()]
-
- self.assertDictContainsSubset(
- {
- LogKeys.MODE: ModeKeys.TRAIN,
- LogKeys.EPOCH: 1,
- LogKeys.ITER: 2,
- LogKeys.LR: 0.0002
- }, json.loads(lines[0]))
- self.assertDictContainsSubset(
- {
- LogKeys.MODE: ModeKeys.EVAL,
- LogKeys.EPOCH: 1,
- LogKeys.ITER: 5
- }, json.loads(lines[1]))
- self.assertDictContainsSubset(
- {
- LogKeys.MODE: ModeKeys.TRAIN,
- LogKeys.EPOCH: 2,
- LogKeys.ITER: 2,
- LogKeys.LR: 0.0018
- }, json.loads(lines[2]))
- self.assertDictContainsSubset(
- {
- LogKeys.MODE: ModeKeys.EVAL,
- LogKeys.EPOCH: 2,
- LogKeys.ITER: 5
- }, json.loads(lines[3]))
-
- self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files)
- self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files)
-
- for i in [0, 2]:
- self.assertIn(LogKeys.DATA_LOAD_TIME, lines[i])
- self.assertIn(LogKeys.ITER_TIME, lines[i])
- self.assertIn(LogKeys.MEMORY, lines[i])
- self.assertIn('total_loss', lines[i])
- for i in [1, 3]:
- self.assertIn(
- 'CocoDetectionEvaluator_DetectionBoxes_Precision/mAP',
- lines[i])
- self.assertIn('DetectionBoxes_Precision/mAP', lines[i])
- self.assertIn('DetectionBoxes_Precision/mAP@.50IOU', lines[i])
- self.assertIn('DetectionBoxes_Precision/mAP@.75IOU', lines[i])
- self.assertIn('DetectionBoxes_Precision/mAP (small)', lines[i])
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/tests/trainers/easycv/test_easycv_trainer_detection_dino.py b/tests/trainers/easycv/test_easycv_trainer_detection_dino.py
deleted file mode 100644
index 90d1f691..00000000
--- a/tests/trainers/easycv/test_easycv_trainer_detection_dino.py
+++ /dev/null
@@ -1,69 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-import glob
-import os
-import shutil
-import tempfile
-import unittest
-
-import torch
-
-from modelscope.metainfo import Trainers
-from modelscope.msdatasets import MsDataset
-from modelscope.trainers import build_trainer
-from modelscope.utils.constant import LogKeys
-from modelscope.utils.logger import get_logger
-from modelscope.utils.test_utils import test_level
-
-
-@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest')
-class EasyCVTrainerTestDetectionDino(unittest.TestCase):
- model_id = 'damo/cv_swinl_image-object-detection_dino'
-
- def setUp(self):
- self.logger = get_logger()
- self.logger.info(('Testing %s.%s' %
- (type(self).__name__, self._testMethodName)))
-
- def _train(self, tmp_dir):
- cfg_options = {'train.max_epochs': 1}
-
- trainer_name = Trainers.easycv
-
- train_dataset = MsDataset.load(
- dataset_name='small_coco_for_test',
- namespace='EasyCV',
- split='train')
- eval_dataset = MsDataset.load(
- dataset_name='small_coco_for_test',
- namespace='EasyCV',
- split='validation')
-
- kwargs = dict(
- model=self.model_id,
- train_dataset=train_dataset,
- eval_dataset=eval_dataset,
- work_dir=tmp_dir,
- cfg_options=cfg_options)
-
- trainer = build_trainer(trainer_name, kwargs)
- trainer.train()
-
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_trainer_single_gpu(self):
- temp_file_dir = tempfile.TemporaryDirectory()
- tmp_dir = temp_file_dir.name
- if not os.path.exists(tmp_dir):
- os.makedirs(tmp_dir)
-
- self._train(tmp_dir)
-
- results_files = os.listdir(tmp_dir)
- json_files = glob.glob(os.path.join(tmp_dir, '*.log.json'))
- self.assertEqual(len(json_files), 1)
- self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files)
-
- temp_file_dir.cleanup()
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py b/tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py
deleted file mode 100644
index e4f0c57e..00000000
--- a/tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-import glob
-import os
-import shutil
-import tempfile
-import unittest
-
-import torch
-
-from modelscope.metainfo import Trainers
-from modelscope.msdatasets import MsDataset
-from modelscope.trainers import build_trainer
-from modelscope.utils.constant import DownloadMode, LogKeys, Tasks
-from modelscope.utils.logger import get_logger
-from modelscope.utils.test_utils import test_level
-
-
-@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest')
-class EasyCVTrainerTestFace2DKeypoints(unittest.TestCase):
- model_id = 'damo/cv_mobilenet_face-2d-keypoints_alignment'
-
- def setUp(self):
- self.logger = get_logger()
- self.logger.info(('Testing %s.%s' %
- (type(self).__name__, self._testMethodName)))
-
- def _train(self, tmp_dir):
- cfg_options = {'train.max_epochs': 2}
-
- trainer_name = Trainers.easycv
-
- train_dataset = MsDataset.load(
- dataset_name='face_2d_keypoints_dataset',
- namespace='modelscope',
- split='train',
- download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS)
- eval_dataset = MsDataset.load(
- dataset_name='face_2d_keypoints_dataset',
- namespace='modelscope',
- split='train',
- download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS)
-
- kwargs = dict(
- model=self.model_id,
- train_dataset=train_dataset,
- eval_dataset=eval_dataset,
- work_dir=tmp_dir,
- cfg_options=cfg_options)
-
- trainer = build_trainer(trainer_name, kwargs)
- trainer.train()
-
- @unittest.skip(
- 'skip since face_2d_keypoints_dataset is set to private for now')
- def test_trainer_single_gpu(self):
- temp_file_dir = tempfile.TemporaryDirectory()
- tmp_dir = temp_file_dir.name
- if not os.path.exists(tmp_dir):
- os.makedirs(tmp_dir)
-
- self._train(tmp_dir)
-
- results_files = os.listdir(tmp_dir)
- json_files = glob.glob(os.path.join(tmp_dir, '*.log.json'))
- self.assertEqual(len(json_files), 1)
- self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files)
-
- temp_file_dir.cleanup()
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/tests/trainers/easycv/test_easycv_trainer_hand_2d_keypoints.py b/tests/trainers/easycv/test_easycv_trainer_hand_2d_keypoints.py
deleted file mode 100644
index 270ecbc4..00000000
--- a/tests/trainers/easycv/test_easycv_trainer_hand_2d_keypoints.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-import glob
-import os
-import shutil
-import tempfile
-import unittest
-
-import torch
-
-from modelscope.metainfo import Trainers
-from modelscope.msdatasets import MsDataset
-from modelscope.trainers import build_trainer
-from modelscope.utils.constant import DownloadMode, LogKeys, Tasks
-from modelscope.utils.logger import get_logger
-from modelscope.utils.test_utils import test_level
-
-
-@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest')
-class EasyCVTrainerTestHand2dKeypoints(unittest.TestCase):
- model_id = 'damo/cv_hrnetw18_hand-pose-keypoints_coco-wholebody'
-
- def setUp(self):
- self.logger = get_logger()
- self.logger.info(('Testing %s.%s' %
- (type(self).__name__, self._testMethodName)))
- self.tmp_dir = tempfile.TemporaryDirectory().name
- if not os.path.exists(self.tmp_dir):
- os.makedirs(self.tmp_dir)
-
- def tearDown(self):
- super().tearDown()
- shutil.rmtree(self.tmp_dir, ignore_errors=True)
-
- def _train(self):
- cfg_options = {'train.max_epochs': 20}
-
- trainer_name = Trainers.easycv
-
- train_dataset = MsDataset.load(
- dataset_name='cv_hand_2d_keypoints_coco_wholebody',
- namespace='chenhyer',
- split='subtrain',
- download_mode=DownloadMode.FORCE_REDOWNLOAD)
- eval_dataset = MsDataset.load(
- dataset_name='cv_hand_2d_keypoints_coco_wholebody',
- namespace='chenhyer',
- split='subtrain',
- download_mode=DownloadMode.FORCE_REDOWNLOAD)
-
- kwargs = dict(
- model=self.model_id,
- train_dataset=train_dataset,
- eval_dataset=eval_dataset,
- work_dir=self.tmp_dir,
- cfg_options=cfg_options)
-
- trainer = build_trainer(trainer_name, kwargs)
- trainer.train()
-
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_trainer_single_gpu(self):
- self._train()
-
- results_files = os.listdir(self.tmp_dir)
- json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
- self.assertEqual(len(json_files), 1)
- self.assertIn(f'{LogKeys.EPOCH}_10.pth', results_files)
- self.assertIn(f'{LogKeys.EPOCH}_20.pth', results_files)
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/tests/trainers/easycv/test_easycv_trainer_hand_detection.py b/tests/trainers/easycv/test_easycv_trainer_hand_detection.py
deleted file mode 100644
index 60ea1319..00000000
--- a/tests/trainers/easycv/test_easycv_trainer_hand_detection.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-import glob
-import os
-import shutil
-import tempfile
-import unittest
-
-import torch
-
-from modelscope.metainfo import Trainers
-from modelscope.msdatasets import MsDataset
-from modelscope.trainers import build_trainer
-from modelscope.utils.constant import DownloadMode, LogKeys, Tasks
-from modelscope.utils.logger import get_logger
-from modelscope.utils.test_utils import test_level
-
-
-class EasyCVTrainerTestHandDetection(unittest.TestCase):
- model_id = 'damo/cv_yolox-pai_hand-detection'
-
- def setUp(self):
- self.logger = get_logger()
- self.logger.info(('Testing %s.%s' %
- (type(self).__name__, self._testMethodName)))
-
- def _train(self, tmp_dir):
- cfg_options = {'train.max_epochs': 2}
-
- trainer_name = Trainers.easycv
-
- train_dataset = MsDataset.load(
- dataset_name='hand_detection_dataset', split='subtrain')
- eval_dataset = MsDataset.load(
- dataset_name='hand_detection_dataset', split='subtrain')
-
- kwargs = dict(
- model=self.model_id,
- train_dataset=train_dataset,
- eval_dataset=eval_dataset,
- work_dir=tmp_dir,
- cfg_options=cfg_options)
-
- trainer = build_trainer(trainer_name, kwargs)
- trainer.train()
-
- @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
- def test_trainer_single_gpu(self):
- temp_file_dir = tempfile.TemporaryDirectory()
- tmp_dir = temp_file_dir.name
- if not os.path.exists(tmp_dir):
- os.makedirs(tmp_dir)
-
- self._train(tmp_dir)
-
- results_files = os.listdir(tmp_dir)
- # json_files = glob.glob(os.path.join(tmp_dir, '*.log.json'))
- self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files)
-
- temp_file_dir.cleanup()
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/tests/trainers/easycv/test_easycv_trainer_panoptic_mask2former.py b/tests/trainers/easycv/test_easycv_trainer_panoptic_mask2former.py
deleted file mode 100644
index f6a6c41a..00000000
--- a/tests/trainers/easycv/test_easycv_trainer_panoptic_mask2former.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-import glob
-import os
-import shutil
-import tempfile
-import unittest
-
-import torch
-from mmcv.runner.hooks import HOOKS as MMCV_HOOKS
-
-from modelscope.metainfo import Trainers
-from modelscope.msdatasets import MsDataset
-from modelscope.trainers import build_trainer
-from modelscope.utils.constant import LogKeys, Tasks
-from modelscope.utils.logger import get_logger
-from modelscope.utils.test_utils import test_level
-
-
-@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest')
-class EasyCVTrainerTestPanopticMask2Former(unittest.TestCase):
-
- def setUp(self):
- self.logger = get_logger()
- self.logger.info(('Testing %s.%s' %
- (type(self).__name__, self._testMethodName)))
- self.tmp_dir = tempfile.TemporaryDirectory().name
- if not os.path.exists(self.tmp_dir):
- os.makedirs(self.tmp_dir)
-
- def tearDown(self):
- super().tearDown()
- shutil.rmtree(self.tmp_dir, ignore_errors=True)
-
- def _train(self):
- cfg_options = {'train.max_epochs': 1}
-
- trainer_name = Trainers.easycv
-
- train_dataset = MsDataset.load(
- dataset_name='COCO2017_panopic_subset', split='train')
- eval_dataset = MsDataset.load(
- dataset_name='COCO2017_panopic_subset', split='validation')
- kwargs = dict(
- model='damo/cv_r50_panoptic-segmentation_cocopan',
- train_dataset=train_dataset,
- eval_dataset=eval_dataset,
- work_dir=self.tmp_dir,
- cfg_options=cfg_options)
-
- trainer = build_trainer(trainer_name, kwargs)
-
- hook_name = 'YOLOXLrUpdaterHook'
- mmcv_hook = MMCV_HOOKS._module_dict.pop(hook_name, None)
-
- trainer.train()
-
- MMCV_HOOKS._module_dict[hook_name] = mmcv_hook
-
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_single_gpu_mask2former_r50(self):
- self._train()
-
- results_files = os.listdir(self.tmp_dir)
- json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
- self.assertEqual(len(json_files), 1)
- self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files)
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/tests/trainers/easycv/test_easycv_trainer_realtime_object_detection.py b/tests/trainers/easycv/test_easycv_trainer_realtime_object_detection.py
deleted file mode 100644
index 1171eed4..00000000
--- a/tests/trainers/easycv/test_easycv_trainer_realtime_object_detection.py
+++ /dev/null
@@ -1,99 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-import glob
-import os
-import shutil
-import tempfile
-import unittest
-
-import torch
-
-from modelscope.hub.snapshot_download import snapshot_download
-from modelscope.metainfo import Trainers
-from modelscope.msdatasets import MsDataset
-from modelscope.trainers import build_trainer
-from modelscope.utils.constant import DownloadMode, LogKeys, Tasks
-from modelscope.utils.logger import get_logger
-from modelscope.utils.test_utils import test_level
-
-
-@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest')
-class EasyCVTrainerTestRealtimeObjectDetection(unittest.TestCase):
- model_id = 'damo/cv_cspnet_image-object-detection_yolox'
-
- def setUp(self):
- self.logger = get_logger()
- self.logger.info(('Testing %s.%s' %
- (type(self).__name__, self._testMethodName)))
-
- def _train(self, tmp_dir):
- # cfg_options = {'train.max_epochs': 2}
- self.cache_path = snapshot_download(self.model_id)
- cfg_options = {
- 'train.max_epochs':
- 2,
- 'train.dataloader.batch_size_per_gpu':
- 4,
- 'evaluation.dataloader.batch_size_per_gpu':
- 2,
- 'train.hooks': [
- {
- 'type': 'CheckpointHook',
- 'interval': 1
- },
- {
- 'type': 'EvaluationHook',
- 'interval': 1
- },
- {
- 'type': 'TextLoggerHook',
- 'ignore_rounding_keys': None,
- 'interval': 2
- },
- ],
- 'load_from':
- os.path.join(self.cache_path, 'pytorch_model.bin')
- }
-
- trainer_name = Trainers.easycv
-
- train_dataset = MsDataset.load(
- dataset_name='small_coco_for_test',
- namespace='EasyCV',
- split='train')
- eval_dataset = MsDataset.load(
- dataset_name='small_coco_for_test',
- namespace='EasyCV',
- split='validation')
-
- kwargs = dict(
- model=self.model_id,
- # model_revision='v1.0.2',
- train_dataset=train_dataset,
- eval_dataset=eval_dataset,
- work_dir=tmp_dir,
- cfg_options=cfg_options)
-
- trainer = build_trainer(trainer_name, kwargs)
- trainer.train()
-
- @unittest.skipUnless(
- test_level() >= 0,
- 'skip since face_2d_keypoints_dataset is set to private for now')
- def test_trainer_single_gpu(self):
- temp_file_dir = tempfile.TemporaryDirectory()
- tmp_dir = temp_file_dir.name
- if not os.path.exists(tmp_dir):
- os.makedirs(tmp_dir)
-
- self._train(tmp_dir)
-
- results_files = os.listdir(tmp_dir)
- json_files = glob.glob(os.path.join(tmp_dir, '*.log.json'))
- self.assertEqual(len(json_files), 1)
- self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files)
-
- temp_file_dir.cleanup()
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/tests/trainers/easycv/test_segformer.py b/tests/trainers/easycv/test_segformer.py
deleted file mode 100644
index 90a66635..00000000
--- a/tests/trainers/easycv/test_segformer.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-import glob
-import os
-import shutil
-import tempfile
-import unittest
-
-import torch
-
-from modelscope.metainfo import Trainers
-from modelscope.msdatasets import MsDataset
-from modelscope.trainers import build_trainer
-from modelscope.utils.constant import LogKeys, Tasks
-from modelscope.utils.logger import get_logger
-from modelscope.utils.test_utils import test_level
-
-
-@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest')
-class EasyCVTrainerTestSegformer(unittest.TestCase):
-
- def setUp(self):
- self.logger = get_logger()
- self.logger.info(('Testing %s.%s' %
- (type(self).__name__, self._testMethodName)))
- self.tmp_dir = tempfile.TemporaryDirectory().name
- if not os.path.exists(self.tmp_dir):
- os.makedirs(self.tmp_dir)
-
- def tearDown(self):
- super().tearDown()
- shutil.rmtree(self.tmp_dir, ignore_errors=True)
-
- def _train(self):
-
- cfg_options = {
- 'train.max_epochs': 2,
- 'model.decode_head.norm_cfg.type': 'BN'
- }
-
- trainer_name = Trainers.easycv
- train_dataset = MsDataset.load(
- dataset_name='small_coco_stuff164k',
- namespace='EasyCV',
- split='train')
- eval_dataset = MsDataset.load(
- dataset_name='small_coco_stuff164k',
- namespace='EasyCV',
- split='validation')
- kwargs = dict(
- model=
- 'damo/cv_segformer-b0_image_semantic-segmentation_coco-stuff164k',
- train_dataset=train_dataset,
- eval_dataset=eval_dataset,
- work_dir=self.tmp_dir,
- cfg_options=cfg_options)
-
- trainer = build_trainer(trainer_name, kwargs)
- trainer.train()
-
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
- def test_single_gpu_segformer(self):
- self._train()
-
- results_files = os.listdir(self.tmp_dir)
- json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
- self.assertEqual(len(json_files), 1)
- self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files)
- self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files)
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/tests/trainers/hooks/test_lr_scheduler_hook.py b/tests/trainers/hooks/test_lr_scheduler_hook.py
index cd28b055..432fb39a 100644
--- a/tests/trainers/hooks/test_lr_scheduler_hook.py
+++ b/tests/trainers/hooks/test_lr_scheduler_hook.py
@@ -105,6 +105,7 @@ class LrSchedulerHookTest(unittest.TestCase):
train_dataloader = trainer._build_dataloader_with_dataset(
trainer.train_dataset, **trainer.cfg.train.get('dataloader', {}))
trainer.register_optimizers_hook()
+ trainer.register_processors()
trainer._hooks = [
hook for hook in trainer._hooks if hook.__class__.__name__ not in
['CheckpointHook', 'TextLoggerHook', 'IterTimerHook']
@@ -177,6 +178,7 @@ class LrSchedulerHookTest(unittest.TestCase):
train_dataloader = trainer._build_dataloader_with_dataset(
trainer.train_dataset, **trainer.cfg.train.get('dataloader', {}))
trainer.register_optimizers_hook()
+ trainer.register_processors()
trainer._hooks = [
hook for hook in trainer._hooks if hook.__class__.__name__ not in
['CheckpointHook', 'TextLoggerHook', 'IterTimerHook']
@@ -365,6 +367,7 @@ class PlateauLrSchedulerHookTest(unittest.TestCase):
trainer.train_dataloader = train_dataloader
trainer.data_loader = train_dataloader
trainer.register_optimizers_hook()
+ trainer.register_processors()
trainer._hooks = [
hook for hook in trainer._hooks if hook.__class__.__name__ not in
['CheckpointHook', 'TextLoggerHook', 'IterTimerHook']
diff --git a/tests/trainers/hooks/test_optimizer_hook.py b/tests/trainers/hooks/test_optimizer_hook.py
index b9899c36..ed0e202a 100644
--- a/tests/trainers/hooks/test_optimizer_hook.py
+++ b/tests/trainers/hooks/test_optimizer_hook.py
@@ -150,6 +150,7 @@ class TorchAMPOptimizerHookTest(unittest.TestCase):
train_dataloader = trainer._build_dataloader_with_dataset(
trainer.train_dataset, **trainer.cfg.train.get('dataloader', {}))
trainer.register_optimizers_hook()
+ trainer.register_processors()
trainer._hooks = [
hook for hook in trainer._hooks if hook.__class__.__name__ not in
['CheckpointHook', 'TextLoggerHook', 'IterTimerHook']
diff --git a/tests/trainers/model_trainer_map.py b/tests/trainers/model_trainer_map.py
index 4057c331..4e9005f7 100644
--- a/tests/trainers/model_trainer_map.py
+++ b/tests/trainers/model_trainer_map.py
@@ -11,33 +11,18 @@ model_trainer_map = {
['tests/trainers/audio/test_separation_trainer.py'],
'speech_tts/speech_sambert-hifigan_tts_zh-cn_multisp_pretrain_16k':
['tests/trainers/audio/test_tts_trainer.py'],
- 'damo/cv_mobilenet_face-2d-keypoints_alignment':
- ['tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py'],
- 'damo/cv_hrnetw18_hand-pose-keypoints_coco-wholebody':
- ['tests/trainers/easycv/test_easycv_trainer_hand_2d_keypoints.py'],
- 'damo/cv_yolox-pai_hand-detection':
- ['tests/trainers/easycv/test_easycv_trainer_hand_detection.py'],
- 'damo/cv_r50_panoptic-segmentation_cocopan':
- ['tests/trainers/easycv/test_easycv_trainer_panoptic_mask2former.py'],
- 'damo/cv_segformer-b0_image_semantic-segmentation_coco-stuff164k':
- ['tests/trainers/easycv/test_segformer.py'],
'damo/cv_resnet_carddetection_scrfd34gkps':
['tests/trainers/test_card_detection_scrfd_trainer.py'],
- 'damo/multi-modal_clip-vit-base-patch16_zh': [
- 'tests/trainers/test_clip_trainer.py'
- ],
- 'damo/nlp_space_pretrained-dialog-model': [
- 'tests/trainers/test_dialog_intent_trainer.py'
- ],
- 'damo/cv_resnet_facedetection_scrfd10gkps': [
- 'tests/trainers/test_face_detection_scrfd_trainer.py'
- ],
- 'damo/nlp_structbert_faq-question-answering_chinese-base': [
- 'tests/trainers/test_finetune_faq_question_answering.py'
- ],
- 'PAI/nlp_gpt3_text-generation_0.35B_MoE-64': [
- 'tests/trainers/test_finetune_gpt_moe.py'
- ],
+ 'damo/multi-modal_clip-vit-base-patch16_zh':
+ ['tests/trainers/test_clip_trainer.py'],
+ 'damo/nlp_space_pretrained-dialog-model':
+ ['tests/trainers/test_dialog_intent_trainer.py'],
+ 'damo/cv_resnet_facedetection_scrfd10gkps':
+ ['tests/trainers/test_face_detection_scrfd_trainer.py'],
+ 'damo/nlp_structbert_faq-question-answering_chinese-base':
+ ['tests/trainers/test_finetune_faq_question_answering.py'],
+ 'PAI/nlp_gpt3_text-generation_0.35B_MoE-64':
+ ['tests/trainers/test_finetune_gpt_moe.py'],
'damo/nlp_gpt3_text-generation_1.3B': [
'tests/trainers/test_finetune_gpt3.py'
],
@@ -139,6 +124,12 @@ model_trainer_map = {
'damo/nlp_csanmt_translation_en2es': [
'tests/trainers/test_translation_trainer.py'
],
+ 'damo/nlp_unite_mup_translation_evaluation_multilingual_base': [
+ 'tests/trainers/test_translation_evaluation_trainer.py'
+ ],
+ 'damo/nlp_unite_mup_translation_evaluation_multilingual_large': [
+ 'tests/trainers/test_translation_evaluation_trainer.py'
+ ],
'damo/cv_googlenet_pgl-video-summarization': [
'tests/trainers/test_video_summarization_trainer.py'
],
diff --git a/tests/trainers/test_trainer_with_nlp.py b/tests/trainers/test_trainer_with_nlp.py
index ceb04e15..a736d4fa 100644
--- a/tests/trainers/test_trainer_with_nlp.py
+++ b/tests/trainers/test_trainer_with_nlp.py
@@ -9,6 +9,7 @@ import unittest
import numpy as np
import torch
from packaging import version
+from torch.utils.data import RandomSampler
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.metainfo import Metrics
@@ -204,12 +205,20 @@ class TestTrainerWithNlp(unittest.TestCase):
cfg.preprocessor.val['label2id'] = {'0': 0, '1': 1}
cfg.train.dataloader.batch_size_per_gpu = 2
cfg.train.hooks = [{
- 'type': 'BestCkptSaverHook',
- 'interval': 1,
- 'by_epoch': False,
- 'metric_key': 'accuracy',
- 'max_checkpoint_num': 4,
- 'restore_best': True,
+ 'type':
+ 'BestCkptSaverHook',
+ 'interval':
+ 1,
+ 'by_epoch':
+ False,
+ 'output_dir':
+ os.path.join(self.tmp_dir, 'output_test_best'),
+ 'metric_key':
+ 'accuracy',
+ 'max_checkpoint_num':
+ 4,
+ 'restore_best':
+ True,
}, {
'type': 'TextLoggerHook',
'interval': 1
@@ -270,7 +279,7 @@ class TestTrainerWithNlp(unittest.TestCase):
os.path.join(self.tmp_dir, 'output', 'pytorch_model.bin')))
self.assertTrue(
os.path.isfile(
- os.path.join(self.tmp_dir, 'output_best',
+ os.path.join(self.tmp_dir, 'output_test_best',
'pytorch_model.bin')))
md51 = hashlib.md5(
pathlib.Path(
@@ -282,7 +291,7 @@ class TestTrainerWithNlp(unittest.TestCase):
self.assertEqual(md51, md52)
md51 = hashlib.md5(
pathlib.Path(
- os.path.join(self.tmp_dir, 'output_best',
+ os.path.join(self.tmp_dir, 'output_test_best',
'pytorch_model.bin')).read_bytes()).hexdigest()
md52 = hashlib.md5(
pathlib.Path(
@@ -472,6 +481,34 @@ class TestTrainerWithNlp(unittest.TestCase):
cache_path + '/pytorch_model.bin', saving_fn=saving_fn))
self.assertTrue(os.path.isfile(f'{tmp_dir}/predicts.txt'))
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_trainer_with_custom_sampler(self):
+ tmp_dir = tempfile.TemporaryDirectory().name
+ if not os.path.exists(tmp_dir):
+ os.makedirs(tmp_dir)
+
+ model_id = 'damo/nlp_structbert_sentence-similarity_chinese-tiny'
+ cache_path = snapshot_download(model_id)
+ model = SbertForSequenceClassification.from_pretrained(cache_path)
+
+ class CustomSampler(RandomSampler):
+
+ pass
+
+ kwargs = dict(
+ cfg_file=os.path.join(cache_path, ModelFile.CONFIGURATION),
+ model=model,
+ train_dataset=self.dataset,
+ eval_dataset=self.dataset,
+ samplers=CustomSampler(self.dataset),
+ work_dir=self.tmp_dir)
+
+ trainer = build_trainer(default_args=kwargs)
+ trainer.train()
+ self.assertTrue(
+ type(trainer.train_dataloader.sampler) == CustomSampler)
+ self.assertTrue(type(trainer.eval_dataloader.sampler) == CustomSampler)
+
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_trainer_with_prediction(self):
tmp_dir = tempfile.TemporaryDirectory().name
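The new test_trainer_with_custom_sampler case introduces a samplers keyword on the trainer kwargs; judging from its assertions, the single sampler instance ends up on both the train and eval dataloaders. A hedged usage sketch outside the test (cfg_file, my_model and my_dataset are stand-ins):

from torch.utils.data import RandomSampler

from modelscope.trainers import build_trainer

# Hedged sketch of the `samplers` kwarg exercised in the test above;
# cfg_file, my_model and my_dataset are assumed to be defined elsewhere.
kwargs = dict(
    cfg_file=cfg_file,
    model=my_model,
    train_dataset=my_dataset,
    eval_dataset=my_dataset,
    samplers=RandomSampler(my_dataset),
    work_dir='./work_dir')
trainer = build_trainer(default_args=kwargs)
trainer.train()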
diff --git a/tests/trainers/test_training_args.py b/tests/trainers/test_training_args.py
index 6e4d306e..e8f6d8a2 100644
--- a/tests/trainers/test_training_args.py
+++ b/tests/trainers/test_training_args.py
@@ -1,8 +1,8 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import unittest
-from modelscope.trainers.default_config import DEFAULT_CONFIG
-from modelscope.trainers.training_args import CliArgumentParser, TrainingArgs
+from modelscope import TrainingArgs
+from modelscope.trainers.cli_argument_parser import CliArgumentParser
from modelscope.utils.test_utils import test_level
@@ -29,14 +29,14 @@ class TrainingArgsTest(unittest.TestCase):
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_flatten_args(self):
- cfg = DEFAULT_CONFIG
+ training_args = TrainingArgs()
input_args = [
'--optimizer_params',
'weight_decay=0.8,eps=1e-6,correct_bias=False',
'--lr_scheduler_params', 'initial_lr=3e-5,niter_decay=1'
]
- training_args = TrainingArgs.from_cli(input_args)
- cfg = training_args(cfg)
+ training_args = training_args.parse_cli(input_args)
+ cfg, _ = training_args.to_config()
self.assertAlmostEqual(cfg.train.optimizer.weight_decay, 0.8)
self.assertAlmostEqual(cfg.train.optimizer.eps, 1e-6)
self.assertFalse(cfg.train.optimizer.correct_bias)
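The test now mirrors the API shift seen in the example scripts: TrainingArgs.from_cli plus calling the args object on DEFAULT_CONFIG is replaced by parse_cli() followed by to_config(). A condensed sketch of the new flow, grounded in the assertions above:

from modelscope import TrainingArgs

# Flattened "key=value" CLI params are parsed into nested config entries.
args = TrainingArgs().parse_cli([
    '--optimizer_params', 'weight_decay=0.8,eps=1e-6,correct_bias=False',
    '--lr_scheduler_params', 'initial_lr=3e-5,niter_decay=1',
])
cfg, _ = args.to_config()
assert abs(cfg.train.optimizer.weight_decay - 0.8) < 1e-9
assert not cfg.train.optimizer.correct_bias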
diff --git a/tests/trainers/test_translation_evaluation_trainer.py b/tests/trainers/test_translation_evaluation_trainer.py
new file mode 100644
index 00000000..139427da
--- /dev/null
+++ b/tests/trainers/test_translation_evaluation_trainer.py
@@ -0,0 +1,30 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest
+
+from modelscope.metainfo import Trainers
+from modelscope.trainers import build_trainer
+from modelscope.utils.test_utils import test_level
+
+
+class TranslationEvaluationTest(unittest.TestCase):
+
+ def setUp(self) -> None:
+ self.name = Trainers.translation_evaluation_trainer
+ self.model_id_large = 'damo/nlp_unite_mup_translation_evaluation_multilingual_large'
+ self.model_id_base = 'damo/nlp_unite_mup_translation_evaluation_multilingual_base'
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_unite_mup_large(self) -> None:
+ default_args = {'model': self.model_id_large}
+ trainer = build_trainer(name=self.name, default_args=default_args)
+ trainer.train()
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_unite_mup_base(self) -> None:
+ default_args = {'model': self.model_id_base}
+ trainer = build_trainer(name=self.name, default_args=default_args)
+ trainer.train()
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/utils/test_input_output.py b/tests/utils/test_input_output.py
new file mode 100644
index 00000000..53b75a39
--- /dev/null
+++ b/tests/utils/test_input_output.py
@@ -0,0 +1,142 @@
+import base64
+import unittest
+
+import json
+
+from modelscope.utils.constant import Tasks
+from modelscope.utils.input_output import (
+ PipelineInfomation, service_base64_input_to_pipeline_input)
+
+
+def encode_image_to_base64(image):
+ base64_str = str(base64.b64encode(image), 'utf-8')
+ return base64_str
+
+
+class PipelineInputOutputTest(unittest.TestCase):
+
+ def test_template_pipeline_dict_input(self):
+ pipeline_info = PipelineInfomation(
+ Tasks.task_template, 'PipelineTemplate',
+ 'modelscope/pipelines/pipeline_template.py')
+ schema = pipeline_info.schema
+ expect_schema = {
+ 'input': {
+ 'type': 'object',
+ 'properties': {
+ 'image': {
+ 'type': 'string',
+ 'description':
+ 'Base64 encoded image file or url string.'
+ },
+ 'text': {
+ 'type': 'string',
+ 'description': 'The input text.'
+ }
+ }
+ },
+ 'parameters': {
+ 'type': 'object',
+ 'properties': {
+ 'max_length': {
+ 'type': 'integer',
+ 'default': 1024
+ },
+ 'top_p': {
+ 'type': 'number',
+ 'default': 0.8
+ },
+ 'postprocess_param1': {
+ 'type': 'string',
+ 'default': None
+ }
+ }
+ },
+ 'output': {
+ 'type': 'object',
+ 'properties': {
+ 'boxes': {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ },
+ 'output_img': {
+ 'type': 'string',
+ 'description': 'The base64 encoded image.'
+ },
+ 'text_embedding': {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ }
+ }
+ }
+ }
+ assert expect_schema == schema
+
+ def test_template_pipeline_list_input(self):
+ pipeline_info = PipelineInfomation(
+ Tasks.text_classification, 'LanguageIdentificationPipeline',
+ 'modelscope/pipelines/nlp/language_identification_pipline.py')
+ schema = pipeline_info.schema
+ expect_schema = {
+ 'input': {
+ 'type': 'object',
+ 'properties': {
+ 'text': {
+ 'type': 'string',
+ 'description': 'The input text.'
+ },
+ 'text2': {
+ 'type': 'string',
+ 'description': 'The input text.'
+ }
+ }
+ },
+ 'parameters': {},
+ 'output': {
+ 'type': 'object',
+ 'properties': {
+ 'scores': {
+ 'type': 'array',
+ 'items': {
+ 'type': 'number'
+ }
+ },
+ 'labels': {
+ 'type': 'array',
+ 'items': {
+ 'type': 'string'
+ }
+ }
+ }
+ }
+ }
+ assert expect_schema == schema
+
+ def test_input_output_encode_decode(self):
+ with open('data/test/images/image_captioning.png', 'rb') as f:
+ image = f.read()
+ text = 'hello schema.'
+ request_json = {
+ 'input': {
+ 'image': encode_image_to_base64(image),
+ 'text': text
+ },
+ 'parameters': {
+ 'max_length': 10000,
+ 'top_p': 0.8
+ }
+ }
+ pipeline_inputs, parameters = service_base64_input_to_pipeline_input(
+ Tasks.task_template, request_json)
+ assert 'image' in pipeline_inputs
+ assert pipeline_inputs['text'] == text
+ assert parameters['max_length'] == 10000
+ assert parameters['top_p'] == 0.8
+
+
+if __name__ == '__main__':
+ unittest.main()
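The schema dictionaries asserted above follow JSON Schema conventions ('type', 'properties', 'items'), so a service front end could reuse them to validate incoming requests. A hedged sketch, assuming the third-party jsonschema package (not a stated dependency of this test):

import jsonschema

# Validate a request body against the input schema from the
# LanguageIdentificationPipeline case above.
input_schema = {
    'type': 'object',
    'properties': {
        'text': {'type': 'string'},
        'text2': {'type': 'string'},
    },
}
jsonschema.validate({'text': 'hello', 'text2': 'bonjour'}, input_schema)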
diff --git a/tools/convert_megatron_ckpt.py b/tools/convert_megatron_ckpt.py
new file mode 100644
index 00000000..f9b8f8f3
--- /dev/null
+++ b/tools/convert_megatron_ckpt.py
@@ -0,0 +1,31 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import argparse
+import os
+
+from modelscope.models import Model
+from modelscope.utils.megatron_utils import convert_megatron_checkpoint
+
+
+def unwrap_model(model):
+ for name in ('model', 'module', 'dist_model'):
+ while hasattr(model, name):
+ model = getattr(model, name)
+ return model
+
+
+parser = argparse.ArgumentParser(
+ description='Split or merge your megatron_based checkpoint.')
+parser.add_argument(
+ '--model_dir', type=str, required=True, help='Checkpoint to be converted.')
+parser.add_argument(
+ '--target_dir', type=str, required=True, help='Target save path.')
+args = parser.parse_args()
+
+model = Model.from_pretrained(
+ args.model_dir,
+ rank=int(os.getenv('RANK')),
+ megatron_cfg={'tensor_model_parallel_size': int(os.getenv('WORLD_SIZE'))})
+unwrapped_model = unwrap_model(model)
+
+convert_megatron_checkpoint(unwrapped_model, model.model_dir, args.target_dir)
diff --git a/tools/convert_megatron_ckpt.sh b/tools/convert_megatron_ckpt.sh
new file mode 100644
index 00000000..86e94877
--- /dev/null
+++ b/tools/convert_megatron_ckpt.sh
@@ -0,0 +1,7 @@
+TARGET_TENSOR_MODEL_PARALLEL_SIZE=1
+ORIGIN_MODEL='damo/nlp_gpt3_text-generation_1.3B'
+TARGET_DIR='./target'
+
+torchrun --nproc_per_node $TARGET_TENSOR_MODEL_PARALLEL_SIZE tools/convert_megatron_ckpt.py \
+ --model_dir $ORIGIN_MODEL \
+ --target_dir $TARGET_DIR \