diff --git a/README.md b/README.md index 68f03744..c9b071ab 100644 --- a/README.md +++ b/README.md @@ -108,9 +108,9 @@ Audio: * [speech_charctc_kws_phone-xiaoyun](https://modelscope.cn/models/damo/speech_charctc_kws_phone-xiaoyun) * [u2pp_conformer-asr-cn-16k-online](https://modelscope.cn/models/wenet/u2pp_conformer-asr-cn-16k-online) - + * [speech_fsmn_vad_zh-cn-16k-common-pytorch](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary) - + * [punc_ct-transformer_zh-cn-common-vocab272727-pytorch](https://modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/summary) * [speech_frcrn_ans_cirm_16k](https://modelscope.cn/models/damo/speech_frcrn_ans_cirm_16k) diff --git a/examples/pytorch/image_classification/finetune_image_classification.py b/examples/pytorch/image_classification/finetune_image_classification.py index 4e96c2cd..e5bb9bdd 100644 --- a/examples/pytorch/image_classification/finetune_image_classification.py +++ b/examples/pytorch/image_classification/finetune_image_classification.py @@ -1,13 +1,12 @@ import os from dataclasses import dataclass, field +from modelscope import MsDataset, TrainingArgs from modelscope.metainfo import Trainers -from modelscope.msdatasets.ms_dataset import MsDataset from modelscope.trainers.builder import build_trainer -from modelscope.trainers.training_args import TrainingArgs -@dataclass +@dataclass(init=False) class ImageClassificationTrainingArgs(TrainingArgs): num_classes: int = field( default=None, @@ -46,26 +45,35 @@ def create_dataset(name, split): dataset_name, namespace=namespace, subset_name='default', split=split) -def train(): - args = ImageClassificationTrainingArgs.from_cli( - model='damo/cv_vit-base_image-classification_ImageNet-labels', - max_epochs=1, - lr=1e-4, - optimizer='AdamW', - warmup_iters=1, - topk=(1, )) - if args.dataset_name is not None: - train_dataset = create_dataset(args.dataset_name, split='train') - val_dataset = create_dataset(args.dataset_name, split='validation') +training_args = ImageClassificationTrainingArgs( + model='damo/cv_vit-base_image-classification_ImageNet-labels', + max_epochs=1, + lr=1e-4, + optimizer='AdamW', + warmup_iters=1, + topk=(1, )).parse_cli() +config, args = training_args.to_config() + + +def cfg_modify_fn(cfg): + if args.use_model_config: + cfg.merge_from_dict(config) else: - train_dataset = create_dataset(args.train_dataset_name, split='train') - val_dataset = create_dataset(args.val_dataset_name, split='validation') + cfg = config + return cfg + + +def train(): + train_dataset = create_dataset( + training_args.train_dataset_name, split=training_args.train_split) + val_dataset = create_dataset( + training_args.val_dataset_name, split=training_args.val_split) kwargs = dict( model=args.model, # model id train_dataset=train_dataset, # training dataset eval_dataset=val_dataset, # validation dataset - cfg_modify_fn=args # callback to modify configuration + cfg_modify_fn=cfg_modify_fn # callback to modify configuration ) # in distributed training, specify pytorch launcher diff --git a/examples/pytorch/image_classification/run_train.sh b/examples/pytorch/image_classification/run_train.sh index 5a7b3a09..ad560424 100644 --- a/examples/pytorch/image_classification/run_train.sh +++ b/examples/pytorch/image_classification/run_train.sh @@ -2,4 +2,7 @@ PYTHONPATH=. 
python -m torch.distributed.launch --nproc_per_node=2 \ examples/pytorch/image_classification/finetune_image_classification.py \ --num_classes 2 \ --train_dataset_name 'tany0699/cats_and_dogs' \ - --val_dataset_name 'tany0699/cats_and_dogs' + --val_dataset_name 'tany0699/cats_and_dogs' \ + --train_split train \ + --val_split validation \ + --use_model_config true \ diff --git a/examples/pytorch/multi_modal_embedding/finetune_multi_modal_embedding.py b/examples/pytorch/multi_modal_embedding/finetune_multi_modal_embedding.py index cc7da842..7b4cfbb8 100644 --- a/examples/pytorch/multi_modal_embedding/finetune_multi_modal_embedding.py +++ b/examples/pytorch/multi_modal_embedding/finetune_multi_modal_embedding.py @@ -1,15 +1,13 @@ import os from dataclasses import dataclass, field -from functools import partial +from modelscope import MsDataset, TrainingArgs from modelscope.metainfo import Trainers -from modelscope.msdatasets import MsDataset from modelscope.trainers import build_trainer -from modelscope.trainers.training_args import (TrainingArgs, get_flatten_value, - set_flatten_value) +from modelscope.trainers.training_args import set_flatten_value -@dataclass +@dataclass(init=False) class MultiModalEmbeddingArguments(TrainingArgs): trainer: str = field( @@ -17,6 +15,12 @@ class MultiModalEmbeddingArguments(TrainingArgs): 'help': 'The trainer used', }) + work_dir: str = field( + default='./tmp', + metadata={ + 'help': 'The working path for saving checkpoint', + }) + use_fp16: bool = field( default=None, metadata={ @@ -35,7 +39,6 @@ class MultiModalEmbeddingArguments(TrainingArgs): default=None, metadata={ 'cfg_node': 'train.optimizer_hparams', - 'cfg_getter': partial(get_flatten_value, exclusions=['lr']), 'cfg_setter': set_flatten_value, 'help': 'The optimizer init params except `lr`', }) @@ -51,7 +54,6 @@ class MultiModalEmbeddingArguments(TrainingArgs): default=None, metadata={ 'cfg_node': 'dataset.column_map', - 'cfg_getter': get_flatten_value, 'cfg_setter': set_flatten_value, 'help': 'The column map for dataset', }) @@ -67,7 +69,6 @@ class MultiModalEmbeddingArguments(TrainingArgs): default=None, metadata={ 'cfg_node': 'train.lr_scheduler_hook', - 'cfg_getter': get_flatten_value, 'cfg_setter': set_flatten_value, 'help': 'The parameters for lr scheduler hook', }) @@ -76,7 +77,6 @@ class MultiModalEmbeddingArguments(TrainingArgs): default=None, metadata={ 'cfg_node': 'train.optimizer_hook', - 'cfg_getter': get_flatten_value, 'cfg_setter': set_flatten_value, 'help': 'The parameters for optimizer hook', }) @@ -92,23 +92,28 @@ class MultiModalEmbeddingArguments(TrainingArgs): 'help': 'The data parallel world size', }) - def __call__(self, config): - config = super().__call__(config) - config.merge_from_dict({'pretrained_model.model_name': self.model}) - if self.clip_clamp: - config.train.hooks.append({'type': 'ClipClampLogitScaleHook'}) - if self.world_size > 1: - config.train.launcher = 'pytorch' - return config + +config, args = MultiModalEmbeddingArguments().parse_cli().to_config() +print(config, args) -args = MultiModalEmbeddingArguments.from_cli(task='multi-modal-embedding') -print(args) +def cfg_modify_fn(cfg): + if args.use_model_config: + cfg.merge_from_dict(config) + else: + cfg = config + cfg.merge_from_dict({'pretrained_model.model_name': args.model}) + if args.clip_clamp: + cfg.train.hooks.append({'type': 'ClipClampLogitScaleHook'}) + if args.world_size > 1: + cfg.train.launcher = 'pytorch' + return cfg + train_dataset = MsDataset.load( - args.dataset_name, 
namespace='modelscope', split='train') + args.train_dataset_name, namespace='modelscope', split='train') eval_dataset = MsDataset.load( - args.dataset_name, namespace='modelscope', split='validation') + args.train_dataset_name, namespace='modelscope', split='validation') os.makedirs(args.work_dir, exist_ok=True) kwargs = dict( @@ -116,6 +121,6 @@ kwargs = dict( train_dataset=train_dataset, eval_dataset=eval_dataset, work_dir=args.work_dir, - cfg_modify_fn=args) + cfg_modify_fn=cfg_modify_fn) trainer = build_trainer(name=args.trainer, default_args=kwargs) trainer.train() diff --git a/examples/pytorch/multi_modal_embedding/run_train.sh b/examples/pytorch/multi_modal_embedding/run_train.sh index 89eef73e..3974405b 100644 --- a/examples/pytorch/multi_modal_embedding/run_train.sh +++ b/examples/pytorch/multi_modal_embedding/run_train.sh @@ -6,14 +6,16 @@ PYTHONPATH=. torchrun --nproc_per_node $DATA_PARALLEL_SIZE \ --trainer 'clip-multi-modal-embedding' \ --work_dir './workspace/ckpts/clip' \ --model 'damo/multi-modal_clip-vit-base-patch16_zh' \ - --dataset_name 'muge' \ + --train_dataset_name 'muge' \ --dataset_column_map 'img=image,text=query' \ --max_epochs 1 \ --use_fp16 true \ --per_device_train_batch_size 180 \ + --train_data_worker 0 \ --train_shuffle true \ --train_drop_last true \ --per_device_eval_batch_size 128 \ + --eval_data_worker 0 \ --eval_shuffle true \ --eval_drop_last true \ --save_ckpt_best true \ @@ -33,3 +35,4 @@ PYTHONPATH=. torchrun --nproc_per_node $DATA_PARALLEL_SIZE \ --optimizer_hook 'type=TorchAMPOptimizerHook,cumulative_iters=1,loss_keys=loss' \ --clip_clamp true \ --world_size $DATA_PARALLEL_SIZE \ + --use_model_config true \ diff --git a/examples/pytorch/stable_diffusion/finetune_stable_diffusion.py b/examples/pytorch/stable_diffusion/finetune_stable_diffusion.py index bd05097d..28ba853c 100644 --- a/examples/pytorch/stable_diffusion/finetune_stable_diffusion.py +++ b/examples/pytorch/stable_diffusion/finetune_stable_diffusion.py @@ -4,30 +4,32 @@ from modelscope.msdatasets import MsDataset from modelscope.trainers import EpochBasedTrainer, build_trainer from modelscope.trainers.training_args import TrainingArgs - -@dataclass -class StableDiffusionArguments(TrainingArgs): - - def __call__(self, config): - config = super().__call__(config) - config.train.lr_scheduler.T_max = self.max_epochs - config.model.inference = False - return config - - -args = StableDiffusionArguments.from_cli(task='efficient-diffusion-tuning') +training_args = TrainingArgs(task='efficient-diffusion-tuning').parse_cli() +config, args = training_args.to_config() print(args) -dataset = MsDataset.load(args.dataset_name, namespace=args.namespace) +dataset = MsDataset.load( + args.train_dataset_name, namespace=args.train_dataset_namespace) train_dataset = dataset['train'] validation_dataset = dataset['validation'] + +def cfg_modify_fn(cfg): + if args.use_model_config: + cfg.merge_from_dict(config) + else: + cfg = config + cfg.train.lr_scheduler.T_max = training_args.max_epochs + cfg.model.inference = False + return cfg + + kwargs = dict( - model=args.model, - work_dir=args.work_dir, + model=training_args.model, + work_dir=training_args.work_dir, train_dataset=train_dataset, eval_dataset=validation_dataset, - cfg_modify_fn=args) + cfg_modify_fn=cfg_modify_fn) trainer: EpochBasedTrainer = build_trainer(name='trainer', default_args=kwargs) trainer.train() diff --git a/examples/pytorch/stable_diffusion/run_train.sh b/examples/pytorch/stable_diffusion/run_train.sh index c8bfa26c..0e551942 100644 --- 
a/examples/pytorch/stable_diffusion/run_train.sh +++ b/examples/pytorch/stable_diffusion/run_train.sh @@ -1,11 +1,12 @@ PYTHONPATH=. torchrun examples/pytorch/stable_diffusion/finetune_stable_diffusion.py \ --model 'damo/multi-modal_efficient-diffusion-tuning-lora' \ --work_dir './tmp/stable_diffusion_tuning' \ - --namespace 'damo' \ - --dataset_name 'buptwq/lora-stable-diffusion-finetune-dog' \ - --max_epochs 150 \ + --train_dataset_namespace 'damo' \ + --train_dataset_name 'controlnet_dataset_condition_fill50k' \ + --max_epochs 1 \ --save_ckpt_strategy 'by_epoch' \ --logging_interval 100 \ --train.dataloader.workers_per_gpu 0 \ --evaluation.dataloader.workers_per_gpu 0 \ - --train.optimizer.lr 1e-4 + --train.optimizer.lr 1e-5 \ + --use_model_config true diff --git a/examples/pytorch/text_classification/finetune_text_classification.py b/examples/pytorch/text_classification/finetune_text_classification.py index 7747bc25..dfcb7b4d 100644 --- a/examples/pytorch/text_classification/finetune_text_classification.py +++ b/examples/pytorch/text_classification/finetune_text_classification.py @@ -1,26 +1,18 @@ import os from dataclasses import dataclass, field -from modelscope.msdatasets import MsDataset -from modelscope.trainers import EpochBasedTrainer, build_trainer -from modelscope.trainers.training_args import TrainingArgs +from modelscope import (EpochBasedTrainer, MsDataset, TrainingArgs, + build_dataset_from_file) +from modelscope.trainers import build_trainer -def get_labels(cfg, metadata): - label2id = cfg.safe_get(metadata['cfg_node']) - if label2id is not None: - return ','.join(label2id.keys()) - - -def set_labels(cfg, labels, metadata): +def set_labels(labels): if isinstance(labels, str): labels = labels.split(',') - cfg.merge_from_dict( - {metadata['cfg_node']: {label: id - for id, label in enumerate(labels)}}) + return {label: id for id, label in enumerate(labels)} -@dataclass +@dataclass(init=False) class TextClassificationArguments(TrainingArgs): first_sequence: str = field( @@ -49,7 +41,6 @@ class TextClassificationArguments(TrainingArgs): metadata={ 'help': 'The labels of the dataset', 'cfg_node': 'preprocessor.label2id', - 'cfg_getter': get_labels, 'cfg_setter': set_labels, }) @@ -60,30 +51,39 @@ class TextClassificationArguments(TrainingArgs): 'cfg_node': 'preprocessor.type' }) - def __call__(self, config): - config = super().__call__(config) - config.model['num_labels'] = len(self.labels) - if config.train.lr_scheduler.type == 'LinearLR': - config.train.lr_scheduler['total_iters'] = \ - int(len(train_dataset) / self.per_device_train_batch_size) * self.max_epochs - return config + +config, args = TextClassificationArguments().parse_cli().to_config() + +print(config, args) -args = TextClassificationArguments.from_cli( - task='text-classification', eval_metrics='seq-cls-metric') +def cfg_modify_fn(cfg): + if args.use_model_config: + cfg.merge_from_dict(config) + else: + cfg = config + cfg.model['num_labels'] = len(cfg.preprocessor.label2id) + if cfg.train.lr_scheduler.type == 'LinearLR': + cfg.train.lr_scheduler['total_iters'] = \ + int(len(train_dataset) / cfg.train.dataloader.batch_size_per_gpu) * cfg.train.max_epochs + return cfg -print(args) -dataset = MsDataset.load(args.dataset_name, subset_name=args.subset_name) -train_dataset = dataset['train'] -validation_dataset = dataset['validation'] +if args.dataset_json_file is None: + dataset = MsDataset.load( + args.train_dataset_name, subset_name=args.train_subset_name) + train_dataset = dataset['train'] + validation_dataset = 
dataset['validation'] +else: + train_dataset, validation_dataset = build_dataset_from_file( + args.dataset_json_file) kwargs = dict( model=args.model, train_dataset=train_dataset, eval_dataset=validation_dataset, seed=args.seed, - cfg_modify_fn=args) + cfg_modify_fn=cfg_modify_fn) os.environ['LOCAL_RANK'] = str(args.local_rank) trainer: EpochBasedTrainer = build_trainer(name='trainer', default_args=kwargs) diff --git a/examples/pytorch/text_classification/run_train.sh b/examples/pytorch/text_classification/run_train.sh index 93c23d0d..e91a9996 100644 --- a/examples/pytorch/text_classification/run_train.sh +++ b/examples/pytorch/text_classification/run_train.sh @@ -1,12 +1,16 @@ PYTHONPATH=. python examples/pytorch/text_classification/finetune_text_classification.py \ + --task 'text-classification' \ --model 'damo/nlp_structbert_backbone_base_std' \ - --dataset_name 'clue' \ - --subset_name 'tnews' \ + --train_dataset_name 'clue' \ + --train_subset_name 'tnews' \ --first_sequence 'sentence' \ --preprocessor.label label \ --model.num_labels 15 \ --labels '0,1,2,3,4,5,6,7,8,9,10,11,12,13,14' \ --preprocessor 'sen-cls-tokenizer' \ + --use_model_config True \ + --max_epochs 1 \ --train.dataloader.workers_per_gpu 0 \ --evaluation.dataloader.workers_per_gpu 0 \ --train.optimizer.lr 1e-5 \ + --eval_metrics 'seq-cls-metric' \ diff --git a/examples/pytorch/text_generation/finetune_text_generation.py b/examples/pytorch/text_generation/finetune_text_generation.py index 7a140a0c..a89970e8 100644 --- a/examples/pytorch/text_generation/finetune_text_generation.py +++ b/examples/pytorch/text_generation/finetune_text_generation.py @@ -1,12 +1,11 @@ from dataclasses import dataclass, field +from modelscope import EpochBasedTrainer, MsDataset, TrainingArgs from modelscope.metainfo import Trainers -from modelscope.msdatasets import MsDataset -from modelscope.trainers import EpochBasedTrainer, build_trainer -from modelscope.trainers.training_args import TrainingArgs +from modelscope.trainers import build_trainer -@dataclass +@dataclass(init=False) class TextGenerationArguments(TrainingArgs): trainer: str = field( @@ -67,30 +66,35 @@ class TextGenerationArguments(TrainingArgs): 'help': 'Whether to use MegatronHook', }) - def __call__(self, config): - config = super().__call__(config) - if config.train.lr_scheduler.type == 'noam': - config.train.lr_scheduler = { - 'type': 'LambdaLR', - 'lr_lambda': noam_lambda, - 'options': { - 'by_epoch': False - } - } - if self.use_megatron: - config.train.hooks.append({'type': 'MegatronHook'}) - return config - def noam_lambda(current_step: int): current_step += 1 return min(current_step**(-0.5), current_step * 100**(-1.5)) -args = TextGenerationArguments.from_cli(task='text-generation') -print(args) +config, args = TextGenerationArguments().parse_cli().to_config() +print(config, args) -dataset = MsDataset.load(args.dataset_name) + +def cfg_modify_fn(cfg): + if args.use_model_config: + cfg.merge_from_dict(config) + else: + cfg = config + if cfg.train.lr_scheduler.type == 'noam': + cfg.train.lr_scheduler = { + 'type': 'LambdaLR', + 'lr_lambda': noam_lambda, + 'options': { + 'by_epoch': False + } + } + if args.use_megatron: + cfg.train.hooks.append({'type': 'MegatronHook'}) + return cfg + + +dataset = MsDataset.load(args.train_dataset_name) train_dataset = dataset['train'] eval_dataset = dataset['validation' if 'validation' in dataset else 'test'] @@ -100,7 +104,7 @@ kwargs = dict( eval_dataset=eval_dataset, seed=args.seed, work_dir=args.work_dir, - cfg_modify_fn=args) + 
cfg_modify_fn=cfg_modify_fn) trainer: EpochBasedTrainer = build_trainer( name=args.trainer, default_args=kwargs) diff --git a/examples/pytorch/text_generation/run_train_gpt3.sh b/examples/pytorch/text_generation/run_train_gpt3.sh index a20a5bb2..fd37b42c 100644 --- a/examples/pytorch/text_generation/run_train_gpt3.sh +++ b/examples/pytorch/text_generation/run_train_gpt3.sh @@ -8,7 +8,7 @@ PYTHONPATH=. torchrun --nproc_per_node $WORLD_SIZE examples/pytorch/text_generat --trainer 'nlp-gpt3-trainer' \ --work_dir './tmp' \ --model 'damo/nlp_gpt3_text-generation_1.3B' \ - --dataset_name 'chinese-poetry-collection' \ + --train_dataset_name 'chinese-poetry-collection' \ --preprocessor 'text-gen-jieba-tokenizer' \ --src_txt 'text1' \ --tgt_txt 'text2' \ @@ -20,4 +20,5 @@ PYTHONPATH=. torchrun --nproc_per_node $WORLD_SIZE examples/pytorch/text_generat --world_size $WORLD_SIZE \ --tensor_model_parallel_size $TENSOR_MODEL_PARALLEL_SIZE \ --use_megatron true \ - # --dataset_name 'DuReader_robust-QG' \ # input&output + --use_model_config true \ + # --train_dataset_name 'DuReader_robust-QG' \ # input&output diff --git a/examples/pytorch/text_generation/run_train_mt5.sh b/examples/pytorch/text_generation/run_train_mt5.sh new file mode 100644 index 00000000..6d032d6e --- /dev/null +++ b/examples/pytorch/text_generation/run_train_mt5.sh @@ -0,0 +1,13 @@ +PYTHONPATH=. torchrun examples/pytorch/text_generation/finetune_text_generation.py \ + --trainer 'text-generation-trainer' \ + --work_dir './tmp' \ + --task 'text2text-generation' \ + --model 'damo/nlp_mt5_zero-shot-augment_chinese-base' \ + --train_dataset_name 'DuReader_robust-QG' \ + --src_txt 'text1' \ + --tgt_txt 'text2' \ + --max_epochs 1 \ + --use_model_config True \ + --per_device_train_batch_size 8 \ + --lr 1e-3 \ + --lr_scheduler 'noam' \ diff --git a/examples/pytorch/text_generation/run_train_palm.sh b/examples/pytorch/text_generation/run_train_palm.sh index ff88ce7d..68b9e89d 100644 --- a/examples/pytorch/text_generation/run_train_palm.sh +++ b/examples/pytorch/text_generation/run_train_palm.sh @@ -2,10 +2,11 @@ PYTHONPATH=. torchrun examples/pytorch/text_generation/finetune_text_generation. 
--trainer 'text-generation-trainer' \ --work_dir './tmp' \ --model 'damo/nlp_palm2.0_pretrained_chinese-base' \ - --dataset_name 'DuReader_robust-QG' \ + --train_dataset_name 'DuReader_robust-QG' \ --src_txt 'text1' \ --tgt_txt 'text2' \ - --max_epochs 15 \ + --max_epochs 1 \ + --use_model_config True \ --per_device_train_batch_size 8 \ --lr 1e-3 \ --lr_scheduler 'noam' \ diff --git a/examples/pytorch/token_classification/finetune_token_classification.py b/examples/pytorch/token_classification/finetune_token_classification.py index cf51ed22..3f9de791 100644 --- a/examples/pytorch/token_classification/finetune_token_classification.py +++ b/examples/pytorch/token_classification/finetune_token_classification.py @@ -1,20 +1,22 @@ from dataclasses import dataclass, field -from modelscope.metainfo import Trainers -from modelscope.msdatasets import MsDataset -from modelscope.trainers import build_trainer -from modelscope.trainers.training_args import (TrainingArgs, get_flatten_value, - set_flatten_value) +from modelscope import (EpochBasedTrainer, MsDataset, TrainingArgs, + build_dataset_from_file) -@dataclass +@dataclass(init=False) class TokenClassificationArguments(TrainingArgs): - trainer: str = field( - default=Trainers.default, metadata={ + default=None, metadata={ 'help': 'The trainer used', }) + work_dir: str = field( + default='./tmp', + metadata={ + 'help': 'The working path for saving checkpoint', + }) + preprocessor: str = field( default=None, metadata={ @@ -29,60 +31,99 @@ class TokenClassificationArguments(TrainingArgs): 'cfg_node': 'preprocessor.padding' }) - train_dataset_params: str = field( + mode: str = field( + default='inference', + metadata={ + 'help': 'The preprocessor padding', + 'cfg_node': 'preprocessor.mode' + }) + + first_sequence: str = field( default=None, metadata={ - 'cfg_node': 'dataset.train', - 'cfg_getter': get_flatten_value, - 'cfg_setter': set_flatten_value, + 'cfg_node': 'preprocessor.first_sequence', 'help': 'The parameters for train dataset', }) - def __call__(self, config): - config = super().__call__(config) - if config.safe_get('dataset.train.label') == 'ner_tags': - ner_tags_labels = train_dataset['ner_tags'] + eval_dataset[ - 'ner_tags'] - label_enumerate_values = self._get_label_list(ner_tags_labels) - config.merge_from_dict( - {'dataset.train.labels': label_enumerate_values}) - if config.train.lr_scheduler.type == 'LinearLR': - config.train.lr_scheduler['total_iters'] = \ - int(len(train_dataset) / self.per_device_train_batch_size) * self.max_epochs - return config + label: str = field( + default=None, + metadata={ + 'cfg_node': 'preprocessor.label', + 'help': 'The parameters for train dataset', + }) - # TODO: Future performance optimization in MsDataset - @staticmethod - def _get_label_list(labels): - unique_labels = set() - for label in labels: - unique_labels = unique_labels | set(label) - label_list = list(unique_labels) - label_list.sort() - return label_list + sequence_length: int = field( + default=128, + metadata={ + 'cfg_node': 'preprocessor.sequence_length', + 'help': 'The parameters for train dataset', + }) -args = TokenClassificationArguments.from_cli(task='token-classification') +training_args = TokenClassificationArguments().parse_cli() +config, args = training_args.to_config() print(args) -# load dataset -train_dataset = MsDataset.load( - args.dataset_name, - subset_name=args.subset_name, - split='train', - namespace='damo')['train'] -eval_dataset = MsDataset.load( - args.dataset_name, - subset_name=args.subset_name, - 
split='validation', - namespace='damo')['validation'] + +def get_label_list(labels): + unique_labels = set() + for label in labels: + unique_labels = unique_labels | set(label) + label_list = list(unique_labels) + label_list.sort() + return label_list + + +def cfg_modify_fn(cfg): + if args.use_model_config: + cfg.merge_from_dict(config) + else: + cfg = config + labels = train_dataset[training_args.label] + validation_dataset[ + training_args.label] + label_enumerate_values = get_label_list(labels) + cfg.merge_from_dict({ + 'preprocessor.label2id': + {label: id + for id, label in enumerate(label_enumerate_values)} + }) + cfg.merge_from_dict({'model.num_labels': len(label_enumerate_values)}) + cfg.merge_from_dict({'preprocessor.use_fast': True}) + cfg.merge_from_dict({ + 'evaluation.metrics': { + 'type': 'token-cls-metric', + 'label2id': + {label: id + for id, label in enumerate(label_enumerate_values)} + } + }) + if cfg.train.lr_scheduler.type == 'LinearLR': + cfg.train.lr_scheduler['total_iters'] = \ + int(len(train_dataset) / cfg.train.dataloader.batch_size_per_gpu) * cfg.train.max_epochs + return cfg + + +if args.dataset_json_file is None: + train_dataset = MsDataset.load( + args.train_dataset_name, + subset_name=args.train_subset_name, + split='train', + namespace=args.train_dataset_namespace)['train'] + validation_dataset = MsDataset.load( + args.train_dataset_name, + subset_name=args.train_subset_name, + split='validation', + namespace=args.train_dataset_namespace)['validation'] +else: + train_dataset, validation_dataset = build_dataset_from_file( + args.dataset_json_file) kwargs = dict( model=args.model, train_dataset=train_dataset, - eval_dataset=eval_dataset, + eval_dataset=validation_dataset, work_dir=args.work_dir, - cfg_modify_fn=args) + cfg_modify_fn=cfg_modify_fn) -trainer = build_trainer(name=args.trainer, default_args=kwargs) +trainer = EpochBasedTrainer(**kwargs) trainer.train() diff --git a/examples/pytorch/token_classification/run_train_mgeo.sh b/examples/pytorch/token_classification/run_train_mgeo.sh index f80af84f..1e384ec5 100644 --- a/examples/pytorch/token_classification/run_train_mgeo.sh +++ b/examples/pytorch/token_classification/run_train_mgeo.sh @@ -1,15 +1,22 @@ -PYTHONPATH=. torchrun examples/pytorch/token_classification/finetune_token_classification.py \ +PYTHONPATH=. 
python examples/pytorch/token_classification/finetune_token_classification.py \ + --task 'token-classification' \ --trainer 'nlp-base-trainer' \ --work_dir './tmp' \ --model 'damo/mgeo_backbone_chinese_base' \ - --dataset_name 'GeoGLUE' \ - --subset_name 'GeoETA' \ - --train_dataset_params 'first_sequence=tokens,label=ner_tags,sequence_length=128' \ + --train_dataset_name 'GeoGLUE' \ + --train_subset_name 'GeoETA' \ + --train_dataset_namespace 'damo' \ + --first_sequence 'tokens' \ + --eval_strategy by_step \ + --eval_interval 10 \ + --label 'ner_tags' \ + --sequence_length 128 \ --preprocessor 'token-cls-tokenizer' \ --preprocessor_padding 'max_length' \ --max_epochs 1 \ + --mode 'inference' \ + --use_model_config True \ --per_device_train_batch_size 32 \ + --train_data_worker 0 \ + --eval_data_worker 0 \ --lr 3e-5 \ - --save_ckpt_strategy 'by_epoch' \ - --logging_interval 100 \ - --eval_strategy 'by_epoch' \ diff --git a/examples/pytorch/token_classification/run_train_structbert.sh b/examples/pytorch/token_classification/run_train_structbert.sh index 28967f60..a44c4519 100644 --- a/examples/pytorch/token_classification/run_train_structbert.sh +++ b/examples/pytorch/token_classification/run_train_structbert.sh @@ -1,16 +1,22 @@ -PYTHONPATH=. torchrun examples/pytorch/token_classification/finetune_token_classification.py \ +PYTHONPATH=. python examples/pytorch/token_classification/finetune_token_classification.py \ + --task 'token-classification' \ --trainer 'nlp-base-trainer' \ --work_dir './tmp' \ --model 'damo/nlp_structbert_backbone_base_std' \ - --dataset_name 'GeoGLUE' \ - --subset_name 'GeoETA' \ - --train_dataset_params 'first_sequence=tokens,label=ner_tags,sequence_length=128' \ + --train_dataset_name 'GeoGLUE' \ + --train_subset_name 'GeoETA' \ + --train_dataset_namespace 'damo' \ + --first_sequence 'tokens' \ + --eval_strategy by_step \ + --eval_interval 20 \ + --label 'ner_tags' \ + --sequence_length 128 \ --preprocessor 'token-cls-tokenizer' \ --preprocessor_padding 'max_length' \ --max_epochs 2 \ + --mode 'inference' \ + --use_model_config True \ --per_device_train_batch_size 32 \ + --train_data_worker 0 \ + --eval_data_worker 0 \ --lr 3e-5 \ - --save_ckpt_strategy 'by_epoch' \ - --logging_interval 1 \ - --eval_strategy 'by_step' \ - --eval_interval 20 \ diff --git a/examples/pytorch/transformers/configuration.json b/examples/pytorch/transformers/configuration.json deleted file mode 100644 index df6a73c8..00000000 --- a/examples/pytorch/transformers/configuration.json +++ /dev/null @@ -1 +0,0 @@ -{"framework":"pytorch","train":{"work_dir":"/tmp","max_epochs":10,"dataloader":{"batch_size_per_gpu":16,"workers_per_gpu":0},"optimizer":{"type":"SGD","lr":0.001},"lr_scheduler":{"type":"StepLR","step_size":2},"hooks":[{"type":"CheckpointHook","interval":1}]},"evaluation":{"dataloader":{"batch_size_per_gpu":16,"workers_per_gpu":0,"shuffle":false}}} diff --git a/examples/pytorch/transformers/finetune_transformers_model.py b/examples/pytorch/transformers/finetune_transformers_model.py index bbfb807a..5110f751 100644 --- a/examples/pytorch/transformers/finetune_transformers_model.py +++ b/examples/pytorch/transformers/finetune_transformers_model.py @@ -5,11 +5,11 @@ from datasets import load_dataset from transformers import (BertForSequenceClassification, BertTokenizerFast, default_data_collator) +from modelscope import TrainingArgs from modelscope.trainers import EpochBasedTrainer, build_trainer -from modelscope.trainers.default_config import DEFAULT_CONFIG, TrainingArgs -@dataclass 
+@dataclass(init=False) class TransformersArguments(TrainingArgs): num_labels: int = field( @@ -17,13 +17,27 @@ class TransformersArguments(TrainingArgs): 'help': 'The number of labels', }) + sentence: str = field( + default=None, metadata={ + 'help': 'The sentence key', + }) -args = TransformersArguments.from_cli( - task='text-classification', eval_metrics='seq-cls-metric') + label: str = field( + default=None, metadata={ + 'help': 'The label key', + }) -print(args) -dataset = load_dataset(args.dataset_name, args.subset_name) +training_args = TransformersArguments( + task='text-classification', eval_metrics='seq-cls-metric').parse_cli() +config, args = training_args.to_config() + +print(config, args) + +train_dataset = load_dataset( + args.train_dataset_name, args.train_subset_name, split=args.train_split) +val_dataset = load_dataset( + args.val_dataset_name, args.val_subset_name, split=args.val_split) model = BertForSequenceClassification.from_pretrained( args.model, num_labels=args.num_labels) @@ -31,26 +45,30 @@ tokenizer = BertTokenizerFast.from_pretrained(args.model) def tokenize_sentence(row): - return tokenizer(row['sentence'], padding='max_length', max_length=128) + return tokenizer( + row[training_args.sentence], padding='max_length', max_length=128) # Extra columns, Rename columns -dataset = dataset.map(tokenize_sentence).remove_columns(['sentence', - 'idx']).rename_column( - 'label', 'labels') +train_dataset = train_dataset.map(tokenize_sentence) +val_dataset = val_dataset.map(tokenize_sentence) +if training_args.label != 'labels': + train_dataset = train_dataset.rename_columns( + {training_args.label: 'labels'}) + val_dataset = val_dataset.rename_columns({training_args.label: 'labels'}) cfg_file = os.path.join(args.work_dir or './', 'configuration.json') -DEFAULT_CONFIG.dump(cfg_file) +config.dump(cfg_file) kwargs = dict( model=model, cfg_file=cfg_file, # data_collator data_collator=default_data_collator, - train_dataset=dataset['train'], - eval_dataset=dataset['validation'], - seed=args.seed, - cfg_modify_fn=args) + train_dataset=train_dataset, + eval_dataset=val_dataset, + remove_unused_data=True, + seed=args.seed) os.environ['LOCAL_RANK'] = str(args.local_rank) trainer: EpochBasedTrainer = build_trainer(name='trainer', default_args=kwargs) diff --git a/examples/pytorch/transformers/run_train.sh b/examples/pytorch/transformers/run_train.sh index c76c4636..94e5ef75 100644 --- a/examples/pytorch/transformers/run_train.sh +++ b/examples/pytorch/transformers/run_train.sh @@ -1,5 +1,14 @@ PYTHONPATH=. python examples/pytorch/transformers/finetune_transformers_model.py \ --model bert-base-uncased \ --num_labels 15 \ - --dataset_name clue \ - --subset_name tnews + --train_dataset_name clue \ + --train_subset_name tnews \ + --train_split train \ + --val_dataset_name clue \ + --val_subset_name tnews \ + --train_split train \ + --val_split validation \ + --sentence sentence \ + --label label \ + --eval_strategy by_step \ + --eval_interval 100 diff --git a/modelscope/__init__.py b/modelscope/__init__.py index 81fdf505..f7553958 100644 --- a/modelscope/__init__.py +++ b/modelscope/__init__.py @@ -1,4 +1,79 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
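
# ---------------------------------------------------------------------------
# Illustrative sketch of the TrainingArgs flow that the refactored example
# scripts above share: build the arguments with programmatic defaults, overlay
# command-line flags via parse_cli(), split the result into a Config plus a
# plain namespace via to_config(), and hand a cfg_modify_fn to build_trainer().
# The model id and dataset handling mirror the image-classification example;
# the generic trainer name 'trainer' is an assumption and may need to be
# replaced by a task-specific trainer.
# ---------------------------------------------------------------------------
from modelscope import MsDataset, TrainingArgs
from modelscope.trainers import build_trainer

training_args = TrainingArgs(
    model='damo/cv_vit-base_image-classification_ImageNet-labels',
    max_epochs=1).parse_cli()
config, args = training_args.to_config()


def cfg_modify_fn(cfg):
    # --use_model_config true: merge CLI values into the model's own config,
    # otherwise replace the model config with the CLI-derived one.
    if args.use_model_config:
        cfg.merge_from_dict(config)
    else:
        cfg = config
    return cfg


namespace, dataset_name = args.train_dataset_name.split('/')
train_dataset = MsDataset.load(
    dataset_name, namespace=namespace, subset_name='default', split='train')
eval_dataset = MsDataset.load(
    dataset_name, namespace=namespace, subset_name='default',
    split='validation')

trainer = build_trainer(
    name='trainer',
    default_args=dict(
        model=args.model,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        cfg_modify_fn=cfg_modify_fn))
trainer.train()
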
-from .version import __release_datetime__, __version__ +from typing import TYPE_CHECKING -__all__ = ['__version__', '__release_datetime__'] +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .version import __release_datetime__, __version__ + from .trainers import EpochBasedTrainer, TrainingArgs, build_dataset_from_file + from .trainers import Hook, Priority + from .exporters import Exporter + from .exporters import TfModelExporter + from .exporters import TorchModelExporter + from .hub.api import HubApi + from .hub.snapshot_download import snapshot_download + from .hub.push_to_hub import push_to_hub, push_to_hub_async + from .hub.check_model import check_model_is_id, check_local_model_is_latest + from .metrics import AudioNoiseMetric, Metric, task_default_metrics, ImageColorEnhanceMetric, ImageDenoiseMetric, \ + ImageInstanceSegmentationCOCOMetric, ImagePortraitEnhancementMetric, SequenceClassificationMetric, \ + TextGenerationMetric, TokenClassificationMetric, VideoSummarizationMetric, MovieSceneSegmentationMetric, \ + AccuracyMetric, BleuMetric, ImageInpaintingMetric, ReferringVideoObjectSegmentationMetric, \ + VideoFrameInterpolationMetric, VideoStabilizationMetric, VideoSuperResolutionMetric, PplMetric, \ + ImageQualityAssessmentDegradationMetric, ImageQualityAssessmentMosMetric, TextRankingMetric, \ + LossMetric, ImageColorizationMetric, OCRRecognitionMetric + from .models import Model, TorchModel + from .preprocessors import Preprocessor + from .pipelines import Pipeline, pipeline + from .utils.hub import read_config, create_model_if_not_exist + from .utils.logger import get_logger + from .msdatasets import MsDataset + +else: + _import_structure = { + 'version': ['__release_datetime__', '__version__'], + 'trainers': [ + 'EpochBasedTrainer', 'TrainingArgs', 'Hook', 'Priority', + 'build_dataset_from_file' + ], + 'exporters': [ + 'Exporter', + 'TfModelExporter', + 'TorchModelExporter', + ], + 'hub.api': ['HubApi'], + 'hub.snapshot_download': ['snapshot_download'], + 'hub.push_to_hub': ['push_to_hub', 'push_to_hub_async'], + 'hub.check_model': + ['check_model_is_id', 'check_local_model_is_latest'], + 'metrics': [ + 'AudioNoiseMetric', 'Metric', 'task_default_metrics', + 'ImageColorEnhanceMetric', 'ImageDenoiseMetric', + 'ImageInstanceSegmentationCOCOMetric', + 'ImagePortraitEnhancementMetric', 'SequenceClassificationMetric', + 'TextGenerationMetric', 'TokenClassificationMetric', + 'VideoSummarizationMetric', 'MovieSceneSegmentationMetric', + 'AccuracyMetric', 'BleuMetric', 'ImageInpaintingMetric', + 'ReferringVideoObjectSegmentationMetric', + 'VideoFrameInterpolationMetric', 'VideoStabilizationMetric', + 'VideoSuperResolutionMetric', 'PplMetric', + 'ImageQualityAssessmentDegradationMetric', + 'ImageQualityAssessmentMosMetric', 'TextRankingMetric', + 'LossMetric', 'ImageColorizationMetric', 'OCRRecognitionMetric' + ], + 'models': ['Model', 'TorchModel'], + 'preprocessors': ['Preprocessor'], + 'pipelines': ['Pipeline', 'pipeline'], + 'utils.hub': ['read_config', 'create_model_if_not_exist'], + 'utils.logger': ['get_logger'], + 'msdatasets': ['MsDataset'] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/cli/template/template.tpl b/modelscope/cli/template/template.tpl index 0c09a925..78fe339c 100644 --- a/modelscope/cli/template/template.tpl +++ b/modelscope/cli/template/template.tpl @@ -122,10 +122,11 @@ 
class ${pipeline_name}(Pipeline): # Tips: usr_config_path is the temporary save configuration location, after upload modelscope hub, it is the model_id usr_config_path = '${configuration_path}' config = Config({ - 'framework': 'pytorch', - 'task': '${task_name}', - 'model': {'type': 'my-custom-model'}, - "pipeline": {"type": "my-custom-pipeline"} + "framework": 'pytorch', + "task": '${task_name}', + "model": {'type': 'my-custom-model'}, + "pipeline": {"type": "my-custom-pipeline"}, + "allow_remote": True }) config.dump('${configuration_path}' + 'configuration.json') diff --git a/modelscope/models/cv/human_wholebody_keypoint/__init__.py b/modelscope/exporters/audio/__init__.py similarity index 75% rename from modelscope/models/cv/human_wholebody_keypoint/__init__.py rename to modelscope/exporters/audio/__init__.py index 30e23457..883151cd 100644 --- a/modelscope/models/cv/human_wholebody_keypoint/__init__.py +++ b/modelscope/exporters/audio/__init__.py @@ -1,14 +1,14 @@ # Copyright (c) Alibaba, Inc. and its affiliates. + from typing import TYPE_CHECKING from modelscope.utils.import_utils import LazyImportModule if TYPE_CHECKING: - from .human_wholebody_keypoint import HumanWholeBodyKeypoint - + from .ans_dfsmn_exporter import ANSDFSMNExporter else: _import_structure = { - 'human_wholebody_keypoint': ['HumanWholeBodyKeypoint'] + 'ans_dfsmn_exporter': ['ANSDFSMNExporter'], } import sys diff --git a/modelscope/exporters/audio/ans_dfsmn_exporter.py b/modelscope/exporters/audio/ans_dfsmn_exporter.py new file mode 100644 index 00000000..976f983f --- /dev/null +++ b/modelscope/exporters/audio/ans_dfsmn_exporter.py @@ -0,0 +1,62 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os + +import torch + +from modelscope.exporters.builder import EXPORTERS +from modelscope.exporters.torch_model_exporter import TorchModelExporter +from modelscope.metainfo import Models +from modelscope.utils.constant import ModelFile, Tasks + +INPUT_NAME = 'input' +OUTPUT_NAME = 'output' + + +@EXPORTERS.register_module( + Tasks.acoustic_noise_suppression, module_name=Models.speech_dfsmn_ans) +class ANSDFSMNExporter(TorchModelExporter): + + def export_onnx(self, output_dir: str, opset=9, **kwargs): + """Export the model as onnx format files. + + Args: + output_dir: The output dir. + opset: The version of the ONNX operator set to use. + kwargs: + device: The device used to forward. + Returns: + A dict containing the model key - model file path pairs. 
+ """ + model = self.model if 'model' not in kwargs else kwargs.pop('model') + device_name = 'cpu' if 'device' not in kwargs else kwargs.pop('device') + model_bin_file = os.path.join(model.model_dir, + ModelFile.TORCH_MODEL_BIN_FILE) + if os.path.exists(model_bin_file): + checkpoint = torch.load(model_bin_file, map_location='cpu') + model.load_state_dict(checkpoint) + onnx_file = os.path.join(output_dir, ModelFile.ONNX_MODEL_FILE) + + with torch.no_grad(): + model.eval() + device = torch.device(device_name) + model.to(device) + model_script = torch.jit.script(model) + fbank_input = torch.zeros((1, 3, 120), dtype=torch.float32) + torch.onnx.export( + model_script, + fbank_input, + onnx_file, + opset_version=opset, + input_names=[INPUT_NAME], + output_names=[OUTPUT_NAME], + dynamic_axes={ + INPUT_NAME: { + 0: 'batch_size', + 1: 'number_of_frame' + }, + OUTPUT_NAME: { + 0: 'batch_size', + 1: 'number_of_frame' + } + }) + return {'model': onnx_file} diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index ad8d0c5d..e3436aea 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -6,6 +6,7 @@ import functools import os import pickle import platform +import re import shutil import tempfile import uuid @@ -15,10 +16,10 @@ from http.cookiejar import CookieJar from os.path import expanduser from typing import Dict, List, Optional, Tuple, Union +import requests from requests import Session from requests.adapters import HTTPAdapter, Retry -from modelscope import __version__ from modelscope.hub.constants import (API_HTTP_CLIENT_TIMEOUT, API_RESPONSE_FIELD_DATA, API_RESPONSE_FIELD_EMAIL, @@ -45,7 +46,7 @@ from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, MASTER_MODEL_BRANCH, DatasetFormations, DatasetMetaFormats, DatasetVisibilityMap, DownloadChannel, - ModelFile) + ModelFile, VirgoDatasetConfig) from modelscope.utils.logger import get_logger from .utils.utils import (get_endpoint, get_release_datetime, model_id_to_group_owner_name) @@ -160,6 +161,7 @@ class HubApi: 'Visibility': visibility, # server check 'License': license, 'OriginalModelId': original_model_id, + 'TrainId': os.environ.get('MODELSCOPE_TRAIN_ID', ''), } r = self.session.post( path, json=body, cookies=cookies, headers=self.headers) @@ -236,8 +238,10 @@ class HubApi: license: Optional[str] = Licenses.APACHE_V2, chinese_name: Optional[str] = None, commit_message: Optional[str] = 'upload model', + tag: Optional[str] = None, revision: Optional[str] = DEFAULT_REPOSITORY_REVISION, - original_model_id: Optional[str] = None): + original_model_id: Optional[str] = None, + ignore_file_pattern: Optional[Union[List[str], str]] = None): """Upload model from a given directory to given repository. A valid model directory must contain a configuration.json file. @@ -268,10 +272,13 @@ class HubApi: chinese name of the new created model. commit_message(`str`, *optional*, defaults to `None`): commit message of the push request. + tag(`str`, *optional*, defaults to `None`): + The tag on this commit revision (`str`, *optional*, default to DEFAULT_MODEL_REVISION): which branch to push. If the branch is not exists, It will create a new branch and push to it. original_model_id (str, optional): The base model id which this model is trained from + ignore_file_pattern (`Union[List[str], str]`, optional): The file pattern to ignore uploading Raises: InvalidParameter: Parameter invalid. 
@@ -292,6 +299,10 @@ class HubApi: if cookies is None: raise NotLoginException('Must login before upload!') files_to_save = os.listdir(model_dir) + if ignore_file_pattern is None: + ignore_file_pattern = [] + if isinstance(ignore_file_pattern, str): + ignore_file_pattern = [ignore_file_pattern] try: self.get_model(model_id=model_id) except Exception: @@ -325,6 +336,8 @@ class HubApi: shutil.rmtree(src, ignore_errors=True) for f in files_to_save: if f[0] != '.': + if any([re.search(pattern, f) is not None for pattern in ignore_file_pattern]): + continue src = os.path.join(model_dir, f) if os.path.isdir(src): shutil.copytree(src, os.path.join(tmp_dir, f)) @@ -338,6 +351,8 @@ class HubApi: commit_message=commit_message, local_branch=revision, remote_branch=revision) + if tag is not None: + repo.tag_and_push(tag, tag) except Exception: raise finally: @@ -581,6 +596,17 @@ class HubApi: file_list = file_list['Files'] return file_list + @staticmethod + def dump_datatype_file(dataset_type: int, meta_cache_dir: str): + """ + Dump the data_type as a local file, in order to get the dataset formation without calling the datahub. + More details, please refer to the class `modelscope.utils.constant.DatasetFormations`. + """ + dataset_type_file_path = os.path.join(meta_cache_dir, + f'{str(dataset_type)}{DatasetFormations.formation_mark_ext.value}') + with open(dataset_type_file_path, 'w') as fp: + fp.write('*** Automatically-generated file, do not modify ***') + def get_dataset_meta_files_local_paths(self, dataset_name: str, namespace: str, revision: str, @@ -591,10 +617,7 @@ class HubApi: cookies = ModelScopeConfig.get_cookies() # Dump the data_type as a local file - dataset_type_file_path = os.path.join(meta_cache_dir, - f'{str(dataset_type)}{DatasetFormations.formation_mark_ext.value}') - with open(dataset_type_file_path, 'w') as fp: - fp.write('*** Automatically-generated file, do not modify ***') + HubApi.dump_datatype_file(dataset_type=dataset_type, meta_cache_dir=meta_cache_dir) for file_info in file_list: file_path = file_info['Path'] @@ -661,7 +684,6 @@ class HubApi: cookies = self._check_cookie(use_cookies=True) else: cookies = ModelScopeConfig.get_cookies() - r = self.session.get(url=datahub_url, cookies=cookies, headers=self.headers) r = self.session.get( url=datahub_url, cookies=cookies, headers=self.headers) @@ -669,6 +691,31 @@ class HubApi: raise_on_error(resp) return resp['Data'] + def get_virgo_meta(self, dataset_id: str, version: int = 1) -> dict: + """ + Get virgo dataset meta info. 
+ """ + virgo_endpoint = os.environ.get(VirgoDatasetConfig.env_virgo_endpoint, '') + if not virgo_endpoint: + raise RuntimeError(f'Virgo endpoint is not set in env: {VirgoDatasetConfig.env_virgo_endpoint}') + + virgo_dataset_url = f'{virgo_endpoint}/data/set/download' + cookies = requests.utils.dict_from_cookiejar(ModelScopeConfig.get_cookies()) + + dataset_info = dict( + dataSetId=dataset_id, + dataSetVersion=version + ) + data = dict( + data=dataset_info, + ) + r = self.session.post(url=virgo_dataset_url, json=data, cookies=cookies, headers=self.headers, timeout=900) + resp = r.json() + if resp['code'] != 0: + raise RuntimeError(f'Failed to get virgo dataset: {resp}') + + return resp['data'] + def get_dataset_access_config_for_unzipped(self, dataset_name: str, namespace: str, @@ -895,6 +942,7 @@ class ModelScopeConfig: if MODELSCOPE_CLOUD_USERNAME in os.environ: user_name = os.environ[MODELSCOPE_CLOUD_USERNAME] + from modelscope import __version__ ua = 'modelscope/%s; python/%s; session_id/%s; platform/%s; processor/%s; env/%s; user/%s' % ( __version__, platform.python_version(), diff --git a/modelscope/hub/errors.py b/modelscope/hub/errors.py index be94d7fd..4bf2f935 100644 --- a/modelscope/hub/errors.py +++ b/modelscope/hub/errors.py @@ -2,6 +2,7 @@ from http import HTTPStatus +import requests from requests.exceptions import HTTPError from modelscope.utils.logger import get_logger @@ -57,13 +58,22 @@ def is_ok(rsp): return rsp['Code'] == HTTPStatus.OK and rsp['Success'] +def _decode_response_error(response: requests.Response): + if 'application/json' in response.headers.get('content-type', ''): + message = response.json() + else: + message = response.content.decode('utf-8') + return message + + def handle_http_post_error(response, url, request_body): try: response.raise_for_status() except HTTPError as error: logger.error('Request %s with body: %s exception' % (url, request_body)) - logger.error('Response details: %s' % response.content) + message = _decode_response_error(response) + logger.error('Response details: %s' % message) raise error @@ -75,7 +85,8 @@ def handle_http_response(response, logger, cookies, model_id): logger.error( f'Authentication token does not exist, failed to access model {model_id} which may not exist or may be \ private. Please login first.') - logger.error('Response details: %s' % response.content) + message = _decode_response_error(response) + logger.error('Response details: %s' % message) raise error diff --git a/modelscope/hub/file_download.py b/modelscope/hub/file_download.py index 380d2432..6d3ad63d 100644 --- a/modelscope/hub/file_download.py +++ b/modelscope/hub/file_download.py @@ -12,7 +12,6 @@ import requests from requests.adapters import Retry from tqdm import tqdm -from modelscope import __version__ from modelscope.hub.api import HubApi, ModelScopeConfig from modelscope.hub.constants import (API_FILE_DOWNLOAD_CHUNK_SIZE, API_FILE_DOWNLOAD_RETRY_TIMES, diff --git a/modelscope/hub/git.py b/modelscope/hub/git.py index 80887738..b0fae148 100644 --- a/modelscope/hub/git.py +++ b/modelscope/hub/git.py @@ -55,16 +55,10 @@ class GitCommandWrapper(metaclass=Singleton): response.check_returncode() return response except subprocess.CalledProcessError as error: - if response.returncode == 1: - logger.info('Nothing to commit.') - return response - else: - logger.error( - 'There are error run git command, you may need to login first.' 
- ) - raise GitError('stdout: %s, stderr: %s' % - (response.stdout.decode('utf8'), - error.stderr.decode('utf8'))) + logger.error('There are error run git command.') + raise GitError( + 'stdout: %s, stderr: %s' % + (response.stdout.decode('utf8'), error.stderr.decode('utf8'))) def config_auth_token(self, repo_dir, auth_token): url = self.get_repo_remote_url(repo_dir) @@ -199,8 +193,11 @@ class GitCommandWrapper(metaclass=Singleton): else: return ['/'.join(line.split('/')[1:]) for line in info[1:]] - def pull(self, repo_dir: str): - cmds = ['-C', repo_dir, 'pull'] + def pull(self, + repo_dir: str, + remote: str = 'origin', + branch: str = 'master'): + cmds = ['-C', repo_dir, 'pull', remote, branch] return self._run_git_command(*cmds) def push(self, diff --git a/modelscope/hub/push_to_hub.py b/modelscope/hub/push_to_hub.py index ee7b240e..d117cc7f 100644 --- a/modelscope/hub/push_to_hub.py +++ b/modelscope/hub/push_to_hub.py @@ -4,8 +4,8 @@ import concurrent.futures import os from modelscope.hub.api import HubApi -from modelscope.hub.constants import Licenses, ModelVisibility -from modelscope.hub.errors import NotExistError +from modelscope.hub.constants import ModelVisibility +from modelscope.utils.constant import DEFAULT_REPOSITORY_REVISION from modelscope.utils.logger import get_logger logger = get_logger() @@ -18,7 +18,10 @@ def _api_push_to_hub(repo_name, token, private=True, commit_message='', - source_repo=''): + tag=None, + source_repo='', + ignore_file_pattern=None, + revision=DEFAULT_REPOSITORY_REVISION): try: api = HubApi() api.login(token) @@ -29,7 +32,10 @@ def _api_push_to_hub(repo_name, if not private else ModelVisibility.PRIVATE, chinese_name=repo_name, commit_message=commit_message, - original_model_id=source_repo) + tag=tag, + original_model_id=source_repo, + ignore_file_pattern=ignore_file_pattern, + revision=revision) commit_message = commit_message or 'No commit message' logger.info( f'Successfully upload the model to {repo_name} with message: {commit_message}' @@ -48,7 +54,10 @@ def push_to_hub(repo_name, private=True, retry=3, commit_message='', - source_repo=''): + tag=None, + source_repo='', + ignore_file_pattern=None, + revision=DEFAULT_REPOSITORY_REVISION): """ Args: repo_name: The repo name for the modelhub repo @@ -57,13 +66,18 @@ def push_to_hub(repo_name, private: If is a private repo, default True retry: Retry times if something error in uploading, default 3 commit_message: The commit message + tag: The tag of this commit source_repo: The source repo (model id) which this model comes from - + ignore_file_pattern: The file pattern to be ignored in uploading. + revision: The branch to commit to Returns: The boolean value to represent whether the model is uploaded. """ if token is None: token = os.environ.get('MODELSCOPE_API_TOKEN') + if ignore_file_pattern is None: + ignore_file_pattern = os.environ.get('UPLOAD_IGNORE_FILE_PATTERN') + assert repo_name is not None assert token is not None, 'Either pass in a token or to set `MODELSCOPE_API_TOKEN` in the environment variables.' 
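
# ---------------------------------------------------------------------------
# Illustrative call of the push_to_hub wrapper with the new arguments. When no
# token is passed, MODELSCOPE_API_TOKEN is read from the environment, and
# UPLOAD_IGNORE_FILE_PATTERN can supply the ignore pattern, as the defaults
# above show. Repository name, paths and tag are placeholders.
# ---------------------------------------------------------------------------
import os

from modelscope.hub.push_to_hub import push_to_hub

os.environ.setdefault('MODELSCOPE_API_TOKEN', '<your-token>')
uploaded = push_to_hub(
    repo_name='my-namespace/my-finetuned-model',
    output_dir='./tmp/finetune_output',  # needs configuration.json or .yaml
    commit_message='nightly checkpoint',
    tag='nightly',
    revision='master')
print('uploaded:', uploaded)
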
assert os.path.isdir(output_dir) assert 'configuration.json' in os.listdir(output_dir) or 'configuration.yaml' in os.listdir(output_dir) \ @@ -73,7 +87,8 @@ def push_to_hub(repo_name, f'Uploading {output_dir} to {repo_name} with message {commit_message}') for i in range(retry): if _api_push_to_hub(repo_name, output_dir, token, private, - commit_message, source_repo): + commit_message, tag, source_repo, + ignore_file_pattern, revision): return True return False @@ -83,7 +98,10 @@ def push_to_hub_async(repo_name, token=None, private=True, commit_message='', - source_repo=''): + tag=None, + source_repo='', + ignore_file_pattern=None, + revision=DEFAULT_REPOSITORY_REVISION): """ Args: repo_name: The repo name for the modelhub repo @@ -91,13 +109,18 @@ def push_to_hub_async(repo_name, token: The user api token, function will check the `MODELSCOPE_API_TOKEN` variable if this argument is None private: If is a private repo, default True commit_message: The commit message + tag: The tag of this commit source_repo: The source repo (model id) which this model comes from - + ignore_file_pattern: The file pattern to be ignored in uploading + revision: The branch to commit to Returns: A handler to check the result and the status """ if token is None: token = os.environ.get('MODELSCOPE_API_TOKEN') + if ignore_file_pattern is None: + ignore_file_pattern = os.environ.get('UPLOAD_IGNORE_FILE_PATTERN') + assert repo_name is not None assert token is not None, 'Either pass in a token or to set `MODELSCOPE_API_TOKEN` in the environment variables.' assert os.path.isdir(output_dir) assert 'configuration.json' in os.listdir(output_dir) or 'configuration.yaml' in os.listdir(output_dir) \ @@ -106,4 +129,5 @@ def push_to_hub_async(repo_name, logger.info( f'Uploading {output_dir} to {repo_name} with message {commit_message}') return _executor.submit(_api_push_to_hub, repo_name, output_dir, token, - private, commit_message, source_repo) + private, commit_message, tag, source_repo, + ignore_file_pattern, revision) diff --git a/modelscope/hub/repository.py b/modelscope/hub/repository.py index 1d107a3c..3fc6da2b 100644 --- a/modelscope/hub/repository.py +++ b/modelscope/hub/repository.py @@ -88,6 +88,26 @@ class Repository: remote = None return remote + def pull(self, remote: str = 'origin', branch: str = 'master'): + """Pull remote branch + + Args: + remote (str, optional): The remote name. Defaults to 'origin'. + branch (str, optional): The remote branch. Defaults to 'master'. + """ + self.git_wrapper.pull(self.model_dir, remote=remote, branch=branch) + + def add_lfs_type(self, file_name_suffix: str): + """Add file suffix to lfs list. + + Args: + file_name_suffix (str): The file name suffix. 
+ examples '*.safetensors' + """ + os.system( + "printf '%s filter=lfs diff=lfs merge=lfs -text\n'>>%s" % + (file_name_suffix, os.path.join(self.model_dir, '.gitattributes'))) + def push(self, commit_message: str, local_branch: Optional[str] = DEFAULT_REPOSITORY_REVISION, @@ -120,7 +140,6 @@ class Repository: self.model_repo_name) url = self.git_wrapper.get_repo_remote_url(self.model_dir) - self.git_wrapper.pull(self.model_dir) self.git_wrapper.add(self.model_dir, all_files=True) self.git_wrapper.commit(self.model_dir, commit_message) diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py index 60ad6d85..c4057314 100644 --- a/modelscope/metainfo.py +++ b/modelscope/metainfo.py @@ -116,15 +116,9 @@ class Models(object): bad_image_detecting = 'bad-image-detecting' controllable_image_generation = 'controllable-image-generation' longshortnet = 'longshortnet' + fastinst = 'fastinst' pedestrian_attribute_recognition = 'pedestrian-attribute-recognition' - # EasyCV models - yolox = 'YOLOX' - segformer = 'Segformer' - hand_2d_keypoints = 'HRNet-Hand2D-Keypoints' - image_object_detection_auto = 'image-object-detection-auto' - dino = 'DINO' - # nlp models bert = 'bert' palm = 'palm-v2' @@ -177,6 +171,7 @@ class Models(object): speech_frcrn_ans_cirm_16k = 'speech_frcrn_ans_cirm_16k' speech_dfsmn_ans = 'speech_dfsmn_ans' speech_dfsmn_kws_char_farfield = 'speech_dfsmn_kws_char_farfield' + speech_dfsmn_kws_char_farfield_iot = 'speech_dfsmn_kws_char_farfield_iot' speech_kws_fsmn_char_ctc_nearfield = 'speech_kws_fsmn_char_ctc_nearfield' speech_mossformer_separation_temporal_8k = 'speech_mossformer_separation_temporal_8k' kws_kwsbp = 'kws-kwsbp' @@ -187,6 +182,9 @@ class Models(object): generic_sv = 'generic-sv' ecapa_tdnn_sv = 'ecapa-tdnn-sv' campplus_sv = 'cam++-sv' + eres2net_sv = 'eres2net-sv' + scl_sd = 'scl-sd' + rdino_tdnn_sv = 'rdino_ecapa-tdnn-sv' generic_lm = 'generic-lm' # multi-modal models @@ -205,6 +203,8 @@ class Models(object): hitea = 'hitea' soonet = 'soonet' efficient_diffusion_tuning = 'efficient-diffusion-tuning' + mplug_owl = 'mplug-owl' + clip_interrogator = 'clip-interrogator' # science models unifold = 'unifold' @@ -255,6 +255,7 @@ class Pipelines(object): should use task name for this pipeline. For pipeline which suuport only one model, we should use ${Model}-${Task} as its name. 
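
# ---------------------------------------------------------------------------
# Illustrative use of the Repository helpers added above. The constructor
# arguments (local directory plus the model id to clone from) follow the
# existing Repository class and should be treated as assumptions; the model id
# and file suffix are placeholders.
# ---------------------------------------------------------------------------
from modelscope.hub.repository import Repository

repo = Repository(
    model_dir='./my_model_repo',
    clone_from='my-namespace/my-finetuned-model')
repo.add_lfs_type('*.safetensors')           # track a file suffix via git-lfs
repo.pull(remote='origin', branch='master')  # pull an explicit remote/branch
repo.push('add safetensors weights')
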
""" + pipeline_template = 'pipeline-template' # vision tasks portrait_matting = 'unet-image-matting' universal_matting = 'unet-universal-matting' @@ -277,8 +278,6 @@ class Pipelines(object): tbs_detection = 'tbs-detection' object_detection = 'vit-object-detection' abnormal_object_detection = 'abnormal-object-detection' - easycv_detection = 'easycv-detection' - easycv_segmentation = 'easycv-segmentation' face_2d_keypoints = 'mobilenet_face-2d-keypoints_alignment' salient_detection = 'u2net-salient-detection' salient_boudary_detection = 'res2net-salient-detection' @@ -347,7 +346,6 @@ class Pipelines(object): video_single_object_tracking_procontext = 'procontext-vitb-video-single-object-tracking' video_multi_object_tracking = 'video-multi-object-tracking' image_panoptic_segmentation = 'image-panoptic-segmentation' - image_panoptic_segmentation_easycv = 'image-panoptic-segmentation-easycv' video_summarization = 'googlenet_pgl_video_summarization' language_guided_video_summarization = 'clip-it-video-summarization' image_semantic_segmentation = 'image-semantic-segmentation' @@ -402,7 +400,7 @@ class Pipelines(object): nerf_recon_acc = 'nerf-recon-acc' bad_image_detecting = 'bad-image-detecting' controllable_image_generation = 'controllable-image-generation' - + fast_instance_segmentation = 'fast-instance-segmentation' image_quality_assessment_mos = 'image-quality-assessment-mos' image_quality_assessment_man = 'image-quality-assessment-man' image_quality_assessment_degradation = 'image-quality-assessment-degradation' @@ -485,6 +483,9 @@ class Pipelines(object): speaker_diarization_inference = 'speaker-diarization-inference' vad_inference = 'vad-inference' speaker_verification = 'speaker-verification' + speaker_verification_rdino = 'speaker-verification-rdino' + speaker_verification_eres2net = 'speaker-verification-eres2net' + speaker_change_locating = 'speaker-change-locating' lm_inference = 'language-score-prediction' speech_timestamp_inference = 'speech-timestamp-inference' @@ -514,6 +515,7 @@ class Pipelines(object): gridvlp_multi_modal_embedding = 'gridvlp-multi-modal-embedding' soonet_video_temporal_grounding = 'soonet-video-temporal-grounding' efficient_diffusion_tuning = 'efficient-diffusion-tuning' + multimodal_dialogue = 'multimodal-dialogue' # science tasks protein_structure = 'unifold-protein-structure' @@ -881,6 +883,7 @@ class NLPTrainers(object): document_grounded_dialog_rerank_trainer = 'document-grounded-dialog-rerank-trainer' document_grounded_dialog_retrieval_trainer = 'document-grounded-dialog-retrieval-trainer' siamese_uie_trainer = 'siamese-uie-trainer' + translation_evaluation_trainer = 'translation-evaluation-trainer' class MultiModalTrainers(object): @@ -911,7 +914,6 @@ class Trainers(CVTrainers, NLPTrainers, MultiModalTrainers, AudioTrainers): """ default = 'trainer' - easycv = 'easycv' tinynas_damoyolo = 'tinynas-damoyolo' @staticmethod @@ -933,8 +935,6 @@ class Trainers(CVTrainers, NLPTrainers, MultiModalTrainers, AudioTrainers): return Fields.multi_modal elif attribute_or_value == Trainers.default: return Trainers.default - elif attribute_or_value == Trainers.easycv: - return Trainers.easycv else: return 'unknown' @@ -1034,6 +1034,8 @@ class Preprocessors(object): vldoc_preprocessor = 'vldoc-preprocessor' hitea_tasks_preprocessor = 'hitea-tasks-preprocessor' diffusion_image_generation_preprocessor = 'diffusion-image-generation-preprocessor' + mplug_owl_preprocessor = 'mplug-owl-preprocessor' + image_captioning_clip_interrogator_preprocessor = 
'image-captioning-clip-interrogator-preprocessor' # science preprocessor unifold_preprocessor = 'unifold-preprocessor' @@ -1098,6 +1100,8 @@ class Metrics(object): # metric for image-colorization task image_colorization_metric = 'image-colorization-metric' ocr_recognition_metric = 'ocr-recognition-metric' + # metric for translation evaluation + translation_evaluation_metric = 'translation-evaluation-metric' class Optimizers(object): @@ -1165,14 +1169,6 @@ class LR_Schedulers(object): class CustomDatasets(object): """ Names for different datasets. """ - ClsDataset = 'ClsDataset' - Face2dKeypointsDataset = 'FaceKeypointDataset' - HandCocoWholeBodyDataset = 'HandCocoWholeBodyDataset' - HumanWholeBodyKeypointDataset = 'WholeBodyCocoTopDownDataset' - SegDataset = 'SegDataset' - DetDataset = 'DetDataset' - DetImagesMixDataset = 'DetImagesMixDataset' - PanopticDataset = 'PanopticDataset' PairedDataset = 'PairedDataset' SiddDataset = 'SiddDataset' GoproDataset = 'GoproDataset' diff --git a/modelscope/metrics/__init__.py b/modelscope/metrics/__init__.py index 17767001..6f5dfbde 100644 --- a/modelscope/metrics/__init__.py +++ b/modelscope/metrics/__init__.py @@ -31,6 +31,7 @@ if TYPE_CHECKING: from .loss_metric import LossMetric from .image_colorization_metric import ImageColorizationMetric from .ocr_recognition_metric import OCRRecognitionMetric + from .translation_evaluation_metric import TranslationEvaluationMetric else: _import_structure = { 'audio_noise_metric': ['AudioNoiseMetric'], @@ -62,7 +63,8 @@ else: 'text_ranking_metric': ['TextRankingMetric'], 'loss_metric': ['LossMetric'], 'image_colorization_metric': ['ImageColorizationMetric'], - 'ocr_recognition_metric': ['OCRRecognitionMetric'] + 'ocr_recognition_metric': ['OCRRecognitionMetric'], + 'translation_evaluation_metric': ['TranslationEvaluationMetric'] } import sys diff --git a/modelscope/metrics/builder.py b/modelscope/metrics/builder.py index 2bc756e6..43aaea14 100644 --- a/modelscope/metrics/builder.py +++ b/modelscope/metrics/builder.py @@ -42,6 +42,7 @@ class MetricKeys(object): NDCG = 'ndcg' AR = 'AR' Colorfulness = 'colorfulness' + Kendall_Tau_Correlation = 'kendall_tau_correlation' task_default_metrics = { @@ -76,6 +77,7 @@ task_default_metrics = { Tasks.bad_image_detecting: [Metrics.accuracy], Tasks.ocr_recognition: [Metrics.ocr_recognition_metric], Tasks.efficient_diffusion_tuning: [Metrics.loss_metric], + Tasks.translation_evaluation: [Metrics.translation_evaluation_metric] } diff --git a/modelscope/metrics/translation_evaluation_metric.py b/modelscope/metrics/translation_evaluation_metric.py new file mode 100644 index 00000000..81705d3b --- /dev/null +++ b/modelscope/metrics/translation_evaluation_metric.py @@ -0,0 +1,174 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import importlib +from typing import Dict, List, Union + +from pandas import DataFrame + +from modelscope.metainfo import Metrics +from modelscope.metrics.base import Metric +from modelscope.metrics.builder import METRICS, MetricKeys +from modelscope.models.nlp.unite.configuration import InputFormat +from modelscope.utils.logger import get_logger +from modelscope.utils.registry import default_group + +logger = get_logger() + + +@METRICS.register_module( + group_key=default_group, module_name=Metrics.translation_evaluation_metric) +class TranslationEvaluationMetric(Metric): + r"""The metric class for translation evaluation. 
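+
+    It accumulates the segment-level model scores together with the raw human
+    scores, language pairs and input formats collected during evaluation, and
+    reports a Kendall's tau correlation per language pair and per input
+    format, plus an average per input format.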
+ + """ + + def __init__(self, gap_threshold: float = 25.0): + r"""Build a translation evaluation metric, following the designed + Kendall's tau correlation from WMT Metrics Shared Task competitions. + + Args: + gap_threshold: The score gap denoting the available hypothesis pair. + + Returns: + A metric for translation evaluation. + """ + self.gap_threshold = gap_threshold + + self.lp = list() + self.segment_id = list() + self.raw_score = list() + self.score = list() + self.input_format = list() + + def clear(self) -> None: + r"""Clear all the stored variables. + """ + self.lp.clear() + self.segment_id.clear() + self.raw_score.clear() + self.input_format.clear() + + self.score.clear() + + return + + def add(self, outputs: Dict[str, List[float]], + inputs: Dict[str, List[Union[float, int]]]) -> None: + r"""Collect the related results for processing. + + Args: + outputs: Dict containing 'scores' + inputs: Dict containing 'labels' and 'segment_ids' + + """ + + self.lp += inputs['lp'] + self.segment_id += inputs['segment_id'] + self.raw_score += inputs['raw_score'] + self.input_format += inputs['input_format'] + + self.score += outputs['score'] + + return + + def evaluate(self) -> Dict[str, Dict[str, float]]: + r"""Compute the Kendall's tau correlation. + + Returns: + A dict denoting Kendall's tau correlation. + + """ + + data = { + 'lp': self.lp, + 'segment_id': self.segment_id, + 'raw_score': self.raw_score, + 'input_format': self.input_format, + 'score': self.score + } + data = DataFrame(data=data) + correlation = dict() + + for input_format in data.input_format.unique(): + logger.info('Evaluation results for %s input format' + % input_format.value) + input_format_data = data[data.input_format == input_format] + + temp_correlation = dict() + + for lp in sorted(input_format_data.lp.unique()): + sub_data = input_format_data[input_format_data.lp == lp] + temp_correlation[input_format.value + '_' + + lp] = self.compute_kendall_tau(sub_data) + logger.info( + '\t%s: %f' % + (lp, + temp_correlation[input_format.value + '_' + lp] * 100)) + + avg_correlation = sum( + temp_correlation.values()) / len(temp_correlation) + correlation[input_format.value + '_avg'] = avg_correlation + logger.info('Average evaluation result for %s input format: %f' % + (input_format.value, avg_correlation)) + logger.info('') + correlation.update(temp_correlation) + + return correlation + + def merge(self, other: 'TranslationEvaluationMetric') -> None: + r"""Merge the predictions from other TranslationEvaluationMetric objects. + + Args: + other: Another TranslationEvaluationMetric object. + + """ + + self.lp += other.lp + self.segment_id += other.segment_ids + self.raw_score += other.raw_score + self.input_format += other.input_format + + self.score += other.score + + return + + def compute_kendall_tau(self, csv_data: DataFrame) -> float: + r"""Compute kendall's tau correlation. + + Args: + csv_data: The pandas dataframe. + + Returns: + float: THe kendall's Tau correlation. 
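+
+        Example (an illustrative walk-through, not taken from real data):
+            with ``gap_threshold=25``, two hypotheses of the same segment
+            whose raw human scores are 90 and 60 form one available pair;
+            the pair is concordant if the predicted scores rank them the
+            same way (e.g. 0.8 vs. 0.3) and discordant otherwise, and the
+            returned value is
+            (concordant - discordant) / (concordant + discordant).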
+ + """ + concor = discor = 0 + + for segment_id in sorted(csv_data.segment_id.unique()): + group_csv_data = csv_data[csv_data.segment_id == segment_id] + + examples = group_csv_data.to_dict('records') + + for i in range(0, len(examples)): + for j in range(i + 1, len(examples)): + if self.raw_score[i] - self.raw_score[ + j] >= self.gap_threshold: + if self.score[i] > self.score[j]: + concor += 1 + elif self.score[i] < self.score[j]: + discor += 1 + elif self.raw_score[i] - self.raw_score[ + j] <= -self.gap_threshold: + if self.score[i] < self.score[j]: + concor += 1 + elif self.score[i] > self.score[j]: + discor += 1 + + if concor + discor == 0: + logger.warning( + 'We don\'t have available pairs when evaluation. ' + 'Marking the kendall tau correlation as the lowest value (-1.0).' + ) + return -1.0 + else: + return (concor - discor) / (concor + discor) diff --git a/modelscope/models/audio/ans/conv_stft.py b/modelscope/models/audio/ans/conv_stft.py index 4b393a4c..3d37f1aa 100644 --- a/modelscope/models/audio/ans/conv_stft.py +++ b/modelscope/models/audio/ans/conv_stft.py @@ -39,7 +39,7 @@ class ConvSTFT(nn.Module): super(ConvSTFT, self).__init__() if fft_len is None: - self.fft_len = np.int(2**np.ceil(np.log2(win_len))) + self.fft_len = int(2**np.ceil(np.log2(win_len))) else: self.fft_len = fft_len @@ -78,7 +78,7 @@ class ConviSTFT(nn.Module): fix=True): super(ConviSTFT, self).__init__() if fft_len is None: - self.fft_len = np.int(2**np.ceil(np.log2(win_len))) + self.fft_len = int(2**np.ceil(np.log2(win_len))) else: self.fft_len = fft_len kernel, window = init_kernels( diff --git a/modelscope/models/audio/asr/generic_automatic_speech_recognition.py b/modelscope/models/audio/asr/generic_automatic_speech_recognition.py index 25de839e..8dd11982 100644 --- a/modelscope/models/audio/asr/generic_automatic_speech_recognition.py +++ b/modelscope/models/audio/asr/generic_automatic_speech_recognition.py @@ -45,27 +45,5 @@ class GenericAutomaticSpeechRecognition(Model): def forward(self) -> Dict[str, Any]: """preload model and return the info of the model """ - if self.model_cfg['model_config']['type'] == Frameworks.tf: - from easyasr import asr_inference_paraformer_tf - if hasattr(asr_inference_paraformer_tf, 'preload'): - model_workspace = self.model_cfg['model_workspace'] - model_path = os.path.join(model_workspace, - self.model_cfg['am_model']) - vocab_path = os.path.join( - model_workspace, - self.model_cfg['model_config']['vocab_file']) - sampled_ids = 'seq2seq/sampled_ids' - sampled_lengths = 'seq2seq/sampled_lengths' - if 'sampled_ids' in self.model_cfg['model_config']: - sampled_ids = self.model_cfg['model_config']['sampled_ids'] - if 'sampled_lengths' in self.model_cfg['model_config']: - sampled_lengths = self.model_cfg['model_config'][ - 'sampled_lengths'] - asr_inference_paraformer_tf.preload( - ngpu=1, - asr_model_file=model_path, - vocab_file=vocab_path, - sampled_ids=sampled_ids, - sampled_lengths=sampled_lengths) return self.model_cfg diff --git a/modelscope/models/audio/kws/farfield/fsmn_sele_v3.py b/modelscope/models/audio/kws/farfield/fsmn_sele_v3.py new file mode 100644 index 00000000..d57354d0 --- /dev/null +++ b/modelscope/models/audio/kws/farfield/fsmn_sele_v3.py @@ -0,0 +1,233 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
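+
+# The module below defines a multi-channel deep-FSMN keyword spotter:
+# DFSMNUnit is a single expand/shrink/FSMN block (with a skip connection
+# when input and output sizes match), and FSMNSeleNetV3 stacks several of
+# them, runs every input channel through the shared stack, max-pools
+# across channels and maps the result to per-frame keyword scores.
+# A rough shape sketch (all sizes here are illustrative assumptions, not
+# values required by the model):
+#
+#     net = FSMNSeleNetV3(input_dim=120, num_syn=5)
+#     x = torch.randn(4, 100, 2, 120)   # [batch, time, channels, feature]
+#     y = net(x)                        # -> [4, 100, 5]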
+ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .fsmn import AffineTransform, Fsmn, LinearTransform, RectifiedLinear +from .model_def import HEADER_BLOCK_SIZE, ActivationType, LayerType, f32ToI32 + + +class DFSMNUnit(nn.Module): + """ one multi-channel deep fsmn unit + Args: + dimin: input dimension + dimexpand: feature expansion dimension + dimout: output dimension + lorder: left ofder + rorder: right order + """ + + def __init__(self, + dimin=64, + dimexpand=128, + dimout=64, + lorder=10, + rorder=1): + super(DFSMNUnit, self).__init__() + + self.expand = AffineTransform(dimin, dimexpand) + self.shrink = LinearTransform(dimexpand, dimout) + self.fsmn = Fsmn(dimout, dimout, lorder, rorder, 1, 1) + + self.debug = False + self.dataout = None + + def forward(self, input): + """ + Args: + input: [batch, time, feature] + """ + out1 = F.relu(self.expand(input)) + out2 = self.shrink(out1) + out3 = self.fsmn(out2) + + # add skip connection for matched data + if input.shape[-1] == out3.shape[-1]: + out3 = input + out3 + if self.debug: + self.dataout = out3 + return out3 + + def print_model(self): + self.expand.printModel() + self.shrink.printModel() + self.fsmn.printModel() + + def to_kaldi_nnet(self): + re_str = self.expand.toKaldiNNet() + relu = RectifiedLinear(self.expand.linear.out_features, + self.expand.linear.out_features) + re_str += relu.toKaldiNNet() + re_str = self.shrink.toKaldiNNet() + re_str += self.fsmn.toKaldiNNet() + return re_str + + +class FSMNSeleNetV3(nn.Module): + """ Deep FSMN model with channel selection performs multi-channel kws. + Zhang, Shiliang, et al. "Deep-FSMN for large vocabulary continuous speech + recognition." 2018 IEEE International Conference on Acoustics, Speech and + Signal Processing (ICASSP). IEEE, 2018. + + Args: + input_dim: input dimension + linear_dim: fsmn input dimension + proj_dim: fsmn projection dimension + lorder: fsmn left order + rorder: fsmn right order + num_syn: output dimension + fsmn_layers: no. 
of fsmn units + """ + + def __init__(self, + input_dim=120, + linear_dim=128, + proj_dim=64, + lorder=10, + rorder=1, + num_syn=5, + fsmn_layers=5): + super(FSMNSeleNetV3, self).__init__() + + self.mem = [] + # the first unit, mapping input dim to proj dim + unit = DFSMNUnit(input_dim, linear_dim, proj_dim, lorder, rorder) + self.mem.append(unit) + self.add_module('mem_{:d}'.format(0), unit) + + # deep fsmn layers with skip connection + for i in range(1, fsmn_layers): + unit = DFSMNUnit(proj_dim, linear_dim, proj_dim, lorder, rorder) + self.mem.append(unit) + self.add_module('mem_{:d}'.format(i), unit) + + self.expand2 = AffineTransform(proj_dim, linear_dim) + self.decision = AffineTransform(linear_dim, num_syn) + + def forward(self, input): + # multi-channel temp space, [batch, time, channel, feature] + if torch.cuda.is_available(): + x = torch.zeros(input.shape[0], input.shape[1], input.shape[2], + self.expand2.linear.out_features).cuda() + else: + x = torch.zeros(input.shape[0], input.shape[1], input.shape[2], + self.expand2.linear.out_features) + + for n in range(input.shape[2]): + chin = input[:, :, n, :] + + for unit in self.mem: + chout = unit(chin) + chin = chout + + x[:, :, n, :] = F.relu(self.expand2(chout)) + + # perform max pooling + pool = nn.MaxPool2d((x.shape[2], 1), stride=(x.shape[2], 1)) + y = pool(x) + + # remove channel dimension + y = torch.squeeze(y, -2) + z = self.decision(y) + + return z + + def print_model(self): + for unit in self.mem: + unit.print_model() + + self.expand2.printModel() + self.decision.printModel() + + def print_header(self): + """ get DFSMN params + """ + input_dim = self.mem[0].expand.linear.in_features + linear_dim = self.mem[0].expand.linear.out_features + proj_dim = self.mem[0].shrink.linear.out_features + lorder = self.mem[0].fsmn.conv_left.kernel_size[0] + rorder = 0 + if self.mem[0].fsmn.conv_right is not None: + rorder = self.mem[0].fsmn.conv_right.kernel_size[0] + + num_syn = self.decision.linear.out_features + fsmn_layers = len(self.mem) + + # no. 
of output channels, 0.0 means the same as numins + numouts = 1.0 + + # + # write total header + # + header = [0.0] * HEADER_BLOCK_SIZE * 5 + # numins + header[0] = 0.0 + # numouts + header[1] = numouts + # dimins + header[2] = input_dim + # dimouts + header[3] = num_syn + # numlayers + header[4] = 4 + + # + # write each layer's header + # + hidx = 1 + + header[HEADER_BLOCK_SIZE * hidx + 0] = float( + LayerType.LAYER_DFSMN.value) + header[HEADER_BLOCK_SIZE * hidx + 1] = 0.0 + header[HEADER_BLOCK_SIZE * hidx + 2] = input_dim + header[HEADER_BLOCK_SIZE * hidx + 3] = linear_dim + header[HEADER_BLOCK_SIZE * hidx + 4] = proj_dim + header[HEADER_BLOCK_SIZE * hidx + 5] = lorder + header[HEADER_BLOCK_SIZE * hidx + 6] = rorder + header[HEADER_BLOCK_SIZE * hidx + 7] = fsmn_layers + hidx += 1 + + header[HEADER_BLOCK_SIZE * hidx + 0] = float( + LayerType.LAYER_DENSE.value) + header[HEADER_BLOCK_SIZE * hidx + 1] = 0.0 + header[HEADER_BLOCK_SIZE * hidx + 2] = proj_dim + header[HEADER_BLOCK_SIZE * hidx + 3] = linear_dim + header[HEADER_BLOCK_SIZE * hidx + 4] = 1.0 + header[HEADER_BLOCK_SIZE * hidx + 5] = float( + ActivationType.ACTIVATION_RELU.value) + hidx += 1 + + header[HEADER_BLOCK_SIZE * hidx + 0] = float( + LayerType.LAYER_MAX_POOLING.value) + header[HEADER_BLOCK_SIZE * hidx + 1] = 0.0 + header[HEADER_BLOCK_SIZE * hidx + 2] = linear_dim + hidx += 1 + + header[HEADER_BLOCK_SIZE * hidx + 0] = float( + LayerType.LAYER_DENSE.value) + header[HEADER_BLOCK_SIZE * hidx + 1] = numouts + header[HEADER_BLOCK_SIZE * hidx + 2] = linear_dim + header[HEADER_BLOCK_SIZE * hidx + 3] = num_syn + header[HEADER_BLOCK_SIZE * hidx + 4] = 1.0 + header[HEADER_BLOCK_SIZE * hidx + 5] = float( + ActivationType.ACTIVATION_SOFTMAX.value) + + for h in header: + print(f32ToI32(h)) + + def to_kaldi_nnet(self): + re_str = '\n' + for unit in self.mem: + re_str += unit.to_kaldi_nnet() + re_str = self.expand2.toKaldiNNet() + relu = RectifiedLinear(self.expand2.linear.out_features, + self.expand2.linear.out_features) + re_str += relu.toKaldiNNet() + re_str += self.decision.toKaldiNNet() + re_str += ' %d %d\n' % (self.decision.linear.out_features, + self.decision.linear.out_features) + re_str += '\n' + re_str += '\n' + + return re_str diff --git a/modelscope/models/audio/kws/farfield/model.py b/modelscope/models/audio/kws/farfield/model.py index fff88805..670ac97c 100644 --- a/modelscope/models/audio/kws/farfield/model.py +++ b/modelscope/models/audio/kws/farfield/model.py @@ -11,6 +11,7 @@ from modelscope.models.builder import MODELS from modelscope.utils.audio.audio_utils import update_conf from modelscope.utils.constant import Tasks from .fsmn_sele_v2 import FSMNSeleNetV2 +from .fsmn_sele_v3 import FSMNSeleNetV3 @MODELS.register_module( @@ -18,6 +19,7 @@ from .fsmn_sele_v2 import FSMNSeleNetV2 class FSMNSeleNetV2Decorator(TorchModel): r""" A decorator of FSMNSeleNetV2 for integrating into modelscope framework """ + MODEL_CLASS = FSMNSeleNetV2 MODEL_TXT = 'model.txt' SC_CONFIG = 'sound_connect.conf' @@ -33,7 +35,7 @@ class FSMNSeleNetV2Decorator(TorchModel): """ super().__init__(model_dir, *args, **kwargs) if training: - self.model = FSMNSeleNetV2(*args, **kwargs) + self.model = self.MODEL_CLASS(*args, **kwargs) else: sc_config_file = os.path.join(model_dir, self.SC_CONFIG) model_txt_file = os.path.join(model_dir, self.MODEL_TXT) @@ -42,7 +44,7 @@ class FSMNSeleNetV2Decorator(TorchModel): self._sc = None if os.path.exists(model_txt_file): - conf_dict = dict(mode=56542, kws_model=model_txt_file) + conf_dict = 
dict(kws_model=model_txt_file) update_conf(sc_config_file, new_config_file, conf_dict) import py_sound_connect self._sc = py_sound_connect.SoundConnect(new_config_file) @@ -50,8 +52,8 @@ class FSMNSeleNetV2Decorator(TorchModel): self.size_out = self._sc.bytesPerBlockOut() else: raise Exception( - f'Invalid model directory! Failed to load model file: {model_txt_file}.' - ) + f'Invalid model directory! Failed to load model file:' + f' {model_txt_file}.') def __del__(self): if hasattr(self, 'tmp_dir'): @@ -73,3 +75,24 @@ class FSMNSeleNetV2Decorator(TorchModel): 'confidence': self._sc.kwsConfidence() } return result + + +@MODELS.register_module( + Tasks.keyword_spotting, + module_name=Models.speech_dfsmn_kws_char_farfield_iot) +class FSMNSeleNetV3Decorator(FSMNSeleNetV2Decorator): + r""" A decorator of FSMNSeleNetV3 for integrating into modelscope framework """ + + MODEL_CLASS = FSMNSeleNetV3 + + def __init__(self, + model_dir: str, + training: Optional[bool] = False, + *args, + **kwargs): + """initialize the dfsmn model from the `model_dir` path. + + Args: + model_dir (str): the model path. + """ + super().__init__(model_dir, training, *args, **kwargs) diff --git a/modelscope/models/audio/sv/DTDNN.py b/modelscope/models/audio/sv/DTDNN.py index d9e21ce8..d86d6799 100644 --- a/modelscope/models/audio/sv/DTDNN.py +++ b/modelscope/models/audio/sv/DTDNN.py @@ -76,11 +76,13 @@ class CAMPPlus(nn.Module): bn_size=4, init_channels=128, config_str='batchnorm-relu', - memory_efficient=True): + memory_efficient=True, + output_level='segment'): super(CAMPPlus, self).__init__() self.head = FCM(feat_dim=feat_dim) channels = self.head.out_channels + self.output_level = output_level self.xvector = nn.Sequential( OrderedDict([ @@ -118,10 +120,14 @@ class CAMPPlus(nn.Module): self.xvector.add_module('out_nonlinear', get_nonlinear(config_str, channels)) - self.xvector.add_module('stats', StatsPool()) - self.xvector.add_module( - 'dense', - DenseLayer(channels * 2, embedding_size, config_str='batchnorm_')) + if self.output_level == 'segment': + self.xvector.add_module('stats', StatsPool()) + self.xvector.add_module( + 'dense', + DenseLayer( + channels * 2, embedding_size, config_str='batchnorm_')) + else: + assert self.output_level == 'frame', '`output_level` should be set to \'segment\' or \'frame\'. ' for m in self.modules(): if isinstance(m, (nn.Conv1d, nn.Linear)): @@ -133,6 +139,8 @@ class CAMPPlus(nn.Module): x = x.permute(0, 2, 1) # (B,T,F) => (B,F,T) x = self.head(x) x = self.xvector(x) + if self.output_level == 'frame': + x = x.transpose(1, 2) return x diff --git a/modelscope/models/audio/sv/ERes2Net.py b/modelscope/models/audio/sv/ERes2Net.py new file mode 100644 index 00000000..615be064 --- /dev/null +++ b/modelscope/models/audio/sv/ERes2Net.py @@ -0,0 +1,344 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +""" Res2Net implementation is adapted from https://github.com/wenet-e2e/wespeaker. + ERes2Net incorporates both local and global feature fusion techniques to improve the performance. The local feature + fusion (LFF) fuses the features within one single residual block to extract the local signal. + The global feature fusion (GFF) takes acoustic features of different scales as input to aggregate global signal. 
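+
+    A minimal shape-level sketch (the sizes below are illustrative, not
+    mandated by the model):
+
+        model = ERes2Net(feat_dim=80, embed_dim=192)
+        fbank = torch.randn(2, 200, 80)   # (batch, frames, mel bins)
+        embedding = model(fbank)          # -> (2, 192)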
+""" +import math +import os +from typing import Any, Dict, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchaudio.compliance.kaldi as Kaldi + +import modelscope.models.audio.sv.pooling_layers as pooling_layers +from modelscope.metainfo import Models +from modelscope.models import MODELS, TorchModel +from modelscope.models.audio.sv.fusion import AFF +from modelscope.utils.constant import Tasks + + +class ReLU(nn.Hardtanh): + + def __init__(self, inplace=False): + super(ReLU, self).__init__(0, 20, inplace) + + def __repr__(self): + inplace_str = 'inplace' if self.inplace else '' + return self.__class__.__name__ + ' (' \ + + inplace_str + ')' + + +def conv1x1(in_planes, out_planes, stride=1): + '1x1 convolution without padding' + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=1, + stride=stride, + padding=0, + bias=False) + + +def conv3x3(in_planes, out_planes, stride=1): + '3x3 convolution with padding' + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False) + + +class BasicBlockRes2Net(nn.Module): + expansion = 2 + + def __init__(self, in_planes, planes, stride=1, baseWidth=32, scale=2): + super(BasicBlockRes2Net, self).__init__() + width = int(math.floor(planes * (baseWidth / 64.0))) + self.conv1 = conv1x1(in_planes, width * scale, stride) + self.bn1 = nn.BatchNorm2d(width * scale) + self.nums = scale + + convs = [] + bns = [] + for i in range(self.nums): + convs.append(conv3x3(width, width)) + bns.append(nn.BatchNorm2d(width)) + self.convs = nn.ModuleList(convs) + self.bns = nn.ModuleList(bns) + self.relu = ReLU(inplace=True) + + self.conv3 = conv1x1(width * scale, planes * self.expansion) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion * planes: + self.shortcut = nn.Sequential( + nn.Conv2d( + in_planes, + self.expansion * planes, + kernel_size=1, + stride=stride, + bias=False), nn.BatchNorm2d(self.expansion * planes)) + self.stride = stride + self.width = width + self.scale = scale + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + spx = torch.split(out, self.width, 1) + for i in range(self.nums): + if i == 0: + sp = spx[i] + else: + sp = sp + spx[i] + sp = self.convs[i](sp) + sp = self.relu(self.bns[i](sp)) + if i == 0: + out = sp + else: + out = torch.cat((out, sp), 1) + + out = self.conv3(out) + out = self.bn3(out) + + residual = self.shortcut(x) + out += residual + out = self.relu(out) + + return out + + +class BasicBlockRes2Net_diff_AFF(nn.Module): + expansion = 2 + + def __init__(self, in_planes, planes, stride=1, baseWidth=32, scale=2): + super(BasicBlockRes2Net_diff_AFF, self).__init__() + width = int(math.floor(planes * (baseWidth / 64.0))) + self.conv1 = conv1x1(in_planes, width * scale, stride) + self.bn1 = nn.BatchNorm2d(width * scale) + self.nums = scale + + convs = [] + fuse_models = [] + bns = [] + for i in range(self.nums): + convs.append(conv3x3(width, width)) + bns.append(nn.BatchNorm2d(width)) + for j in range(self.nums - 1): + fuse_models.append(AFF(channels=width)) + + self.convs = nn.ModuleList(convs) + self.bns = nn.ModuleList(bns) + self.fuse_models = nn.ModuleList(fuse_models) + self.relu = ReLU(inplace=True) + + self.conv3 = conv1x1(width * scale, planes * self.expansion) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion * 
planes: + self.shortcut = nn.Sequential( + nn.Conv2d( + in_planes, + self.expansion * planes, + kernel_size=1, + stride=stride, + bias=False), nn.BatchNorm2d(self.expansion * planes)) + self.stride = stride + self.width = width + self.scale = scale + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + spx = torch.split(out, self.width, 1) + for i in range(self.nums): + if i == 0: + sp = spx[i] + else: + sp = self.fuse_models[i - 1](sp, spx[i]) + + sp = self.convs[i](sp) + sp = self.relu(self.bns[i](sp)) + if i == 0: + out = sp + else: + out = torch.cat((out, sp), 1) + + out = self.conv3(out) + out = self.bn3(out) + + residual = self.shortcut(x) + out += residual + out = self.relu(out) + + return out + + +class ERes2Net(nn.Module): + + def __init__(self, + block=BasicBlockRes2Net, + block_fuse=BasicBlockRes2Net_diff_AFF, + num_blocks=[3, 4, 6, 3], + m_channels=32, + feat_dim=80, + embed_dim=192, + pooling_func='TSTP', + two_emb_layer=False): + super(ERes2Net, self).__init__() + self.in_planes = m_channels + self.feat_dim = feat_dim + self.embed_dim = embed_dim + self.stats_dim = int(feat_dim / 8) * m_channels * 8 + self.two_emb_layer = two_emb_layer + + self.conv1 = nn.Conv2d( + 1, m_channels, kernel_size=3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(m_channels) + self.layer1 = self._make_layer( + block, m_channels, num_blocks[0], stride=1) + self.layer2 = self._make_layer( + block, m_channels * 2, num_blocks[1], stride=2) + self.layer3 = self._make_layer( + block_fuse, m_channels * 4, num_blocks[2], stride=2) + self.layer4 = self._make_layer( + block_fuse, m_channels * 8, num_blocks[3], stride=2) + + # downsampling + self.layer1_downsample = nn.Conv2d( + m_channels * 2, + m_channels * 4, + kernel_size=3, + stride=2, + padding=1, + bias=False) + self.layer2_downsample = nn.Conv2d( + m_channels * 4, + m_channels * 8, + kernel_size=3, + padding=1, + stride=2, + bias=False) + self.layer3_downsample = nn.Conv2d( + m_channels * 8, + m_channels * 16, + kernel_size=3, + padding=1, + stride=2, + bias=False) + + # bottom-up fusion + self.fuse_mode12 = AFF(channels=m_channels * 4) + self.fuse_mode123 = AFF(channels=m_channels * 8) + self.fuse_mode1234 = AFF(channels=m_channels * 16) + + self.n_stats = 1 if pooling_func == 'TAP' or pooling_func == 'TSDP' else 2 + self.pool = getattr(pooling_layers, pooling_func)( + in_dim=self.stats_dim * block.expansion) + self.seg_1 = nn.Linear(self.stats_dim * block.expansion * self.n_stats, + embed_dim) + if self.two_emb_layer: + self.seg_bn_1 = nn.BatchNorm1d(embed_dim, affine=False) + self.seg_2 = nn.Linear(embed_dim, embed_dim) + else: + self.seg_bn_1 = nn.Identity() + self.seg_2 = nn.Identity() + + def _make_layer(self, block, planes, num_blocks, stride): + strides = [stride] + [1] * (num_blocks - 1) + layers = [] + for stride in strides: + layers.append(block(self.in_planes, planes, stride)) + self.in_planes = planes * block.expansion + return nn.Sequential(*layers) + + def forward(self, x): + x = x.permute(0, 2, 1) + + x = x.unsqueeze_(1) + out = F.relu(self.bn1(self.conv1(x))) + out1 = self.layer1(out) + + # bottom-up fusion + out2 = self.layer2(out1) + out1_downsample = self.layer1_downsample(out1) + fuse_out12 = self.fuse_mode12(out2, out1_downsample) + + out3 = self.layer3(out2) + fuse_out12_downsample = self.layer2_downsample(fuse_out12) + fuse_out123 = self.fuse_mode123(out3, fuse_out12_downsample) + + out4 = self.layer4(out3) + fuse_out123_downsample = 
self.layer3_downsample(fuse_out123) + fuse_out1234 = self.fuse_mode1234(out4, fuse_out123_downsample) + stats = self.pool(fuse_out1234) + + embed_a = self.seg_1(stats) + if self.two_emb_layer: + out = F.relu(embed_a) + out = self.seg_bn_1(out) + embed_b = self.seg_2(out) + return embed_b + else: + return embed_a + + +@MODELS.register_module( + Tasks.speaker_verification, module_name=Models.eres2net_sv) +class SpeakerVerificationERes2Net(TorchModel): + r"""Enhanced Res2Net architecture with local and global feature fusion. ERes2Net is mainly composed + of LFF and GFF. The LFF extracts localization-preserved speaker features and strengthen the local information + interaction. GFF fuses multi-scale feature maps in bottom-up pathway to obtain global information. + Args: + model_dir: A model dir. + model_config: The model config. + """ + + def __init__(self, model_dir, model_config: Dict[str, Any], *args, + **kwargs): + super().__init__(model_dir, model_config, *args, **kwargs) + self.model_config = model_config + self.other_config = kwargs + self.feature_dim = 80 + + self.embedding_model = ERes2Net() + + pretrained_model_name = kwargs['pretrained_model'] + self.__load_check_point(pretrained_model_name) + + self.embedding_model.eval() + + def forward(self, audio): + assert len(audio.shape) == 2 and audio.shape[ + 0] == 1, 'modelscope error: the shape of input audio to model needs to be [1, T]' + # audio shape: [1, T] + feature = self.__extract_feature(audio) + embedding = self.embedding_model(feature) + + return embedding + + def __extract_feature(self, audio): + feature = Kaldi.fbank(audio, num_mel_bins=self.feature_dim) + feature = feature - feature.mean(dim=0, keepdim=True) + feature = feature.unsqueeze(0) + return feature + + def __load_check_point(self, pretrained_model_name, device=None): + if not device: + device = torch.device('cpu') + self.embedding_model.load_state_dict( + torch.load( + os.path.join(self.model_dir, pretrained_model_name), + map_location=device), + strict=True) diff --git a/modelscope/models/audio/sv/fusion.py b/modelscope/models/audio/sv/fusion.py new file mode 100644 index 00000000..615529bd --- /dev/null +++ b/modelscope/models/audio/sv/fusion.py @@ -0,0 +1,32 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import torch +import torch.nn as nn + + +class AFF(nn.Module): + + def __init__(self, channels=64, r=4): + super(AFF, self).__init__() + inter_channels = int(channels // r) + + self.local_att = nn.Sequential( + nn.Conv2d( + channels * 2, + inter_channels, + kernel_size=1, + stride=1, + padding=0), + nn.BatchNorm2d(inter_channels), + nn.SiLU(inplace=True), + nn.Conv2d( + inter_channels, channels, kernel_size=1, stride=1, padding=0), + nn.BatchNorm2d(channels), + ) + + def forward(self, x, ds_y): + xa = torch.cat((x, ds_y), dim=1) + x_att = self.local_att(xa) + x_att = 1.0 + torch.tanh(x_att) + xo = torch.mul(x, x_att) + torch.mul(ds_y, 2.0 - x_att) + + return xo diff --git a/modelscope/models/audio/sv/pooling_layers.py b/modelscope/models/audio/sv/pooling_layers.py new file mode 100644 index 00000000..0fdc44ca --- /dev/null +++ b/modelscope/models/audio/sv/pooling_layers.py @@ -0,0 +1,107 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +""" This implementation is adapted from https://github.com/wenet-e2e/wespeaker. 
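+    It provides temporal pooling layers (TAP, TSDP, TSTP and ASTP) that map
+    frame-level features of shape (B, F, T) (or (B, C, F, T) for ASTP) to
+    fixed-size utterance-level statistics vectors.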
+""" +import torch +import torch.nn as nn + + +class TAP(nn.Module): + """ + Temporal average pooling, only first-order mean is considered + """ + + def __init__(self, **kwargs): + super(TAP, self).__init__() + + def forward(self, x): + pooling_mean = x.mean(dim=-1) + # To be compatable with 2D input + pooling_mean = pooling_mean.flatten(start_dim=1) + return pooling_mean + + +class TSDP(nn.Module): + """ + Temporal standard deviation pooling, only second-order std is considered + """ + + def __init__(self, **kwargs): + super(TSDP, self).__init__() + + def forward(self, x): + # The last dimension is the temporal axis + pooling_std = torch.sqrt(torch.var(x, dim=-1) + 1e-8) + pooling_std = pooling_std.flatten(start_dim=1) + return pooling_std + + +class TSTP(nn.Module): + """ + Temporal statistics pooling, concatenate mean and std, which is used in + x-vector + Comment: simple concatenation can not make full use of both statistics + """ + + def __init__(self, **kwargs): + super(TSTP, self).__init__() + + def forward(self, x): + # The last dimension is the temporal axis + pooling_mean = x.mean(dim=-1) + pooling_std = torch.sqrt(torch.var(x, dim=-1) + 1e-8) + pooling_mean = pooling_mean.flatten(start_dim=1) + pooling_std = pooling_std.flatten(start_dim=1) + + stats = torch.cat((pooling_mean, pooling_std), 1) + return stats + + +class ASTP(nn.Module): + """ Attentive statistics pooling: Channel- and context-dependent + statistics pooling, first used in ECAPA_TDNN. + """ + + def __init__(self, in_dim, bottleneck_dim=128, global_context_att=False): + super(ASTP, self).__init__() + self.global_context_att = global_context_att + + # Use Conv1d with stride == 1 rather than Linear, then we don't + # need to transpose inputs. + if global_context_att: + self.linear1 = nn.Conv1d( + in_dim * 3, bottleneck_dim, + kernel_size=1) # equals W and b in the paper + else: + self.linear1 = nn.Conv1d( + in_dim, bottleneck_dim, + kernel_size=1) # equals W and b in the paper + self.linear2 = nn.Conv1d( + bottleneck_dim, in_dim, + kernel_size=1) # equals V and k in the paper + + def forward(self, x): + """ + x: a 3-dimensional tensor in tdnn-based architecture (B,F,T) + or a 4-dimensional tensor in resnet architecture (B,C,F,T) + 0-dim: batch-dimension, last-dim: time-dimension (frame-dimension) + """ + if len(x.shape) == 4: + x = x.reshape(x.shape[0], x.shape[1] * x.shape[2], x.shape[3]) + assert len(x.shape) == 3 + + if self.global_context_att: + context_mean = torch.mean(x, dim=-1, keepdim=True).expand_as(x) + context_std = torch.sqrt( + torch.var(x, dim=-1, keepdim=True) + 1e-10).expand_as(x) + x_in = torch.cat((x, context_mean, context_std), dim=1) + else: + x_in = x + + # DON'T use ReLU here! ReLU may be hard to converge. + alpha = torch.tanh( + self.linear1(x_in)) # alpha = F.relu(self.linear1(x_in)) + alpha = torch.softmax(self.linear2(alpha), dim=2) + mean = torch.sum(alpha * x, dim=2) + var = torch.sum(alpha * (x**2), dim=2) - mean**2 + std = torch.sqrt(var.clamp(min=1e-10)) + return torch.cat([mean, std], dim=1) diff --git a/modelscope/models/audio/sv/rdino.py b/modelscope/models/audio/sv/rdino.py new file mode 100644 index 00000000..0d51ee7a --- /dev/null +++ b/modelscope/models/audio/sv/rdino.py @@ -0,0 +1,573 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +""" This ECAPA-TDNN implementation is adapted from https://github.com/speechbrain/speechbrain. + RDINOHead implementation is adapted from DINO framework. 
+""" +import math +import os +from typing import Any, Dict, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchaudio.compliance.kaldi as Kaldi + +from modelscope.metainfo import Models +from modelscope.models import MODELS, TorchModel +from modelscope.utils.constant import Tasks + + +def length_to_mask(length, max_len=None, dtype=None, device=None): + assert len(length.shape) == 1 + + if max_len is None: + max_len = length.max().long().item() + mask = torch.arange( + max_len, device=length.device, dtype=length.dtype).expand( + len(length), max_len) < length.unsqueeze(1) + + if dtype is None: + dtype = length.dtype + + if device is None: + device = length.device + + mask = torch.as_tensor(mask, dtype=dtype, device=device) + return mask + + +def get_padding_elem(L_in: int, stride: int, kernel_size: int, dilation: int): + if stride > 1: + n_steps = math.ceil(((L_in - kernel_size * dilation) / stride) + 1) + L_out = stride * (n_steps - 1) + kernel_size * dilation + padding = [kernel_size // 2, kernel_size // 2] + + else: + L_out = (L_in - dilation * (kernel_size - 1) - 1) // stride + 1 + + padding = [(L_in - L_out) // 2, (L_in - L_out) // 2] + return padding + + +class Conv1d(nn.Module): + + def __init__( + self, + out_channels, + kernel_size, + in_channels, + stride=1, + dilation=1, + padding='same', + groups=1, + bias=True, + padding_mode='reflect', + ): + super().__init__() + self.kernel_size = kernel_size + self.stride = stride + self.dilation = dilation + self.padding = padding + self.padding_mode = padding_mode + + self.conv = nn.Conv1d( + in_channels, + out_channels, + self.kernel_size, + stride=self.stride, + dilation=self.dilation, + padding=0, + groups=groups, + bias=bias, + ) + + def forward(self, x): + if self.padding == 'same': + x = self._manage_padding(x, self.kernel_size, self.dilation, + self.stride) + + elif self.padding == 'causal': + num_pad = (self.kernel_size - 1) * self.dilation + x = F.pad(x, (num_pad, 0)) + + elif self.padding == 'valid': + pass + + else: + raise ValueError( + "Padding must be 'same', 'valid' or 'causal'. 
Got " + + self.padding) + + wx = self.conv(x) + + return wx + + def _manage_padding( + self, + x, + kernel_size: int, + dilation: int, + stride: int, + ): + L_in = x.shape[-1] + padding = get_padding_elem(L_in, stride, kernel_size, dilation) + x = F.pad(x, padding, mode=self.padding_mode) + + return x + + +class BatchNorm1d(nn.Module): + + def __init__( + self, + input_size, + eps=1e-05, + momentum=0.1, + ): + super().__init__() + self.norm = nn.BatchNorm1d( + input_size, + eps=eps, + momentum=momentum, + ) + + def forward(self, x): + return self.norm(x) + + +class TDNNBlock(nn.Module): + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + dilation, + activation=nn.ReLU, + groups=1, + ): + super(TDNNBlock, self).__init__() + self.conv = Conv1d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + dilation=dilation, + groups=groups, + ) + self.activation = activation() + self.norm = BatchNorm1d(input_size=out_channels) + + def forward(self, x): + return self.norm(self.activation(self.conv(x))) + + +class Res2NetBlock(torch.nn.Module): + + def __init__(self, + in_channels, + out_channels, + scale=8, + kernel_size=3, + dilation=1): + super(Res2NetBlock, self).__init__() + assert in_channels % scale == 0 + assert out_channels % scale == 0 + + in_channel = in_channels // scale + hidden_channel = out_channels // scale + + self.blocks = nn.ModuleList([ + TDNNBlock( + in_channel, + hidden_channel, + kernel_size=kernel_size, + dilation=dilation, + ) for i in range(scale - 1) + ]) + self.scale = scale + + def forward(self, x): + y = [] + for i, x_i in enumerate(torch.chunk(x, self.scale, dim=1)): + if i == 0: + y_i = x_i + elif i == 1: + y_i = self.blocks[i - 1](x_i) + else: + y_i = self.blocks[i - 1](x_i + y_i) + y.append(y_i) + y = torch.cat(y, dim=1) + return y + + +class SEBlock(nn.Module): + + def __init__(self, in_channels, se_channels, out_channels): + super(SEBlock, self).__init__() + + self.conv1 = Conv1d( + in_channels=in_channels, out_channels=se_channels, kernel_size=1) + self.relu = torch.nn.ReLU(inplace=True) + self.conv2 = Conv1d( + in_channels=se_channels, out_channels=out_channels, kernel_size=1) + self.sigmoid = torch.nn.Sigmoid() + + def forward(self, x, lengths=None): + L = x.shape[-1] + if lengths is not None: + mask = length_to_mask(lengths * L, max_len=L, device=x.device) + mask = mask.unsqueeze(1) + total = mask.sum(dim=2, keepdim=True) + s = (x * mask).sum(dim=2, keepdim=True) / total + else: + s = x.mean(dim=2, keepdim=True) + + s = self.relu(self.conv1(s)) + s = self.sigmoid(self.conv2(s)) + + return s * x + + +class AttentiveStatisticsPooling(nn.Module): + + def __init__(self, channels, attention_channels=128, global_context=True): + super().__init__() + + self.eps = 1e-12 + self.global_context = global_context + if global_context: + self.tdnn = TDNNBlock(channels * 3, attention_channels, 1, 1) + else: + self.tdnn = TDNNBlock(channels, attention_channels, 1, 1) + self.tanh = nn.Tanh() + self.conv = Conv1d( + in_channels=attention_channels, + out_channels=channels, + kernel_size=1) + + def forward(self, x, lengths=None): + L = x.shape[-1] + + def _compute_statistics(x, m, dim=2, eps=self.eps): + mean = (m * x).sum(dim) + std = torch.sqrt( + (m * (x - mean.unsqueeze(dim)).pow(2)).sum(dim).clamp(eps)) + return mean, std + + if lengths is None: + lengths = torch.ones(x.shape[0], device=x.device) + + # Make binary mask of shape [N, 1, L] + mask = length_to_mask(lengths * L, max_len=L, device=x.device) + mask = 
mask.unsqueeze(1) + + # Expand the temporal context of the pooling layer by allowing the + # self-attention to look at global properties of the utterance. + if self.global_context: + # torch.std is unstable for backward computation + # https://github.com/pytorch/pytorch/issues/4320 + total = mask.sum(dim=2, keepdim=True).float() + mean, std = _compute_statistics(x, mask / total) + mean = mean.unsqueeze(2).repeat(1, 1, L) + std = std.unsqueeze(2).repeat(1, 1, L) + attn = torch.cat([x, mean, std], dim=1) + else: + attn = x + + # Apply layers + attn = self.conv(self.tanh(self.tdnn(attn))) + + # Filter out zero-paddings + attn = attn.masked_fill(mask == 0, float('-inf')) + + attn = F.softmax(attn, dim=2) + mean, std = _compute_statistics(x, attn) + # Append mean and std of the batch + pooled_stats = torch.cat((mean, std), dim=1) + pooled_stats = pooled_stats.unsqueeze(2) + + return pooled_stats + + +class SERes2NetBlock(nn.Module): + + def __init__( + self, + in_channels, + out_channels, + res2net_scale=8, + se_channels=128, + kernel_size=1, + dilation=1, + activation=torch.nn.ReLU, + groups=1, + ): + super().__init__() + self.out_channels = out_channels + self.tdnn1 = TDNNBlock( + in_channels, + out_channels, + kernel_size=1, + dilation=1, + activation=activation, + groups=groups, + ) + self.res2net_block = Res2NetBlock(out_channels, out_channels, + res2net_scale, kernel_size, dilation) + self.tdnn2 = TDNNBlock( + out_channels, + out_channels, + kernel_size=1, + dilation=1, + activation=activation, + groups=groups, + ) + self.se_block = SEBlock(out_channels, se_channels, out_channels) + + self.shortcut = None + if in_channels != out_channels: + self.shortcut = Conv1d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + ) + + def forward(self, x, lengths=None): + residual = x + if self.shortcut: + residual = self.shortcut(x) + + x = self.tdnn1(x) + x = self.res2net_block(x) + x = self.tdnn2(x) + x = self.se_block(x, lengths) + + return x + residual + + +class ECAPA_TDNN(nn.Module): + """An implementation of the speaker embedding model in a paper. + "ECAPA-TDNN: Emphasized Channel Attention, Propagation and Aggregation in + TDNN Based Speaker Verification" (https://arxiv.org/abs/2005.07143). 
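+
+    A minimal shape-level sketch (the 80-dim fbank input and the batch size
+    are illustrative assumptions):
+
+        model = ECAPA_TDNN(input_size=80, lin_neurons=512)
+        feats = torch.randn(2, 200, 80)   # (batch, time, channel)
+        emb = model(feats)                # -> (2, 512)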
+ """ + + def __init__( + self, + input_size, + device='cpu', + lin_neurons=512, + activation=torch.nn.ReLU, + channels=[512, 512, 512, 512, 1536], + kernel_sizes=[5, 3, 3, 3, 1], + dilations=[1, 2, 3, 4, 1], + attention_channels=128, + res2net_scale=8, + se_channels=128, + global_context=True, + groups=[1, 1, 1, 1, 1], + ): + + super().__init__() + assert len(channels) == len(kernel_sizes) + assert len(channels) == len(dilations) + self.channels = channels + self.blocks = nn.ModuleList() + + # The initial TDNN layer + self.blocks.append( + TDNNBlock( + input_size, + channels[0], + kernel_sizes[0], + dilations[0], + activation, + groups[0], + )) + + # SE-Res2Net layers + for i in range(1, len(channels) - 1): + self.blocks.append( + SERes2NetBlock( + channels[i - 1], + channels[i], + res2net_scale=res2net_scale, + se_channels=se_channels, + kernel_size=kernel_sizes[i], + dilation=dilations[i], + activation=activation, + groups=groups[i], + )) + + # Multi-layer feature aggregation + self.mfa = TDNNBlock( + channels[-1], + channels[-1], + kernel_sizes[-1], + dilations[-1], + activation, + groups=groups[-1], + ) + + # Attentive Statistical Pooling + self.asp = AttentiveStatisticsPooling( + channels[-1], + attention_channels=attention_channels, + global_context=global_context, + ) + self.asp_bn = BatchNorm1d(input_size=channels[-1] * 2) + + # Final linear transformation + self.fc = Conv1d( + in_channels=channels[-1] * 2, + out_channels=lin_neurons, + kernel_size=1, + ) + + def forward(self, x, lengths=None): + """Returns the embedding vector. + + Arguments + --------- + x : torch.Tensor + Tensor of shape (batch, time, channel). + """ + x = x.transpose(1, 2) + + xl = [] + for layer in self.blocks: + try: + x = layer(x, lengths=lengths) + except TypeError: + x = layer(x) + xl.append(x) + + # Multi-layer feature aggregation + x = torch.cat(xl[1:], dim=1) + x = self.mfa(x) + + # Attentive Statistical Pooling + x = self.asp(x, lengths=lengths) + x = self.asp_bn(x) + + # Final linear transformation + x = self.fc(x) + + x = x.transpose(1, 2).squeeze(1) + return x + + +class RDINOHead(nn.Module): + + def __init__(self, + in_dim, + out_dim, + use_bn=False, + norm_last_layer=True, + nlayers=3, + hidden_dim=2048, + bottleneck_dim=256, + add_dim=8192): + super().__init__() + nlayers = max(nlayers, 1) + if nlayers == 1: + self.mlp = nn.Linear(in_dim, bottleneck_dim) + else: + layers = [nn.Linear(in_dim, hidden_dim)] + if use_bn: + layers.append(nn.BatchNorm1d(hidden_dim)) + layers.append(nn.GELU()) + for _ in range(nlayers - 2): + layers.append(nn.Linear(hidden_dim, hidden_dim)) + if use_bn: + layers.append(nn.BatchNorm1d(hidden_dim)) + layers.append(nn.GELU()) + + layers.append(nn.Linear(hidden_dim, add_dim)) + self.mlp = nn.Sequential(*layers) + self.add_layer = nn.Linear(add_dim, bottleneck_dim) + self.apply(self._init_weights) + self.last_layer = nn.utils.weight_norm( + nn.Linear(bottleneck_dim, out_dim, bias=False)) + self.last_layer.weight_g.data.fill_(1) + if norm_last_layer: + self.last_layer.weight_g.requires_grad = False + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + torch.nn.init.trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + + def forward(self, x): + vicr_out = self.mlp(x) + x = self.add_layer(vicr_out) + x = nn.functional.normalize(x, dim=-1, p=2) + x = self.last_layer(x) + return vicr_out, x + + +class Combine(nn.Module): + + def __init__(self, backbone, head): + super(Combine, self).__init__() + 
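+        # keep the feature backbone (here the ECAPA-TDNN) and the projection
+        # head (RDINOHead); forward() simply chains them.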
self.backbone = backbone + self.head = head + + def forward(self, x): + x = self.backbone(x) + output = self.head(x) + return output + + +@MODELS.register_module( + Tasks.speaker_verification, module_name=Models.rdino_tdnn_sv) +class SpeakerVerification_RDINO(TorchModel): + + def __init__(self, model_dir, model_config: Dict[str, Any], *args, + **kwargs): + super().__init__(model_dir, model_config, *args, **kwargs) + self.model_config = model_config + self.other_config = kwargs + if self.model_config['channel'] != 1024: + raise ValueError( + 'modelscope error: Currently only 1024-channel ecapa tdnn is supported.' + ) + + self.feature_dim = 80 + channels_config = [1024, 1024, 1024, 1024, 3072] + + self.embedding_model = ECAPA_TDNN( + self.feature_dim, channels=channels_config) + self.embedding_model = Combine(self.embedding_model, + RDINOHead(512, 65536, True)) + + pretrained_model_name = kwargs['pretrained_model'] + self.__load_check_point(pretrained_model_name) + + self.embedding_model.eval() + + def forward(self, audio): + assert len(audio.shape) == 2 and audio.shape[ + 0] == 1, 'modelscope error: the shape of input audio to model needs to be [1, T]' + # audio shape: [1, T] + feature = self.__extract_feature(audio) + embedding = self.embedding_model.backbone(feature) + + return embedding + + def __extract_feature(self, audio): + feature = Kaldi.fbank(audio, num_mel_bins=self.feature_dim) + feature = feature - feature.mean(dim=0, keepdim=True) + feature = feature.unsqueeze(0) + return feature + + def __load_check_point(self, pretrained_model_name, device=None): + if not device: + device = torch.device('cpu') + state_dict = torch.load( + os.path.join(self.model_dir, pretrained_model_name), + map_location=device) + state_dict_tea = { + k.replace('module.', ''): v + for k, v in state_dict['teacher'].items() + } + self.embedding_model.load_state_dict(state_dict_tea, strict=True) diff --git a/modelscope/models/audio/sv/speaker_change_locator.py b/modelscope/models/audio/sv/speaker_change_locator.py new file mode 100644 index 00000000..c22e4c1b --- /dev/null +++ b/modelscope/models/audio/sv/speaker_change_locator.py @@ -0,0 +1,319 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
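+
+# The classes below implement a transformer-based speaker change locator:
+# a frame-level CAM++ encoder turns the input waveform into per-frame
+# features, each frame is concatenated with one of two speaker anchor
+# embeddings, and a small transformer predicts a per-frame activity score
+# for each anchor, from which change points can be located.  Roughly
+# (shapes are illustrative assumptions): audio of shape [1, T] plus
+# anchors of shape [1, 2, D] yield a [frames, 2] score matrix.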
+ +import os +from collections import OrderedDict +from typing import Any, Dict, Union + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchaudio.compliance.kaldi as Kaldi + +from modelscope.metainfo import Models +from modelscope.models import MODELS, TorchModel +from modelscope.models.audio.sv.DTDNN import CAMPPlus +from modelscope.utils.constant import Tasks + + +class MultiHeadSelfAttention(nn.Module): + + def __init__(self, n_units, h=8, dropout=0.1): + super(MultiHeadSelfAttention, self).__init__() + self.linearQ = nn.Linear(n_units, n_units) + self.linearK = nn.Linear(n_units, n_units) + self.linearV = nn.Linear(n_units, n_units) + self.linearO = nn.Linear(n_units, n_units) + self.d_k = n_units // h + self.h = h + self.dropout = nn.Dropout(p=dropout) + self.att = None + + def forward(self, x, batch_size): + # x: (BT, F) + q = self.linearQ(x).reshape(batch_size, -1, self.h, self.d_k) + k = self.linearK(x).reshape(batch_size, -1, self.h, self.d_k) + v = self.linearV(x).reshape(batch_size, -1, self.h, self.d_k) + scores = torch.matmul(q.transpose(1, 2), k.permute( + 0, 2, 3, 1)) / np.sqrt(self.d_k) + # scores: (B, h, T, T) + self.att = F.softmax(scores, dim=3) + p_att = self.dropout(self.att) + # v : (B, T, h, d_k) + # p_att : (B, h, T, T) + x = torch.matmul(p_att, v.transpose(1, 2)) + # x : (B, h, T, d_k) + x = x.transpose(1, 2).reshape(-1, self.h * self.d_k) + return self.linearO(x) + + +class PositionwiseFeedForward(nn.Module): + + def __init__(self, n_units, d_units, dropout): + super(PositionwiseFeedForward, self).__init__() + self.linear1 = nn.Linear(n_units, d_units) + self.linear2 = nn.Linear(d_units, n_units) + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x): + return self.linear2(self.dropout(F.relu(self.linear1(x)))) + + +class PosEncoding(nn.Module): + + def __init__(self, max_seq_len, d_word_vec): + super(PosEncoding, self).__init__() + pos_enc = np.array([[ + pos / np.power(10000, 2.0 * (j // 2) / d_word_vec) + for j in range(d_word_vec) + ] for pos in range(max_seq_len)]) + pos_enc[:, 0::2] = np.sin(pos_enc[:, 0::2]) + pos_enc[:, 1::2] = np.cos(pos_enc[:, 1::2]) + pad_row = np.zeros([1, d_word_vec]) + pos_enc = np.concatenate([pad_row, pos_enc]).astype(np.float32) + + self.pos_enc = torch.nn.Embedding(max_seq_len + 1, d_word_vec) + self.pos_enc.weight = torch.nn.Parameter( + torch.from_numpy(pos_enc), requires_grad=False) + + def forward(self, input_len): + max_len = torch.max(input_len) + input_pos = torch.LongTensor([ + list(range(1, len + 1)) + [0] * (max_len - len) + for len in input_len + ]) + + return self.pos_enc(input_pos) + + +class TransformerEncoder(nn.Module): + + def __init__(self, + idim, + n_units=256, + n_layers=2, + e_units=512, + h=4, + dropout=0.1): + super(TransformerEncoder, self).__init__() + self.linear_in = nn.Linear(idim, n_units) + self.lnorm_in = nn.LayerNorm(n_units) + + self.n_layers = n_layers + self.dropout = nn.Dropout(p=dropout) + for i in range(n_layers): + setattr(self, '{}{:d}'.format('lnorm1_', i), nn.LayerNorm(n_units)) + setattr(self, '{}{:d}'.format('self_att_', i), + MultiHeadSelfAttention(n_units, h)) + setattr(self, '{}{:d}'.format('lnorm2_', i), nn.LayerNorm(n_units)) + setattr(self, '{}{:d}'.format('ff_', i), + PositionwiseFeedForward(n_units, e_units, dropout)) + self.lnorm_out = nn.LayerNorm(n_units) + + def forward(self, x): + # x: [B, num_anchors, T, n_in] + bs, num, tframe, dim = x.size() + x = x.reshape(bs * num, tframe, -1) # [B*num_anchors, T, dim] + # 
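+        # batch and anchor dimensions are merged here so that the shared
+        # encoder below processes each anchor-conditioned stream
+        # independently; the output is reshaped back to
+        # [B, num_anchors, T, dim] after the final layer norm.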
x: (B, T, F) ... batch, time, (mel)freq + B_size, T_size, _ = x.shape + # e: (BT, F) + e = self.linear_in(x.reshape(B_size * T_size, -1)) + # Encoder stack + for i in range(self.n_layers): + # layer normalization + e = getattr(self, '{}{:d}'.format('lnorm1_', i))(e) + # self-attention + s = getattr(self, '{}{:d}'.format('self_att_', i))(e, x.shape[0]) + # residual + e = e + self.dropout(s) + # layer normalization + e = getattr(self, '{}{:d}'.format('lnorm2_', i))(e) + # positionwise feed-forward + s = getattr(self, '{}{:d}'.format('ff_', i))(e) + # residual + e = e + self.dropout(s) + # final layer normalization + # output: (BT, F) + # output: (B, F, T) + output = self.lnorm_out(e).reshape(B_size, T_size, -1) + output = output.reshape(bs, num, tframe, + -1) # [B, num_anchors, T, dim] + return output + + +class TransformerEncoder_out(nn.Module): + + def __init__(self, + idim, + n_units=256, + n_layers=2, + e_units=512, + h=4, + dropout=0.1): + super(TransformerEncoder_out, self).__init__() + self.linear_in = nn.Linear(idim, n_units) + self.lnorm_in = nn.LayerNorm(n_units) + + self.n_layers = n_layers + self.dropout = nn.Dropout(p=dropout) + for i in range(n_layers): + setattr(self, '{}{:d}'.format('lnorm1_', i), nn.LayerNorm(n_units)) + setattr(self, '{}{:d}'.format('self_att_', i), + MultiHeadSelfAttention(n_units, h)) + setattr(self, '{}{:d}'.format('lnorm2_', i), nn.LayerNorm(n_units)) + setattr(self, '{}{:d}'.format('ff_', i), + PositionwiseFeedForward(n_units, e_units, dropout)) + self.lnorm_out = nn.LayerNorm(n_units) + + def forward(self, x): + # x: (B, T, F) + B_size, T_size, _ = x.shape + # e: (BT, F) + e = self.linear_in(x.reshape(B_size * T_size, -1)) + # Encoder stack + for i in range(self.n_layers): + # layer normalization + e = getattr(self, '{}{:d}'.format('lnorm1_', i))(e) + # self-attention + s = getattr(self, '{}{:d}'.format('self_att_', i))(e, x.shape[0]) + # residual + e = e + self.dropout(s) + # layer normalization + e = getattr(self, '{}{:d}'.format('lnorm2_', i))(e) + # positionwise feed-forward + s = getattr(self, '{}{:d}'.format('ff_', i))(e) + # residual + e = e + self.dropout(s) + # final layer normalization + # output: (BT, F) + # output: (B, T, F) + output = self.lnorm_out(e).reshape(B_size, T_size, -1) + return output + + +class OutLayer(nn.Module): + + def __init__(self, n_units=256, num_anchors=2): + super(OutLayer, self).__init__() + self.combine = TransformerEncoder_out(num_anchors * n_units, n_units) + self.out_linear = nn.Linear(n_units // num_anchors, 1) + + def forward(self, input): + # input: [B, num_anchors, T, dim] + bs, num, tframe, dim = input.size() + output = input.permute(0, 2, 1, + 3).reshape(bs, tframe, + -1) # [Bs, t, num_anchors*dim] + output = self.combine(output) # [Bs, t, n_units] + output = output.reshape( + bs, tframe, num, -1) # [Bs, t, num_anchors, n_units//num_anchors] + output = self.out_linear(output).squeeze(-1) # [Bs, t, num_anchors] + + return output + + +class TransformerDetector(nn.Module): + + def __init__(self, + frame_dim=512, + anchor_dim=192, + hidden_dim=256, + max_seq_len=1000): + super(TransformerDetector, self).__init__() + self.detection = TransformerEncoder( + idim=frame_dim + anchor_dim, n_units=hidden_dim) + self.output = OutLayer(n_units=hidden_dim) + self.pos_enc = PosEncoding(max_seq_len, hidden_dim) + + def forward(self, feats, anchors): + # feats: [1, t, fdim] + num_frames = feats.shape[1] + num_anchors = anchors.shape[1] + bs = feats.shape[0] + feats = feats.unsqueeze(1).repeat( + 1, num_anchors, 1, 1) # 
shape: [Bs, num_anchors, t, fdim] + anchors = anchors.unsqueeze(2).repeat( + 1, 1, num_frames, 1) # shape: [Bs, num_anchors, t, xdim] + sd_in = torch.cat((feats, anchors), + dim=-1) # shape: [Bs, num_anchors, t, fdim+xdim] + sd_out = self.detection(sd_in) # shape: [Bs, num_anchors, t, sd_dim] + + # pos + pos_emb = self.pos_enc(torch.tensor([num_frames] * (bs * num_anchors))) + pos_emb = pos_emb.reshape(bs, num_anchors, num_frames, -1) + sd_out += pos_emb + + # output + output = self.output(sd_out) # shape: [Bs, t, num_anchors] + + return output + + +@MODELS.register_module(Tasks.speaker_diarization, module_name=Models.scl_sd) +class SpeakerChangeLocatorTransformer(TorchModel): + r"""A speaekr change locator using the transformer architecture as the backbone. + Args: + model_dir: A model dir. + model_config: The model config. + """ + + def __init__(self, model_dir, model_config: Dict[str, Any], *args, + **kwargs): + super().__init__(model_dir, model_config, *args, **kwargs) + self.model_config = model_config + + self.feature_dim = self.model_config['fbank_dim'] + frame_size = self.model_config['frame_size'] + anchor_size = self.model_config['anchor_size'] + + self.encoder = CAMPPlus(self.feature_dim, output_level='frame') + self.backend = TransformerDetector( + frame_dim=frame_size, anchor_dim=anchor_size) + + pretrained_encoder = kwargs['pretrained_encoder'] + pretrained_backend = kwargs['pretrained_backend'] + + self.__load_check_point(pretrained_encoder, pretrained_backend) + + self.encoder.eval() + self.backend.eval() + + def forward(self, audio, anchors): + assert len(audio.shape) == 2 and audio.shape[ + 0] == 1, 'modelscope error: the shape of input audio to model needs to be [1, T]' + assert len( + anchors.shape + ) == 3 and anchors.shape[0] == 1 and anchors.shape[ + 1] == 2, 'modelscope error: the shape of input anchors to model needs to be [1, 2, D]' + # audio shape: [1, T] + feature = self.__extract_feature(audio) + frame_state = self.encoder(feature) + output = self.backend(frame_state, anchors) + output = output.squeeze(0).detach().cpu().sigmoid() + + time_scale_factor = int(np.ceil(feature.shape[1] / output.shape[0])) + output = output.unsqueeze(1).expand(-1, time_scale_factor, + -1).reshape(-1, output.shape[-1]) + return output + + def __extract_feature(self, audio): + feature = Kaldi.fbank(audio, num_mel_bins=self.feature_dim) + feature = feature - feature.mean(dim=0, keepdim=True) + feature = feature.unsqueeze(0) + return feature + + def __load_check_point(self, + pretrained_encoder, + pretrained_backend, + device=None): + if not device: + device = torch.device('cpu') + self.encoder.load_state_dict( + torch.load( + os.path.join(self.model_dir, pretrained_encoder), + map_location=device)) + + self.backend.load_state_dict( + torch.load( + os.path.join(self.model_dir, pretrained_backend), + map_location=device)) diff --git a/modelscope/models/audio/tts/voice.py b/modelscope/models/audio/tts/voice.py index 645a528f..ed9edf43 100644 --- a/modelscope/models/audio/tts/voice.py +++ b/modelscope/models/audio/tts/voice.py @@ -17,11 +17,9 @@ from kantts.train.trainer import GAN_Trainer, Sambert_Trainer, distributed_init from kantts.utils.ling_unit.ling_unit import KanTtsLinguisticUnit from torch.utils.data import DataLoader -from modelscope import __version__ from modelscope.utils.audio.audio_utils import TtsCustomParams from modelscope.utils.audio.tts_exceptions import ( TtsModelConfigurationException, TtsModelNotExistsException) -from modelscope.utils.constant import ModelFile, Tasks 
from modelscope.utils.logger import get_logger logger = get_logger() @@ -394,6 +392,7 @@ class Voice: logger.info(f'TRAINING steps: {train_max_steps}') config['create_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()) + from modelscope import __version__ config['modelscope_version'] = __version__ with open(os.path.join(stage_dir, 'config.yaml'), 'w') as f: @@ -558,6 +557,7 @@ class Voice: logger.info(f'resume from: {resume_from}') config['create_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()) + from modelscope import __version__ config['modelscope_version'] = __version__ with open(os.path.join(stage_dir, 'config.yaml'), 'w') as f: diff --git a/modelscope/models/cv/__init__.py b/modelscope/models/cv/__init__.py index 21487216..39acec69 100644 --- a/modelscope/models/cv/__init__.py +++ b/modelscope/models/cv/__init__.py @@ -4,9 +4,8 @@ from . import (action_recognition, animal_recognition, bad_image_detecting, body_2d_keypoints, body_3d_keypoints, cartoon, cmdssl_video_embedding, controllable_image_generation, - crowd_counting, face_2d_keypoints, face_detection, - face_generation, face_reconstruction, human_reconstruction, - human_wholebody_keypoint, image_classification, + crowd_counting, face_detection, face_generation, + face_reconstruction, human_reconstruction, image_classification, image_color_enhance, image_colorization, image_defrcn_fewshot, image_denoise, image_inpainting, image_instance_segmentation, image_matching, image_mvs_depth_estimation, diff --git a/modelscope/models/cv/body_2d_keypoints/hrnet_v2.py b/modelscope/models/cv/body_2d_keypoints/hrnet_v2.py index ebd69adb..19e426b2 100644 --- a/modelscope/models/cv/body_2d_keypoints/hrnet_v2.py +++ b/modelscope/models/cv/body_2d_keypoints/hrnet_v2.py @@ -72,7 +72,7 @@ class PoseHighResolutionNetV2(TorchModel): self.stage4, pre_stage_channels = self._make_stage( self.stage4_cfg, num_channels, multi_scale_output=True) """final four layers""" - last_inp_channels = np.int(np.sum(pre_stage_channels)) + last_inp_channels = int(np.sum(pre_stage_channels)) self.final_layer = nn.Sequential( nn.Conv2d( in_channels=last_inp_channels, diff --git a/modelscope/models/cv/cartoon/facelib/face_landmark.py b/modelscope/models/cv/cartoon/facelib/face_landmark.py index 3b7cc1b9..3c53f3a6 100644 --- a/modelscope/models/cv/cartoon/facelib/face_landmark.py +++ b/modelscope/models/cv/cartoon/facelib/face_landmark.py @@ -81,7 +81,7 @@ class FaceLandmark: bbox[2] = center[0] + one_edge // 2 bbox[3] = center[1] + one_edge // 2 - bbox = bbox.astype(np.int) + bbox = bbox.astype(int) crop_image = bimg[bbox[1]:bbox[3], bbox[0]:bbox[2], :] h, w, _ = crop_image.shape crop_image = cv2.resize( diff --git a/modelscope/models/cv/crowd_counting/hrnet_aspp_relu.py b/modelscope/models/cv/crowd_counting/hrnet_aspp_relu.py index 0d1bd3ca..64f40da0 100644 --- a/modelscope/models/cv/crowd_counting/hrnet_aspp_relu.py +++ b/modelscope/models/cv/crowd_counting/hrnet_aspp_relu.py @@ -356,7 +356,7 @@ class HighResolutionNet(nn.Module): num_channels) self.stage3, pre_stage_channels = self._make_stage( self.stage3_cfg, num_channels) - last_inp_channels = np.int(np.sum(pre_stage_channels)) + 256 + last_inp_channels = int(np.sum(pre_stage_channels)) + 256 self.redc_layer = nn.Sequential( nn.Conv2d( in_channels=last_inp_channels, diff --git a/modelscope/models/cv/easycv_base.py b/modelscope/models/cv/easycv_base.py deleted file mode 100644 index 7bc35e84..00000000 --- a/modelscope/models/cv/easycv_base.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 
Alibaba, Inc. and its affiliates. -from easycv.models.base import BaseModel -from easycv.utils.ms_utils import EasyCVMeta - -from modelscope.models.base import TorchModel - - -class EasyCVBaseModel(BaseModel, TorchModel): - """Base model for EasyCV.""" - - def __init__(self, model_dir=None, args=(), kwargs={}): - kwargs.pop(EasyCVMeta.ARCH, None) # pop useless keys - BaseModel.__init__(self) - TorchModel.__init__(self, model_dir=model_dir) - - def forward(self, img, mode='train', **kwargs): - if self.training: - losses = self.forward_train(img, **kwargs) - loss, log_vars = self._parse_losses(losses) - return dict(loss=loss, log_vars=log_vars) - else: - return self.forward_test(img, **kwargs) - - def __call__(self, *args, **kwargs): - return self.forward(*args, **kwargs) diff --git a/modelscope/models/cv/face_2d_keypoints/__init__.py b/modelscope/models/cv/face_2d_keypoints/__init__.py deleted file mode 100644 index 636ba0f4..00000000 --- a/modelscope/models/cv/face_2d_keypoints/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule - -if TYPE_CHECKING: - from .face_2d_keypoints_align import Face2DKeypoints - -else: - _import_structure = {'face_2d_keypoints_align': ['Face2DKeypoints']} - - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/models/cv/face_2d_keypoints/face_2d_keypoints_align.py b/modelscope/models/cv/face_2d_keypoints/face_2d_keypoints_align.py deleted file mode 100644 index 468662a0..00000000 --- a/modelscope/models/cv/face_2d_keypoints/face_2d_keypoints_align.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from easycv.models.face.face_keypoint import FaceKeypoint - -from modelscope.metainfo import Models -from modelscope.models.builder import MODELS -from modelscope.models.cv.easycv_base import EasyCVBaseModel -from modelscope.utils.constant import Tasks - - -@MODELS.register_module( - group_key=Tasks.face_2d_keypoints, module_name=Models.face_2d_keypoints) -class Face2DKeypoints(EasyCVBaseModel, FaceKeypoint): - - def __init__(self, model_dir=None, *args, **kwargs): - EasyCVBaseModel.__init__(self, model_dir, args, kwargs) - FaceKeypoint.__init__(self, *args, **kwargs) diff --git a/modelscope/models/cv/face_detection/peppa_pig_face/face_landmark.py b/modelscope/models/cv/face_detection/peppa_pig_face/face_landmark.py index 03a3b5b7..e7e2ddaf 100644 --- a/modelscope/models/cv/face_detection/peppa_pig_face/face_landmark.py +++ b/modelscope/models/cv/face_detection/peppa_pig_face/face_landmark.py @@ -82,7 +82,7 @@ class FaceLandmark: bbox[2] = center[0] + one_edge // 2 bbox[3] = center[1] + one_edge // 2 - bbox = bbox.astype(np.int) + bbox = bbox.astype(int) crop_image = bimg[bbox[1]:bbox[3], bbox[0]:bbox[2], :] h, w, _ = crop_image.shape crop_image = cv2.resize(crop_image, diff --git a/modelscope/models/cv/hand_2d_keypoints/__init__.py b/modelscope/models/cv/hand_2d_keypoints/__init__.py deleted file mode 100644 index 2b06f19a..00000000 --- a/modelscope/models/cv/hand_2d_keypoints/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule - -if TYPE_CHECKING: - from .hand_2d_keypoints import Hand2dKeyPoints - -else: - _import_structure = {'hand_2d_keypoints': ['Hand2dKeyPoints']} - - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/models/cv/hand_2d_keypoints/hand_2d_keypoints.py b/modelscope/models/cv/hand_2d_keypoints/hand_2d_keypoints.py deleted file mode 100644 index 15a97c30..00000000 --- a/modelscope/models/cv/hand_2d_keypoints/hand_2d_keypoints.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from easycv.models.pose import TopDown - -from modelscope.metainfo import Models -from modelscope.models.builder import MODELS -from modelscope.models.cv.easycv_base import EasyCVBaseModel -from modelscope.utils.constant import Tasks - - -@MODELS.register_module( - group_key=Tasks.hand_2d_keypoints, module_name=Models.hand_2d_keypoints) -class Hand2dKeyPoints(EasyCVBaseModel, TopDown): - - def __init__(self, model_dir=None, *args, **kwargs): - EasyCVBaseModel.__init__(self, model_dir, args, kwargs) - TopDown.__init__(self, *args, **kwargs) diff --git a/modelscope/models/cv/human_reconstruction/models/human_segmenter.py b/modelscope/models/cv/human_reconstruction/models/human_segmenter.py index 3f0261e7..29bf6f70 100644 --- a/modelscope/models/cv/human_reconstruction/models/human_segmenter.py +++ b/modelscope/models/cv/human_reconstruction/models/human_segmenter.py @@ -31,7 +31,7 @@ class human_segmenter(object): img = np.dstack((img, img, img)) elif img.shape[2] == 4: img = img[:, :, :3] - img = img.astype(np.float) + img = img.astype(float) return img def run(self, img): diff --git a/modelscope/models/cv/human_reconstruction/utils.py b/modelscope/models/cv/human_reconstruction/utils.py index 45653dc6..67e1efdb 100644 --- a/modelscope/models/cv/human_reconstruction/utils.py +++ b/modelscope/models/cv/human_reconstruction/utils.py @@ -69,8 +69,8 @@ def eval_grid(coords, num_samples=512 * 512 * 512): resolution = coords.shape[1:4] sdf = np.zeros(resolution) - dirty = np.ones(resolution, dtype=np.bool) - grid_mask = np.zeros(resolution, dtype=np.bool) + dirty = np.ones(resolution, dtype=bool) + grid_mask = np.zeros(resolution, dtype=bool) reso = resolution[0] // init_resolution while reso > 0: diff --git a/modelscope/models/cv/human_wholebody_keypoint/human_wholebody_keypoint.py b/modelscope/models/cv/human_wholebody_keypoint/human_wholebody_keypoint.py deleted file mode 100644 index dd3c0290..00000000 --- a/modelscope/models/cv/human_wholebody_keypoint/human_wholebody_keypoint.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-from easycv.models.pose.top_down import TopDown - -from modelscope.metainfo import Models -from modelscope.models.builder import MODELS -from modelscope.models.cv.easycv_base import EasyCVBaseModel -from modelscope.utils.constant import Tasks - - -@MODELS.register_module( - group_key=Tasks.human_wholebody_keypoint, - module_name=Models.human_wholebody_keypoint) -class HumanWholeBodyKeypoint(EasyCVBaseModel, TopDown): - - def __init__(self, model_dir=None, *args, **kwargs): - EasyCVBaseModel.__init__(self, model_dir, args, kwargs) - TopDown.__init__(self, *args, **kwargs) diff --git a/modelscope/models/cv/image_defrcn_fewshot/utils/voc_register.py b/modelscope/models/cv/image_defrcn_fewshot/utils/voc_register.py index 7a94066e..0b043493 100644 --- a/modelscope/models/cv/image_defrcn_fewshot/utils/voc_register.py +++ b/modelscope/models/cv/image_defrcn_fewshot/utils/voc_register.py @@ -163,7 +163,7 @@ def load_filtered_voc_instances(name: str, root: str, dirname: str, split: str, os.path.join(split_dir, 'box_{}shot_{}_train.txt'.format(shot, cls))) as f: - fileids_ = np.loadtxt(f, dtype=np.str).tolist() + fileids_ = np.loadtxt(f, dtype=np.str_).tolist() if isinstance(fileids_, str): fileids_ = [fileids_] fileids_ = [ @@ -219,7 +219,7 @@ def load_filtered_voc_instances(name: str, root: str, dirname: str, split: str, with PathManager.open( os.path.join(root, dirname, 'ImageSets', 'Main', split + '.txt')) as f: - fileids = np.loadtxt(f, dtype=np.str) + fileids = np.loadtxt(f, dtype=np.str_) for fileid in fileids: anno_file = os.path.join(root, dirname, 'Annotations', diff --git a/modelscope/models/cv/image_instance_segmentation/__init__.py b/modelscope/models/cv/image_instance_segmentation/__init__.py index 60e688eb..8041a7e7 100644 --- a/modelscope/models/cv/image_instance_segmentation/__init__.py +++ b/modelscope/models/cv/image_instance_segmentation/__init__.py @@ -8,10 +8,12 @@ if TYPE_CHECKING: from .maskdino_swin import MaskDINOSwin from .model import CascadeMaskRCNNSwinModel from .maskdino_model import MaskDINOSwinModel + from .fastinst_model import FastInst from .postprocess_utils import get_img_ins_seg_result, get_maskdino_ins_seg_result else: _import_structure = { 'cascade_mask_rcnn_swin': ['CascadeMaskRCNNSwin'], + 'fastinst_model': ['FastInst'], 'maskdino_swin': ['MaskDINOSwin'], 'model': ['CascadeMaskRCNNSwinModel'], 'maskdino_model': ['MaskDINOSwinModel'], diff --git a/modelscope/models/cv/image_instance_segmentation/backbones/__init__.py b/modelscope/models/cv/image_instance_segmentation/backbones/__init__.py index bbeac51e..1e7325f3 100644 --- a/modelscope/models/cv/image_instance_segmentation/backbones/__init__.py +++ b/modelscope/models/cv/image_instance_segmentation/backbones/__init__.py @@ -6,10 +6,12 @@ from modelscope.utils.import_utils import LazyImportModule if TYPE_CHECKING: from .swin_transformer import SwinTransformer from .swin_transformer import D2SwinTransformer + from .resnet import build_resnet_backbone else: _import_structure = { 'swin_transformer': ['SwinTransformer', 'D2SwinTransformer'], + 'resnet': ['build_resnet_backbone'] } import sys diff --git a/modelscope/models/cv/image_instance_segmentation/backbones/resnet.py b/modelscope/models/cv/image_instance_segmentation/backbones/resnet.py new file mode 100644 index 00000000..4e2a5ec1 --- /dev/null +++ b/modelscope/models/cv/image_instance_segmentation/backbones/resnet.py @@ -0,0 +1,114 @@ +# Part of the implementation is borrowed and modified from Detectron2, publicly available at +# 
https://github.com/facebookresearch/detectron2/blob/main/projects/DeepLab/deeplab/resnet.py + +import torch.nn.functional as F +from torch import nn + +from modelscope.models.cv.image_human_parsing.backbone.deeplab_resnet import ( + BottleneckBlock, DeeplabResNet, get_norm) +from modelscope.models.cv.image_instance_segmentation.maskdino.utils import \ + Conv2d + + +class BasicStem(nn.Module): + """ + The standard ResNet stem (layers before the first residual block), + with a conv, relu and max_pool. + """ + + def __init__(self, in_channels=3, out_channels=64, norm='BN'): + """ + Args: + norm (str or callable): norm after the first conv layer. + See :func:`layers.get_norm` for supported format. + """ + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.stride = 4 + self.conv1 = Conv2d( + in_channels, + out_channels, + kernel_size=7, + stride=2, + padding=3, + bias=False, + norm=get_norm(norm, out_channels), + ) + + def forward(self, x): + x = self.conv1(x) + x = F.relu_(x) + x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1) + return x + + +def build_resnet_backbone(out_features, depth, num_groups, width_per_group, + norm, stem_out_channels, res2_out_channels, + stride_in_1x1, res4_dilation, res5_dilation, + res5_multi_grid, input_shape): + stem = BasicStem( + in_channels=input_shape['channels'], + out_channels=stem_out_channels, + norm=norm) + bottleneck_channels = num_groups * width_per_group + in_channels = stem_out_channels + out_channels = res2_out_channels + + assert res4_dilation in { + 1, 2 + }, 'res4_dilation cannot be {}.'.format(res4_dilation) + assert res5_dilation in { + 1, 2, 4 + }, 'res5_dilation cannot be {}.'.format(res5_dilation) + if res4_dilation == 2: + # Always dilate res5 if res4 is dilated. + assert res5_dilation == 4 + + num_blocks_per_stage = { + 50: [3, 4, 6, 3], + 101: [3, 4, 23, 3], + 152: [3, 8, 36, 3] + }[depth] + + stages = [] + out_stage_idx = [{ + 'res2': 2, + 'res3': 3, + 'res4': 4, + 'res5': 5 + }[f] for f in out_features] + max_stage_idx = max(out_stage_idx) + for idx, stage_idx in enumerate(range(2, max_stage_idx + 1)): + if stage_idx == 4: + dilation = res4_dilation + elif stage_idx == 5: + dilation = res5_dilation + else: + dilation = 1 + first_stride = 1 if idx == 0 or dilation > 1 else 2 + stride_per_block = [first_stride] + stride_per_block += [1] * (num_blocks_per_stage[idx] - 1) + stage_kargs = { + 'num_blocks': num_blocks_per_stage[idx], + 'stride_per_block': stride_per_block, + 'in_channels': in_channels, + 'out_channels': out_channels, + 'norm': norm, + 'bottleneck_channels': bottleneck_channels, + 'stride_in_1x1': stride_in_1x1, + 'dilation': dilation, + 'num_groups': num_groups, + 'block_class': BottleneckBlock + } + if stage_idx == 5: + stage_kargs.pop('dilation') + stage_kargs['dilation_per_block'] = [ + dilation * mg for mg in res5_multi_grid + ] + blocks = DeeplabResNet.make_stage(**stage_kargs) + in_channels = out_channels + out_channels *= 2 + bottleneck_channels *= 2 + stages.append(blocks) + return DeeplabResNet(stem, stages, out_features=out_features) diff --git a/modelscope/models/cv/image_instance_segmentation/fastinst/__init__.py b/modelscope/models/cv/image_instance_segmentation/fastinst/__init__.py new file mode 100644 index 00000000..b937315b --- /dev/null +++ b/modelscope/models/cv/image_instance_segmentation/fastinst/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
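As a rough illustration (not part of this patch), the new `build_resnet_backbone` factory above could be wired up as follows for a ResNet-50 trunk. The function and parameter names come from the diff; every concrete value (depth, channel widths, dilation settings, input channels) is an assumed placeholder that would normally come from the model's configuration, not from this change.

    # Sketch only: all argument values below are illustrative assumptions.
    from modelscope.models.cv.image_instance_segmentation.backbones import \
        build_resnet_backbone

    backbone = build_resnet_backbone(
        out_features=['res2', 'res3', 'res4', 'res5'],  # stages exposed downstream
        depth=50,                   # 50 -> [3, 4, 6, 3] bottleneck blocks per stage
        num_groups=1,               # 1 = plain ResNet; >1 = ResNeXt-style grouped conv
        width_per_group=64,
        norm='BN',
        stem_out_channels=64,
        res2_out_channels=256,
        stride_in_1x1=False,
        res4_dilation=1,
        res5_dilation=1,
        res5_multi_grid=[1, 1, 1],  # per-block dilation multipliers for res5
        input_shape={'channels': 3})

    # Each requested stage is then available by name (e.g. 'res5') in the
    # backbone's output dict, which is how the FastInst encoder below indexes
    # its input features.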
diff --git a/modelscope/models/cv/image_instance_segmentation/fastinst/fastinst_decoder.py b/modelscope/models/cv/image_instance_segmentation/fastinst/fastinst_decoder.py new file mode 100644 index 00000000..aa4300f6 --- /dev/null +++ b/modelscope/models/cv/image_instance_segmentation/fastinst/fastinst_decoder.py @@ -0,0 +1,351 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import math + +import torch +from torch import nn +from torch.nn import functional as F + +from modelscope.models.cv.image_colorization.ddcolor.utils.transformer_utils import ( + MLP, CrossAttentionLayer, FFNLayer, SelfAttentionLayer) + + +class QueryProposal(nn.Module): + + def __init__(self, num_features, num_queries, num_classes): + super().__init__() + self.topk = num_queries + self.num_classes = num_classes + + self.conv_proposal_cls_logits = nn.Sequential( + nn.Conv2d( + num_features, num_features, kernel_size=3, stride=1, + padding=1), + nn.ReLU(inplace=True), + nn.Conv2d( + num_features, + num_classes + 1, + kernel_size=1, + stride=1, + padding=0), + ) + + @torch.no_grad() + def compute_coordinates(self, x): + h, w = x.size(2), x.size(3) + y_loc = torch.linspace(0, 1, h, device=x.device) + x_loc = torch.linspace(0, 1, w, device=x.device) + y_loc, x_loc = torch.meshgrid(y_loc, x_loc) + locations = torch.stack([x_loc, y_loc], 0).unsqueeze(0) + return locations + + def seek_local_maximum(self, x, epsilon=1e-6): + """ + inputs: + x: torch.tensor, shape [b, c, h, w] + return: + torch.tensor, shape [b, c, h, w] + """ + x_pad = F.pad(x, (1, 1, 1, 1), 'constant', 0) + # top, bottom, left, right, top-left, top-right, bottom-left, bottom-right + maximum = (x >= x_pad[:, :, :-2, 1:-1]) & \ + (x >= x_pad[:, :, 2:, 1:-1]) & \ + (x >= x_pad[:, :, 1:-1, :-2]) & \ + (x >= x_pad[:, :, 1:-1, 2:]) & \ + (x >= x_pad[:, :, :-2, :-2]) & \ + (x >= x_pad[:, :, :-2, 2:]) & \ + (x >= x_pad[:, :, 2:, :-2]) & \ + (x >= x_pad[:, :, 2:, 2:]) & \ + (x >= epsilon) + return maximum.to(x) + + def forward(self, x, pos_embeddings): + + proposal_cls_logits = self.conv_proposal_cls_logits(x) # b, c, h, w + proposal_cls_probs = proposal_cls_logits.softmax(dim=1) # b, c, h, w + proposal_cls_one_hot = F.one_hot( + proposal_cls_probs[:, :-1, :, :].max(1)[1], + num_classes=self.num_classes + 1).permute(0, 3, 1, 2) # b, c, h, w + proposal_cls_probs = proposal_cls_probs.mul(proposal_cls_one_hot) + proposal_local_maximum_map = self.seek_local_maximum( + proposal_cls_probs) # b, c, h, w + proposal_cls_probs = proposal_cls_probs + proposal_local_maximum_map # b, c, h, w + + # top-k indices + topk_indices = torch.topk( + proposal_cls_probs[:, :-1, :, :].flatten(2).max(1)[0], + self.topk, + dim=1)[1] # b, q + topk_indices = topk_indices.unsqueeze(1) # b, 1, q + + # topk queries + topk_proposals = torch.gather( + x.flatten(2), dim=2, index=topk_indices.repeat(1, x.shape[1], + 1)) # b, c, q + pos_embeddings = pos_embeddings.repeat(x.shape[0], 1, 1, 1).flatten(2) + topk_pos_embeddings = torch.gather( + pos_embeddings, + dim=2, + index=topk_indices.repeat(1, pos_embeddings.shape[1], + 1)) # b, c, q + if self.training: + locations = self.compute_coordinates(x).repeat(x.shape[0], 1, 1, 1) + topk_locations = torch.gather( + locations.flatten(2), + dim=2, + index=topk_indices.repeat(1, locations.shape[1], 1)) + topk_locations = topk_locations.transpose(-1, -2) # b, q, 2 + else: + topk_locations = None + return topk_proposals, topk_pos_embeddings, topk_locations, proposal_cls_logits + + +class FastInstDecoder(nn.Module): + + def __init__(self, in_channels, *, 
num_classes: int, hidden_dim: int, + num_queries: int, num_aux_queries: int, nheads: int, + dim_feedforward: int, dec_layers: int, pre_norm: bool, + mask_dim: int): + """ + Args: + in_channels: channels of the input features + num_classes: number of classes + hidden_dim: Transformer feature dimension + num_queries: number of queries + num_aux_queries: number of auxiliary queries + nheads: number of heads + dim_feedforward: feature dimension in feedforward network + dec_layers: number of Transformer decoder layers + pre_norm: whether to use pre-LayerNorm or not + mask_dim: mask feature dimension + """ + super().__init__() + self.num_heads = nheads + self.num_layers = dec_layers + self.num_queries = num_queries + self.num_aux_queries = num_aux_queries + self.num_classes = num_classes + + meta_pos_size = int(round(math.sqrt(self.num_queries))) + self.meta_pos_embed = nn.Parameter( + torch.empty(1, hidden_dim, meta_pos_size, meta_pos_size)) + if num_aux_queries > 0: + self.empty_query_features = nn.Embedding(num_aux_queries, + hidden_dim) + self.empty_query_pos_embed = nn.Embedding(num_aux_queries, + hidden_dim) + + self.query_proposal = QueryProposal(hidden_dim, num_queries, + num_classes) + + self.transformer_query_cross_attention_layers = nn.ModuleList() + self.transformer_query_self_attention_layers = nn.ModuleList() + self.transformer_query_ffn_layers = nn.ModuleList() + self.transformer_mask_cross_attention_layers = nn.ModuleList() + self.transformer_mask_ffn_layers = nn.ModuleList() + for idx in range(self.num_layers): + self.transformer_query_cross_attention_layers.append( + CrossAttentionLayer( + d_model=hidden_dim, + nhead=nheads, + dropout=0.0, + normalize_before=pre_norm)) + self.transformer_query_self_attention_layers.append( + SelfAttentionLayer( + d_model=hidden_dim, + nhead=nheads, + dropout=0.0, + normalize_before=pre_norm)) + self.transformer_query_ffn_layers.append( + FFNLayer( + d_model=hidden_dim, + dim_feedforward=dim_feedforward, + dropout=0.0, + normalize_before=pre_norm)) + self.transformer_mask_cross_attention_layers.append( + CrossAttentionLayer( + d_model=hidden_dim, + nhead=nheads, + dropout=0.0, + normalize_before=pre_norm)) + self.transformer_mask_ffn_layers.append( + FFNLayer( + d_model=hidden_dim, + dim_feedforward=dim_feedforward, + dropout=0.0, + normalize_before=pre_norm)) + + self.decoder_query_norm_layers = nn.ModuleList() + self.class_embed_layers = nn.ModuleList() + self.mask_embed_layers = nn.ModuleList() + self.mask_features_layers = nn.ModuleList() + for idx in range(self.num_layers + 1): + self.decoder_query_norm_layers.append(nn.LayerNorm(hidden_dim)) + self.class_embed_layers.append( + MLP(hidden_dim, hidden_dim, num_classes + 1, 3)) + self.mask_embed_layers.append( + MLP(hidden_dim, hidden_dim, mask_dim, 3)) + self.mask_features_layers.append(nn.Linear(hidden_dim, mask_dim)) + + def forward(self, x, mask_features, targets=None): + bs = x[0].shape[0] + proposal_size = x[1].shape[-2:] + pixel_feature_size = x[2].shape[-2:] + + pixel_pos_embeds = F.interpolate( + self.meta_pos_embed, + size=pixel_feature_size, + mode='bilinear', + align_corners=False) + proposal_pos_embeds = F.interpolate( + self.meta_pos_embed, + size=proposal_size, + mode='bilinear', + align_corners=False) + + pixel_features = x[2].flatten(2).permute(2, 0, 1) + pixel_pos_embeds = pixel_pos_embeds.flatten(2).permute(2, 0, 1) + + query_features, query_pos_embeds, query_locations, proposal_cls_logits = self.query_proposal( + x[1], proposal_pos_embeds) + query_features = 
query_features.permute(2, 0, 1) + query_pos_embeds = query_pos_embeds.permute(2, 0, 1) + if self.num_aux_queries > 0: + aux_query_features = self.empty_query_features.weight.unsqueeze( + 1).repeat(1, bs, 1) + aux_query_pos_embed = self.empty_query_pos_embed.weight.unsqueeze( + 1).repeat(1, bs, 1) + query_features = torch.cat([query_features, aux_query_features], + dim=0) + query_pos_embeds = torch.cat( + [query_pos_embeds, aux_query_pos_embed], dim=0) + + outputs_class, outputs_mask, attn_mask, _, _ = self.forward_prediction_heads( + query_features, + pixel_features, + pixel_feature_size, + -1, + return_attn_mask=True) + predictions_class = [outputs_class] + predictions_mask = [outputs_mask] + predictions_matching_index = [None] + query_feature_memory = [query_features] + pixel_feature_memory = [pixel_features] + + for i in range(self.num_layers): + query_features, pixel_features = self.forward_one_layer( + query_features, pixel_features, query_pos_embeds, + pixel_pos_embeds, attn_mask, i) + if i < self.num_layers - 1: + outputs_class, outputs_mask, attn_mask, _, _ = self.forward_prediction_heads( + query_features, + pixel_features, + pixel_feature_size, + i, + return_attn_mask=True, + ) + else: + outputs_class, outputs_mask, _, matching_indices, gt_attn_mask = self.forward_prediction_heads( + query_features, + pixel_features, + pixel_feature_size, + i, + ) + predictions_class.append(outputs_class) + predictions_mask.append(outputs_mask) + predictions_matching_index.append(None) + query_feature_memory.append(query_features) + pixel_feature_memory.append(pixel_features) + + out = { + 'proposal_cls_logits': + proposal_cls_logits, + 'query_locations': + query_locations, + 'pred_logits': + predictions_class[-1], + 'pred_masks': + predictions_mask[-1], + 'pred_indices': + predictions_matching_index[-1], + 'aux_outputs': + self._set_aux_loss(predictions_class, predictions_mask, + predictions_matching_index, query_locations) + } + return out + + def forward_one_layer(self, query_features, pixel_features, + query_pos_embeds, pixel_pos_embeds, attn_mask, i): + pixel_features = self.transformer_mask_cross_attention_layers[i]( + pixel_features, + query_features, + query_pos=pixel_pos_embeds, + pos=query_pos_embeds) + pixel_features = self.transformer_mask_ffn_layers[i](pixel_features) + + query_features = self.transformer_query_cross_attention_layers[i]( + query_features, + pixel_features, + memory_mask=attn_mask, + query_pos=query_pos_embeds, + pos=pixel_pos_embeds) + query_features = self.transformer_query_self_attention_layers[i]( + query_features, query_pos=query_pos_embeds) + query_features = self.transformer_query_ffn_layers[i](query_features) + return query_features, pixel_features + + def forward_prediction_heads(self, + query_features, + pixel_features, + pixel_feature_size, + idx_layer, + return_attn_mask=False, + return_gt_attn_mask=False, + targets=None, + query_locations=None): + decoder_query_features = self.decoder_query_norm_layers[idx_layer + 1]( + query_features[:self.num_queries]) + decoder_query_features = decoder_query_features.transpose(0, 1) + if idx_layer + 1 == self.num_layers: + outputs_class = self.class_embed_layers[idx_layer + 1]( + decoder_query_features) + else: + outputs_class = None + outputs_mask_embed = self.mask_embed_layers[idx_layer + 1]( + decoder_query_features) + outputs_mask_features = self.mask_features_layers[idx_layer + 1]( + pixel_features.transpose(0, 1)) + + outputs_mask = torch.einsum('bqc,blc->bql', outputs_mask_embed, + outputs_mask_features) + 
outputs_mask = outputs_mask.reshape(-1, self.num_queries, + *pixel_feature_size) + + if return_attn_mask: + # outputs_mask.shape: b, q, h, w + attn_mask = F.pad(outputs_mask, + (0, 0, 0, 0, 0, self.num_aux_queries), + 'constant', 1) + attn_mask = (attn_mask < 0.).flatten(2) # b, q, hw + invalid_query = attn_mask.all(-1, keepdim=True) # b, q, 1 + attn_mask = (~invalid_query) & attn_mask # b, q, hw + attn_mask = attn_mask.unsqueeze(1).repeat(1, self.num_heads, 1, + 1).flatten(0, 1) + attn_mask = attn_mask.detach() + else: + attn_mask = None + + matching_indices = None + gt_attn_mask = None + + return outputs_class, outputs_mask, attn_mask, matching_indices, gt_attn_mask + + @torch.jit.unused + def _set_aux_loss(self, outputs_class, outputs_seg_masks, output_indices, + output_query_locations): + return [{ + 'query_locations': output_query_locations, + 'pred_logits': a, + 'pred_masks': b, + 'pred_matching_indices': c + } for a, b, c in zip(outputs_class[:-1], outputs_seg_masks[:-1], + output_indices[:-1])] diff --git a/modelscope/models/cv/image_instance_segmentation/fastinst/fastinst_encoder.py b/modelscope/models/cv/image_instance_segmentation/fastinst/fastinst_encoder.py new file mode 100644 index 00000000..46b3f74d --- /dev/null +++ b/modelscope/models/cv/image_instance_segmentation/fastinst/fastinst_encoder.py @@ -0,0 +1,180 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import logging +from typing import Callable, Optional, Union + +import torch +from torch import nn +from torch.nn import functional as F + +from modelscope.models.cv.image_instance_segmentation.maskdino.utils import \ + Conv2d + + +# This is a modified FPN decoder. +class BaseFPN(nn.Module): + + def __init__( + self, + input_shape, + *, + convs_dim: int, + mask_dim: int, + norm: Optional[Union[str, Callable]] = None, + ): + """ + Args: + input_shape: shapes (channels and stride) of the input features + convs_dim: number of output channels for the intermediate conv layers. + mask_dim: number of output channels for the final conv layer. + norm (str or callable): normalization for all conv layers + """ + super().__init__() + + input_shape = sorted(input_shape.items(), key=lambda x: x[1]['stride']) + self.in_features = [k for k, v in input_shape + ] # starting from "res3" to "res5" + feature_channels = [v['channels'] for k, v in input_shape] + + lateral_convs = [] + output_convs = [] + + use_bias = norm == '' + for idx, in_channels in enumerate(feature_channels): + lateral_norm = nn.GroupNorm(32, convs_dim) + output_norm = nn.GroupNorm(32, convs_dim) + + lateral_conv = Conv2d( + in_channels, + convs_dim, + kernel_size=1, + bias=use_bias, + norm=lateral_norm) + output_conv = Conv2d( + convs_dim, + convs_dim, + kernel_size=3, + stride=1, + padding=1, + bias=use_bias, + norm=output_norm, + activation=F.relu, + ) + self.add_module('adapter_{}'.format(idx + 1), lateral_conv) + self.add_module('layer_{}'.format(idx + 1), output_conv) + + lateral_convs.append(lateral_conv) + output_convs.append(output_conv) + # Place convs into top-down order (from low to high resolution) + # to make the top-down computation in forward clearer. 
+ self.lateral_convs = lateral_convs[::-1] + self.output_convs = output_convs[::-1] + + self.convs_dim = convs_dim + self.num_feature_levels = 3 # always use 3 scales + + def forward_features(self, features): + multi_scale_features = [] + num_cur_levels = 0 + # Reverse feature maps into top-down order (from low to high resolution) + for idx, f in enumerate(self.in_features[::-1]): + x = features[f] + lateral_conv = self.lateral_convs[idx] + output_conv = self.output_convs[idx] + if idx == 0: + y = lateral_conv(x) + else: + cur_fpn = lateral_conv(x) + y = cur_fpn + F.interpolate( + y, + size=cur_fpn.shape[-2:], + mode='bilinear', + align_corners=False) + y = output_conv(y) + + if num_cur_levels < self.num_feature_levels: + multi_scale_features.append(y) + num_cur_levels += 1 + return None, multi_scale_features + + def forward(self, features, targets=None): + logger = logging.getLogger(__name__) + logger.warning( + 'Calling forward() may cause unpredicted behavior of PixelDecoder module.' + ) + return self.forward_features(features) + + +class PyramidPoolingModule(nn.Module): + + def __init__(self, in_channels, channels=512, sizes=(1, 2, 3, 6)): + super().__init__() + self.stages = [] + self.stages = nn.ModuleList( + [self._make_stage(in_channels, channels, size) for size in sizes]) + self.bottleneck = Conv2d(in_channels + len(sizes) * channels, + in_channels, 1) + + def _make_stage(self, features, out_features, size): + prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) + conv = Conv2d(features, out_features, 1) + return nn.Sequential(prior, conv) + + def forward(self, feats): + h, w = feats.size(2), feats.size(3) + priors = [ + F.interpolate( + input=F.relu_(stage(feats)), + size=(h, w), + mode='bilinear', + align_corners=False) for stage in self.stages + ] + [feats] + out = F.relu_(self.bottleneck(torch.cat(priors, 1))) + return out + + +class PyramidPoolingModuleFPN(BaseFPN): + + def __init__( + self, + input_shape, + *, + convs_dim: int, + mask_dim: int, + norm: Optional[Union[str, Callable]] = None, + ): + """ + NOTE: this interface is experimental. + Args: + input_shape: shapes (channels and stride) of the input features + convs_dim: number of output channels for the intermediate conv layers. + mask_dim: number of output channels for the final conv layer. 
+ norm (str or callable): normalization for all conv layers + """ + super().__init__( + input_shape, convs_dim=convs_dim, mask_dim=mask_dim, norm=norm) + self.ppm = PyramidPoolingModule(convs_dim, convs_dim // 4) + + def forward_features(self, features): + multi_scale_features = [] + num_cur_levels = 0 + # Reverse feature maps into top-down order (from low to high resolution) + for idx, f in enumerate(self.in_features[::-1]): + x = features[f] + lateral_conv = self.lateral_convs[idx] + output_conv = self.output_convs[idx] + if idx == 0: + y = self.ppm(lateral_conv(x)) + else: + cur_fpn = lateral_conv(x) + y = cur_fpn + F.interpolate( + y, + size=cur_fpn.shape[-2:], + mode='bilinear', + align_corners=False) + y = output_conv(y) + + if num_cur_levels < self.num_feature_levels: + multi_scale_features.append(y) + num_cur_levels += 1 + + return None, multi_scale_features diff --git a/modelscope/models/cv/image_instance_segmentation/fastinst_model.py b/modelscope/models/cv/image_instance_segmentation/fastinst_model.py new file mode 100644 index 00000000..f9cfbc4f --- /dev/null +++ b/modelscope/models/cv/image_instance_segmentation/fastinst_model.py @@ -0,0 +1,221 @@ +# Part of implementation is borrowed and modified from Mask2Former, publicly available at +# https://github.com/facebookresearch/Mask2Former. +import os +from typing import Any, Dict, List + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from modelscope.metainfo import Models +from modelscope.models.base import TorchModel +from modelscope.models.builder import MODELS +from modelscope.models.cv.image_instance_segmentation.maskdino_swin import \ + ImageList +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger +from .backbones import build_resnet_backbone +from .fastinst.fastinst_decoder import FastInstDecoder +from .fastinst.fastinst_encoder import PyramidPoolingModuleFPN + +logger = get_logger() + + +@MODELS.register_module(Tasks.image_segmentation, module_name=Models.fastinst) +class FastInst(TorchModel): + + def __init__(self, + model_dir, + backbone=None, + encoder=None, + decoder=None, + pretrained=None, + classes=None, + **kwargs): + """ + Deep Learning Technique for Human Parsing: A Survey and Outlook. See https://arxiv.org/abs/2301.00394 + Args: + backbone (dict): backbone config. + encoder (dict): encoder config. + decoder (dict): decoder config. 
+ pretrained (bool): whether to use pretrained model + classes (list): class names + """ + super(FastInst, self).__init__(model_dir, **kwargs) + + self.backbone = build_resnet_backbone( + **backbone, input_shape={'channels': 3}) + in_features = encoder.pop('in_features') + input_shape = { + k: v + for k, v in self.backbone.output_shape().items() + if k in in_features + } + encoder = PyramidPoolingModuleFPN(input_shape=input_shape, **encoder) + decoder = FastInstDecoder(in_channels=encoder.convs_dim, **decoder) + self.sem_seg_head = FastInstHead( + pixel_decoder=encoder, transformer_predictor=decoder) + + self.num_classes = decoder.num_classes + self.num_queries = decoder.num_queries + self.size_divisibility = 32 + self.register_buffer( + 'pixel_mean', + torch.Tensor([123.675, 116.28, 103.53]).view(-1, 1, 1), False) + self.register_buffer( + 'pixel_std', + torch.Tensor([58.395, 57.12, 57.375]).view(-1, 1, 1), False) + self.classes = classes + self.test_topk_per_image = 100 + + if pretrained: + model_path = os.path.join(model_dir, ModelFile.TORCH_MODEL_FILE) + logger.info(f'loading model from {model_path}') + weight = torch.load(model_path, map_location='cpu')['model'] + tgt_weight = self.state_dict() + for name in list(weight.keys()): + if name in tgt_weight: + load_size = weight[name].size() + tgt_size = tgt_weight[name].size() + mis_match = False + if len(load_size) != len(tgt_size): + mis_match = True + else: + for n1, n2 in zip(load_size, tgt_size): + if n1 != n2: + mis_match = True + break + if mis_match: + logger.info( + f'size mismatch for {name} ' + f'({load_size} -> {tgt_size}), skip loading.') + del weight[name] + else: + logger.info( + f'{name} doesn\'t exist in current model, skip loading.' + ) + + self.load_state_dict(weight, strict=False) + logger.info('load model done') + + def forward(self, batched_inputs: List[dict]) -> Dict[str, Any]: + images = [x['image'].to(self.device) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors(images, self.size_divisibility) + + features = self.backbone(images.tensor) + outputs = self.sem_seg_head(features) + + return dict( + outputs=outputs, batched_inputs=batched_inputs, images=images) + + def postprocess(self, input: Dict[str, Any]) -> Dict[str, Any]: + outputs = input['outputs'] + batched_inputs = input['batched_inputs'] + images = input['images'] + if self.training: + raise NotImplementedError + else: + mask_cls_results = outputs['pred_logits'] # (B, Q, C+1) + mask_pred_results = outputs['pred_masks'] # (B, Q, H, W) + # upsample masks + mask_pred_results = F.interpolate( + mask_pred_results, + size=(images.tensor.shape[-2], images.tensor.shape[-1]), + mode='bilinear', + align_corners=False, + ) + + del outputs + + processed_results = [] + for mask_cls_result, mask_pred_result, input_per_image, image_size in zip( + mask_cls_results, mask_pred_results, batched_inputs, + images.image_sizes): + height = input_per_image.get('height', image_size[0]) + width = input_per_image.get('width', image_size[1]) + processed_results.append({}) # for each image + + mask_pred_result = self.sem_seg_postprocess( + mask_pred_result, image_size, height, width) + mask_cls_result = mask_cls_result.to(mask_pred_result) + + instance_r = self.instance_inference(mask_cls_result, + mask_pred_result) + processed_results[-1]['instances'] = instance_r + + return dict(eval_result=processed_results) + + @property + def device(self): + return self.pixel_mean.device + + def sem_seg_postprocess(self, 
result, img_size, output_height, + output_width): + result = result[:, :img_size[0], :img_size[1]].expand(1, -1, -1, -1) + result = F.interpolate( + result, + size=(output_height, output_width), + mode='bilinear', + align_corners=False)[0] + return result + + def instance_inference(self, mask_cls, mask_pred): + # mask_pred is already processed to have the same shape as original input + image_size = mask_pred.shape[-2:] + + # [Q, K] + scores = F.softmax(mask_cls, dim=-1)[:, :-1] + labels = torch.arange( + self.num_classes, + device=self.device).unsqueeze(0).repeat(self.num_queries, + 1).flatten(0, 1) + scores_per_image, topk_indices = scores.flatten(0, 1).topk( + self.test_topk_per_image, sorted=False) + labels_per_image = labels[topk_indices] + + topk_indices = topk_indices // self.num_classes + mask_pred = mask_pred[topk_indices] + + result = {'image_size': image_size} + # mask (before sigmoid) + mask_pred_sigmoid = mask_pred.sigmoid() + result['pred_masks'] = (mask_pred_sigmoid > 0.5).float() + + # calculate average mask prob + mask_scores_per_image = (mask_pred_sigmoid.flatten(1) + * result['pred_masks'].flatten(1)).sum(1) / ( + result['pred_masks'].flatten(1).sum(1) + + 1e-6) + result['scores'] = scores_per_image * mask_scores_per_image + result['pred_classes'] = labels_per_image + return result + + +class FastInstHead(nn.Module): + + def __init__( + self, + *, + pixel_decoder: nn.Module, + # extra parameters + transformer_predictor: nn.Module): + """ + NOTE: this interface is experimental. + Args: + pixel_decoder: the pixel decoder module + transformer_predictor: the transformer decoder that makes prediction + """ + super().__init__() + self.pixel_decoder = pixel_decoder + self.predictor = transformer_predictor + + def forward(self, features, targets=None): + return self.layers(features, targets) + + def layers(self, features, targets=None): + mask_features, multi_scale_features = self.pixel_decoder.forward_features( + features) + predictions = self.predictor(multi_scale_features, mask_features, + targets) + return predictions diff --git a/modelscope/models/cv/image_instance_segmentation/postprocess_utils.py b/modelscope/models/cv/image_instance_segmentation/postprocess_utils.py index fdbb2fb0..aad7d8e9 100644 --- a/modelscope/models/cv/image_instance_segmentation/postprocess_utils.py +++ b/modelscope/models/cv/image_instance_segmentation/postprocess_utils.py @@ -108,16 +108,16 @@ def get_img_ins_seg_result(img_seg_result=None, for seg_result in img_seg_result: box = [ - np.int(seg_result[0]), - np.int(seg_result[1]), - np.int(seg_result[2]), - np.int(seg_result[3]) + int(seg_result[0]), + int(seg_result[1]), + int(seg_result[2]), + int(seg_result[3]) ] - score = np.float(seg_result[4]) + score = float(seg_result[4]) category = seg_result[5] mask = np.array(seg_result[6], order='F', dtype='uint8') - mask = mask.astype(np.float) + mask = mask.astype(float) results_dict[OutputKeys.BOXES].append(box) results_dict[OutputKeys.MASKS].append(mask) diff --git a/modelscope/models/cv/image_mvs_depth_estimation/colmap2mvsnet.py b/modelscope/models/cv/image_mvs_depth_estimation/colmap2mvsnet.py index feda4430..37d92c13 100644 --- a/modelscope/models/cv/image_mvs_depth_estimation/colmap2mvsnet.py +++ b/modelscope/models/cv/image_mvs_depth_estimation/colmap2mvsnet.py @@ -382,7 +382,7 @@ def processing_single_scene(args): points3d[p3d_id].xyz[0], points3d[p3d_id].xyz[1], points3d[p3d_id].xyz[2], 1 ]) - zs.append(np.asscalar(transformed[2])) + zs.append(transformed[2].item()) zs_sorted = sorted(zs) # 
relaxed depth range max_ratio = 0.1 diff --git a/modelscope/models/cv/image_mvs_depth_estimation/depth_filter.py b/modelscope/models/cv/image_mvs_depth_estimation/depth_filter.py index 16cdedf4..4ef6275a 100644 --- a/modelscope/models/cv/image_mvs_depth_estimation/depth_filter.py +++ b/modelscope/models/cv/image_mvs_depth_estimation/depth_filter.py @@ -40,7 +40,7 @@ def read_mask(filename): # save a binary mask def save_mask(filename, mask): - assert mask.dtype == np.bool + assert mask.dtype == bool mask = mask.astype(np.uint8) * 255 Image.fromarray(mask).save(filename) diff --git a/modelscope/models/cv/image_panoptic_segmentation/__init__.py b/modelscope/models/cv/image_panoptic_segmentation/__init__.py index 1af5b6f8..2b2be4b7 100644 --- a/modelscope/models/cv/image_panoptic_segmentation/__init__.py +++ b/modelscope/models/cv/image_panoptic_segmentation/__init__.py @@ -5,7 +5,6 @@ from modelscope.utils.import_utils import LazyImportModule if TYPE_CHECKING: from .panseg_model import SwinLPanopticSegmentation - from .r50_panseg_model import R50PanopticSegmentation else: _import_structure = { diff --git a/modelscope/models/cv/image_panoptic_segmentation/r50_panseg_model.py b/modelscope/models/cv/image_panoptic_segmentation/r50_panseg_model.py deleted file mode 100644 index 73b6b76c..00000000 --- a/modelscope/models/cv/image_panoptic_segmentation/r50_panseg_model.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. - -from easycv.models.segmentation import Mask2Former - -from modelscope.metainfo import Models -from modelscope.models.builder import MODELS -from modelscope.models.cv.easycv_base import EasyCVBaseModel -from modelscope.utils.constant import Tasks - - -@MODELS.register_module( - group_key=Tasks.image_segmentation, - module_name=Models.r50_panoptic_segmentation) -class R50PanopticSegmentation(EasyCVBaseModel, Mask2Former): - - def __init__(self, model_dir=None, *args, **kwargs): - EasyCVBaseModel.__init__(self, model_dir, args, kwargs) - Mask2Former.__init__(self, *args, **kwargs) diff --git a/modelscope/models/cv/image_semantic_segmentation/segformer.py b/modelscope/models/cv/image_semantic_segmentation/segformer.py deleted file mode 100644 index 46303526..00000000 --- a/modelscope/models/cv/image_semantic_segmentation/segformer.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-from easycv.models.segmentation import EncoderDecoder - -from modelscope.metainfo import Models -from modelscope.models.builder import MODELS -from modelscope.models.cv.easycv_base import EasyCVBaseModel -from modelscope.utils.constant import Tasks - - -@MODELS.register_module( - group_key=Tasks.image_segmentation, module_name=Models.segformer) -class Segformer(EasyCVBaseModel, EncoderDecoder): - - def __init__(self, model_dir=None, *args, **kwargs): - EasyCVBaseModel.__init__(self, model_dir, args, kwargs) - EncoderDecoder.__init__(self, *args, **kwargs) diff --git a/modelscope/models/cv/image_semantic_segmentation/semantic_seg_model.py b/modelscope/models/cv/image_semantic_segmentation/semantic_seg_model.py index 2b38ebad..455f29fb 100644 --- a/modelscope/models/cv/image_semantic_segmentation/semantic_seg_model.py +++ b/modelscope/models/cv/image_semantic_segmentation/semantic_seg_model.py @@ -60,7 +60,7 @@ class SemanticSegmentation(TorchModel): ids = ids[legal_indices] segms = (semantic_result[None] == ids[:, None, None]) - masks = [it.astype(np.int) for it in segms] + masks = [it.astype(int) for it in segms] labels_txt = np.array(self.CLASSES)[ids].tolist() results = { diff --git a/modelscope/models/cv/image_skychange/ptsemseg/hrnet_backnone.py b/modelscope/models/cv/image_skychange/ptsemseg/hrnet_backnone.py index 66429d67..8fcb6625 100644 --- a/modelscope/models/cv/image_skychange/ptsemseg/hrnet_backnone.py +++ b/modelscope/models/cv/image_skychange/ptsemseg/hrnet_backnone.py @@ -458,7 +458,7 @@ class HrnetBackBone(nn.Module): self.stage4, pre_stage_channels = self._make_stage( self.stage4_cfg, num_channels, multi_scale_output=True) - self.backbone_last_inp_channels = np.int(np.sum(pre_stage_channels)) + self.backbone_last_inp_channels = int(np.sum(pre_stage_channels)) def _make_transition_layer(self, num_channels_pre_layer, num_channels_cur_layer): diff --git a/modelscope/models/cv/image_skychange/ptsemseg/hrnet_super_and_ocr.py b/modelscope/models/cv/image_skychange/ptsemseg/hrnet_super_and_ocr.py index 09768451..5dbef66e 100644 --- a/modelscope/models/cv/image_skychange/ptsemseg/hrnet_super_and_ocr.py +++ b/modelscope/models/cv/image_skychange/ptsemseg/hrnet_super_and_ocr.py @@ -259,7 +259,7 @@ class HrnetSuperAndOcr(HrnetBackBone): num_channels = [64, last_inp_channels] self.stage_super, super_stage_channels = self._make_stage( self.super_dict, num_channels) - last_inp_channels = np.int(np.sum(super_stage_channels)) + last_inp_channels = int(np.sum(super_stage_channels)) if self.is_contain_aspp: aspp_param = kwargs['aspp'] @@ -372,7 +372,7 @@ class HrnetSuperAndOcr(HrnetBackBone): num_channels = [64, ocr_mid_channels] self.stage_super, super_stage_channels = self._make_stage( self.super_dict, num_channels) - last_inp_channels = np.int(np.sum(super_stage_channels)) + last_inp_channels = int(np.sum(super_stage_channels)) self.cls_head = nn.Sequential( nn.Conv2d( diff --git a/modelscope/models/cv/movie_scene_segmentation/model.py b/modelscope/models/cv/movie_scene_segmentation/model.py index 336af3b3..818a3876 100644 --- a/modelscope/models/cv/movie_scene_segmentation/model.py +++ b/modelscope/models/cv/movie_scene_segmentation/model.py @@ -13,7 +13,8 @@ import torch.nn as nn import torch.nn.functional as F import torchvision.transforms as TF from PIL import Image -from shotdetect_scenedetect_lgss import shot_detect +from shotdetect_scenedetect_lgss import shot_detector +from tqdm import tqdm from modelscope.metainfo import Models from modelscope.models.base.base_torch_model 
import TorchModel @@ -60,6 +61,9 @@ class MovieSceneSegmentationModel(TorchModel): self.head_sbd = nn.Linear(hdim, 2) load_param_with_prefix('head_sbd', self.head_sbd, params) + self.shot_detector = shot_detector() + self.shot_detector.init(**self.cfg.preprocessor.shot_detect) + self.test_transform = TF.Compose([ TF.Resize(size=256, interpolation=Image.BICUBIC), TF.CenterCrop(224), @@ -98,29 +102,45 @@ class MovieSceneSegmentationModel(TorchModel): def inference(self, batch): logger.info('Begin scene detect ......') bs = self.cfg.pipeline.batch_size_per_gpu - sids = batch['sid'] - inputs = batch['shot_feat'] + device = self.crn.attention_mask.device - shot_num = len(sids) + shot_timecode_lst = batch['shot_timecode_lst'] + shot_idx_lst = batch['shot_idx_lst'] + + shot_num = len(shot_timecode_lst) cnt = math.ceil(shot_num / bs) - infer_sid, infer_pred = [], [] + infer_pred = [] infer_result = {} - for i in range(cnt): + self.shot_detector.start() + + for i in tqdm(range(cnt)): start = i * bs end = (i + 1) * bs if (i + 1) * bs < shot_num else shot_num - input_ = inputs[start:end] - sid_ = sids[start:end] - input_ = torch.stack(input_) + + batch_shot_idx_lst = shot_idx_lst[start:end] + + shot_start_idx = batch_shot_idx_lst[0][0] + shot_end_idx = batch_shot_idx_lst[-1][-1] + batch_timecode_lst = { + i: shot_timecode_lst[i] + for i in range(shot_start_idx, shot_end_idx + 1) + } + batch_shot_keyf_lst = self.shot_detector.get_frame_img( + batch_timecode_lst, shot_start_idx, shot_num) + inputs = self.get_batch_input(batch_shot_keyf_lst, shot_start_idx, + batch_shot_idx_lst) + + input_ = torch.stack(inputs).to(device) outputs = self.shared_step(input_) # shape [b,2] prob = F.softmax(outputs, dim=1) - infer_sid.extend(sid_.cpu().detach().numpy()) infer_pred.extend(prob[:, 1].cpu().detach().numpy()) - infer_result.update({'pred': np.stack(infer_pred)}) - infer_result.update({'sid': infer_sid}) - assert len(infer_result['sid']) == len(sids) - assert len(infer_result['pred']) == len(inputs) + infer_result.update({'pred': np.stack(infer_pred)}) + infer_result.update({'sid': np.arange(shot_num)}) + + assert len(infer_result['pred']) == shot_num + self.shot_detector.release() return infer_result def shared_step(self, inputs): @@ -162,38 +182,48 @@ class MovieSceneSegmentationModel(TorchModel): logger.info('Generate scene .......') pred_dict = inputs['feat'] + shot2keyf = inputs['shot2keyf'] thres = self.cfg.pipeline.save_threshold anno_dict = get_pred_boundary(pred_dict, thres) scene_dict_lst, scene_list, shot_num, shot_dict_lst = pred2scene( - self.shot2keyf, anno_dict) + shot2keyf, anno_dict) if self.cfg.pipeline.save_split_scene: re_dir = scene2video(inputs['input_video_pth'], scene_list, thres) print(f'Split scene video saved to {re_dir}') return len(scene_list), scene_dict_lst, shot_num, shot_dict_lst - def preprocess(self, inputs): - logger.info('Begin shot detect......') - shot_keyf_lst, anno, shot2keyf = shot_detect( - inputs, **self.cfg.preprocessor.shot_detect) - logger.info('Shot detect done!') + def get_batch_input(self, shot_keyf_lst, shot_start_idx, shot_idx_lst): - single_shot_feat, sid = [], [] + single_shot_feat = [] for idx, one_shot in enumerate(shot_keyf_lst): one_shot = [ self.test_transform(one_frame) for one_frame in one_shot ] one_shot = torch.stack(one_shot, dim=0) single_shot_feat.append(one_shot) - sid.append(idx) + single_shot_feat = torch.stack(single_shot_feat, dim=0) + shot_feat = [] + for idx, shot_idx in enumerate(shot_idx_lst): + shot_idx_ = shot_idx - shot_start_idx + 
_one_shot = single_shot_feat[shot_idx_] + shot_feat.append(_one_shot) + + return shot_feat + + def preprocess(self, inputs): + logger.info('Begin shot detect......') + shot_timecode_lst, anno, shot2keyf = self.shot_detector.shot_detect( + inputs, **self.cfg.preprocessor.shot_detect) + logger.info('Shot detect done!') + + shot_idx_lst = [] for idx, one_shot in enumerate(anno): shot_idx = int(one_shot['shot_id']) + np.arange( -self.neighbor_size, self.neighbor_size + 1) - shot_idx = np.clip(shot_idx, 0, one_shot['num_shot']) - _one_shot = single_shot_feat[shot_idx] - shot_feat.append(_one_shot) - self.shot2keyf = shot2keyf - self.anno = anno - return shot_feat, sid + shot_idx = np.clip(shot_idx, 0, one_shot['num_shot'] - 1) + shot_idx_lst.append(shot_idx) + + return shot2keyf, anno, shot_timecode_lst, shot_idx_lst diff --git a/modelscope/models/cv/movie_scene_segmentation/utils/save_op.py b/modelscope/models/cv/movie_scene_segmentation/utils/save_op.py index 49155716..34bebce0 100644 --- a/modelscope/models/cv/movie_scene_segmentation/utils/save_op.py +++ b/modelscope/models/cv/movie_scene_segmentation/utils/save_op.py @@ -10,11 +10,12 @@ from tqdm import tqdm def get_pred_boundary(pred_dict, threshold=0.5): - pred = pred_dict['pred'] + pred = pred_dict['pred'].cpu().numpy() + sid = pred_dict['sid'].cpu().numpy().astype(np.int32) tmp = (pred > threshold).astype(np.int32) anno_dict = {} for idx in range(len(tmp)): - anno_dict.update({str(pred_dict['sid'][idx]).zfill(4): int(tmp[idx])}) + anno_dict.update({str(sid[idx]).zfill(4): int(tmp[idx])}) return anno_dict diff --git a/modelscope/models/cv/nerf_recon_acc/network/segmenter.py b/modelscope/models/cv/nerf_recon_acc/network/segmenter.py index d71b9f16..e3d0ca8d 100644 --- a/modelscope/models/cv/nerf_recon_acc/network/segmenter.py +++ b/modelscope/models/cv/nerf_recon_acc/network/segmenter.py @@ -31,7 +31,7 @@ class ObjectSegmenter(object): elif img.shape[2] == 4: img = img[:, :, :3] img = img[:, :, ::-1] - img = img.astype(np.float) + img = img.astype(float) return img def run_mask(self, img): diff --git a/modelscope/models/cv/object_detection/dino.py b/modelscope/models/cv/object_detection/dino.py deleted file mode 100644 index e6c652f1..00000000 --- a/modelscope/models/cv/object_detection/dino.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from easycv.models.detection.detectors import Detection as _Detection - -from modelscope.metainfo import Models -from modelscope.models.builder import MODELS -from modelscope.models.cv.easycv_base import EasyCVBaseModel -from modelscope.utils.constant import Tasks - - -@MODELS.register_module( - group_key=Tasks.image_object_detection, module_name=Models.dino) -class DINO(EasyCVBaseModel, _Detection): - - def __init__(self, model_dir=None, *args, **kwargs): - EasyCVBaseModel.__init__(self, model_dir, args, kwargs) - _Detection.__init__(self, *args, **kwargs) diff --git a/modelscope/models/cv/object_detection/yolox_pai.py b/modelscope/models/cv/object_detection/yolox_pai.py deleted file mode 100644 index 7888cf82..00000000 --- a/modelscope/models/cv/object_detection/yolox_pai.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-from easycv.models.detection.detectors import YOLOX as _YOLOX - -from modelscope.metainfo import Models -from modelscope.models.builder import MODELS -from modelscope.models.cv.easycv_base import EasyCVBaseModel -from modelscope.utils.constant import Tasks - - -@MODELS.register_module( - group_key=Tasks.image_object_detection, module_name=Models.yolox) -@MODELS.register_module( - group_key=Tasks.image_object_detection, - module_name=Models.image_object_detection_auto) -@MODELS.register_module( - group_key=Tasks.domain_specific_object_detection, module_name=Models.yolox) -class YOLOX(EasyCVBaseModel, _YOLOX): - - def __init__(self, model_dir=None, *args, **kwargs): - EasyCVBaseModel.__init__(self, model_dir, args, kwargs) - _YOLOX.__init__(self, *args, **kwargs) diff --git a/modelscope/models/cv/object_detection_3d/depe/result_vis.py b/modelscope/models/cv/object_detection_3d/depe/result_vis.py index d577ab68..efaef0b5 100644 --- a/modelscope/models/cv/object_detection_3d/depe/result_vis.py +++ b/modelscope/models/cv/object_detection_3d/depe/result_vis.py @@ -30,7 +30,7 @@ def depth2color(depth): if gray == 1: return tuple(colors[-1].tolist()) num_rank = len(colors) - 1 - rank = np.floor(gray * num_rank).astype(np.int) + rank = np.floor(gray * num_rank).astype(int) diff = (gray - rank / num_rank) * num_rank tmp = colors[rank + 1] - colors[rank] return tuple((colors[rank] + tmp * diff).tolist()) @@ -136,7 +136,7 @@ def plot_result(res_path, l2g = get_lidar2global(infos) corners_lidar = corners_global @ np.linalg.inv(l2g).T corners_lidar = corners_lidar[:, :3] - pred_flag = np.ones((corners_lidar.shape[0] // 8, ), dtype=np.bool) + pred_flag = np.ones((corners_lidar.shape[0] // 8, ), dtype=bool) scores = [ pred_res[rid]['detection_score'] for rid in range(len(pred_res)) ] @@ -151,7 +151,7 @@ def plot_result(res_path, origin=(0.5, 0.5, 0.5)).corners.numpy().reshape(-1, 3) corners_lidar = np.concatenate([corners_lidar, corners_lidar_gt], axis=0) - gt_flag = np.ones((corners_lidar_gt.shape[0] // 8), dtype=np.bool) + gt_flag = np.ones((corners_lidar_gt.shape[0] // 8), dtype=bool) pred_flag = np.concatenate( [pred_flag, np.logical_not(gt_flag)], axis=0) scores = scores + [0 for _ in range(infos['gt_boxes'].shape[0])] @@ -169,7 +169,7 @@ def plot_result(res_path, check_point_in_img(corners_img, img.shape[0], img.shape[1])) valid = valid.reshape( -1, 8) # valid means: d>0 and visible in current view - corners_img = corners_img.reshape(-1, 8, 2).astype(np.int) + corners_img = corners_img.reshape(-1, 8, 2).astype(int) for aid in range(valid.shape[0]): if scores[aid] < vis_thred and pred_flag[aid]: continue diff --git a/modelscope/models/cv/ocr_recognition/model.py b/modelscope/models/cv/ocr_recognition/model.py index 6eb13403..2406b6dc 100644 --- a/modelscope/models/cv/ocr_recognition/model.py +++ b/modelscope/models/cv/ocr_recognition/model.py @@ -90,8 +90,15 @@ class OCRRecognition(TorchModel): f'recognizer should be either ConvNextViT, CRNN, but got {cfgs.model.recognizer}' ) if model_path != '': - self.recognizer.load_state_dict( - torch.load(model_path, map_location='cpu')) + params_pretrained = torch.load(model_path, map_location='cpu') + model_dict = self.recognizer.state_dict() + # remove prefix for finetuned models + check_point = { + k.replace('recognizer.', ''): v + for k, v in params_pretrained.items() + } + model_dict.update(check_point) + self.recognizer.load_state_dict(model_dict) dict_path = os.path.join(model_dir, ModelFile.VOCAB_FILE) self.labelMapping = dict() diff --git 
a/modelscope/models/cv/open_vocabulary_detection_vild/vild.py b/modelscope/models/cv/open_vocabulary_detection_vild/vild.py index 999ec27a..2aea0593 100644 --- a/modelscope/models/cv/open_vocabulary_detection_vild/vild.py +++ b/modelscope/models/cv/open_vocabulary_detection_vild/vild.py @@ -176,8 +176,7 @@ class OpenVocabularyDetectionViLD(Model): # Filter out invalid rois (nmsed rois) valid_indices = np.where( np.logical_and( - np.isin( - np.arange(len(roi_scores), dtype=np.int), nmsed_indices), + np.isin(np.arange(len(roi_scores), dtype=int), nmsed_indices), np.logical_and( np.logical_not(np.all(roi_boxes == 0., axis=-1)), np.logical_and(roi_scores >= min_rpn_score_thresh, diff --git a/modelscope/models/cv/panorama_depth_estimation/networks/layers.py b/modelscope/models/cv/panorama_depth_estimation/networks/layers.py index 99e166aa..52fb3d39 100644 --- a/modelscope/models/cv/panorama_depth_estimation/networks/layers.py +++ b/modelscope/models/cv/panorama_depth_estimation/networks/layers.py @@ -72,7 +72,7 @@ class Cube2Equirec(nn.Module): self.equ_h, 0), 3 * self.equ_w // 8, 1) # Prepare ceil mask - mask = np.zeros((self.equ_h, self.equ_w // 4), np.bool) + mask = np.zeros((self.equ_h, self.equ_w // 4), bool) idx = np.linspace(-np.pi, np.pi, self.equ_w // 4) / 4 idx = self.equ_h // 2 - np.round( np.arctan(np.cos(idx)) * self.equ_h / np.pi).astype(int) diff --git a/modelscope/models/cv/video_depth_estimation/utils/depth.py b/modelscope/models/cv/video_depth_estimation/utils/depth.py index e9f287e7..5fbf6aa6 100644 --- a/modelscope/models/cv/video_depth_estimation/utils/depth.py +++ b/modelscope/models/cv/video_depth_estimation/utils/depth.py @@ -29,7 +29,7 @@ def load_depth(file): elif file.endswith('png'): depth_png = np.array(load_image(file), dtype=int) assert (np.max(depth_png) > 255), 'Wrong .png depth file' - return depth_png.astype(np.float) / 256. + return depth_png.astype(float) / 256. 
else: raise NotImplementedError('Depth extension not supported.') diff --git a/modelscope/models/cv/video_frame_interpolation/utils/scene_change_detection.py b/modelscope/models/cv/video_frame_interpolation/utils/scene_change_detection.py index 4cbe60a7..379fe855 100644 --- a/modelscope/models/cv/video_frame_interpolation/utils/scene_change_detection.py +++ b/modelscope/models/cv/video_frame_interpolation/utils/scene_change_detection.py @@ -85,7 +85,7 @@ def do_scene_detect(F01_tensor, F10_tensor, img0_tensor, img1_tensor): img_diff = ori_img.float() - ref_img.float() img_diff = torch.abs(img_diff) - kernel = np.ones([8, 8], np.float) / 64 + kernel = np.ones([8, 8], float) / 64 kernel = torch.FloatTensor(kernel).to(device).unsqueeze(0).unsqueeze(0) diff = F.conv2d(img_diff, kernel, padding=4) diff --git a/modelscope/models/cv/video_multi_object_tracking/tracker/matching.py b/modelscope/models/cv/video_multi_object_tracking/tracker/matching.py index 45d2f5c0..e5c2e8a9 100644 --- a/modelscope/models/cv/video_multi_object_tracking/tracker/matching.py +++ b/modelscope/models/cv/video_multi_object_tracking/tracker/matching.py @@ -27,7 +27,7 @@ def linear_assignment(cost_matrix, thresh): def ious(atlbrs, btlbrs): - ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) + ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=float) if ious.size == 0: return ious @@ -60,13 +60,13 @@ def embedding_distance(tracks, detections, metric='cosine'): cost_matrix: np.ndarray """ - cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) + cost_matrix = np.zeros((len(tracks), len(detections)), dtype=float) if cost_matrix.size == 0: return cost_matrix det_features = np.asarray([track.curr_feat for track in detections], - dtype=np.float) + dtype=float) track_features = np.asarray([track.smooth_feat for track in tracks], - dtype=np.float) + dtype=float) cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) return cost_matrix diff --git a/modelscope/models/cv/video_multi_object_tracking/tracker/multitracker.py b/modelscope/models/cv/video_multi_object_tracking/tracker/multitracker.py index 1dc3297f..d38477b7 100644 --- a/modelscope/models/cv/video_multi_object_tracking/tracker/multitracker.py +++ b/modelscope/models/cv/video_multi_object_tracking/tracker/multitracker.py @@ -28,7 +28,7 @@ class STrack(BaseTrack): def __init__(self, tlwh, score, temp_feat, buffer_size=30): # wait activate - self._tlwh = np.asarray(tlwh, dtype=np.float) + self._tlwh = np.asarray(tlwh, dtype=float) self.kalman_filter = None self.mean, self.covariance = None, None self.is_activated = False diff --git a/modelscope/models/multi_modal/__init__.py b/modelscope/models/multi_modal/__init__.py index e85c48fb..9fa34baf 100644 --- a/modelscope/models/multi_modal/__init__.py +++ b/modelscope/models/multi_modal/__init__.py @@ -20,6 +20,8 @@ if TYPE_CHECKING: from .vldoc import VLDocForDocVLEmbedding from .video_synthesis import TextToVideoSynthesis from .efficient_diffusion_tuning import EfficientStableDiffusion + from .mplug_owl import MplugOwlForConditionalGeneration + from .clip_interrogator import CLIP_Interrogator else: _import_structure = { @@ -37,7 +39,9 @@ else: ['MultiStageDiffusionForTextToImageSynthesis'], 'vldoc': ['VLDocForDocVLEmbedding'], 'video_synthesis': ['TextToVideoSynthesis'], - 'efficient_diffusion_tuning': ['EfficientStableDiffusion'] + 'efficient_diffusion_tuning': ['EfficientStableDiffusion'], + 'mplug_owl': ['MplugOwlForConditionalGeneration'], + 'clip_interrogator': 
['CLIP_Interrogator'], } import sys diff --git a/modelscope/models/multi_modal/clip_interrogator/__init__.py b/modelscope/models/multi_modal/clip_interrogator/__init__.py new file mode 100644 index 00000000..96fefbf6 --- /dev/null +++ b/modelscope/models/multi_modal/clip_interrogator/__init__.py @@ -0,0 +1 @@ +from .model import CLIP_Interrogator diff --git a/modelscope/models/multi_modal/clip_interrogator/model.py b/modelscope/models/multi_modal/clip_interrogator/model.py new file mode 100644 index 00000000..a7e27cbd --- /dev/null +++ b/modelscope/models/multi_modal/clip_interrogator/model.py @@ -0,0 +1,599 @@ +# This implementation is adopted from CLIP-Interrogator, made pubicly available under the MIT License at +# https://github.com/pharmapsychotic/clip-interrogator/blob/main/clip_interrogator/clip_interrogator.py + +import hashlib +import math +import os +import time +from dataclasses import dataclass +from typing import List, Optional + +import numpy as np +import open_clip +import requests +import torch +import torchvision.transforms as transforms +from PIL import Image +from safetensors.numpy import load_file, save_file +from tqdm import tqdm +from transformers import (AutoModelForCausalLM, AutoProcessor, + Blip2ForConditionalGeneration, + BlipForConditionalGeneration) + +from modelscope.metainfo import Models +from modelscope.models.base import TorchModel +from modelscope.models.builder import MODELS +from modelscope.outputs import OutputKeys +from modelscope.preprocessors import LoadImage +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +logger = get_logger() + +__all__ = ['CLIP_Interrogator'] + +CAPTION_MODELS = { + 'blip-base': 'blip-image-captioning-base', + 'blip-large': 'blip-image-captioning-large', + 'blip2-2.7b': 'blip2-opt-2.7b', + 'blip2-flan-t5-xl': 'blip2-flan-t5-xl', + 'git-large-coco': 'git-large-coco', +} + + +@dataclass +class Config: + # models can optionally be passed in directly + caption_model = None + caption_processor = None + clip_model = None + clip_preprocess = None + + # blip settings + caption_max_length: int = 32 + caption_model_name: Optional[ + str] = 'blip-large' # use a key from CAPTION_MODELS or None + caption_offload: bool = False + + # clip settings + clip_model_name: str = 'ViT-L-14/openai' + clip_model_path: Optional[str] = None + clip_offload: bool = False + + # interrogator settings + cache_path: str = 'cache' # path to store cached text embeddings + download_cache: bool = False # when true, cached embeds are downloaded from huggingface + chunk_size: int = 2048 # batch size for CLIP, use smaller for lower VRAM + data_path: str = os.path.join(os.path.dirname(__file__), 'data') + device: str = ('cuda' if torch.cuda.is_available() else 'cpu') + flavor_intermediate_count: int = 2048 + quiet: bool = False # when quiet progress bars are not shown + + def apply_low_vram_defaults(self): + self.caption_model_name = 'blip-base' + self.caption_offload = True + self.clip_offload = True + self.chunk_size = 1024 + self.flavor_intermediate_count = 1024 + + +# CLIP-Interrogator utilize CLIP and BLIP to generate rich caption for images. +# CLIP is a zero-shot image classifier which can be used to generate image and text embeddings. +# BLIP is a new VLP framework which transfers flexibly to both vision-language understanding and generation tasks. 
+# BLIP effectively utilizes the noisy web data by bootstrapping the captions, where +# a captioner generates synthetic captions and a filter removes the noisy ones. +# Please infer to the paper CLIP: Learning Transferable Visual Models From Natural Language Supervision +# https://arxiv.org/abs/2103.00020 +# BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation +# https://arxiv.org/abs/2201.12086 + + +class Interrogator(): + + def __init__(self, config: Config): + self.config = config + self.device = config.device + self.dtype = torch.float16 if self.device == 'cuda' else torch.float32 + self.caption_offloaded = True + self.clip_offloaded = True + self.load_caption_model() + self.load_clip_model() + + def load_caption_model(self): + if self.config.caption_model is None and self.config.caption_model_name: + if not self.config.quiet: + print( + f'Loading caption model {self.config.caption_model_name}...' + ) + + model_path = CAPTION_MODELS[self.config.caption_model_name] + if self.config.caption_model_name.startswith('git-'): + caption_model = AutoModelForCausalLM.from_pretrained( + os.path.join(self.config.cache_path, model_path), + torch_dtype=torch.float32) + elif self.config.caption_model_name.startswith('blip2-'): + caption_model = Blip2ForConditionalGeneration.from_pretrained( + os.path.join(self.config.cache_path, model_path), + torch_dtype=self.dtype) + else: + caption_model = BlipForConditionalGeneration.from_pretrained( + os.path.join(self.config.cache_path, model_path), + torch_dtype=self.dtype) + self.caption_processor = AutoProcessor.from_pretrained( + os.path.join(self.config.cache_path, model_path)) + + caption_model.eval() + if not self.config.caption_offload: + caption_model = caption_model.to(self.config.device) + self.caption_model = caption_model + else: + self.caption_model = self.config.caption_model + self.caption_processor = self.config.caption_processor + + def load_clip_model(self): + start_time = time.time() + config = self.config + + clip_model_name, clip_model_pretrained_name = config.clip_model_name.split( + '/', 2) + + if config.clip_model is None: + if not config.quiet: + print(f'Loading CLIP model {config.clip_model_name}...') + + self.clip_model, _, self.clip_preprocess = open_clip.create_model_and_transforms( + clip_model_name, + pretrained=clip_model_pretrained_name, + precision='fp16' if config.device == 'cuda' else 'fp32', + device=config.device, + jit=False, + cache_dir=config.clip_model_path) + self.clip_model.eval() + else: + self.clip_model = config.clip_model + self.clip_preprocess = config.clip_preprocess + self.tokenize = open_clip.get_tokenizer(clip_model_name) + + sites = [ + 'Artstation', 'behance', 'cg society', 'cgsociety', 'deviantart', + 'dribbble', 'flickr', 'instagram', 'pexels', 'pinterest', + 'pixabay', 'pixiv', 'polycount', 'reddit', 'shutterstock', + 'tumblr', 'unsplash', 'zbrush central' + ] + trending_list = [site for site in sites] + trending_list.extend(['trending on ' + site for site in sites]) + trending_list.extend(['featured on ' + site for site in sites]) + trending_list.extend([site + ' contest winner' for site in sites]) + + raw_artists = load_list(config.data_path, 'artists.txt') + artists = [f'by {a}' for a in raw_artists] + artists.extend([f'inspired by {a}' for a in raw_artists]) + + self._prepare_clip() + self.artists = LabelTable(artists, 'artists', self) + self.flavors = LabelTable( + load_list(config.data_path, 'flavors.txt'), 'flavors', self) + self.mediums = 
LabelTable( + load_list(config.data_path, 'mediums.txt'), 'mediums', self) + self.movements = LabelTable( + load_list(config.data_path, 'movements.txt'), 'movements', self) + self.trendings = LabelTable(trending_list, 'trendings', self) + self.negative = LabelTable( + load_list(config.data_path, 'negative.txt'), 'negative', self) + + end_time = time.time() + if not config.quiet: + print( + f'Loaded CLIP model and data in {end_time-start_time:.2f} seconds.' + ) + + def chain(self, + image_features: torch.Tensor, + phrases: List[str], + best_prompt: str = '', + best_sim: float = 0, + min_count: int = 8, + max_count: int = 32, + desc='Chaining', + reverse: bool = False) -> str: + self._prepare_clip() + + phrases = set(phrases) + if not best_prompt: + best_prompt = self.rank_top( + image_features, [f for f in phrases], reverse=reverse) + best_sim = self.similarity(image_features, best_prompt) + phrases.remove(best_prompt) + curr_prompt, curr_sim = best_prompt, best_sim + + def check(addition: str, idx: int) -> bool: + nonlocal best_prompt, best_sim, curr_prompt, curr_sim + prompt = curr_prompt + ', ' + addition + sim = self.similarity(image_features, prompt) + if reverse: + sim = -sim + + if sim > best_sim: + best_prompt, best_sim = prompt, sim + if sim > curr_sim or idx < min_count: + curr_prompt, curr_sim = prompt, sim + return True + return False + + for idx in tqdm( + range(max_count), desc=desc, disable=self.config.quiet): + best = self.rank_top( + image_features, [f'{curr_prompt}, {f}' for f in phrases], + reverse=reverse) + flave = best[len(curr_prompt) + 2:] + if not check(flave, idx): + break + if _prompt_at_max_len(curr_prompt, self.tokenize): + break + phrases.remove(flave) + + return best_prompt + + def generate_caption(self, pil_image: Image) -> str: + assert self.caption_model is not None, 'No caption model loaded.' 
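+        # Move the caption model onto the device (offloading the CLIP model if configured),
+        # then run the BLIP / BLIP-2 / GIT captioner on the image and decode the result.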
+ self._prepare_caption() + inputs = self.caption_processor( + images=pil_image, return_tensors='pt').to(self.device) + if not self.config.caption_model_name.startswith('git-'): + inputs = inputs.to(self.dtype) + tokens = self.caption_model.generate( + **inputs, max_new_tokens=self.config.caption_max_length) + return self.caption_processor.batch_decode( + tokens, skip_special_tokens=True)[0].strip() + + def image_to_features(self, image: Image) -> torch.Tensor: + self._prepare_clip() + images = self.clip_preprocess(image).unsqueeze(0).to(self.device) + with torch.no_grad(), torch.cuda.amp.autocast(): + image_features = self.clip_model.encode_image(images) + image_features /= image_features.norm(dim=-1, keepdim=True) + return image_features + + def interrogate_classic(self, + image: Image, + max_flavors: int = 3, + caption: Optional[str] = None) -> str: + """Classic mode creates a prompt in a standard format first describing the image, + then listing the artist, trending, movement, and flavor text modifiers.""" + caption = caption or self.generate_caption(image) + image_features = self.image_to_features(image) + + medium = self.mediums.rank(image_features, 1)[0] + artist = self.artists.rank(image_features, 1)[0] + trending = self.trendings.rank(image_features, 1)[0] + movement = self.movements.rank(image_features, 1)[0] + flaves = ', '.join(self.flavors.rank(image_features, max_flavors)) + + if caption.startswith(medium): + prompt = f'{caption} {artist}, {trending}, {movement}, {flaves}' + else: + prompt = f'{caption}, {medium} {artist}, {trending}, {movement}, {flaves}' + + return _truncate_to_fit(prompt, self.tokenize) + + def interrogate_fast(self, + image: Image, + max_flavors: int = 32, + caption: Optional[str] = None) -> str: + """Fast mode simply adds the top ranked terms after a caption. It generally results in + better similarity between generated prompt and image than classic mode, but the prompts + are less readable.""" + caption = caption or self.generate_caption(image) + image_features = self.image_to_features(image) + merged = _merge_tables([ + self.artists, self.flavors, self.mediums, self.movements, + self.trendings + ], self) + tops = merged.rank(image_features, max_flavors) + return _truncate_to_fit(caption + ', ' + ', '.join(tops), + self.tokenize) + + def interrogate_negative(self, image: Image, max_flavors: int = 32) -> str: + """Negative mode chains together the most dissimilar terms to the image. 
It can be used + to help build a negative prompt to pair with the regular positive prompt and often + improve the results of generated images particularly with Stable Diffusion 2.""" + image_features = self.image_to_features(image) + flaves = self.flavors.rank( + image_features, + self.config.flavor_intermediate_count, + reverse=True) + flaves = flaves + self.negative.labels + return self.chain( + image_features, + flaves, + max_count=max_flavors, + reverse=True, + desc='Negative chain') + + def interrogate(self, + image: Image, + min_flavors: int = 8, + max_flavors: int = 32, + caption: Optional[str] = None) -> str: + caption = caption or self.generate_caption(image) + image_features = self.image_to_features(image) + + merged = _merge_tables([ + self.artists, self.flavors, self.mediums, self.movements, + self.trendings + ], self) + flaves = merged.rank(image_features, + self.config.flavor_intermediate_count) + best_prompt, best_sim = caption, self.similarity( + image_features, caption) + best_prompt = self.chain( + image_features, + flaves, + best_prompt, + best_sim, + min_count=min_flavors, + max_count=max_flavors, + desc='Flavor chain') + + fast_prompt = self.interrogate_fast( + image, max_flavors, caption=caption) + classic_prompt = self.interrogate_classic( + image, max_flavors, caption=caption) + candidates = [caption, classic_prompt, fast_prompt, best_prompt] + return candidates[np.argmax( + self.similarities(image_features, candidates))] + + def rank_top(self, + image_features: torch.Tensor, + text_array: List[str], + reverse: bool = False) -> str: + self._prepare_clip() + text_tokens = self.tokenize([text + for text in text_array]).to(self.device) + with torch.no_grad(), torch.cuda.amp.autocast(): + text_features = self.clip_model.encode_text(text_tokens) + text_features /= text_features.norm(dim=-1, keepdim=True) + similarity = text_features @ image_features.T + if reverse: + similarity = -similarity + return text_array[similarity.argmax().item()] + + def similarity(self, image_features: torch.Tensor, text: str) -> float: + self._prepare_clip() + text_tokens = self.tokenize([text]).to(self.device) + with torch.no_grad(), torch.cuda.amp.autocast(): + text_features = self.clip_model.encode_text(text_tokens) + text_features /= text_features.norm(dim=-1, keepdim=True) + similarity = text_features @ image_features.T + return similarity[0][0].item() + + def similarities(self, image_features: torch.Tensor, + text_array: List[str]) -> List[float]: + self._prepare_clip() + text_tokens = self.tokenize([text + for text in text_array]).to(self.device) + with torch.no_grad(), torch.cuda.amp.autocast(): + text_features = self.clip_model.encode_text(text_tokens) + text_features /= text_features.norm(dim=-1, keepdim=True) + similarity = text_features @ image_features.T + return similarity.T[0].tolist() + + def _prepare_caption(self): + if self.config.clip_offload and not self.clip_offloaded: + self.clip_model = self.clip_model.to('cpu') + self.clip_offloaded = True + if self.caption_offloaded: + self.caption_model = self.caption_model.to(self.device) + self.caption_offloaded = False + + def _prepare_clip(self): + if self.config.caption_offload and not self.caption_offloaded: + self.caption_model = self.caption_model.to('cpu') + self.caption_offloaded = True + if self.clip_offloaded: + self.clip_model = self.clip_model.to(self.device) + self.clip_offloaded = False + + +class LabelTable(): + + def __init__(self, labels: List[str], desc: str, ci: Interrogator): + clip_model, config = ci.clip_model, 
ci.config + self.chunk_size = config.chunk_size + self.config = config + self.device = config.device + self.embeds = [] + self.labels = labels + self.tokenize = ci.tokenize + + hash = hashlib.sha256(','.join(labels).encode()).hexdigest() + sanitized_name = self.config.clip_model_name.replace('/', '_').replace( + '@', '_') + self._load_cached(desc, hash, sanitized_name) + + if len(self.labels) != len(self.embeds): + self.embeds = [] + chunks = np.array_split( + self.labels, max(1, + len(self.labels) / config.chunk_size)) + for chunk in tqdm( + chunks, + desc=f'Preprocessing {desc}' if desc else None, + disable=self.config.quiet): + text_tokens = self.tokenize(chunk).to(self.device) + with torch.no_grad(), torch.cuda.amp.autocast(): + text_features = clip_model.encode_text(text_tokens) + text_features /= text_features.norm(dim=-1, keepdim=True) + text_features = text_features.half().cpu().numpy() + for i in range(text_features.shape[0]): + self.embeds.append(text_features[i]) + + if desc and self.config.cache_path: + os.makedirs(self.config.cache_path, exist_ok=True) + cache_filepath = os.path.join( + self.config.cache_path, + f'{sanitized_name}_{desc}.safetensors') + tensors = { + 'embeds': np.stack(self.embeds), + 'hash': np.array([ord(c) for c in hash], dtype=np.int8) + } + save_file(tensors, cache_filepath) + + if self.device == 'cpu' or self.device == torch.device('cpu'): + self.embeds = [e.astype(np.float32) for e in self.embeds] + + def _load_cached(self, desc: str, hash: str, sanitized_name: str) -> bool: + if self.config.cache_path is None or desc is None: + return False + + cached_safetensors = os.path.join( + self.config.cache_path, f'{sanitized_name}_{desc}.safetensors') + + if os.path.exists(cached_safetensors): + try: + tensors = load_file(cached_safetensors) + except Exception as e: + print(f'Failed to load {cached_safetensors}') + print(e) + return False + if 'hash' in tensors and 'embeds' in tensors: + if np.array_equal( + tensors['hash'], + np.array([ord(c) for c in hash], dtype=np.int8)): + self.embeds = tensors['embeds'] + if len(self.embeds.shape) == 2: + self.embeds = [ + self.embeds[i] for i in range(self.embeds.shape[0]) + ] + return True + + return False + + def _rank(self, + image_features: torch.Tensor, + text_embeds: torch.Tensor, + top_count: int = 1, + reverse: bool = False) -> str: + top_count = min(top_count, len(text_embeds)) + text_embeds = torch.stack([torch.from_numpy(t) + for t in text_embeds]).to(self.device) + with torch.cuda.amp.autocast(): + similarity = image_features @ text_embeds.T + if reverse: + similarity = -similarity + _, top_labels = similarity.float().cpu().topk(top_count, dim=-1) + return [top_labels[0][i].numpy() for i in range(top_count)] + + def rank(self, + image_features: torch.Tensor, + top_count: int = 1, + reverse: bool = False) -> List[str]: + if len(self.labels) <= self.chunk_size: + tops = self._rank( + image_features, + self.embeds, + top_count=top_count, + reverse=reverse) + return [self.labels[i] for i in tops] + + num_chunks = int(math.ceil(len(self.labels) / self.chunk_size)) + keep_per_chunk = int(self.chunk_size / num_chunks) + + top_labels, top_embeds = [], [] + for chunk_idx in tqdm(range(num_chunks), disable=self.config.quiet): + start = chunk_idx * self.chunk_size + stop = min(start + self.chunk_size, len(self.embeds)) + tops = self._rank( + image_features, + self.embeds[start:stop], + top_count=keep_per_chunk, + reverse=reverse) + top_labels.extend([self.labels[start + i] for i in tops]) + 
top_embeds.extend([self.embeds[start + i] for i in tops]) + + tops = self._rank(image_features, top_embeds, top_count=top_count) + return [top_labels[i] for i in tops] + + +def _download_file(url: str, + filepath: str, + chunk_size: int = 4 * 1024 * 1024, + quiet: bool = False): + r = requests.get(url, stream=True) + if r.status_code != 200: + return + + file_size = int(r.headers.get('Content-Length', 0)) + filename = url.split('/')[-1] + progress = tqdm( + total=file_size, + unit='B', + unit_scale=True, + desc=filename, + disable=quiet) + with open(filepath, 'wb') as f: + for chunk in r.iter_content(chunk_size=chunk_size): + if chunk: + f.write(chunk) + progress.update(len(chunk)) + progress.close() + + +def _merge_tables(tables: List[LabelTable], ci: Interrogator) -> LabelTable: + m = LabelTable([], None, ci) + for table in tables: + m.labels.extend(table.labels) + m.embeds.extend(table.embeds) + return m + + +def _prompt_at_max_len(text: str, tokenize) -> bool: + tokens = tokenize([text]) + return tokens[0][-1] != 0 + + +def _truncate_to_fit(text: str, tokenize) -> str: + parts = text.split(', ') + new_text = parts[0] + for part in parts[1:]: + if _prompt_at_max_len(new_text + part, tokenize): + break + new_text += ', ' + part + return new_text + + +def list_caption_models() -> List[str]: + return list(CAPTION_MODELS.keys()) + + +def list_clip_models() -> List[str]: + return ['/'.join(x) for x in open_clip.list_pretrained()] + + +def load_list(data_path: str, filename: Optional[str] = None) -> List[str]: + """Load a list of strings from a file.""" + if filename is not None: + data_path = os.path.join(data_path, filename) + with open(data_path, 'r', encoding='utf-8', errors='replace') as f: + items = [line.strip() for line in f.readlines()] + return items + + +@MODELS.register_module( + Tasks.image_captioning, module_name=Models.clip_interrogator) +class CLIP_Interrogator(TorchModel): + + def __init__(self, model_dir, device='cuda', device_id=0, *args, **kwargs): + super().__init__( + model_dir=model_dir, device_id=device_id, *args, **kwargs) + self.device = device + self.dtype = torch.float16 if self.device == 'cuda' else torch.float32 + cf = Config(clip_model_name='ViT-L-14/openai') + cf.data_path = os.path.join(model_dir, 'data') + cf.clip_model_path = model_dir + cf.cache_path = model_dir + self.ci = Interrogator(cf) + + def forward(self, inputs): + image = transforms.ToPILImage()(inputs) + return {'caption': self.ci.interrogate(image)} diff --git a/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py b/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py index 813f750e..743c049a 100644 --- a/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py +++ b/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py @@ -128,13 +128,13 @@ class VideoCLIPForMultiModalEmbedding(TorchModel): local_transform, s=None, e=None): - video_mask = np.zeros(self.max_frames, dtype=np.long) + video_mask = np.zeros(self.max_frames, dtype=int) max_video_length = 0 # T x 3 x H x W video = np.zeros((self.max_frames, 3, rawVideoExtractor.size, rawVideoExtractor.size), - dtype=np.float) + dtype=float) if s is None: start_time, end_time = None, None diff --git a/modelscope/models/multi_modal/mplug_owl/__init__.py b/modelscope/models/multi_modal/mplug_owl/__init__.py new file mode 100644 index 00000000..76ccfb5a --- /dev/null +++ b/modelscope/models/multi_modal/mplug_owl/__init__.py @@ -0,0 +1,18 @@ +# Copyright 2021-2023 The Alibaba DAMO mPLUG 
Authors. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .configuration_mplug_owl import (MplugOwlConfig, MplugOwlVisionConfig, + MplugOwlVisualAbstractorConfig) +from .modeling_mplug_owl import MplugOwlForConditionalGeneration diff --git a/modelscope/models/multi_modal/mplug_owl/configuration_mplug_owl.py b/modelscope/models/multi_modal/mplug_owl/configuration_mplug_owl.py new file mode 100644 index 00000000..6e32238a --- /dev/null +++ b/modelscope/models/multi_modal/mplug_owl/configuration_mplug_owl.py @@ -0,0 +1,257 @@ +# Copyright 2021-2023 The Alibaba DAMO mPLUG Team Authors. +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" MPLUG OWL model configuration """ +import copy +import os +from typing import Union + +from transformers import PretrainedConfig +from transformers.models.auto import CONFIG_MAPPING +from transformers.utils import logging + +from modelscope.utils.constant import Tasks + +logger = logging.get_logger() + + +class MplugOwlVisionConfig(PretrainedConfig): + r""" + Args: + hidden_size (`int`, *optional*, defaults to 768): + Dimensionality of the encoder layers and the pooler layer. + intermediate_size (`int`, *optional*, defaults to 3072): + Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder. + num_hidden_layers (`int`, *optional*, defaults to 12): + Number of hidden layers in the Transformer encoder. + num_attention_heads (`int`, *optional*, defaults to 12): + Number of attention heads for each attention layer in the Transformer encoder. + image_size (`int`, *optional*, defaults to 224): + The size (resolution) of each image. + patch_size (`int`, *optional*, defaults to 32): + The size (resolution) of each patch. + hidden_act (`str` or `function`, *optional*, defaults to `"quick_gelu"`): + The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`, + `"relu"`, `"selu"` and `"gelu_new"` ``"quick_gelu"` are supported. + layer_norm_eps (`float`, *optional*, defaults to 1e-5): + The epsilon used by the layer normalization layers. + attention_dropout (`float`, *optional*, defaults to 0.0): + The dropout ratio for the attention probabilities. + initializer_range (`float`, *optional*, defaults to 0.02): + The standard deviation of the truncated_normal_initializer for initializing all weight matrices. 
+ initializer_factor (`float`, *optional*, defaults to 1): + A factor for initializing all weight matrices (should be kept to 1, used internally for initialization + testing). + ```""" + + model_type = 'mplug_owl_vision_model' + + def __init__( + self, + hidden_size=1024, + intermediate_size=4096, + projection_dim=768, + num_hidden_layers=24, + num_attention_heads=16, + num_channels=3, + image_size=224, + patch_size=14, + hidden_act='quick_gelu', + layer_norm_eps=1e-6, + attention_dropout=0.0, + initializer_range=0.02, + initializer_factor=1.0, + use_flash_attn=False, + **kwargs, + ): + super().__init__(**kwargs) + + self.hidden_size = hidden_size + self.intermediate_size = intermediate_size + self.projection_dim = projection_dim + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.num_channels = num_channels + self.patch_size = patch_size + self.image_size = image_size + self.initializer_range = initializer_range + self.initializer_factor = initializer_factor + self.attention_dropout = attention_dropout + self.layer_norm_eps = layer_norm_eps + self.hidden_act = hidden_act + self.use_flash_attn = use_flash_attn + + @classmethod + def from_pretrained(cls, pretrained_model_name_or_path: Union[str, + os.PathLike], + **kwargs) -> 'PretrainedConfig': + config_dict, kwargs = cls.get_config_dict( + pretrained_model_name_or_path, **kwargs) + + # get the vision config dict if we are loading from MplugOwlConfig + if config_dict.get('model_type') == 'mplug_owl': + config_dict = config_dict['vision_config'] + + if 'model_type' in config_dict and hasattr( + cls, + 'model_type') and config_dict['model_type'] != cls.model_type: + logger.warning( + f"You are using a model of type {config_dict['model_type']} to instantiate a model of type " + f'{cls.model_type}. This is not supported for all configurations of models and can yield errors.' + ) + + return cls.from_dict(config_dict, **kwargs) + + +class MplugOwlVisualAbstractorConfig(PretrainedConfig): + + model_type = 'MPlugOwlVisualAbstractor' + + def __init__( + self, + hidden_size=1024, + num_hidden_layers=6, + num_attention_heads=16, + intermediate_size=4096, + attention_probs_dropout_prob=0.1, + initializer_range=0.02, + layer_norm_eps=1e-6, + encoder_hidden_size=1024, + **kwargs, + ): + super().__init__(**kwargs) + + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.intermediate_size = intermediate_size + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.initializer_range = initializer_range + self.layer_norm_eps = layer_norm_eps + self.encoder_hidden_size = encoder_hidden_size + + @classmethod + def from_pretrained(cls, pretrained_model_name_or_path: Union[str, + os.PathLike], + **kwargs) -> 'PretrainedConfig': + config_dict, kwargs = cls.get_config_dict( + pretrained_model_name_or_path, **kwargs) + + # get the qformer config dict if we are loading from MplugOwlConfig + if config_dict.get('model_type') == 'mplug_owl': + config_dict = config_dict['abstractor_config'] + + if 'model_type' in config_dict and hasattr( + cls, + 'model_type') and config_dict['model_type'] != cls.model_type: + logger.warning( + f"You are using a model of type {config_dict['model_type']} to instantiate a model of type " + f'{cls.model_type}. This is not supported for all configurations of models and can yield errors.' 
+ ) + + return cls.from_dict(config_dict, **kwargs) + + +class MplugOwlConfig(PretrainedConfig): + r""" + Args: + vision_config (`dict`, *optional*): + Dictionary of configuration options used to initialize [`MplugOwlVisionConfig`]. + qformer_config (`dict`, *optional*): + Dictionary of configuration options used to initialize [`MplugOwlVisualAbstractorConfig`]. + text_config (`dict`, *optional*): + Dictionary of configuration options used to initialize any [`PretrainedConfig`]. + num_query_tokens (`int`, *optional*, defaults to 32): + The number of query tokens passed through the Transformer. + + kwargs (*optional*): + Dictionary of keyword arguments. + """ + + model_type = 'mplug_owl' + is_composition = True + + def __init__(self, + task=Tasks.multimodal_dialogue, + vision_config=None, + visual_abstractor_config=None, + text_config=None, + num_query_tokens=64, + **kwargs): + + super().__init__(**kwargs) + self.task = task + if vision_config is None: + vision_config = MplugOwlVisionConfig().to_dict() + logger.info('vision_config is None.') + + if visual_abstractor_config is None: + visual_abstractor_config = {} + logger.info('abstractor_config is None. ') + + if text_config is None: + # we use LLAMA 7b by default + from transformers.models.llama.configuration_llama import \ + LlamaConfig + text_config = LlamaConfig(pad_token_id=2).to_dict() + logger.info('text_config is None.') + + self.vision_config = MplugOwlVisionConfig(**vision_config) + self.visual_abstractor_config = MplugOwlVisualAbstractorConfig( + **visual_abstractor_config) + text_model_type = text_config[ + 'model_type'] if 'model_type' in text_config else 'llama' + self.text_config = CONFIG_MAPPING[text_model_type](**text_config) + + self.tie_word_embeddings = self.text_config.tie_word_embeddings + + self.num_query_tokens = num_query_tokens + self.initializer_factor = 1.0 + self.initializer_range = 0.02 + + @classmethod + def from_vision_abstractor_text_configs( + cls, + vision_config: MplugOwlVisionConfig, + visual_abstractor_config: MplugOwlVisualAbstractorConfig, + text_config: PretrainedConfig, + **kwargs, + ): + r""" + Returns: + [`MplugOwlConfig`]: An instance of a configuration object + """ + + return cls( + vision_config=vision_config.to_dict(), + visual_abstractor_config=visual_abstractor_config.to_dict(), + text_config=text_config.to_dict(), + **kwargs, + ) + + def to_dict(self): + """ + Serializes this instance to a Python dictionary. Override the default [`~PretrainedConfig.to_dict`]. + + Returns: + `Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance, + """ + output = copy.deepcopy(self.__dict__) + output['vision_config'] = self.vision_config.to_dict() + tmp = self.visual_abstractor_config.to_dict() + output['visual_abstractor_config'] = tmp + output['text_config'] = self.text_config.to_dict() + output['model_type'] = self.__class__.model_type + return output diff --git a/modelscope/models/multi_modal/mplug_owl/modeling_mplug_owl.py b/modelscope/models/multi_modal/mplug_owl/modeling_mplug_owl.py new file mode 100644 index 00000000..21a29185 --- /dev/null +++ b/modelscope/models/multi_modal/mplug_owl/modeling_mplug_owl.py @@ -0,0 +1,1551 @@ +# Copyright 2021-2023 The Alibaba DAMO mPLUG Team Authors. +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch MPLUG OWL model. """ + +import copy +import logging +import math +import os +import os.path as osp +import random +from dataclasses import dataclass +from io import BytesIO +from typing import Any, Dict, List, Optional, Tuple, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint +import transformers +from torch.nn import CrossEntropyLoss +from transformers.activations import ACT2FN +from transformers.modeling_outputs import ( + BaseModelOutput, BaseModelOutputWithPastAndCrossAttentions, + BaseModelOutputWithPooling, BaseModelOutputWithPoolingAndCrossAttentions, + CausalLMOutputWithCrossAttentions) +from transformers.modeling_utils import (PreTrainedModel, + apply_chunking_to_forward, + find_pruneable_heads_and_indices, + prune_linear_layer) +from transformers.models.auto import AutoModelForCausalLM +from transformers.utils import ModelOutput + +from modelscope.metainfo import Models +from modelscope.models import TorchModel +from modelscope.models.base import Tensor +from modelscope.models.builder import MODELS +from modelscope.models.multi_modal.mplug_owl.configuration_mplug_owl import ( + MplugOwlConfig, MplugOwlVisionConfig, MplugOwlVisualAbstractorConfig) +from modelscope.outputs import OutputKeys +from modelscope.utils.config import Config +from modelscope.utils.constant import ModelFile, Tasks + +__all__ = ['MplugOwlForConditionalGeneration'] + + +@dataclass +class MplugOwlForConditionalGenerationModelOutput(ModelOutput): + """ + Class defining the outputs of [`MPlugOwlForConditionalGeneration`]. + + Args: + loss (`torch.FloatTensor`, *optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`): + Language modeling loss from the language model. + logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`): + Prediction scores of the language modeling head of the language model. + vision_outputs (`BaseModelOutputWithPooling`): + Outputs of the vision encoder. + + language_model_outputs (`CausalLMOutputWithPast`): + Outputs of the language model. + """ + + loss: Optional[Tuple[torch.FloatTensor]] = None + logits: Optional[Tuple[torch.FloatTensor]] = None + vision_outputs: Optional[torch.FloatTensor] = None + language_model_outputs: Optional[Tuple[torch.FloatTensor]] = None + + def to_tuple(self) -> Tuple[Any]: + return tuple( + self[k] if k not in ['vision_outputs', 'language_model_outputs' + ] else getattr(self, k).to_tuple() + for k in self.keys()) + + +def get_ltor_masks_and_position_ids_from_embeddings(data): + """Build masks and position id for left to right model.""" + + # Extract batch size and sequence length. + micro_batch_size, seq_length = data.size()[:2] + + # Attention mask (lower triangular). + att_mask_batch = 1 + attention_mask = torch.tril( + torch.ones((att_mask_batch, seq_length, seq_length), + device=data.device)).view(att_mask_batch, 1, seq_length, + seq_length) + + # Loss mask. 
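+    # (all ones: no positions are excluded from the loss in this variant)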
+ loss_mask = torch.ones( + data.size()[:2], dtype=torch.float, device=data.device) + + # Position ids. + position_ids = torch.arange( + seq_length, dtype=torch.long, device=data.device) + position_ids = position_ids.unsqueeze(0).expand_as(data[..., 0]) + + # Convert attention mask to binary: + attention_mask = (attention_mask < 0.5) + + return attention_mask, loss_mask, position_ids + + +class MplugOwlVisionEmbeddings(nn.Module): + + def __init__(self, config: MplugOwlVisionConfig): + super().__init__() + self.config = config + self.hidden_size = config.hidden_size + self.image_size = config.image_size + self.patch_size = config.patch_size + + self.cls_token = nn.Parameter(torch.randn(1, 1, self.hidden_size)) + + self.patch_embed = nn.Conv2d( + in_channels=3, + out_channels=self.hidden_size, + kernel_size=self.patch_size, + stride=self.patch_size, + bias=False) + + self.num_patches = (self.image_size // self.patch_size)**2 + + self.position_embedding = nn.Parameter( + torch.randn(1, self.num_patches + 1, self.hidden_size)) + + self.pre_layernorm = LayerNormFp32( + self.hidden_size, eps=config.layer_norm_eps) + + def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor: + batch_size = pixel_values.size(0) + image_embeds = self.patch_embed(pixel_values) + image_embeds = image_embeds.flatten(2).transpose(1, 2) + + class_embeds = self.cls_token.expand(batch_size, 1, + -1).to(image_embeds.dtype) + embeddings = torch.cat([class_embeds, image_embeds], dim=1) + embeddings = embeddings + \ + self.position_embedding[:, : embeddings.size(1)].to( + image_embeds.dtype) + embeddings = self.pre_layernorm(embeddings) + return embeddings + + +class LayerNormFp32(nn.LayerNorm): + """Subclass torch's LayerNorm to handle fp16 (by casting to float32 and back).""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def forward(self, x: torch.Tensor): + output = torch.nn.functional.layer_norm( + x.float(), + self.normalized_shape, + self.weight.float() if self.weight is not None else None, + self.bias.float() if self.bias is not None else None, + self.eps, + ) + return output.type_as(x) + + +class MplugOwlVisionAttention(nn.Module): + """Multi-headed attention from 'Attention Is All You Need' paper""" + + def __init__(self, config): + super().__init__() + self.config = config + self.hidden_size = config.hidden_size + self.num_heads = config.num_attention_heads + self.head_dim = self.hidden_size // self.num_heads + if self.head_dim * self.num_heads != self.hidden_size: + raise ValueError( + f'hidden_size must be divisible by num_heads (got `hidden_size`: {self.hidden_size} and `num_heads`:' + f' {self.num_heads}).') + self.scale = self.head_dim**-0.5 + self.dropout = nn.Dropout(config.attention_dropout) + + self.query_key_value = nn.Linear(self.hidden_size, + 3 * self.hidden_size) + self.dense = nn.Linear(self.hidden_size, self.hidden_size) + + def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): + return tensor.view(bsz, seq_len, self.num_heads, + self.head_dim).transpose(1, 2).contiguous() + + def forward( + self, + hidden_states: torch.Tensor, + head_mask: Optional[torch.Tensor] = None, + output_attentions: Optional[bool] = False, + ) -> Tuple[torch.Tensor, Optional[torch.Tensor], + Optional[Tuple[torch.Tensor]]]: + """Input shape: Batch x Time x Channel""" + + bsz, seq_len, embed_dim = hidden_states.size() + + mixed_qkv = self.query_key_value(hidden_states) + + mixed_qkv = mixed_qkv.reshape(bsz, seq_len, self.num_heads, 3, + embed_dim // 
self.num_heads).permute( + 3, 0, 2, 1, 4) # [3, b, np, sq, hn] + query_states, key_states, value_states = ( + mixed_qkv[0], + mixed_qkv[1], + mixed_qkv[2], + ) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_states, + key_states.transpose(-1, -2)) + + attention_scores = attention_scores * self.scale + + # Normalize the attention scores to probabilities. + attention_probs = torch.softmax(attention_scores, dim=-1) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. + attention_probs = self.dropout(attention_probs) + + # Mask heads if we want to + if head_mask is not None: + attention_probs = attention_probs * head_mask + + context_layer = torch.matmul(attention_probs, + value_states).permute(0, 2, 1, 3) + + new_context_layer_shape = context_layer.size()[:-2] + ( + self.hidden_size, ) + context_layer = context_layer.reshape(new_context_layer_shape) + + output = self.dense(context_layer) + + outputs = (output, attention_probs) if output_attentions else (output, + None) + + return outputs + + +class QuickGELU(nn.Module): + + def forward(self, x: torch.Tensor): + return x * torch.sigmoid(1.702 * x) + + +class MplugOwlMLP(nn.Module): + + def __init__(self, config): + super().__init__() + self.config = config + self.activation_fn = QuickGELU() + self.fc1 = nn.Linear(config.hidden_size, config.intermediate_size) + self.fc2 = nn.Linear(config.intermediate_size, config.hidden_size) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.fc1(hidden_states) + hidden_states = self.activation_fn(hidden_states) + hidden_states = self.fc2(hidden_states) + return hidden_states + + +class MplugOwlVisionEncoderLayer(nn.Module): + + def __init__(self, config: MplugOwlVisionConfig): + super().__init__() + self.hidden_size = config.hidden_size + self.self_attn = MplugOwlVisionAttention(config) + self.input_layernorm = LayerNormFp32( + self.hidden_size, eps=config.layer_norm_eps) + self.mlp = MplugOwlMLP(config) + self.post_attention_layernorm = LayerNormFp32( + self.hidden_size, eps=config.layer_norm_eps) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: torch.Tensor, + output_attentions: Optional[bool] = False, + ) -> Tuple[torch.FloatTensor]: + """ + Args: + hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)` + attention_mask (`torch.FloatTensor`): attention mask of size + `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values. + `(config.encoder_attention_heads,)`. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under + returned tensors for more detail. 
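+        Returns:
+            `Tuple[torch.FloatTensor]`: the layer output `(hidden_states,)`, plus the attention
+            weights when `output_attentions=True`.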
+ """ + residual = hidden_states + + hidden_states = self.input_layernorm(hidden_states) + hidden_states, attn_weights = self.self_attn( + hidden_states=hidden_states, + head_mask=attention_mask, + output_attentions=output_attentions, + ) + hidden_states = hidden_states + residual + residual = hidden_states + hidden_states = self.post_attention_layernorm(hidden_states) + hidden_states = self.mlp(hidden_states) + + hidden_states = hidden_states + residual + + outputs = (hidden_states, ) + + if output_attentions: + outputs += (attn_weights, ) + + return outputs + + +class MplugOwlPreTrainedModel(PreTrainedModel): + """ + An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained + models. + """ + + config_class = MplugOwlConfig + base_model_prefix = 'mplug_owl' + supports_gradient_checkpointing = True + _keys_to_ignore_on_load_missing = [ + r'position_ids', + r'language_model.encoder.embed_tokens.weight', + r'language_model.decoder.embed_tokens.weight', + r'language_model.lm_head.weight', + ] + _no_split_modules = ['MplugOwlAttention'] + _keep_in_fp32_modules = ['wo'] + + def _init_weights(self, module): + """Initialize the weights""" + factor = self.config.initializer_range + if isinstance(module, nn.Conv2d) or isinstance( + module, nn.Embedding) or isinstance(module, nn.Linear): + module.weight.data.normal_(mean=0.0, std=factor) + if hasattr(module, 'bias') and module.bias is not None: + module.bias.data.zero_() + + if isinstance(module, MplugOwlVisionEmbeddings): + if hasattr(self.config, 'vision_config'): + factor = self.config.vision_config.initializer_range + nn.init.trunc_normal_( + module.position_embedding, mean=0.0, std=factor) + nn.init.trunc_normal_(module.cls_token, mean=0.0, std=factor) + + elif isinstance(module, nn.LayerNorm): + module.bias.data.zero_() + module.weight.data.fill_(1.0) + elif isinstance(module, nn.Linear) and module.bias is not None: + module.bias.data.zero_() + elif isinstance(module, nn.Parameter): + nn.init.trunc_normal_(module.data, mean=0.0, std=factor) + + def _set_gradient_checkpointing(self, module, value=False): + if isinstance(module, MplugOwlVisionEncoder): + module.gradient_checkpointing = value + + +MPLUG_OWL_START_DOCSTRING = r""" + This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the + library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads + etc.) + + This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass. + Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage + and behavior. + + Parameters: + config ([`MplugOwlConfig`]): Model configuration class with all the parameters of the model. + Initializing with a config file does not load the weights associated with the model, only the + configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights. +""" + +MPLUG_OWL_VISION_INPUTS_DOCSTRING = r""" + Args: + pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`): + Pixel values. Pixel values can be obtained using [`MplugOwlPreprocessor`]. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. 
+ output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. +""" + +MPLUG_OWL_TEXT_INPUTS_DOCSTRING = r""" + Args: + input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`): + Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide + it. Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. [What are input IDs?](../glossary#input-ids) + attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + [What are attention masks?](../glossary#attention-mask) + decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*): + Indices of decoder input sequence tokens in the vocabulary. + + Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. + + [What are decoder input IDs?](../glossary#decoder-input-ids) + + T5 uses the `pad_token_id` as the starting token for `decoder_input_ids` generation. If `past_key_values` + is used, optionally only the last `decoder_input_ids` have to be input (see `past_key_values`). + + To know more on how to prepare `decoder_input_ids` for pretraining take a look at [T5 + Training](./t5#training). + decoder_attention_mask (`torch.BoolTensor` of shape `(batch_size, target_sequence_length)`, *optional*): + Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also + be used by default. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. +""" + +MPLUG_OWL_INPUTS_DOCSTRING = r""" + Args: + pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`): + Pixel values. Pixel values can be obtained using [`MplugOwlPreprocessor`]. + input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): + Indices of input sequence tokens in the vocabulary of the language model. Input tokens can optionally be + provided to serve as text prompt, which the language model can continue. + + Indices can be obtained using [`MplugOwlPreprocessor`]. See [`MplugOwlPreprocessor.__call__`] for details. + + [What are input IDs?](../glossary#input-ids) + attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. 
+ + [What are attention masks?](../glossary#attention-mask) + + decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*): + Indices of decoder input sequence tokens in the vocabulary of the language model. Only relevant in case an + encoder-decoder language model (like T5) is used. + + Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. [What are decoder input IDs?](../glossary#decoder-input-ids) + + decoder_attention_mask (`torch.BoolTensor` of shape `(batch_size, target_sequence_length)`, *optional*): + Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also + be used by default. + + Only relevant in case an encoder-decoder language model (like T5) is used. + + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. +""" + + +class MplugOwlVisionEncoder(nn.Module): + """ + Transformer encoder consisting of `config.num_hidden_layers` self attention layers. Each layer is a + [`MplugOwlVisionEncoderLayer`]. + + Args: + config (`MplugOwlVisionConfig`): + The corresponding vision configuration for the `MplugOwlEncoder`. + """ + + def __init__(self, config: MplugOwlVisionConfig): + super().__init__() + self.config = config + self.layers = nn.ModuleList([ + MplugOwlVisionEncoderLayer(config) + for _ in range(config.num_hidden_layers) + ]) + self.gradient_checkpointing = False + + def forward( + self, + inputs_embeds, + attention_mask: Optional[torch.Tensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, BaseModelOutput]: + r""" + Args: + inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`): + Embedded representation of the inputs. Should be float, not int tokens. + attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + [What are attention masks?](../glossary#attention-mask) + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under + returned tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors + for more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. 
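+        Returns:
+            [`BaseModelOutput`] or `tuple`: the final hidden states, together with all intermediate
+            hidden states and attention weights when requested.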
+ """ + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else + self.config.output_hidden_states) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + encoder_states = () if output_hidden_states else None + all_attentions = () if output_attentions else None + + hidden_states = inputs_embeds + for idx, encoder_layer in enumerate(self.layers): + if output_hidden_states: + encoder_states = encoder_states + (hidden_states, ) + if self.gradient_checkpointing and self.training: + + def create_custom_forward(module): + + def custom_forward(*inputs): + return module(*inputs, output_attentions) + + return custom_forward + + layer_outputs = torch.utils.checkpoint.checkpoint( + create_custom_forward(encoder_layer), + hidden_states, + attention_mask, + ) + else: + layer_outputs = encoder_layer( + hidden_states, + attention_mask, + output_attentions=output_attentions, + ) + + hidden_states = layer_outputs[0] + + if output_attentions: + all_attentions = all_attentions + (layer_outputs[1], ) + + if output_hidden_states: + encoder_states = encoder_states + (hidden_states, ) + + if not return_dict: + return tuple( + v for v in [hidden_states, encoder_states, all_attentions] + if v is not None) + return BaseModelOutput( + last_hidden_state=hidden_states, + hidden_states=encoder_states, + attentions=all_attentions) + + +class MplugOwlVisionModel(MplugOwlPreTrainedModel): + main_input_name = 'pixel_values' + config_class = MplugOwlVisionConfig + + def __init__(self, config: MplugOwlVisionConfig): + super().__init__(config) + self.config = config + self.hidden_size = config.hidden_size + + self.embeddings = MplugOwlVisionEmbeddings(config) + self.encoder = MplugOwlVisionEncoder(config) + self.post_layernorm = LayerNormFp32( + self.hidden_size, eps=config.layer_norm_eps) + + self.post_init() + + def forward( + self, + pixel_values: Optional[torch.FloatTensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, BaseModelOutputWithPooling]: + r""" + Returns: + + """ + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else + self.config.output_hidden_states) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + if pixel_values is None: + raise ValueError('You have to specify pixel_values') + + hidden_states = self.embeddings(pixel_values) + + encoder_outputs = self.encoder( + inputs_embeds=hidden_states, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + last_hidden_state = encoder_outputs[0] + last_hidden_state = self.post_layernorm(last_hidden_state) + + pooled_output = last_hidden_state[:, 0, :] + pooled_output = self.post_layernorm(pooled_output) + + if not return_dict: + return (last_hidden_state, pooled_output) + encoder_outputs[1:] + + return BaseModelOutputWithPooling( + last_hidden_state=last_hidden_state, + pooler_output=pooled_output, + hidden_states=encoder_outputs.hidden_states, + attentions=encoder_outputs.attentions, + ) + + def get_input_embeddings(self): + return self.embeddings + + +class MplugOwlVisualAbstractorMLP(nn.Module): + + def __init__(self, config: 
MplugOwlVisualAbstractorConfig): + super().__init__() + self.config = config + in_features = config.hidden_size + hidden_features = config.intermediate_size + hidden_features = int(2 * hidden_features / 3) + multiple_of = 256 + hidden_features = multiple_of * \ + ((hidden_features + multiple_of - 1) // multiple_of) + self.act = nn.SiLU() + + self.w1 = nn.Linear(in_features, hidden_features) + self.w2 = nn.Linear(hidden_features, in_features) + self.w3 = nn.Linear(in_features, hidden_features) + self.ffn_ln = LayerNormFp32(hidden_features, eps=config.layer_norm_eps) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.act( + self.w1(hidden_states)) * self.w3(hidden_states) + hidden_states = self.ffn_ln(hidden_states) + hidden_states = self.w2(hidden_states) + return hidden_states + + +class MplugOwlVisualAbstractorMultiHeadAttention(nn.Module): + + def __init__(self, config: MplugOwlVisualAbstractorConfig): + super().__init__() + self.config = config + if config.hidden_size % config.num_attention_heads != 0: + raise ValueError( + 'The hidden size (%d) is not a multiple of the number of attention heads (%d)' + % (config.hidden_size, config.num_attention_heads)) + + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int(config.hidden_size + / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.encoder_hidden_size, self.all_head_size) + self.value = nn.Linear(config.encoder_hidden_size, self.all_head_size) + + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + self.save_attention = False + + def save_attn_gradients(self, attn_gradients): + self.attn_gradients = attn_gradients + + def get_attn_gradients(self): + return self.attn_gradients + + def save_attention_map(self, attention_map): + self.attention_map = attention_map + + def get_attention_map(self): + return self.attention_map + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, + self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward( + self, + hidden_states, + attention_mask=None, + head_mask=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + past_key_value=None, + output_attentions=False, + ): + # If this is instantiated as a cross-attention module, the keys + # and values come from an encoder; the attention mask needs to be + # such that the encoder's padding tokens are not attended to. + key_layer = self.transpose_for_scores(self.key(encoder_hidden_states)) + value_layer = self.transpose_for_scores( + self.value(encoder_hidden_states)) + attention_mask = encoder_attention_mask + + mixed_query_layer = self.query(hidden_states) + + query_layer = self.transpose_for_scores(mixed_query_layer) + + past_key_value = (key_layer, value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_layer, + key_layer.transpose(-1, -2)) + + attention_scores = attention_scores / \ + math.sqrt(self.attention_head_size) + + if attention_mask is not None: + # Apply the attention mask is (precomputed for all layers in BertModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. 
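+        # Note that the keys/values at this point are the (normalized) query embeddings
+        # concatenated with the image patch features (see
+        # MplugOwlVisualAbstractorAttention.forward), so each query token attends over both
+        # the other queries and the full visual sequence.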
+        attention_probs = nn.Softmax(dim=-1)(attention_scores)
+
+        if self.save_attention:
+            self.save_attention_map(attention_probs)
+            attention_probs.register_hook(self.save_attn_gradients)
+
+        # This is actually dropping out entire tokens to attend to, which might
+        # seem a bit unusual, but is taken from the original Transformer paper.
+        attention_probs_dropped = self.dropout(attention_probs)
+
+        # Mask heads if we want to
+        if head_mask is not None:
+            attention_probs_dropped = attention_probs_dropped * head_mask
+
+        context_layer = torch.matmul(attention_probs_dropped, value_layer)
+
+        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
+        new_context_layer_shape = context_layer.size()[:-2] + (
+            self.all_head_size, )
+        context_layer = context_layer.view(*new_context_layer_shape)
+
+        outputs = (context_layer,
+                   attention_probs) if output_attentions else (context_layer, )
+
+        outputs = outputs + (past_key_value, )
+        return outputs
+
+
+class MplugOwlVisualAbstractorCrossOutput(nn.Module):
+
+    def __init__(self, config: MplugOwlVisualAbstractorConfig):
+        super().__init__()
+        dim = config.hidden_size
+        self.out_proj = nn.Linear(dim, dim, bias=True)
+        self.norm2 = LayerNormFp32(dim)
+        self.mlp = MplugOwlVisualAbstractorMLP(config)
+
+    def forward(self, hidden_states: torch.Tensor,
+                input_tensor: torch.Tensor) -> torch.Tensor:
+        input_tensor = input_tensor + self.out_proj(hidden_states)
+        input_tensor = input_tensor + self.mlp(self.norm2(input_tensor))
+        return input_tensor
+
+
+class MplugOwlVisualAbstractorAttention(nn.Module):
+
+    def __init__(self, config: MplugOwlVisualAbstractorConfig):
+        super().__init__()
+        self.attention = MplugOwlVisualAbstractorMultiHeadAttention(config)
+        self.output = MplugOwlVisualAbstractorCrossOutput(config)
+        self.pruned_heads = set()
+        self.norm1 = LayerNormFp32(config.hidden_size)
+        self.normk = LayerNormFp32(config.hidden_size)
+
+    def prune_heads(self, heads):
+        if len(heads) == 0:
+            return
+        heads, index = find_pruneable_heads_and_indices(
+            heads, self.attention.num_attention_heads,
+            self.attention.attention_head_size, self.pruned_heads)
+
+        # Prune linear layers
+        self.attention.query = prune_linear_layer(self.attention.query, index)
+        self.attention.key = prune_linear_layer(self.attention.key, index)
+        self.attention.value = prune_linear_layer(self.attention.value, index)
+        self.output.out_proj = prune_linear_layer(
+            self.output.out_proj, index, dim=1)
+
+        # Update hyper params and store pruned heads
+        self.attention.num_attention_heads = self.attention.num_attention_heads - \
+            len(heads)
+        self.attention.all_head_size = self.attention.attention_head_size * \
+            self.attention.num_attention_heads
+        self.pruned_heads = self.pruned_heads.union(heads)
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        attention_mask: Optional[torch.FloatTensor] = None,
+        head_mask: Optional[torch.FloatTensor] = None,
+        encoder_hidden_states: Optional[torch.FloatTensor] = None,
+        encoder_attention_mask: Optional[torch.FloatTensor] = None,
+        past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+        output_attentions: Optional[bool] = False,
+    ) -> Tuple[torch.Tensor]:
+        # HACK we apply norm on q and k
+        hidden_states = self.norm1(hidden_states)
+        encoder_hidden_states = self.normk(encoder_hidden_states)
+        encoder_hidden_states = torch.cat(
+            [hidden_states, encoder_hidden_states], dim=1)
+        encoder_attention_mask = torch.cat(
+            [attention_mask, encoder_attention_mask], dim=-1)
+        self_outputs = self.attention(
+            hidden_states,
+            attention_mask,
+
head_mask, + encoder_hidden_states, + encoder_attention_mask, + past_key_value, + output_attentions, + ) + attention_output = self.output(self_outputs[0], hidden_states) + # add attentions if we output them + outputs = (attention_output, ) + self_outputs[1:] + return outputs + + +class MplugOwlVisualAbstractorLayer(nn.Module): + + def __init__(self, config, layer_idx): + super().__init__() + self.chunk_size_feed_forward = config.chunk_size_feed_forward + self.seq_len_dim = 1 + + self.layer_idx = layer_idx + + self.crossattention = MplugOwlVisualAbstractorAttention(config) + self.has_cross_attention = True + + def forward( + self, + hidden_states, + attention_mask=None, + head_mask=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + output_attentions=False, + ): + if encoder_hidden_states is None: + raise ValueError( + 'encoder_hidden_states must be given for cross-attention layers' + ) + cross_attention_outputs = self.crossattention( + hidden_states, + attention_mask, + head_mask, + encoder_hidden_states, + encoder_attention_mask, + output_attentions=output_attentions, + ) + query_attention_output = cross_attention_outputs[0] + + outputs = (query_attention_output, ) + return outputs + + +class MplugOwlVisualAbstractorEncoder(nn.Module): + + def __init__(self, config): + super().__init__() + self.config = config + self.layers = nn.ModuleList([ + MplugOwlVisualAbstractorLayer(config, layer_idx) + for layer_idx in range(config.num_hidden_layers) + ]) + self.gradient_checkpointing = False + + def forward( + self, + hidden_states, + attention_mask=None, + head_mask=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + past_key_values=None, + output_attentions=False, + output_hidden_states=False, + return_dict=True, + ): + all_hidden_states = () if output_hidden_states else None + + for i in range(self.config.num_hidden_layers): + layer_module = self.layers[i] + if output_hidden_states: + all_hidden_states = all_hidden_states + (hidden_states, ) + + layer_head_mask = head_mask[i] if head_mask is not None else None + past_key_value = past_key_values[ + i] if past_key_values is not None else None + + if getattr(self.config, 'gradient_checkpointing', + False) and self.training: + + def create_custom_forward(module): + + def custom_forward(*inputs): + return module(*inputs, past_key_value, + output_attentions) + + return custom_forward + + layer_outputs = torch.utils.checkpoint.checkpoint( + create_custom_forward(layer_module), + hidden_states, + attention_mask, + layer_head_mask, + encoder_hidden_states, + encoder_attention_mask, + ) + else: + layer_outputs = layer_module( + hidden_states, + attention_mask, + layer_head_mask, + encoder_hidden_states, + encoder_attention_mask, + output_attentions, + ) + + hidden_states = layer_outputs[0] + + return BaseModelOutput(last_hidden_state=hidden_states, ) + + +class MplugOwlVisualAbstractorModel(MplugOwlPreTrainedModel): + + def __init__(self, config: MplugOwlVisualAbstractorConfig, + language_hidden_size): + super().__init__(config) + self.config = config + + self.encoder = MplugOwlVisualAbstractorEncoder(config) + self.visual_fc = torch.nn.Linear(config.hidden_size, + language_hidden_size) + self.vit_eos = torch.nn.Parameter( + torch.randn(1, 1, language_hidden_size)) + self.post_init() + + def _prune_heads(self, heads_to_prune): + """ + Prunes heads of the model. 
heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
+            class PreTrainedModel
+        """
+        for layer, heads in heads_to_prune.items():
+            self.encoder.layers[layer].crossattention.prune_heads(heads)
+
+    def get_extended_attention_mask(
+        self,
+        attention_mask: torch.Tensor,
+        input_shape: Tuple[int],
+        device: torch.device,
+    ) -> torch.Tensor:
+        """
+        Makes broadcastable attention and causal masks so that future and masked tokens are ignored.
+
+        Arguments:
+            attention_mask (`torch.Tensor`):
+                Mask with ones indicating tokens to attend to, zeros for tokens to ignore.
+            input_shape (`Tuple[int]`):
+                The shape of the input to the model.
+            device (`torch.device`):
+                The device of the input to the model.
+
+        Returns:
+            `torch.Tensor` The extended attention mask, with the same dtype as `attention_mask.dtype`.
+        """
+        # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
+        # ourselves in which case we just need to make it broadcastable to all heads.
+        if attention_mask.dim() == 3:
+            extended_attention_mask = attention_mask[:, None, :, :]
+        elif attention_mask.dim() == 2:
+            extended_attention_mask = attention_mask[:, None, None, :]
+        else:
+            raise ValueError(
+                'Wrong shape for input_ids (shape {}) or attention_mask (shape {})'
+                .format(input_shape, attention_mask.shape))
+
+        # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
+        # masked positions, this operation will create a tensor which is 0.0 for
+        # positions we want to attend and -10000.0 for masked positions.
+        # Since we are adding it to the raw scores before the softmax, this is
+        # effectively the same as removing these entirely.
+        extended_attention_mask = extended_attention_mask.to(
+            dtype=self.dtype)  # fp16 compatibility
+        extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0
+        return extended_attention_mask
+
+    def forward(
+        self,
+        query_embeds,
+        attention_mask=None,
+        head_mask=None,
+        encoder_hidden_states=None,
+        encoder_attention_mask=None,
+        past_key_values=None,
+        output_attentions=None,
+        output_hidden_states=None,
+        return_dict=None,
+    ):
+        r"""
+        encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, `optional`):
+            Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
+            the model is configured as a decoder.
+        encoder_attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, `optional`):
+            Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in
+            the cross-attention if the model is configured as a decoder. Mask values selected in `[0, 1]`:
+            - 1 for tokens that are **not masked**,
+            - 0 for tokens that are **masked**.
+        past_key_values (`tuple(tuple(torch.FloatTensor))` of length `config.n_layers` with each tuple having 4 tensors:
+            shape `(batch_size, num_heads, sequence_length - 1, embed_size_per_head)`): Contains precomputed key and
+            value hidden states of the attention blocks. Can be used to speed up decoding. If `past_key_values` are
+            used, the user can optionally input only the last `decoder_input_ids` (those that don't have their past key
+            value states given to this model) of shape `(batch_size, 1)` instead of all `decoder_input_ids` of shape
+            `(batch_size, sequence_length)`.
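+
+        Example (a rough shape-level sketch; the 64 query tokens, the 257 visual features and the
+        4096-dim language hidden size are illustrative placeholders, and
+        `MplugOwlVisualAbstractorConfig()` is assumed to provide usable defaults):
+
+        ```python
+        import torch
+
+        cfg = MplugOwlVisualAbstractorConfig()  # assumed default hyper-parameters
+        abstractor = MplugOwlVisualAbstractorModel(cfg, language_hidden_size=4096)
+        queries = torch.randn(1, 64, cfg.hidden_size)
+        image_feats = torch.randn(1, 257, cfg.encoder_hidden_size)
+        out = abstractor(query_embeds=queries, encoder_hidden_states=image_feats)
+        # 64 projected query tokens plus the learned `vit_eos` token appended at the end
+        out.last_hidden_state.shape  # torch.Size([1, 65, 4096])
+        ```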
+        """
+        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+        output_hidden_states = (
+            output_hidden_states if output_hidden_states is not None else
+            self.config.output_hidden_states)
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+        embedding_output = query_embeds
+        input_shape = embedding_output.size()[:-1]
+        batch_size, seq_length = input_shape
+        device = embedding_output.device
+
+        # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
+        # ourselves in which case we just need to make it broadcastable to all heads.
+        if attention_mask is None:
+            attention_mask = torch.ones(
+                (query_embeds.shape[0], query_embeds.shape[1]),
+                dtype=torch.long,
+                device=query_embeds.device)
+        extended_attention_mask = self.get_extended_attention_mask(
+            attention_mask, input_shape, device)
+
+        # If a 2D or 3D attention mask is provided for the cross-attention
+        # we need to make it broadcastable to [batch_size, num_heads, seq_length, seq_length]
+        if encoder_hidden_states is not None:
+            if type(encoder_hidden_states) == list:
+                encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states[
+                    0].size()
+            else:
+                (
+                    encoder_batch_size,
+                    encoder_sequence_length,
+                    _,
+                ) = encoder_hidden_states.size()
+            encoder_hidden_shape = (encoder_batch_size,
+                                    encoder_sequence_length)
+
+            if type(encoder_attention_mask) == list:
+                encoder_extended_attention_mask = [
+                    self.invert_attention_mask(mask)
+                    for mask in encoder_attention_mask
+                ]
+            elif encoder_attention_mask is None:
+                encoder_attention_mask = torch.ones(
+                    encoder_hidden_shape, device=device)
+                encoder_extended_attention_mask = self.invert_attention_mask(
+                    encoder_attention_mask)
+            else:
+                encoder_extended_attention_mask = self.invert_attention_mask(
+                    encoder_attention_mask)
+        else:
+            encoder_extended_attention_mask = None
+
+        # Prepare head mask if needed
+        # 1.0 in head_mask indicates we keep the head
+        # attention_probs has shape bsz x n_heads x N x N
+        # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
+        # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
+        head_mask = self.get_head_mask(head_mask,
+                                       self.config.num_hidden_layers)
+
+        encoder_outputs = self.encoder(
+            embedding_output,
+            attention_mask=extended_attention_mask,
+            head_mask=head_mask,
+            encoder_hidden_states=encoder_hidden_states,
+            encoder_attention_mask=encoder_extended_attention_mask,
+            past_key_values=past_key_values,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+        )
+        sequence_output = encoder_outputs[0]
+        pooled_output = sequence_output[:, 0, :]
+
+        sequence_output = self.visual_fc(sequence_output)
+        eos_repeat = self.vit_eos.repeat(sequence_output.shape[0], 1, 1)
+        sequence_output = torch.cat([sequence_output, eos_repeat], dim=1)
+
+        return BaseModelOutputWithPooling(
+            last_hidden_state=sequence_output,
+            pooler_output=pooled_output,
+            hidden_states=encoder_outputs.hidden_states,
+        )
+
+
+class MplugOwlModel(MplugOwlPreTrainedModel):
+    r"""The mPLUG-Owl model is a multi-modal conversation model that supports various modalities as input.
+    mPLUG-Owl consists of a visual encoder, a visual abstractor module and a language decoder model, which enables
+    both image and text inputs.
+    This model is implemented based on mPLUG-Owl: Modularization Empowers Large Language Models with Multimodality.
+    `Paper `.
+ """ + config_class = MplugOwlConfig + main_input_name = 'pixel_values' + + def __init__(self, config: MplugOwlConfig): + super().__init__(config) + + self.vision_model = MplugOwlVisionModel(config.vision_config) + + self.query_tokens = nn.Parameter( + torch.zeros(1, config.num_query_tokens, + config.visual_abstractor_config.hidden_size)) + self.abstractor = MplugOwlVisualAbstractorModel( + config.visual_abstractor_config, config.text_config.hidden_size) + + # if config.use_decoder_only_language_model: + language_model = AutoModelForCausalLM.from_config(config.text_config) + self.language_model = language_model + + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.language_model.get_input_embeddings() + + def set_input_embeddings(self, value): + self.language_model.set_input_embeddings(value) + + def set_output_embeddings(self, new_embeddings): + self.language_model.set_output_embeddings(new_embeddings) + + def get_output_embeddings(self) -> nn.Module: + return self.language_model.get_output_embeddings() + + def get_encoder(self): + return self.language_model.get_encoder() + + def get_decoder(self): + return self.language_model.get_decoder() + + def _tie_weights(self): + if not self.config.use_decoder_only_language_model: + self.language_model.encoder.embed_tokens = self.language_model.shared + self.language_model.decoder.embed_tokens = self.language_model.shared + + def get_text_features( + self, + input_ids: Optional[torch.Tensor] = None, + attention_mask: Optional[torch.Tensor] = None, + decoder_input_ids: Optional[torch.Tensor] = None, + decoder_attention_mask: Optional[torch.Tensor] = None, + labels: Optional[torch.Tensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ): + + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else + self.config.output_hidden_states) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + if self.config.use_decoder_only_language_model: + text_outputs = self.language_model( + input_ids=input_ids, + attention_mask=attention_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + else: + inputs_embeds = self.language_model.get_input_embeddings()( + input_ids) + + text_outputs = self.language_model( + inputs_embeds=inputs_embeds, + attention_mask=attention_mask, + decoder_input_ids=decoder_input_ids, + decoder_attention_mask=decoder_attention_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + labels=labels, + ) + + return text_outputs + + def get_image_features( + self, + pixel_values: Optional[torch.FloatTensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ): + + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else + self.config.output_hidden_states) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + vision_outputs = self.vision_model( + pixel_values=pixel_values, + output_attentions=output_attentions, + 
output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + return vision_outputs + + +def get_media_indices(my_list): + if isinstance(my_list, torch.Tensor): + my_list = my_list.cpu().tolist() + result = [] + for i in range(len(my_list)): + if i == 0 and my_list[i] < 0: + result.append(i) + elif my_list[i] != my_list[i - 1] and my_list[i] < 0: + result.append(i) + return result + + +class MplugOwlForConditionalGenerationHF(MplugOwlPreTrainedModel): + config_class = MplugOwlConfig + main_input_name = 'pixel_values' + + def __init__(self, config: MplugOwlConfig, **kwargs): + super().__init__(config) + + self.vision_model = MplugOwlVisionModel(config.vision_config) + + self.query_tokens = nn.Parameter( + torch.zeros(1, config.num_query_tokens, + config.visual_abstractor_config.hidden_size)) + self.abstractor = MplugOwlVisualAbstractorModel( + config.visual_abstractor_config, config.text_config.hidden_size) + + # if config.use_decoder_only_language_model: + language_model = AutoModelForCausalLM.from_config(config.text_config) + self.language_model = language_model + + # Initialize weights and apply final processing + self.post_init() + self.main_input_name = 'input_ids' + + def get_input_embeddings(self): + return self.language_model.get_input_embeddings() + + def set_input_embeddings(self, value): + self.language_model.set_input_embeddings(value) + + def set_output_embeddings(self, new_embeddings): + self.language_model.set_output_embeddings(new_embeddings) + + def get_output_embeddings(self) -> nn.Module: + return self.language_model.get_output_embeddings() + + def get_encoder(self): + return self.language_model.get_encoder() + + def get_decoder(self): + return self.language_model.get_decoder() + + def _tie_weights(self): + if not self.config.use_decoder_only_language_model: + self.language_model.encoder.embed_tokens = self.language_model.shared + self.language_model.decoder.embed_tokens = self.language_model.shared + + def _preprocess_accelerate(self): + r""" + Some pre-processing hacks to make the model `accelerate` compatible. Check + https://github.com/huggingface/transformers/pull/21707 for more details. + """ + hf_device_map = self.hf_device_map + + if len( + hf_device_map + ) > 1 and 'language_model' not in hf_device_map and torch.cuda.device_count( + ) > 1: + # warn users about unexpected behavior when using multi-GPU + mPLUG-Owl + `accelerate`. + logger.warning( + 'The `language_model` is not in the `hf_device_map` dictionary and you are running your script' + ' in a multi-GPU environment. this may lead to unexpected behavior when using `accelerate`.' + ' Please pass a `device_map` that contains `language_model` to remove this warning.' 
+ ' Please refer to https://github.com/huggingface/blog/blob/main/accelerate-large-models.md for' + ' more details on creating a `device_map` for large models.', ) + + if hasattr(self.language_model, '_hf_hook'): + self.language_model._hf_hook.io_same_device = True # For `generate` compatibility + + def forward( + self, + pixel_values: torch.FloatTensor, + input_ids: torch.FloatTensor, + num_images, + non_padding_mask: Optional[torch.LongTensor] = None, + non_media_mask: Optional[torch.LongTensor] = None, + prompt_mask: Optional[torch.LongTensor] = None, + attention_mask: Optional[torch.LongTensor] = None, + decoder_input_ids: Optional[torch.LongTensor] = None, + decoder_attention_mask: Optional[torch.LongTensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + labels: Optional[torch.LongTensor] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, MplugOwlForConditionalGenerationModelOutput]: + + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + # get text embedding + text_tokens_ = input_ids + batch_size = input_ids.shape[0] + + media_token_indices = [ + # [:-1] since we would not use the last token for embedding + get_media_indices(text_tokens_[i][:-1]) for i in range(batch_size) + ] + text_tokens_[text_tokens_ < 0] = 1 # Not used + text_embeds = self.get_input_embeddings()( + text_tokens_) # Temporally Embedding + + if pixel_values is not None: + pixel_values = pixel_values.half() + image_embeds = self.vision_model( + pixel_values, return_dict=True).last_hidden_state + + image_attention_mask = torch.ones( + image_embeds.size()[:-1], + dtype=torch.long, + device=image_embeds.device) + query_tokens = self.query_tokens.expand(image_embeds.shape[0], -1, + -1) + + query_features = self.abstractor( + query_embeds=query_tokens, + encoder_hidden_states=image_embeds, + encoder_attention_mask=image_attention_mask, + )['last_hidden_state'] + img_seq_length = query_features.shape[1] + + num_images_per_sample = num_images.long().cpu().tolist() + + text_chunk_embeds = [] + img_idx = 0 + for b in range(batch_size): + start = 0 + result = [] + if len(media_token_indices[b]) > 0: + for i, pos in enumerate(media_token_indices[b]): + if pos > start: + result.append(text_embeds[b, start:pos]) + result.append(query_features[img_idx + i]) + start = pos + img_seq_length + if start < text_embeds.shape[1]: + result.append(text_embeds[b, start:]) + + img_idx += num_images_per_sample[b] + text_chunk_embeds.append(torch.cat(result, dim=0)) + + # Actual Input Embeddings + input_embeds = torch.stack(text_chunk_embeds, dim=0) + + # Create causal mask and position ids + _, loss_mask, position_ids = \ + get_ltor_masks_and_position_ids_from_embeddings(input_embeds) + + # Calculate the loss_mask + non_padding_mask = non_padding_mask.long() + non_media_mask = non_media_mask.long() + prompt_mask = prompt_mask.long() # TODO How to deal with prompt mask + loss_mask = loss_mask[:, :-1] + + loss_mask = loss_mask * non_padding_mask * non_media_mask * prompt_mask + + # Forward into GPT + outputs = self.language_model( + inputs_embeds=input_embeds, + attention_mask=attention_mask, + labels=labels, + ) + outputs.loss = (outputs.loss + * loss_mask.view(-1)).sum() / loss_mask.sum() + return outputs + + @torch.no_grad() + def generate( + self, + pixel_values: torch.FloatTensor, + input_ids: Optional[torch.LongTensor] = None, + attention_mask: Optional[torch.LongTensor] = None, + **generate_kwargs, + ) -> torch.LongTensor: + """ + 
Overrides `generate` function to be able to use the model as a conditional generator. + + Args: + pixel_values (`torch.FloatTensor` of shape (batch_size, num_channels, height, width)): + Input images to be processed. + input_ids (`torch.LongTensor` of shape (batch_size, sequence_length), *optional*): + The sequence used as a prompt for the generation. + attention_mask (`torch.LongTensor` of shape (batch_size, sequence_length), *optional*): + Mask to avoid performing attention on padding token indices + + Returns: + captions (list): A list of strings of length batch_size * num_captions. + """ + + if input_ids is not None: + batch_size = input_ids.size(0) + media_token_indices = [ + get_media_indices(input_ids[i]) for i in range(batch_size) + ] + num_images_per_sample = [len(x) for x in media_token_indices] + input_ids = input_ids.clone() + input_ids[input_ids < 0] = 0 # Not used + + if attention_mask is None: + attention_mask = torch.ones_like(input_ids).long().to( + input_ids.device) + + if hasattr(self, 'hf_device_map'): + # preprocess for `accelerate` + self._preprocess_accelerate() + batch_size = input_ids.shape[0] + # get text embedding + inputs_embeds = self.get_input_embeddings()(input_ids) + # get visual embedding + if pixel_values is not None: + pixel_values = pixel_values.half() + pixel_values = pixel_values.to(input_ids.device) + with torch.no_grad(): + image_embeds = self.vision_model( + pixel_values, return_dict=True).last_hidden_state + image_attention_mask = torch.ones( + image_embeds.size()[:-1], + dtype=torch.long, + device=image_embeds.device) + query_tokens = self.query_tokens.expand( + image_embeds.shape[0], -1, -1) + query_outputs = self.abstractor( + query_embeds=query_tokens, + encoder_hidden_states=image_embeds, + encoder_attention_mask=image_attention_mask, + return_dict=True, + ) + query_output = query_outputs['last_hidden_state'] + image_embeds = query_output + img_seq_length = image_embeds.shape[1] + + # =================== + # Get actual input embeddings + # =================== + text_chunk_embeds = [] + text_chunk_attns = [] + img_idx = 0 + + for b in range(batch_size): + start = 0 + result = [] + result_attn = [] + for i, pos in enumerate(media_token_indices[b]): + if pos > start: + result.append(inputs_embeds[b, start:pos]) + result_attn.append(attention_mask[b, start:pos]) + result.append(image_embeds[img_idx + i]) + result_attn.append( + torch.ones( + image_embeds[img_idx + i].shape[0], + device=inputs_embeds.device)) + start = pos + img_seq_length + if start < inputs_embeds.shape[1]: + result.append(inputs_embeds[b, start:]) + result_attn.append(attention_mask[b, start:]) + + img_idx += num_images_per_sample[b] + text_chunk_embeds.append(torch.cat(result, dim=0)) + text_chunk_attns.append(torch.cat(result_attn, dim=0)) + inputs_embeds = torch.stack(text_chunk_embeds, dim=0) + attention_mask = torch.stack(text_chunk_attns, dim=0) + + outputs = self.language_model.generate( + inputs_embeds=inputs_embeds, + attention_mask=attention_mask, + **generate_kwargs, + ) + + return outputs + + +@MODELS.register_module( + Tasks.multimodal_dialogue, module_name=Models.mplug_owl) +class MplugOwlForConditionalGeneration(TorchModel): + + def __init__(self, model_dir: str, *args, **kwargs): + """initialize the mPLUG-Owl model from the `model_dir` path. + Args: + model_dir (str): the model path. 
+ """ + + super().__init__(model_dir, *args, **kwargs) + self.model = MplugOwlForConditionalGenerationHF.from_pretrained( + model_dir, + torch_dtype=torch.half, + ) + + def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]: + output = self.model.generate(**input) + return output diff --git a/modelscope/models/nlp/mglm/blocklm_utils.py b/modelscope/models/nlp/mglm/blocklm_utils.py index b05cd2c2..e75aea92 100644 --- a/modelscope/models/nlp/mglm/blocklm_utils.py +++ b/modelscope/models/nlp/mglm/blocklm_utils.py @@ -212,10 +212,10 @@ class ConstructBlockStrategy: block_spans, rng, task='bert'): - position_ids = np.arange(len(tokens), dtype=np.long) + position_ids = np.arange(len(tokens), dtype=int) targets = copy.deepcopy(tokens) mask_id = self.tokenizer.get_command('MASK').Id - mlm_masks = np.zeros(len(tokens), dtype=np.long) + mlm_masks = np.zeros(len(tokens), dtype=int) for start, end in block_spans: for idx in range(start, end): tokens[idx] = mask_id @@ -231,7 +231,7 @@ class ConstructBlockStrategy: rng, task='bert'): text_length = len(tokens) - position_ids = np.ones(len(tokens), dtype=np.long) + position_ids = np.ones(len(tokens), dtype=int) for start, end in block_spans: position_ids[start + 1:end] = 0 position_ids = np.cumsum(position_ids) - 1 @@ -270,7 +270,7 @@ class ConstructBlockStrategy: (end - start + 1)) if self.block_position_encoding: target_block_position_ids.append( - np.arange(1, end - start + 2, dtype=np.long)) + np.arange(1, end - start + 2, dtype=int)) else: target_block_position_ids.append([1] * (end - start + 1)) block_spans.sort(key=lambda x: x[0]) @@ -307,7 +307,7 @@ class ConstructBlockStrategy: target_tokens = target_tokens + [ self.tokenizer.get_command('eop').Id ] - loss_masks = np.ones(len(target_tokens), dtype=np.long) + loss_masks = np.ones(len(target_tokens), dtype=int) return source_tokens, target_tokens, loss_masks else: tokens = np.concatenate(source_tokens + target_tokens) @@ -326,12 +326,12 @@ class ConstructBlockStrategy: for pos in mask_pos: tokens[pos] = self.tokenizer.get_command('dBLOCK').Id targets = np.concatenate(source_tokens + targets) - loss_masks = np.ones(len(tokens), dtype=np.long) + loss_masks = np.ones(len(tokens), dtype=int) loss_masks[:source_length] = 0 position_ids = np.concatenate(source_position_ids + target_position_ids) block_position_ids = np.concatenate( - [np.zeros(source_length, dtype=np.long)] + [np.zeros(source_length, dtype=int)] + target_block_position_ids) position_ids = np.stack([position_ids, block_position_ids], axis=0) if attention_mask is not None: @@ -539,22 +539,21 @@ class ConstructBlockStrategy: (source_tokens, [self.generation_mask], target_tokens)) loss_masks = np.concatenate( (np.zeros(len(source_tokens) + 1, - dtype=np.long), target_masks)) + dtype=int), target_masks)) token_batch.append(tokens) target_batch.append(targets) loss_mask_batch.append(loss_masks) position_ids = np.arange( - len(source_tokens) + len(target_tokens) + 1, - dtype=np.long) + len(source_tokens) + len(target_tokens) + 1, dtype=int) position_ids[len(source_tokens) + 1:] = len(source_tokens) if self.block_position_encoding: block_position_ids = np.concatenate( - (np.zeros(len(source_tokens), dtype=np.long), - np.arange(len(target_tokens) + 1, dtype=np.long))) + (np.zeros(len(source_tokens), dtype=int), + np.arange(len(target_tokens) + 1, dtype=int))) else: block_position_ids = np.concatenate( - (np.zeros(len(source_tokens) + 1, dtype=np.long), - np.ones(len(target_tokens) + 1, dtype=np.long))) + (np.zeros(len(source_tokens) + 
1, dtype=int), + np.ones(len(target_tokens) + 1, dtype=int))) position_id_batch.append( np.stack([position_ids, block_position_ids], axis=0)) else: @@ -597,27 +596,25 @@ class ConstructBlockStrategy: max_length = max(seq_lengths) token_batch = [ np.concatenate( - (tokens, np.zeros(max_length - len(tokens), - dtype=np.long))) + (tokens, np.zeros(max_length - len(tokens), dtype=int))) for tokens in token_batch ] target_batch = [ np.concatenate( - (targets, - np.zeros(max_length - len(targets), dtype=np.long))) + (targets, np.zeros(max_length - len(targets), dtype=int))) for targets in target_batch ] loss_mask_batch = [ np.concatenate( (loss_masks, - np.zeros(max_length - len(loss_masks), dtype=np.long))) + np.zeros(max_length - len(loss_masks), dtype=int))) for loss_masks in loss_mask_batch ] position_id_batch = [ - np.concatenate((position_ids, - np.zeros( - (2, max_length - position_ids.shape[1]), - dtype=np.long)), - axis=1) for position_ids in position_id_batch + np.concatenate( + (position_ids, + np.zeros( + (2, max_length - position_ids.shape[1]), dtype=int)), + axis=1) for position_ids in position_id_batch ] return token_batch, target_batch, loss_mask_batch, position_id_batch diff --git a/modelscope/models/nlp/mglm/data_utils/datasets.py b/modelscope/models/nlp/mglm/data_utils/datasets.py index 39ffaea3..37bfbcc2 100644 --- a/modelscope/models/nlp/mglm/data_utils/datasets.py +++ b/modelscope/models/nlp/mglm/data_utils/datasets.py @@ -583,8 +583,8 @@ class XLDataset(data.Dataset): def getidx(self, idx): tokens, targets, loss_masks = [], [], [] attention_mask = np.concatenate( - (np.zeros((self.max_seq_len, self.mem_len), dtype=np.long), - np.ones((self.max_seq_len, self.max_seq_len), dtype=np.long)), + (np.zeros((self.max_seq_len, self.mem_len), dtype=int), + np.ones((self.max_seq_len, self.max_seq_len), dtype=int)), axis=1) sample_idx = bisect_right(self.indices, idx * self.max_seq_len) last_end = 0 if sample_idx == 0 else self.indices[sample_idx - 1] diff --git a/modelscope/models/nlp/mglm/test/test_block.py b/modelscope/models/nlp/mglm/test/test_block.py index ed4225da..eb630835 100644 --- a/modelscope/models/nlp/mglm/test/test_block.py +++ b/modelscope/models/nlp/mglm/test/test_block.py @@ -28,7 +28,7 @@ def main(): counts = np.array([0] * 10) for _ in range(10000): spans = strategy.sample_span_in_document( - np.array([1, 2, 3, 0, 4, 5, 6, 7, 9, 0], dtype=np.long), [1, 1], + np.array([1, 2, 3, 0, 4, 5, 6, 7, 9, 0], dtype=int), [1, 1], random.Random()) for start, end in spans: counts[start:end] += 1 diff --git a/modelscope/models/nlp/mglm/test/test_rel_shift.py b/modelscope/models/nlp/mglm/test/test_rel_shift.py index 00cbb9fe..ad68b15e 100644 --- a/modelscope/models/nlp/mglm/test/test_rel_shift.py +++ b/modelscope/models/nlp/mglm/test/test_rel_shift.py @@ -17,7 +17,7 @@ def main(): num_iters=300000, decay_style='cosine', decay_ratio=0.1) - steps = np.arange(0, 400000, 10, dtype=np.long) + steps = np.arange(0, 400000, 10, dtype=int) rates = [] for step in steps: lr_scheduler.num_iters = step diff --git a/modelscope/models/nlp/unite/__init__.py b/modelscope/models/nlp/unite/__init__.py index 06c2146e..939f0ab7 100644 --- a/modelscope/models/nlp/unite/__init__.py +++ b/modelscope/models/nlp/unite/__init__.py @@ -5,12 +5,12 @@ from typing import TYPE_CHECKING from modelscope.utils.import_utils import LazyImportModule if TYPE_CHECKING: - from .configuration_unite import UniTEConfig - from .modeling_unite import UniTEForTranslationEvaluation + from .configuration import UniTEConfig + 
from .translation_evaluation import UniTEForTranslationEvaluation else: _import_structure = { - 'configuration_unite': ['UniTEConfig'], - 'modeling_unite': ['UniTEForTranslationEvaluation'], + 'configuration': ['UniTEConfig'], + 'translation_evaluation': ['UniTEForTranslationEvaluation'], } import sys diff --git a/modelscope/models/nlp/unite/configuration_unite.py b/modelscope/models/nlp/unite/configuration.py similarity index 93% rename from modelscope/models/nlp/unite/configuration_unite.py rename to modelscope/models/nlp/unite/configuration.py index b0a48585..402538f7 100644 --- a/modelscope/models/nlp/unite/configuration_unite.py +++ b/modelscope/models/nlp/unite/configuration.py @@ -9,7 +9,7 @@ from modelscope.utils.config import Config logger = logging.get_logger() -class EvaluationMode(Enum): +class InputFormat(Enum): SRC = 'src' REF = 'ref' SRC_REF = 'src-ref' diff --git a/modelscope/models/nlp/unite/modeling_unite.py b/modelscope/models/nlp/unite/translation_evaluation.py similarity index 61% rename from modelscope/models/nlp/unite/modeling_unite.py rename to modelscope/models/nlp/unite/translation_evaluation.py index deea737d..c7e96027 100644 --- a/modelscope/models/nlp/unite/modeling_unite.py +++ b/modelscope/models/nlp/unite/translation_evaluation.py @@ -20,6 +20,8 @@ from transformers.activations import ACT2FN from modelscope.metainfo import Models from modelscope.models.base import TorchModel from modelscope.models.builder import MODELS +from modelscope.models.nlp.unite.configuration import InputFormat +from modelscope.outputs.nlp_outputs import TranslationEvaluationOutput from modelscope.utils.constant import Tasks from modelscope.utils.logger import get_logger @@ -71,8 +73,16 @@ class LayerwiseAttention(Module): mask: torch.Tensor = None, ) -> torch.Tensor: tensors = torch.cat(list(x.unsqueeze(dim=0) for x in tensors), dim=0) - normed_weights = softmax( - self.scalar_parameters, dim=0).view(-1, 1, 1, 1) + + if self.training and self.dropout: + normed_weights = softmax( + torch.where(self.dropout_mask.uniform_() > self.dropout, + self.scalar_parameters, self.dropout_fill), + dim=-1) + else: + normed_weights = softmax(self.scalar_parameters, dim=-1) + + normed_weights = normed_weights.view(-1, 1, 1, 1) mask_float = mask.float() weighted_sum = (normed_weights @@ -97,18 +107,18 @@ class FeedForward(Module): Feed Forward Neural Network. Args: - in_dim (:obj:`int`): - Number of input features. - out_dim (:obj:`int`, defaults to 1): - Number of output features. Default is 1 -- a single scalar. - hidden_sizes (:obj:`List[int]`, defaults to `[3072, 768]`): - List with hidden layer sizes. - activations (:obj:`str`, defaults to `Sigmoid`): - Name of the activation function to be used in the hidden layers. - final_activation (:obj:`str`, Optional, defaults to `None`): - Name of the final activation function if any. - dropout (:obj:`float`, defaults to 0.1): - Dropout ratio to be used in the hidden layers. + in_dim (:obj:`int`): + Number of input features. + out_dim (:obj:`int`, defaults to 1): + Number of output features. Default is 1 -- a single scalar. + hidden_sizes (:obj:`List[int]`, defaults to `[3072, 768]`): + List with hidden layer sizes. + activations (:obj:`str`, defaults to `Sigmoid`): + Name of the activation function to be used in the hidden layers. + final_activation (:obj:`str`, Optional, defaults to `None`): + Name of the final activation function if any. + dropout (:obj:`float`, defaults to 0.1): + Dropout ratio to be used in the hidden layers. 
""" super().__init__() modules = [] @@ -266,8 +276,11 @@ class UniTEForTranslationEvaluation(TorchModel): return - def forward(self, input_sentences: List[torch.Tensor]): - input_ids = self.combine_input_sentences(input_sentences) + def forward(self, + input_ids: torch.Tensor, + input_format: Optional[List[InputFormat]] = None, + score: Optional[torch.Tensor] = None, + **kwargs) -> TranslationEvaluationOutput: attention_mask = input_ids.ne(self.pad_token_id).long() outputs = self.encoder( input_ids=input_ids, @@ -276,125 +289,138 @@ class UniTEForTranslationEvaluation(TorchModel): return_dict=True) mix_states = self.layerwise_attention(outputs['hidden_states'], attention_mask) - pred = self.estimator(mix_states) - return pred.squeeze(dim=-1) + pred = self.estimator(mix_states).squeeze(dim=-1) + output = TranslationEvaluationOutput( + score=pred.cpu().tolist(), input_format=input_format) - def load_checkpoint(self, path: str, device: torch.device): - state_dict = torch.load(path, map_location=device) - self.load_state_dict(state_dict) + if score is not None: + loss = (pred - score).pow(2).mean() + output['loss'] = loss + + return output + + def load_checkpoint(self, path: str, device: torch.device, plm_only: bool): + if plm_only: + self.encoder = self.encoder.from_pretrained(path).to(device) + self.encoder.pooler = None + else: + state_dict = torch.load(path, map_location=device) + self.load_state_dict(state_dict) logger.info('Loading checkpoint parameters from %s' % path) return - def combine_input_sentences(self, input_sent_groups: List[torch.Tensor]): - for input_sent_group in input_sent_groups[1:]: - input_sent_group[:, 0] = self.eos_token_id - if len(input_sent_groups) == 3: - cutted_sents = self.cut_long_sequences3(input_sent_groups) - else: - cutted_sents = self.cut_long_sequences2(input_sent_groups) - return cutted_sents - - @staticmethod - def cut_long_sequences2(all_input_concat: List[List[torch.Tensor]], +def combine_input_sentences(all_input_concat: List[List[torch.Tensor]], maximum_length: int = 512, - pad_idx: int = 1): - all_input_concat = list(zip(*all_input_concat)) - collected_tuples = list() - for tensor_tuple in all_input_concat: - all_lens = tuple(len(x) for x in tensor_tuple) + pad_idx: int = 1, + eos_idx: int = 2): + for group in all_input_concat[1:]: + group[:, 0] = eos_idx - if sum(all_lens) > maximum_length: - lengths = dict(enumerate(all_lens)) - lengths_sorted_idxes = list(x[0] for x in sorted( - lengths.items(), key=lambda d: d[1], reverse=True)) + if len(all_input_concat) == 3: + return cut_long_sequences3(all_input_concat, maximum_length, pad_idx) + else: + return cut_long_sequences2(all_input_concat, maximum_length, pad_idx) - offset = ceil((sum(lengths.values()) - maximum_length) / 2) - if min(all_lens) > (maximum_length - // 2) and min(all_lens) > offset: - lengths = dict((k, v - offset) for k, v in lengths.items()) - else: - lengths[lengths_sorted_idxes[ - 0]] = maximum_length - lengths[lengths_sorted_idxes[1]] +def cut_long_sequences2(all_input_concat: List[List[torch.Tensor]], + maximum_length: int = 512, + pad_idx: int = 1): + all_input_concat = list(zip(*all_input_concat)) + collected_tuples = list() + for tensor_tuple in all_input_concat: + tensor_tuple = tuple( + x.masked_select(x.ne(pad_idx)) for x in tensor_tuple) + all_lens = tuple(len(x) for x in tensor_tuple) - new_lens = list(lengths[k] - for k in range(0, len(tensor_tuple))) - new_tensor_tuple = tuple( - x[:y] for x, y in zip(tensor_tuple, new_lens)) - for x, y in zip(new_tensor_tuple, 
tensor_tuple): - x[-1] = y[-1] - collected_tuples.append(new_tensor_tuple) + if sum(all_lens) > maximum_length: + lengths = dict(enumerate(all_lens)) + lengths_sorted_idxes = list(x[0] for x in sorted( + lengths.items(), key=lambda d: d[1], reverse=True)) + + offset = ceil((sum(lengths.values()) - maximum_length) / 2) + + if min(all_lens) > (maximum_length + // 2) and min(all_lens) > offset: + lengths = dict((k, v - offset) for k, v in lengths.items()) else: - collected_tuples.append(tensor_tuple) + lengths[lengths_sorted_idxes[0]] = maximum_length - lengths[ + lengths_sorted_idxes[1]] - concat_tensor = list(torch.cat(x, dim=0) for x in collected_tuples) - all_input_concat_padded = pad_sequence( - concat_tensor, batch_first=True, padding_value=pad_idx) + new_lens = list(lengths[k] for k in range(0, len(tensor_tuple))) + new_tensor_tuple = tuple(x[:y] + for x, y in zip(tensor_tuple, new_lens)) + for x, y in zip(new_tensor_tuple, tensor_tuple): + x[-1] = y[-1] + collected_tuples.append(new_tensor_tuple) + else: + collected_tuples.append(tensor_tuple) - return all_input_concat_padded + concat_tensor = list(torch.cat(x, dim=0) for x in collected_tuples) + all_input_concat_padded = pad_sequence( + concat_tensor, batch_first=True, padding_value=pad_idx) + return all_input_concat_padded - @staticmethod - def cut_long_sequences3(all_input_concat: List[List[torch.Tensor]], - maximum_length: int = 512, - pad_idx: int = 1): - all_input_concat = list(zip(*all_input_concat)) - collected_tuples = list() - for tensor_tuple in all_input_concat: - all_lens = tuple(len(x) for x in tensor_tuple) - if sum(all_lens) > maximum_length: - lengths = dict(enumerate(all_lens)) - lengths_sorted_idxes = list(x[0] for x in sorted( - lengths.items(), key=lambda d: d[1], reverse=True)) +def cut_long_sequences3(all_input_concat: List[List[torch.Tensor]], + maximum_length: int = 512, + pad_idx: int = 1): + all_input_concat = list(zip(*all_input_concat)) + collected_tuples = list() + for tensor_tuple in all_input_concat: + tensor_tuple = tuple( + x.masked_select(x.ne(pad_idx)) for x in tensor_tuple) + all_lens = tuple(len(x) for x in tensor_tuple) - offset = ceil((sum(lengths.values()) - maximum_length) / 3) + if sum(all_lens) > maximum_length: + lengths = dict(enumerate(all_lens)) + lengths_sorted_idxes = list(x[0] for x in sorted( + lengths.items(), key=lambda d: d[1], reverse=True)) - if min(all_lens) > (maximum_length - // 3) and min(all_lens) > offset: - lengths = dict((k, v - offset) for k, v in lengths.items()) - else: - while sum(lengths.values()) > maximum_length: - if lengths[lengths_sorted_idxes[0]] > lengths[ - lengths_sorted_idxes[1]]: - offset = maximum_length - lengths[ - lengths_sorted_idxes[1]] - lengths[ - lengths_sorted_idxes[2]] - if offset > lengths[lengths_sorted_idxes[1]]: - lengths[lengths_sorted_idxes[0]] = offset - else: - lengths[lengths_sorted_idxes[0]] = lengths[ - lengths_sorted_idxes[1]] - elif lengths[lengths_sorted_idxes[0]] == lengths[ - lengths_sorted_idxes[1]] > lengths[ - lengths_sorted_idxes[2]]: - offset = (maximum_length - - lengths[lengths_sorted_idxes[2]]) // 2 - if offset > lengths[lengths_sorted_idxes[2]]: - lengths[lengths_sorted_idxes[0]] = lengths[ - lengths_sorted_idxes[1]] = offset - else: - lengths[lengths_sorted_idxes[0]] = lengths[ - lengths_sorted_idxes[1]] = lengths[ - lengths_sorted_idxes[2]] + offset = ceil((sum(lengths.values()) - maximum_length) / 3) + + if min(all_lens) > (maximum_length + // 3) and min(all_lens) > offset: + lengths = dict((k, v - offset) for k, v 
in lengths.items()) + else: + while sum(lengths.values()) > maximum_length: + if lengths[lengths_sorted_idxes[0]] > lengths[ + lengths_sorted_idxes[1]]: + offset = maximum_length - lengths[lengths_sorted_idxes[ + 1]] - lengths[lengths_sorted_idxes[2]] + if offset > lengths[lengths_sorted_idxes[1]]: + lengths[lengths_sorted_idxes[0]] = offset + else: + lengths[lengths_sorted_idxes[0]] = lengths[ + lengths_sorted_idxes[1]] + elif lengths[lengths_sorted_idxes[0]] == lengths[ + lengths_sorted_idxes[1]] > lengths[ + lengths_sorted_idxes[2]]: + offset = (maximum_length + - lengths[lengths_sorted_idxes[2]]) // 2 + if offset > lengths[lengths_sorted_idxes[2]]: + lengths[lengths_sorted_idxes[0]] = lengths[ + lengths_sorted_idxes[1]] = offset else: lengths[lengths_sorted_idxes[0]] = lengths[ lengths_sorted_idxes[1]] = lengths[ - lengths_sorted_idxes[ - 2]] = maximum_length // 3 + lengths_sorted_idxes[2]] + else: + lengths[lengths_sorted_idxes[0]] = lengths[ + lengths_sorted_idxes[1]] = lengths[ + lengths_sorted_idxes[2]] = maximum_length // 3 - new_lens = list(lengths[k] for k in range(0, len(lengths))) - new_tensor_tuple = tuple( - x[:y] for x, y in zip(tensor_tuple, new_lens)) + new_lens = list(lengths[k] for k in range(0, len(lengths))) + new_tensor_tuple = tuple(x[:y] + for x, y in zip(tensor_tuple, new_lens)) - for x, y in zip(new_tensor_tuple, tensor_tuple): - x[-1] = y[-1] - collected_tuples.append(new_tensor_tuple) - else: - collected_tuples.append(tensor_tuple) + for x, y in zip(new_tensor_tuple, tensor_tuple): + x[-1] = y[-1] + collected_tuples.append(new_tensor_tuple) + else: + collected_tuples.append(tensor_tuple) - concat_tensor = list(torch.cat(x, dim=0) for x in collected_tuples) - all_input_concat_padded = pad_sequence( - concat_tensor, batch_first=True, padding_value=pad_idx) - - return all_input_concat_padded + concat_tensor = list(torch.cat(x, dim=0) for x in collected_tuples) + all_input_concat_padded = pad_sequence( + concat_tensor, batch_first=True, padding_value=pad_idx) + return all_input_concat_padded diff --git a/modelscope/models/science/unifold/data/msa_pairing.py b/modelscope/models/science/unifold/data/msa_pairing.py index cc65962c..77c4e9a6 100644 --- a/modelscope/models/science/unifold/data/msa_pairing.py +++ b/modelscope/models/science/unifold/data/msa_pairing.py @@ -115,7 +115,7 @@ def pad_features(feature: np.ndarray, feature_name: str) -> np.ndarray: Returns: The feature with an additional padding row. 
""" - assert feature.dtype != np.dtype(np.string_) + assert feature.dtype != np.dtype(np.str_) if feature_name in ( 'msa_all_seq', 'msa_mask_all_seq', diff --git a/modelscope/models/science/unifold/msa/templates.py b/modelscope/models/science/unifold/msa/templates.py index d1ff8cf1..f2d3d79c 100644 --- a/modelscope/models/science/unifold/msa/templates.py +++ b/modelscope/models/science/unifold/msa/templates.py @@ -1100,9 +1100,9 @@ class HmmsearchHitFeaturizer(TemplateHitFeaturizer): np.zeros((1, num_res, residue_constants.atom_type_num, 3), np.float32), 'template_domain_names': - np.array([''.encode()], dtype=np.object), + np.array([''.encode()], dtype=np.object_), 'template_sequence': - np.array([''.encode()], dtype=np.object), + np.array([''.encode()], dtype=np.object_), 'template_sum_probs': np.array([0], dtype=np.float32), } diff --git a/modelscope/msdatasets/auth/auth_config.py b/modelscope/msdatasets/auth/auth_config.py index 576a6efd..e09db93c 100644 --- a/modelscope/msdatasets/auth/auth_config.py +++ b/modelscope/msdatasets/auth/auth_config.py @@ -23,6 +23,15 @@ class OssAuthConfig(BaseAuthConfig): cookies=cookies, git_token=git_token, user_info=user_info) +class VirgoAuthConfig(BaseAuthConfig): + """The authorization config for virgo dataset.""" + + def __init__(self, cookies: CookieJar, git_token: str, + user_info: Tuple[str, str]): + super().__init__( + cookies=cookies, git_token=git_token, user_info=user_info) + + class MaxComputeAuthConfig(BaseAuthConfig): # TODO: MaxCompute dataset to be supported. def __init__(self, cookies: CookieJar, git_token: str, diff --git a/modelscope/msdatasets/context/dataset_context_config.py b/modelscope/msdatasets/context/dataset_context_config.py index 26b05f7d..4007d60c 100644 --- a/modelscope/msdatasets/context/dataset_context_config.py +++ b/modelscope/msdatasets/context/dataset_context_config.py @@ -42,6 +42,7 @@ class DatasetContextConfig: self.data_files = data_files self.cache_root_dir = cache_root_dir self.use_streaming = use_streaming + self.download_virgo_files: bool = False @property def config_kwargs(self) -> dict: diff --git a/modelscope/msdatasets/data_loader/data_loader.py b/modelscope/msdatasets/data_loader/data_loader.py index 1ef92372..b1450c61 100644 --- a/modelscope/msdatasets/data_loader/data_loader.py +++ b/modelscope/msdatasets/data_loader/data_loader.py @@ -1,11 +1,12 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
+import os from abc import ABC, abstractmethod from typing import Optional, Union from datasets import (Dataset, DatasetBuilder, DatasetDict, IterableDataset, IterableDatasetDict) -from datasets import load_dataset as hf_data_loader +from datasets import load_dataset as hf_load_dataset from modelscope.hub.api import ModelScopeConfig from modelscope.msdatasets.auth.auth_config import OssAuthConfig @@ -13,13 +14,18 @@ from modelscope.msdatasets.context.dataset_context_config import \ DatasetContextConfig from modelscope.msdatasets.data_files.data_files_manager import \ DataFilesManager -from modelscope.msdatasets.dataset_cls.dataset import ExternalDataset +from modelscope.msdatasets.dataset_cls import ExternalDataset from modelscope.msdatasets.meta.data_meta_manager import DataMetaManager -from modelscope.utils.constant import DatasetFormations +from modelscope.utils.constant import (DatasetFormations, DatasetPathName, + DownloadMode, VirgoDatasetConfig) +from modelscope.utils.logger import get_logger +from modelscope.utils.url_utils import valid_url + +logger = get_logger() -class BaseDataLoader(ABC): - """Base dataset loader to load data.""" +class BaseDownloader(ABC): + """Base dataset downloader to load data.""" def __init__(self, dataset_context_config: DatasetContextConfig): self.dataset_context_config = dataset_context_config @@ -28,35 +34,35 @@ class BaseDataLoader(ABC): def process(self): """The entity processing pipeline for fetching the data. """ raise NotImplementedError( - f'No default implementation provided for {BaseDataLoader.__name__}.process.' + f'No default implementation provided for {BaseDownloader.__name__}.process.' ) @abstractmethod def _authorize(self): raise NotImplementedError( - f'No default implementation provided for {BaseDataLoader.__name__}._authorize.' + f'No default implementation provided for {BaseDownloader.__name__}._authorize.' ) @abstractmethod def _build(self): raise NotImplementedError( - f'No default implementation provided for {BaseDataLoader.__name__}._build.' + f'No default implementation provided for {BaseDownloader.__name__}._build.' ) @abstractmethod def _prepare_and_download(self): raise NotImplementedError( - f'No default implementation provided for {BaseDataLoader.__name__}._prepare_and_download.' + f'No default implementation provided for {BaseDownloader.__name__}._prepare_and_download.' ) @abstractmethod def _post_process(self): raise NotImplementedError( - f'No default implementation provided for {BaseDataLoader.__name__}._post_process.' + f'No default implementation provided for {BaseDownloader.__name__}._post_process.' ) -class OssDataLoader(BaseDataLoader): +class OssDownloader(BaseDownloader): def __init__(self, dataset_context_config: DatasetContextConfig): super().__init__(dataset_context_config) @@ -127,7 +133,7 @@ class OssDataLoader(BaseDataLoader): raise f'meta-file: {dataset_name}.py not found on the modelscope hub.' 
if dataset_py_script and dataset_formation == DatasetFormations.hf_compatible: - self.dataset = hf_data_loader( + self.dataset = hf_load_dataset( dataset_py_script, name=subset_name, revision=version, @@ -147,8 +153,151 @@ class OssDataLoader(BaseDataLoader): self.dataset.custom_map = self.dataset_context_config.data_meta_config.meta_type_map -class MaxComputeDataLoader(BaseDataLoader): - """Data loader for MaxCompute data source.""" +class VirgoDownloader(BaseDownloader): + """Data downloader for Virgo data source.""" + + def __init__(self, dataset_context_config: DatasetContextConfig): + super().__init__(dataset_context_config) + self.dataset = None + + def process(self): + """ + Sequential data fetching virgo dataset process: authorize -> build -> prepare_and_download -> post_process + """ + self._authorize() + self._build() + self._prepare_and_download() + self._post_process() + + def _authorize(self): + """Authorization of virgo dataset.""" + from modelscope.msdatasets.auth.auth_config import VirgoAuthConfig + + cookies = ModelScopeConfig.get_cookies() + user_info = ModelScopeConfig.get_user_info() + + if not self.dataset_context_config.auth_config: + auth_config = VirgoAuthConfig( + cookies=cookies, git_token='', user_info=user_info) + else: + auth_config = self.dataset_context_config.auth_config + auth_config.cookies = cookies + auth_config.git_token = '' + auth_config.user_info = user_info + + self.dataset_context_config.auth_config = auth_config + + def _build(self): + """ + Fetch virgo meta and build virgo dataset. + """ + from modelscope.msdatasets.dataset_cls.dataset import VirgoDataset + import pandas as pd + + meta_manager = DataMetaManager(self.dataset_context_config) + meta_manager.fetch_virgo_meta() + self.dataset_context_config = meta_manager.dataset_context_config + self.dataset = VirgoDataset( + **self.dataset_context_config.config_kwargs) + + virgo_cache_dir = os.path.join( + self.dataset_context_config.cache_root_dir, + self.dataset_context_config.namespace, + self.dataset_context_config.dataset_name, + self.dataset_context_config.version) + os.makedirs( + os.path.join(virgo_cache_dir, DatasetPathName.META_NAME), + exist_ok=True) + meta_content_cache_file = os.path.join(virgo_cache_dir, + DatasetPathName.META_NAME, + 'meta_content.csv') + + if isinstance(self.dataset.meta, pd.DataFrame): + meta_content_df = self.dataset.meta + meta_content_df.to_csv(meta_content_cache_file, index=False) + self.dataset.meta_content_cache_file = meta_content_cache_file + self.dataset.virgo_cache_dir = virgo_cache_dir + logger.info( + f'Virgo meta content saved to {meta_content_cache_file}') + + def _prepare_and_download(self): + """ + Fetch data-files from oss-urls in the virgo meta content. 
+ """ + + download_virgo_files = self.dataset_context_config.config_kwargs.pop( + 'download_virgo_files', '') + + if self.dataset.data_type == 0 and download_virgo_files: + import requests + import json + import shutil + from urllib.parse import urlparse + from functools import partial + + def download_file(meta_info_val, data_dir): + file_url_list = [] + file_path_list = [] + try: + meta_info_val = json.loads(meta_info_val) + # get url first, if not exist, try to get inner_url + file_url = meta_info_val.get('url', '') + if file_url: + file_url_list.append(file_url) + else: + tmp_inner_member_list = meta_info_val.get( + 'inner_url', '') + for item in tmp_inner_member_list: + file_url = item.get('url', '') + if file_url: + file_url_list.append(file_url) + + for one_file_url in file_url_list: + is_url = valid_url(one_file_url) + if is_url: + url_parse_res = urlparse(file_url) + file_name = os.path.basename(url_parse_res.path) + else: + raise ValueError(f'Unsupported url: {file_url}') + file_path = os.path.join(data_dir, file_name) + file_path_list.append((one_file_url, file_path)) + + except Exception as e: + logger.error(f'parse virgo meta info error: {e}') + file_path_list = [] + + for file_url_item, file_path_item in file_path_list: + if file_path_item and not os.path.exists(file_path_item): + logger.info(f'Downloading file to {file_path_item}') + os.makedirs(data_dir, exist_ok=True) + with open(file_path_item, 'wb') as f: + f.write(requests.get(file_url_item).content) + + return file_path_list + + self.dataset.download_virgo_files = True + download_mode = self.dataset_context_config.download_mode + data_files_dir = os.path.join(self.dataset.virgo_cache_dir, + DatasetPathName.DATA_FILES_NAME) + + if download_mode == DownloadMode.FORCE_REDOWNLOAD: + shutil.rmtree(data_files_dir, ignore_errors=True) + + from tqdm import tqdm + tqdm.pandas(desc='apply download_file') + self.dataset.meta[ + VirgoDatasetConfig. + col_cache_file] = self.dataset.meta.progress_apply( + lambda row: partial( + download_file, data_dir=data_files_dir)(row.meta_info), + axis=1) + + def _post_process(self): + ... + + +class MaxComputeDownloader(BaseDownloader): + """Data downloader for MaxCompute data source.""" # TODO: MaxCompute data source to be supported . 
def __init__(self, dataset_context_config: DatasetContextConfig): diff --git a/modelscope/msdatasets/data_loader/data_loader_manager.py b/modelscope/msdatasets/data_loader/data_loader_manager.py index 3c8a638a..5be32de1 100644 --- a/modelscope/msdatasets/data_loader/data_loader_manager.py +++ b/modelscope/msdatasets/data_loader/data_loader_manager.py @@ -9,7 +9,7 @@ from datasets import load_dataset as hf_data_loader from modelscope.hub.api import HubApi from modelscope.msdatasets.context.dataset_context_config import \ DatasetContextConfig -from modelscope.msdatasets.data_loader.data_loader import OssDataLoader +from modelscope.msdatasets.data_loader.data_loader import OssDownloader from modelscope.utils.constant import EXTENSIONS_TO_LOAD from modelscope.utils.logger import get_logger @@ -127,7 +127,7 @@ class RemoteDataLoaderManager(DataLoaderManager): return dataset_ret # To use the modelscope data loader elif data_loader_type == RemoteDataLoaderType.MS_DATA_LOADER: - oss_data_loader = OssDataLoader( + oss_data_loader = OssDownloader( dataset_context_config=self.dataset_context_config) oss_data_loader.process() # download statistics diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/__init__.py index 9eb62168..a367fe79 100644 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/__init__.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/__init__.py @@ -27,12 +27,6 @@ if TYPE_CHECKING: from .video_frame_interpolation import VideoFrameInterpolationDataset from .video_stabilization import VideoStabilizationDataset from .video_super_resolution import VideoSuperResolutionDataset - from .image_semantic_segmentation import SegDataset - from .face_2d_keypoins import FaceKeypointDataset - from .hand_2d_keypoints import HandCocoWholeBodyDataset - from .human_wholebody_keypoint import WholeBodyCocoTopDownDataset - from .image_classification import ClsDataset - from .object_detection import DetDataset, DetImagesMixDataset from .ocr_detection import DataLoader, ImageDataset, QuadMeasurer from .ocr_recognition_dataset import OCRRecognitionDataset from .image_colorization import ImageColorizationDataset @@ -66,12 +60,6 @@ else: 'video_frame_interpolation': ['VideoFrameInterpolationDataset'], 'video_stabilization': ['VideoStabilizationDataset'], 'video_super_resolution': ['VideoSuperResolutionDataset'], - 'image_semantic_segmentation': ['SegDataset'], - 'face_2d_keypoins': ['FaceKeypointDataset'], - 'hand_2d_keypoints': ['HandCocoWholeBodyDataset'], - 'human_wholebody_keypoint': ['WholeBodyCocoTopDownDataset'], - 'image_classification': ['ClsDataset'], - 'object_detection': ['DetDataset', 'DetImagesMixDataset'], 'ocr_detection': ['DataLoader', 'ImageDataset', 'QuadMeasurer'], 'ocr_recognition_dataset': ['OCRRecognitionDataset'], 'image_colorization': ['ImageColorizationDataset'], diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/__init__.py deleted file mode 100644 index e9d76b7e..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule - -if TYPE_CHECKING: - from .face_2d_keypoints_dataset import FaceKeypointDataset - -else: - _import_structure = {'face_2d_keypoints_dataset': ['FaceKeypointDataset']} - - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/face_2d_keypoints_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/face_2d_keypoints_dataset.py deleted file mode 100644 index 9f55901f..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/face_2d_keypoins/face_2d_keypoints_dataset.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from easycv.datasets.face import FaceKeypointDataset as _FaceKeypointDataset - -from modelscope.metainfo import CustomDatasets -from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS -from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \ - EasyCVBaseDataset -from modelscope.utils.constant import Tasks - - -@CUSTOM_DATASETS.register_module( - group_key=Tasks.face_2d_keypoints, - module_name=CustomDatasets.Face2dKeypointsDataset) -class FaceKeypointDataset(EasyCVBaseDataset, _FaceKeypointDataset): - """EasyCV dataset for face 2d keypoints. - - Args: - split_config (dict): Dataset root path from MSDataset, e.g. - {"train":"local cache path"} or {"evaluation":"local cache path"}. - preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for - the model if supplied. Not support yet. - mode: Training or Evaluation. - """ - - def __init__(self, - split_config=None, - preprocessor=None, - mode=None, - *args, - **kwargs) -> None: - EasyCVBaseDataset.__init__( - self, - split_config=split_config, - preprocessor=preprocessor, - mode=mode, - args=args, - kwargs=kwargs) - _FaceKeypointDataset.__init__(self, *args, **kwargs) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/__init__.py deleted file mode 100644 index 3af670e3..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule - -if TYPE_CHECKING: - from .hand_2d_keypoints_dataset import HandCocoWholeBodyDataset - -else: - _import_structure = { - 'hand_2d_keypoints_dataset': ['HandCocoWholeBodyDataset'] - } - - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/hand_2d_keypoints_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/hand_2d_keypoints_dataset.py deleted file mode 100644 index c6163715..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/hand_2d_keypoints/hand_2d_keypoints_dataset.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-from easycv.datasets.pose import \ - HandCocoWholeBodyDataset as _HandCocoWholeBodyDataset - -from modelscope.metainfo import CustomDatasets -from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS -from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \ - EasyCVBaseDataset -from modelscope.utils.constant import Tasks - - -@CUSTOM_DATASETS.register_module( - group_key=Tasks.hand_2d_keypoints, - module_name=CustomDatasets.HandCocoWholeBodyDataset) -class HandCocoWholeBodyDataset(EasyCVBaseDataset, _HandCocoWholeBodyDataset): - """EasyCV dataset for human hand 2d keypoints. - - Args: - split_config (dict): Dataset root path from MSDataset, e.g. - {"train":"local cache path"} or {"evaluation":"local cache path"}. - preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for - the model if supplied. Not support yet. - mode: Training or Evaluation. - """ - - def __init__(self, - split_config=None, - preprocessor=None, - mode=None, - *args, - **kwargs) -> None: - EasyCVBaseDataset.__init__( - self, - split_config=split_config, - preprocessor=preprocessor, - mode=mode, - args=args, - kwargs=kwargs) - _HandCocoWholeBodyDataset.__init__(self, *args, **kwargs) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/__init__.py deleted file mode 100644 index 472ed2d8..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule - -if TYPE_CHECKING: - from .human_wholebody_keypoint_dataset import WholeBodyCocoTopDownDataset - -else: - _import_structure = { - 'human_wholebody_keypoint_dataset': ['WholeBodyCocoTopDownDataset'] - } - - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/human_wholebody_keypoint_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/human_wholebody_keypoint_dataset.py deleted file mode 100644 index 59c97af8..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/human_wholebody_keypoint/human_wholebody_keypoint_dataset.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from easycv.datasets.pose import \ - WholeBodyCocoTopDownDataset as _WholeBodyCocoTopDownDataset - -from modelscope.metainfo import CustomDatasets -from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS -from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \ - EasyCVBaseDataset -from modelscope.utils.constant import Tasks - - -@CUSTOM_DATASETS.register_module( - group_key=Tasks.human_wholebody_keypoint, - module_name=CustomDatasets.HumanWholeBodyKeypointDataset) -class WholeBodyCocoTopDownDataset(EasyCVBaseDataset, - _WholeBodyCocoTopDownDataset): - """EasyCV dataset for human whole body 2d keypoints. - - Args: - split_config (dict): Dataset root path from MSDataset, e.g. - {"train":"local cache path"} or {"evaluation":"local cache path"}. - preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for - the model if supplied. 
Not support yet. - mode: Training or Evaluation. - """ - - def __init__(self, - split_config=None, - preprocessor=None, - mode=None, - *args, - **kwargs) -> None: - EasyCVBaseDataset.__init__( - self, - split_config=split_config, - preprocessor=preprocessor, - mode=mode, - args=args, - kwargs=kwargs) - _WholeBodyCocoTopDownDataset.__init__(self, *args, **kwargs) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/__init__.py deleted file mode 100644 index 95e8d7a1..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule - -if TYPE_CHECKING: - from .classification_dataset import ClsDataset - -else: - _import_structure = {'classification_dataset': ['ClsDataset']} - - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/classification_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/classification_dataset.py deleted file mode 100644 index 386810c7..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/image_classification/classification_dataset.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from easycv.datasets.classification import ClsDataset as _ClsDataset - -from modelscope.metainfo import CustomDatasets -from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS -from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \ - EasyCVBaseDataset -from modelscope.utils.constant import Tasks - - -@CUSTOM_DATASETS.register_module( - group_key=Tasks.image_classification, - module_name=CustomDatasets.ClsDataset) -class ClsDataset(_ClsDataset): - """EasyCV dataset for classification. - - Args: - split_config (dict): Dataset root path from MSDataset, e.g. - {"train":"local cache path"} or {"evaluation":"local cache path"}. - preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for - the model if supplied. Not support yet. - mode: Training or Evaluation. - """ - - def __init__(self, - split_config=None, - preprocessor=None, - mode=None, - *args, - **kwargs) -> None: - EasyCVBaseDataset.__init__( - self, - split_config=split_config, - preprocessor=preprocessor, - mode=mode, - args=args, - kwargs=kwargs) - _ClsDataset.__init__(self, *args, **kwargs) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/__init__.py deleted file mode 100644 index 26121bdb..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule - -if TYPE_CHECKING: - from .segmentation_dataset import SegDataset - -else: - _import_structure = {'easycv_segmentation': ['SegDataset']} - - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/segmentation_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/segmentation_dataset.py deleted file mode 100644 index 71e7c42b..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/image_semantic_segmentation/segmentation_dataset.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from easycv.datasets.segmentation import SegDataset as _SegDataset - -from modelscope.metainfo import CustomDatasets -from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS -from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \ - EasyCVBaseDataset -from modelscope.utils.constant import Tasks - - -@CUSTOM_DATASETS.register_module( - group_key=Tasks.image_segmentation, module_name=CustomDatasets.SegDataset) -class SegDataset(EasyCVBaseDataset, _SegDataset): - """EasyCV dataset for Sementic segmentation. - For more details, please refer to : - https://github.com/alibaba/EasyCV/blob/master/easycv/datasets/segmentation/raw.py . - - Args: - split_config (dict): Dataset root path from MSDataset, e.g. - {"train":"local cache path"} or {"evaluation":"local cache path"}. - preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for - the model if supplied. Not support yet. - mode: Training or Evaluation. - data_source: Data source config to parse input data. - pipeline: Sequence of transform object or config dict to be composed. - ignore_index (int): Label index to be ignored. - profiling: If set True, will print transform time. - """ - - def __init__(self, - split_config=None, - preprocessor=None, - mode=None, - *args, - **kwargs) -> None: - EasyCVBaseDataset.__init__( - self, - split_config=split_config, - preprocessor=preprocessor, - mode=mode, - args=args, - kwargs=kwargs) - _SegDataset.__init__(self, *args, **kwargs) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/__init__.py b/modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/__init__.py deleted file mode 100644 index 403163e9..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule - -if TYPE_CHECKING: - from .detection_dataset import DetDataset, DetImagesMixDataset - -else: - _import_structure = { - 'detection_dataset': ['DetDataset', 'DetImagesMixDataset'] - } - - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/detection_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/detection_dataset.py deleted file mode 100644 index 66c11f64..00000000 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/object_detection/detection_dataset.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. - -from easycv.datasets.detection import DetDataset as _DetDataset -from easycv.datasets.detection import \ - DetImagesMixDataset as _DetImagesMixDataset - -from modelscope.metainfo import CustomDatasets -from modelscope.msdatasets.dataset_cls.custom_datasets import CUSTOM_DATASETS -from modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base import \ - EasyCVBaseDataset -from modelscope.utils.constant import Tasks - - -@CUSTOM_DATASETS.register_module( - group_key=Tasks.image_object_detection, - module_name=CustomDatasets.DetDataset) -@CUSTOM_DATASETS.register_module( - group_key=Tasks.image_segmentation, module_name=CustomDatasets.DetDataset) -class DetDataset(EasyCVBaseDataset, _DetDataset): - """EasyCV dataset for object detection. - For more details, please refer to https://github.com/alibaba/EasyCV/blob/master/easycv/datasets/detection/raw.py . - - Args: - split_config (dict): Dataset root path from MSDataset, e.g. - {"train":"local cache path"} or {"evaluation":"local cache path"}. - preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for - the model if supplied. Not support yet. - mode: Training or Evaluation. - data_source: Data source config to parse input data. - pipeline: Transform config list - profiling: If set True, will print pipeline time - classes: A list of class names, used in evaluation for result and groundtruth visualization - """ - - def __init__(self, - split_config=None, - preprocessor=None, - mode=None, - *args, - **kwargs) -> None: - EasyCVBaseDataset.__init__( - self, - split_config=split_config, - preprocessor=preprocessor, - mode=mode, - args=args, - kwargs=kwargs) - _DetDataset.__init__(self, *args, **kwargs) - - -@CUSTOM_DATASETS.register_module( - group_key=Tasks.image_object_detection, - module_name=CustomDatasets.DetImagesMixDataset) -@CUSTOM_DATASETS.register_module( - group_key=Tasks.domain_specific_object_detection, - module_name=CustomDatasets.DetImagesMixDataset) -class DetImagesMixDataset(EasyCVBaseDataset, _DetImagesMixDataset): - """EasyCV dataset for object detection, a wrapper of multiple images mixed dataset. - Suitable for training on multiple images mixed data augmentation like - mosaic and mixup. For the augmentation pipeline of mixed image data, - the `get_indexes` method needs to be provided to obtain the image - indexes, and you can set `skip_flags` to change the pipeline running - process. At the same time, we provide the `dynamic_scale` parameter - to dynamically change the output image size. 
- output boxes format: cx, cy, w, h - - For more details, please refer to https://github.com/alibaba/EasyCV/blob/master/easycv/datasets/detection/mix.py . - - Args: - split_config (dict): Dataset root path from MSDataset, e.g. - {"train":"local cache path"} or {"evaluation":"local cache path"}. - preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for - the model if supplied. Not support yet. - mode: Training or Evaluation. - data_source (:obj:`DetSourceCoco`): Data source config to parse input data. - pipeline (Sequence[dict]): Sequence of transform object or - config dict to be composed. - dynamic_scale (tuple[int], optional): The image scale can be changed - dynamically. Default to None. - skip_type_keys (list[str], optional): Sequence of type string to - be skip pipeline. Default to None. - label_padding: out labeling padding [N, 120, 5] - """ - - def __init__(self, - split_config=None, - preprocessor=None, - mode=None, - *args, - **kwargs) -> None: - EasyCVBaseDataset.__init__( - self, - split_config=split_config, - preprocessor=preprocessor, - mode=mode, - args=args, - kwargs=kwargs) - _DetImagesMixDataset.__init__(self, *args, **kwargs) diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_recognition_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_recognition_dataset.py index bc9cd3ca..bfbb6eb3 100644 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_recognition_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/ocr_recognition_dataset.py @@ -34,10 +34,12 @@ def Q2B(uchar): Tasks.ocr_recognition, module_name=Models.ocr_recognition) class OCRRecognitionDataset(TorchCustomDataset): - def __init__(self, **kwargs): + def __init__(self, local_lmdb=None, preprocessor=None, **kwargs): split_config = kwargs['split_config'] cache_root = next(iter(split_config.values())) lmdb_path = os.path.join(cache_root, DATASET_STRUCTURE['lmdb']) + if local_lmdb is not None: + lmdb_path = local_lmdb self.env = lmdb.open( lmdb_path, max_readers=1, @@ -51,7 +53,7 @@ class OCRRecognitionDataset(TorchCustomDataset): self.nSamples = 0 with self.env.begin(write=False) as txn: self.nSamples = int(txn.get('num-samples'.encode())) - self.reco_preprocess = kwargs['preprocessor'] + self.reco_preprocess = preprocessor def __len__(self): return self.nSamples diff --git a/modelscope/msdatasets/dataset_cls/dataset.py b/modelscope/msdatasets/dataset_cls/dataset.py index 4acf51b1..9114285e 100644 --- a/modelscope/msdatasets/dataset_cls/dataset.py +++ b/modelscope/msdatasets/dataset_cls/dataset.py @@ -4,11 +4,15 @@ import copy import os import datasets +import pandas as pd from datasets import IterableDataset -from PIL import Image -from modelscope.utils.constant import EXTENSIONS_TO_LOAD +from modelscope.msdatasets.utils.maxcompute_utils import MaxComputeUtil +from modelscope.utils.constant import (DEFAULT_MAXCOMPUTE_ENDPOINT, + EXTENSIONS_TO_LOAD, MaxComputeEnvs, + VirgoDatasetConfig) from modelscope.utils.logger import get_logger +from modelscope.utils.url_utils import fetch_csv_with_url, valid_url logger = get_logger() @@ -97,6 +101,7 @@ class NativeIterableDataset(IterableDataset): ex_cache_path = dl_manager.download_and_extract(v) ret[k] = ex_cache_path if k.endswith('Image:FILE'): + from PIL import Image ret[k + ':Object'] = Image.open(fp=ex_cache_path) if k.endswith('Audio:FILE'): import torchaudio @@ -108,3 +113,154 @@ class NativeIterableDataset(IterableDataset): def __len__(self): return 1 + + +class 
VirgoDataset(object): + """Dataset class for Virgo. + + Attributes: + _meta_content (str): Virgo meta data content, could be a url that contains csv file. + _data_type (int): Virgo dataset type, 0-Standard virgo dataset; Others-User define dataset (to be supported) + + Examples: + >>> from modelscope.msdatasets.dataset_cls.dataset import VirgoDataset + >>> input_kwargs = {'metaContent': 'http://xxx-xxx/xxx.csv', 'samplingType': 0} + >>> virgo_dataset = VirgoDataset(**input_kwargs) + >>> print(virgo_dataset[1]) + >>> print(len(virgo_dataset)) + >>> for line in virgo_dataset: + >>> print(line) + + Note: If you set `download_virgo_files` to True by using + MsDataset.load(dataset_name='your-virgo-dataset-id', hub=Hubs.virgo, download_virgo_files=True), + you can get the cache file path of the virgo dataset, the column name is `cache_file`. + >>> if virgo_dataset.download_virgo_files: + >>> print(virgo_dataset[1].get('cache_file')) + """ + + def __init__(self, **kwargs): + + self._meta_content: str = '' + self.data_type: int = 0 + self.odps_table_name: str = '' + self.odps_table_partition: str = None + self._odps_utils: MaxComputeUtil = None + self.config_kwargs = kwargs + + self._meta: pd.DataFrame = pd.DataFrame() + + self._meta_content = self.config_kwargs.pop( + VirgoDatasetConfig.meta_content, '') + self.data_type = self.config_kwargs.pop( + VirgoDatasetConfig.sampling_type, 0) + + self._check_variables() + self._parse_meta() + + self.meta_content_cache_file = '' + self.virgo_cache_dir = '' + self.download_virgo_files: bool = False + + self.odps_table_ins = None + self.odps_reader_ins = None + self.odps_batch_size = self.config_kwargs.pop('odps_batch_size', 100) + self.odps_limit = self.config_kwargs.pop('odps_limit', None) + self.odps_drop_last = self.config_kwargs.pop('odps_drop_last', False) + if self._odps_utils: + self.odps_table_ins, self.odps_reader_ins = self._odps_utils.get_table_reader_ins( + self.odps_table_name, self.odps_table_partition) + + def __getitem__(self, index): + if self.odps_reader_ins: + return MaxComputeUtil.gen_reader_item( + reader=self.odps_reader_ins, + index=index, + batch_size_in=self.odps_batch_size, + limit_in=self.odps_limit, + drop_last_in=self.odps_drop_last, + partitions=self.odps_table_ins.table_schema.partitions, + columns=self.odps_table_ins.table_schema.names) + return self._meta.iloc[index].to_dict() + + def __len__(self): + if isinstance(self._meta, dict): + return self._meta.get('odpsCount', 0) + return len(self._meta) + + def __iter__(self): + if self.odps_reader_ins: + odps_batch_data = MaxComputeUtil.gen_reader_batch( + reader=self.odps_reader_ins, + batch_size_in=self.odps_batch_size, + limit_in=self.odps_limit, + drop_last_in=self.odps_drop_last, + partitions=self.odps_table_ins.table_schema.partitions, + columns=self.odps_table_ins.table_schema.names) + for batch in odps_batch_data: + yield batch + else: + for _, row in self._meta.iterrows(): + yield row.to_dict() + + @property + def meta(self) -> pd.DataFrame: + """ + Virgo meta data. Contains columns: id, meta_info, analysis_result, external_info and + cache_file (if download_virgo_files is True). 
+ """ + return self._meta + + def _parse_meta(self): + # Fetch csv content + if isinstance(self._meta_content, str) and valid_url( + self._meta_content): + meta_content_df = fetch_csv_with_url(self._meta_content) + self._meta = meta_content_df + elif isinstance(self._meta_content, dict): + self._meta = self._meta_content + self.odps_table_name = self._meta.get('odpsTableName', '') + self.odps_table_partition = self._meta.get('odpsTablePartition', + None) + self._odps_utils = self._get_odps_info() + else: + raise 'The meta content must be url or dict.' + + @staticmethod + def _get_odps_info() -> MaxComputeUtil: + """ + Get MaxComputeUtil instance. + + Args: + None + + Returns: + MaxComputeUtil instance. + """ + access_id = os.environ.get(MaxComputeEnvs.ACCESS_ID, '') + access_key = os.environ.get(MaxComputeEnvs.ACCESS_SECRET_KEY, '') + proj_name = os.environ.get(MaxComputeEnvs.PROJECT_NAME, '') + endpoint = os.environ.get(MaxComputeEnvs.ENDPOINT, + DEFAULT_MAXCOMPUTE_ENDPOINT) + + if not access_id or not access_key or not proj_name: + raise ValueError( + f'Please set MaxCompute envs for Virgo: {MaxComputeEnvs.ACCESS_ID}, ' + f'{MaxComputeEnvs.ACCESS_SECRET_KEY}, {MaxComputeEnvs.PROJECT_NAME}, ' + f'{MaxComputeEnvs.ENDPOINT}(default: http://service-corp.odps.aliyun-inc.com/api)' + ) + + return MaxComputeUtil(access_id, access_key, proj_name, endpoint) + + def _check_variables(self): + """Check member variables in this class. + 1. Condition-1: self._meta_content cannot be empty + 2. Condition-2: self._meta_content must be url when self._data_type is 0 + """ + if not self._meta_content: + raise 'Them meta content cannot be empty.' + if self.data_type not in [0, 1]: + raise 'Supported samplingType should be 0 or 1, others are not supported yet.' + if self.data_type == 0 and not valid_url(self._meta_content): + raise 'The meta content must be url when data type is 0.' + if self.data_type == 1 and not isinstance(self._meta_content, dict): + raise 'The meta content must be dict when data type is 1.' 
diff --git a/modelscope/msdatasets/download/dataset_builder.py b/modelscope/msdatasets/download/dataset_builder.py index 73a3a1a1..8ad5243a 100644 --- a/modelscope/msdatasets/download/dataset_builder.py +++ b/modelscope/msdatasets/download/dataset_builder.py @@ -18,8 +18,8 @@ from datasets.utils.py_utils import map_nested from modelscope.hub.api import HubApi from modelscope.msdatasets.context.dataset_context_config import \ DatasetContextConfig -from modelscope.msdatasets.dataset_cls.dataset import (ExternalDataset, - NativeIterableDataset) +from modelscope.msdatasets.dataset_cls import (ExternalDataset, + NativeIterableDataset) from modelscope.msdatasets.download.download_manager import \ DataStreamingDownloadManager from modelscope.msdatasets.utils.dataset_utils import \ diff --git a/modelscope/msdatasets/meta/data_meta_manager.py b/modelscope/msdatasets/meta/data_meta_manager.py index d90b8d5e..0fa74c37 100644 --- a/modelscope/msdatasets/meta/data_meta_manager.py +++ b/modelscope/msdatasets/meta/data_meta_manager.py @@ -140,6 +140,14 @@ class DataMetaManager(object): self.dataset_context_config.data_meta_config = data_meta_config + def fetch_virgo_meta(self) -> None: + virgo_dataset_id = self.dataset_context_config.dataset_name + version = int(self.dataset_context_config.version) + + meta_content = self.api.get_virgo_meta( + dataset_id=virgo_dataset_id, version=version) + self.dataset_context_config.config_kwargs.update(meta_content) + def _fetch_meta_from_cache(self, meta_cache_dir): local_paths = defaultdict(list) dataset_type = None diff --git a/modelscope/msdatasets/ms_dataset.py b/modelscope/msdatasets/ms_dataset.py index 0a88eb91..912e061d 100644 --- a/modelscope/msdatasets/ms_dataset.py +++ b/modelscope/msdatasets/ms_dataset.py @@ -13,13 +13,14 @@ from datasets.utils.file_utils import is_relative_path from modelscope.hub.repository import DatasetRepository from modelscope.msdatasets.context.dataset_context_config import \ DatasetContextConfig +from modelscope.msdatasets.data_loader.data_loader import VirgoDownloader from modelscope.msdatasets.data_loader.data_loader_manager import ( LocalDataLoaderManager, LocalDataLoaderType, RemoteDataLoaderManager, RemoteDataLoaderType) +from modelscope.msdatasets.dataset_cls import (ExternalDataset, + NativeIterableDataset) from modelscope.msdatasets.dataset_cls.custom_datasets.builder import \ build_custom_dataset -from modelscope.msdatasets.dataset_cls.dataset import (ExternalDataset, - NativeIterableDataset) from modelscope.msdatasets.utils.delete_utils import DatasetDeleteManager from modelscope.msdatasets.utils.upload_utils import DatasetUploadManager from modelscope.preprocessors import build_preprocessor @@ -28,7 +29,7 @@ from modelscope.utils.config_ds import MS_DATASETS_CACHE from modelscope.utils.constant import (DEFAULT_DATASET_NAMESPACE, DEFAULT_DATASET_REVISION, ConfigFields, DownloadMode, Hubs, ModeKeys, Tasks, - UploadMode) + UploadMode, VirgoDatasetConfig) from modelscope.utils.import_utils import is_tf_available, is_torch_available from modelscope.utils.logger import get_logger @@ -188,9 +189,6 @@ class MsDataset: data_files (str or Sequence or Mapping, optional): Path(s) to source data file(s). split (str, optional): Which split of the data to load. hub (Hubs or str, optional): When loading from a remote hub, where it is from. default Hubs.modelscope - download_mode (DownloadMode or str, optional): - How to treat existing datasets. 
default DownloadMode.REUSE_DATASET_IF_EXISTS - config_kwargs (additional keyword arguments): Keyword arguments to be passed download_mode (DownloadMode or str, optional): How to treat existing datasets. default DownloadMode.REUSE_DATASET_IF_EXISTS cache_dir (str, Optional): User-define local cache directory. @@ -287,6 +285,23 @@ class MsDataset: custom_cfg=custom_cfg, **config_kwargs) dataset_inst.is_custom = True return dataset_inst + elif hub == Hubs.virgo: + # Rewrite the namespace, version and cache_dir for virgo dataset. + if namespace == DEFAULT_DATASET_NAMESPACE: + dataset_context_config.namespace = VirgoDatasetConfig.default_virgo_namespace + if version == DEFAULT_DATASET_REVISION: + dataset_context_config.version = VirgoDatasetConfig.default_dataset_version + if cache_dir == MS_DATASETS_CACHE: + from modelscope.utils.config_ds import CACHE_HOME + cache_dir = os.path.join(CACHE_HOME, 'virgo', 'hub', + 'datasets') + dataset_context_config.cache_root_dir = cache_dir + + virgo_downloader = VirgoDownloader(dataset_context_config) + virgo_downloader.process() + + return virgo_downloader.dataset + else: raise 'Please adjust input args to specify a loading mode, we support following scenes: ' \ 'loading from local disk, huggingface hub and modelscope hub.' diff --git a/modelscope/msdatasets/utils/maxcompute_utils.py b/modelscope/msdatasets/utils/maxcompute_utils.py new file mode 100644 index 00000000..83c6370d --- /dev/null +++ b/modelscope/msdatasets/utils/maxcompute_utils.py @@ -0,0 +1,160 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import math + +import pandas as pd + + +class MaxComputeUtil: + """ + MaxCompute util class. + + Args: + access_id: your access id of MaxCompute + access_key: access key of MaxCompute + project_name: your project name of MaxCompute + endpoint: endpoint of MaxCompute + + Attributes: + _odps: ODPS object + + """ + + def __init__(self, access_id, access_key, project_name, endpoint): + from odps import ODPS + self._odps = ODPS(access_id, access_key, project_name, endpoint) + + def _get_table(self, table_name): + """ + Get MaxCompute table object. + """ + return self._odps.get_table(table_name) + + def _read_data(self, table_name: str, pt_condition: str) -> pd.DataFrame: + """ + Read data from MaxCompute table. + :param table_name: table name + :param pt_condition: partition condition, + Example: pt_condition = 'dt=20230331' + :return: pandas dataframe with all data + """ + t = self._get_table(table_name) + + with t.open_reader(partition=pt_condition, limit=False) as reader: + pd_df = reader.to_pandas() + + return pd_df + + def fetch_data_to_csv(self, table_name: str, pt_condition: str, + output_path: str) -> None: + """ + Fetch data from MaxCompute table to local file. + :param table_name: table name + :param pt_condition: partition condition, + Example: pt_condition = 'dt=20230331' + :param output_path: output path + :return: None + """ + pd_df = self._read_data(table_name, pt_condition) + pd_df.to_csv(output_path, index=False) + print(f'Fetch data to {output_path} successfully.') + + @staticmethod + def _check_batch_args(reader, batch_size, limit): + if not limit: + limit = reader.count + if batch_size <= 0: + raise ValueError( + f'batch_size must be positive, but got {batch_size}') + if batch_size > limit: + batch_size = limit + return batch_size, limit + + @staticmethod + def gen_reader_batch(reader, batch_size_in: int, limit_in: int, + drop_last_in: bool, partitions: list, columns: list): + """ + Generate batch data from MaxCompute table. 
+ + Args: + reader: MaxCompute table reader + batch_size_in: batch size + limit_in: limit of data, None means fetch all data + drop_last_in: whether drop last incomplete batch data + partitions: table partitions + columns: table columns + + Returns: + batch data generator + """ + + batch_size_in, limit_in = MaxComputeUtil._check_batch_args( + reader, batch_size_in, limit_in) + + batch_num = math.floor(limit_in / batch_size_in) + for i in range(batch_num + 1): + if i == batch_num and not drop_last_in and limit_in % batch_size_in > 0: + batch_records = reader[i * batch_size_in:( + i * batch_size_in + (limit_in % batch_size_in))] + else: + batch_records = reader[i * batch_size_in:(i + 1) + * batch_size_in] + batch_data_list = [] + for record in batch_records: + tmp_vals = [val for _, val in list(record)] + tmp_vals = tmp_vals[:(len(tmp_vals) - len(partitions))] + batch_data_list.append(tmp_vals) + yield pd.DataFrame(batch_data_list, columns=columns) + + @staticmethod + def gen_reader_item(reader, index: int, batch_size_in: int, limit_in: int, + drop_last_in: bool, partitions: list, columns: list): + """ + Get single batch data from MaxCompute table by indexing. + + Args: + reader: MaxCompute table reader + index: index of batch data + batch_size_in: batch size + limit_in: limit of data, None means fetch all data + drop_last_in: whether drop last incomplete batch data + partitions: table partitions + columns: table columns + + Returns: + single batch data (dataframe) + """ + batch_size_in, limit_in = MaxComputeUtil._check_batch_args( + reader, batch_size_in, limit_in) + + if drop_last_in: + batch_num = math.floor(limit_in / batch_size_in) + else: + batch_num = math.ceil(limit_in / batch_size_in) + + if index < 0: + raise ValueError(f'index must be non-negative, but got {index}') + if index >= batch_num: + raise ValueError( + f'index must be less than batch_num, but got index={index}, batch_num={batch_num}' + ) + + start = index * batch_size_in + end = (index + 1) * batch_size_in + if end > limit_in: + end = limit_in + batch_item = reader[start:end] + + batch_data_list = [] + for record in batch_item: + tmp_vals = [val for _, val in list(record)] + tmp_vals = tmp_vals[:(len(tmp_vals) - len(partitions))] + batch_data_list.append(tmp_vals) + + return pd.DataFrame(batch_data_list, columns=columns) + + def get_table_reader_ins(self, table_name: str, pt_condition: str = None): + + table_ins = self._get_table(table_name) + with table_ins.open_reader(partition=pt_condition) as reader: + return table_ins, reader diff --git a/modelscope/outputs/nlp_outputs.py b/modelscope/outputs/nlp_outputs.py index e288df70..d6b934c2 100644 --- a/modelscope/outputs/nlp_outputs.py +++ b/modelscope/outputs/nlp_outputs.py @@ -454,3 +454,13 @@ class SentencEmbeddingModelOutput(ModelOutputBase): query_embeddings: Tensor = None doc_embeddings: Tensor = None loss: Tensor = None + + +@dataclass +class TranslationEvaluationOutput(ModelOutputBase): + """The output class for translation evaluation models. + """ + + score: Tensor = None + loss: Tensor = None + input_format: List[str] = None diff --git a/modelscope/outputs/outputs.py b/modelscope/outputs/outputs.py index ddbe4593..ab24a34c 100644 --- a/modelscope/outputs/outputs.py +++ b/modelscope/outputs/outputs.py @@ -1,6 +1,10 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
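# Looking back at the `MaxComputeUtil` helpers added above: `get_table_reader_ins` opens a table reader once, and `gen_reader_batch` / `gen_reader_item` slice it into pandas frames of `batch_size` rows with partition values stripped. A hedged usage sketch; the env variable names, table name and partition below are placeholders, not the constants defined in `MaxComputeEnvs`:

```python
import os

from modelscope.msdatasets.utils.maxcompute_utils import MaxComputeUtil

# Placeholder credentials and table; real values come from your MaxCompute project.
util = MaxComputeUtil(
    access_id=os.environ['ODPS_ACCESS_ID'],
    access_key=os.environ['ODPS_ACCESS_SECRET_KEY'],
    project_name=os.environ['ODPS_PROJECT_NAME'],
    endpoint=os.environ.get('ODPS_ENDPOINT',
                            'http://service-corp.odps.aliyun-inc.com/api'))

table_ins, reader = util.get_table_reader_ins(
    'my_table', pt_condition='dt=20230331')
for batch_df in MaxComputeUtil.gen_reader_batch(
        reader,
        batch_size_in=100,
        limit_in=1000,
        drop_last_in=False,
        partitions=table_ins.table_schema.partitions,
        columns=table_ins.table_schema.names):
    print(batch_df.shape)
```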
from collections import OrderedDict, namedtuple from dataclasses import dataclass, fields +from typing import Dict, List, Tuple + +import numpy as np +import torch from modelscope.utils.constant import Tasks @@ -50,7 +54,7 @@ class OutputKeys(object): SQL_STRING = 'sql_string' SQL_QUERY = 'sql_query' HISTORY = 'history' - QUERT_RESULT = 'query_result' + QUERY_RESULT = 'query_result' TIMESTAMPS = 'timestamps' SHOT_NUM = 'shot_num' SCENE_NUM = 'scene_num' @@ -62,8 +66,343 @@ class OutputKeys(object): TBOUNDS = 'tbounds' -TASK_OUTPUTS = { +OutputTypes = { + OutputKeys.LOSS: float, # checked + OutputKeys.LOGITS: np.ndarray, # checked. + OutputKeys.SCORES: List[float], # checked + OutputKeys.SCORE: float, # checked + OutputKeys.LABEL: str, # checked + OutputKeys.LABELS: List[str], # checked + OutputKeys.INPUT_IDS: np.ndarray, # checked + OutputKeys.LABEL_POS: np.ndarray, # checked + OutputKeys.POSES: + List[np.ndarray], # [Tuple(np.ndarray, np.ndarray)] # checked doubtful + OutputKeys.CAPTION: str, + OutputKeys.BOXES: np.ndarray, # checked + OutputKeys.KEYPOINTS: np.ndarray, # checked + OutputKeys.MASKS: np.ndarray, # checked + OutputKeys.DEPTHS: List[np.ndarray], # checked + OutputKeys.DEPTHS_COLOR: List[np.ndarray], # checked + OutputKeys.LAYOUT: np.ndarray, # checked + OutputKeys.TEXT: str, # checked + OutputKeys.POLYGONS: np.array, # checked + OutputKeys.OUTPUT: Dict, + OutputKeys.OUTPUT_IMG: 'image', # checked + OutputKeys.OUTPUT_IMGS: List[np.ndarray], # checked + OutputKeys.OUTPUT_VIDEO: 'bytes', + OutputKeys.OUTPUT_PCM: np.ndarray, + OutputKeys.OUTPUT_PCM_LIST: List[np.ndarray], + OutputKeys.OUTPUT_WAV: np.ndarray, + OutputKeys.OUTPUT_OBJ: Dict, + OutputKeys.OUTPUT_MESH: np.ndarray, + OutputKeys.IMG_EMBEDDING: np.ndarray, + OutputKeys.SPK_EMBEDDING: np.ndarray, + OutputKeys.SPO_LIST: List[float], + OutputKeys.TEXT_EMBEDDING: np.ndarray, + OutputKeys.TRANSLATION: str, + OutputKeys.RESPONSE: Dict, + OutputKeys.PREDICTION: np.ndarray, # checked + OutputKeys.PREDICTIONS: List[np.ndarray], + OutputKeys.PROBABILITIES: np.ndarray, + OutputKeys.DIALOG_STATES: object, + OutputKeys.VIDEO_EMBEDDING: np.ndarray, + OutputKeys.UUID: str, + OutputKeys.WORD: str, + OutputKeys.KWS_LIST: List[str], + OutputKeys.SQL_STRING: str, # checked + OutputKeys.SQL_QUERY: str, # checked + OutputKeys.HISTORY: Dict, # checked + OutputKeys.QUERY_RESULT: Dict, # checked + OutputKeys.TIMESTAMPS: str, + OutputKeys.SHOT_NUM: int, + OutputKeys.SCENE_NUM: int, + OutputKeys.SCENE_META_LIST: List[int], + OutputKeys.SHOT_META_LIST: List[int], + OutputKeys.MATCHES: List[np.ndarray], + OutputKeys.PCD12: np.ndarray, + OutputKeys.PCD12_ALIGN: np.ndarray, + OutputKeys.TBOUNDS: Dict, +} +OutputTypeSchema = { + OutputKeys.LOSS: { + 'type': 'number' + }, # checked + OutputKeys.LOGITS: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, # checked. 
+ OutputKeys.SCORES: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, # checked + OutputKeys.SCORE: { + 'type': 'number' + }, # checked + OutputKeys.LABEL: { + 'type': 'string' + }, # checked + OutputKeys.LABELS: { + 'type': 'array', + 'items': { + 'type': 'string' + } + }, # checked + OutputKeys.INPUT_IDS: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, # checked + OutputKeys.LABEL_POS: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, # checked + OutputKeys.POSES: { + 'type': 'array', + 'items': { + 'type': 'array', + 'items': { + 'type': 'number' + } + } + }, # [Tuple(np.ndarray, np.ndarray)] # checked doubtful + OutputKeys.CAPTION: { + 'type': 'string' + }, + OutputKeys.BOXES: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, # checked + OutputKeys.KEYPOINTS: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, # checked + OutputKeys.MASKS: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, # checked + OutputKeys.DEPTHS: { + 'type': 'array', + 'items': { + 'type': 'array', + 'items': { + 'type': 'number' + } + } + }, # checked + OutputKeys.DEPTHS_COLOR: { + 'type': 'array', + 'items': { + 'type': 'array', + 'items': { + 'type': 'number' + } + } + }, # checked + OutputKeys.LAYOUT: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, # checked + OutputKeys.TEXT: { + 'type': 'string' + }, # checked + OutputKeys.POLYGONS: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, # checked + OutputKeys.OUTPUT: { + 'type': 'object' + }, + OutputKeys.OUTPUT_IMG: { + 'type': 'string', + 'description': 'The base64 encoded image.', + }, # checked + OutputKeys.OUTPUT_IMGS: { + 'type': 'array', + 'items': { + 'type': 'string', + 'description': 'The base64 encoded image.', + } + }, # checked + OutputKeys.OUTPUT_VIDEO: { + 'type': 'string', + 'description': 'The base64 encoded video.', + }, + OutputKeys.OUTPUT_PCM: { + 'type': 'string', + 'description': 'The base64 encoded PCM.', + }, + OutputKeys.OUTPUT_PCM_LIST: { + 'type': 'array', + 'items': { + 'type': 'string', + 'description': 'The base64 encoded PCM.', + } + }, + OutputKeys.OUTPUT_WAV: { + 'type': 'string', + 'description': 'The base64 encoded WAV.', + }, + OutputKeys.OUTPUT_OBJ: { + 'type': 'object' + }, + OutputKeys.OUTPUT_MESH: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, + OutputKeys.IMG_EMBEDDING: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, + OutputKeys.SPK_EMBEDDING: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, + OutputKeys.SPO_LIST: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, + OutputKeys.TEXT_EMBEDDING: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, + OutputKeys.TRANSLATION: { + 'type': 'string' + }, + OutputKeys.RESPONSE: { + 'type': 'object' + }, + OutputKeys.PREDICTION: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, # checked + OutputKeys.PREDICTIONS: { + 'type': 'array', + 'items': { + 'type': 'array', + 'items': { + 'type': 'number' + } + } + }, + OutputKeys.PROBABILITIES: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, + OutputKeys.DIALOG_STATES: { + 'type': 'object' + }, + OutputKeys.VIDEO_EMBEDDING: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, + OutputKeys.UUID: { + 'type': 'string' + }, + OutputKeys.WORD: { + 'type': 'string' + }, + OutputKeys.KWS_LIST: { + 'type': 'array', + 'items': { + 'type': 'string' + } + }, + OutputKeys.SQL_STRING: { + 'type': 'string' + }, # checked + OutputKeys.SQL_QUERY: { + 'type': 
'string' + }, # checked + OutputKeys.HISTORY: { + 'type': 'object' + }, # checked + OutputKeys.QUERY_RESULT: { + 'type': 'object' + }, # checked + OutputKeys.TIMESTAMPS: { + 'type': 'string' + }, + OutputKeys.SHOT_NUM: { + 'type': 'integer' + }, + OutputKeys.SCENE_NUM: { + 'type': 'integer' + }, + OutputKeys.SCENE_META_LIST: { + 'type': 'array', + 'items': { + 'type': 'integer' + } + }, + OutputKeys.SHOT_META_LIST: { + 'type': 'array', + 'items': { + 'type': 'integer' + } + }, + OutputKeys.MATCHES: { + 'type': 'array', + 'items': { + 'type': 'array', + 'items': { + 'type': 'number' + } + } + }, + OutputKeys.PCD12: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, + OutputKeys.PCD12_ALIGN: { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, + OutputKeys.TBOUNDS: { + 'type': 'object' + }, +} + +TASK_OUTPUTS = { + Tasks.task_template: + [OutputKeys.BOXES, OutputKeys.OUTPUT_IMG, OutputKeys.TEXT_EMBEDDING], # ============ vision tasks =================== # ocr detection result for single sample @@ -388,8 +727,9 @@ TASK_OUTPUTS = { # "scores": [0.885272, 0.014790631, 0.014558001] # "labels": ['噪声强度', '模糊程度', '压缩强度'], # } - Tasks.image_quality_assessment_degradation: - [OutputKeys.SCORES, OutputKeys.LABELS], + Tasks.image_quality_assessment_degradation: [ + OutputKeys.SCORES, OutputKeys.LABELS + ], # live category recognition result for single video # { @@ -1029,6 +1369,10 @@ TASK_OUTPUTS = { # {"text": "this is a text answser. "} Tasks.video_question_answering: [OutputKeys.TEXT], + # Multimodal Dialogue result for a sample + # {"text": "this is a text response. "} + Tasks.multimodal_dialogue: [OutputKeys.TEXT], + # auto_speech_recognition result for a single sample # { # "text": "每天都要快乐喔" @@ -1107,9 +1451,9 @@ TASK_OUTPUTS = { # } Tasks.image_skychange: [OutputKeys.OUTPUT_IMG], # { - # 'scores': [0.1, 0.2, 0.3, ...] + # 'score': [0.1, 0.2, 0.3, ...] # } - Tasks.translation_evaluation: [OutputKeys.SCORES], + Tasks.translation_evaluation: [OutputKeys.SCORE], # video object segmentation result for a single video # { @@ -1140,6 +1484,7 @@ TASK_OUTPUTS = { Tasks.document_grounded_dialog_rerank: [OutputKeys.OUTPUT], Tasks.document_grounded_dialog_retrieval: [OutputKeys.OUTPUT], Tasks.video_temporal_grounding: [OutputKeys.SCORES, OutputKeys.TBOUNDS], + Tasks.text_to_video_synthesis: [OutputKeys.OUTPUT_VIDEO], } diff --git a/modelscope/pipeline_inputs.py b/modelscope/pipeline_inputs.py index 032bdff6..8cb031e7 100644 --- a/modelscope/pipeline_inputs.py +++ b/modelscope/pipeline_inputs.py @@ -20,7 +20,7 @@ class InputType(object): BOX = 'box' DICT = 'dict' LIST = 'list' - INT = 'int' + NUMBER = 'number' INPUT_TYPE = { @@ -31,7 +31,42 @@ INPUT_TYPE = { InputType.BOX: (list, np.ndarray), InputType.DICT: (dict, type(None)), InputType.LIST: (list, type(None)), - InputType.INT: int, + InputType.NUMBER: int, +} + +INPUT_TYPE_SCHEMA = { + InputType.IMAGE: { + 'type': 'string', + 'description': 'Base64 encoded image file or url string.' + }, # support url or base64 encoded file. + InputType.AUDIO: { + 'type': 'string', + 'description': 'Base64 encoded audio file or url string..' + }, # support url or base64 encoded file. + InputType.VIDEO: { + 'type': 'string', + 'description': 'Base64 encoded video file or url string..' + }, # support url or base64 encoded file. + InputType.TEXT: { + 'type': 'string', + 'description': 'The input text.' 
+ }, + InputType.BOX: { + 'type': 'array', + 'description': 'Box coordinate, should be int.', + 'items': { + 'type': 'number' + } + }, + InputType.DICT: { # unknown properties + 'type': 'object', + }, + InputType.LIST: { + 'type': 'array' + }, # unknown item type. + InputType.NUMBER: { + 'type': 'integer' + }, } @@ -47,12 +82,19 @@ def check_input_type(input_type, input): TASK_INPUTS = { + + Tasks.task_template: { + 'image': InputType.IMAGE, + 'text': InputType.TEXT + }, # if task input is single var, value is InputType # if task input is a tuple, value is tuple of InputType # if task input is a dict, value is a dict of InputType, where key # equals the one needed in pipeline input dict # if task input is a list, value is a set of input format, in which - # each element corresponds to one input format as described above. + # each element corresponds to one input format as described above and + # must include a dict format. + # ============ vision tasks =================== Tasks.ocr_detection: InputType.IMAGE, @@ -73,7 +115,7 @@ TASK_INPUTS = { Tasks.human_detection: InputType.IMAGE, Tasks.face_image_generation: - InputType.INT, + InputType.NUMBER, Tasks.image_classification: InputType.IMAGE, Tasks.image_object_detection: @@ -191,8 +233,7 @@ TASK_INPUTS = { Tasks.nli: (InputType.TEXT, InputType.TEXT), Tasks.sentiment_classification: InputType.TEXT, - Tasks.zero_shot_classification: - InputType.TEXT, + Tasks.zero_shot_classification: InputType.TEXT, Tasks.relation_extraction: InputType.TEXT, Tasks.translation: @@ -212,7 +253,13 @@ TASK_INPUTS = { 'source_sentence': InputType.LIST, 'sentences_to_compare': InputType.LIST, }, - Tasks.text_ranking: (InputType.TEXT, InputType.TEXT), + Tasks.text_ranking: [ + (InputType.TEXT, InputType.TEXT), + { + 'source_sentence': InputType.LIST, + 'sentences_to_compare': InputType.LIST + } + ], Tasks.text_generation: InputType.TEXT, Tasks.fid_dialogue: { @@ -261,7 +308,7 @@ TASK_INPUTS = { }, # ============ audio tasks =================== - Tasks.auto_speech_recognition: + Tasks.auto_speech_recognition: # input can be audio, or audio and text. [InputType.AUDIO, { 'wav': InputType.AUDIO, 'text': InputType.TEXT @@ -290,6 +337,9 @@ TASK_INPUTS = { Tasks.video_captioning: [InputType.VIDEO, { 'video': InputType.VIDEO, }], + Tasks.multimodal_dialogue: { + 'messages': InputType.LIST, + }, Tasks.visual_grounding: { 'image': InputType.IMAGE, 'text': InputType.TEXT @@ -332,5 +382,9 @@ TASK_INPUTS = { 'video_input_path': InputType.TEXT, 'video_output_path': InputType.TEXT, 'mask_path': InputType.TEXT, - } + }, + Tasks.text_to_video_synthesis: { + 'text': InputType.TEXT + }, + Tasks.video_summarization: InputType.TEXT, } diff --git a/modelscope/pipelines/__init__.py b/modelscope/pipelines/__init__.py index 71fe307b..d98a7af9 100644 --- a/modelscope/pipelines/__init__.py +++ b/modelscope/pipelines/__init__.py @@ -1,7 +1,5 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -from typing import TYPE_CHECKING -from modelscope.utils.import_utils import LazyImportModule from . 
import audio, cv, multi_modal, nlp from .base import Pipeline from .builder import pipeline diff --git a/modelscope/pipelines/audio/asr_inference_pipeline.py b/modelscope/pipelines/audio/asr_inference_pipeline.py index b5a4cba7..b9c0bd03 100644 --- a/modelscope/pipelines/audio/asr_inference_pipeline.py +++ b/modelscope/pipelines/audio/asr_inference_pipeline.py @@ -54,6 +54,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): lm_model_revision: Optional[str] = None, timestamp_model: Optional[Union[Model, str]] = None, timestamp_model_revision: Optional[str] = None, + ngpu: int = 1, **kwargs): """ Use `model` and `preprocessor` to create an asr pipeline for prediction @@ -87,7 +88,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): beam_size('int'): beam size for decoding ctc_weight('float'): - CTC weight in joint decoding + the CTC weight in joint decoding lm_weight('float'): lm weight decoding_ind('int', defaults to 0): @@ -119,48 +120,48 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): self.model_cfg = self.model.forward() self.cmd = self.get_cmd(kwargs, model) - if self.cmd['code_base'] == 'funasr': - from funasr.bin import asr_inference_launch - self.funasr_infer_modelscope = asr_inference_launch.inference_launch( - mode=self.cmd['mode'], - maxlenratio=self.cmd['maxlenratio'], - minlenratio=self.cmd['minlenratio'], - batch_size=self.cmd['batch_size'], - beam_size=self.cmd['beam_size'], - ngpu=self.cmd['ngpu'], - ctc_weight=self.cmd['ctc_weight'], - lm_weight=self.cmd['lm_weight'], - penalty=self.cmd['penalty'], - log_level=self.cmd['log_level'], - asr_train_config=self.cmd['asr_train_config'], - asr_model_file=self.cmd['asr_model_file'], - cmvn_file=self.cmd['cmvn_file'], - lm_file=self.cmd['lm_file'], - token_type=self.cmd['token_type'], - key_file=self.cmd['key_file'], - lm_train_config=self.cmd['lm_train_config'], - bpemodel=self.cmd['bpemodel'], - allow_variable_data_keys=self.cmd['allow_variable_data_keys'], - output_dir=self.cmd['output_dir'], - dtype=self.cmd['dtype'], - seed=self.cmd['seed'], - ngram_weight=self.cmd['ngram_weight'], - nbest=self.cmd['nbest'], - num_workers=self.cmd['num_workers'], - vad_infer_config=self.cmd['vad_infer_config'], - vad_model_file=self.cmd['vad_model_file'], - vad_cmvn_file=self.cmd['vad_cmvn_file'], - punc_model_file=self.cmd['punc_model_file'], - punc_infer_config=self.cmd['punc_infer_config'], - timestamp_model_file=self.cmd['timestamp_model_file'], - timestamp_infer_config=self.cmd['timestamp_infer_config'], - timestamp_cmvn_file=self.cmd['timestamp_cmvn_file'], - outputs_dict=self.cmd['outputs_dict'], - param_dict=self.cmd['param_dict'], - token_num_relax=self.cmd['token_num_relax'], - decoding_ind=self.cmd['decoding_ind'], - decoding_mode=self.cmd['decoding_mode'], - ) + from funasr.bin import asr_inference_launch + self.funasr_infer_modelscope = asr_inference_launch.inference_launch( + mode=self.cmd['mode'], + maxlenratio=self.cmd['maxlenratio'], + minlenratio=self.cmd['minlenratio'], + batch_size=self.cmd['batch_size'], + beam_size=self.cmd['beam_size'], + ngpu=self.cmd['ngpu'], + ctc_weight=self.cmd['ctc_weight'], + lm_weight=self.cmd['lm_weight'], + penalty=self.cmd['penalty'], + log_level=self.cmd['log_level'], + asr_train_config=self.cmd['asr_train_config'], + asr_model_file=self.cmd['asr_model_file'], + cmvn_file=self.cmd['cmvn_file'], + lm_file=self.cmd['lm_file'], + token_type=self.cmd['token_type'], + key_file=self.cmd['key_file'], + lm_train_config=self.cmd['lm_train_config'], + bpemodel=self.cmd['bpemodel'], + 
allow_variable_data_keys=self.cmd['allow_variable_data_keys'], + output_dir=self.cmd['output_dir'], + dtype=self.cmd['dtype'], + seed=self.cmd['seed'], + ngram_weight=self.cmd['ngram_weight'], + nbest=self.cmd['nbest'], + num_workers=self.cmd['num_workers'], + vad_infer_config=self.cmd['vad_infer_config'], + vad_model_file=self.cmd['vad_model_file'], + vad_cmvn_file=self.cmd['vad_cmvn_file'], + punc_model_file=self.cmd['punc_model_file'], + punc_infer_config=self.cmd['punc_infer_config'], + timestamp_model_file=self.cmd['timestamp_model_file'], + timestamp_infer_config=self.cmd['timestamp_infer_config'], + timestamp_cmvn_file=self.cmd['timestamp_cmvn_file'], + outputs_dict=self.cmd['outputs_dict'], + param_dict=self.cmd['param_dict'], + token_num_relax=self.cmd['token_num_relax'], + decoding_ind=self.cmd['decoding_ind'], + decoding_mode=self.cmd['decoding_mode'], + **kwargs, + ) def __call__(self, audio_in: Union[str, bytes], @@ -197,7 +198,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): """ # code base - code_base = self.cmd['code_base'] + # code_base = self.cmd['code_base'] self.recog_type = recog_type self.audio_format = audio_format self.audio_fs = None @@ -207,31 +208,21 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): self.cmd['output_dir'] = output_dir self.cmd['param_dict'] = param_dict - if code_base == 'funasr': - if isinstance(audio_in, str): - # for funasr code, generate wav.scp from url or local path - self.audio_in, self.raw_inputs = generate_scp_from_url( - audio_in) - elif isinstance(audio_in, bytes): - self.audio_in = audio_in - self.raw_inputs = None - else: - import numpy - import torch - if isinstance(audio_in, torch.Tensor): - self.audio_in = None - self.raw_inputs = audio_in - elif isinstance(audio_in, numpy.ndarray): - self.audio_in = None - self.raw_inputs = audio_in - elif isinstance(audio_in, str): - # load pcm data from url if audio_in is url str - self.audio_in, checking_audio_fs = load_bytes_from_url(audio_in) + if isinstance(audio_in, str): + # for funasr code, generate wav.scp from url or local path + self.audio_in, self.raw_inputs = generate_scp_from_url(audio_in) elif isinstance(audio_in, bytes): - # load pcm data from wav data if audio_in is wave format - self.audio_in, checking_audio_fs = extract_pcm_from_wav(audio_in) - else: self.audio_in = audio_in + self.raw_inputs = None + else: + import numpy + import torch + if isinstance(audio_in, torch.Tensor): + self.audio_in = None + self.raw_inputs = audio_in + elif isinstance(audio_in, numpy.ndarray): + self.audio_in = None + self.raw_inputs = audio_in # set the sample_rate of audio_in if checking_audio_fs is valid if checking_audio_fs is not None: @@ -265,12 +256,6 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): if self.preprocessor is None: self.preprocessor = WavToScp() - # pipeline() from pipelines/builder.py passes 'device' but 'ngpu' needed here - device = extra_args.get('device') - if device == 'cpu': - extra_args['ngpu'] = 0 - elif device == 'gpu': - extra_args['ngpu'] = 1 outputs = self.preprocessor.config_checking(self.model_cfg) # generate asr inference command cmd = { @@ -323,109 +308,88 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): } } - if self.framework == Frameworks.torch: - frontend_conf = None - token_num_relax = None - decoding_ind = None - decoding_mode = None - if os.path.exists(outputs['am_model_config']): - config_file = open( - outputs['am_model_config'], encoding='utf-8') - root = yaml.full_load(config_file) - config_file.close() - if 'frontend_conf' in 
root: - frontend_conf = root['frontend_conf'] - if os.path.exists(outputs['asr_model_config']): - config_file = open( - outputs['asr_model_config'], encoding='utf-8') - root = yaml.full_load(config_file) - config_file.close() - if 'token_num_relax' in root: - token_num_relax = root['token_num_relax'] - if 'decoding_ind' in root: - decoding_ind = root['decoding_ind'] - if 'decoding_mode' in root: - decoding_mode = root['decoding_mode'] + frontend_conf = None + token_num_relax = None + decoding_ind = None + decoding_mode = None + if os.path.exists(outputs['am_model_config']): + config_file = open(outputs['am_model_config'], encoding='utf-8') + root = yaml.full_load(config_file) + config_file.close() + if 'frontend_conf' in root: + frontend_conf = root['frontend_conf'] + if os.path.exists(outputs['asr_model_config']): + config_file = open(outputs['asr_model_config'], encoding='utf-8') + root = yaml.full_load(config_file) + config_file.close() + if 'token_num_relax' in root: + token_num_relax = root['token_num_relax'] + if 'decoding_ind' in root: + decoding_ind = root['decoding_ind'] + if 'decoding_mode' in root: + decoding_mode = root['decoding_mode'] - cmd['beam_size'] = root['beam_size'] - cmd['penalty'] = root['penalty'] - cmd['maxlenratio'] = root['maxlenratio'] - cmd['minlenratio'] = root['minlenratio'] - cmd['ctc_weight'] = root['ctc_weight'] - cmd['lm_weight'] = root['lm_weight'] - cmd['asr_train_config'] = outputs['am_model_config'] - cmd['lm_file'] = outputs['lm_model_path'] - cmd['lm_train_config'] = outputs['lm_model_config'] - cmd['batch_size'] = outputs['model_config']['batch_size'] - cmd['frontend_conf'] = frontend_conf - if frontend_conf is not None and 'fs' in frontend_conf: - cmd['fs']['model_fs'] = frontend_conf['fs'] - cmd['token_num_relax'] = token_num_relax - cmd['decoding_ind'] = decoding_ind - cmd['decoding_mode'] = decoding_mode - if outputs.__contains__('mvn_file'): - cmd['cmvn_file'] = outputs['mvn_file'] - model_config = self.model_cfg['model_config'] - if model_config.__contains__('vad_model') and self.vad_model != '': - self.vad_model = model_config['vad_model'] - if model_config.__contains__('vad_model_revision'): - self.vad_model_revision = model_config['vad_model_revision'] - if model_config.__contains__( - 'punc_model') and self.punc_model != '': - self.punc_model = model_config['punc_model'] - if model_config.__contains__('punc_model_revision'): - self.punc_model_revision = model_config['punc_model_revision'] - if model_config.__contains__( - 'timestamp_model') and self.timestamp_model != '': - self.timestamp_model = model_config['timestamp_model'] - if model_config.__contains__('timestamp_model_revision'): - self.timestamp_model_revision = model_config[ - 'timestamp_model_revision'] - update_local_model(model_config, model_path, extra_args) - self.load_vad_model(cmd) - self.load_punc_model(cmd) - self.load_lm_model(cmd) - self.load_timestamp_model(cmd) + cmd['beam_size'] = root['beam_size'] + cmd['penalty'] = root['penalty'] + cmd['maxlenratio'] = root['maxlenratio'] + cmd['minlenratio'] = root['minlenratio'] + cmd['ctc_weight'] = root['ctc_weight'] + cmd['lm_weight'] = root['lm_weight'] + cmd['asr_train_config'] = outputs['am_model_config'] + cmd['lm_file'] = outputs['lm_model_path'] + cmd['lm_train_config'] = outputs['lm_model_config'] + cmd['batch_size'] = outputs['model_config']['batch_size'] + cmd['frontend_conf'] = frontend_conf + if frontend_conf is not None and 'fs' in frontend_conf: + cmd['fs']['model_fs'] = frontend_conf['fs'] + 
cmd['token_num_relax'] = token_num_relax + cmd['decoding_ind'] = decoding_ind + cmd['decoding_mode'] = decoding_mode + if outputs.__contains__('mvn_file'): + cmd['cmvn_file'] = outputs['mvn_file'] + model_config = self.model_cfg['model_config'] + if model_config.__contains__('vad_model') and self.vad_model != '': + self.vad_model = model_config['vad_model'] + if model_config.__contains__('vad_model_revision'): + self.vad_model_revision = model_config['vad_model_revision'] + if model_config.__contains__('punc_model') and self.punc_model != '': + self.punc_model = model_config['punc_model'] + if model_config.__contains__('punc_model_revision'): + self.punc_model_revision = model_config['punc_model_revision'] + if model_config.__contains__( + 'timestamp_model') and self.timestamp_model != '': + self.timestamp_model = model_config['timestamp_model'] + if model_config.__contains__('timestamp_model_revision'): + self.timestamp_model_revision = model_config[ + 'timestamp_model_revision'] + update_local_model(model_config, model_path, extra_args) + self.load_vad_model(cmd) + self.load_punc_model(cmd) + self.load_lm_model(cmd) + self.load_timestamp_model(cmd) - user_args_dict = [ - 'output_dir', - 'batch_size', - 'mode', - 'ngpu', - 'beam_size', - 'ctc_weight', - 'lm_weight', - 'decoding_ind', - 'decoding_mode', - 'vad_model_file', - 'vad_infer_config', - 'vad_cmvn_file', - 'punc_model_file', - 'punc_infer_config', - 'param_dict', - ] + user_args_dict = [ + 'output_dir', + 'batch_size', + 'mode', + 'ngpu', + 'beam_size', + 'ctc_weight', + 'lm_weight', + 'decoding_ind', + 'decoding_mode', + 'vad_model_file', + 'vad_infer_config', + 'vad_cmvn_file', + 'punc_model_file', + 'punc_infer_config', + 'param_dict', + ] - for user_args in user_args_dict: - if user_args in extra_args and extra_args[ - user_args] is not None: - cmd[user_args] = extra_args[user_args] - - elif self.framework == Frameworks.tf: - cmd['fs']['model_fs'] = outputs['model_config']['fs'] - cmd['hop_length'] = outputs['model_config']['hop_length'] - cmd['feature_dims'] = outputs['model_config']['feature_dims'] - cmd['predictions_file'] = 'text' - cmd['cmvn_file'] = outputs['am_mvn_file'] - cmd['vocab_file'] = outputs['vocab_file'] - if 'idx_text' in outputs: - cmd['idx_text'] = outputs['idx_text'] - if 'sampled_ids' in outputs['model_config']: - cmd['sampled_ids'] = outputs['model_config']['sampled_ids'] - if 'sampled_lengths' in outputs['model_config']: - cmd['sampled_lengths'] = outputs['model_config'][ - 'sampled_lengths'] - else: - raise ValueError('model type is mismatching') + for user_args in user_args_dict: + if user_args in extra_args and extra_args[user_args] is not None: + cmd[user_args] = extra_args[user_args] + del extra_args[user_args] return cmd @@ -520,23 +484,12 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): logger.info(f"Decoding with {inputs['audio_format']} files ...") data_cmd: Sequence[Tuple[str, str, str]] - if self.cmd['code_base'] == 'funasr': - if isinstance(self.audio_in, bytes): - data_cmd = [self.audio_in, 'speech', 'bytes'] - elif isinstance(self.audio_in, str): - data_cmd = [self.audio_in, 'speech', 'sound'] - elif self.raw_inputs is not None: - data_cmd = None - else: - if inputs['audio_format'] == 'wav' or inputs[ - 'audio_format'] == 'pcm': - data_cmd = ['speech', 'sound'] - elif inputs['audio_format'] == 'kaldi_ark': - data_cmd = ['speech', 'kaldi_ark'] - elif inputs['audio_format'] == 'tfrecord': - data_cmd = ['speech', 'tfrecord'] - if inputs.__contains__('mvn_file'): - 
data_cmd.append(inputs['mvn_file']) + if isinstance(self.audio_in, bytes): + data_cmd = [self.audio_in, 'speech', 'bytes'] + elif isinstance(self.audio_in, str): + data_cmd = [self.audio_in, 'speech', 'sound'] + elif self.raw_inputs is not None: + data_cmd = None # generate asr inference command self.cmd['name_and_type'] = data_cmd @@ -618,34 +571,9 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): return ref_list def run_inference(self, cmd, **kwargs): - asr_result = [] - if self.framework == Frameworks.torch and cmd['code_base'] == 'funasr': - asr_result = self.funasr_infer_modelscope( - cmd['name_and_type'], cmd['raw_inputs'], cmd['output_dir'], - cmd['fs'], cmd['param_dict'], **kwargs) - - elif self.framework == Frameworks.tf: - from easyasr import asr_inference_paraformer_tf - if hasattr(asr_inference_paraformer_tf, 'set_parameters'): - asr_inference_paraformer_tf.set_parameters( - language=cmd['lang']) - else: - # in order to support easyasr-0.0.2 - cmd['fs'] = cmd['fs']['model_fs'] - - asr_result = asr_inference_paraformer_tf.asr_inference( - ngpu=cmd['ngpu'], - name_and_type=cmd['name_and_type'], - audio_lists=cmd['audio_in'], - idx_text_file=cmd['idx_text'], - asr_model_file=cmd['asr_model_file'], - vocab_file=cmd['vocab_file'], - am_mvn_file=cmd['cmvn_file'], - predictions_file=cmd['predictions_file'], - fs=cmd['fs'], - hop_length=cmd['hop_length'], - feature_dims=cmd['feature_dims'], - sampled_ids=cmd['sampled_ids'], - sampled_lengths=cmd['sampled_lengths']) + asr_result = self.funasr_infer_modelscope(cmd['name_and_type'], + cmd['raw_inputs'], + cmd['output_dir'], cmd['fs'], + cmd['param_dict'], **kwargs) return asr_result diff --git a/modelscope/pipelines/audio/lm_infer_pipeline.py b/modelscope/pipelines/audio/lm_infer_pipeline.py index f271ea45..75d835d6 100644 --- a/modelscope/pipelines/audio/lm_infer_pipeline.py +++ b/modelscope/pipelines/audio/lm_infer_pipeline.py @@ -35,7 +35,10 @@ class LanguageModelPipeline(Pipeline): """ - def __init__(self, model: Union[Model, str] = None, **kwargs): + def __init__(self, + model: Union[Model, str] = None, + ngpu: int = 1, + **kwargs): """ Use `model` to create a LM pipeline for prediction Args: @@ -88,7 +91,9 @@ class LanguageModelPipeline(Pipeline): split_with_space=self.cmd['split_with_space'], seg_dict_file=self.cmd['seg_dict_file'], output_dir=self.cmd['output_dir'], - param_dict=self.cmd['param_dict']) + param_dict=self.cmd['param_dict'], + **kwargs, + ) def __call__(self, text_in: str = None, @@ -189,6 +194,7 @@ class LanguageModelPipeline(Pipeline): for user_args in user_args_dict: if user_args in extra_args and extra_args[user_args] is not None: cmd[user_args] = extra_args[user_args] + del extra_args[user_args] return cmd diff --git a/modelscope/pipelines/audio/punctuation_processing_pipeline.py b/modelscope/pipelines/audio/punctuation_processing_pipeline.py index 2f4dee7a..3ab3481d 100644 --- a/modelscope/pipelines/audio/punctuation_processing_pipeline.py +++ b/modelscope/pipelines/audio/punctuation_processing_pipeline.py @@ -39,7 +39,10 @@ class PunctuationProcessingPipeline(Pipeline): """ - def __init__(self, model: Union[Model, str] = None, **kwargs): + def __init__(self, + model: Union[Model, str] = None, + ngpu: int = 1, + **kwargs): """use `model` to create an asr pipeline for prediction """ super().__init__(model=model, **kwargs) @@ -59,7 +62,9 @@ class PunctuationProcessingPipeline(Pipeline): train_config=self.cmd['train_config'], model_file=self.cmd['model_file'], output_dir=self.cmd['output_dir'], - 
param_dict=self.cmd['param_dict'])
+            param_dict=self.cmd['param_dict'],
+            **kwargs,
+        )

     def __call__(self,
                  text_in: str = None,
@@ -141,6 +146,7 @@ class PunctuationProcessingPipeline(Pipeline):
         for user_args in user_args_dict:
             if user_args in extra_args and extra_args[user_args] is not None:
                 cmd[user_args] = extra_args[user_args]
+                del extra_args[user_args]

         return cmd

diff --git a/modelscope/pipelines/audio/speaker_change_locating_pipeline.py b/modelscope/pipelines/audio/speaker_change_locating_pipeline.py
new file mode 100644
index 00000000..0bab08ac
--- /dev/null
+++ b/modelscope/pipelines/audio/speaker_change_locating_pipeline.py
@@ -0,0 +1,105 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import io
+from typing import Any, Dict, List, Union
+
+import numpy as np
+import soundfile as sf
+import torch
+
+from modelscope.fileio import File
+from modelscope.metainfo import Pipelines
+from modelscope.outputs import OutputKeys
+from modelscope.pipelines.base import InputModel, Pipeline
+from modelscope.pipelines.builder import PIPELINES
+from modelscope.utils.constant import Tasks
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+__all__ = ['SpeakerChangeLocatingPipeline']
+
+
+@PIPELINES.register_module(
+    Tasks.speaker_diarization, module_name=Pipelines.speaker_change_locating)
+class SpeakerChangeLocatingPipeline(Pipeline):
+    """Speaker Change Locating Inference Pipeline
+    use `model` to create a speaker change locating pipeline.
+
+    Args:
+        model (SpeakerChangeLocatingPipeline): A model instance, or a model local dir, or a model id in the model hub.
+        kwargs (dict, `optional`):
+            Extra kwargs passed into the pipeline's constructor.
+    Example:
+    >>> from modelscope.pipelines import pipeline
+    >>> from modelscope.utils.constant import Tasks
+    >>> p = pipeline(
+    >>>    task=Tasks.speaker_diarization, model='damo/speech_campplus-transformer_scl_zh-cn_16k-common')
+    >>> print(p(audio))
+
+    """
+
+    def __init__(self, model: InputModel, **kwargs):
+        """use `model` to create a speaker change locating pipeline for prediction
+        Args:
+            model (str): a valid official model id
+        """
+        super().__init__(model=model, **kwargs)
+        self.model_config = self.model.model_config
+        self.config = self.model.model_config
+        self.anchor_size = self.config['anchor_size']
+
+    def __call__(self, audio: str, embds: List = None) -> Dict[str, Any]:
+        if embds is not None:
+            assert len(embds) == 2
+            assert isinstance(embds[0], np.ndarray) and isinstance(
+                embds[1], np.ndarray)
+            assert embds[0].shape == (
+                self.anchor_size, ) and embds[1].shape == (self.anchor_size, )
+        else:
+            embd1 = np.zeros(self.anchor_size // 2)
+            embd2 = np.ones(self.anchor_size - self.anchor_size // 2)
+            embd3 = np.ones(self.anchor_size // 2)
+            embd4 = np.zeros(self.anchor_size - self.anchor_size // 2)
+            embds = [
+                np.stack([embd1, embd2], axis=1).flatten(),
+                np.stack([embd3, embd4], axis=1).flatten(),
+            ]
+        anchors = torch.from_numpy(np.stack(embds,
+                                            axis=0)).float().unsqueeze(0)
+
+        output = self.preprocess(audio)
+        output = self.forward(output, anchors)
+        output = self.postprocess(output)
+
+        return output
+
+    def forward(self, input: torch.Tensor, anchors: torch.Tensor):
+        output = self.model(input, anchors)
+        return output
+
+    def postprocess(self, input: torch.Tensor) -> Dict[str, Any]:
+        predict = np.where(np.diff(input.argmax(-1).numpy()))
+        try:
+            predict = predict[0][0] * 0.01 + 0.02
+            predict = round(predict, 2)
+            return {OutputKeys.TEXT: f'The change point is at {predict}s.'}
+        except Exception:
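+            # np.where found no change in the frame-wise argmax, so the
+            # predict[0][0] lookup above raised; report that nothing was located.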
+            return {OutputKeys.TEXT: 'No change point is found.'}
+
+    def preprocess(self, input: str) -> torch.Tensor:
+        if isinstance(input, str):
+            file_bytes = File.read(input)
+            data, fs = sf.read(io.BytesIO(file_bytes), dtype='float32')
+            if len(data.shape) == 2:
+                data = data[:, 0]
+            if fs != self.model_config['sample_rate']:
+                raise ValueError(
+                    'modelscope error: Only support %d sample rate files'
+                    % self.model_config['sample_rate'])
+            data = torch.from_numpy(data).unsqueeze(0)
+        else:
+            raise ValueError(
+                'modelscope error: The input type is restricted to audio file address')
+        return data

diff --git a/modelscope/pipelines/audio/speaker_diarization_pipeline.py b/modelscope/pipelines/audio/speaker_diarization_pipeline.py
index f4f68cba..71715ecd 100644
--- a/modelscope/pipelines/audio/speaker_diarization_pipeline.py
+++ b/modelscope/pipelines/audio/speaker_diarization_pipeline.py
@@ -48,6 +48,7 @@ class SpeakerDiarizationPipeline(Pipeline):
                  model: Union[Model, str] = None,
                  sv_model: Optional[Union[Model, str]] = None,
                  sv_model_revision: Optional[str] = None,
+                 ngpu: int = 1,
                  **kwargs):
         """use `model` to create a speaker diarization pipeline for prediction
         Args:
@@ -90,6 +91,7 @@ class SpeakerDiarizationPipeline(Pipeline):
             dur_threshold=self.cmd['dur_threshold'],
             out_format=self.cmd['out_format'],
             param_dict=self.cmd['param_dict'],
+            **kwargs,
         )

     def __call__(self,
@@ -203,6 +205,7 @@ class SpeakerDiarizationPipeline(Pipeline):
                     cmd[user_args].update(extra_args[user_args])
                 else:
                     cmd[user_args] = extra_args[user_args]
+                del extra_args[user_args]

         return cmd

diff --git a/modelscope/pipelines/audio/speaker_verification_eres2net_pipeline.py b/modelscope/pipelines/audio/speaker_verification_eres2net_pipeline.py
new file mode 100644
index 00000000..ef91d83b
--- /dev/null
+++ b/modelscope/pipelines/audio/speaker_verification_eres2net_pipeline.py
@@ -0,0 +1,110 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import io
+from typing import Any, Dict, List, Union
+
+import soundfile as sf
+import torch
+
+from modelscope.fileio import File
+from modelscope.metainfo import Pipelines
+from modelscope.outputs import OutputKeys
+from modelscope.pipelines.base import InputModel, Pipeline
+from modelscope.pipelines.builder import PIPELINES
+from modelscope.utils.constant import Tasks
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+
+@PIPELINES.register_module(
+    Tasks.speaker_verification,
+    module_name=Pipelines.speaker_verification_eres2net)
+class ERes2Net_Pipeline(Pipeline):
+    """Speaker Verification Inference Pipeline
+    use `model` to create a Speaker Verification pipeline.
+
+    Args:
+        model (SpeakerVerificationPipeline): A model instance, or a model local dir, or a model id in the model hub.
+        kwargs (dict, `optional`):
+            Extra kwargs passed into the pipeline's constructor.
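+
+    The pipeline expects exactly two input audio files. It returns the cosine
+    similarity score of the two speaker embeddings together with a yes/no decision
+    made against the configured threshold (`yesOrno_thr`, overridable via `thr`).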
+    Example:
+    >>> from modelscope.pipelines import pipeline
+    >>> from modelscope.utils.constant import Tasks
+    >>> p = pipeline(
+    >>>    task=Tasks.speaker_verification, model='damo/speech_ecapa-tdnn_sv_en_voxceleb_16k')
+    >>> print(p([audio_1, audio_2]))
+
+    """
+
+    def __init__(self, model: InputModel, **kwargs):
+        """use `model` to create a speaker verification pipeline for prediction
+        Args:
+            model (str): a valid official model id
+        """
+        super().__init__(model=model, **kwargs)
+        self.model_config = self.model.model_config
+        self.config = self.model.other_config
+        self.thr = self.config['yesOrno_thr']
+
+    def __call__(self,
+                 in_audios: List[str],
+                 thr: float = None) -> Dict[str, Any]:
+        if thr is not None:
+            self.thr = thr
+        if self.thr < -1 or self.thr > 1:
+            raise ValueError(
+                'modelscope error: the thr value should be in [-1, 1], but found to be %f.'
+                % self.thr)
+        outputs = self.preprocess(in_audios)
+        outputs = self.forward(outputs)
+        outputs = self.postprocess(outputs)
+
+        return outputs
+
+    def forward(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+        emb1 = self.model(inputs['data1'])
+        emb2 = self.model(inputs['data2'])
+
+        return {'emb1': emb1, 'emb2': emb2}
+
+    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+        score = self.compute_cos_similarity(inputs['emb1'], inputs['emb2'])
+        score = round(score, 5)
+        if score >= self.thr:
+            ans = 'yes'
+        else:
+            ans = 'no'
+
+        return {OutputKeys.SCORE: score, OutputKeys.TEXT: ans}
+
+    def preprocess(self, inputs: List[str],
+                   **preprocess_params) -> Dict[str, Any]:
+        if len(inputs) != 2:
+            raise ValueError(
+                'modelscope error: Two input audio files are required.')
+        output = {}
+        for i in range(len(inputs)):
+            if isinstance(inputs[i], str):
+                file_bytes = File.read(inputs[i])
+                data, fs = sf.read(io.BytesIO(file_bytes), dtype='float32')
+                if len(data.shape) == 2:
+                    data = data[:, 0]
+                if fs != self.model_config['sample_rate']:
+                    raise ValueError(
+                        'modelscope error: Only support %d sample rate files'
+                        % self.model_config['sample_rate'])
+                output['data%d' %
+                       (i + 1)] = torch.from_numpy(data).unsqueeze(0)
+            else:
+                raise ValueError(
+                    'modelscope error: The input type is temporarily restricted to audio file address')
+        return output
+
+    def compute_cos_similarity(self, emb1: torch.Tensor,
+                               emb2: torch.Tensor) -> float:
+        assert len(emb1.shape) == 2 and len(emb2.shape) == 2
+        cos = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
+        cosine = cos(emb1, emb2)
+        return cosine.item()

diff --git a/modelscope/pipelines/audio/speaker_verification_pipeline.py b/modelscope/pipelines/audio/speaker_verification_pipeline.py
index 97e73627..e576885a 100644
--- a/modelscope/pipelines/audio/speaker_verification_pipeline.py
+++ b/modelscope/pipelines/audio/speaker_verification_pipeline.py
@@ -41,7 +41,10 @@ class SpeakerVerificationPipeline(Pipeline):
     """

-    def __init__(self, model: Union[Model, str] = None, **kwargs):
+    def __init__(self,
+                 model: Union[Model, str] = None,
+                 ngpu: int = 1,
+                 **kwargs):
         """use `model` to create an asr pipeline for prediction
         """
         super().__init__(model=model, **kwargs)
@@ -67,6 +70,7 @@ class SpeakerVerificationPipeline(Pipeline):
             embedding_node=self.cmd['embedding_node'],
             sv_threshold=self.cmd['sv_threshold'],
             param_dict=self.cmd['param_dict'],
+            **kwargs,
         )

     def __call__(self,
@@ -168,6 +172,7 @@ class SpeakerVerificationPipeline(Pipeline):
                     cmd[user_args].update(extra_args[user_args])
                 else:
                     cmd[user_args] = extra_args[user_args]
+                del extra_args[user_args]

         return cmd
diff --git a/modelscope/pipelines/audio/speaker_verification_rdino_pipeline.py b/modelscope/pipelines/audio/speaker_verification_rdino_pipeline.py
new file mode 100644
index 00000000..dd08ccf4
--- /dev/null
+++ b/modelscope/pipelines/audio/speaker_verification_rdino_pipeline.py
@@ -0,0 +1,110 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import io
+from typing import Any, Dict, List, Union
+
+import soundfile as sf
+import torch
+
+from modelscope.fileio import File
+from modelscope.metainfo import Pipelines
+from modelscope.outputs import OutputKeys
+from modelscope.pipelines.base import InputModel, Pipeline
+from modelscope.pipelines.builder import PIPELINES
+from modelscope.utils.constant import Tasks
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+
+@PIPELINES.register_module(
+    Tasks.speaker_verification,
+    module_name=Pipelines.speaker_verification_rdino)
+class RDINO_Pipeline(Pipeline):
+    """Speaker Verification Inference Pipeline
+    use `model` to create a Speaker Verification pipeline.
+
+    Args:
+        model (SpeakerVerificationPipeline): A model instance, or a model local dir, or a model id in the model hub.
+        kwargs (dict, `optional`):
+            Extra kwargs passed into the pipeline's constructor.
+    Example:
+    >>> from modelscope.pipelines import pipeline
+    >>> from modelscope.utils.constant import Tasks
+    >>> p = pipeline(
+    >>>    task=Tasks.speaker_verification, model='damo/speech_ecapa-tdnn_sv_en_voxceleb_16k')
+    >>> print(p([audio_1, audio_2]))
+
+    """
+
+    def __init__(self, model: InputModel, **kwargs):
+        """use `model` to create a speaker verification pipeline for prediction
+        Args:
+            model (str): a valid official model id
+        """
+        super().__init__(model=model, **kwargs)
+        self.model_config = self.model.model_config
+        self.config = self.model.other_config
+        self.thr = self.config['yesOrno_thr']
+
+    def __call__(self,
+                 in_audios: List[str],
+                 thr: float = None) -> Dict[str, Any]:
+        if thr is not None:
+            self.thr = thr
+        if self.thr < -1 or self.thr > 1:
+            raise ValueError(
+                'modelscope error: the thr value should be in [-1, 1], but found to be %f.'
+                % self.thr)
+        outputs = self.preprocess(in_audios)
+        outputs = self.forward(outputs)
+        outputs = self.postprocess(outputs)
+
+        return outputs
+
+    def forward(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+        emb1 = self.model(inputs['data1'])
+        emb2 = self.model(inputs['data2'])
+
+        return {'emb1': emb1, 'emb2': emb2}
+
+    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+        score = self.compute_cos_similarity(inputs['emb1'], inputs['emb2'])
+        score = round(score, 5)
+        if score >= self.thr:
+            ans = 'yes'
+        else:
+            ans = 'no'
+
+        return {OutputKeys.SCORE: score, OutputKeys.TEXT: ans}
+
+    def preprocess(self, inputs: List[str],
+                   **preprocess_params) -> Dict[str, Any]:
+        if len(inputs) != 2:
+            raise ValueError(
+                'modelscope error: Two input audio files are required.')
+        output = {}
+        for i in range(len(inputs)):
+            if isinstance(inputs[i], str):
+                file_bytes = File.read(inputs[i])
+                data, fs = sf.read(io.BytesIO(file_bytes), dtype='float32')
+                if len(data.shape) == 2:
+                    data = data[:, 0]
+                if fs != self.model_config['sample_rate']:
+                    raise ValueError(
+                        'modelscope error: Only support %d sample rate files'
+                        % self.model_config['sample_rate'])
+                output['data%d' %
+                       (i + 1)] = torch.from_numpy(data).unsqueeze(0)
+            else:
+                raise ValueError(
+                    'modelscope error: The input type is temporarily restricted to audio file address')
+        return output
+
+    def compute_cos_similarity(self, emb1: torch.Tensor,
+                               emb2: torch.Tensor) -> float:
+        assert len(emb1.shape) == 2 and len(emb2.shape) == 2
+        cos = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
+        cosine = cos(emb1, emb2)
+        return cosine.item()

diff --git a/modelscope/pipelines/audio/timestamp_pipeline.py b/modelscope/pipelines/audio/timestamp_pipeline.py
index b60fef05..0968b359 100644
--- a/modelscope/pipelines/audio/timestamp_pipeline.py
+++ b/modelscope/pipelines/audio/timestamp_pipeline.py
@@ -40,7 +40,10 @@ class TimestampPipeline(Pipeline):
     """

-    def __init__(self, model: Union[Model, str] = None, **kwargs):
+    def __init__(self,
+                 model: Union[Model, str] = None,
+                 ngpu: int = 1,
+                 **kwargs):
         """
         Use `model` and `preprocessor` to create an asr pipeline for prediction
         Args:
@@ -84,7 +87,9 @@ class TimestampPipeline(Pipeline):
             allow_variable_data_keys=self.cmd['allow_variable_data_keys'],
             split_with_space=self.cmd['split_with_space'],
             seg_dict_file=self.cmd['seg_dict_file'],
-            param_dict=self.cmd['param_dict'])
+            param_dict=self.cmd['param_dict'],
+            **kwargs,
+        )

     def __call__(self,
                  audio_in: Union[str, bytes],
@@ -264,6 +269,7 @@ class TimestampPipeline(Pipeline):
         for user_args in user_args_dict:
             if user_args in extra_args and extra_args[user_args] is not None:
                 cmd[user_args] = extra_args[user_args]
+                del extra_args[user_args]

         return cmd

diff --git a/modelscope/pipelines/audio/voice_activity_detection_pipeline.py b/modelscope/pipelines/audio/voice_activity_detection_pipeline.py
index c1c6e01f..0121b242 100644
--- a/modelscope/pipelines/audio/voice_activity_detection_pipeline.py
+++ b/modelscope/pipelines/audio/voice_activity_detection_pipeline.py
@@ -41,7 +41,10 @@ class VoiceActivityDetectionPipeline(Pipeline):
     """

-    def __init__(self, model: Union[Model, str] = None, **kwargs):
+    def __init__(self,
+                 model: Union[Model, str] = None,
+                 ngpu: int = 1,
+                 **kwargs):
         """use `model` to create an vad pipeline for prediction
         """
         super().__init__(model=model, **kwargs)
@@ -60,7 +63,9 @@ class VoiceActivityDetectionPipeline(Pipeline):
             key_file=self.cmd['key_file'],
             vad_infer_config=self.cmd['vad_infer_config'],
vad_model_file=self.cmd['vad_model_file'], - vad_cmvn_file=self.cmd['vad_cmvn_file']) + vad_cmvn_file=self.cmd['vad_cmvn_file'], + **kwargs, + ) def __call__(self, audio_in: Union[str, bytes], @@ -209,6 +214,7 @@ class VoiceActivityDetectionPipeline(Pipeline): for user_args in user_args_dict: if user_args in extra_args and extra_args[user_args] is not None: cmd[user_args] = extra_args[user_args] + del extra_args[user_args] return cmd diff --git a/modelscope/pipelines/cv/__init__.py b/modelscope/pipelines/cv/__init__.py index 54289644..e9d7a785 100644 --- a/modelscope/pipelines/cv/__init__.py +++ b/modelscope/pipelines/cv/__init__.py @@ -9,7 +9,6 @@ if TYPE_CHECKING: from .animal_recognition_pipeline import AnimalRecognitionPipeline from .body_2d_keypoints_pipeline import Body2DKeypointsPipeline from .body_3d_keypoints_pipeline import Body3DKeypointsPipeline - from .hand_2d_keypoints_pipeline import Hand2DKeypointsPipeline from .cmdssl_video_embedding_pipeline import CMDSSLVideoEmbeddingPipeline from .card_detection_pipeline import CardDetectionPipeline from .hicossl_video_embedding_pipeline import HICOSSLVideoEmbeddingPipeline @@ -29,13 +28,10 @@ if TYPE_CHECKING: from .image_classification_pipeline import GeneralImageClassificationPipeline from .image_color_enhance_pipeline import ImageColorEnhancePipeline from .image_colorization_pipeline import ImageColorizationPipeline - from .image_classification_pipeline import ImageClassificationPipeline from .image_denoise_pipeline import ImageDenoisePipeline from .image_deblur_pipeline import ImageDeblurPipeline from .image_instance_segmentation_pipeline import ImageInstanceSegmentationPipeline from .image_matting_pipeline import ImageMattingPipeline - from .image_panoptic_segmentation_pipeline import ImagePanopticSegmentationPipeline - from .image_semantic_segmentation_pipeline import ImagePanopticSegmentationEasyCVPipeline from .image_portrait_enhancement_pipeline import ImagePortraitEnhancementPipeline from .image_reid_person_pipeline import ImageReidPersonPipeline from .image_semantic_segmentation_pipeline import ImageSemanticSegmentationPipeline @@ -46,7 +42,6 @@ if TYPE_CHECKING: from .image_inpainting_pipeline import ImageInpaintingPipeline from .image_paintbyexample_pipeline import ImagePaintbyexamplePipeline from .product_retrieval_embedding_pipeline import ProductRetrievalEmbeddingPipeline - from .realtime_object_detection_pipeline import RealtimeObjectDetectionPipeline from .live_category_pipeline import LiveCategoryPipeline from .ocr_detection_pipeline import OCRDetectionPipeline from .ocr_recognition_pipeline import OCRRecognitionPipeline @@ -59,10 +54,6 @@ if TYPE_CHECKING: from .video_category_pipeline import VideoCategoryPipeline from .virtual_try_on_pipeline import VirtualTryonPipeline from .shop_segmentation_pipleline import ShopSegmentationPipeline - from .easycv_pipelines import (EasyCVDetectionPipeline, - EasyCVSegmentationPipeline, - Face2DKeypointsPipeline, - HumanWholebodyKeypointsPipeline) from .text_driven_segmentation_pipleline import TextDrivenSegmentationPipeline from .movie_scene_segmentation_pipeline import MovieSceneSegmentationPipeline from .mog_face_detection_pipeline import MogFaceDetectionPipeline @@ -123,7 +114,6 @@ else: 'animal_recognition_pipeline': ['AnimalRecognitionPipeline'], 'body_2d_keypoints_pipeline': ['Body2DKeypointsPipeline'], 'body_3d_keypoints_pipeline': ['Body3DKeypointsPipeline'], - 'hand_2d_keypoints_pipeline': ['Hand2DKeypointsPipeline'], 'card_detection_pipeline': ['CardDetectionPipeline'], 
'cmdssl_video_embedding_pipeline': ['CMDSSLVideoEmbeddingPipeline'], 'hicossl_video_embedding_pipeline': ['HICOSSLVideoEmbeddingPipeline'], @@ -140,7 +130,7 @@ else: 'face_recognition_onnx_fm_pipeline': ['FaceRecognitionOnnxFmPipeline'], 'general_recognition_pipeline': ['GeneralRecognitionPipeline'], 'image_classification_pipeline': - ['GeneralImageClassificationPipeline', 'ImageClassificationPipeline'], + ['GeneralImageClassificationPipeline'], 'image_cartoon_pipeline': ['ImageCartoonPipeline'], 'image_denoise_pipeline': ['ImageDenoisePipeline'], 'image_deblur_pipeline': ['ImageDeblurPipeline'], @@ -149,10 +139,6 @@ else: 'image_instance_segmentation_pipeline': ['ImageInstanceSegmentationPipeline'], 'image_matting_pipeline': ['ImageMattingPipeline'], - 'image_panoptic_segmentation_pipeline': [ - 'ImagePanopticSegmentationPipeline', - 'ImagePanopticSegmentationEasyCVPipeline' - ], 'image_portrait_enhancement_pipeline': ['ImagePortraitEnhancementPipeline'], 'image_reid_person_pipeline': ['ImageReidPersonPipeline'], @@ -164,8 +150,6 @@ else: ['Image2ImageTranslationPipeline'], 'product_retrieval_embedding_pipeline': ['ProductRetrievalEmbeddingPipeline'], - 'realtime_object_detection_pipeline': - ['RealtimeObjectDetectionPipeline'], 'live_category_pipeline': ['LiveCategoryPipeline'], 'image_to_image_generate_pipeline': ['Image2ImageGenerationPipeline'], 'image_inpainting_pipeline': ['ImageInpaintingPipeline'], @@ -180,12 +164,6 @@ else: 'video_category_pipeline': ['VideoCategoryPipeline'], 'virtual_try_on_pipeline': ['VirtualTryonPipeline'], 'shop_segmentation_pipleline': ['ShopSegmentationPipeline'], - 'easycv_pipelines': [ - 'EasyCVDetectionPipeline', - 'EasyCVSegmentationPipeline', - 'Face2DKeypointsPipeline', - 'HumanWholebodyKeypointsPipeline', - ], 'text_driven_segmentation_pipleline': ['TextDrivenSegmentationPipeline'], 'movie_scene_segmentation_pipeline': @@ -202,9 +180,8 @@ else: ['FaceAttributeRecognitionPipeline'], 'mtcnn_face_detection_pipeline': ['MtcnnFaceDetectionPipeline'], 'hand_static_pipeline': ['HandStaticPipeline'], - 'referring_video_object_segmentation_pipeline': [ - 'ReferringVideoObjectSegmentationPipeline' - ], + 'referring_video_object_segmentation_pipeline': + ['ReferringVideoObjectSegmentationPipeline'], 'language_guided_video_summarization_pipeline': [ 'LanguageGuidedVideoSummarizationPipeline' ], diff --git a/modelscope/pipelines/cv/easycv_pipelines/__init__.py b/modelscope/pipelines/cv/easycv_pipelines/__init__.py deleted file mode 100644 index e0209b85..00000000 --- a/modelscope/pipelines/cv/easycv_pipelines/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule - -if TYPE_CHECKING: - from .detection_pipeline import EasyCVDetectionPipeline - from .segmentation_pipeline import EasyCVSegmentationPipeline - from .face_2d_keypoints_pipeline import Face2DKeypointsPipeline - from .human_wholebody_keypoint_pipeline import HumanWholebodyKeypointsPipeline -else: - _import_structure = { - 'detection_pipeline': ['EasyCVDetectionPipeline'], - 'segmentation_pipeline': ['EasyCVSegmentationPipeline'], - 'face_2d_keypoints_pipeline': ['Face2DKeypointsPipeline'], - 'human_wholebody_keypoint_pipeline': - ['HumanWholebodyKeypointsPipeline'], - } - - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/pipelines/cv/easycv_pipelines/base.py b/modelscope/pipelines/cv/easycv_pipelines/base.py deleted file mode 100644 index 0a31be94..00000000 --- a/modelscope/pipelines/cv/easycv_pipelines/base.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import glob -import os -import os.path as osp -from typing import Any - -import numpy as np -from easycv.utils.ms_utils import EasyCVMeta -from PIL import ImageFile - -from modelscope.hub.snapshot_download import snapshot_download -from modelscope.pipelines.util import is_official_hub_path -from modelscope.utils.config import Config -from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, Invoke, - ModelFile, ThirdParty) -from modelscope.utils.device import create_device - - -class EasyCVPipeline(object): - """Base pipeline for EasyCV. - Loading configuration file of modelscope style by default, - but it is actually use the predictor api of easycv to predict. - So here we do some adaptation work for configuration and predict api. - """ - - def __init__(self, model: str, model_file_pattern='*.pt', *args, **kwargs): - """ - model (str): model id on modelscope hub or local model path. - model_file_pattern (str): model file pattern. - - """ - self.model_file_pattern = model_file_pattern - - assert isinstance(model, str) - if osp.exists(model): - model_dir = model - else: - assert is_official_hub_path( - model), 'Only support local model path and official hub path!' - model_dir = snapshot_download( - model_id=model, - revision=DEFAULT_MODEL_REVISION, - user_agent={ - Invoke.KEY: Invoke.PIPELINE, - ThirdParty.KEY: ThirdParty.EASYCV - }) - - assert osp.isdir(model_dir) - model_files = glob.glob( - os.path.join(model_dir, self.model_file_pattern)) - assert len( - model_files - ) == 1, f'Need one model file, but find {len(model_files)}: {model_files}' - - model_path = model_files[0] - self.model_path = model_path - self.model_dir = model_dir - - # get configuration file from source model dir - self.config_file = os.path.join(model_dir, ModelFile.CONFIGURATION) - assert os.path.exists( - self.config_file - ), f'Not find "{ModelFile.CONFIGURATION}" in model directory!' 
- - self.cfg = Config.from_file(self.config_file) - if 'device' in kwargs: - kwargs['device'] = create_device(kwargs['device']) - if 'predictor_config' in kwargs: - kwargs.pop('predictor_config') - self.predict_op = self._build_predict_op(**kwargs) - - def _build_predict_op(self, **kwargs): - """Build EasyCV predictor.""" - from easycv.predictors.builder import build_predictor - - easycv_config = self._to_easycv_config() - pipeline_op = build_predictor(self.cfg.pipeline.predictor_config, { - 'model_path': self.model_path, - 'config_file': easycv_config, - **kwargs - }) - return pipeline_op - - def _to_easycv_config(self): - """Adapt to EasyCV predictor.""" - # TODO: refine config compatibility problems - - easycv_arch = self.cfg.model.pop(EasyCVMeta.ARCH, None) - model_cfg = self.cfg.model - # Revert to the configuration of easycv - if easycv_arch is not None: - model_cfg.update(easycv_arch) - - easycv_config = Config(dict(model=model_cfg)) - - reserved_keys = [] - if hasattr(self.cfg, EasyCVMeta.META): - easycv_meta_cfg = getattr(self.cfg, EasyCVMeta.META) - reserved_keys = easycv_meta_cfg.get(EasyCVMeta.RESERVED_KEYS, []) - for key in reserved_keys: - easycv_config.merge_from_dict({key: getattr(self.cfg, key)}) - if 'test_pipeline' not in reserved_keys: - easycv_config.merge_from_dict( - {'test_pipeline': self.cfg.dataset.val.get('pipeline', [])}) - - return easycv_config - - def _is_single_inputs(self, inputs): - if isinstance(inputs, str) or (isinstance(inputs, list) - and len(inputs) == 1) or isinstance( - inputs, np.ndarray) or isinstance( - inputs, ImageFile.ImageFile): - return True - - return False - - def __call__(self, inputs) -> Any: - outputs = self.predict_op(inputs) - - if self._is_single_inputs(inputs): - outputs = outputs[0] - - return outputs diff --git a/modelscope/pipelines/cv/easycv_pipelines/detection_pipeline.py b/modelscope/pipelines/cv/easycv_pipelines/detection_pipeline.py deleted file mode 100644 index 2a95ebb4..00000000 --- a/modelscope/pipelines/cv/easycv_pipelines/detection_pipeline.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from typing import Any - -from modelscope.metainfo import Pipelines -from modelscope.outputs import OutputKeys -from modelscope.pipelines.builder import PIPELINES -from modelscope.utils.constant import ModelFile, Tasks -from modelscope.utils.cv.image_utils import \ - show_image_object_detection_auto_result -from .base import EasyCVPipeline - - -@PIPELINES.register_module( - Tasks.image_object_detection, module_name=Pipelines.easycv_detection) -@PIPELINES.register_module( - Tasks.image_object_detection, - module_name=Pipelines.image_object_detection_auto) -@PIPELINES.register_module( - Tasks.domain_specific_object_detection, - module_name=Pipelines.hand_detection) -class EasyCVDetectionPipeline(EasyCVPipeline): - """Pipeline for easycv detection task.""" - - def __init__(self, - model: str, - model_file_pattern=ModelFile.TORCH_MODEL_FILE, - *args, - **kwargs): - """ - model (str): model id on modelscope hub or local model path. - model_file_pattern (str): model file pattern. 
- """ - - super(EasyCVDetectionPipeline, self).__init__( - model=model, - model_file_pattern=model_file_pattern, - *args, - **kwargs) - - def show_result(self, img_path, result, save_path=None): - show_image_object_detection_auto_result(img_path, result, save_path) - - def __call__(self, inputs) -> Any: - outputs = self.predict_op(inputs) - - scores = [] - labels = [] - boxes = [] - for output in outputs: - for score, label, box in zip(output['detection_scores'], - output['detection_classes'], - output['detection_boxes']): - scores.append(score) - labels.append(self.cfg.CLASSES[label]) - boxes.append([b for b in box]) - - results = [{ - OutputKeys.SCORES: scores, - OutputKeys.LABELS: labels, - OutputKeys.BOXES: boxes - } for output in outputs] - - if self._is_single_inputs(inputs): - results = results[0] - - return results diff --git a/modelscope/pipelines/cv/easycv_pipelines/face_2d_keypoints_pipeline.py b/modelscope/pipelines/cv/easycv_pipelines/face_2d_keypoints_pipeline.py deleted file mode 100644 index 0ddc6a6c..00000000 --- a/modelscope/pipelines/cv/easycv_pipelines/face_2d_keypoints_pipeline.py +++ /dev/null @@ -1,244 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import copy -import math -from typing import Any - -import cv2 -import numpy as np - -from modelscope.metainfo import Pipelines -from modelscope.outputs import OutputKeys -from modelscope.pipelines import pipeline -from modelscope.pipelines.builder import PIPELINES -from modelscope.preprocessors import LoadImage -from modelscope.utils.constant import ModelFile, Tasks -from modelscope.utils.logger import get_logger -from .base import EasyCVPipeline - -logger = get_logger() - - -@PIPELINES.register_module( - Tasks.face_2d_keypoints, module_name=Pipelines.face_2d_keypoints) -class Face2DKeypointsPipeline(EasyCVPipeline): - """Pipeline for face 2d keypoints detection.""" - - def __init__(self, - model: str, - model_file_pattern=ModelFile.TORCH_MODEL_FILE, - *args, - **kwargs): - """ - model (str): model id on modelscope hub or local model path. - model_file_pattern (str): model file pattern. 
- """ - - super(Face2DKeypointsPipeline, self).__init__( - model=model, - model_file_pattern=model_file_pattern, - *args, - **kwargs) - - # face detect pipeline - det_model_id = 'damo/cv_resnet_facedetection_scrfd10gkps' - self.face_detection = pipeline( - Tasks.face_detection, model=det_model_id) - - def show_result(self, img, points, scale=2, save_path=None): - return self.predict_op.show_result(img, points, scale, save_path) - - def _choose_face(self, det_result, min_face=10): - """ - choose face with maximum area - Args: - det_result: output of face detection pipeline - min_face: minimum size of valid face w/h - """ - bboxes = np.array(det_result[OutputKeys.BOXES]) - landmarks = np.array(det_result[OutputKeys.KEYPOINTS]) - if bboxes.shape[0] == 0: - logger.warning('No face detected!') - return None - # face idx with enough size - face_idx = [] - for i in range(bboxes.shape[0]): - box = bboxes[i] - if (box[2] - box[0]) >= min_face and (box[3] - box[1]) >= min_face: - face_idx += [i] - if len(face_idx) == 0: - logger.warning( - f'Face size not enough, less than {min_face}x{min_face}!') - return None - bboxes = bboxes[face_idx] - landmarks = landmarks[face_idx] - - return bboxes, landmarks - - def expend_box(self, box, w, h, scalex=0.3, scaley=0.5): - x1 = box[0] - y1 = box[1] - wb = box[2] - x1 - hb = box[3] - y1 - deltax = int(wb * scalex) - deltay1 = int(hb * scaley) - deltay2 = int(hb * scalex) - x1 = x1 - deltax - y1 = y1 - deltay1 - if x1 < 0: - deltax = deltax + x1 - x1 = 0 - if y1 < 0: - deltay1 = deltay1 + y1 - y1 = 0 - x2 = x1 + wb + 2 * deltax - y2 = y1 + hb + deltay1 + deltay2 - x2 = np.clip(x2, 0, w - 1) - y2 = np.clip(y2, 0, h - 1) - return [x1, y1, x2, y2] - - def rotate_point(self, angle, center, landmark): - rad = angle * np.pi / 180.0 - alpha = np.cos(rad) - beta = np.sin(rad) - M = np.zeros((2, 3), dtype=np.float32) - M[0, 0] = alpha - M[0, 1] = beta - M[0, 2] = (1 - alpha) * center[0] - beta * center[1] - M[1, 0] = -beta - M[1, 1] = alpha - M[1, 2] = beta * center[0] + (1 - alpha) * center[1] - - landmark_ = np.asarray([(M[0, 0] * x + M[0, 1] * y + M[0, 2], - M[1, 0] * x + M[1, 1] * y + M[1, 2]) - for (x, y) in landmark]) - return M, landmark_ - - def rotate_crop_img(self, img, pts, M): - imgT = cv2.warpAffine(img, M, (int(img.shape[1]), int(img.shape[0]))) - - x1 = pts[5][0] - x2 = pts[5][0] - y1 = pts[5][1] - y2 = pts[5][1] - for i in range(0, 9): - x1 = min(x1, pts[i][0]) - x2 = max(x2, pts[i][0]) - y1 = min(y1, pts[i][1]) - y2 = max(y2, pts[i][1]) - - height, width, _ = imgT.shape - x1 = min(max(0, int(x1)), width) - y1 = min(max(0, int(y1)), height) - x2 = min(max(0, int(x2)), width) - y2 = min(max(0, int(y2)), height) - sub_imgT = imgT[y1:y2, x1:x2] - - return sub_imgT, imgT, [x1, y1, x2, y2] - - def crop_img(self, imgT, pts): - enlarge_ratio = 1.1 - - x1 = np.min(pts[:, 0]) - x2 = np.max(pts[:, 0]) - y1 = np.min(pts[:, 1]) - y2 = np.max(pts[:, 1]) - w = x2 - x1 + 1 - h = y2 - y1 + 1 - x1 = int(x1 - (enlarge_ratio - 1.0) / 2.0 * w) - y1 = int(y1 - (enlarge_ratio - 1.0) / 2.0 * h) - x1 = max(0, x1) - y1 = max(0, y1) - - new_w = int(enlarge_ratio * w) - new_h = int(enlarge_ratio * h) - new_x1 = x1 - new_y1 = y1 - new_x2 = new_x1 + new_w - new_y2 = new_y1 + new_h - - height, width, _ = imgT.shape - - new_x1 = min(max(0, new_x1), width) - new_y1 = min(max(0, new_y1), height) - new_x2 = max(min(width, new_x2), 0) - new_y2 = max(min(height, new_y2), 0) - - sub_imgT = imgT[new_y1:new_y2, new_x1:new_x2] - - return sub_imgT, [new_x1, new_y1, new_x2, new_y2] - - def 
__call__(self, inputs) -> Any: - img = LoadImage.convert_to_ndarray(inputs) - h, w, c = img.shape - img_rgb = copy.deepcopy(img) - img_rgb = img_rgb[:, :, ::-1] - det_result = self.face_detection(img_rgb) - - bboxes = np.array(det_result[OutputKeys.BOXES]) - if bboxes.shape[0] == 0: - logger.warning('No face detected!') - results = { - OutputKeys.KEYPOINTS: [], - OutputKeys.POSES: [], - OutputKeys.BOXES: [] - } - return results - - boxes, keypoints = self._choose_face(det_result) - - output_boxes = [] - output_keypoints = [] - output_poses = [] - for index, box_ori in enumerate(boxes): - box = self.expend_box(box_ori, w, h, scalex=0.1, scaley=0.1) - y0 = int(box[1]) - y1 = int(box[3]) - x0 = int(box[0]) - x1 = int(box[2]) - sub_img = img[y0:y1, x0:x1] - - keypoint = keypoints[index] - pts = [[keypoint[0], keypoint[1]], [keypoint[2], keypoint[3]], - [keypoint[4], keypoint[5]], [keypoint[6], keypoint[7]], - [keypoint[8], keypoint[9]], [box[0], box[1]], - [box[2], box[1]], [box[0], box[3]], [box[2], box[3]]] - # radian - angle = math.atan2((pts[1][1] - pts[0][1]), - (pts[1][0] - pts[0][0])) - # angle - theta = angle * (180 / np.pi) - - center = [w // 2, h // 2] - cx, cy = center - M, landmark_ = self.rotate_point(theta, (cx, cy), pts) - sub_imgT, imgT, bbox = self.rotate_crop_img(img, landmark_, M) - - outputs = self.predict_op([sub_imgT])[0] - tmp_keypoints = outputs['point'] - - for idx in range(0, len(tmp_keypoints)): - tmp_keypoints[idx][0] += bbox[0] - tmp_keypoints[idx][1] += bbox[1] - - for idx in range(0, 6): - sub_img, bbox = self.crop_img(imgT, tmp_keypoints) - outputs = self.predict_op([sub_img])[0] - tmp_keypoints = outputs['point'] - for idx in range(0, len(tmp_keypoints)): - tmp_keypoints[idx][0] += bbox[0] - tmp_keypoints[idx][1] += bbox[1] - - M2, tmp_keypoints = self.rotate_point(-theta, (cx, cy), - tmp_keypoints) - - output_keypoints.append(np.array(tmp_keypoints)) - output_poses.append(np.array(outputs['pose'])) - output_boxes.append(np.array(box_ori)) - - results = { - OutputKeys.KEYPOINTS: output_keypoints, - OutputKeys.POSES: output_poses, - OutputKeys.BOXES: output_boxes - } - - return results diff --git a/modelscope/pipelines/cv/easycv_pipelines/human_wholebody_keypoint_pipeline.py b/modelscope/pipelines/cv/easycv_pipelines/human_wholebody_keypoint_pipeline.py deleted file mode 100644 index 903c4106..00000000 --- a/modelscope/pipelines/cv/easycv_pipelines/human_wholebody_keypoint_pipeline.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import os.path -from typing import Any - -from modelscope.metainfo import Pipelines -from modelscope.outputs import OutputKeys -from modelscope.pipelines.builder import PIPELINES -from modelscope.utils.constant import ModelFile, Tasks -from .base import EasyCVPipeline - - -@PIPELINES.register_module( - Tasks.human_wholebody_keypoint, - module_name=Pipelines.human_wholebody_keypoint) -class HumanWholebodyKeypointsPipeline(EasyCVPipeline): - """Pipeline for human wholebody 2d keypoints detection.""" - - def __init__(self, - model: str, - model_file_pattern=ModelFile.TORCH_MODEL_FILE, - *args, - **kwargs): - """ - model (str): model id on modelscope hub or local model path. - model_file_pattern (str): model file pattern. 
- """ - super(HumanWholebodyKeypointsPipeline, self).__init__( - model=model, - model_file_pattern=model_file_pattern, - *args, - **kwargs) - - def _build_predict_op(self, **kwargs): - """Build EasyCV predictor.""" - from easycv.predictors.builder import build_predictor - detection_predictor_type = self.cfg['DETECTION']['type'] - detection_model_path = os.path.join( - self.model_dir, self.cfg['DETECTION']['model_path']) - detection_cfg_file = os.path.join(self.model_dir, - self.cfg['DETECTION']['config_file']) - detection_score_threshold = self.cfg['DETECTION']['score_threshold'] - self.cfg.pipeline.predictor_config[ - 'detection_predictor_config'] = dict( - type=detection_predictor_type, - model_path=detection_model_path, - config_file=detection_cfg_file, - score_threshold=detection_score_threshold) - easycv_config = self._to_easycv_config() - pipeline_op = build_predictor(self.cfg.pipeline.predictor_config, { - 'model_path': self.model_path, - 'config_file': easycv_config, - **kwargs - }) - return pipeline_op - - def __call__(self, inputs) -> Any: - outputs = self.predict_op(inputs) - - results = [{ - OutputKeys.KEYPOINTS: output['keypoints'], - OutputKeys.BOXES: output['boxes'] - } for output in outputs] - - if self._is_single_inputs(inputs): - results = results[0] - - return results diff --git a/modelscope/pipelines/cv/easycv_pipelines/segmentation_pipeline.py b/modelscope/pipelines/cv/easycv_pipelines/segmentation_pipeline.py deleted file mode 100644 index bd09fc9b..00000000 --- a/modelscope/pipelines/cv/easycv_pipelines/segmentation_pipeline.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from typing import Any - -import numpy as np - -from modelscope.metainfo import Pipelines -from modelscope.outputs import OutputKeys -from modelscope.pipelines.builder import PIPELINES -from modelscope.utils.constant import Tasks -from .base import EasyCVPipeline - - -@PIPELINES.register_module( - Tasks.image_segmentation, module_name=Pipelines.easycv_segmentation) -class EasyCVSegmentationPipeline(EasyCVPipeline): - """Pipeline for easycv segmentation task.""" - - def __init__(self, model: str, model_file_pattern='*.pt', *args, **kwargs): - """ - model (str): model id on modelscope hub or local model path. - model_file_pattern (str): model file pattern. 
- """ - - super(EasyCVSegmentationPipeline, self).__init__( - model=model, - model_file_pattern=model_file_pattern, - *args, - **kwargs) - - def __call__(self, inputs) -> Any: - outputs = self.predict_op(inputs) - - semantic_result = outputs[0]['seg_pred'] - - ids = np.unique(semantic_result)[::-1] - legal_indices = ids != len(self.predict_op.CLASSES) # for VOID label - ids = ids[legal_indices] - segms = (semantic_result[None] == ids[:, None, None]) - masks = [it.astype(np.int) for it in segms] - labels_txt = np.array(self.predict_op.CLASSES)[ids].tolist() - - results = { - OutputKeys.MASKS: masks, - OutputKeys.LABELS: labels_txt, - OutputKeys.SCORES: [0.999 for _ in range(len(labels_txt))] - } - return results diff --git a/modelscope/pipelines/cv/face_reconstruction_pipeline.py b/modelscope/pipelines/cv/face_reconstruction_pipeline.py index f8240fc0..b9a8e320 100644 --- a/modelscope/pipelines/cv/face_reconstruction_pipeline.py +++ b/modelscope/pipelines/cv/face_reconstruction_pipeline.py @@ -134,7 +134,7 @@ class FaceReconstructionPipeline(Pipeline): img = LoadImage.convert_to_ndarray(input) if len(img.shape) == 2: img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) - img = img.astype(np.float) + img = img.astype(float) result = {'img': img} return result diff --git a/modelscope/pipelines/cv/fast_instance_segmentation_pipeline.py b/modelscope/pipelines/cv/fast_instance_segmentation_pipeline.py new file mode 100644 index 00000000..6ee341de --- /dev/null +++ b/modelscope/pipelines/cv/fast_instance_segmentation_pipeline.py @@ -0,0 +1,116 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import Any, Dict, Optional, Union + +import numpy as np +import torch +import torchvision.transforms as T + +from modelscope.metainfo import Pipelines +from modelscope.models.cv.image_instance_segmentation import FastInst +from modelscope.outputs import OutputKeys +from modelscope.pipelines.base import Input, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import LoadImage +from modelscope.utils.constant import Tasks +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +@PIPELINES.register_module( + Tasks.image_segmentation, module_name=Pipelines.fast_instance_segmentation) +class FastInstanceSegmentationPipeline(Pipeline): + + def __init__(self, + model: Union[FastInst, str], + preprocessor: Optional = None, + **kwargs): + r"""The inference pipeline for fastinst models. + + The model outputs a dict with keys of `scores`, `labels`, and `masks`. + + Args: + model (`str` or `Model` or module instance): A model instance or a model local dir + or a model id in the model hub. + preprocessor (`Preprocessor`, `optional`): A Preprocessor instance. + kwargs (dict, `optional`): + Extra kwargs passed into the preprocessor's constructor. 
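+
+        By default, input images are resized so that the short edge is 640 pixels
+        (long edge capped at 1333) before inference, and predicted instances with a
+        score below 0.5 are filtered out in postprocessing.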
+ + Examples: + >>> from modelscope.outputs import OutputKeys + >>> from modelscope.pipelines import pipeline + >>> pipeline_ins = pipeline('image-segmentation', + model='damo/cv_resnet50_fast-instance-segmentation_coco') + >>> input_img = 'https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/image_instance_segmentation.jpg' + >>> print(pipeline_ins(input_img)[OutputKeys.LABELS]) + """ + super().__init__(model=model, preprocessor=preprocessor, **kwargs) + self.model.eval() + + def _get_preprocess_shape(self, oldh, oldw, short_edge_length, max_size): + h, w = oldh, oldw + size = short_edge_length * 1.0 + scale = size / min(h, w) + if h < w: + newh, neww = size, scale * w + else: + newh, neww = scale * h, size + if max(newh, neww) > max_size: + scale = max_size * 1.0 / max(newh, neww) + newh = newh * scale + neww = neww * scale + neww = int(neww + 0.5) + newh = int(newh + 0.5) + return (newh, neww) + + def preprocess(self, + input: Input, + min_size=640, + max_size=1333) -> Dict[str, Any]: + image = LoadImage.convert_to_img(input) + w, h = image.size[:2] + dataset_dict = {'width': w, 'height': h} + new_h, new_w = self._get_preprocess_shape(h, w, min_size, max_size) + test_transforms = T.Compose([ + T.Resize((new_h, new_w)), + T.ToTensor(), + ]) + image = test_transforms(image) + dataset_dict['image'] = image * 255. + result = {'batched_inputs': [dataset_dict]} + return result + + def forward(self, input: Dict[str, Any], + **forward_params) -> Dict[str, Any]: + with torch.no_grad(): + output = self.model(**input) + return output + + def postprocess(self, + inputs: Dict[str, Any], + score_thr=0.5) -> Dict[str, Any]: + predictions = inputs['eval_result'][0]['instances'] + scores = predictions['scores'].detach().cpu().numpy() + pred_masks = predictions['pred_masks'].detach().cpu().numpy() + pred_classes = predictions['pred_classes'].detach().cpu().numpy() + + thresholded_idxs = np.array(scores) >= score_thr + scores = scores[thresholded_idxs] + pred_classes = pred_classes[thresholded_idxs] + pred_masks = pred_masks[thresholded_idxs] + + results_dict = { + OutputKeys.MASKS: [], + OutputKeys.LABELS: [], + OutputKeys.SCORES: [] + } + for score, cls, mask in zip(scores, pred_classes, pred_masks): + score = np.float64(score) + label = self.model.classes[int(cls)] + mask = np.array(mask, dtype=np.float64) + + results_dict[OutputKeys.SCORES].append(score) + results_dict[OutputKeys.LABELS].append(label) + results_dict[OutputKeys.MASKS].append(mask) + + return results_dict diff --git a/modelscope/pipelines/cv/hand_2d_keypoints_pipeline.py b/modelscope/pipelines/cv/hand_2d_keypoints_pipeline.py deleted file mode 100644 index 63281e80..00000000 --- a/modelscope/pipelines/cv/hand_2d_keypoints_pipeline.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import os.path - -from modelscope.metainfo import Pipelines -from modelscope.pipelines.builder import PIPELINES -from modelscope.utils.constant import ModelFile, Tasks -from .easycv_pipelines.base import EasyCVPipeline - - -@PIPELINES.register_module( - Tasks.hand_2d_keypoints, module_name=Pipelines.hand_2d_keypoints) -class Hand2DKeypointsPipeline(EasyCVPipeline): - """Pipeline for hand pose keypoint task.""" - - def __init__(self, - model: str, - model_file_pattern=ModelFile.TORCH_MODEL_FILE, - *args, - **kwargs): - """ - model (str): model id on modelscope hub or local model path. - model_file_pattern (str): model file pattern. 
- """ - super(Hand2DKeypointsPipeline, self).__init__( - model=model, - model_file_pattern=model_file_pattern, - *args, - **kwargs) - - def _build_predict_op(self, **kwargs): - """Build EasyCV predictor.""" - from easycv.predictors.builder import build_predictor - detection_predictor_type = self.cfg['DETECTION']['type'] - detection_model_path = os.path.join( - self.model_dir, self.cfg['DETECTION']['model_path']) - detection_cfg_file = os.path.join(self.model_dir, - self.cfg['DETECTION']['config_file']) - detection_score_threshold = self.cfg['DETECTION']['score_threshold'] - self.cfg.pipeline.predictor_config[ - 'detection_predictor_config'] = dict( - type=detection_predictor_type, - model_path=detection_model_path, - config_file=detection_cfg_file, - score_threshold=detection_score_threshold) - easycv_config = self._to_easycv_config() - pipeline_op = build_predictor(self.cfg.pipeline.predictor_config, { - 'model_path': self.model_path, - 'config_file': easycv_config, - **kwargs - }) - return pipeline_op diff --git a/modelscope/pipelines/cv/image_detection_pipeline.py b/modelscope/pipelines/cv/image_detection_pipeline.py index 86963c37..2b8275c2 100644 --- a/modelscope/pipelines/cv/image_detection_pipeline.py +++ b/modelscope/pipelines/cv/image_detection_pipeline.py @@ -30,7 +30,7 @@ class ImageDetectionPipeline(Pipeline): def preprocess(self, input: Input) -> Dict[str, Any]: img = LoadImage.convert_to_ndarray(input) - img = img.astype(np.float) + img = img.astype(np.float64) img = self.model.preprocess(img) result = {'img': img} return result diff --git a/modelscope/pipelines/cv/image_matting_pipeline.py b/modelscope/pipelines/cv/image_matting_pipeline.py index 5f5d1d56..bee655c5 100644 --- a/modelscope/pipelines/cv/image_matting_pipeline.py +++ b/modelscope/pipelines/cv/image_matting_pipeline.py @@ -53,7 +53,7 @@ class ImageMattingPipeline(Pipeline): def preprocess(self, input: Input) -> Dict[str, Any]: img = LoadImage.convert_to_ndarray(input) - img = img.astype(np.float) + img = img.astype(float) result = {'img': img} return result diff --git a/modelscope/pipelines/cv/image_panoptic_segmentation_pipeline.py b/modelscope/pipelines/cv/image_panoptic_segmentation_pipeline.py deleted file mode 100644 index fe941d9f..00000000 --- a/modelscope/pipelines/cv/image_panoptic_segmentation_pipeline.py +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from typing import Any, Dict, Union - -import cv2 -import numpy as np -import PIL -import torch - -from modelscope.metainfo import Pipelines -from modelscope.outputs import OutputKeys -from modelscope.pipelines.base import Input, Pipeline -from modelscope.pipelines.builder import PIPELINES -from modelscope.pipelines.cv.easycv_pipelines.base import EasyCVPipeline -from modelscope.preprocessors import load_image -from modelscope.utils.constant import Tasks -from modelscope.utils.logger import get_logger - -logger = get_logger() - - -@PIPELINES.register_module( - Tasks.image_segmentation, - module_name=Pipelines.image_panoptic_segmentation) -class ImagePanopticSegmentationPipeline(Pipeline): - - def __init__(self, model: str, **kwargs): - """ - use `model` to create a image panoptic segmentation pipeline for prediction - Args: - model: model id on modelscope hub. 
- """ - super().__init__(model=model, **kwargs) - - logger.info('panoptic segmentation model, pipeline init') - - def preprocess(self, input: Input) -> Dict[str, Any]: - from mmdet.datasets.pipelines import Compose - from mmcv.parallel import collate, scatter - from mmdet.datasets import replace_ImageToTensor - - cfg = self.model.cfg - # build the data pipeline - - if isinstance(input, str): - cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam' - img = np.array(load_image(input)) - img = img[:, :, ::-1] # convert to bgr - elif isinstance(input, PIL.Image.Image): - cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam' - img = np.array(input.convert('RGB')) - elif isinstance(input, np.ndarray): - cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam' - if len(input.shape) == 2: - img = cv2.cvtColor(input, cv2.COLOR_GRAY2BGR) - else: - img = input - else: - raise TypeError(f'input should be either str, PIL.Image,' - f' np.array, but got {type(input)}') - - # collect data - data = dict(img=img) - cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline) - test_pipeline = Compose(cfg.data.test.pipeline) - - data = test_pipeline(data) - # copy from mmdet_model collect data - data = collate([data], samples_per_gpu=1) - data['img_metas'] = [ - img_metas.data[0] for img_metas in data['img_metas'] - ] - data['img'] = [img.data[0] for img in data['img']] - if next(self.model.parameters()).is_cuda: - # scatter to specified GPU - data = scatter(data, [next(self.model.parameters()).device])[0] - - return data - - def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: - results = self.model.inference(input) - - return results - - def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: - # bz=1, tcguo - pan_results = inputs[0]['pan_results'] - INSTANCE_OFFSET = 1000 - - ids = np.unique(pan_results)[::-1] - legal_indices = ids != self.model.num_classes # for VOID label - ids = ids[legal_indices] - labels = np.array([id % INSTANCE_OFFSET for id in ids], dtype=np.int64) - segms = (pan_results[None] == ids[:, None, None]) - masks = [it.astype(np.int) for it in segms] - labels_txt = np.array(self.model.CLASSES)[labels].tolist() - - outputs = { - OutputKeys.MASKS: masks, - OutputKeys.LABELS: labels_txt, - OutputKeys.SCORES: [0.999 for _ in range(len(labels_txt))] - } - return outputs - - -@PIPELINES.register_module( - Tasks.image_segmentation, - module_name=Pipelines.image_panoptic_segmentation_easycv) -class ImagePanopticSegmentationEasyCVPipeline(EasyCVPipeline): - """Pipeline built upon easycv for image segmentation.""" - - def __init__(self, model: str, model_file_pattern='*.pt', *args, **kwargs): - """ - model (str): model id on modelscope hub or local model path. - model_file_pattern (str): model file pattern. 
- """ - super(ImagePanopticSegmentationEasyCVPipeline, self).__init__( - model=model, - model_file_pattern=model_file_pattern, - *args, - **kwargs) - - def __call__(self, inputs) -> Any: - outputs = self.predict_op(inputs) - easycv_results = outputs[0] - - results = { - OutputKeys.MASKS: - easycv_results[OutputKeys.MASKS], - OutputKeys.LABELS: - easycv_results[OutputKeys.LABELS], - OutputKeys.SCORES: - [0.999 for _ in range(len(easycv_results[OutputKeys.LABELS]))] - } - - return results diff --git a/modelscope/pipelines/cv/image_style_transfer_pipeline.py b/modelscope/pipelines/cv/image_style_transfer_pipeline.py index e5fd0d48..49a0bff0 100644 --- a/modelscope/pipelines/cv/image_style_transfer_pipeline.py +++ b/modelscope/pipelines/cv/image_style_transfer_pipeline.py @@ -73,12 +73,12 @@ class ImageStyleTransferPipeline(Pipeline): content = LoadImage.convert_to_ndarray(content) if len(content.shape) == 2: content = cv2.cvtColor(content, cv2.COLOR_GRAY2BGR) - content_img = content.astype(np.float) + content_img = content.astype(float) style_img = LoadImage.convert_to_ndarray(style) if len(style_img.shape) == 2: style_img = cv2.cvtColor(style_img, cv2.COLOR_GRAY2BGR) - style_img = style_img.astype(np.float) + style_img = style_img.astype(float) result = {'content': content_img, 'style': style_img} return result diff --git a/modelscope/pipelines/cv/movie_scene_segmentation_pipeline.py b/modelscope/pipelines/cv/movie_scene_segmentation_pipeline.py index 3fffc546..3cef5c28 100644 --- a/modelscope/pipelines/cv/movie_scene_segmentation_pipeline.py +++ b/modelscope/pipelines/cv/movie_scene_segmentation_pipeline.py @@ -43,23 +43,32 @@ class MovieSceneSegmentationPipeline(Pipeline): """ self.input_video_pth = input if isinstance(input, str): - shot_feat, sid = self.model.preprocess(input) + self.shot2keyf, self.anno, self.shot_timecode_lst, self.shot_idx_lst = self.model.preprocess( + input) else: raise TypeError(f'input should be a str,' f' but got {type(input)}') - result = {'sid': sid, 'shot_feat': shot_feat} + result = { + 'shot_timecode_lst': self.shot_timecode_lst, + 'shot_idx_lst': self.shot_idx_lst + } - return result + with torch.no_grad(): + output = self.model.inference(result) + + return output def forward(self, input: Dict[str, Any], **forward_params) -> Dict[str, Any]: - with torch.no_grad(): - output = self.model.inference(input) - return output + return input def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: - data = {'input_video_pth': self.input_video_pth, 'feat': inputs} + data = { + 'input_video_pth': self.input_video_pth, + 'feat': inputs, + 'shot2keyf': self.shot2keyf + } scene_num, scene_meta_lst, shot_num, shot_meta_lst = self.model.postprocess( data) result = { diff --git a/modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py b/modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py index 39195bcd..123057f5 100644 --- a/modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py +++ b/modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py @@ -225,7 +225,7 @@ class ReferringVideoObjectSegmentationPipeline(Pipeline): def apply_mask(image, mask, color, transparency=0.7): mask = mask[..., np.newaxis].repeat(repeats=3, axis=2) mask = mask * transparency - color_matrix = np.ones(image.shape, dtype=np.float) * color + color_matrix = np.ones(image.shape, dtype=np.float64) * color out_image = color_matrix * mask + image * (1.0 - mask) return out_image diff --git 
a/modelscope/pipelines/cv/skin_retouching_pipeline.py b/modelscope/pipelines/cv/skin_retouching_pipeline.py index b2b5f4ca..da9b912f 100644 --- a/modelscope/pipelines/cv/skin_retouching_pipeline.py +++ b/modelscope/pipelines/cv/skin_retouching_pipeline.py @@ -105,7 +105,7 @@ class SkinRetouchingPipeline(Pipeline): img = LoadImage.convert_to_ndarray(input) if len(img.shape) == 2: img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) - img = img.astype(np.float) + img = img.astype(float) result = {'img': img} return result diff --git a/modelscope/pipelines/cv/tbs_detection_pipeline.py b/modelscope/pipelines/cv/tbs_detection_pipeline.py index 58831846..8bbac9c8 100644 --- a/modelscope/pipelines/cv/tbs_detection_pipeline.py +++ b/modelscope/pipelines/cv/tbs_detection_pipeline.py @@ -116,7 +116,7 @@ class TBSDetectionPipeline(Pipeline): - **labels** (`List[str]`, optional) -- The boxes's class_names of detected object in image. """ img = LoadImage.convert_to_ndarray(input) - img = img.astype(np.float) + img = img.astype(float) result = {'img': img, 'img_path': input} return result diff --git a/modelscope/pipelines/multi_modal/__init__.py b/modelscope/pipelines/multi_modal/__init__.py index 2e496952..b28e9a71 100644 --- a/modelscope/pipelines/multi_modal/__init__.py +++ b/modelscope/pipelines/multi_modal/__init__.py @@ -21,6 +21,7 @@ if TYPE_CHECKING: from .diffusers_wrapped import StableDiffusionWrapperPipeline, ChineseStableDiffusionPipeline from .soonet_video_temporal_grounding_pipeline import SOONetVideoTemporalGroundingPipeline from .text_to_video_synthesis_pipeline import TextToVideoSynthesisPipeline + from .multimodal_dialogue_pipeline import MultimodalDialoguePipeline else: _import_structure = { 'image_captioning_pipeline': ['ImageCaptioningPipeline'], @@ -45,6 +46,7 @@ else: 'soonet_video_temporal_grounding_pipeline': ['SOONetVideoTemporalGroundingPipeline'], 'text_to_video_synthesis_pipeline': ['TextToVideoSynthesisPipeline'], + 'multimodal_dialogue_pipeline': ['MultimodalDialoguePipeline'] } import sys diff --git a/modelscope/pipelines/multi_modal/diffusers_wrapped/diffusers_pipeline.py b/modelscope/pipelines/multi_modal/diffusers_wrapped/diffusers_pipeline.py index d1e3a2ae..ce0455b6 100644 --- a/modelscope/pipelines/multi_modal/diffusers_wrapped/diffusers_pipeline.py +++ b/modelscope/pipelines/multi_modal/diffusers_wrapped/diffusers_pipeline.py @@ -39,10 +39,10 @@ class DiffusersPipeline(Pipeline): self.models = [self.model] self.has_multiple_models = len(self.models) > 1 - def preprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + def preprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: return inputs - def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: return inputs def __call__(self, input: Union[Input, List[Input]], *args, diff --git a/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/chinese_stable_diffusion_pipeline.py b/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/chinese_stable_diffusion_pipeline.py index d1627962..539fd4ba 100644 --- a/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/chinese_stable_diffusion_pipeline.py +++ b/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/chinese_stable_diffusion_pipeline.py @@ -46,7 +46,9 @@ class ChineseStableDiffusionPipeline(DiffusersPipeline): torch_dtype = kwargs.get('torch_dtype', torch.float32) self.pipeline = 
_DiffuersChineseStableDiffusionPipeline.from_pretrained( - model, torch_dtype=torch_dtype).to(self.device) + model, torch_dtype=torch_dtype) + self.pipeline.text_encoder.pooler = None + self.pipeline.to(self.device) def forward(self, inputs: Dict[str, Any], **forward_params) -> Dict[str, Any]: @@ -73,7 +75,7 @@ class ChineseStableDiffusionPipeline(DiffusersPipeline): callback=inputs.get('callback'), callback_steps=inputs.get('callback_steps', 1)) - def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: images = [] for img in inputs.images: if isinstance(img, Image.Image): diff --git a/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/stable_diffusion_pipeline.py b/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/stable_diffusion_pipeline.py index b6d9d3bd..49b4ef37 100644 --- a/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/stable_diffusion_pipeline.py +++ b/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/stable_diffusion_pipeline.py @@ -65,7 +65,7 @@ class StableDiffusionWrapperPipeline(DiffusersPipeline): callback=inputs.get('callback'), callback_steps=inputs.get('callback_steps', 1)) - def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: images = [] for img in inputs.images: if isinstance(img, Image.Image): diff --git a/modelscope/pipelines/multi_modal/image_captioning_pipeline.py b/modelscope/pipelines/multi_modal/image_captioning_pipeline.py index fbab88fd..17b850da 100644 --- a/modelscope/pipelines/multi_modal/image_captioning_pipeline.py +++ b/modelscope/pipelines/multi_modal/image_captioning_pipeline.py @@ -1,15 +1,18 @@ # Copyright (c) Alibaba, Inc. and its affiliates. from typing import Any, Dict, Optional, Union +import numpy as np import torch from modelscope.metainfo import Pipelines -from modelscope.models.multi_modal import MPlugForAllTasks, OfaForAllTasks +from modelscope.models.multi_modal import (CLIP_Interrogator, MPlugForAllTasks, + OfaForAllTasks) from modelscope.pipelines.base import Model, Pipeline from modelscope.pipelines.builder import PIPELINES from modelscope.pipelines.util import batch_process -from modelscope.preprocessors import (MPlugPreprocessor, OfaPreprocessor, - Preprocessor) +from modelscope.preprocessors import ( + ImageCaptioningClipInterrogatorPreprocessor, MPlugPreprocessor, + OfaPreprocessor, Preprocessor, load_image) from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.logger import get_logger @@ -28,6 +31,17 @@ class ImageCaptioningPipeline(Pipeline): use `model` and `preprocessor` to create a image captioning pipeline for prediction Args: model: model id on modelscope hub. 
+ Examples: + from modelscope.pipelines import pipeline + from modelscope.utils.constant import Tasks + + model_id = 'damo/cv_clip-interrogator' + input_image = "test.png" + + pipeline_ci = pipeline(Tasks.image_captioning, model=model_id) + print(pipeline_ci(input_image)) + + """ super().__init__(model=model, preprocessor=preprocessor, **kwargs) self.model.eval() @@ -39,6 +53,9 @@ class ImageCaptioningPipeline(Pipeline): self.preprocessor = OfaPreprocessor(self.model.model_dir) elif isinstance(self.model, MPlugForAllTasks): self.preprocessor = MPlugPreprocessor(self.model.model_dir) + elif isinstance(self.model, CLIP_Interrogator): + self.preprocessor = ImageCaptioningClipInterrogatorPreprocessor( + ) def _batch(self, data): if isinstance(self.model, OfaForAllTasks): diff --git a/modelscope/pipelines/multi_modal/multimodal_dialogue_pipeline.py b/modelscope/pipelines/multi_modal/multimodal_dialogue_pipeline.py new file mode 100644 index 00000000..31df19fc --- /dev/null +++ b/modelscope/pipelines/multi_modal/multimodal_dialogue_pipeline.py @@ -0,0 +1,90 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import Any, Dict, Optional, Union + +import torch + +from modelscope.metainfo import Pipelines +from modelscope.models.multi_modal import MplugOwlForConditionalGeneration +from modelscope.outputs import OutputKeys, TokenGeneratorOutput +from modelscope.pipelines.base import Model, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import MplugOwlPreprocessor, Preprocessor +from modelscope.utils.constant import Tasks +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +@PIPELINES.register_module( + Tasks.multimodal_dialogue, module_name=Pipelines.multimodal_dialogue) +class MultimodalDialoguePipeline(Pipeline): + r""" Multimodal Dialogue Pipeline. + + Examples: + >>> from modelscope.pipelines import pipeline + >>> chatbot = pipeline('multimodal-dialogue', 'damo/multi-modal_mplug_owl_multimodal-dialogue_7b') + >>> image = 'data/resource/portrait_input.png' + >>> system_prompt_1 = 'The following is a conversation between a curious human and AI assistant.' + >>> system_prompt_2 = "The assistant gives helpful, detailed, and polite answers to the user's questions." + >>> messages = { + >>> 'messages': [ + >>> { + >>> 'role': 'system', + >>> 'content': system_prompt_1 + ' ' + system_prompt_2 + >>> }, + >>> { + >>> 'role': 'user', + >>> 'content': [{ + >>> 'image': image + >>> }] + >>> }, + >>> { + >>> 'role': 'user', + >>> 'content': 'Describe the facial expression of the man.' + >>> }, + >>> ] + >>> } + >>> chatbot(messages) + >>> { + >>> "text": he is angry. + >>> } + >>> + """ + + def __init__(self, + model: Union[Model, str], + preprocessor: Optional[Preprocessor] = None, + **kwargs): + """ + use `model` and `preprocessor` to create a multimodal dialogue pipeline for prediction + Args: + model: model id on modelscope hub. + """ + super().__init__(model=model, preprocessor=preprocessor, **kwargs) + self.model.eval() + if preprocessor is None: + if isinstance(self.model, MplugOwlForConditionalGeneration): + self.preprocessor = MplugOwlPreprocessor(self.model.model_dir) + + def forward(self, inputs: Dict[str, Any], + **forward_params) -> Dict[str, Any]: + """ + the `forward_params` can be the generation configurations listed in transformers library. 
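+        For example, common generation arguments from the transformers library,
+        such as `max_new_tokens`, `do_sample` and `top_k`, can usually be passed
+        through here (illustrative values, assuming the underlying model's
+        `generate` method accepts them):
+
+        >>> chatbot(messages, max_new_tokens=512, do_sample=True, top_k=5)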
+ """ + with torch.no_grad(): + return super().forward(inputs, **forward_params) + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]: + """process the prediction results + + Args: + inputs (Dict[str, Any]): _description_ + + Returns: + Dict[str, str]: the prediction results + """ + if isinstance(self.model, MplugOwlForConditionalGeneration): + output = self.preprocessor.tokenizer.decode( + inputs[0], skip_special_tokens=True) + inputs = {OutputKeys.TEXT: output} + return inputs diff --git a/modelscope/pipelines/multi_modal/text_to_video_synthesis_pipeline.py b/modelscope/pipelines/multi_modal/text_to_video_synthesis_pipeline.py index ee6635a6..50e2437b 100644 --- a/modelscope/pipelines/multi_modal/text_to_video_synthesis_pipeline.py +++ b/modelscope/pipelines/multi_modal/text_to_video_synthesis_pipeline.py @@ -1,5 +1,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. +import os import tempfile from typing import Any, Dict, Optional @@ -62,8 +63,10 @@ class TextToVideoSynthesisPipeline(Pipeline): **post_params) -> Dict[str, Any]: video = tensor2vid(inputs['video']) output_video_path = post_params.get('output_video', None) + temp_video_file = False if output_video_path is None: output_video_path = tempfile.NamedTemporaryFile(suffix='.mp4').name + temp_video_file = True fourcc = cv2.VideoWriter_fourcc(*'mp4v') h, w, c = video[0].shape @@ -72,7 +75,15 @@ class TextToVideoSynthesisPipeline(Pipeline): for i in range(len(video)): img = cv2.cvtColor(video[i], cv2.COLOR_RGB2BGR) video_writer.write(img) - return {OutputKeys.OUTPUT_VIDEO: output_video_path} + video_writer.release() + if temp_video_file: + video_file_content = b'' + with open(output_video_path, 'rb') as f: + video_file_content = f.read() + os.remove(output_video_path) + return {OutputKeys.OUTPUT_VIDEO: video_file_content} + else: + return {OutputKeys.OUTPUT_VIDEO: output_video_path} def tensor2vid(video, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]): diff --git a/modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py b/modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py index fa7b23b8..a0e75638 100644 --- a/modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py +++ b/modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py @@ -41,7 +41,8 @@ class DialogIntentPredictionPipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) if preprocessor is None: self.preprocessor = DialogIntentPredictionPreprocessor( self.model.model_dir, **kwargs) diff --git a/modelscope/pipelines/nlp/document_grounded_dialog_generate_pipeline.py b/modelscope/pipelines/nlp/document_grounded_dialog_generate_pipeline.py index 8c773dfe..dfcd95e6 100644 --- a/modelscope/pipelines/nlp/document_grounded_dialog_generate_pipeline.py +++ b/modelscope/pipelines/nlp/document_grounded_dialog_generate_pipeline.py @@ -47,7 +47,8 @@ class DocumentGroundedDialogGeneratePipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) if preprocessor is None: self.preprocessor = DocumentGroundedDialogGeneratePreprocessor( diff --git a/modelscope/pipelines/nlp/document_grounded_dialog_rerank_pipeline.py b/modelscope/pipelines/nlp/document_grounded_dialog_rerank_pipeline.py index 8fdef380..29993594 100644 --- a/modelscope/pipelines/nlp/document_grounded_dialog_rerank_pipeline.py 
+++ b/modelscope/pipelines/nlp/document_grounded_dialog_rerank_pipeline.py @@ -65,7 +65,8 @@ class DocumentGroundedDialogRerankPipeline(Pipeline): device=device, auto_collate=auto_collate, seed=seed, - **kwarg) + compile=kwarg.pop('compile', False), + compile_options=kwarg.pop('compile_options', {})) self.model = model self.preprocessor = preprocessor self.device = device diff --git a/modelscope/pipelines/nlp/document_grounded_dialog_retrieval_pipeline.py b/modelscope/pipelines/nlp/document_grounded_dialog_retrieval_pipeline.py index c3fb1a32..31890a73 100644 --- a/modelscope/pipelines/nlp/document_grounded_dialog_retrieval_pipeline.py +++ b/modelscope/pipelines/nlp/document_grounded_dialog_retrieval_pipeline.py @@ -56,7 +56,8 @@ class DocumentGroundedDialogRetrievalPipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) if preprocessor is None: self.preprocessor = DocumentGroundedDialogRetrievalPreprocessor( diff --git a/modelscope/pipelines/nlp/document_segmentation_pipeline.py b/modelscope/pipelines/nlp/document_segmentation_pipeline.py index 6e195ed0..d528eee0 100644 --- a/modelscope/pipelines/nlp/document_segmentation_pipeline.py +++ b/modelscope/pipelines/nlp/document_segmentation_pipeline.py @@ -6,11 +6,9 @@ from typing import Any, Dict, List, Union import numpy as np import torch from datasets import Dataset -from transformers.models.bert.modeling_bert import BertConfig from modelscope.metainfo import Pipelines from modelscope.models import Model -from modelscope.models.nlp.ponet.configuration import PoNetConfig from modelscope.outputs import OutputKeys from modelscope.pipelines.base import Pipeline, Tensor from modelscope.pipelines.builder import PIPELINES @@ -51,11 +49,9 @@ class DocumentSegmentationPipeline(Pipeline): auto_collate=auto_collate, **kwargs) - kwargs = kwargs - if 'compile' in kwargs.keys(): - kwargs.pop('compile') - if 'compile_options' in kwargs.keys(): - kwargs.pop('compile_options') + kwargs.pop('compile', None) + kwargs.pop('compile_options', None) + self.model_dir = self.model.model_dir self.model_cfg = self.model.model_cfg if preprocessor is None: diff --git a/modelscope/pipelines/nlp/extractive_summarization_pipeline.py b/modelscope/pipelines/nlp/extractive_summarization_pipeline.py index c01f28fc..a4e67607 100644 --- a/modelscope/pipelines/nlp/extractive_summarization_pipeline.py +++ b/modelscope/pipelines/nlp/extractive_summarization_pipeline.py @@ -44,11 +44,8 @@ class ExtractiveSummarizationPipeline(Pipeline): auto_collate=auto_collate, **kwargs) - kwargs = kwargs - if 'compile' in kwargs.keys(): - kwargs.pop('compile') - if 'compile_options' in kwargs.keys(): - kwargs.pop('compile_options') + kwargs.pop('compile', None) + kwargs.pop('compile_options', None) self.model_dir = self.model.model_dir self.model_cfg = self.model.model_cfg diff --git a/modelscope/pipelines/nlp/feature_extraction_pipeline.py b/modelscope/pipelines/nlp/feature_extraction_pipeline.py index 0f6979ba..c82db03c 100644 --- a/modelscope/pipelines/nlp/feature_extraction_pipeline.py +++ b/modelscope/pipelines/nlp/feature_extraction_pipeline.py @@ -54,7 +54,8 @@ class FeatureExtractionPipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) assert isinstance(self.model, Model), \ f'please check whether model config exists 
in {ModelFile.CONFIGURATION}' diff --git a/modelscope/pipelines/nlp/fill_mask_pipeline.py b/modelscope/pipelines/nlp/fill_mask_pipeline.py index 6bc7622f..7b034786 100644 --- a/modelscope/pipelines/nlp/fill_mask_pipeline.py +++ b/modelscope/pipelines/nlp/fill_mask_pipeline.py @@ -63,7 +63,8 @@ class FillMaskPipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) assert isinstance(self.model, Model), \ f'please check whether model config exists in {ModelFile.CONFIGURATION}' diff --git a/modelscope/pipelines/nlp/named_entity_recognition_pipeline.py b/modelscope/pipelines/nlp/named_entity_recognition_pipeline.py index 2cf30037..d035802b 100644 --- a/modelscope/pipelines/nlp/named_entity_recognition_pipeline.py +++ b/modelscope/pipelines/nlp/named_entity_recognition_pipeline.py @@ -56,7 +56,8 @@ class NamedEntityRecognitionPipeline(TokenClassificationPipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) assert isinstance(self.model, Model), \ f'please check whether model config exists in {ModelFile.CONFIGURATION}' diff --git a/modelscope/pipelines/nlp/sentence_embedding_pipeline.py b/modelscope/pipelines/nlp/sentence_embedding_pipeline.py index 4e01397d..9d5cc80f 100644 --- a/modelscope/pipelines/nlp/sentence_embedding_pipeline.py +++ b/modelscope/pipelines/nlp/sentence_embedding_pipeline.py @@ -43,7 +43,8 @@ class SentenceEmbeddingPipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) assert isinstance(self.model, Model), \ f'please check whether model config exists in {ModelFile.CONFIGURATION}' diff --git a/modelscope/pipelines/nlp/siamese_uie_pipeline.py b/modelscope/pipelines/nlp/siamese_uie_pipeline.py index cdbd9119..d548d2e8 100644 --- a/modelscope/pipelines/nlp/siamese_uie_pipeline.py +++ b/modelscope/pipelines/nlp/siamese_uie_pipeline.py @@ -21,7 +21,7 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines.base import Pipeline from modelscope.pipelines.builder import PIPELINES from modelscope.preprocessors import Preprocessor, SiameseUiePreprocessor -from modelscope.utils.constant import Tasks +from modelscope.utils.constant import ModelFile, Tasks Input = Union[str, tuple, MsDataset, 'Image.Image', 'numpy.ndarray'] @@ -68,7 +68,8 @@ class SiameseUiePipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) assert isinstance(self.model, Model), \ f'please check whether model config exists in {ModelFile.CONFIGURATION}' diff --git a/modelscope/pipelines/nlp/table_question_answering_pipeline.py b/modelscope/pipelines/nlp/table_question_answering_pipeline.py index 0472ecb8..7c064f57 100644 --- a/modelscope/pipelines/nlp/table_question_answering_pipeline.py +++ b/modelscope/pipelines/nlp/table_question_answering_pipeline.py @@ -52,7 +52,8 @@ class TableQuestionAnsweringPipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) assert isinstance(self.model, Model), \ f'please check whether model config exists in 
{ModelFile.CONFIGURATION}' @@ -402,7 +403,7 @@ class TableQuestionAnsweringPipeline(Pipeline): OutputKeys.SQL_STRING: sql.string, OutputKeys.SQL_QUERY: sql.query, OutputKeys.HISTORY: result['sql'], - OutputKeys.QUERT_RESULT: tabledata, + OutputKeys.QUERY_RESULT: tabledata, } return {OutputKeys.OUTPUT: output} diff --git a/modelscope/pipelines/nlp/text_classification_pipeline.py b/modelscope/pipelines/nlp/text_classification_pipeline.py index a300b008..3b06f435 100644 --- a/modelscope/pipelines/nlp/text_classification_pipeline.py +++ b/modelscope/pipelines/nlp/text_classification_pipeline.py @@ -76,7 +76,7 @@ class TextClassificationPipeline(Pipeline): field=Fields.multi_modal, **kwargs) else: - first_sequence = kwargs.pop('first_sequence', 'first_sequence') + first_sequence = kwargs.pop('first_sequence', 'text') second_sequence = kwargs.pop('second_sequence', None) sequence_length = kwargs.pop('sequence_length', 512) self.preprocessor = Preprocessor.from_pretrained( diff --git a/modelscope/pipelines/nlp/text_generation_pipeline.py b/modelscope/pipelines/nlp/text_generation_pipeline.py index 2b851dc4..d1aa5ff6 100644 --- a/modelscope/pipelines/nlp/text_generation_pipeline.py +++ b/modelscope/pipelines/nlp/text_generation_pipeline.py @@ -59,7 +59,8 @@ class TextGenerationPipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) assert isinstance(self.model, Model), \ f'please check whether model config exists in {ModelFile.CONFIGURATION}' diff --git a/modelscope/pipelines/nlp/text_ranking_pipeline.py b/modelscope/pipelines/nlp/text_ranking_pipeline.py index a42baaa2..7539634e 100644 --- a/modelscope/pipelines/nlp/text_ranking_pipeline.py +++ b/modelscope/pipelines/nlp/text_ranking_pipeline.py @@ -44,7 +44,8 @@ class TextRankingPipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) assert isinstance(self.model, Model), \ f'please check whether model config exists in {ModelFile.CONFIGURATION}' diff --git a/modelscope/pipelines/nlp/token_classification_pipeline.py b/modelscope/pipelines/nlp/token_classification_pipeline.py index daa4823c..9fd8e325 100644 --- a/modelscope/pipelines/nlp/token_classification_pipeline.py +++ b/modelscope/pipelines/nlp/token_classification_pipeline.py @@ -51,7 +51,9 @@ class TokenClassificationPipeline(Pipeline): preprocessor=preprocessor, config_file=config_file, device=device, - auto_collate=auto_collate) + auto_collate=auto_collate, + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) assert isinstance(self.model, Model), \ f'please check whether model config exists in {ModelFile.CONFIGURATION}' diff --git a/modelscope/pipelines/nlp/translation_evaluation_pipeline.py b/modelscope/pipelines/nlp/translation_evaluation_pipeline.py index 8a339517..4450aad7 100644 --- a/modelscope/pipelines/nlp/translation_evaluation_pipeline.py +++ b/modelscope/pipelines/nlp/translation_evaluation_pipeline.py @@ -9,12 +9,11 @@ import torch from modelscope.metainfo import Pipelines from modelscope.models.base import Model -from modelscope.models.nlp.unite.configuration_unite import EvaluationMode +from modelscope.models.nlp.unite.configuration import InputFormat from modelscope.outputs import OutputKeys from modelscope.pipelines.base import InputModel, Pipeline from 
modelscope.pipelines.builder import PIPELINES -from modelscope.preprocessors import (Preprocessor, - TranslationEvaluationPreprocessor) +from modelscope.preprocessors import Preprocessor from modelscope.utils.config import Config from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.logger import get_logger @@ -31,16 +30,18 @@ class TranslationEvaluationPipeline(Pipeline): def __init__(self, model: InputModel, preprocessor: Optional[Preprocessor] = None, - eval_mode: EvaluationMode = EvaluationMode.SRC_REF, + input_format: InputFormat = InputFormat.SRC_REF, device: str = 'gpu', **kwargs): - r"""Build a translation pipeline with a model dir or a model id in the model hub. + r"""Build a translation evaluation pipeline with a model dir or a model id in the model hub. Args: model: A Model instance. - eval_mode: Evaluation mode, choosing one from `"EvaluationMode.SRC_REF"`, - `"EvaluationMode.SRC"`, `"EvaluationMode.REF"`. Aside from hypothesis, the + preprocessor: The preprocessor for this pipeline. + input_format: Input format, choosing one from `"InputFormat.SRC_REF"`, + `"InputFormat.SRC"`, `"InputFormat.REF"`. Aside from hypothesis, the source/reference/source+reference can be presented during evaluation. + device: Used device for this pipeline. """ super().__init__( model=model, @@ -48,44 +49,40 @@ class TranslationEvaluationPipeline(Pipeline): compile=kwargs.pop('compile', False), compile_options=kwargs.pop('compile_options', {})) - self.eval_mode = eval_mode - self.checking_eval_mode() + self.input_format = input_format + self.checking_input_format() assert isinstance(self.model, Model), \ f'please check whether model config exists in {ModelFile.CONFIGURATION}' - self.preprocessor = TranslationEvaluationPreprocessor( - self.model.model_dir, - self.eval_mode) if preprocessor is None else preprocessor - self.model.load_checkpoint( osp.join(self.model.model_dir, ModelFile.TORCH_MODEL_BIN_FILE), - self.device) + device=self.device, + plm_only=False) self.model.eval() return - def checking_eval_mode(self): - if self.eval_mode == EvaluationMode.SRC: + def checking_input_format(self): + if self.input_format == InputFormat.SRC: logger.info('Evaluation mode: source-only') - elif self.eval_mode == EvaluationMode.REF: + elif self.input_format == InputFormat.REF: logger.info('Evaluation mode: reference-only') - elif self.eval_mode == EvaluationMode.SRC_REF: + elif self.input_format == InputFormat.SRC_REF: logger.info('Evaluation mode: source-reference-combined') else: - raise ValueError( - 'Evaluation mode should be one choice among' - '\'EvaluationMode.SRC\', \'EvaluationMode.REF\', and' - '\'EvaluationMode.SRC_REF\'.') + raise ValueError('Evaluation mode should be one choice among' + '\'InputFormat.SRC\', \'InputFormat.REF\', and' + '\'InputFormat.SRC_REF\'.') - def change_eval_mode(self, - eval_mode: EvaluationMode = EvaluationMode.SRC_REF): + def change_input_format(self, + input_format: InputFormat = InputFormat.SRC_REF): logger.info('Changing the evaluation mode.') - self.eval_mode = eval_mode - self.checking_eval_mode() - self.preprocessor.eval_mode = eval_mode + self.input_format = input_format + self.checking_input_format() + self.preprocessor.change_input_format(input_format) return - def __call__(self, input: Dict[str, Union[str, List[str]]], **kwargs): + def __call__(self, input_dict: Dict[str, Union[str, List[str]]], **kwargs): r"""Implementation of __call__ function. 
Args: @@ -108,12 +105,12 @@ class TranslationEvaluationPipeline(Pipeline): } ``` """ - return super().__call__(input=input, **kwargs) + return super().__call__(input=input_dict, **kwargs) - def forward(self, - input_ids: List[torch.Tensor]) -> Dict[str, torch.Tensor]: - return self.model(input_ids) + def forward( + self, input_dict: Dict[str, + torch.Tensor]) -> Dict[str, torch.Tensor]: + return self.model(**input_dict) def postprocess(self, output: torch.Tensor) -> Dict[str, Any]: - result = {OutputKeys.SCORES: output.cpu().tolist()} - return result + return output diff --git a/modelscope/pipelines/nlp/user_satisfaction_estimation_pipeline.py b/modelscope/pipelines/nlp/user_satisfaction_estimation_pipeline.py index 76fcd7a8..197a941f 100644 --- a/modelscope/pipelines/nlp/user_satisfaction_estimation_pipeline.py +++ b/modelscope/pipelines/nlp/user_satisfaction_estimation_pipeline.py @@ -51,7 +51,8 @@ class UserSatisfactionEstimationPipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) if hasattr(self.preprocessor, 'id2label'): self.id2label = self.preprocessor.id2label diff --git a/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py b/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py index 9cd27adc..18ba40c8 100644 --- a/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py +++ b/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py @@ -67,7 +67,8 @@ class ZeroShotClassificationPipeline(Pipeline): config_file=config_file, device=device, auto_collate=auto_collate, - **kwargs) + compile=kwargs.pop('compile', False), + compile_options=kwargs.pop('compile_options', {})) self.entailment_id = 0 self.contradiction_id = 2 diff --git a/modelscope/pipelines/pipeline_template.py b/modelscope/pipelines/pipeline_template.py new file mode 100644 index 00000000..a29ce5d7 --- /dev/null +++ b/modelscope/pipelines/pipeline_template.py @@ -0,0 +1,87 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from typing import Any, Dict + +import numpy as np + +from modelscope.metainfo import Pipelines +from modelscope.models.base.base_model import Model +from modelscope.outputs.outputs import OutputKeys +from modelscope.pipelines.base import Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.utils.constant import Tasks + +__all__ = ['PipelineTemplate'] + + +@PIPELINES.register_module( + Tasks.task_template, module_name=Pipelines.pipeline_template) +class PipelineTemplate(Pipeline): + """A pipeline template explain how to define parameters and input and + output information. As a rule, the first parameter is the input, + followed by the request parameters. The parameter must add type + hint information, and set the default value if necessary, + for the convenience of use. + """ + + def __init__(self, model: Model, **kwargs): + """A pipeline template to describe input and + output and parameter processing + + Args: + model: A Model instance. + """ + # call base init. + super().__init__(model=model, **kwargs) + + def preprocess(self, + input: Any, + max_length: int = 1024, + top_p: float = 0.8) -> Any: + """Pipeline preprocess interface. + + Args: + input (Any): The pipeline input, ref Tasks.task_template TASK_INPUTS. + max_length (int, optional): The max_length parameter. Defaults to 1024. + top_p (float, optional): The top_p parameter. Defaults to 0.8. + + Returns: + Any: Return result process by forward. 
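+            For illustration only (the model id below is a placeholder and the
+            parameter values are arbitrary), request parameters reach this method
+            from the pipeline call:
+
+            >>> pipe = pipeline(Tasks.task_template, model='<your-model-id>')
+            >>> pipe(input, max_length=1024, top_p=0.8)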
+ """ + pass + + def forward(self, + input: Any, + max_length: int = 1024, + top_p: float = 0.8) -> Any: + """The forward interface. + + Args: + input (Any): The output of the preprocess. + max_length (int, optional): max_length. Defaults to 1024. + top_p (float, optional): top_p. Defaults to 0.8. + + Returns: + Any: Return result process by postprocess. + """ + pass + + def postprocess(self, + inputs: Any, + postprocess_param1: str = None) -> Dict[str, Any]: + """The postprocess interface. + + Args: + input (Any): The output of the forward. + max_length (int, optional): max_length. Defaults to 1024. + top_p (float, optional): top_p. Defaults to 0.8. + + Returns: + Any: Return result process by postprocess. + """ + result = { + OutputKeys.BOXES: np.zeros(4), + OutputKeys.OUTPUT_IMG: np.zeros(10, 4), + OutputKeys.TEXT_EMBEDDING: np.zeros(1, 1000) + } + return result diff --git a/modelscope/preprocessors/__init__.py b/modelscope/preprocessors/__init__.py index a35f130a..dbcb0813 100644 --- a/modelscope/preprocessors/__init__.py +++ b/modelscope/preprocessors/__init__.py @@ -20,7 +20,8 @@ if TYPE_CHECKING: from .tts import KanttsDataPreprocessor from .multi_modal import (DiffusionImageGenerationPreprocessor, OfaPreprocessor, MPlugPreprocessor, - HiTeAPreprocessor) + HiTeAPreprocessor, MplugOwlPreprocessor, + ImageCaptioningClipInterrogatorPreprocessor) from .nlp import ( DocumentSegmentationTransformersPreprocessor, FaqQuestionAnsweringTransformersPreprocessor, @@ -34,16 +35,16 @@ if TYPE_CHECKING: TextErrorCorrectionPreprocessor, TextGenerationT5Preprocessor, WordAlignmentPreprocessor, TextGenerationTransformersPreprocessor, Tokenize, WordSegmentationBlankSetToLabelPreprocessor, - CodeGeeXPreprocessor, MGLMSummarizationPreprocessor, + MGLMSummarizationPreprocessor, ZeroShotClassificationTransformersPreprocessor, TextGenerationJiebaPreprocessor, SentencePiecePreprocessor, DialogIntentPredictionPreprocessor, DialogModelingPreprocessor, DialogStateTrackingPreprocessor, ConversationalTextToSqlPreprocessor, TableQuestionAnsweringPreprocessor, NERPreprocessorViet, NERPreprocessorThai, WordSegmentationPreprocessorThai, - TranslationEvaluationPreprocessor, CanmtTranslationPreprocessor, - DialogueClassificationUsePreprocessor, SiameseUiePreprocessor, - DocumentGroundedDialogGeneratePreprocessor, + TranslationEvaluationTransformersPreprocessor, + CanmtTranslationPreprocessor, DialogueClassificationUsePreprocessor, + SiameseUiePreprocessor, DocumentGroundedDialogGeneratePreprocessor, DocumentGroundedDialogRetrievalPreprocessor, DocumentGroundedDialogRerankPreprocessor) from .video import ReadVideoData, MovieSceneSegmentationPreprocessor @@ -70,7 +71,8 @@ else: 'tts': ['KanttsDataPreprocessor'], 'multi_modal': [ 'DiffusionImageGenerationPreprocessor', 'OfaPreprocessor', - 'MPlugPreprocessor', 'HiTeAPreprocessor' + 'MPlugPreprocessor', 'HiTeAPreprocessor', 'MplugOwlPreprocessor', + 'ImageCaptioningClipInterrogatorPreprocessor' ], 'nlp': [ 'DocumentSegmentationTransformersPreprocessor', @@ -96,7 +98,7 @@ else: 'DialogStateTrackingPreprocessor', 'ConversationalTextToSqlPreprocessor', 'TableQuestionAnsweringPreprocessor', - 'TranslationEvaluationPreprocessor', + 'TranslationEvaluationTransformersPreprocessor', 'CanmtTranslationPreprocessor', 'DialogueClassificationUsePreprocessor', 'SiameseUiePreprocessor', 'DialogueClassificationUsePreprocessor', diff --git a/modelscope/preprocessors/asr.py b/modelscope/preprocessors/asr.py index ea867775..4696c675 100644 --- a/modelscope/preprocessors/asr.py +++ 
b/modelscope/preprocessors/asr.py @@ -74,14 +74,6 @@ class WavToScp(Preprocessor): if code_base != 'funasr': cmd = self.config_checking(cmd) cmd = self.env_setting(cmd) - if audio_format == 'wav': - cmd['audio_lists'] = self.scp_generation_from_wav(cmd) - elif audio_format == 'kaldi_ark': - cmd['audio_lists'] = self.scp_generation_from_ark(cmd) - elif audio_format == 'tfrecord': - cmd['audio_lists'] = os.path.join(cmd['wav_path'], 'data.records') - elif audio_format == 'pcm' or audio_format == 'scp': - cmd['audio_lists'] = audio_in return cmd @@ -235,63 +227,4 @@ class WavToScp(Preprocessor): inputs['model_lang'] = inputs['model_config']['lang'] else: inputs['model_lang'] = 'zh-cn' - return inputs - - def scp_generation_from_wav(self, inputs: Dict[str, Any]) -> List[Any]: - """scp generation from waveform files - """ - - # find all waveform files - wav_list = [] - if inputs['recog_type'] == 'wav': - file_path = inputs['wav_path'] - if os.path.isfile(file_path): - if file_path.endswith('.wav') or file_path.endswith('.WAV'): - wav_list.append(file_path) - else: - from easyasr.common import asr_utils - wav_dir: str = inputs['wav_path'] - wav_list = asr_utils.recursion_dir_all_wav(wav_list, wav_dir) - - list_count: int = len(wav_list) - inputs['wav_count'] = list_count - - # store all wav into audio list - audio_lists = [] - j: int = 0 - while j < list_count: - wav_file = wav_list[j] - wave_key: str = os.path.splitext(os.path.basename(wav_file))[0] - item = {'key': wave_key, 'file': wav_file} - audio_lists.append(item) - j += 1 - - return audio_lists - - def scp_generation_from_ark(self, inputs: Dict[str, Any]) -> List[Any]: - """scp generation from kaldi ark file - """ - - ark_scp_path = os.path.join(inputs['wav_path'], 'data.scp') - ark_file_path = os.path.join(inputs['wav_path'], 'data.ark') - assert os.path.exists(ark_scp_path), 'data.scp does not exist' - assert os.path.exists(ark_file_path), 'data.ark does not exist' - - with open(ark_scp_path, 'r', encoding='utf-8') as f: - lines = f.readlines() - - # store all ark item into audio list - audio_lists = [] - for line in lines: - outs = line.strip().split(' ') - if len(outs) == 2: - key = outs[0] - sub = outs[1].split(':') - if len(sub) == 2: - nums = sub[1] - content = ark_file_path + ':' + nums - item = {'key': key, 'file': content} - audio_lists.append(item) - - return audio_lists diff --git a/modelscope/preprocessors/multi_modal.py b/modelscope/preprocessors/multi_modal.py index bd37c620..faf796f4 100644 --- a/modelscope/preprocessors/multi_modal.py +++ b/modelscope/preprocessors/multi_modal.py @@ -1,5 +1,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import os.path as osp +import re from io import BytesIO from typing import Any, Dict, List, Tuple, Union @@ -29,7 +30,7 @@ from .ofa.utils.constant import OFA_TASK_KEY_MAPPING __all__ = [ 'DiffusionImageGenerationPreprocessor', 'OfaPreprocessor', - 'MPlugPreprocessor', 'HiTeAPreprocessor' + 'MPlugPreprocessor', 'HiTeAPreprocessor', 'MplugOwlPreprocessor' ] @@ -642,3 +643,159 @@ class HiTeAPreprocessor(Preprocessor): 'answer_attention_mask': answer.attention_mask.squeeze(), } return output + + +@PREPROCESSORS.register_module( + Fields.multi_modal, module_name=Preprocessors.mplug_owl_preprocessor) +class MplugOwlPreprocessor(Preprocessor): + + def __init__(self, + model_dir: str, + mode: str = ModeKeys.INFERENCE, + *args, + **kwargs): + super().__init__(*args, **kwargs) + self.model_dir = model_dir + self.mode = mode + + self._tokenizer = None + self._patch_resize_transform = None + self.media_token = {'': 65} + self._image_map = {} + + @property + def tokenizer(self): + from modelscope.models.nlp.llama import LlamaTokenizer + + if self._tokenizer is None: + self._tokenizer = LlamaTokenizer.from_pretrained(self.model_dir) + return self._tokenizer + + @property + def patch_resize_transform(self): + if self._patch_resize_transform is None: + from torchvision import transforms + + mean = (0.48145466, 0.4578275, 0.40821073) + std = (0.26862954, 0.26130258, 0.27577711) + + self._patch_resize_transform = transforms.Compose([ + transforms.Resize((224, 224), interpolation=Image.BICUBIC), + transforms.ToTensor(), + transforms.Normalize(mean=mean, std=std), + ]) + return self._patch_resize_transform + + def image_open(self, path: str) -> Tuple[Image.Image, int]: + if path not in self._image_map: + index = len(self._image_map) + self._image_map[path] = (load_image(path), index) + return self._image_map[path] + + def tokenize_text(self, text: str) -> List[int]: + media_tokens = { + k: -int(i + 1) + for i, k in enumerate(self.media_token.keys()) + } + media_lengths = self.media_token.copy() + + prompt_chunk = [self.tokenizer.bos_token_id] + + # Pure Text + condition = [ + media_token not in text for media_token in media_tokens.keys() + ] + if all(condition): + enc_chunk = prompt_chunk + \ + self.tokenizer(text, add_special_tokens=False)['input_ids'] + + # Multi-Modal Text + else: + enc_chunk = prompt_chunk + pattern = '|'.join(map(re.escape, list(media_tokens.keys()))) + chunk_strs = re.split(f'({pattern})', text) + chunk_strs = [x for x in chunk_strs if len(x) > 0] + for idx, chunk_str in enumerate(chunk_strs): + if chunk_str in media_tokens: + enc_chunk += [media_tokens[chunk_str]] * \ + media_lengths[chunk_str] + else: + tmp_chunk = self.tokenizer( + chunk_str, add_special_tokens=False)['input_ids'] + enc_chunk += tmp_chunk + return enc_chunk + + def convert(self, messages: Dict[str, List[Dict]]) -> str: + texts = [] + image = [] + messages = messages['messages'] + for turn in messages: + if turn['role'] == 'system': + role = '' + elif turn['role'] == 'user': + role = 'Human: ' + else: + role = 'AI: ' + if isinstance(turn['content'], str): + text = f"{role}{turn['content']}" + texts.append(text) + else: + for t in turn['content']: + if isinstance(t, str): + text = f'{role}{t}' + else: + text = f'{role}' + image.append(t['image']) + texts.append(text) + texts = '\n'.join(texts) + texts += '\nAI: ' + return image, texts + + def __call__(self, messages: Dict[str, Any]) -> Dict[str, Any]: + """ + Args: + messages: {[ + {'role': 'system', 'content': 'message1'}, + {'role': 'user', 'content': 'message2'}, + 
{'role': 'user', 'content': ['message2', {"image": 'image_path'}, 'message3', ...]}, + ]} + The 'role' should be choose from ['system', 'user', 'assistant']. + The 'content' can be either str or List[Union[str, Dict]] + Return: + output: Dict[str, Tensor] + """ + output = {} + images, text = self.convert(messages) + + if len(images) > 0: + pixel_values = [] + for image in images: + pixel_values.append( + self.patch_resize_transform(self.image_open(image)[0])) + pixel_values = torch.stack(pixel_values, dim=0) + else: + pixel_values = None + + input_ids = self.tokenize_text(text) + input_ids = torch.LongTensor([input_ids]) + + output = { + 'pixel_values': pixel_values, + 'input_ids': input_ids, + } + + return output + + +@PREPROCESSORS.register_module( + Fields.multi_modal, + module_name=Preprocessors.image_captioning_clip_interrogator_preprocessor) +class ImageCaptioningClipInterrogatorPreprocessor(Preprocessor): + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def __call__(self, data) -> Dict[str, Any]: + image = load_image(data) + data = np.array(image).transpose(2, 0, 1) + return data diff --git a/modelscope/preprocessors/nlp/__init__.py b/modelscope/preprocessors/nlp/__init__.py index 5904d65e..19421fa0 100644 --- a/modelscope/preprocessors/nlp/__init__.py +++ b/modelscope/preprocessors/nlp/__init__.py @@ -29,7 +29,7 @@ if TYPE_CHECKING: from .space_T_en import ConversationalTextToSqlPreprocessor from .space_T_cn import TableQuestionAnsweringPreprocessor from .mglm_summarization_preprocessor import MGLMSummarizationPreprocessor - from .translation_evaluation_preprocessor import TranslationEvaluationPreprocessor + from .translation_evaluation_preprocessor import TranslationEvaluationTransformersPreprocessor from .canmt_translation import CanmtTranslationPreprocessor from .dialog_classification_use_preprocessor import DialogueClassificationUsePreprocessor from .siamese_uie_preprocessor import SiameseUiePreprocessor @@ -90,7 +90,7 @@ else: 'space_T_en': ['ConversationalTextToSqlPreprocessor'], 'space_T_cn': ['TableQuestionAnsweringPreprocessor'], 'translation_evaluation_preprocessor': - ['TranslationEvaluationPreprocessor'], + ['TranslationEvaluationTransformersPreprocessor'], 'canmt_translation': [ 'CanmtTranslationPreprocessor', ], diff --git a/modelscope/preprocessors/nlp/token_classification_preprocessor.py b/modelscope/preprocessors/nlp/token_classification_preprocessor.py index 66e57cc8..4b4fee1f 100644 --- a/modelscope/preprocessors/nlp/token_classification_preprocessor.py +++ b/modelscope/preprocessors/nlp/token_classification_preprocessor.py @@ -201,7 +201,7 @@ class TokenClassificationTransformersPreprocessor( def __init__(self, model_dir: str = None, - first_sequence: str = None, + first_sequence: str = 'text', label: str = 'label', label2id: Dict = None, label_all_tokens: bool = False, diff --git a/modelscope/preprocessors/nlp/translation_evaluation_preprocessor.py b/modelscope/preprocessors/nlp/translation_evaluation_preprocessor.py index 0bf62cdc..b0b2efd1 100644 --- a/modelscope/preprocessors/nlp/translation_evaluation_preprocessor.py +++ b/modelscope/preprocessors/nlp/translation_evaluation_preprocessor.py @@ -2,10 +2,13 @@ from typing import Any, Dict, List, Union +import torch from transformers import AutoTokenizer from modelscope.metainfo import Preprocessors -from modelscope.models.nlp.unite.configuration_unite import EvaluationMode +from modelscope.models.nlp.unite.configuration import InputFormat +from 
modelscope.models.nlp.unite.translation_evaluation import \ + combine_input_sentences from modelscope.preprocessors import Preprocessor from modelscope.preprocessors.builder import PREPROCESSORS from modelscope.utils.constant import Fields, ModeKeys @@ -14,43 +17,98 @@ from .transformers_tokenizer import NLPTokenizer @PREPROCESSORS.register_module( Fields.nlp, module_name=Preprocessors.translation_evaluation) -class TranslationEvaluationPreprocessor(Preprocessor): +class TranslationEvaluationTransformersPreprocessor(Preprocessor): r"""The tokenizer preprocessor used for translation evaluation. """ def __init__(self, model_dir: str, - eval_mode: EvaluationMode, + max_len: int, + pad_token_id: int, + eos_token_id: int, + input_format: InputFormat = InputFormat.SRC_REF, mode=ModeKeys.INFERENCE, *args, **kwargs): - r"""preprocess the data via the vocab file from the `model_dir` path + r"""Preprocessing the data for the model in `model_dir` path Args: model_dir: A Model instance. - eval_mode: Evaluation mode, choosing one from `"EvaluationMode.SRC_REF"`, - `"EvaluationMode.SRC"`, `"EvaluationMode.REF"`. Aside from hypothesis, the + max_len: Maximum length for input sequence. + pad_token_id: Token id for padding token. + eos_token_id: Token id for the ending-of-sequence (eos) token. + input_format: Input format, choosing one from `"InputFormat.SRC_REF"`, + `"InputFormat.SRC"`, `"InputFormat.REF"`. Aside from hypothesis, the source/reference/source+reference can be presented during evaluation. + mode: The mode for this preprocessor. """ super().__init__(mode=mode) self.tokenizer = NLPTokenizer( model_dir=model_dir, use_fast=False, tokenize_kwargs=kwargs) - self.eval_mode = eval_mode + self.input_format = input_format + + self.max_len = max_len + self.pad_token_id = pad_token_id + self.eos_token_id = eos_token_id return - def __call__(self, input_dict: Dict[str, Any]) -> List[List[str]]: - if self.eval_mode == EvaluationMode.SRC and 'src' not in input_dict.keys( + def change_input_format(self, input_format: InputFormat): + r"""Change the input format for the preprocessor. + + Args: + input_format: Any choice in InputFormat.SRC_REF, InputFormat.SRC and InputFormat.REF. + + """ + self.input_format = input_format + return + + def collect_input_ids(self, input_dict: Dict[str, Any]): + r"""Collect the input ids for the given examples. + + Args: + input_dict: A dict containing hyp/src/ref sentences. + + Returns: + The token ids for each example. + + """ + output_sents = [ + self.tokenizer( + input_dict['hyp'], return_tensors='pt', + padding=True)['input_ids'] + ] + if self.input_format == InputFormat.SRC or self.input_format == InputFormat.SRC_REF: + output_sents += [ + self.tokenizer( + input_dict['src'], return_tensors='pt', + padding=True)['input_ids'] + ] + if self.input_format == InputFormat.REF or self.input_format == InputFormat.SRC_REF: + output_sents += [ + self.tokenizer( + input_dict['ref'], return_tensors='pt', + padding=True)['input_ids'] + ] + + input_ids = combine_input_sentences(output_sents, self.max_len, + self.pad_token_id, + self.eos_token_id) + + return input_ids + + def __call__(self, input_dict: Dict[str, Any]) -> Dict[str, Any]: + if self.input_format == InputFormat.SRC and 'src' not in input_dict.keys( ): raise ValueError( 'Source sentences are required for source-only evaluation mode.' 
) - if self.eval_mode == EvaluationMode.REF and 'ref' not in input_dict.keys( + if self.input_format == InputFormat.REF and 'ref' not in input_dict.keys( ): raise ValueError( 'Reference sentences are required for reference-only evaluation mode.' ) - if self.eval_mode == EvaluationMode.SRC_REF and ( + if self.input_format == InputFormat.SRC_REF and ( 'src' not in input_dict.keys() or 'ref' not in input_dict.keys()): raise ValueError( @@ -59,29 +117,58 @@ class TranslationEvaluationPreprocessor(Preprocessor): if type(input_dict['hyp']) == str: input_dict['hyp'] = [input_dict['hyp']] - if (self.eval_mode == EvaluationMode.SRC or self.eval_mode - == EvaluationMode.SRC_REF) and type(input_dict['src']) == str: + if (self.input_format == InputFormat.SRC or self.input_format + == InputFormat.SRC_REF) and type(input_dict['src']) == str: input_dict['src'] = [input_dict['src']] - if (self.eval_mode == EvaluationMode.REF or self.eval_mode - == EvaluationMode.SRC_REF) and type(input_dict['ref']) == str: + if (self.input_format == InputFormat.REF or self.input_format + == InputFormat.SRC_REF) and type(input_dict['ref']) == str: input_dict['ref'] = [input_dict['ref']] - output_sents = [ - self.tokenizer( - input_dict['hyp'], return_tensors='pt', - padding=True)['input_ids'] - ] - if self.eval_mode == EvaluationMode.SRC or self.eval_mode == EvaluationMode.SRC_REF: - output_sents += [ - self.tokenizer( - input_dict['src'], return_tensors='pt', - padding=True)['input_ids'] - ] - if self.eval_mode == EvaluationMode.REF or self.eval_mode == EvaluationMode.SRC_REF: - output_sents += [ - self.tokenizer( - input_dict['ref'], return_tensors='pt', - padding=True)['input_ids'] - ] + if (self.input_format == InputFormat.SRC + or self.input_format == InputFormat.SRC_REF) and (len( + input_dict['hyp']) != len(input_dict['src'])): + raise ValueError( + 'The number of given hyp sentences (%d) is not equal to that of src (%d).' + % (len(input_dict['hyp']), len(input_dict['src']))) + if (self.input_format == InputFormat.REF + or self.input_format == InputFormat.SRC_REF) and (len( + input_dict['hyp']) != len(input_dict['ref'])): + raise ValueError( + 'The number of given hyp sentences (%d) is not equal to that of ref (%d).' + % (len(input_dict['hyp']), len(input_dict['ref']))) - return output_sents + output_dict = {'input_ids': self.collect_input_ids(input_dict)} + + if self.mode == ModeKeys.TRAIN or self.mode == ModeKeys.EVAL: + if 'score' not in input_dict.keys(): + raise KeyError( + 'During training or evaluating, \'score\' should be provided.' + ) + if (isinstance(input_dict['score'], List) and len(input_dict['score']) != len(output_dict['input_ids'])) \ + or (isinstance(input_dict['score'], float) and len(output_dict['input_ids']) != 1): + raise ValueError( + 'The number of scores is not equal to that of the given examples. ' + 'Required %d, given %d.'
% + (len(output_dict['input_ids']), len(input_dict['score']))) + + output_dict['score'] = [input_dict['score']] if isinstance( + input_dict['score'], float) else input_dict['score'] + + if self.mode == ModeKeys.EVAL: + if 'lp' not in input_dict.keys(): + raise ValueError( + 'Language pair should be provided for evaluation.') + + if 'segment_id' not in input_dict.keys(): + raise ValueError( + 'Segment id should be provided for evaluation.') + + if 'raw_score' not in input_dict.keys(): + raise ValueError( + 'Raw scores should be provided for evaluation.') + + output_dict['lp'] = input_dict['lp'] + output_dict['segment_id'] = input_dict['segment_id'] + output_dict['raw_score'] = input_dict['raw_score'] + + return output_dict diff --git a/modelscope/trainers/__init__.py b/modelscope/trainers/__init__.py index 90f73a7f..0d20fe00 100644 --- a/modelscope/trainers/__init__.py +++ b/modelscope/trainers/__init__.py @@ -15,6 +15,8 @@ if TYPE_CHECKING: from .nlp import SequenceClassificationTrainer, TextRankingTrainer, SiameseUIETrainer from .nlp_trainer import NlpEpochBasedTrainer, VecoTrainer from .trainer import EpochBasedTrainer + from .training_args import TrainingArgs, build_dataset_from_file + from .hooks import Hook, Priority else: _import_structure = { @@ -32,7 +34,9 @@ 'SiameseUIETrainer' ], 'nlp_trainer': ['NlpEpochBasedTrainer', 'VecoTrainer'], - 'trainer': ['EpochBasedTrainer'] + 'trainer': ['EpochBasedTrainer'], + 'training_args': ['TrainingArgs', 'build_dataset_from_file'], + 'hooks': ['Hook', 'Priority'] } import sys diff --git a/modelscope/trainers/cli_argument_parser.py b/modelscope/trainers/cli_argument_parser.py new file mode 100644 index 00000000..f183b9ea --- /dev/null +++ b/modelscope/trainers/cli_argument_parser.py @@ -0,0 +1,151 @@ +from argparse import Action, ArgumentDefaultsHelpFormatter, ArgumentParser +from dataclasses import fields +from typing import List + + +class CliArgumentParser(ArgumentParser): + """ Argument Parser to define and parse command-line args for training. + + Args: + training_args: dict or list of dict which defines different + parameters for training. + """ + + def __init__(self, training_args=None, **kwargs): + if 'formatter_class' not in kwargs: + kwargs['formatter_class'] = ArgumentDefaultsHelpFormatter + super().__init__(**kwargs) + self.training_args = training_args + self.define_args() + + def get_manual_args(self, args): + return [arg[2:] for arg in args if arg.startswith('--')] + + def _parse_known_args(self, args: List = None, namespace=None): + self.model_id = namespace.model if namespace is not None else None + if '--model' in args: + self.model_id = args[args.index('--model') + 1] + self.manual_args = self.get_manual_args(args) + return super()._parse_known_args(args, namespace) + + def print_help(self, file=None): + return super().print_help(file) + + def define_args(self): + if self.training_args is not None: + for f in fields(self.training_args): + arg_name = f.name + arg_attr = getattr(self.training_args, f.name) + name = f'--{arg_name}' + kwargs = dict(type=f.type, help=f.metadata['help']) + kwargs['default'] = arg_attr + + if 'choices' in f.metadata: + kwargs['choices'] = f.metadata['choices'] + + kwargs['action'] = SingleAction + self.add_argument(name, **kwargs) + + +class DictAction(Action): + """ + argparse action to split an argument into KEY=VALUE form + on the first = and append to a dictionary. List options can + be passed as comma separated values, i.e. 'KEY=V1,V2,V3', or with explicit + brackets, i.e. 'KEY=[V1,V2,V3]'.
It also supports nested brackets to build + list/tuple values. e.g. 'KEY=[(V1,V2),(V3,V4)]' + """ + + @staticmethod + def parse_int_float_bool_str(val): + try: + return int(val) + except ValueError: + pass + try: + return float(val) + except ValueError: + pass + if val.lower() in ['true', 'false']: + return val.lower() == 'true' + if val == 'None': + return None + return val + + @staticmethod + def parse_iterable(val): + """Parse iterable values in the string. + All elements inside '()' or '[]' are treated as iterable values. + Args: + val (str): Value string. + Returns: + list | tuple: The expanded list or tuple from the string. + Examples: + >>> DictAction.parse_iterable('1,2,3') + [1, 2, 3] + >>> DictAction.parse_iterable('[a, b, c]') + ['a', 'b', 'c'] + >>> DictAction.parse_iterable('[(1, 2, 3), [a, b], c]') + [(1, 2, 3), ['a', 'b'], 'c'] + """ + + def find_next_comma(string): + """Find the position of next comma in the string. + If no ',' is found in the string, return the string length. All + chars inside '()' and '[]' are treated as one element and thus ',' + inside these brackets are ignored. + """ + assert (string.count('(') == string.count(')')) and ( + string.count('[') + == string.count(']')), f'Imbalanced brackets exist in {string}' + end = len(string) + for idx, char in enumerate(string): + pre = string[:idx] + # The string before this ',' is balanced + if ((char == ',') and (pre.count('(') == pre.count(')')) + and (pre.count('[') == pre.count(']'))): + end = idx + break + return end + + # Strip ' and " characters and replace whitespace. + val = val.strip('\'\"').replace(' ', '') + is_tuple = False + if val.startswith('(') and val.endswith(')'): + is_tuple = True + val = val[1:-1] + elif val.startswith('[') and val.endswith(']'): + val = val[1:-1] + elif ',' not in val: + # val is a single value + return DictAction.parse_int_float_bool_str(val) + + values = [] + while len(val) > 0: + comma_idx = find_next_comma(val) + element = DictAction.parse_iterable(val[:comma_idx]) + values.append(element) + val = val[comma_idx + 1:] + if is_tuple: + values = tuple(values) + return values + + def __call__(self, parser, namespace, values, option_string): + options = {} + for kv in values: + key, val = kv.split('=', maxsplit=1) + options[key] = self.parse_iterable(val) + setattr(namespace, self.dest, options) + + +class SingleAction(DictAction): + """ Argparse action to convert value to tuple or list or nested structure of + list and tuple, i.e. 'V1,V2,V3', or with explicit brackets, i.e. '[V1,V2,V3]'. + It also supports nested brackets to build list/tuple values. e.g.
'[(V1,V2),(V3,V4)]' + """ + + def __call__(self, parser, namespace, value, option_string): + if isinstance(value, str): + setattr(namespace, self.dest, self.parse_iterable(value)) + else: + setattr(namespace, self.dest, value) diff --git a/modelscope/trainers/default_config.py b/modelscope/trainers/default_config.py index 51a0df40..bb272695 100644 --- a/modelscope/trainers/default_config.py +++ b/modelscope/trainers/default_config.py @@ -4,38 +4,6 @@ from typing import Dict, List, Optional, Tuple from modelscope.utils.config import Config -DEFAULT_CONFIG = Config({ - 'framework': 'pytorch', - 'train': { - 'work_dir': '/tmp', - 'max_epochs': 10, - 'dataloader': { - 'batch_size_per_gpu': 16, - 'workers_per_gpu': 0 - }, - 'optimizer': { - 'type': 'SGD', - 'lr': 1e-3 - }, - 'lr_scheduler': { - 'type': 'StepLR', - 'step_size': 2 - }, - 'checkpoint': { - 'period': { - 'interval': 1 - } - } - }, - 'evaluation': { - 'dataloader': { - 'batch_size_per_gpu': 16, - 'workers_per_gpu': 0, - 'shuffle': False - }, - } -}) - DEFAULT_HOOKS_CONFIG = { 'train.hooks': [{ 'type': 'CheckpointHook', @@ -68,7 +36,7 @@ def merge_cfg(cfg: Config): def merge_hooks(cfg: Config) -> List[Dict]: - hooks = cfg.train.hooks.copy() + hooks = getattr(cfg.train, 'hooks', []).copy() for hook_type, key_chain in _HOOK_KEY_CHAIN_MAP.items(): hook = _key_chain_to_hook(cfg, key_chain, hook_type) if hook is not None: @@ -107,7 +75,8 @@ def _check_basic_hook(cfg: Config, key_chain: str, hook_type: str) -> bool: if cfg.safe_get(key_chain) is None: return False hooks = list( - filter(lambda hook: hook['type'] == hook_type, cfg.train.hooks)) + filter(lambda hook: hook['type'] == hook_type, + getattr(cfg.train, 'hooks', []))) assert len(hooks) == 0, f'The key_chain {key_chain} and the traditional hook ' \ f'cannot exist at the same time, ' \ f'please delete {hook_type} in the configuration file.' diff --git a/modelscope/trainers/easycv/__init__.py b/modelscope/trainers/easycv/__init__.py deleted file mode 100644 index b1b8fc15..00000000 --- a/modelscope/trainers/easycv/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule - -if TYPE_CHECKING: - from .utils import AddLrLogHook, EasyCVMetric -else: - _import_structure = {'utils': ['AddLrLogHook', 'EasyCVMetric']} - - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/trainers/easycv/trainer.py b/modelscope/trainers/easycv/trainer.py deleted file mode 100644 index 58d6a440..00000000 --- a/modelscope/trainers/easycv/trainer.py +++ /dev/null @@ -1,183 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-from copy import deepcopy -from functools import partial -from typing import Callable, Optional, Tuple, Union - -import torch -from easycv.utils.checkpoint import load_checkpoint as ev_load_checkpoint -from torch import nn -from torch.utils.data import Dataset - -from modelscope.metainfo import Trainers -from modelscope.models.base import TorchModel -from modelscope.msdatasets import MsDataset -from modelscope.preprocessors import Preprocessor -from modelscope.trainers import EpochBasedTrainer -from modelscope.trainers.base import TRAINERS -from modelscope.trainers.easycv.utils import register_util -from modelscope.trainers.hooks import HOOKS -from modelscope.trainers.parallel.builder import build_parallel -from modelscope.trainers.parallel.utils import is_parallel -from modelscope.utils.config import Config -from modelscope.utils.constant import DEFAULT_MODEL_REVISION -from modelscope.utils.import_utils import LazyImportModule -from modelscope.utils.registry import default_group - - -@TRAINERS.register_module(module_name=Trainers.easycv) -class EasyCVEpochBasedTrainer(EpochBasedTrainer): - """Epoch based Trainer for EasyCV. - - Args: - cfg_file(str): The config file of EasyCV. - model (:obj:`torch.nn.Module` or :obj:`TorchModel` or `str`): The model to be run, or a valid model dir - or a model id. If model is None, build_model method will be called. - train_dataset (`MsDataset` or `torch.utils.data.Dataset`, *optional*): - The dataset to use for training. - Note that if it's a `torch.utils.data.IterableDataset` with some randomization and you are training in a - distributed fashion, your iterable dataset should either use a internal attribute `generator` that is a - `torch.Generator` for the randomization that must be identical on all processes (and the Trainer will - manually set the seed of this `generator` at each epoch) or have a `set_epoch()` method that internally - sets the seed of the RNGs used. - eval_dataset (`MsDataset` or `torch.utils.data.Dataset`, *optional*): The dataset to use for evaluation. - preprocessor (:obj:`Preprocessor`, *optional*): The optional preprocessor. - NOTE: If the preprocessor has been called before the dataset fed into this trainer by user's custom code, - this parameter should be None, meanwhile remove the 'preprocessor' key from the cfg_file. - Else the preprocessor will be instantiated from the cfg_file or assigned from this parameter and - this preprocessing action will be executed every time the dataset's __getitem__ is called. - optimizers (`Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler._LRScheduler]`, *optional*): A tuple - containing the optimizer and the scheduler to use. - max_epochs: (int, optional): Total training epochs. 
- """ - - def __init__( - self, - cfg_file: Optional[str] = None, - model: Optional[Union[TorchModel, nn.Module, str]] = None, - arg_parse_fn: Optional[Callable] = None, - train_dataset: Optional[Union[MsDataset, Dataset]] = None, - eval_dataset: Optional[Union[MsDataset, Dataset]] = None, - preprocessor: Optional[Preprocessor] = None, - optimizers: Tuple[torch.optim.Optimizer, - torch.optim.lr_scheduler._LRScheduler] = (None, - None), - model_revision: Optional[str] = DEFAULT_MODEL_REVISION, - **kwargs): - - register_util.register_parallel() - register_util.register_part_mmcv_hooks_to_ms() - - super(EasyCVEpochBasedTrainer, self).__init__( - model=model, - cfg_file=cfg_file, - arg_parse_fn=arg_parse_fn, - preprocessor=preprocessor, - optimizers=optimizers, - model_revision=model_revision, - train_dataset=train_dataset, - eval_dataset=eval_dataset, - **kwargs) - - # reset data_collator - from mmcv.parallel import collate - - self.train_data_collator = partial( - collate, - samples_per_gpu=self.cfg.train.dataloader.batch_size_per_gpu) - self.eval_data_collator = partial( - collate, - samples_per_gpu=self.cfg.evaluation.dataloader.batch_size_per_gpu) - - # load pretrained model - load_from = self.cfg.get('load_from', None) - if load_from is not None: - ev_load_checkpoint( - self.model, - filename=load_from, - map_location=self.device, - strict=False, - ) - - # reset parallel - if not self._dist: - assert not is_parallel( - self.model - ), 'Not support model wrapped by custom parallel if not in distributed mode!' - dp_cfg = dict( - type='MMDataParallel', - module=self.model, - device_ids=[torch.cuda.current_device()]) - self.model = build_parallel(dp_cfg) - - def rebuild_config(self, cfg: Config): - cfg = super().rebuild_config(cfg) - # Register easycv hooks dynamicly. If the hook already exists in modelscope, - # the hook in modelscope will be used, otherwise register easycv hook into ms. - # We must manually trigger lazy import to detect whether the hook is in modelscope. - # TODO: use ast index to detect whether the hook is in modelscope - for h_i in cfg.train.get('hooks', []): - sig = ('HOOKS', default_group, h_i['type']) - LazyImportModule.import_module(sig) - if h_i['type'] not in HOOKS._modules[default_group]: - if h_i['type'] in [ - 'TensorboardLoggerHookV2', 'WandbLoggerHookV2' - ]: - raise ValueError( - 'Not support hook %s now, we will support it in the future!' - % h_i['type']) - register_util.register_hook_to_ms(h_i['type']) - return cfg - - def create_optimizer_and_scheduler(self): - """ Create optimizer and lr scheduler - """ - optimizer, lr_scheduler = self.optimizers - if optimizer is None: - optimizer_cfg = self.cfg.train.get('optimizer', None) - else: - optimizer_cfg = None - - optim_options = {} - if optimizer_cfg is not None: - optim_options = optimizer_cfg.pop('options', {}) - from easycv.apis.train import build_optimizer - optimizer = build_optimizer(self.model, optimizer_cfg) - - if lr_scheduler is None: - lr_scheduler_cfg = self.cfg.train.get('lr_scheduler', None) - else: - lr_scheduler_cfg = None - - lr_options = {} - # Adapt to mmcv lr scheduler hook. 
- # Please refer to: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py - if lr_scheduler_cfg is not None: - assert optimizer is not None - lr_options = lr_scheduler_cfg.pop('options', {}) - assert 'policy' in lr_scheduler_cfg - policy_type = lr_scheduler_cfg.pop('policy') - if policy_type == policy_type.lower(): - policy_type = policy_type.title() - hook_type = policy_type + 'LrUpdaterHook' - lr_scheduler_cfg['type'] = hook_type - - self.cfg.train.lr_scheduler_hook = lr_scheduler_cfg - - self.optimizer = optimizer - self.lr_scheduler = lr_scheduler - - return self.optimizer, self.lr_scheduler, optim_options, lr_options - - def to_parallel(self, model) -> Union[nn.Module, TorchModel]: - if self.cfg.get('parallel', None) is not None: - dp_cfg = deepcopy(self.cfg['parallel']) - dp_cfg.update( - dict(module=model, device_ids=[torch.cuda.current_device()])) - return build_parallel(dp_cfg) - - dp_cfg = dict( - type='MMDistributedDataParallel', - module=model, - device_ids=[torch.cuda.current_device()]) - - return build_parallel(dp_cfg) diff --git a/modelscope/trainers/easycv/utils/__init__.py b/modelscope/trainers/easycv/utils/__init__.py deleted file mode 100644 index 23cfa36a..00000000 --- a/modelscope/trainers/easycv/utils/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from typing import TYPE_CHECKING - -from modelscope.utils.import_utils import LazyImportModule - -if TYPE_CHECKING: - from .hooks import AddLrLogHook - from .metric import EasyCVMetric - -else: - _import_structure = {'hooks': ['AddLrLogHook'], 'metric': ['EasyCVMetric']} - - import sys - - sys.modules[__name__] = LazyImportModule( - __name__, - globals()['__file__'], - _import_structure, - module_spec=__spec__, - extra_objects={}, - ) diff --git a/modelscope/trainers/easycv/utils/hooks.py b/modelscope/trainers/easycv/utils/hooks.py deleted file mode 100644 index 1f1a5c95..00000000 --- a/modelscope/trainers/easycv/utils/hooks.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from modelscope.trainers.hooks import HOOKS, Priority -from modelscope.trainers.hooks.lr_scheduler_hook import LrSchedulerHook -from modelscope.utils.constant import LogKeys - - -@HOOKS.register_module(module_name='AddLrLogHook') -class AddLrLogHook(LrSchedulerHook): - """For EasyCV to adapt to ModelScope, the lr log of EasyCV is added in the trainer, - but the trainer of ModelScope does not and it is added in the lr scheduler hook. - But The lr scheduler hook used by EasyCV is the hook of mmcv, and there is no lr log. - It will be deleted in the future. - """ - PRIORITY = Priority.NORMAL - - def __init__(self): - pass - - def before_run(self, trainer): - pass - - def after_train_iter(self, trainer): - trainer.log_buffer.output[LogKeys.LR] = self._get_log_lr(trainer) - - def before_train_epoch(self, trainer): - trainer.log_buffer.output[LogKeys.LR] = self._get_log_lr(trainer) - - def after_train_epoch(self, trainer): - pass diff --git a/modelscope/trainers/easycv/utils/metric.py b/modelscope/trainers/easycv/utils/metric.py deleted file mode 100644 index d952ec3e..00000000 --- a/modelscope/trainers/easycv/utils/metric.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import itertools -from typing import Dict - -import numpy as np -import torch - -from modelscope.metrics.base import Metric -from modelscope.metrics.builder import METRICS - - -@METRICS.register_module(module_name='EasyCVMetric') -class EasyCVMetric(Metric): - """Adapt to ModelScope Metric for EasyCV evaluator. - """ - - def __init__(self, trainer=None, evaluators=None, *args, **kwargs): - from easycv.core.evaluation.builder import build_evaluator - - self.trainer = trainer - self.evaluators = build_evaluator(evaluators) - self.preds = [] - self.grountruths = [] - - def add(self, outputs: Dict, inputs: Dict): - self.preds.append(outputs) - del inputs - - def evaluate(self): - results = {} - for _, batch in enumerate(self.preds): - for k, v in batch.items(): - if k not in results: - results[k] = [] - results[k].append(v) - - for k, v in results.items(): - if len(v) == 0: - raise ValueError(f'empty result for {k}') - - if isinstance(v[0], torch.Tensor): - results[k] = torch.cat(v, 0) - elif isinstance(v[0], (list, np.ndarray)): - results[k] = list(itertools.chain.from_iterable(v)) - else: - raise ValueError( - f'value of batch prediction dict should only be tensor or list, {k} type is {v[0]}' - ) - - metric_values = self.trainer.eval_dataset.evaluate( - results, self.evaluators) - return metric_values - - def merge(self, other: 'EasyCVMetric'): - self.preds.extend(other.preds) - - def __getstate__(self): - return self.preds - - def __setstate__(self, state): - self.__init__() - self.preds = state diff --git a/modelscope/trainers/easycv/utils/register_util.py b/modelscope/trainers/easycv/utils/register_util.py deleted file mode 100644 index 04bf719b..00000000 --- a/modelscope/trainers/easycv/utils/register_util.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import inspect -import logging - -from modelscope.trainers.hooks import HOOKS -from modelscope.trainers.parallel.builder import PARALLEL -from modelscope.utils.registry import default_group - - -class _RegisterManager: - - def __init__(self): - self.registries = {} - - def add(self, module, name, group_key=default_group): - if module.name not in self.registries: - self.registries[module.name] = {} - if group_key not in self.registries[module.name]: - self.registries[module.name][group_key] = [] - - self.registries[module.name][group_key].append(name) - - def exists(self, module, name, group_key=default_group): - if self.registries.get(module.name, None) is None: - return False - if self.registries[module.name].get(group_key, None) is None: - return False - if name in self.registries[module.name][group_key]: - return True - - return False - - -_dynamic_register = _RegisterManager() - - -def register_parallel(): - from mmcv.parallel import MMDistributedDataParallel, MMDataParallel - - mmddp = 'MMDistributedDataParallel' - mmdp = 'MMDataParallel' - - if not _dynamic_register.exists(PARALLEL, mmddp): - _dynamic_register.add(PARALLEL, mmddp) - PARALLEL.register_module( - module_name=mmddp, module_cls=MMDistributedDataParallel) - if not _dynamic_register.exists(PARALLEL, mmdp): - _dynamic_register.add(PARALLEL, mmdp) - PARALLEL.register_module(module_name=mmdp, module_cls=MMDataParallel) - - -def register_hook_to_ms(hook_name, logger=None): - """Register EasyCV hook to ModelScope.""" - from easycv.hooks import HOOKS as _EV_HOOKS - - if hook_name not in _EV_HOOKS._module_dict: - raise ValueError( - f'Not found hook "{hook_name}" in EasyCV hook registries!') - - if _dynamic_register.exists(HOOKS, hook_name): - return - _dynamic_register.add(HOOKS, hook_name) - - obj = _EV_HOOKS._module_dict[hook_name] - HOOKS.register_module(module_name=hook_name, module_cls=obj) - - log_str = f'Register hook "{hook_name}" to modelscope hooks.' - logger.info(log_str) if logger is not None else logging.info(log_str) - - -def register_part_mmcv_hooks_to_ms(): - """Register required mmcv hooks to ModelScope. - Currently we only registered all lr scheduler hooks in EasyCV and mmcv. 
- Please refer to: - EasyCV: https://github.com/alibaba/EasyCV/blob/master/easycv/hooks/lr_update_hook.py - mmcv: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py - """ - from mmcv.runner.hooks import lr_updater - from mmcv.runner.hooks import HOOKS as _MMCV_HOOKS - from easycv.hooks import StepFixCosineAnnealingLrUpdaterHook, YOLOXLrUpdaterHook - - mmcv_hooks_in_easycv = [('StepFixCosineAnnealingLrUpdaterHook', - StepFixCosineAnnealingLrUpdaterHook), - ('YOLOXLrUpdaterHook', YOLOXLrUpdaterHook)] - - members = inspect.getmembers(lr_updater) - members.extend(mmcv_hooks_in_easycv) - - for name, obj in members: - if name in _MMCV_HOOKS._module_dict: - if _dynamic_register.exists(HOOKS, name): - continue - _dynamic_register.add(HOOKS, name) - HOOKS.register_module( - module_name=name, - module_cls=obj, - ) diff --git a/modelscope/trainers/hooks/__init__.py b/modelscope/trainers/hooks/__init__.py index 51677f25..072105be 100644 --- a/modelscope/trainers/hooks/__init__.py +++ b/modelscope/trainers/hooks/__init__.py @@ -5,7 +5,6 @@ from modelscope.utils.import_utils import LazyImportModule if TYPE_CHECKING: from .builder import HOOKS, build_hook - from .checkpoint_hook import BestCkptSaverHook, CheckpointHook, LoadCheckpointHook from .early_stop_hook import EarlyStopHook from .compression import SparsityHook from .evaluation_hook import EvaluationHook @@ -16,6 +15,10 @@ if TYPE_CHECKING: from .optimizer import (ApexAMPOptimizerHook, NoneOptimizerHook, OptimizerHook, TorchAMPOptimizerHook) from .priority import Priority, get_priority + from .checkpoint import CheckpointHook, LoadCheckpointHook, BestCkptSaverHook + from .distributed.ddp_hook import DDPHook + from .distributed.deepspeed_hook import DeepspeedHook + from .distributed.megatron_hook import MegatronHook else: _import_structure = { @@ -32,7 +35,12 @@ else: 'ApexAMPOptimizerHook', 'NoneOptimizerHook', 'OptimizerHook', 'TorchAMPOptimizerHook' ], - 'priority': ['Priority', 'get'] + 'checkpoint': + ['CheckpointHook', 'LoadCheckpointHook', 'BestCkptSaverHook'], + 'distributed.ddp_hook': ['DDPHook'], + 'distributed.deepspeed_hook': ['DeepspeedHook'], + 'distributed.megatron_hook': ['MegatronHook'], + 'priority': ['Priority', 'get_priority'] } import sys diff --git a/modelscope/trainers/hooks/checkpoint/__init__.py b/modelscope/trainers/hooks/checkpoint/__init__.py new file mode 100644 index 00000000..e2abb272 --- /dev/null +++ b/modelscope/trainers/hooks/checkpoint/__init__.py @@ -0,0 +1,2 @@ +from .checkpoint_hook import BestCkptSaverHook, CheckpointHook +from .load_checkpoint_hook import LoadCheckpointHook diff --git a/modelscope/trainers/hooks/checkpoint/checkpoint_hook.py b/modelscope/trainers/hooks/checkpoint/checkpoint_hook.py new file mode 100644 index 00000000..4b14a13f --- /dev/null +++ b/modelscope/trainers/hooks/checkpoint/checkpoint_hook.py @@ -0,0 +1,435 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import os +import random +import time +from typing import Optional + +import numpy as np +import torch + +from modelscope.hub.check_model import check_model_is_id +from modelscope.hub.push_to_hub import push_to_hub_async +from modelscope.metainfo import Hooks +from modelscope.trainers.hooks.builder import HOOKS +from modelscope.trainers.hooks.checkpoint.checkpoint_processor import \ + CheckpointProcessor +from modelscope.trainers.hooks.hook import Hook +from modelscope.trainers.hooks.priority import Priority +from modelscope.utils.constant import (DEFAULT_REPOSITORY_REVISION, LogKeys, + ModelFile) +from modelscope.utils.logger import get_logger +from modelscope.utils.torch_utils import is_master + + +class CheckpointStrategy: + by_epoch = 'by_epoch' + by_step = 'by_step' + no = 'no' + + +@HOOKS.register_module(module_name=Hooks.CheckpointHook) +class CheckpointHook(Hook): + """Save checkpoints periodically. + + Args: + save_strategy(str): The strategy to save checkpoints, can be `by_epoch`, `by_step` or `no`. + interval (int): The frequency to save the model. If `save_strategy` is `by_epoch`, + it means the number of epochs, else it means the number of iterations. + save_dir (str): The directory to save checkpoints. If it is None, use `trainer.work_dir`. + output_dir (str): The absolute path to save the output files for inference. If it's not specified, + the default dir is `{sub_dir}/output`. + save_last (bool): Whether to save the last checkpoint. Default: True. + max_checkpoint_num (int): The max number of checkpoint files, default None which means never delete anything. + If the number exceeds the limit, earlier checkpoints will be deleted first. + push_to_hub (bool): Whether to push the checkpoint to the modelhub. + hub_repo_id (str): The hub repo id. + hub_token (str): The token of the modelhub. You can also set the environment variable `MODELSCOPE_API_TOKEN`. + private_hub (bool): Whether to push to a private hub, default True. + hub_revision (str): Which branch to push the model to, default is `master`. + kwargs: + by_epoch (bool): Same as `save_strategy`, but with a higher priority; a legacy argument. + output_sub_dir (str): The folder under the `save_dir` to save the output checkpoint for inference. + This argument is kept to fit the existing configs.
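+
+    Example:
+        An illustrative sketch of building the hook directly; in practice it is usually
+        configured through the `train.checkpoint` section of the configuration file, and
+        the argument values below are placeholders.
+        >>> hook = CheckpointHook(save_strategy='by_step', interval=500, max_checkpoint_num=3)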
+ """ + + PRIORITY = Priority.LOW + + EVAL_RESULT_FILE = 'eval_result.txt' + + def __init__(self, + save_strategy: Optional[str] = CheckpointStrategy.by_epoch, + interval: Optional[int] = 0, + save_dir: Optional[str] = None, + output_dir: Optional[str] = None, + save_last: Optional[bool] = True, + max_checkpoint_num: Optional[int] = None, + push_to_hub: Optional[bool] = False, + hub_repo_id: Optional[str] = None, + hub_token: Optional[str] = None, + private_hub: Optional[bool] = True, + hub_revision: Optional[str] = DEFAULT_REPOSITORY_REVISION, + **kwargs): + self.interval = interval + self.save_dir = save_dir + if 'by_epoch' in kwargs: + self.save_strategy = CheckpointStrategy.by_epoch if kwargs[ + 'by_epoch'] else CheckpointStrategy.by_step + else: + self.save_strategy = save_strategy + if 'output_sub_dir' in kwargs: + self.output_sub_dir = kwargs['output_sub_dir'] + self.output_dir = None + else: + self.output_sub_dir = None + self.output_dir = output_dir + self.save_last = save_last + self.rng_state = None + self.push_to_hub = push_to_hub + self.hub_repo_id = hub_repo_id + self.hub_token = hub_token + self.private_hub = private_hub + self.hub_revision = hub_revision + self.tag = -1 + self.is_model_id = None + self.push_to_hub_future = None + self.max_checkpoint_num = None + if max_checkpoint_num is not None: + self.max_checkpoint_num = max(int(max_checkpoint_num), 1) + self.history_checkpoints = [] + self.processor = CheckpointProcessor() + + def set_processor(self, processor): + """ + The checkpoint hook accepts a processor to finish the actual saving/deleting action. + """ + self.processor = processor + + def before_run(self, trainer): + self.tag = -1 + if not self.save_dir: + self.save_dir = trainer.work_dir + if not self.output_dir: + if self.output_sub_dir: + self.output_dir = os.path.join(self.save_dir, + self.output_sub_dir) + else: + self.output_dir = os.path.join(self.save_dir, + ModelFile.TRAIN_OUTPUT_DIR) + + if not os.path.exists(self.save_dir): + os.makedirs(self.save_dir, exist_ok=True) + + if not hasattr(trainer, 'logger'): + self.logger = get_logger() + else: + self.logger = trainer.logger + + if is_master(): + output_dir = self.output_dir + # only global master prepares the output folder + self.processor.prepare_output(trainer, output_dir) + self.logger.info(f'Checkpoints will be saved to {self.save_dir}') + + def generate_prefix(self, trainer, save_strategy): + if save_strategy == CheckpointStrategy.by_epoch: + return f'{LogKeys.EPOCH}_{trainer.epoch + 1}' + else: + return f'{LogKeys.ITER}_{trainer.iter + 1}' + + def _do_save(self, trainer, save_strategy): + # prefix like 'epoch-1' or 'iter-1' + prefix = self.generate_prefix(trainer, save_strategy) + if self.processor.should_save_on_rank(trainer): + if is_master(): + if save_strategy == CheckpointStrategy.by_epoch: + self.logger.info( + f'Saving checkpoint at {trainer.epoch + 1} epoch') + else: + self.logger.info( + f'Saving checkpoint at {trainer.iter + 1} iter') + self._save_checkpoint(trainer, prefix) + if is_master() and self.push_to_hub: + if self.push_to_hub_future is not None and not self.push_to_hub_future.done( + ): + self.logger.error( + f'Another uploading is running, ' + f'this uploading with message {prefix} will be canceled.') + return + self.push_to_hub_future = self._push_to_hub(trainer, prefix) + + def after_train_epoch(self, trainer): + if self.save_strategy != CheckpointStrategy.by_epoch: + return + + if self._should_save(trainer): + self._do_save(trainer, CheckpointStrategy.by_epoch) + + def 
after_train_iter(self, trainer): + if self.save_strategy != CheckpointStrategy.by_step: + return + + if self._should_save(trainer): + self._do_save(trainer, CheckpointStrategy.by_step) + + def after_run(self, trainer): + if self.push_to_hub_future is not None and not self.push_to_hub_future.done( + ): + self.logger.info('Train finished. Uploading models, waiting...') + while not self.push_to_hub_future.done(): + time.sleep(1) + self.logger.info('Uploading models done.') + + def _push_to_hub(self, trainer, prefix): + if self.is_model_id is None: + self.is_model_id = check_model_is_id(trainer.input_model_id, + self.hub_token) + self.tag += 1 + return push_to_hub_async( + self.hub_repo_id, + self.output_dir, + token=self.hub_token, + private=self.private_hub, + commit_message=prefix, + tag=f'v1.{self.tag}', + revision=self.hub_revision, + source_repo=trainer.input_model_id if self.is_model_id else '') + + def save_evaluate_results(self, trainer): + with open(os.path.join(self.output_dir, self.EVAL_RESULT_FILE), + 'w') as f: + f.write(str(trainer.metric_values)) + + def _save_checkpoint(self, trainer, prefix): + """Save checkpoint files and remove obsolete ones + """ + checkpoint_path_prefix = os.path.join(self.save_dir, prefix) + meta = self._create_training_state(trainer) + self.processor.save_checkpoints(trainer, checkpoint_path_prefix, + self.output_dir, meta) + self.save_evaluate_results(trainer) + self.history_checkpoints.append(checkpoint_path_prefix) + self._remove_obsolete_checkpoints(trainer) + return prefix + + def _remove_obsolete_checkpoints(self, trainer): + if self.max_checkpoint_num is not None and \ + len(self.history_checkpoints) > self.max_checkpoint_num: + history_checkpoints = [ckpt for ckpt in self.history_checkpoints] + self.history_checkpoints.clear() + for i, checkpoint_path_prefix in enumerate(history_checkpoints): + if i < len(history_checkpoints) - self.max_checkpoint_num: + self.logger.info( + f'deleting checkpoint: {checkpoint_path_prefix}') + self.processor.remove_checkpoints( + trainer, checkpoint_path_prefix=checkpoint_path_prefix) + else: + self.history_checkpoints.append(checkpoint_path_prefix) + + def _should_save(self, trainer): + if self.save_strategy == CheckpointStrategy.by_epoch: + check_last = self.is_last_epoch + check_frequency = self.every_n_epochs + elif self.save_strategy == CheckpointStrategy.by_step: + check_last = self.is_last_iter + check_frequency = self.every_n_iters + else: + return False + + if check_frequency(trainer, + self.interval) or (self.save_last + and check_last(trainer)): + return True + return False + + def _create_training_state(self, trainer): + self.rng_state = { + 'random': random.getstate(), + 'numpy': np.random.get_state(), + 'cpu': torch.random.get_rng_state(), + 'cuda': torch.cuda.get_rng_state_all(), + } + + # keep epoch/iter/inner_iter/random_state + meta = { + 'epoch': trainer.epoch, + 'iter': trainer.iter + 1, + 'inner_iter': trainer.inner_iter + 1, + 'rng_state': self.rng_state, + } + + # keep hooks state + i = 0 + for hook in trainer.hooks: + if hasattr(hook, 'state_dict') and getattr(hook, '_should_save', + True): + meta[f'{hook.__class__}-{i}'] = hook.state_dict() + i += 1 + + return meta + + +@HOOKS.register_module(module_name=Hooks.BestCkptSaverHook) +class BestCkptSaverHook(CheckpointHook): + """ + Save best checkpoints hook. + + Args: + metric_key (str): Metric key to compare rule for best score. + save_best(bool): Save the best checkpoint, if set to False, this hook will have no effect. 
+ rule (str): Comparison rule for best score. Supports "max" and "min". If rule is "max", the checkpoint + at the maximum `metric_key` will be saved. If rule is "min", the checkpoint at the minimum `metric_key` + will be saved. + save_file_name: The manually specified saving file name. + restore_best (bool): Whether to restore the best checkpoint after training. + max_checkpoint_num (int): The max number of checkpoint files, default None which means never delete anything. + If the number exceeds the limit, checkpoints with worse metric will be deleted, which is judged by the + `rule` and `metric_key` arguments. + + The `BestCkptSaverHook` class accepts the `output_sub_dir` and `output_dir` arguments as its super class does. + If neither of them is passed, the default value is `{save_dir}/output_best`. + + This class will not accept the `interval` or `save_strategy` or `by_epoch` argument, because the saving interval + will follow the `EvaluationHook`. + """ + + PRIORITY = Priority.LOW + rule_map = {'max': lambda x, y: x > y, 'min': lambda x, y: x < y} + + def __init__(self, + metric_key: str, + save_best: Optional[bool] = True, + rule: Optional[str] = 'max', + save_file_name: Optional[str] = None, + restore_best: Optional[bool] = False, + max_checkpoint_num: Optional[int] = 1, + **kwargs): + assert rule in ['max', 'min'], 'Only support "max" or "min" rule now.' + output_kwargs = {} + if 'output_sub_dir' not in kwargs and 'output_dir' not in kwargs: + output_kwargs['output_sub_dir'] = ModelFile.TRAIN_BEST_OUTPUT_DIR + kwargs.pop('interval', None) + kwargs.pop('save_strategy', None) + super().__init__( + max_checkpoint_num=max_checkpoint_num, + **kwargs, + **output_kwargs, + ) + self.save_best = save_best + self.metric_key = metric_key + self.rule = rule + self._best_metric = None + self._best_ckpt_file = None + self.save_file_name = save_file_name + self.restore_best = restore_best + self.history_checkpoints = set() + + def after_train_epoch(self, trainer): + from modelscope.trainers.hooks import EvaluationHook + eval_hook = trainer.get_hook(EvaluationHook) + if len(eval_hook) == 0: + self.logger.error( + 'Trying to save the best checkpoint, but there is no evaluation, skipping.' + ) + return + + if eval_hook[0].last_eval_tag == ( + 'epoch', trainer.epoch) and self._should_save(trainer): + self._do_save(trainer, 'by_epoch') + + def after_train_iter(self, trainer): + from modelscope.trainers.hooks import EvaluationHook + eval_hook = trainer.get_hook(EvaluationHook) + if len(eval_hook) == 0: + self.logger.error( + 'Trying to save the best checkpoint, but there is no evaluation, skipping.'
) + return + + if eval_hook[0].last_eval_tag == ( + 'iter', trainer.iter) and self._should_save(trainer): + self._do_save(trainer, 'by_step') + + def _should_save(self, trainer): + return self.save_best and self._is_best_metric(trainer.metric_values) + + def _is_best_metric(self, metric_values): + if metric_values is None: + return False + + if self.metric_key not in metric_values: + raise ValueError( + f'Cannot find metric_key: {self.metric_key} in {metric_values}') + + if self._best_metric is None: + self._best_metric = metric_values[self.metric_key] + return True + + else: + compare_fn = self.rule_map[self.rule] + if compare_fn(metric_values[self.metric_key], self._best_metric): + self._best_metric = metric_values[self.metric_key] + return True + return False + + def generate_prefix(self, trainer, save_strategy): + if save_strategy == CheckpointStrategy.by_epoch: + return f'best_{LogKeys.EPOCH}{trainer.epoch + 1}_{self.metric_key}{self._best_metric}' + else: + return f'best_{LogKeys.ITER}{trainer.iter + 1}_{self.metric_key}{self._best_metric}' + + def _save_checkpoint(self, trainer, prefix): + checkpoint_path_prefix = self.save_file_name + if checkpoint_path_prefix is None: + checkpoint_path_prefix = os.path.join(self.save_dir, prefix) + else: + checkpoint_path_prefix = os.path.join(self.save_dir, + checkpoint_path_prefix) + + self._best_ckpt_file = checkpoint_path_prefix + meta = self._create_training_state(trainer) + self.processor.save_checkpoints(trainer, checkpoint_path_prefix, + self.output_dir, meta) + self.save_evaluate_results(trainer) + self.history_checkpoints.add(checkpoint_path_prefix) + self._remove_obsolete_checkpoints(trainer) + return prefix + + def _remove_obsolete_checkpoints(self, trainer): + + def extract_metric_from_filename(name1): + metric1 = float(name1.split(self.metric_key)[1]) + if self.rule == 'max': + return -metric1 + else: + return metric1 + + if self.max_checkpoint_num is not None and \ + len(self.history_checkpoints) > self.max_checkpoint_num: + history_checkpoints = sorted( + self.history_checkpoints, key=extract_metric_from_filename) + self.history_checkpoints.clear() + for i, checkpoint_path_prefix in enumerate(history_checkpoints): + if i < self.max_checkpoint_num: + self.history_checkpoints.add(checkpoint_path_prefix) + else: + self.logger.info( + f'deleting checkpoint: {checkpoint_path_prefix}') + self.processor.remove_checkpoints( + trainer, checkpoint_path_prefix=checkpoint_path_prefix) + + def state_dict(self): + return { + 'best_metric': self._best_metric, + } + + def load_state_dict(self, state_dict): + if state_dict is not None and len(state_dict) > 0: + self._best_metric = state_dict.get('best_metric') + else: + self.logger.warning( + 'The state_dict is not available, the best metric value will be affected.' + ) + + def after_run(self, trainer): + if self.restore_best: + # If restore_best is True, will call the LoadCheckpointHook to load the best checkpoint + # for later evaluation or prediction. + from modelscope.trainers.hooks.checkpoint.load_checkpoint_hook import LoadCheckpointHook + LoadCheckpointHook.load_checkpoint(self._best_ckpt_file, trainer) diff --git a/modelscope/trainers/hooks/checkpoint/checkpoint_processor.py b/modelscope/trainers/hooks/checkpoint/checkpoint_processor.py new file mode 100644 index 00000000..f28fc397 --- /dev/null +++ b/modelscope/trainers/hooks/checkpoint/checkpoint_processor.py @@ -0,0 +1,276 @@ +# Copyright (c) Alibaba, Inc. and its affiliates.
+import os +import re +import shutil + +from modelscope.metainfo import Pipelines +from modelscope.utils.checkpoint import (load_checkpoint, save_checkpoint, + save_configuration) +from modelscope.utils.constant import ModelFile +from modelscope.utils.logger import get_logger +from modelscope.utils.torch_utils import is_master + + +class CheckpointProcessor: + + TRAINER_STATE_SUFFIX = '_trainer_state.pth' + + MODEL_STATE_SUFFIX = '.pth' + + def prepare_output(self, trainer, output_dir): + """Prepares the output of target folder. + + This is a strategic function which can be registered by other hook's function. + + Args: + trainer: The trainer instance. + output_dir: The target folder used in inference. + """ + model = trainer.unwrap_module(trainer.model) + config = trainer.cfg + + # override pipeline by tasks name after finetune done, + # avoid case like fill mask pipeline with a text cls task + if config['task'] in [ + getattr(Pipelines, attr) for attr in dir(Pipelines) + if not attr.startswith('__') + ]: + # TODO a temp fix to avoid pipeline_name and task mismatch + config['pipeline'] = {'type': config['task']} + + self.copy_files_and_dump_config(trainer, output_dir, config, + self._bin_file(model)) + + @staticmethod + def copy_files_and_dump_config(trainer, output_dir, config, bin_file): + """Copy useful files to target output folder and dumps the target configuration.json. + """ + model = trainer.unwrap_module(trainer.model) + + class SaveConfig: + + def __init__(self, output_dir, config): + self.output_dir = output_dir + self.config = config + + def __call__(self, _output_dir, _config): + self.config = _config + + def save_config(self): + save_configuration(self.output_dir, self.config) + + for pop_key in [ + 'push_to_hub', 'hub_repo_id', 'hub_token', 'private_hub' + ]: + if config.safe_get('train.checkpoint.period.' + + pop_key) is not None: + config.safe_get('train.checkpoint.period').pop(pop_key) + if config.safe_get('train.checkpoint.best.' + pop_key) is not None: + config.safe_get('train.checkpoint.best').pop(pop_key) + + save_config_fn = SaveConfig(output_dir, config) + + if hasattr(model, 'save_pretrained'): + # Save pretrained of model, skip saving checkpoint + model.save_pretrained( + output_dir, + bin_file, + save_function=lambda *args, **kwargs: None, + config=save_config_fn.config, + save_config_function=save_config_fn) + + if trainer.train_preprocessor is not None: + trainer.train_preprocessor.save_pretrained( + output_dir, + save_config_fn.config, + save_config_function=save_config_fn) + if trainer.eval_preprocessor is not None: + trainer.eval_preprocessor.save_pretrained( + output_dir, + save_config_fn.config, + save_config_function=save_config_fn) + save_config_fn.save_config() + + @staticmethod + def _bin_file(model): + """Get bin file path. + """ + default_bin_file = ModelFile.TORCH_MODEL_BIN_FILE + if hasattr(model, + 'model_dir') and ModelFile.TORCH_MODEL_FILE in os.listdir( + model.model_dir): + default_bin_file = ModelFile.TORCH_MODEL_FILE + return default_bin_file + + def save_checkpoints(self, + trainer, + checkpoint_path_prefix, + output_dir, + meta=None): + """Save the state dict for trainer and model. + + This is a strategic function which can be registered by other hook's function. + + Args: + trainer(`EpochBasedTrainer`): The trainer instance. + checkpoint_path_prefix(`str`): The saving dir with a prefix. + like: /tmp/test/epoch_0 + output_dir(`str`): The output dir for inference. + meta: (`dict`): The meta info needed to be saved into files. 
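+
+        Example:
+            An illustrative call; `trainer` is assumed to be an `EpochBasedTrainer`
+            instance and the paths are placeholders.
+            >>> processor = CheckpointProcessor()
+            >>> processor.save_checkpoints(trainer, '/tmp/test/epoch_1', '/tmp/test/output')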
+ """ + model = trainer.unwrap_module(trainer.model) + _model_file, _train_state_file = self._get_state_file_name( + checkpoint_path_prefix) + + # Save pth file without model state_dict + self.save_trainer_state(trainer, model, _train_state_file, meta) + self.save_model_state(model, _model_file) + self.link(model, _model_file, output_dir) + + def remove_checkpoints(self, trainer, checkpoint_path_prefix): + """Remove obsolete checkpoint files. + + This is a strategic function which can be registered by other hook's function. + + Args: + trainer(`EpochBasedTrainer`): The trainer instance. + checkpoint_path_prefix(`str`): The saving dir with a prefix. + like: /tmp/test/epoch_0 + """ + _model_file, _train_state_file = self._get_state_file_name( + checkpoint_path_prefix) + if os.path.isfile(_train_state_file): + os.remove(_train_state_file) + + if os.path.isfile(_model_file): + os.remove(_model_file) + + def should_save_on_rank(self, trainer): + """Used in ddp or other distributed training scenario, returns whether do saving in current rank. + + This is a strategic function which can be registered by other hook's function. + + Args: + trainer(`EpochBasedTrainer`): The trainer instance. + """ + return is_master() + + def link(self, model, src_file, output_dir): + """Links the src bin file to the output folder. + + Args: + model: The model instance. + src_file: The src bin file path. + output_dir: The target folder used in inference. + """ + + bin_file = self._bin_file(model) + dest_file = os.path.join(output_dir, bin_file) + if os.path.isfile(dest_file): + os.unlink(dest_file) + + try: + os.link(src_file, dest_file) + except OSError as e: + get_logger().error( + f'Link {src_file} to {dest_file} error: {e}, ' + 'changing to copy the bin file, this may use more disk space.') + shutil.copyfile(src_file, dest_file) + + def save_trainer_state(self, trainer, model, train_state_file, meta): + """Save the trainer state, including optimizer/lr_scheduler's state dict, random states etc. + + Args: + trainer: The trainer instance. + model: The model instance. + train_state_file: The target file name for saving trainer states. + meta: Some extra meta info. + """ + save_checkpoint( + model, + train_state_file, + trainer.optimizer, + trainer.lr_scheduler, + meta=meta, + with_model=False) + + def save_model_state(self, model, model_file): + """Save the model state. + + Args: + model: The model instance. + model_file: The target file name for saving model states. + """ + save_checkpoint( + model, model_file, None, None, meta=None, with_meta=False) + + def load_checkpoints(self, checkpoint_path_prefix, trainer, load_all_state, + strict): + """Load checkpoint files of trainer state and model state. + + This is a strategic function which can be registered by other hook's function. + + Args: + checkpoint_path_prefix(str): The checkpoint dir with prefix or a model state file. + Example: '/tmp/test/epoch_0' or '/tmp/test/epoch_0.pth' + trainer(`EpochBasedTrainer`): The trainer instance. + load_all_state(`boolean`): Load all states (else load only module states). + strict(`boolean`): If strict, any unmatched keys will cause an error. + + Returns: + The meta info in json. 
+ """ + _model_file, _train_state_file = self._get_state_file_name( + checkpoint_path_prefix) + meta = {} + if os.path.isfile(_train_state_file): + meta = self.load_trainer_state(trainer, _train_state_file, + load_all_state) + else: + print(f'No trainer state file {_train_state_file} found, skip.') + self.load_model_state(trainer, _model_file, strict) + return meta + + @staticmethod + def load_trainer_state(trainer, train_state_file, load_all_state): + """Load trainer state file. + """ + + optimizer = getattr(trainer, 'optimizer', + None) if load_all_state else None + lr_scheduler = getattr(trainer, 'lr_scheduler', + None) if load_all_state else None + return load_checkpoint(train_state_file, None, optimizer, lr_scheduler) + + def load_model_state(self, trainer, model_file, strict): + """Load model state file. + """ + return load_checkpoint(model_file, + trainer.unwrap_module(trainer.model), None, + None) + + @staticmethod + def _get_state_file_name(checkpoint_path_prefix): + """Get the default file name for state files. + + If the input is a checkpoint dir with prefix, this function will append suffix for both checkpoint files. + If the input is an absolute file name, this function will return it as the model file name, and append + suffix for the trainer file name. + + NOTE: a best checkpoint filename with float or int metric value inside + will not be judged as having a extension file name. like: '/tmp/test/epoch_0_accuracy0.85' + + Args: + checkpoint_path_prefix(`str`): The checkpoint dir with prefix or a model state file + with extension file name. like: '/tmp/test/epoch_0' + + Returns: + A tuple of model state file name and trainer state file name. + """ + base, ext = os.path.splitext(checkpoint_path_prefix) + if len(ext) == 0 or re.match(r'^\d+$', ext[1:]): + return checkpoint_path_prefix + CheckpointProcessor.MODEL_STATE_SUFFIX, \ + checkpoint_path_prefix + CheckpointProcessor.TRAINER_STATE_SUFFIX # noqa + else: + return checkpoint_path_prefix, base + CheckpointProcessor.TRAINER_STATE_SUFFIX.split( + '.')[0] + '.' + ext[1:] diff --git a/modelscope/trainers/hooks/checkpoint/load_checkpoint_hook.py b/modelscope/trainers/hooks/checkpoint/load_checkpoint_hook.py new file mode 100644 index 00000000..3ccb800f --- /dev/null +++ b/modelscope/trainers/hooks/checkpoint/load_checkpoint_hook.py @@ -0,0 +1,138 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import random +from typing import Optional + +import numpy as np +import torch +from packaging import version + +from modelscope.metainfo import Hooks +from modelscope.trainers.hooks.builder import HOOKS +from modelscope.trainers.hooks.checkpoint.checkpoint_processor import \ + CheckpointProcessor +from modelscope.trainers.hooks.hook import Hook +from modelscope.trainers.hooks.priority import Priority +from modelscope.utils.logger import get_logger + + +@HOOKS.register_module(module_name=Hooks.LoadCheckpointHook) +class LoadCheckpointHook(Hook): + """Load a checkpoint file at the beginning of training or evaluating. + + This hook does not need to be configured or saved in the config file. + User should use it by: + >>> trainer.train('some-checkpoint', load_all_state=True) + or + >>> trainer.evaluate('some-checkpoint') + instead. + + Args: + checkpoint_file (str): The checkpoint file to be loaded. + load_all_state (bool): Load all states(optimizer, epoch, lr_scheduler, random_state, etc.) when loading old + training state file or not. The model's state dict will only be loaded if False. 
+ strict (bool): If strict, any unmatched keys will cause an error. + """ + + PRIORITY = Priority.HIGH + + _should_save = False + + # From 1.3.1 version we split one pth file to two files: trainer state pth file/model state pth file. + _TWO_PTH_FILE_VERSION = '1.3.1' + + def __init__( + self, + checkpoint_file: Optional[str] = None, + load_all_state: Optional[bool] = True, + strict: Optional[bool] = False, + ): + self.checkpoint_file = checkpoint_file + self.rng_state = None + self.need_load_rng_state = False + self.load_all_state = load_all_state + self.strict = strict + self.processor = CheckpointProcessor() + + def before_run(self, trainer): + if not hasattr(trainer, 'logger'): + self.logger = get_logger() + else: + self.logger = trainer.logger + + if self.checkpoint_file is not None: + meta = self.load_checkpoint(self.checkpoint_file, trainer, + self.load_all_state, self.strict) + self.rng_state = meta.get('rng_state') + self.need_load_rng_state = self.load_all_state + + def before_train_iter(self, trainer): + if self.need_load_rng_state: + if self.rng_state is not None: + random.setstate(self.rng_state['random']) + np.random.set_state(self.rng_state['numpy']) + torch.random.set_rng_state(self.rng_state['cpu']) + if torch.cuda.is_available(): + torch.cuda.random.set_rng_state_all(self.rng_state['cuda']) + self.need_load_rng_state = False + else: + self.logger.info( + 'Random state cannot be found in checkpoint file, ' + 'this may cause a random data order or model initialization.' + ) + + @staticmethod + def _restore_training_state(trainer, meta): + trainer._epoch = meta.get('epoch', trainer._epoch) + trainer._iter = meta.get('iter', trainer._iter) + trainer._inner_iter = meta.get('inner_iter', trainer._inner_iter) + + i = 0 + for hook in trainer.hooks: + if hasattr(hook, 'load_state_dict') and getattr( + hook, '_should_save', True): + key = f'{hook.__class__}-{i}' + if key in meta: + hook.load_state_dict(meta.get(key, {})) + else: + trainer.logger.warning( + f'The state_dict of hook {hook.__class__} at index {i} is not found in the checkpoint file.' + ) + i += 1 + + @classmethod + def load_checkpoint(cls, + filename, + trainer, + load_all_state=True, + strict=False): + """A static method to load checkpoint files. + + Args: + filename(str): An absolute model bin file(pth or bin) or a dir path with a file prefix(like epoch_1). + trainer(`EpochBasedTrainer`): The trainer instance. + load_all_state(`bool`): Load all states including the trainer states. + strict(`bool`): Load module state dict strictly. + + Returns: + A dict containing the train states saved by `_create_training_state` + """ + meta = cls().processor.load_checkpoints(filename, trainer, + load_all_state, strict) + if load_all_state: + cls._restore_training_state(trainer, meta) + + if meta is not None: + _version = meta.get('modelscope') + if _version is not None and version.parse( + _version) < version.parse( + LoadCheckpointHook._TWO_PTH_FILE_VERSION): + trainer.logger.warning( + 'The unique pth file is split into a model file and ' + f'a trainer file since version {LoadCheckpointHook._TWO_PTH_FILE_VERSION},' + 'consider re-training your model or ' + 'using a converting script to split the single pth file into two.' 
+ ) + trainer.logger.info( + f'Checkpoint {filename} saving time: {meta.get("time")}, modelscope version: {_version}' + ) + return meta diff --git a/modelscope/trainers/hooks/checkpoint_hook.py b/modelscope/trainers/hooks/checkpoint_hook.py deleted file mode 100644 index 59832105..00000000 --- a/modelscope/trainers/hooks/checkpoint_hook.py +++ /dev/null @@ -1,749 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import os -import random -import re -import time - -import numpy as np -import torch -from packaging import version - -from modelscope.hub.check_model import check_model_is_id -from modelscope.hub.push_to_hub import push_to_hub_async -from modelscope.metainfo import Hooks, Pipelines -from modelscope.utils.checkpoint import (load_checkpoint, save_checkpoint, - save_configuration) -from modelscope.utils.constant import LogKeys, ModelFile -from modelscope.utils.logger import get_logger -from modelscope.utils.torch_utils import is_master -from .builder import HOOKS -from .hook import Hook -from .priority import Priority - - -@HOOKS.register_module(module_name=Hooks.CheckpointHook) -class CheckpointHook(Hook): - """Save checkpoints periodically. - - Args: - interval (int): The frequency to save model. If `by_epoch=True`, - it means the number of epochs, else means the number of iterations - by_epoch (bool): Saving checkpoints by epoch or by iteration. - save_optimizer (bool): Whether to save optimizer state dict. Default: True. - save_dir (str): The directory to save checkpoints. If is None, use `trainer.work_dir` - output_sub_dir (str): The sub folder under the `save_dir` to save the output checkpoint for inference. - Default 'output'. - save_last (bool): Whether to save the last checkpoint. Default: True. - max_checkpoint_num (int): The max number of checkpoint files, default None which means never delete anything. - If the number exceeding the limit, earlier checkpoints will be deleted first. 
- """ - - PRIORITY = Priority.LOW - - TRAINER_STATE_SUFFIX = '_trainer_state.pth' - - MODEL_STATE_SUFFIX = '.pth' - - def __init__(self, - interval=0, - by_epoch=True, - save_optimizer=True, - save_dir=None, - output_sub_dir=ModelFile.TRAIN_OUTPUT_DIR, - save_last=True, - max_checkpoint_num=None, - push_to_hub=False, - model_id_with_org=None, - hub_token=None, - private_hub=True, - **kwargs): - self.interval = interval - self.by_epoch = by_epoch - self.save_optimizer = save_optimizer - self.save_dir = save_dir - self.output_sub_dir = output_sub_dir - self.save_last = save_last - self.rng_state = None - self.max_checkpoint_num = None - self.push_to_hub = push_to_hub - self.model_id_with_org = model_id_with_org - self.hub_token = hub_token - self.private_hub = private_hub - self.is_model_id = None - self.push_to_hub_future = None - if max_checkpoint_num is not None: - self.max_checkpoint_num = max(int(max_checkpoint_num), 1) - self.history_checkpoints = [] - - def before_run(self, trainer): - if not self.save_dir: - self.save_dir = trainer.work_dir - - if not os.path.exists(self.save_dir): - os.makedirs(self.save_dir, exist_ok=True) - - if not hasattr(trainer, 'logger'): - self.logger = get_logger() - else: - self.logger = trainer.logger - - if is_master(): - output_dir = os.path.join(self.save_dir, self.output_sub_dir) - # only global master prepares the output folder - self.prepare_output(trainer, output_dir) - self.logger.info(f'Checkpoints will be saved to {self.save_dir}') - - def generate_prefix(self, trainer): - if self.by_epoch: - return f'{LogKeys.EPOCH}_{trainer.epoch + 1}' - else: - return f'{LogKeys.ITER}_{trainer.iter + 1}' - - def after_train_epoch(self, trainer): - if not self.by_epoch: - return - - if self._should_save(trainer): - # prefix like 'epoch-1' or 'iter-1' - prefix = self.generate_prefix(trainer) - if self.should_save_on_rank(trainer): - if is_master(): - self.logger.info( - f'Saving checkpoint at {trainer.epoch + 1} epoch') - self._save_checkpoint(trainer, prefix) - if is_master() and self.push_to_hub: - if self.push_to_hub_future is not None and not self.push_to_hub_future.done( - ): - self.logger.error( - f'Another uploading is running, ' - f'this uploading with message {prefix} will be canceled.' - ) - return - self.push_to_hub_future = self._push_to_hub(trainer, prefix) - - def after_train_iter(self, trainer): - if self.by_epoch: - return - - if self._should_save(trainer): - # prefix like 'epoch-1' or 'iter-1' - prefix = self.generate_prefix(trainer) - if self.should_save_on_rank(trainer): - if is_master(): - self.logger.info( - f'Saving checkpoint at {trainer.iter + 1} iter') - self._save_checkpoint(trainer, prefix) - if is_master() and self.push_to_hub: - if self.push_to_hub_future is not None and not self.push_to_hub_future.done( - ): - self.logger.error( - f'Another uploading is running, ' - f'this uploading with message {prefix} will be canceled.' - ) - return - self.push_to_hub_future = self._push_to_hub(trainer, prefix) - - def after_run(self, trainer): - if self.push_to_hub_future is not None and not self.push_to_hub_future.done( - ): - self.logger.info('Train finished. 
Uploading models, waiting...') - while not self.push_to_hub_future.done(): - time.sleep(1) - self.logger.info('Uploading models done.') - - def _push_to_hub(self, trainer, prefix): - if self.is_model_id is None: - self.is_model_id = check_model_is_id(trainer.input_model_id, - self.hub_token) - - return push_to_hub_async( - self.model_id_with_org, - os.path.join(self.save_dir, self.output_sub_dir), - token=self.hub_token, - private=self.private_hub, - commit_message=prefix, - source_repo=trainer.input_model_id if self.is_model_id else '') - - def _save_checkpoint(self, trainer, prefix): - """Save checkpoint files and remove obsolete ones - """ - checkpoint_path_prefix = os.path.join(self.save_dir, prefix) - meta = self._create_training_state(trainer) - self.save_checkpoints(trainer, checkpoint_path_prefix, - self.output_sub_dir, meta) - self.history_checkpoints.append(checkpoint_path_prefix) - self._remove_obsolete_checkpoints(trainer) - return prefix - - def _remove_obsolete_checkpoints(self, trainer): - if self.max_checkpoint_num is not None and \ - len(self.history_checkpoints) > self.max_checkpoint_num: - history_checkpoints = [ckpt for ckpt in self.history_checkpoints] - self.history_checkpoints.clear() - for i, checkpoint_path_prefix in enumerate(history_checkpoints): - if i < len(history_checkpoints) - self.max_checkpoint_num: - self.logger.info( - f'deleting checkpoint: {checkpoint_path_prefix}') - self.remove_checkpoints( - trainer, checkpoint_path_prefix=checkpoint_path_prefix) - else: - self.history_checkpoints.append(checkpoint_path_prefix) - - def _should_save(self, trainer): - if self.by_epoch: - check_last = self.is_last_epoch - check_frequency = self.every_n_epochs - else: - check_last = self.is_last_iter - check_frequency = self.every_n_iters - - if check_frequency(trainer, - self.interval) or (self.save_last - and check_last(trainer)): - return True - return False - - def _create_training_state(self, trainer): - self.rng_state = { - 'random': random.getstate(), - 'numpy': np.random.get_state(), - 'cpu': torch.random.get_rng_state(), - 'cuda': torch.cuda.get_rng_state_all(), - } - - # keep epoch/iter/inner_iter/random_state - meta = { - 'epoch': trainer.epoch, - 'iter': trainer.iter + 1, - 'inner_iter': trainer.inner_iter + 1, - 'rng_state': self.rng_state, - } - - # keep hooks state - i = 0 - for hook in trainer.hooks: - if hasattr(hook, 'state_dict') and getattr(hook, '_should_save', - True): - meta[f'{hook.__class__}-{i}'] = hook.state_dict() - i += 1 - - return meta - - @staticmethod - def copy_files_and_dump_config(trainer, output_dir, config, bin_file): - """Copy useful files to target output folder and dumps the target configuration.json. - """ - model = trainer.unwrap_module(trainer.model) - - class SaveConfig: - - def __init__(self, output_dir, config): - self.output_dir = output_dir - self.config = config - - def __call__(self, _output_dir, _config): - self.config = _config - - def save_config(self): - save_configuration(self.output_dir, self.config) - - for pop_key in [ - 'push_to_hub', 'model_id_with_org', 'hub_token', 'private_hub' - ]: - if config.safe_get('train.checkpoint.period.' - + pop_key) is not None: - config.safe_get('train.checkpoint.period').pop(pop_key) - if config.safe_get('train.checkpoint.best.' 
+ pop_key) is not None: - config.safe_get('train.checkpoint.best').pop(pop_key) - - save_config_fn = SaveConfig(output_dir, config) - - if hasattr(model, 'save_pretrained'): - # Save pretrained of model, skip saving checkpoint - model.save_pretrained( - output_dir, - bin_file, - save_function=lambda *args, **kwargs: None, - config=save_config_fn.config, - save_config_function=save_config_fn) - - if trainer.train_preprocessor is not None: - trainer.train_preprocessor.save_pretrained( - output_dir, - save_config_fn.config, - save_config_function=save_config_fn) - if trainer.eval_preprocessor is not None: - trainer.eval_preprocessor.save_pretrained( - output_dir, - save_config_fn.config, - save_config_function=save_config_fn) - save_config_fn.save_config() - - @staticmethod - def _bin_file(model): - """Get bin file path. - """ - default_bin_file = ModelFile.TORCH_MODEL_BIN_FILE - if hasattr(model, - 'model_dir') and ModelFile.TORCH_MODEL_FILE in os.listdir( - model.model_dir): - default_bin_file = ModelFile.TORCH_MODEL_FILE - return default_bin_file - - @Hook.overload_func(name='CheckpointHook.prepare_output') - def prepare_output(self, trainer, output_dir): - """Prepares the output of target folder. - - This is a strategic function which can be registered by other hook's function. - - Args: - trainer: The trainer instance. - output_dir: The target folder used in inference. - """ - model = trainer.unwrap_module(trainer.model) - config = trainer.cfg - - # override pipeline by tasks name after finetune done, - # avoid case like fill mask pipeline with a text cls task - if config['task'] in [ - getattr(Pipelines, attr) for attr in dir(Pipelines) - if not attr.startswith('__') - ]: - # TODO a temp fix to avoid pipeline_name and task mismatch - config['pipeline'] = {'type': config['task']} - - self.copy_files_and_dump_config(trainer, output_dir, config, - self._bin_file(model)) - - def link(self, model, src_file, output_dir): - """Links the src bin file to the output folder. - - Args: - model: The model instance. - src_file: The src bin file path. - output_dir: The target folder used in inference. - """ - - bin_file = self._bin_file(model) - dest_file = os.path.join(output_dir, bin_file) - if os.path.isfile(dest_file): - os.unlink(dest_file) - - os.link(src_file, dest_file) - - def save_trainer_state(self, trainer, model, train_state_file, meta): - """Save the trainer state, including optimizer/lr_scheduler's state dict, random states etc. - - Args: - trainer: The trainer instance. - model: The model instance. - train_state_file: The target file name for saving trainer states. - meta: Some extra meta info. - """ - save_checkpoint( - model, - train_state_file, - trainer.optimizer, - trainer.lr_scheduler, - meta=meta, - with_model=False) - - def save_model_state(self, model, model_file): - """Save the model state. - - Args: - model: The model instance. - model_file: The target file name for saving model states. - """ - save_checkpoint( - model, model_file, None, None, meta=None, with_meta=False) - - @Hook.overload_func(name='CheckpointHook.save_checkpoints') - def save_checkpoints(self, - trainer, - checkpoint_path_prefix, - output_sub_dir, - meta=None): - """Save the state dict for trainer and model. - - This is a strategic function which can be registered by other hook's function. - - Args: - trainer(`EpochBasedTrainer`): The trainer instance. - checkpoint_path_prefix(`str`): The saving dir with a prefix. 
- like: /tmp/test/epoch_0 - output_sub_dir(`str`): The sub-dir in the saving dir used in inference. - meta: (`dict`): The meta info needed to be saved into files. - """ - model = trainer.unwrap_module(trainer.model) - _model_file, _train_state_file = _get_state_file_name( - checkpoint_path_prefix) - - # Save pth file without model state_dict - self.save_trainer_state(trainer, model, _train_state_file, meta) - self.save_model_state(model, _model_file) - output_dir = os.path.join(self.save_dir, output_sub_dir) - self.link(model, _model_file, output_dir) - - @Hook.overload_func(name='CheckpointHook.remove_checkpoints') - def remove_checkpoints(self, trainer, checkpoint_path_prefix): - """Remove obsolete checkpoint files. - - This is a strategic function which can be registered by other hook's function. - - Args: - trainer(`EpochBasedTrainer`): The trainer instance. - checkpoint_path_prefix(`str`): The saving dir with a prefix. - like: /tmp/test/epoch_0 - """ - _model_file, _train_state_file = _get_state_file_name( - checkpoint_path_prefix) - if os.path.isfile(_train_state_file): - os.remove(_train_state_file) - - if os.path.isfile(_model_file): - os.remove(_model_file) - - @Hook.overload_func(name='CheckpointHook.should_save_on_rank') - def should_save_on_rank(self, trainer): - """Used in ddp or other distributed training scenario, returns whether do saving in current rank. - - This is a strategic function which can be registered by other hook's function. - - Args: - trainer(`EpochBasedTrainer`): The trainer instance. - """ - return is_master() - - -@HOOKS.register_module(module_name=Hooks.BestCkptSaverHook) -class BestCkptSaverHook(CheckpointHook): - """ - Save best checkpoints hook. - - Args: - metric_key (str): Metric key to compare rule for best score. - rule (str): Comparison rule for best score. Support "max" and "min". If rule is "max", the checkpoint - at the maximum `metric_key` will be saved, If rule is "min", the checkpoint at the minimum `metric_key` - will be saved. - by_epoch (bool): Save best checkpoints by epoch or by iteration. - save_optimizer (bool): Whether to save optimizer state dict. Default: True. - save_dir (str): Output directory to save best checkpoint. - output_sub_dir (str): The sub folder under the `save_dir` to save the output checkpoint for inference. - Default 'output_best'. - restore_best (bool): Whether to restore the best checkpoint after training. - max_checkpoint_num (int): The max number of checkpoint files, default None which means never delete anything. - If the number exceeding the limit, checkpoints with worse metric will be deleted, which is judged by the - `rule` and `metric_key` arguments. - """ - - PRIORITY = Priority.LOW - rule_map = {'max': lambda x, y: x > y, 'min': lambda x, y: x < y} - - def __init__(self, - metric_key, - rule='max', - by_epoch=True, - save_optimizer=True, - save_dir=None, - output_sub_dir=ModelFile.TRAIN_BEST_OUTPUT_DIR, - save_file_name=None, - restore_best=False, - max_checkpoint_num=1, - interval=0, - **kwargs): - assert rule in ['max', 'min'], 'Only support "max" or "min" rule now.' 
- super().__init__( - interval=interval, - by_epoch=by_epoch, - save_optimizer=save_optimizer, - save_dir=save_dir, - output_sub_dir=output_sub_dir, - max_checkpoint_num=max_checkpoint_num, - **kwargs, - ) - self.metric_key = metric_key - self.rule = rule - self._best_metric = None - self._best_ckpt_file = None - self.save_file_name = save_file_name - self.restore_best = restore_best - self.history_checkpoints = set() - - def _should_save(self, trainer): - return self._is_best_metric(trainer.metric_values) - - def _is_best_metric(self, metric_values): - if metric_values is None: - return False - - if self.metric_key not in metric_values: - raise ValueError( - f'Not find metric_key: {self.metric_key} in {metric_values}') - - if self._best_metric is None: - self._best_metric = metric_values[self.metric_key] - return True - else: - compare_fn = self.rule_map[self.rule] - if compare_fn(metric_values[self.metric_key], self._best_metric): - self._best_metric = metric_values[self.metric_key] - return True - return False - - def generate_prefix(self, trainer): - if self.by_epoch: - return f'best_{LogKeys.EPOCH}{trainer.epoch + 1}_{self.metric_key}{self._best_metric}' - else: - return f'best_{LogKeys.ITER}{trainer.iter + 1}_{self.metric_key}{self._best_metric}' - - def _save_checkpoint(self, trainer, prefix): - checkpoint_path_prefix = self.save_file_name - if checkpoint_path_prefix is None: - checkpoint_path_prefix = os.path.join(self.save_dir, prefix) - else: - checkpoint_path_prefix = os.path.join(self.save_dir, - checkpoint_path_prefix) - - self._best_ckpt_file = checkpoint_path_prefix - meta = self._create_training_state(trainer) - self.save_checkpoints(trainer, checkpoint_path_prefix, - self.output_sub_dir, meta) - self.history_checkpoints.add(checkpoint_path_prefix) - self._remove_obsolete_checkpoints(trainer) - return prefix - - def _remove_obsolete_checkpoints(self, trainer): - - def extract_metric_from_filename(name1): - metric1 = float(name1.split(self.metric_key)[1]) - if self.rule == 'max': - return -metric1 - else: - return metric1 - - if self.max_checkpoint_num is not None and \ - len(self.history_checkpoints) > self.max_checkpoint_num: - history_checkpoints = sorted( - self.history_checkpoints, key=extract_metric_from_filename) - self.history_checkpoints.clear() - for i, checkpoint_path_prefix in enumerate(history_checkpoints): - if i < self.max_checkpoint_num: - self.history_checkpoints.add(checkpoint_path_prefix) - else: - self.logger.info( - f'deleting checkpoint: {checkpoint_path_prefix}') - self.remove_checkpoints( - trainer, checkpoint_path_prefix=checkpoint_path_prefix) - - def state_dict(self): - return { - 'best_metric': self._best_metric, - } - - def load_state_dict(self, state_dict): - if state_dict is not None and len(state_dict) > 0: - self._best_metric = state_dict.get('best_metric') - else: - self.logger.warning( - 'The state_dict is not available, the best metric value will be affected.' - ) - - def after_run(self, trainer): - if self.restore_best: - # If restore_best is True, will call the LoadCheckpointHook to load the best checkpoint - # for later evaluation or prediction. - LoadCheckpointHook.load_checkpoint(self._best_ckpt_file, trainer) - - -@HOOKS.register_module(module_name=Hooks.LoadCheckpointHook) -class LoadCheckpointHook(Hook): - """Load a checkpoint file at the beginning of training or evaluating. - - This hook does not need to be configured or saved in the config file. 
- User should use it by: - >>> trainer.train('some-checkpoint', load_all_state=True) - or - >>> trainer.evaluate('some-checkpoint') - instead. - - Args: - checkpoint_file (str): The checkpoint file to be loaded. - load_all_state (bool): Load all states(optimizer, epoch, lr_scheduler, random_state, etc.) when loading old - training state file or not. The model's state dict will only be loaded if False. - strict (bool): If strict, any unmatched keys will cause an error. - """ - - PRIORITY = Priority.HIGH - - _should_save = False - - _TWO_PTH_FILE_VERSION = '1.3.1' - - def __init__( - self, - checkpoint_file=None, - load_all_state=True, - strict=False, - ): - self.checkpoint_file = checkpoint_file - self.rng_state = None - self.need_load_rng_state = False - self.load_all_state = load_all_state - self.strict = strict - - def before_run(self, trainer): - if not hasattr(trainer, 'logger'): - self.logger = get_logger() - else: - self.logger = trainer.logger - - if self.checkpoint_file is not None: - meta = self.load_checkpoint(self.checkpoint_file, trainer, - self.load_all_state, self.strict) - self.rng_state = meta.get('rng_state') - self.need_load_rng_state = self.load_all_state - - def before_train_iter(self, trainer): - if self.need_load_rng_state: - if self.rng_state is not None: - random.setstate(self.rng_state['random']) - np.random.set_state(self.rng_state['numpy']) - torch.random.set_rng_state(self.rng_state['cpu']) - if torch.cuda.is_available(): - torch.cuda.random.set_rng_state_all(self.rng_state['cuda']) - self.need_load_rng_state = False - else: - self.logger.info( - 'Random state cannot be found in checkpoint file, ' - 'this may cause a random data order or model initialization.' - ) - - @staticmethod - def _restore_training_state(trainer, meta): - trainer._epoch = meta.get('epoch', trainer._epoch) - trainer._iter = meta.get('iter', trainer._iter) - trainer._inner_iter = meta.get('inner_iter', trainer._inner_iter) - - i = 0 - for hook in trainer.hooks: - if hasattr(hook, 'load_state_dict') and getattr( - hook, '_should_save', True): - key = f'{hook.__class__}-{i}' - if key in meta: - hook.load_state_dict(meta.get(key, {})) - else: - trainer.logger.warning( - f'The state_dict of hook {hook.__class__} at index {i} is not found in the checkpoint file.' - ) - i += 1 - - @classmethod - def load_checkpoint(cls, - filename, - trainer, - load_all_state=True, - strict=False): - """A static method to load checkpoint files. - - Args: - filename(str): An absolute model bin file(pth or bin) or a dir path with a file prefix(like epoch_1). - trainer(`EpochBasedTrainer`): The trainer instance. - load_all_state(`bool`): Load all states including the trainer states. - strict(`bool`): Load module state dict strictly. - - Returns: - A dict containing the train states saved by `_create_training_state` - """ - meta = cls().load_checkpoints(filename, trainer, load_all_state, - strict) - if load_all_state: - cls._restore_training_state(trainer, meta) - - if meta is not None: - _version = meta.get('modelscope') - if _version is not None and version.parse( - _version) < version.parse( - LoadCheckpointHook._TWO_PTH_FILE_VERSION): - trainer.logger.warning( - 'The unique pth file is split into a model file and ' - f'a trainer file since version {LoadCheckpointHook._TWO_PTH_FILE_VERSION},' - 'consider re-training your model or ' - 'using a converting script to split the single pth file into two.' 
- ) - trainer.logger.info( - f'Checkpoint {filename} saving time: {meta.get("time")}, modelscope version: {_version}' - ) - return meta - - @staticmethod - def load_trainer_state(trainer, train_state_file, load_all_state): - """Load trainer state file. - """ - - optimizer = getattr(trainer, 'optimizer', - None) if load_all_state else None - lr_scheduler = getattr(trainer, 'lr_scheduler', - None) if load_all_state else None - return load_checkpoint(train_state_file, None, optimizer, lr_scheduler) - - def load_model_state(self, trainer, model_file, strict): - """Load model state file. - """ - return load_checkpoint(model_file, - trainer.unwrap_module(trainer.model), None, - None) - - @Hook.overload_func(name='LoadCheckpointHook.load_checkpoints') - def load_checkpoints(self, checkpoint_path_prefix, trainer, load_all_state, - strict): - """Load checkpoint files of trainer state and model state. - - This is a strategic function which can be registered by other hook's function. - - Args: - checkpoint_path_prefix(str): The checkpoint dir with prefix or a model state file. - Example: '/tmp/test/epoch_0' or '/tmp/test/epoch_0.pth' - trainer(`EpochBasedTrainer`): The trainer instance. - load_all_state(`boolean`): Load all states (else load only module states). - strict(`boolean`): If strict, any unmatched keys will cause an error. - - Returns: - The meta info in json. - """ - _model_file, _train_state_file = _get_state_file_name( - checkpoint_path_prefix) - meta = {} - if os.path.isfile(_train_state_file): - meta = self.load_trainer_state(trainer, _train_state_file, - load_all_state) - else: - print(f'No trainer state file {_train_state_file} found, skip.') - self.load_model_state(trainer, _model_file, strict) - return meta - - -def _get_state_file_name(checkpoint_path_prefix): - """Get the default file name for state files. - - If the input is a checkpoint dir with prefix, this function will append suffix for both checkpoint files. - If the input is an absolute file name, this function will return it as the model file name, and append - suffix for the trainer file name. - - NOTE: a best checkpoint filename with float or int metric value inside - will not be judged as having a extension file name. like: '/tmp/test/epoch_0_accuracy0.85' - - Args: - checkpoint_path_prefix(`str`): The checkpoint dir with prefix or a model state file with extension file name. - like: '/tmp/test/epoch_0' - - Returns: - A tuple of model state file name and trainer state file name. - """ - base, ext = os.path.splitext(checkpoint_path_prefix) - if len(ext) == 0 or re.match(r'^\d+$', ext[1:]): - return checkpoint_path_prefix + CheckpointHook.MODEL_STATE_SUFFIX, \ - checkpoint_path_prefix + CheckpointHook.TRAINER_STATE_SUFFIX - else: - return checkpoint_path_prefix, base + CheckpointHook.TRAINER_STATE_SUFFIX.split( - '.')[0] + '.' + ext[1:] diff --git a/modelscope/trainers/hooks/compression/sparsity_hook.py b/modelscope/trainers/hooks/compression/sparsity_hook.py index 993488d8..e71c269a 100644 --- a/modelscope/trainers/hooks/compression/sparsity_hook.py +++ b/modelscope/trainers/hooks/compression/sparsity_hook.py @@ -1,7 +1,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import os -from modelscope import __version__ from modelscope.metainfo import Hooks from modelscope.trainers.hooks.builder import HOOKS from modelscope.trainers.hooks.hook import Hook diff --git a/tests/pipelines/easycv_pipelines/__init__.py b/modelscope/trainers/hooks/distributed/__init__.py similarity index 100% rename from tests/pipelines/easycv_pipelines/__init__.py rename to modelscope/trainers/hooks/distributed/__init__.py diff --git a/modelscope/trainers/hooks/ddp_hook.py b/modelscope/trainers/hooks/distributed/ddp_hook.py similarity index 89% rename from modelscope/trainers/hooks/ddp_hook.py rename to modelscope/trainers/hooks/distributed/ddp_hook.py index eaae2d89..2bdbe939 100644 --- a/modelscope/trainers/hooks/ddp_hook.py +++ b/modelscope/trainers/hooks/distributed/ddp_hook.py @@ -1,11 +1,11 @@ # Copyright (c) Alibaba, Inc. and its affiliates. from modelscope.metainfo import Hooks +from modelscope.trainers.hooks.builder import HOOKS +from modelscope.trainers.hooks.hook import Hook +from modelscope.trainers.hooks.priority import Priority from modelscope.utils.constant import DistributedParallelType from modelscope.utils.device import create_device from modelscope.utils.torch_utils import get_local_rank, init_dist -from .builder import HOOKS -from .hook import Hook -from .priority import Priority @HOOKS.register_module(module_name=Hooks.DDPHook) diff --git a/modelscope/trainers/hooks/deepspeed_hook.py b/modelscope/trainers/hooks/distributed/deepspeed_hook.py similarity index 64% rename from modelscope/trainers/hooks/deepspeed_hook.py rename to modelscope/trainers/hooks/distributed/deepspeed_hook.py index a34b3f6f..7dddc5d9 100644 --- a/modelscope/trainers/hooks/deepspeed_hook.py +++ b/modelscope/trainers/hooks/distributed/deepspeed_hook.py @@ -8,72 +8,48 @@ from deepspeed import DeepSpeedEngine from megatron_util import mpu, print_rank_0 from modelscope.metainfo import Hooks +from modelscope.trainers.hooks import LoadCheckpointHook from modelscope.trainers.hooks.builder import HOOKS +from modelscope.trainers.hooks.checkpoint.checkpoint_hook import ( + BestCkptSaverHook, CheckpointHook) from modelscope.trainers.hooks.hook import Hook from modelscope.trainers.hooks.priority import Priority from modelscope.utils.checkpoint import save_checkpoint from modelscope.utils.logger import get_logger -from .checkpoint_hook import CheckpointHook, LoadCheckpointHook -from .megatron_hook import MegatronHook +from ..checkpoint.checkpoint_processor import CheckpointProcessor +from ..lr_scheduler_hook import LrSchedulerProcessor +from ..optimizer.base import OptimizerHook, OptimizerProcessor -@HOOKS.register_module(module_name=Hooks.DeepspeedHook) -class DeepspeedHook(MegatronHook): - PRIORITY = Priority.VERY_HIGH +class DeepspeedProcessor(CheckpointProcessor, LrSchedulerProcessor, + OptimizerProcessor): - def __init__(self, - deepspeed_activation_checkpointing=True, - save_zero_checkpoint=False, - with_mpu=True): - self.save_zero_checkpoint = save_zero_checkpoint - self.deepspeed_activation_checkpointing = deepspeed_activation_checkpointing - # TODO without mpu - self.with_mpu = with_mpu - assert with_mpu, 'DeepspeedHook now is only for mpu models.' 
+    _BIN_FILE_DIR = 'model'
 
-    def register_strategy(self):
-        Hook.overload(name='OptimizerHook.backward', function=self.backward)
-        Hook.overload(
-            name='OptimizerHook.initialize_optimizer', function=self.idle)
-        Hook.overload(name='LrSchedulerHook.step', function=self.idle)
-        Hook.overload(
-            name='CheckpointHook.save_checkpoints',
-            function=self.save_checkpoints)
-        Hook.overload(
-            name='LoadCheckpointHook.load_checkpoints',
-            function=self.load_checkpoints)
-        Hook.overload(
-            name='CheckpointHook.remove_checkpoints',
-            function=self.remove_checkpoints)
-        Hook.overload(
-            name='CheckpointHook.prepare_output', function=self.prepare_output)
-        if self.with_mpu:
-            Hook.overload(
-                name='CheckpointHook.should_save_on_rank',
-                function=self.should_save_on_rank)
+    def rank_name(self):
+        # TODO
+        try:
+            tp_world_size = mpu.get_tensor_model_parallel_world_size()
+            if tp_world_size == 1:
+                return ''
+            mp_rank = mpu.get_tensor_model_parallel_rank()
+            return '_mp_rank_{:02d}'.format(mp_rank)
+        except (ImportError, AssertionError):
+            return ''
 
-    def backward(self, trainer, loss_keys, cumulative_iters, grad_clip):
-        # assert cumulative_iters == 1, 'DeepSpeed only support cumulative_iters=1'
-        # The `trainer.model` here is actually a deepspeed engine object.
-        # backward step
-        for k in loss_keys:
-            loss = trainer.train_outputs[k]
-            trainer.model.backward(loss)
-
-        # update parameters
-        trainer.model.step()
-
-    def idle(self, *args, **kwargs):
-        pass
+    def get_bin_file(self):
+        mp_rank = mpu.get_tensor_model_parallel_rank()
+        rank = '{:02d}'.format(mp_rank)
+        return f'mp_rank_{rank}_model_states.pt'
 
     def save_checkpoints(self,
                          trainer,
                          checkpoint_path_prefix,
-                         output_sub_dir,
+                         output_dir,
                          meta=None):
         model = trainer.unwrap_module(trainer.model)
         _train_state_file = checkpoint_path_prefix + self.rank_name(
-        ) + CheckpointHook.TRAINER_STATE_SUFFIX
+        ) + CheckpointProcessor.TRAINER_STATE_SUFFIX
         # Save pth file without model state_dict
         save_checkpoint(
             model, _train_state_file, None, None, meta=meta, with_model=False)
@@ -84,16 +60,22 @@ class DeepspeedHook(MegatronHook):
         bin_file = self.get_bin_file()
         src_file = os.path.join(checkpoint_path_prefix, bin_file)
-        dest_file = os.path.join(save_dir, output_sub_dir, self._BIN_FILE_DIR,
-                                 bin_file)
+        dest_file = os.path.join(output_dir, self._BIN_FILE_DIR, bin_file)
 
         if os.path.isfile(dest_file):
             os.unlink(dest_file)
 
-        os.link(src_file, dest_file)
+        try:
+            os.link(src_file, dest_file)
+        except OSError as e:
+            get_logger().error(
+                f'Link {src_file} to {dest_file} error: {e}, '
+                'changing to copy the bin file, this may cause more space usage.'
+ ) + shutil.copyfile(src_file, dest_file) def remove_checkpoints(self, trainer, checkpoint_path_prefix): _train_state_file = checkpoint_path_prefix + self.rank_name( - ) + CheckpointHook.TRAINER_STATE_SUFFIX + ) + CheckpointProcessor.TRAINER_STATE_SUFFIX if os.path.isfile(_train_state_file): os.remove(_train_state_file) @@ -107,10 +89,10 @@ class DeepspeedHook(MegatronHook): meta = {} _train_state_file = checkpoint_path_prefix + self.rank_name( - ) + CheckpointHook.TRAINER_STATE_SUFFIX + ) + CheckpointProcessor.TRAINER_STATE_SUFFIX if os.path.isfile(_train_state_file): - meta = LoadCheckpointHook.load_trainer_state( - trainer, _train_state_file, load_all_state) + meta = self.load_trainer_state(trainer, _train_state_file, + load_all_state) if isinstance(trainer.model, DeepSpeedEngine): # DeepSpeedEngine is initialized @@ -138,6 +120,57 @@ class DeepspeedHook(MegatronHook): checkpoint, strict=strict) return meta + def backward(self, trainer, loss_keys, cumulative_iters, grad_clip): + # assert cumulative_iters == 1, 'DeepSpeed only support cumulative_iters=1' + # The `trainer.model` here is actually a deepspeed engine object. + # backward step + for k in loss_keys: + loss = trainer.train_outputs[k] + trainer.model.backward(loss) + + # update parameters + trainer.model.step() + + def initialize_optimizer(self, trainer): + pass + + def step(self, trainer): + pass + + +@HOOKS.register_module(module_name=Hooks.DeepspeedHook) +class DeepspeedHook(Hook): + PRIORITY = Priority.VERY_HIGH + + def __init__(self, + deepspeed_activation_checkpointing=True, + save_zero_checkpoint=False, + with_mpu=True): + self.save_zero_checkpoint = save_zero_checkpoint + self.deepspeed_activation_checkpointing = deepspeed_activation_checkpointing + # TODO without mpu + self.with_mpu = with_mpu + assert with_mpu, 'DeepspeedHook now is only for mpu models.' 
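For orientation, the file naming used by these processors can be reproduced with plain string handling. The sketch below is illustrative only and not part of this patch: the work dir, the epoch prefix, the rank of 0 and a tensor-model-parallel world size of 2 are all made-up assumptions. It mirrors CheckpointProcessor's '.pth' / '_trainer_state.pth' suffixes and the 'mp_rank_XX_model_states.pt' bin file produced and linked by DeepspeedProcessor above.

import os

# Suffix constants as defined on CheckpointProcessor (see checkpoint_processor.py above).
MODEL_STATE_SUFFIX = '.pth'
TRAINER_STATE_SUFFIX = '_trainer_state.pth'


def default_state_files(prefix):
    # Default processor: one model-state file plus one trainer-state file per prefix.
    return prefix + MODEL_STATE_SUFFIX, prefix + TRAINER_STATE_SUFFIX


def deepspeed_state_files(prefix, mp_rank=0, tp_world_size=2):
    # DeepspeedProcessor: the trainer state gets a per-tensor-parallel-rank suffix,
    # while the model weights live in a DeepSpeed bin file under the prefix directory,
    # which save_checkpoints then links (or copies) into '<output_dir>/model'.
    rank_name = '' if tp_world_size == 1 else '_mp_rank_{:02d}'.format(mp_rank)
    bin_file = 'mp_rank_{:02d}_model_states.pt'.format(mp_rank)
    return prefix + rank_name + TRAINER_STATE_SUFFIX, os.path.join(prefix, bin_file)


# Hypothetical prefix, e.g. written by CheckpointHook after the third epoch.
print(default_state_files('/tmp/work_dir/epoch_3'))
# ('/tmp/work_dir/epoch_3.pth', '/tmp/work_dir/epoch_3_trainer_state.pth')
print(deepspeed_state_files('/tmp/work_dir/epoch_3'))
# ('/tmp/work_dir/epoch_3_mp_rank_00_trainer_state.pth',
#  '/tmp/work_dir/epoch_3/mp_rank_00_model_states.pt')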
+ + def register_processor(self, trainer): + processor = DeepspeedProcessor() + optimizer_hook = trainer.get_hook(OptimizerHook) + if len(optimizer_hook) > 0 and not isinstance( + optimizer_hook[0].processor, DeepspeedProcessor): + optimizer_hook[0].set_processor(processor) + ckpt_hook = trainer.get_hook(CheckpointHook) + if len(ckpt_hook) > 0 and not isinstance(ckpt_hook[0].processor, + DeepspeedProcessor): + ckpt_hook[0].set_processor(processor) + best_ckpt_hook = trainer.get_hook(BestCkptSaverHook) + if len(best_ckpt_hook) > 0 and not isinstance( + best_ckpt_hook[0].processor, DeepspeedProcessor): + best_ckpt_hook[0].set_processor(processor) + load_ckpt_hook = trainer.get_hook(LoadCheckpointHook) + if len(load_ckpt_hook) > 0 and not isinstance( + load_ckpt_hook[0].processor, DeepspeedProcessor): + load_ckpt_hook[0].set_processor(processor) + def before_val(self, trainer): pass diff --git a/modelscope/trainers/hooks/megatron_hook.py b/modelscope/trainers/hooks/distributed/megatron_hook.py similarity index 70% rename from modelscope/trainers/hooks/megatron_hook.py rename to modelscope/trainers/hooks/distributed/megatron_hook.py index f01288de..c4aeaf19 100644 --- a/modelscope/trainers/hooks/megatron_hook.py +++ b/modelscope/trainers/hooks/distributed/megatron_hook.py @@ -1,19 +1,129 @@ import os -from copy import deepcopy +import shutil import torch from megatron_util import mpu from modelscope.metainfo import Hooks +from modelscope.trainers import EpochBasedTrainer from modelscope.trainers.hooks.builder import HOOKS +from modelscope.trainers.hooks.checkpoint.checkpoint_hook import ( + BestCkptSaverHook, CheckpointHook, CheckpointProcessor) +from modelscope.trainers.hooks.checkpoint.load_checkpoint_hook import \ + LoadCheckpointHook from modelscope.trainers.hooks.hook import Hook -from modelscope.trainers.parallel.builder import build_parallel from modelscope.utils.checkpoint import load_checkpoint, save_checkpoint from modelscope.utils.constant import DistributedParallelType from modelscope.utils.device import create_device +from modelscope.utils.logger import get_logger from modelscope.utils.megatron_utils import is_megatron_initialized from modelscope.utils.torch_utils import get_local_rank -from .checkpoint_hook import CheckpointHook, LoadCheckpointHook + + +class MpuProcessor(CheckpointProcessor): + + _BIN_FILE_DIR = 'model' + + def rank_name(self): + # TODO + try: + tp_world_size = mpu.get_tensor_model_parallel_world_size() + if tp_world_size == 1: + return '' + mp_rank = mpu.get_tensor_model_parallel_rank() + return '_mp_rank_{:02d}'.format(mp_rank) + except (ImportError, AssertionError): + return '' + + def get_bin_file(self): + mp_rank = mpu.get_tensor_model_parallel_rank() + rank = '{:02d}'.format(mp_rank) + return f'mp_rank_{rank}_model_states.pt' + + def should_save_on_rank(self, trainer): + # TODO + return (not torch.distributed.is_initialized() + ) or mpu.get_data_parallel_rank() == 0 + + def prepare_output(self, trainer, output_dir): + config = trainer.cfg + CheckpointProcessor.copy_files_and_dump_config(trainer, output_dir, + config, + self._BIN_FILE_DIR) + os.makedirs( + os.path.join(output_dir, self._BIN_FILE_DIR), exist_ok=True) + + def save_checkpoints(self, + trainer, + checkpoint_path_prefix, + output_dir, + meta=None): + model = trainer.unwrap_module(trainer.model) + _train_state_file = checkpoint_path_prefix + self.rank_name( + ) + CheckpointProcessor.TRAINER_STATE_SUFFIX + # Save pth file without model state_dict + save_checkpoint( + model, + _train_state_file, + 
trainer.optimizer,
+            trainer.lr_scheduler,
+            meta=meta,
+            with_model=False)
+
+        save_dir = os.path.dirname(checkpoint_path_prefix)
+        prefix = os.path.basename(checkpoint_path_prefix)
+        bin_file = self.get_bin_file()
+        prefix_bin_file = os.path.join(save_dir, prefix + '_' + bin_file)
+        save_checkpoint(model, prefix_bin_file, with_meta=False)
+
+        src_file = prefix_bin_file
+        dest_file = os.path.join(output_dir, self._BIN_FILE_DIR, bin_file)
+        if os.path.isfile(dest_file):
+            os.unlink(dest_file)
+
+        try:
+            os.link(src_file, dest_file)
+        except OSError as e:
+            get_logger().error(
+                f'Link {src_file} to {dest_file} error: {e}, '
+                'changing to copy the bin file, this may cause more space usage.'
+            )
+            shutil.copyfile(src_file, dest_file)
+
+    def remove_checkpoints(self, trainer, checkpoint_path_prefix):
+        _train_state_file = checkpoint_path_prefix + self.rank_name(
+        ) + CheckpointProcessor.TRAINER_STATE_SUFFIX
+        if os.path.isfile(_train_state_file):
+            os.remove(_train_state_file)
+
+        save_dir = os.path.dirname(checkpoint_path_prefix)
+        prefix = os.path.basename(checkpoint_path_prefix)
+        bin_file = self.get_bin_file()
+        absolute_file = os.path.join(save_dir, prefix + '_' + bin_file)
+        if os.path.isfile(absolute_file):
+            os.remove(absolute_file)
+
+    def load_checkpoints(self, checkpoint_path_prefix, trainer, load_all_state,
+                         strict):
+        model = trainer.unwrap_module(trainer.model)
+        if os.path.isdir(checkpoint_path_prefix):
+            save_dir = checkpoint_path_prefix
+            bin_file = self.get_bin_file()
+            model_file = os.path.join(save_dir, bin_file)
+            load_checkpoint(model_file, model, None, None)
+        else:
+            _train_state_file = checkpoint_path_prefix + self.rank_name(
+            ) + CheckpointProcessor.TRAINER_STATE_SUFFIX
+            meta = LoadCheckpointHook.load_trainer_state(
+                trainer, _train_state_file, load_all_state)
+
+            save_dir = os.path.dirname(checkpoint_path_prefix)
+            prefix = os.path.basename(checkpoint_path_prefix)
+            bin_file = self.get_bin_file()
+
+            model_file = os.path.join(save_dir, prefix + '_' + bin_file)
+            load_checkpoint(model_file, model, None, None)
+        return meta
 
 
 @HOOKS.register_module(module_name=Hooks.MegatronHook)
@@ -24,21 +134,20 @@ class MegatronHook(Hook):
     def __init__(self):
         self.wrapped = False
 
-    def register_strategy(self):
-        Hook.overload(
-            name='CheckpointHook.should_save_on_rank',
-            function=self.should_save_on_rank)
-        Hook.overload(
-            name='CheckpointHook.save_checkpoints',
-            function=self.save_checkpoints)
-        Hook.overload(
-            name='LoadCheckpointHook.load_checkpoints',
-            function=self.load_checkpoints)
-        Hook.overload(
-            name='CheckpointHook.remove_checkpoints',
-            function=self.remove_checkpoints)
-        Hook.overload(
-            name='CheckpointHook.prepare_output', function=self.prepare_output)
+    def register_processor(self, trainer: EpochBasedTrainer):
+        processor = MpuProcessor()
+        ckpt_hook = trainer.get_hook(CheckpointHook)
+        if len(ckpt_hook) > 0 and not isinstance(ckpt_hook[0].processor,
+                                                 MpuProcessor):
+            ckpt_hook[0].set_processor(processor)
+        best_ckpt_hook = trainer.get_hook(BestCkptSaverHook)
+        if len(best_ckpt_hook) > 0 and not isinstance(
+                best_ckpt_hook[0].processor, MpuProcessor):
+            best_ckpt_hook[0].set_processor(processor)
+        load_ckpt_hook = trainer.get_hook(LoadCheckpointHook)
+        if len(load_ckpt_hook) > 0 and not isinstance(
+                load_ckpt_hook[0].processor, MpuProcessor):
+            load_ckpt_hook[0].set_processor(processor)
 
     def after_init(self, trainer):
         assert is_megatron_initialized()
@@ -63,97 +172,3 @@
         if not self.wrapped:
             trainer.model =
trainer.to_parallel(trainer.model) self.wrapped = True - - def should_save_on_rank(self, trainer): - # TODO - return (not torch.distributed.is_initialized() - ) or mpu.get_data_parallel_rank() == 0 - - def rank_name(self): - # TODO - try: - tp_world_size = mpu.get_tensor_model_parallel_world_size() - if tp_world_size == 1: - return '' - mp_rank = mpu.get_tensor_model_parallel_rank() - return '_mp_rank_{:02d}'.format(mp_rank) - except (ImportError, AssertionError): - return '' - - def get_bin_file(self): - mp_rank = mpu.get_tensor_model_parallel_rank() - rank = '{:02d}'.format(mp_rank) - return f'mp_rank_{rank}_model_states.pt' - - def save_checkpoints(self, - trainer, - checkpoint_path_prefix, - output_sub_dir, - meta=None): - model = trainer.unwrap_module(trainer.model) - _train_state_file = checkpoint_path_prefix + self.rank_name( - ) + CheckpointHook.TRAINER_STATE_SUFFIX - # Save pth file without model state_dict - save_checkpoint( - model, - _train_state_file, - trainer.optimizer, - trainer.lr_scheduler, - meta=meta, - with_model=False) - - save_dir = os.path.dirname(checkpoint_path_prefix) - prefix = os.path.basename(checkpoint_path_prefix) - bin_file = self.get_bin_file() - prefix_bin_file = os.path.join(save_dir, prefix + '_' + bin_file) - save_checkpoint(model, prefix_bin_file, with_meta=False) - - src_file = prefix_bin_file - dest_file = os.path.join(save_dir, output_sub_dir, self._BIN_FILE_DIR, - bin_file) - if os.path.isfile(dest_file): - os.unlink(dest_file) - - os.link(src_file, dest_file) - - def remove_checkpoints(self, trainer, checkpoint_path_prefix): - _train_state_file = checkpoint_path_prefix + self.rank_name( - ) + CheckpointHook.TRAINER_STATE_SUFFIX - if os.path.isfile(_train_state_file): - os.remove(_train_state_file) - - save_dir = os.path.dirname(checkpoint_path_prefix) - prefix = os.path.basename(checkpoint_path_prefix) - bin_file = self.get_bin_file() - absolute_file = os.path.join(save_dir, prefix + '_' + bin_file) - if os.path.isfile(absolute_file): - os.remove(absolute_file) - - def load_checkpoints(self, checkpoint_path_prefix, trainer, load_all_state, - strict): - model = trainer.unwrap_module(trainer.model) - if os.path.isdir(checkpoint_path_prefix): - save_dir = checkpoint_path_prefix - bin_file = self.get_bin_file() - model_file = os.path.join(save_dir, bin_file) - load_checkpoint(model_file, model, None, None) - else: - _train_state_file = checkpoint_path_prefix + self.rank_name( - ) + CheckpointHook.TRAINER_STATE_SUFFIX - meta = LoadCheckpointHook.load_trainer_state( - trainer, _train_state_file, load_all_state) - - save_dir = os.path.dirname(checkpoint_path_prefix) - prefix = os.path.basename(checkpoint_path_prefix) - bin_file = self.get_bin_file() - - model_file = os.path.join(save_dir, prefix + '_' + bin_file) - load_checkpoint(model_file, model, None, None) - return meta - - def prepare_output(self, trainer, output_dir): - config = trainer.cfg - CheckpointHook.copy_files_and_dump_config(trainer, output_dir, config, - self._BIN_FILE_DIR) - os.makedirs( - os.path.join(output_dir, self._BIN_FILE_DIR), exist_ok=True) diff --git a/modelscope/trainers/hooks/early_stop_hook.py b/modelscope/trainers/hooks/early_stop_hook.py index b15e8e5a..7aba69a4 100644 --- a/modelscope/trainers/hooks/early_stop_hook.py +++ b/modelscope/trainers/hooks/early_stop_hook.py @@ -9,6 +9,12 @@ from .hook import Hook from .priority import Priority +class EarlyStopStrategy: + by_epoch = 'by_epoch' + by_step = 'by_step' + no = 'no' + + 
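The small strategy holder above replaces the old `by_epoch` boolean for early stopping; the same pattern reappears below as EvaluationStrategy for EvaluationHook and LrStrategy for LrSchedulerHook, with the legacy boolean still accepted through `**kwargs`. A minimal illustration of the two equivalent ways to configure the hook defined next; this snippet is not part of the patch, and the metric name and interval values are made up:

from modelscope.trainers.hooks.early_stop_hook import (EarlyStopHook,
                                                       EarlyStopStrategy)

# New style: name the check strategy explicitly.
hook_new = EarlyStopHook(
    metric_key='accuracy',          # metric monitored after each evaluation
    rule='max',                     # stop once it has stopped increasing
    patience=3,
    early_stop_strategy=EarlyStopStrategy.by_step,
    interval=100)                   # check every 100 training iterations

# Legacy style: the old boolean is still accepted and mapped in __init__.
hook_old = EarlyStopHook(
    metric_key='accuracy',
    by_epoch=False,                 # becomes EarlyStopStrategy.by_step
    interval=100)

In a real training run these options would normally appear as an entry under the trainer's `train.hooks` config rather than be constructed directly; the direct calls above only show which arguments map to which.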
@HOOKS.register_module(module_name=Hooks.EarlyStopHook) class EarlyStopHook(Hook): """Early stop when a specific metric stops improving. @@ -16,14 +22,13 @@ class EarlyStopHook(Hook): Args: metric_key (str): Metric key to be monitored. rule (str): Comparison rule for best score. Support "max" and "min". - If rule is "max", the training will stop when `metric_key` has stopped increaing. + If rule is "max", the training will stop when `metric_key` has stopped increasing. If rule is "min", the training will stop when `metric_key` has stopped decreasing. patience (int): Trainer will stop if the monitored metric did not improve for the last `patience` times. - min_delta (float): Minimum change in the monitored metric to quailfy as an improvement. + min_delta (float): Minimum change in the monitored metric to qualify as an improvement. check_finite (bool): If true, stops training when the metric becomes NaN or infinite. - by_epoch (int): Saving checkpoints by epoch or by iteration. - interval (int): The frequency to trigger early stop check. If `by_epoch=True`, - it means the number of epochs, else means the number of iterations. + early_stop_strategy (str): The strategy to early stop, can be by_epoch/by_step/none + interval (int): The frequency to trigger early stop check, by epoch or step. """ PRIORITY = Priority.VERY_LOW @@ -35,14 +40,19 @@ class EarlyStopHook(Hook): patience: int = 3, min_delta: float = 0.0, check_finite: bool = True, - by_epoch: bool = True, - interval: int = 1): + early_stop_strategy: str = EarlyStopStrategy.by_epoch, + interval: int = 1, + **kwargs): self.metric_key = metric_key self.rule = rule self.patience = patience self.min_delta = min_delta self.check_finite = check_finite - self.by_epoch = by_epoch + if 'by_epoch' in kwargs: + self.early_stop_strategy = EarlyStopStrategy.by_epoch if kwargs[ + 'by_epoch'] else EarlyStopStrategy.by_step + else: + self.early_stop_strategy = early_stop_strategy self.interval = interval self.wait_count = 0 @@ -89,7 +99,7 @@ class EarlyStopHook(Hook): trainer._stop_training = True def after_train_epoch(self, trainer): - if not self.by_epoch: + if self.early_stop_strategy != EarlyStopStrategy.by_epoch: return if not self.every_n_epochs(trainer, self.interval): @@ -99,7 +109,7 @@ class EarlyStopHook(Hook): self._stop_training(trainer) def after_train_iter(self, trainer): - if self.by_epoch: + if self.early_stop_strategy != EarlyStopStrategy.by_step: return if not self.every_n_iters(trainer, self.interval): diff --git a/modelscope/trainers/hooks/evaluation_hook.py b/modelscope/trainers/hooks/evaluation_hook.py index 80c8c31a..c29a6d6a 100644 --- a/modelscope/trainers/hooks/evaluation_hook.py +++ b/modelscope/trainers/hooks/evaluation_hook.py @@ -1,11 +1,18 @@ # Copyright (c) Alibaba, Inc. and its affiliates. from collections import OrderedDict +from typing import Optional from modelscope.metainfo import Hooks from .builder import HOOKS from .hook import Hook +class EvaluationStrategy: + by_epoch = 'by_epoch' + by_step = 'by_step' + no = 'no' + + @HOOKS.register_module(module_name=Hooks.EvaluationHook) class EvaluationHook(Hook): """ @@ -18,21 +25,34 @@ class EvaluationHook(Hook): Default: None, validate every interval epochs/iterations from scratch. 
""" - def __init__(self, interval=1, by_epoch=True, start_idx=None): + def __init__(self, + interval: Optional[int] = 1, + eval_strategy: Optional[str] = EvaluationStrategy.by_epoch, + start_idx: Optional[int] = None, + **kwargs): assert interval > 0, 'interval must be a positive number' self.interval = interval self.start_idx = start_idx - self.by_epoch = by_epoch + self.last_eval_tag = (None, None) + if 'by_epoch' in kwargs: + self.eval_strategy = EvaluationStrategy.by_epoch if kwargs[ + 'by_epoch'] else EvaluationStrategy.by_step + else: + self.eval_strategy = eval_strategy def after_train_iter(self, trainer): """Called after every training iter to evaluate the results.""" - if not self.by_epoch and self._should_evaluate(trainer): + if self.eval_strategy == EvaluationStrategy.by_step and self._should_evaluate( + trainer): self.do_evaluate(trainer) + self.last_eval_tag = ('iter', trainer.iter) def after_train_epoch(self, trainer): """Called after every training epoch to evaluate the results.""" - if self.by_epoch and self._should_evaluate(trainer): + if self.eval_strategy == EvaluationStrategy.by_epoch and self._should_evaluate( + trainer): self.do_evaluate(trainer) + self.last_eval_tag = ('epoch', trainer.epoch) def add_visualization_info(self, trainer, results): if trainer.visualization_buffer.output.get('eval_results', @@ -64,7 +84,7 @@ class EvaluationHook(Hook): Returns: bool: The flag indicating whether to perform evaluation. """ - if self.by_epoch: + if self.eval_strategy == EvaluationStrategy.by_epoch: current = trainer.epoch check_time = self.every_n_epochs else: diff --git a/modelscope/trainers/hooks/hook.py b/modelscope/trainers/hooks/hook.py index 70e06fbd..93ea8541 100644 --- a/modelscope/trainers/hooks/hook.py +++ b/modelscope/trainers/hooks/hook.py @@ -22,9 +22,6 @@ class Hook: PRIORITY = Priority.NORMAL - # The strategic function dict. - _strategies = dict() - def after_init(self, trainer): """ Will be called at the end of the trainer's `__init__` method @@ -201,42 +198,48 @@ class Hook: """ self.after_iter(trainer) - def every_n_epochs(self, trainer, n): + @staticmethod + def every_n_epochs(trainer, n): """ Whether to reach every ``n`` epochs Returns: bool """ return (trainer.epoch + 1) % n == 0 if n > 0 else False - def every_n_inner_iters(self, runner, n): + @staticmethod + def every_n_inner_iters(runner, n): """ Whether to reach every ``n`` iterations at every epoch Returns: bool """ return (runner.inner_iter + 1) % n == 0 if n > 0 else False - def every_n_iters(self, trainer, n): + @staticmethod + def every_n_iters(trainer, n): """ Whether to reach every ``n`` iterations Returns: bool """ return (trainer.iter + 1) % n == 0 if n > 0 else False - def end_of_epoch(self, trainer): + @staticmethod + def end_of_epoch(trainer): """ Whether to reach the end of every epoch Returns: bool """ return trainer.inner_iter + 1 == trainer.iters_per_epoch - def is_last_epoch(self, trainer): + @staticmethod + def is_last_epoch(trainer): """ Whether to reach the last epoch Returns: bool """ return trainer.epoch + 1 == trainer.max_epochs - def is_last_iter(self, trainer): + @staticmethod + def is_last_iter(trainer): """ Whether to reach the last iteration in the entire training process Returns: bool @@ -256,54 +259,3 @@ class Hook: def load_state_dict(self, state_dict): pass - - @staticmethod - def clear_strategies(): - Hook._strategies.clear() - - @staticmethod - def overload(function, name=None): - """Register a function to a strategic function. 
- - Args: - function(`method` or `Callable`): The function instance. - name(`str`): The name of the strategic function, which specifies by the method `consume` - """ - - _name = name or function.__name__ - if _name not in Hook._strategies: - Hook._strategies[_name] = [] - - Hook._strategies[_name].append(function) - - @staticmethod - def overload_func(name=None): - """Declare a function as a strategic function, which can be replaced by some other functions. - - This function should be used in annotations. - - Args: - name(str): The strategic function name. - """ - - def _register(function): - - @wraps(function) - def _call(*args, **kwargs): - _name = name or function.__name__ - producers = Hook._strategies.get(_name, []) - - if len(producers) == 0: - return function(*args, **kwargs) - else: - if len(producers) > 1: - raise ValueError( - f'Multiple functions registered to {_name}, ' - f'here is the list: {producers}') - if isinstance(args[0], Hook): - args = args[1:] - return producers[0](*args, **kwargs) - - return _call - - return _register diff --git a/modelscope/trainers/hooks/lr_scheduler_hook.py b/modelscope/trainers/hooks/lr_scheduler_hook.py index 28ce250c..51a8e858 100644 --- a/modelscope/trainers/hooks/lr_scheduler_hook.py +++ b/modelscope/trainers/hooks/lr_scheduler_hook.py @@ -1,4 +1,5 @@ # Copyright (c) Alibaba, Inc. and its affiliates. + from modelscope.metainfo import Hooks from modelscope.trainers.lrscheduler.builder import build_lr_scheduler from modelscope.utils.constant import LogKeys @@ -9,6 +10,42 @@ from .hook import Hook from .priority import Priority +class LrSchedulerProcessor: + + def __init__(self): + self.lr_strategy = None + self.warmup_lr_scheduler = None + + def set_lr_strategy(self, lr_strategy): + self.lr_strategy = lr_strategy + + def set_warmup_lr_scheduler(self, warmup_lr_scheduler): + self.warmup_lr_scheduler = warmup_lr_scheduler + + def initialize_lr_scheduler(self, trainer): + """Initialize the lr scheduler. + + This is a strategic function which can be registered by other hook's function. + """ + pass + + def step(self, trainer): + """Do lr scheduler's step. + + This is a strategic function which can be registered by other hook's function. + """ + if self.warmup_lr_scheduler is not None: + self.warmup_lr_scheduler.step() + else: + trainer.lr_scheduler.step() + + +class LrStrategy: + by_epoch = 'by_epoch' + by_step = 'by_step' + no = 'no' + + @HOOKS.register_module(module_name=Hooks.LrSchedulerHook) class LrSchedulerHook(Hook): """Lr scheduler. 
@@ -19,38 +56,33 @@ class LrSchedulerHook(Hook): """ PRIORITY = Priority.LOW - def __init__(self, by_epoch=True, warmup=None, **kwargs) -> None: + def __init__(self, + lr_strategy=LrStrategy.by_epoch, + warmup=None, + **kwargs) -> None: super().__init__() - self.by_epoch = by_epoch + if 'by_epoch' in kwargs: + self.lr_strategy = LrStrategy.by_epoch if kwargs[ + 'by_epoch'] else LrStrategy.by_step + else: + self.lr_strategy = lr_strategy self.warmup = warmup self.warmup_lr_scheduler = None + self.processor = LrSchedulerProcessor() + + def set_processor(self, processor): + self.processor = processor def before_run(self, trainer): - self.initialize_lr_scheduler(trainer) + self.processor.set_lr_strategy(self.lr_strategy) if self.warmup is not None: assert isinstance(self.warmup, dict) and 'type' in self.warmup self.warmup_lr_scheduler = build_lr_scheduler( cfg=self.warmup, default_args={'base_scheduler': trainer.lr_scheduler}) + self.processor.set_warmup_lr_scheduler(self.warmup_lr_scheduler) - @Hook.overload_func(name='LrSchedulerHook.initialize_lr_scheduler') - def initialize_lr_scheduler(self, trainer): - """Initialize the lr scheduler. - - This is a strategic function which can be registered by other hook's function. - """ - pass - - @Hook.overload_func(name='LrSchedulerHook.step') - def step(self, trainer): - """Do lr scheduler's step. - - This is a strategic function which can be registered by other hook's function. - """ - if self.warmup_lr_scheduler is not None: - self.warmup_lr_scheduler.step() - else: - trainer.lr_scheduler.step() + self.processor.initialize_lr_scheduler(trainer) def get_current_lr(self, trainer): import torch @@ -67,17 +99,17 @@ class LrSchedulerHook(Hook): return lr def after_train_iter(self, trainer): - if not self.by_epoch and trainer.iter >= getattr( + if self.lr_strategy == LrStrategy.by_step and trainer.iter >= getattr( trainer, 'cumulative_iters', 1) - 1: - self.step(trainer) + self.processor.step(trainer) trainer.log_buffer.output[LogKeys.LR] = self._get_log_lr(trainer) def before_train_epoch(self, trainer): trainer.log_buffer.output[LogKeys.LR] = self._get_log_lr(trainer) def after_train_epoch(self, trainer): - if self.by_epoch: - self.step(trainer) + if self.lr_strategy == LrStrategy.by_epoch: + self.processor.step(trainer) def _get_log_lr(self, trainer): cur_lr = self.get_current_lr(trainer) @@ -94,6 +126,29 @@ class LrSchedulerHook(Hook): return lr +class PlateauLrSchedulerProcessor(LrSchedulerProcessor): + + def __init__(self, metric_key): + super().__init__() + self.metric_key = metric_key + + def step(self, trainer): + # adapt to evaluation interval is greater than 1 + if trainer.metric_values is None: + if is_master(): + print( + f'Current epoch {trainer.epoch} has no evaluation metric values, skip lr_scheduler.step() !' + ) + return + + metrics = trainer.metric_values[self.metric_key] + if self.lr_strategy == LrStrategy.by_epoch: + if self.warmup_lr_scheduler is not None: + self.warmup_lr_scheduler.step(metrics=metrics) + else: + trainer.lr_scheduler.step(metrics=metrics) + + @HOOKS.register_module(module_name=Hooks.PlateauLrSchedulerHook) class PlateauLrSchedulerHook(Hook): """Lr scheduler hook for `ReduceLROnPlateau`. 
@@ -105,10 +160,16 @@ class PlateauLrSchedulerHook(Hook): PRIORITY = Priority.LOW # should be after EvaluationHook def __init__(self, metric_key, **kwargs): + super().__init__() self.metric_key = metric_key - def register_strategy(self): - Hook.overload(name='LrSchedulerHook.step', function=self.step) + def register_processor(self, trainer): + lr_scheduler_hook = trainer.get_hook(LrSchedulerHook) + if len(lr_scheduler_hook) > 0 and type( + lr_scheduler_hook[0].processor) in (type(None), + LrSchedulerProcessor): + lr_scheduler_hook[0].set_processor( + PlateauLrSchedulerProcessor(self.metric_key)) def before_run(self, trainer): if not hasattr(trainer, 'logger'): @@ -116,23 +177,6 @@ class PlateauLrSchedulerHook(Hook): else: self.logger = trainer.logger - def step(self, trainer): - # adapt to evaluation intervel is greater than 1 - if trainer.metric_values is None: - if is_master(): - self.logger.warning( - f'Current epoch {trainer.epoch} has no evaluation metric values, skip lr_scheduler.step() !' - ) - return - - metrics = trainer.metric_values[self.metric_key] - lr_scheduler_hook = trainer.get_hook(LrSchedulerHook)[0] - if lr_scheduler_hook.by_epoch: - if lr_scheduler_hook.warmup_lr_scheduler is not None: - lr_scheduler_hook.warmup_lr_scheduler.step(metrics=metrics) - else: - trainer.lr_scheduler.step(metrics=metrics) - @HOOKS.register_module(module_name=Hooks.NoneLrSchedulerHook) class NoneLrSchedulerHook(LrSchedulerHook): diff --git a/modelscope/trainers/hooks/optimizer/apex_optimizer_hook.py b/modelscope/trainers/hooks/optimizer/apex_optimizer_hook.py index bd1034f3..3c874ccf 100644 --- a/modelscope/trainers/hooks/optimizer/apex_optimizer_hook.py +++ b/modelscope/trainers/hooks/optimizer/apex_optimizer_hook.py @@ -7,40 +7,14 @@ from packaging import version from modelscope.metainfo import Hooks from modelscope.trainers.hooks import Hook from modelscope.trainers.hooks.builder import HOOKS -from .base import OptimizerHook +from .base import OptimizerHook, OptimizerProcessor -@HOOKS.register_module(module_name=Hooks.ApexAMPOptimizerHook) -class ApexAMPOptimizerHook(Hook): - """ - Fp16 optimizer, if torch version is less than 1.6.0, - you must install apex (https://www.github.com/nvidia/apex) else use torch.cuda.amp by default +class ApexOptimizerProcessor(OptimizerProcessor): - Args: - opt_level (str): "O0" and "O3" are not true mixed precision, - but they are useful for establishing accuracy and speed baselines, respectively. - "O1" and "O2" are different implementations of mixed precision. - Try both, and see what gives the best speedup and accuracy for your model. - """ - - PRIORITY = OptimizerHook.PRIORITY - - def __init__(self, opt_level='O1', **kwargs): + def __init__(self, opt_level): self.opt_level = opt_level - try: - from apex import amp - except ImportError: - raise ValueError( - 'apex not installed, please install apex from https://www.github.com/nvidia/apex.' 
- ) - - def register_strategy(self): - Hook.overload( - name='OptimizerHook.initialize_optimizer', - function=self.initialize_optimizer) - Hook.overload(name='OptimizerHook.backward', function=self.backward) - def initialize_optimizer(self, trainer): from apex import amp @@ -68,10 +42,44 @@ class ApexAMPOptimizerHook(Hook): trainer.optimizer) as scaled_loss: scaled_loss.backward() - if self.every_n_iters(trainer, cumulative_iters): + if Hook.every_n_iters(trainer, cumulative_iters): if grad_clip is not None: - OptimizerHook.clip_grads(trainer.model.parameters(), - **grad_clip) + OptimizerProcessor.clip_grads(trainer.model.parameters(), + **grad_clip) trainer.optimizer.step() trainer.optimizer.zero_grad() + + +@HOOKS.register_module(module_name=Hooks.ApexAMPOptimizerHook) +class ApexAMPOptimizerHook(Hook): + """ + Fp16 optimizer, if torch version is less than 1.6.0, + you must install apex (https://www.github.com/nvidia/apex) else use torch.cuda.amp by default + + Args: + opt_level (str): "O0" and "O3" are not true mixed precision, + but they are useful for establishing accuracy and speed baselines, respectively. + "O1" and "O2" are different implementations of mixed precision. + Try both, and see what gives the best speedup and accuracy for your model. + """ + + PRIORITY = OptimizerHook.PRIORITY + + def __init__(self, opt_level='O1', **kwargs): + self.opt_level = opt_level + + try: + from apex import amp + except ImportError: + raise ValueError( + 'apex not installed, please install apex from https://www.github.com/nvidia/apex.' + ) + + def register_processor(self, trainer): + optimizer_hook = trainer.get_hook(OptimizerHook) + if len(optimizer_hook) > 0 and type( + optimizer_hook[0].processor) in (type(None), + OptimizerProcessor): + optimizer_hook[0].set_processor( + ApexOptimizerProcessor(self.opt_level)) diff --git a/modelscope/trainers/hooks/optimizer/base.py b/modelscope/trainers/hooks/optimizer/base.py index f0d62612..ca20720d 100644 --- a/modelscope/trainers/hooks/optimizer/base.py +++ b/modelscope/trainers/hooks/optimizer/base.py @@ -10,6 +10,48 @@ from modelscope.trainers.hooks.hook import Hook from modelscope.trainers.hooks.priority import Priority +class OptimizerProcessor: + + def initialize_optimizer(self, trainer): + """Initialize the optimizer. + + This is a strategic function which can be registered by other hook's function. + """ + trainer.optimizer.zero_grad() + + def before_forward(self, trainer): + pass + + def backward(self, trainer, loss_keys, cumulative_iters, grad_clip): + """Do module backward, optimizer's step and zero_grad and clip the grads. + + This is a strategic function which can be registered by other hook's function. + + Args: + trainer(`EpochBasedTrainer`): The trainer instance. + loss_keys(`list`): The list of loss keys. + cumulative_iters(`int`): The cumulative iters for gradients. + grad_clip(`dict`): The grad clipping options. 
+ """ + for k in loss_keys: + trainer.train_outputs[k] /= cumulative_iters + trainer.train_outputs[k].backward() + + if Hook.every_n_iters(trainer, cumulative_iters): + if grad_clip is not None: + self.clip_grads(trainer.model.parameters(), **grad_clip) + + trainer.optimizer.step() + trainer.optimizer.zero_grad() + + @staticmethod + def clip_grads(params, **clip_args): + params = list( + filter(lambda p: p.requires_grad and p.grad is not None, params)) + if len(params) > 0: + return clip_grad.clip_grad_norm_(params, **clip_args) + + @HOOKS.register_module(module_name=Hooks.OptimizerHook) class OptimizerHook(Hook): """Optimizer hook @@ -36,52 +78,21 @@ class OptimizerHook(Hook): self.loss_keys = loss_keys self.cumulative_iters = cumulative_iters self.grad_clip = grad_clip + self.processor = OptimizerProcessor() - @staticmethod - def clip_grads(params, **clip_args): - params = list( - filter(lambda p: p.requires_grad and p.grad is not None, params)) - if len(params) > 0: - return clip_grad.clip_grad_norm_(params, **clip_args) - - @Hook.overload_func(name='OptimizerHook.initialize_optimizer') - def initialize_optimizer(self, trainer): - """Initialize the optimizer. - - This is a strategic function which can be registered by other hook's function. - """ - trainer.optimizer.zero_grad() + def set_processor(self, processor): + self.processor = processor def before_run(self, trainer): - self.initialize_optimizer(trainer) trainer.cumulative_iters = self.cumulative_iters + self.processor.initialize_optimizer(trainer) - @Hook.overload_func(name='OptimizerHook.backward') - def backward(self, trainer, loss_keys, cumulative_iters, grad_clip): - """Do module backward, optimizer's step and zero_grad and clip the grads. - - This is a strategic function which can be registered by other hook's function. - - Args: - trainer(`EpochBasedTrainer`): The trainer instance. - loss_keys(`list`): The list of loss keys. - cumulative_iters(`int`): The cumulative iters for gradients. - grad_clip(`dict`): The grad clipping options. 
- """ - for k in loss_keys: - trainer.train_outputs[k] /= cumulative_iters - trainer.train_outputs[k].backward() - - if self.every_n_iters(trainer, cumulative_iters): - if grad_clip is not None: - self.clip_grads(trainer.model.parameters(), **grad_clip) - - trainer.optimizer.step() - trainer.optimizer.zero_grad() + def before_train_iter(self, trainer): + self.processor.before_forward(trainer) def after_train_iter(self, trainer): - self.backward(trainer, self.loss_keys, self.cumulative_iters, - self.grad_clip) + self.processor.backward(trainer, self.loss_keys, self.cumulative_iters, + self.grad_clip) @HOOKS.register_module(module_name=Hooks.NoneOptimizerHook) diff --git a/modelscope/trainers/hooks/optimizer/torch_optimizer_hook.py b/modelscope/trainers/hooks/optimizer/torch_optimizer_hook.py index 1ab89720..fc7d2672 100644 --- a/modelscope/trainers/hooks/optimizer/torch_optimizer_hook.py +++ b/modelscope/trainers/hooks/optimizer/torch_optimizer_hook.py @@ -4,7 +4,45 @@ import logging from modelscope.metainfo import Hooks from modelscope.trainers.hooks import Hook from modelscope.trainers.hooks.builder import HOOKS -from .base import OptimizerHook +from .base import OptimizerHook, OptimizerProcessor + + +class TorchAMPOptimizerProcessor(OptimizerProcessor): + + def __init__(self, scaler, scale_update_param): + self.scaler = scaler + self.scale_update_param = scale_update_param + + def before_forward(self, trainer): + from torch.cuda import amp + setattr(self._model, 'forward', amp.autocast()(self._model.forward)) + + def initialize_optimizer(self, trainer): + logging.info('open fp16') + trainer.optimizer.zero_grad() + + model = trainer.unwrap_module(trainer.model) + self._ori_model_forward = model.forward + self._model = model + + def backward(self, trainer, loss_keys, cumulative_iters, grad_clip): + for k in loss_keys: + trainer.train_outputs[k] /= cumulative_iters + + for k in loss_keys: + self.scaler.scale(trainer.train_outputs[k]).backward() + + if Hook.every_n_iters(trainer, cumulative_iters): + self.scaler.unscale_(trainer.optimizer) + if grad_clip is not None: + OptimizerProcessor.clip_grads(trainer.model.parameters(), + **grad_clip) + + self.scaler.step(trainer.optimizer) + self.scaler.update(self.scale_update_param) + trainer.optimizer.zero_grad() + + setattr(self._model, 'forward', self._ori_model_forward) @HOOKS.register_module(module_name=Hooks.TorchAMPOptimizerHook) @@ -44,39 +82,11 @@ class TorchAMPOptimizerHook(Hook): '`loss_scale` type must be in [float, dict], but got {loss_scale}' ) - def register_strategy(self): - Hook.overload( - name='OptimizerHook.initialize_optimizer', - function=self.initialize_optimizer) - Hook.overload(name='OptimizerHook.backward', function=self.backward) - - def initialize_optimizer(self, trainer): - logging.info('open fp16') - trainer.optimizer.zero_grad() - - model = trainer.unwrap_module(trainer.model) - self._ori_model_forward = model.forward - self._model = model - - def before_train_iter(self, trainer): - from torch.cuda import amp - setattr(self._model, 'forward', amp.autocast()(self._model.forward)) - - def backward(self, trainer, loss_keys, cumulative_iters, grad_clip): - for k in loss_keys: - trainer.train_outputs[k] /= cumulative_iters - - for k in loss_keys: - self.scaler.scale(trainer.train_outputs[k]).backward() - - if self.every_n_iters(trainer, cumulative_iters): - self.scaler.unscale_(trainer.optimizer) - if grad_clip is not None: - OptimizerHook.clip_grads(trainer.model.parameters(), - **grad_clip) - - 
self.scaler.step(trainer.optimizer) - self.scaler.update(self._scale_update_param) - trainer.optimizer.zero_grad() - - setattr(self._model, 'forward', self._ori_model_forward) + def register_processor(self, trainer): + optimizer_hook = trainer.get_hook(OptimizerHook) + if len(optimizer_hook) > 0 and type( + optimizer_hook[0].processor) in (type(None), + OptimizerProcessor): + optimizer_hook[0].set_processor( + TorchAMPOptimizerProcessor(self.scaler, + self._scale_update_param)) diff --git a/modelscope/trainers/multi_modal/clip/clip_trainer.py b/modelscope/trainers/multi_modal/clip/clip_trainer.py index b0415bc2..ae00232f 100644 --- a/modelscope/trainers/multi_modal/clip/clip_trainer.py +++ b/modelscope/trainers/multi_modal/clip/clip_trainer.py @@ -176,11 +176,10 @@ class CLIPTrainer(EpochBasedTrainer): self.dataset_cfg = cfg.dataset if hasattr(self.dataset_cfg, 'column_map'): # cases where dataset key names are not "img" and "text" - img_key_name = getattr(self.dataset_cfg.column_map, 'img', 'img') + img_key_name = self.dataset_cfg['column_map'].get('img', 'img') preprocessor[ConfigKeys.train].set_input_img_key(img_key_name) preprocessor[ConfigKeys.val].set_input_img_key(img_key_name) - text_key_name = getattr(self.dataset_cfg.column_map, 'text', - 'text') + text_key_name = self.dataset_cfg['column_map'].get('text', 'text') preprocessor[ConfigKeys.train].set_input_text_key(text_key_name) preprocessor[ConfigKeys.val].set_input_text_key(text_key_name) self.global_batch_size = cfg.train.dataloader.batch_size_per_gpu * world_size diff --git a/modelscope/trainers/nlp/__init__.py b/modelscope/trainers/nlp/__init__.py index 755e5387..ae102efa 100644 --- a/modelscope/trainers/nlp/__init__.py +++ b/modelscope/trainers/nlp/__init__.py @@ -10,6 +10,7 @@ if TYPE_CHECKING: from .text_generation_trainer import TextGenerationTrainer from .sentence_embedding_trainer import SentenceEmbeddingTrainer from .siamese_uie_trainer import SiameseUIETrainer + from .translation_evaluation_trainer import TranslationEvaluationTrainer else: _import_structure = { 'sequence_classification_trainer': ['SequenceClassificationTrainer'], @@ -17,7 +18,8 @@ else: 'text_ranking_trainer': ['TextRankingTrainer'], 'text_generation_trainer': ['TextGenerationTrainer'], 'sentence_emebedding_trainer': ['SentenceEmbeddingTrainer'], - 'siamese_uie_trainer': ['SiameseUIETrainer'] + 'siamese_uie_trainer': ['SiameseUIETrainer'], + 'translation_evaluation_trainer': ['TranslationEvaluationTrainer'] } import sys diff --git a/modelscope/trainers/nlp/translation_evaluation_trainer.py b/modelscope/trainers/nlp/translation_evaluation_trainer.py new file mode 100644 index 00000000..05e9db89 --- /dev/null +++ b/modelscope/trainers/nlp/translation_evaluation_trainer.py @@ -0,0 +1,396 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
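
As an aside before the body of this new file: once the trainer below is registered under `Trainers.translation_evaluation_trainer`, it is expected to be built like any other trainer in this codebase. A rough usage sketch only; the model id is a hypothetical placeholder, not a real hub id:

from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer

# 'damo/nlp_unite_translation_evaluation' is a placeholder model id.
trainer = build_trainer(
    name=Trainers.translation_evaluation_trainer,
    default_args=dict(model='damo/nlp_unite_translation_evaluation'))
trainer.train()
metric_values = trainer.evaluate()
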
+"""PyTorch trainer for UniTE model.""" + +import os.path as osp +import random +from math import ceil +from os import mkdir +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +import torch +from pandas import DataFrame +from torch.nn.functional import pad +from torch.nn.utils import clip_grad_norm_ +from torch.optim import AdamW, Optimizer +from torch.utils.data import (BatchSampler, DataLoader, Dataset, Sampler, + SequentialSampler, SubsetRandomSampler) +from torch.utils.tensorboard import SummaryWriter +from tqdm import tqdm +from transformers import AutoTokenizer + +from modelscope.metainfo import Metrics, Trainers +from modelscope.metrics import Metric +from modelscope.metrics.builder import MetricKeys, build_metric +from modelscope.models.base import TorchModel +from modelscope.models.nlp.unite.configuration import InputFormat +from modelscope.models.nlp.unite.translation_evaluation import ( + UniTEForTranslationEvaluation, combine_input_sentences) +from modelscope.msdatasets import MsDataset +from modelscope.preprocessors import Preprocessor +from modelscope.trainers.builder import TRAINERS +from modelscope.trainers.hooks import Hook +from modelscope.trainers.trainer import EpochBasedTrainer +from modelscope.utils.config import ConfigDict +from modelscope.utils.constant import (ConfigKeys, Fields, ModeKeys, ModelFile, + TrainerStages) +from modelscope.utils.device import create_device +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +class TranslationEvaluationTrainingSampler(Sampler): + + def __init__(self, num_of_samples: int, + batch_size_for_each_input_format: int): + r"""Build a sampler for model training with translation evaluation trainer. + The trainer should derive samples for each subset of the entire dataset. + + Args: + num_of_samples: The number of samples in total. + batch_size_for_each_input_format: During training, the batch size for each input format + + Returns: + A data sampler for translation evaluation model training. + + """ + + self.num_of_samples = num_of_samples + self.batch_size_for_each_input_format = batch_size_for_each_input_format + + self.num_of_samples_for_each_input_format = self.num_of_samples // 3 + num_of_samples_to_use = self.num_of_samples_for_each_input_format * 3 + + logger.info( + '%d samples are given for training. ' + 'Using %d samples for each input format. ' + 'Leaving the last %d samples unused.' 
% + (self.num_of_samples, self.num_of_samples_for_each_input_format, + self.num_of_samples - num_of_samples_to_use)) + self.num_of_samples = num_of_samples_to_use + + random_permutations = torch.randperm( + self.num_of_samples).cpu().tolist() + + self.subset_iterators = dict() + self.subset_samplers = dict() + self.indices_for_each_input_format = dict() + for input_format_index, input_format in \ + enumerate((InputFormat.SRC_REF, InputFormat.SRC, InputFormat.REF)): + start_idx = input_format_index * self.num_of_samples_for_each_input_format + end_idx = start_idx + self.num_of_samples_for_each_input_format + self.indices_for_each_input_format[ + input_format] = random_permutations[start_idx:end_idx] + self.subset_samplers[input_format] = \ + BatchSampler(SubsetRandomSampler(self.indices_for_each_input_format[input_format]), + batch_size=self.batch_size_for_each_input_format, + drop_last=True) + self.subset_iterators[input_format] = iter( + self.subset_samplers[input_format]) + + self.num_of_sampled_batches = 0 + + if self.__len__() == 0: + raise ValueError( + 'The dataset doesn\'t contain enough examples to form a single batch.', + 'Please reduce the batch_size or use more examples for training.' + ) + + return + + def __iter__(self): + while True: + try: + if self.num_of_sampled_batches == self.__len__(): + for input_format in (InputFormat.SRC_REF, InputFormat.SRC, + InputFormat.REF): + while True: + try: + next(self.subset_iterators[input_format]) + except StopIteration: + self.subset_iterators[input_format] = \ + iter(self.subset_samplers[input_format]) + break + + self.num_of_sampled_batches = 0 + + output = list() + for input_format_idx, input_format in \ + enumerate((InputFormat.SRC_REF, InputFormat.SRC, InputFormat.REF)): + output += next(self.subset_iterators[input_format]) + + self.num_of_sampled_batches += 1 + + yield output + except StopIteration: + break + + def __len__(self) -> int: + return self.num_of_samples_for_each_input_format // self.batch_size_for_each_input_format + + +def convert_csv_dict_to_input( + batch: List[Dict[str, Any]], + preprocessor: Preprocessor) -> Tuple[List[torch.Tensor]]: + + input_dict = dict() + + for key in batch[0].keys(): + input_dict[key] = list(x[key] for x in batch) + + input_dict = preprocessor(input_dict) + + return input_dict + + +def data_collate_fn(batch: List[Dict[str, Any]], batch_size: int, + preprocessor: Preprocessor) -> List[Dict[str, Any]]: + + output_dict = dict() + output_dict['input_format'] = list() + + if preprocessor.mode == ModeKeys.TRAIN: + for input_format_index, input_format in \ + enumerate((InputFormat.SRC_REF, InputFormat.SRC, InputFormat.REF)): + start_idx = input_format_index * batch_size + end_idx = start_idx + batch_size + batch_to_process = batch[start_idx:end_idx] + output_dict['input_format'] += [input_format] * batch_size + preprocessor.change_input_format(input_format) + batch_to_process = convert_csv_dict_to_input( + batch_to_process, preprocessor) + + for key, value in batch_to_process.items(): + if key not in output_dict.keys(): + output_dict[key] = list() + output_dict[key].append(value) + elif preprocessor.mode == ModeKeys.EVAL: + output_dict['input_format'] += [preprocessor.input_format] * len(batch) + batch = convert_csv_dict_to_input(batch, preprocessor) + + for key, value in batch.items(): + if key not in output_dict.keys(): + output_dict[key] = list() + output_dict[key].append(value) + else: + raise ValueError( + 'During training, %s mode is not allowed for preprocessor.' 
+ % preprocessor.mode) + + input_max_lengths = max(x.size(-1) for x in output_dict['input_ids']) + output_dict['input_ids'] = list( + pad(x, + pad=(0, input_max_lengths - x.size(-1)), + value=preprocessor.pad_token_id) for x in output_dict['input_ids']) + + output_dict['input_ids'] = torch.cat(output_dict['input_ids'], dim=0) + output_dict['score'] = torch.Tensor(output_dict['score']).view(-1) + + if preprocessor.mode == ModeKeys.EVAL: + output_dict['lp'] = sum(output_dict['lp'], list()) + output_dict['raw_score'] = sum(output_dict['raw_score'], list()) + output_dict['segment_id'] = sum(output_dict['segment_id'], list()) + + return output_dict + + +@TRAINERS.register_module(module_name=Trainers.translation_evaluation_trainer) +class TranslationEvaluationTrainer(EpochBasedTrainer): + + def __init__(self, + model: Optional[Union[TorchModel, torch.nn.Module, + str]] = None, + cfg_file: Optional[str] = None, + device: str = 'gpu', + *args, + **kwargs): + r"""Build a translation evaluation trainer with a model dir or a model id in the model hub. + + Args: + model: A Model instance. + cfg_file: The path for the configuration file (configuration.json). + device: Used device for this trainer. + + """ + + def data_collator_for_train(x): + return data_collate_fn( + x, + batch_size=self.cfg.train.batch_size, + preprocessor=self.train_preprocessor) + + def data_collator_for_eval(x): + return data_collate_fn( + x, + batch_size=self.cfg.evaluation.batch_size, + preprocessor=self.eval_preprocessor) + + data_collator = { + ConfigKeys.train: data_collator_for_train, + ConfigKeys.val: data_collator_for_eval + } + + super().__init__( + model, + cfg_file=cfg_file, + data_collator=data_collator, + *args, + **kwargs) + + self.train_dataloader = None + self.eval_dataloader = None + + return + + def build_optimizer(self, cfg: ConfigDict) -> Optimizer: + r"""Sets the optimizers to be used during training.""" + if self.cfg.train.optimizer.type != 'AdamW': + return super().build_optimizer(cfg) + + # Freezing embedding layers for more efficient training. + for param in self.model.encoder.embeddings.parameters(): + param.requires_grad = False + + logger.info('Building AdamW optimizer ...') + learning_rates_and_parameters = list({ + 'params': + self.model.encoder.encoder.layer[i].parameters(), + 'lr': + self.cfg.train.optimizer.plm_lr + * self.cfg.train.optimizer.plm_lr_layerwise_decay**i, + } for i in range(0, self.cfg.model.num_hidden_layers)) + + learning_rates_and_parameters.append({ + 'params': + self.model.encoder.embeddings.parameters(), + 'lr': + self.cfg.train.optimizer.plm_lr, + }) + + learning_rates_and_parameters.append({ + 'params': + self.model.estimator.parameters(), + 'lr': + self.cfg.train.optimizer.mlp_lr + }) + + learning_rates_and_parameters.append({ + 'params': + self.model.layerwise_attention.parameters(), + 'lr': + self.cfg.train.optimizer.mlp_lr, + }) + + optimizer = AdamW( + learning_rates_and_parameters, + lr=self.cfg.train.optimizer.plm_lr, + betas=self.cfg.train.optimizer.betas, + eps=self.cfg.train.optimizer.eps, + weight_decay=self.cfg.train.optimizer.weight_decay, + ) + + return optimizer + + def get_train_dataloader(self) -> DataLoader: + logger.info('Building dataloader for training ...') + + if self.train_dataset is None: + logger.info('Reading train csv file from %s ...' 
+ % self.cfg.dataset.train.name) + self.train_dataset = MsDataset.load( + osp.join(self.model_dir, self.cfg.dataset.train.name), + split=self.cfg.dataset.train.split) + + train_dataloader = DataLoader( + self.train_dataset, + batch_sampler=TranslationEvaluationTrainingSampler( + len(self.train_dataset), + batch_size_for_each_input_format=self.cfg.train.batch_size), + num_workers=4, + collate_fn=self.train_data_collator, + generator=None) + + logger.info('Reading done, %d items in total' + % len(self.train_dataset)) + + return train_dataloader + + def get_eval_data_loader(self) -> DataLoader: + logger.info('Building dataloader for evaluating ...') + + if self.eval_dataset is None: + logger.info('Reading eval csv file from %s ...' + % self.cfg.dataset.valid.name) + + self.eval_dataset = MsDataset.load( + osp.join(self.model_dir, self.cfg.dataset.valid.name), + split=self.cfg.dataset.valid.split) + + eval_dataloader = DataLoader( + self.eval_dataset, + batch_sampler=BatchSampler( + SequentialSampler(range(0, len(self.eval_dataset))), + batch_size=self.cfg.evaluation.batch_size, + drop_last=False), + num_workers=4, + collate_fn=self.eval_data_collator, + generator=None) + + logger.info('Reading done, %d items in total' % len(self.eval_dataset)) + + return eval_dataloader + + def evaluation_loop(self, data_loader, metric_classes): + """ Evaluation loop used by `TranslationEvaluationTrainer.evaluate()`. + + The evaluation process of UniTE model should be arranged with three loops, + corresponding to the input formats of `InputFormat.SRC_REF`, `InputFormat.REF`, + and `InputFormat.SRC`. + + Here we directly copy the codes of `EpochBasedTrainer.evaluation_loop`, and change + the input format during each evaluation subloop. + """ + vis_closure = None + if hasattr(self.cfg.evaluation, 'visualization'): + vis_cfg = self.cfg.evaluation.visualization + vis_closure = partial( + self.visualization, dataset=self.eval_dataset, **vis_cfg) + + self.invoke_hook(TrainerStages.before_val) + metric_values = dict() + + for input_format in (InputFormat.SRC_REF, InputFormat.SRC, + InputFormat.REF): + self.eval_preprocessor.change_input_format(input_format) + + if self._dist: + from modelscope.trainers.utils.inference import multi_gpu_test + # list of batched result and data samples + metric_values.update( + multi_gpu_test( + self, + data_loader, + device=self.device, + metric_classes=metric_classes, + vis_closure=vis_closure, + tmpdir=self.cfg.evaluation.get('cache_dir', None), + gpu_collect=self.cfg.evaluation.get( + 'gpu_collect', False), + data_loader_iters_per_gpu=self._eval_iters_per_epoch)) + else: + from modelscope.trainers.utils.inference import single_gpu_test + metric_values.update( + single_gpu_test( + self, + data_loader, + device=self.device, + metric_classes=metric_classes, + vis_closure=vis_closure, + data_loader_iters=self._eval_iters_per_epoch)) + + for m in metric_classes: + if hasattr(m, 'clear') and callable(m.clear): + m.clear() + + self.invoke_hook(TrainerStages.after_val) + return metric_values diff --git a/modelscope/trainers/trainer.py b/modelscope/trainers/trainer.py index 683ff2f5..c980de04 100644 --- a/modelscope/trainers/trainer.py +++ b/modelscope/trainers/trainer.py @@ -11,7 +11,7 @@ import json import torch from torch import distributed as dist from torch import nn -from torch.utils.data import DataLoader, Dataset +from torch.utils.data import DataLoader, Dataset, Sampler from torch.utils.data.dataloader import default_collate from torch.utils.data.distributed import 
DistributedSampler @@ -88,7 +88,7 @@ class EpochBasedTrainer(BaseTrainer): compile_options (dict, optional): The compile options if compile=True, default None to use the default params of 'TorchModel.compile'. efficient_tuners (dict, optional): The tuners to use to train the model - + samplers: (:obj:`Sampler` or `Dict[Sampler]`, *optional*): samplers used in the train/eval DataLoader. Examples of cfg_modify_fn: >>> def cfg_modify_fn(cfg): >>> cfg.preprocessor.first_sequence= 'text1' @@ -114,6 +114,7 @@ class EpochBasedTrainer(BaseTrainer): model_revision: Optional[str] = DEFAULT_MODEL_REVISION, seed: int = 42, callbacks: Optional[List[Hook]] = None, + samplers: Optional[Union[Sampler, Dict[str, Sampler]]] = None, efficient_tuners: List[Dict] = None, **kwargs): @@ -132,6 +133,7 @@ class EpochBasedTrainer(BaseTrainer): self.train_dataloader = None self.eval_dataloader = None self.data_loader = None + self._samplers = samplers if isinstance(model, str): third_party = kwargs.get(ThirdParty.KEY) @@ -224,9 +226,6 @@ class EpochBasedTrainer(BaseTrainer): # Please check the DDPHook and MegatronHook for details. self.parallel_groups = {} - # Clear the Hook overload functions to avoid duplication. - Hook.clear_strategies() - if self.launcher is not None and not self.cfg.safe_get( 'train.hooks.DDPHook'): # A logic to fit the current code @@ -681,6 +680,7 @@ class EpochBasedTrainer(BaseTrainer): self.train_dataloader = self.get_train_dataloader() self.data_loader = self.train_dataloader self.register_optimizers_hook() + self.register_processors() self.print_hook_info() self.set_checkpoint_file_to_hook(checkpoint_path, load_all_state, kwargs.get('strict', False)) @@ -720,6 +720,7 @@ class EpochBasedTrainer(BaseTrainer): strict(`boolean`): If strict, any unmatched keys will cause an error. """ + self.register_processors() self.print_hook_info() if checkpoint_path is not None: from modelscope.trainers.hooks import LoadCheckpointHook @@ -758,6 +759,7 @@ class EpochBasedTrainer(BaseTrainer): kwargs: strict(`boolean`): If strict, any unmatched keys will cause an error. """ + self.register_processors() self.print_hook_info() if checkpoint_path is not None: from modelscope.trainers.hooks import LoadCheckpointHook @@ -897,11 +899,18 @@ class EpochBasedTrainer(BaseTrainer): """ if self.train_dataset is None: raise 'The train_dataset cannot be None.' + + sampler_cfg = {} + if self._samplers is not None: + sampler_cfg['sampler'] = self._samplers[ + ConfigKeys.train] if isinstance(self._samplers, + dict) else self._samplers data_loader = self._build_dataloader_with_dataset( self.train_dataset, dist=self._dist, seed=self._seed, collate_fn=self.train_data_collator, + **sampler_cfg, **self.cfg.train.get('dataloader', {})) return data_loader @@ -915,6 +924,11 @@ class EpochBasedTrainer(BaseTrainer): if self.eval_dataset is None: raise 'The eval_dataset cannot be None.' 
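
The `samplers` argument documented above is threaded through the dataloader builders here. A short sketch (not part of this patch) of supplying per-split samplers; the datasets and model id are stand-ins, and only the `samplers` kwarg and the `ConfigKeys.train` / `ConfigKeys.val` keys come from this patch:

import torch
from torch.utils.data import SequentialSampler, WeightedRandomSampler

from modelscope.trainers import EpochBasedTrainer
from modelscope.utils.constant import ConfigKeys

# Stand-in map-style datasets and a placeholder model id, for illustration only.
train_ds = [{'text': f'sample {i}', 'label': i % 2} for i in range(100)]
val_ds = [{'text': f'sample {i}', 'label': i % 2} for i in range(20)]
weights = torch.ones(len(train_ds))  # real use: upweight rare classes

trainer = EpochBasedTrainer(
    model='damo/some-model-id',
    train_dataset=train_ds,
    eval_dataset=val_ds,
    samplers={
        ConfigKeys.train: WeightedRandomSampler(weights, len(train_ds)),
        ConfigKeys.val: SequentialSampler(val_ds),
    })
trainer.train()
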
+ sampler_cfg = {} + if self._samplers is not None: + sampler_cfg['sampler'] = self._samplers[ + ConfigKeys.val] if isinstance(self._samplers, + dict) else self._samplers default_config = {'shuffle': False} default_config.update(self.cfg.evaluation.get('dataloader', {})) data_loader = self._build_dataloader_with_dataset( @@ -922,6 +936,7 @@ class EpochBasedTrainer(BaseTrainer): dist=self._dist, seed=self._seed, collate_fn=self.eval_data_collator, + **sampler_cfg, **default_config) return data_loader @@ -938,6 +953,11 @@ class EpochBasedTrainer(BaseTrainer): mode=ModeKeys.EVAL, preprocessor=self.eval_preprocessor) + sampler_cfg = {} + if self._samplers is not None: + sampler_cfg['sampler'] = self._samplers[ + ConfigKeys.val] if isinstance(self._samplers, + dict) else self._samplers default_config = {'shuffle': False} default_config.update(self.cfg.evaluation.get('dataloader', {})) data_loader = self._build_dataloader_with_dataset( @@ -945,6 +965,7 @@ class EpochBasedTrainer(BaseTrainer): dist=self._dist, seed=self._seed, collate_fn=self.eval_data_collator, + **sampler_cfg, **default_config) return data_loader @@ -1132,13 +1153,19 @@ class EpochBasedTrainer(BaseTrainer): batch_size = batch_size_per_gpu num_workers = workers_per_gpu - if dist and not isinstance(dataset, torch.utils.data.IterableDataset): - sampler = DistributedSampler( - dataset, num_replicas=world_size, rank=rank, shuffle=shuffle) - else: - sampler = None - if not isinstance(dataset, torch.utils.data.IterableDataset): - kwargs['shuffle'] = shuffle + sampler = kwargs.pop('sampler', None) + if sampler is None: + if dist and not isinstance(dataset, + torch.utils.data.IterableDataset): + sampler = DistributedSampler( + dataset, + num_replicas=world_size, + rank=rank, + shuffle=shuffle) + else: + sampler = None + if not isinstance(dataset, torch.utils.data.IterableDataset): + kwargs['shuffle'] = shuffle batch_sampler = None @@ -1169,7 +1196,6 @@ class EpochBasedTrainer(BaseTrainer): """ Training loop used by `EpochBasedTrainer.train()` """ self.invoke_hook(TrainerStages.before_run) - kwargs = {} self.model.train() for _ in range(self._epoch, self._max_epochs): self.invoke_hook(TrainerStages.before_train_epoch) @@ -1181,7 +1207,7 @@ class EpochBasedTrainer(BaseTrainer): self.data_batch = data_batch self._inner_iter = i self.invoke_hook(TrainerStages.before_train_iter) - self.train_step(self.model, data_batch, **kwargs) + self.train_step(self.model, data_batch) self.invoke_hook(TrainerStages.after_train_iter) # Value changed after the hooks are invoked, do not move them above the invoke_hook code. del self.data_batch @@ -1320,12 +1346,17 @@ class EpochBasedTrainer(BaseTrainer): hooks = [] for cfg_i in hook_cfg: hook = build_from_cfg(cfg_i, HOOKS) - if hasattr(hook, 'register_strategy'): - hook.register_strategy() self.register_hook(hook) hooks.append(hook) return hooks + def register_processors(self): + """Register processors to hooks + """ + for hook in self.hooks: + if hasattr(hook, 'register_processor'): + hook.register_processor(self) + def get_hook(self, cls): return [h for h in self._hooks if h.__class__ == cls] @@ -1381,14 +1412,7 @@ class EpochBasedTrainer(BaseTrainer): info += '\n -------------------- ' stage_hook_infos.append(info) stage_hook_infos = '\n'.join(stage_hook_infos) - - strategy_info = '\n --- Hook strategies info --- \n' - for consumer, methods in Hook._strategies.items(): - strategy_info += f'Method: {consumer} ' \ - f'replaced by: ' \ - f'{[method.__self__.__class__.__name__ + "." 
+ method.__name__ for method in methods]}\n' - strategy_info += '\n --- Hook strategies info end --- \n' - return stage_hook_infos + strategy_info + return stage_hook_infos def worker_init_fn(worker_id, num_workers, rank, seed): diff --git a/modelscope/trainers/training_args.py b/modelscope/trainers/training_args.py index f4e4e138..b7236163 100644 --- a/modelscope/trainers/training_args.py +++ b/modelscope/trainers/training_args.py @@ -1,108 +1,560 @@ # Copyright (c) Alibaba, Inc. and its affiliates. - import re -from argparse import Action, ArgumentDefaultsHelpFormatter, ArgumentParser +from copy import deepcopy from dataclasses import dataclass, field, fields -from functools import partial -from typing import Any, Dict, List, Tuple, Union +from typing import List, Union -from modelscope.trainers.default_config import DEFAULT_CONFIG -from modelscope.utils.config import Config, ConfigDict -from modelscope.utils.hub import read_config +import addict +import json + +from modelscope.trainers.cli_argument_parser import CliArgumentParser +from modelscope.utils.config import Config -def get_flatten_value(config: Config, metadata: Dict, exclusions=None): - cfg_node = metadata['cfg_node'] - if exclusions is None: - exclusions = [] - - values = config.safe_get(cfg_node) - if isinstance(values, dict): - param_map = [] - for key, value in values.items(): - if key in exclusions or not isinstance(value, - (str, int, float, bool)): - continue - value = add_quotes_for_str(value) - param_map.append(f'{key}={value}') - return ','.join(param_map) - else: - return values - - -def set_flatten_value(config: Config, values: Union[str, List[str]], - metadata: Dict): - cfg_node = metadata['cfg_node'] - if values is None: - return config - +def set_flatten_value(values: Union[str, List[str]]): pairs = values.split(',') if isinstance(values, str) else values - for kv in pairs: + _params = {} + for kv in pairs or []: if len(kv.strip()) == 0: continue key, value = kv.split('=') - value = parse_value(value) - config.merge_from_dict({cfg_node + '.' 
+ key: value})
-    return config
+        _params[key] = parse_value(value)
+    return _params


-def get_base_hook_args(config: Config, metadata: Dict):
-    cfg_node = metadata['cfg_node']
-    hook_type = metadata['hook_type']
-    key = metadata['key']
-    value = config.safe_get(cfg_node)
-    if value is None:
-        return get_hook_param(config, hook_type, key)
-    else:
-        return True if key == 'type' else value
+@dataclass
+class DatasetArgs:
+
+    train_dataset_name: str = field(
+        default=None,
+        metadata={
+            'help':
+            'The dataset name used for training, can be an id in the datahub or a local dir',
+        })
+
+    val_dataset_name: str = field(
+        default=None,
+        metadata={
+            'help':
+            'The dataset name used for evaluating, can be an id in the datahub or a local dir',
+        })
+
+    train_subset_name: str = field(
+        default=None,
+        metadata={
+            'help': 'The subset name used for training, can be None',
+        })
+
+    val_subset_name: str = field(
+        default=None,
+        metadata={
+            'help': 'The subset name used for evaluating, can be None',
+        })
+
+    train_split: str = field(
+        default=None, metadata={
+            'help': 'The split of train dataset',
+        })
+
+    val_split: str = field(
+        default=None, metadata={
+            'help': 'The split of val dataset',
+        })
+
+    train_dataset_namespace: str = field(
+        default=None,
+        metadata={
+            'help': 'The dataset namespace used for training',
+        })
+
+    val_dataset_namespace: str = field(
+        default=None,
+        metadata={
+            'help': 'The dataset namespace used for evaluating',
+        })
+
+    dataset_json_file: str = field(
+        default=None,
+        metadata={
+            'help':
+            'The json file to parse all datasets from, used in a complex dataset scenario, '
+            'the json format should be like:'
+            '''
+            [
+                {
+                    "dataset": {
+                        # All args used in the MsDataset.load function
+                        "dataset_name": "xxx",
+                        ...
+                    },
+                    # All columns used, mapping the column names in each dataset to the same names. 
+ "column_mapping": { + "text1": "sequence1", + "text2": "sequence2", + "label": "label", + }, + # float or str, float means to split the dataset into train/val, + # or just str(train/val) + "split": 0.8, + } + ] + ''', + }) -def set_base_hook_args(config: Config, value: Any, metadata: Dict): - cfg_node = metadata['cfg_node'] - hook_type = metadata['hook_type'] - key = metadata['key'] - if 'hooks' in config.train: - config.train.hooks = [ - hook for hook in config.train.hooks if hook['type'] != hook_type +@dataclass +class ModelArgs: + task: str = field( + default=None, + metadata={ + 'help': 'The task code to be used', + 'cfg_node': 'task' + }) + + model: str = field( + default=None, metadata={ + 'help': 'A model id or model dir', + }) + + model_type: str = field( + default=None, + metadata={ + 'help': + 'The mode type, if load_model_config is False, user need to fill this field', + 'cfg_node': 'model.type' + }) + + +@dataclass +class TrainArgs: + + seed: int = field( + default=42, metadata={ + 'help': 'The random seed', + }) + + per_device_train_batch_size: int = field( + default=16, + metadata={ + 'cfg_node': 'train.dataloader.batch_size_per_gpu', + 'help': + 'The `batch_size_per_gpu` argument for the train dataloader', + }) + + train_data_worker: int = field( + default=0, + metadata={ + 'cfg_node': 'train.dataloader.workers_per_gpu', + 'help': 'The `workers_per_gpu` argument for the train dataloader', + }) + + train_shuffle: bool = field( + default=False, + metadata={ + 'cfg_node': 'train.dataloader.shuffle', + 'help': 'The `shuffle` argument for the train dataloader', + }) + + train_drop_last: bool = field( + default=False, + metadata={ + 'cfg_node': 'train.dataloader.drop_last', + 'help': 'The `drop_last` argument for the train dataloader', + }) + + per_device_eval_batch_size: int = field( + default=16, + metadata={ + 'cfg_node': 'evaluation.dataloader.batch_size_per_gpu', + 'help': + 'The `batch_size_per_gpu` argument for the eval dataloader', + }) + + eval_data_worker: int = field( + default=0, + metadata={ + 'cfg_node': 'evaluation.dataloader.workers_per_gpu', + 'help': 'The `workers_per_gpu` argument for the eval dataloader', + }) + + eval_shuffle: bool = field( + default=False, + metadata={ + 'cfg_node': 'evaluation.dataloader.shuffle', + 'help': 'The `shuffle` argument for the eval dataloader', + }) + + eval_drop_last: bool = field( + default=False, + metadata={ + 'cfg_node': 'evaluation.dataloader.drop_last', + 'help': 'The `drop_last` argument for the eval dataloader', + }) + + max_epochs: int = field( + default=5, + metadata={ + 'cfg_node': 'train.max_epochs', + 'help': 'The training epochs', + }) + + work_dir: str = field( + default='./train_target', + metadata={ + 'cfg_node': 'train.work_dir', + 'help': 'The directory to save models and logs', + }) + + lr: float = field( + default=5e-5, + metadata={ + 'cfg_node': 'train.optimizer.lr', + 'help': 'The learning rate of the optimizer', + }) + + lr_scheduler: str = field( + default='LinearLR', + metadata={ + 'cfg_node': 'train.lr_scheduler.type', + 'help': 'The lr_scheduler type in torch', + }) + + optimizer: str = field( + default='AdamW', + metadata={ + 'cfg_node': 'train.optimizer.type', + 'help': 'The optimizer type in PyTorch, like `AdamW`', + }) + + optimizer_params: str = field( + default=None, + metadata={ + 'cfg_node': 'train.optimizer', + 'help': 'The optimizer params', + 'cfg_setter': set_flatten_value, + }) + + lr_scheduler_params: str = field( + default=None, + metadata={ + 'cfg_node': 'train.lr_scheduler', + 
'help': 'The lr scheduler params', + 'cfg_setter': set_flatten_value, + }) + + lr_strategy: str = field( + default='by_epoch', + metadata={ + 'cfg_node': 'train.lr_scheduler.options.lr_strategy', + 'help': 'The lr decay strategy', + 'choices': ['by_epoch', 'by_step', 'no'], + }) + + local_rank: int = field( + default=0, metadata={ + 'help': 'The local rank', + }) + + logging_interval: int = field( + default=5, + metadata={ + 'help': 'The interval of iter of logging information', + 'cfg_node': 'train.logging.interval', + }) + + eval_strategy: str = field( + default='by_epoch', + metadata={ + 'help': 'Eval strategy, can be `by_epoch` or `by_step` or `no`', + 'cfg_node': 'evaluation.period.eval_strategy', + 'choices': ['by_epoch', 'by_step', 'no'], + }) + + eval_interval: int = field( + default=1, + metadata={ + 'help': 'Eval interval', + 'cfg_node': 'evaluation.period.interval', + }) + + eval_metrics: str = field( + default=None, + metadata={ + 'help': 'The metric name for evaluation', + 'cfg_node': 'evaluation.metrics' + }) + + save_strategy: str = field( + default='by_epoch', + metadata={ + 'help': + 'Checkpointing strategy, can be `by_epoch` or `by_step` or `no`', + 'cfg_node': 'train.checkpoint.period.save_strategy', + 'choices': ['by_epoch', 'by_step', 'no'], + }) + + save_interval: int = field( + default=1, + metadata={ + 'help': + 'The interval of epoch or iter of saving checkpoint period', + 'cfg_node': 'train.checkpoint.period.interval', + }) + + save_best_checkpoint: bool = field( + default=False, + metadata={ + 'help': + 'Save the checkpoint(if it\'s the best) after the evaluation.', + 'cfg_node': 'train.checkpoint.best.save_best', + }) + + metric_for_best_model: str = field( + default=None, + metadata={ + 'help': 'The metric used to measure the model.', + 'cfg_node': 'train.checkpoint.best.metric_key', + }) + + metric_rule_for_best_model: str = field( + default='max', + metadata={ + 'help': + 'The rule to measure the model with the metric, can be `max` or `min`', + 'cfg_node': 'train.checkpoint.best.rule', + }) + + max_checkpoint_num: int = field( + default=None, + metadata={ + 'help': + 'The max number of checkpoints to keep, older ones will be deleted.', + 'cfg_node': 'train.checkpoint.period.max_checkpoint_num', + }) + + max_checkpoint_num_best: int = field( + default=1, + metadata={ + 'help': + 'The max number of best checkpoints to keep, worse ones will be deleted.', + 'cfg_node': 'train.checkpoint.best.max_checkpoint_num', + }) + + push_to_hub: bool = field( + default=False, + metadata={ + 'help': 'Push to hub after each checkpointing', + 'cfg_node': 'train.checkpoint.period.push_to_hub', + }) + + repo_id: str = field( + default=None, + metadata={ + 'help': + 'The repo id in modelhub, usually the format is "group/model"', + 'cfg_node': 'train.checkpoint.period.hub_repo_id', + }) + + hub_token: str = field( + default=None, + metadata={ + 'help': + 'The modelhub token, you can also set the token to the env variable `MODELSCOPE_API_TOKEN`', + 'cfg_node': 'train.checkpoint.period.hub_token', + }) + + private_hub: bool = field( + default=True, + metadata={ + 'help': 'Upload to a private hub', + 'cfg_node': 'train.checkpoint.period.private_hub', + }) + + hub_revision: str = field( + default='master', + metadata={ + 'help': 'Which branch to commit to', + 'cfg_node': 'train.checkpoint.period.hub_revision', + }) + + push_to_hub_best: bool = field( + default=False, + metadata={ + 'help': 'Push to hub after each checkpointing', + 'cfg_node': 'train.checkpoint.best.push_to_hub', + }) + 
+ repo_id_best: str = field( + default=None, + metadata={ + 'help': + 'The repo id in modelhub, usually the format is "group/model"', + 'cfg_node': 'train.checkpoint.best.hub_repo_id', + }) + + hub_token_best: str = field( + default=None, + metadata={ + 'help': + 'The modelhub token, you can also set the token to the env variable `MODELSCOPE_API_TOKEN`', + 'cfg_node': 'train.checkpoint.best.hub_token', + }) + + private_hub_best: bool = field( + default=True, + metadata={ + 'help': 'Upload to a private hub', + 'cfg_node': 'train.checkpoint.best.private_hub', + }) + + hub_revision_best: str = field( + default='master', + metadata={ + 'help': 'Which branch to commit to', + 'cfg_node': 'train.checkpoint.best.hub_revision', + }) + + +@dataclass(init=False) +class TrainingArgs(DatasetArgs, TrainArgs, ModelArgs): + + use_model_config: bool = field( + default=False, + metadata={ + 'help': + 'Use the configuration of the model, ' + 'default will only use the parameters in the CLI and the dataclass', + }) + + def __init__(self, **kwargs): + self.manual_args = list(kwargs.keys()) + for f in fields(self): + if f.name in kwargs: + setattr(self, f.name, kwargs[f.name]) + self._unknown_args = {} + + def parse_cli(self, parser_args=None): + """Construct a TrainingArg class by the parameters of CLI. + + Returns: + Self + """ + parser = CliArgumentParser(self) + args, unknown = parser.parse_known_args(parser_args) + unknown = [ + item for item in unknown + if item not in ('\\', '\n') and '--local-rank=' not in item ] - if key == 'type': - if value and config.safe_get(cfg_node) is None: - config.merge_from_dict({cfg_node: {}}) - else: - config.merge_from_dict({cfg_node: value}) + _unknown = {} + for i in range(0, len(unknown), 2): + _unknown[unknown[i].replace('-', '')] = parse_value(unknown[i + 1]) + args_dict = vars(args) + self.manual_args += parser.manual_args + for key, value in deepcopy(args_dict).items(): + if key is not None and hasattr(self, key): + setattr(self, key, value) + return self -def get_strategy(config: Config, - metadata: Dict, - value_pair: Tuple[str] = ('by_epoch', 'by_step')): - flag = get_base_hook_args(config, metadata) - if flag is None: + def to_config(self, ignore_default_config=None): + """Convert the TrainingArgs to the `Config` + + Returns: + The Config, and extra parameters in dict. + """ + cfg = Config() + args_dict = addict.Dict() + + if ignore_default_config is None: + ignore_default_config = self.use_model_config + + for f in fields(self): + cfg_node = f.metadata.get('cfg_node') + cfg_setter = f.metadata.get('cfg_setter') or (lambda x: x) + if cfg_node is not None: + if f.name in self.manual_args or not ignore_default_config: + if isinstance(cfg_node, str): + cfg_node = [cfg_node] + for _node in cfg_node: + cfg.merge_from_dict( + {_node: cfg_setter(getattr(self, f.name))}) + else: + args_dict[f.name] = getattr(self, f.name) + + cfg.merge_from_dict(self._unknown_args) + return cfg, args_dict + + def get_metadata(self, key): + _fields = fields(self) + for f in _fields: + if f.name == key: + return f return None - return value_pair[0] if flag else value_pair[1] -def set_strategy(config: Config, - value: Any, - metadata: Dict, - value_pair: Tuple[str] = ('by_epoch', 'by_step')): - set_base_hook_args(config, value == value_pair[0], metadata) +def build_dataset_from_file(filename): + """ + The filename format: + [ + { + "dataset": { + "dataset_name": "xxx", + ... 
+ }, + "column_mapping": { + "text1": "sequence1", + "text2": "sequence2", + "label": "label", + } + "split": 0.8, + } + ] + """ + from modelscope import MsDataset + train_set = [] + eval_set = [] + with open(filename, 'r') as f: + ds_json = json.load(f) + for ds in ds_json: + dataset = MsDataset.load(**ds['dataset']).to_hf_dataset() + all_columns = dataset.column_names + keep_columns = ds['column_mapping'].keys() + remove_columns = [ + column for column in all_columns if column not in keep_columns + ] + from datasets import Features + from datasets import Value + from datasets import ClassLabel + features = [ + f for f in dataset.features.items() if f[0] in keep_columns + ] + new_features = {} + for f in features: + if isinstance(f[1], ClassLabel): + new_features[f[0]] = Value(f[1].dtype) + else: + new_features[f[0]] = f[1] + new_features = Features(new_features) + dataset = dataset.map( + lambda x: x, + remove_columns=remove_columns, + features=new_features).rename_columns(ds['column_mapping']) + split = ds['split'] + if isinstance(split, str): + assert split in ('train', 'val') + if split == 'train': + train_set.append(dataset) + else: + eval_set.append(dataset) + else: + assert isinstance(split, float) and 0 < split < 1 + ds_dict = dataset.train_test_split(train_size=split) + train_set.append(ds_dict['train']) + eval_set.append(ds_dict['test']) -def get_hook_param(config, hook_type: str, key='type'): - hooks = config.safe_get('train.hooks', []) - _hooks = list(filter(lambda hook: hook['type'] == hook_type, hooks)) - if key == 'type': - return len(_hooks) > 0 - elif len(_hooks) > 0: - return getattr(_hooks[0], key, None) - return None - - -def add_quotes_for_str(value: Union[str, float, bool, None]) -> str: - if isinstance(value, str): - return f'"{value}"' - else: - return str(value) + from datasets import concatenate_datasets + return concatenate_datasets(train_set), concatenate_datasets(eval_set) def parse_value(value: str) -> Union[str, float, bool, None]: @@ -126,717 +578,3 @@ def parse_value(value: str) -> Union[str, float, bool, None]: return float(value) else: return value - - -@dataclass -class TrainingArgs: - model: str = field( - default=None, metadata={ - 'help': 'A model id or model dir', - }) - - seed: int = field( - default=42, metadata={ - 'help': 'The random seed', - }) - - task: str = field( - default=None, - metadata={ - 'help': 'The task code to be used', - 'cfg_node': 'task' - }) - - dataset_name: str = field( - default=None, metadata={ - 'help': 'The dataset name', - }) - - subset_name: str = field( - default=None, metadata={ - 'help': 'The subset name of the dataset', - }) - - train_dataset_name: str = field( - default=None, metadata={ - 'help': 'The train dataset name', - }) - - val_dataset_name: str = field( - default=None, metadata={ - 'help': 'The validation dataset name', - }) - - per_device_train_batch_size: int = field( - default=None, - metadata={ - 'cfg_node': 'train.dataloader.batch_size_per_gpu', - 'help': 'The training batch size per GPU', - }) - - train_data_worker: int = field( - default=0, - metadata={ - 'cfg_node': 'train.dataloader.workers_per_gpu', - 'help': 'The number of data workers for train dataloader', - }) - - train_shuffle: bool = field( - default=None, - metadata={ - 'cfg_node': 'train.dataloader.shuffle', - 'help': 'Shuffle the train dataset or not', - }) - - train_drop_last: bool = field( - default=None, - metadata={ - 'cfg_node': - 'train.dataloader.drop_last', - 'help': - 'Whether to drop out the last set of data in the 
train_dataset', - }) - - per_device_eval_batch_size: int = field( - default=None, - metadata={ - 'cfg_node': 'evaluation.dataloader.batch_size_per_gpu', - 'help': 'The eval batch size per GPU', - }) - - eval_data_worker: int = field( - default=0, - metadata={ - 'cfg_node': 'evaluation.dataloader.workers_per_gpu', - 'help': 'The number of data workers for eval dataloader', - }) - - eval_shuffle: bool = field( - default=None, - metadata={ - 'cfg_node': 'evaluation.dataloader.shuffle', - 'help': 'Shuffle the eval dataset or not', - }) - - eval_drop_last: bool = field( - default=None, - metadata={ - 'cfg_node': 'evaluation.dataloader.drop_last', - 'help': - 'Whether to drop out the last set of data in the eval_dataset', - }) - - max_epochs: int = field( - default=None, - metadata={ - 'cfg_node': 'train.max_epochs', - 'help': 'The training epochs', - }) - - work_dir: str = field( - default=None, - metadata={ - 'cfg_node': 'train.work_dir', - 'help': 'The training dir to save models and logs', - }) - - lr: float = field( - default=None, - metadata={ - 'cfg_node': 'train.optimizer.lr', - 'help': 'The learning rate of the optimizer', - }) - - optimizer: str = field( - default=None, - metadata={ - 'cfg_node': 'train.optimizer.type', - 'help': 'The optimizer type', - }) - - optimizer_params: str = field( - default=None, - metadata={ - 'cfg_node': - 'train.optimizer', - 'cfg_getter': - partial(get_flatten_value, exclusions=['type', 'lr', 'options']), - 'cfg_setter': - set_flatten_value, - 'help': - 'The optimizer init params except `lr`', - }) - - lr_scheduler_params: str = field( - default=None, - metadata={ - 'cfg_node': - 'train.lr_scheduler', - 'cfg_getter': - partial(get_flatten_value, exclusions=['type', 'lr', 'options']), - 'cfg_setter': - set_flatten_value, - 'help': - 'The lr_scheduler init params', - }) - - local_rank: int = field( - default=0, metadata={ - 'help': 'The training local rank', - }) - - save_ckpt: bool = field( - default=True, - metadata={ - 'help': - 'Periodically save checkpoint when True, corresponding to CheckpointHook', - 'cfg_node': 'train.checkpoint.period', - 'hook_type': 'CheckpointHook', - 'key': 'type', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - save_ckpt_best: bool = field( - default=None, - metadata={ - 'help': - 'Save best checkpoint when True, corresponding to BestCkptSaverHook', - 'cfg_node': 'train.checkpoint.best', - 'hook_type': 'BestCkptSaverHook', - 'key': 'type', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - evaluate: bool = field( - default=True, - metadata={ - 'help': 'Evaluate when True, corresponding to EvaluationHook', - 'cfg_node': 'evaluation.period', - 'hook_type': 'EvaluationHook', - 'key': 'type', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - save_ckpt_strategy: str = field( - default=None, - metadata={ - 'help': 'Periodically save checkpoint by epoch or by step' - 'use with `CheckpointHook`, can be `by_epoch` or `by_step`', - 'cfg_node': 'train.checkpoint.period.by_epoch', - 'hook_type': 'CheckpointHook', - 'key': 'by_epoch', - 'choices': ['by_epoch', 'by_step'], - 'cfg_getter': get_strategy, - 'cfg_setter': set_strategy, - }) - - save_ckpt_best_strategy: str = field( - default=None, - metadata={ - 'help': 'Save best checkpoint by epoch or by step' - 'use with `BestCkptSaverHook`, can be `by_epoch` or `by_step`', - 'cfg_node': 'train.checkpoint.best.by_epoch', - 'hook_type': 'BestCkptSaverHook', - 'key': 'by_epoch', - 'choices': 
['by_epoch', 'by_step'], - 'cfg_getter': get_strategy, - 'cfg_setter': set_strategy, - }) - - push_to_hub: bool = field( - default=None, - metadata={ - 'help': - 'Push to hub after one checkpoint saved by CheckpointHook in the local disk', - 'cfg_node': 'train.checkpoint.period.push_to_hub', - 'hook_type': 'CheckpointHook', - 'key': 'push_to_hub', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - model_id_with_org: str = field( - default=None, - metadata={ - 'help': - 'The repo id in modelhub, usually it\'s like "group/model"', - 'cfg_node': 'train.checkpoint.period.model_id_with_org', - 'hook_type': 'CheckpointHook', - 'key': 'model_id_with_org', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - hub_token: str = field( - default=None, - metadata={ - 'help': - 'The token to push to hub, you can also set the token to the env variable `MODELSCOPE_API_TOKEN`', - 'cfg_node': 'train.checkpoint.period.hub_token', - 'hook_type': 'CheckpointHook', - 'key': 'hub_token', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - private_hub: bool = field( - default=None, - metadata={ - 'help': 'Upload to a private hub', - 'cfg_node': 'train.checkpoint.period.private_hub', - 'hook_type': 'CheckpointHook', - 'key': 'private_hub', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - push_to_hub_best_model: bool = field( - default=None, - metadata={ - 'help': - 'Push to hub after one checkpoint saved by BestCkptSaverHook in the local disk', - 'cfg_node': 'train.checkpoint.best.push_to_hub', - 'hook_type': 'BestCkptSaverHook', - 'key': 'push_to_hub', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - model_id_with_org_best_model: str = field( - default=None, - metadata={ - 'help': - 'The repo id in modelhub, usually it\'s like "group/model"', - 'cfg_node': 'train.checkpoint.best.model_id_with_org', - 'hook_type': 'BestCkptSaverHook', - 'key': 'model_id_with_org', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - hub_token_best_model: str = field( - default=None, - metadata={ - 'help': - 'The token to push to hub, you can also set the token to the env variable `MODELSCOPE_API_TOKEN`', - 'cfg_node': 'train.checkpoint.best.hub_token', - 'hook_type': 'BestCkptSaverHook', - 'key': 'hub_token', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - private_hub_best_model: bool = field( - default=None, - metadata={ - 'help': 'Upload to a private hub', - 'cfg_node': 'train.checkpoint.best.private_hub', - 'hook_type': 'BestCkptSaverHook', - 'key': 'private_hub', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - ckpt_period_interval: int = field( - default=1, - metadata={ - 'help': - 'The interval of epoch or iter of saving checkpoint period', - 'cfg_node': 'train.checkpoint.period.interval', - 'hook_type': 'CheckpointHook', - 'key': 'interval', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - ckpt_best_interval: int = field( - default=None, - metadata={ - 'help': 'The interval of epoch or iter of saving checkpoint best', - 'cfg_node': 'train.checkpoint.best.interval', - 'hook_type': 'BestCkptSaverHook', - 'key': 'interval', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - metric_for_best_model: str = field( - default=None, - metadata={ - 'help': - 'Which metric key to judge the checkpoint is better or not, use with 
`BestCkptSaverHook`, ' - 'please make sure this key is returned by the `evaluation_metrics` classes', - 'cfg_node': - 'train.checkpoint.best.metric_key', - 'hook_type': - 'BestCkptSaverHook', - 'key': - 'metric_key', - 'cfg_getter': - get_base_hook_args, - 'cfg_setter': - set_base_hook_args, - }) - - metric_rule_for_best_model: str = field( - default=None, - metadata={ - 'help': - 'Which rule to compare the value of `checkpoint_saving_metric`, ' - 'use with `BestCkptSaverHook`, can be `max` or `min`', - 'cfg_node': - 'train.checkpoint.best.rule', - 'hook_type': - 'BestCkptSaverHook', - 'key': - 'rule', - 'cfg_getter': - get_base_hook_args, - 'cfg_setter': - set_base_hook_args, - }) - - save_ckpt_peroid_limit: int = field( - default=None, - metadata={ - 'help': - 'The max saving number of checkpoint, older checkpoints will be deleted.', - 'cfg_node': 'train.checkpoint.period.max_checkpoint_num', - 'hook_type': 'CheckpointHook', - 'key': 'max_checkpoint_num', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - save_ckpt_best_limit: int = field( - default=None, - metadata={ - 'help': - 'The max saving number of checkpoint, worse checkpoints will be deleted.', - 'cfg_node': 'train.checkpoint.best.max_checkpoint_num', - 'hook_type': 'BestCkptSaverHook', - 'key': 'max_checkpoint_num', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - logging_interval: int = field( - default=None, - metadata={ - 'help': 'The interval of iter of logging information', - 'cfg_node': 'train.logging.interval', - 'hook_type': 'TextLoggerHook', - 'key': 'interval', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - eval_strategy: str = field( - default=None, - metadata={ - 'help': 'Evaluate model by epoch or by step' - 'use with `EvaluationHook`, can be `by_epoch` or `by_step`', - 'cfg_node': 'evaluation.period.by_epoch', - 'hook_type': 'EvaluationHook', - 'key': 'by_epoch', - 'choices': ['by_epoch', 'by_step'], - 'cfg_getter': get_strategy, - 'cfg_setter': set_strategy, - }) - - eval_interval: int = field( - default=None, - metadata={ - 'help': 'Evaluation interval by epoch or iter', - 'cfg_node': 'evaluation.period.interval', - 'hook_type': 'EvaluationHook', - 'key': 'interval', - 'cfg_getter': get_base_hook_args, - 'cfg_setter': set_base_hook_args, - }) - - eval_metrics: str = field( - default=None, - metadata={ - 'help': 'The metric module name used in evaluation', - 'cfg_node': 'evaluation.metrics' - }) - - namespace: str = field( - default=None, metadata={'help': 'The namespace of dataset'}) - - @classmethod - def from_cli(cls, parser_args=None, **extra_kwargs): - """Construct a TrainingArg class by the parameters of CLI. - - Args: - **extra_kwargs: Extra args which can be defined in code. - - Returns: - The output TrainingArg class with the parameters from CLI. 
- """ - self = cls(**extra_kwargs) - parser = CliArgumentParser(self) - args, unknown = parser.parse_known_args(parser_args) - unknown = [item for item in unknown if item not in ('\\', '\n')] - _unknown = {} - for i in range(0, len(unknown), 2): - _unknown[unknown[i].replace('-', '')] = parse_value(unknown[i + 1]) - cfg_dict = vars(args) - - if args.model is not None: - try: - cfg = read_config(args.model) - except Exception as e: - print('Read config failed with error:', e) - else: - self = cls.from_config(cfg, **extra_kwargs) - for key, value in cfg_dict.items(): - if key is not None and hasattr(self, - key) and key in parser.manual_args: - setattr(self, key, value) - self.extra_args = _unknown - return self - - def to_args(self): - """Convert the TrainingArg class to key-value pairs. - - Returns: The key-value pair. - - """ - _args = {} - for f in fields(self): - _args[f.name] = getattr(self, f.name) - return _args - - @classmethod - def from_config(cls, config=DEFAULT_CONFIG, **kwargs): - """Construct the TrainingArg class by a `Config` class. - - Args: - config: The Config class. By default, `DEFAULT_CONFIG` is used. - **kwargs: Extra args which can be defined in code. - - Returns: The output TrainingArg class with the parameters from the config. - - """ - - self = cls(**kwargs) - for f in fields(self): - if 'cfg_node' in f.metadata and getattr(self, f.name) is None: - self._to_field(f, config) - return self - - def _to_field(self, f, config): - assert 'cfg_node' in f.metadata - if 'cfg_getter' in f.metadata: - cfg_getter = f.metadata['cfg_getter'] - setattr(self, f.name, cfg_getter(config, f.metadata)) - else: - cfg_node = f.metadata['cfg_node'] - setattr(self, f.name, config.safe_get(cfg_node)) - - def _to_config(self, f, config: Config): - assert 'cfg_node' in f.metadata - value = getattr(self, f.name) - if 'cfg_setter' in f.metadata: - cfg_setter = f.metadata['cfg_setter'] - config = cfg_setter(config, value, f.metadata) - else: - cfg_node = f.metadata['cfg_node'] - if isinstance(cfg_node, str): - cfg_node = [cfg_node] - for _node in cfg_node: - config.merge_from_dict({_node: value}) - return config - - def __call__(self, cfg: Config): - for f in fields(self): - if 'cfg_node' not in f.metadata: - continue - - value = getattr(self, f.name) - if value is not None: - self._to_config(f, cfg) - if hasattr(self, 'extra_args'): - cfg.merge_from_dict(self.extra_args) - else: - self._to_field(f, cfg) - return cfg - - -class CliArgumentParser(ArgumentParser): - """ Argument Parser to define and parse command-line args for training. - - Args: - training_args (TrainingArgs): dict or list of dict which defines different - paramters for training. 
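# --- Editor's aside (illustrative only, not part of the patch) ----------------
# The removed `_to_config`/`__call__` pair above maps every dataclass field that
# carries a `cfg_node` entry in its metadata onto the trainer Config, either via
# a custom `cfg_setter` or a plain `merge_from_dict` on the dotted node path.
# A minimal, self-contained approximation of that pattern, using plain dicts
# instead of the modelscope `Config` class (all names below are hypothetical):
from dataclasses import dataclass, field, fields


def merge_node(cfg: dict, dotted_key: str, value):
    """Set cfg['a']['b']['c'] = value for dotted_key 'a.b.c'."""
    *parents, leaf = dotted_key.split('.')
    node = cfg
    for key in parents:
        node = node.setdefault(key, {})
    node[leaf] = value


@dataclass
class ToyArgs:
    lr: float = field(default=None, metadata={'cfg_node': 'train.optimizer.lr'})
    max_epochs: int = field(default=None, metadata={'cfg_node': 'train.max_epochs'})

    def apply_to(self, cfg: dict) -> dict:
        # Mirror of the removed __call__: only fields that have a value and a
        # `cfg_node` entry are written back into the configuration tree (the
        # original also reads config values back into empty fields).
        for f in fields(self):
            value = getattr(self, f.name)
            if value is not None and 'cfg_node' in f.metadata:
                merge_node(cfg, f.metadata['cfg_node'], value)
        return cfg


# ToyArgs(lr=1e-4, max_epochs=3).apply_to({}) yields
# {'train': {'optimizer': {'lr': 0.0001}, 'max_epochs': 3}}
# -----------------------------------------------------------------------------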
- """ - - def __init__(self, training_args: TrainingArgs = None, **kwargs): - if 'formatter_class' not in kwargs: - kwargs['formatter_class'] = ArgumentDefaultsHelpFormatter - super().__init__(**kwargs) - self.training_args = training_args - self.define_args() - - def get_manual_args(self, args): - return [arg[2:] for arg in args if arg.startswith('--')] - - def _parse_known_args(self, args: List = None, namespace=None): - self.model_id = namespace.model if namespace is not None else None - if '--model' in args: - self.model_id = args[args.index('--model') + 1] - self.manual_args = self.get_manual_args(args) - return super()._parse_known_args(args, namespace) - - def print_help(self, file=None): - config = DEFAULT_CONFIG - if self.model_id is not None: - try: - config = read_config(self.model_id) - except Exception as e: - print('Read config failed with error:', e) - - if config is not None: - for action_group in self._optionals._group_actions: - if hasattr(self.training_args, action_group.dest): - value = getattr(self.training_args, action_group.dest) - f = {f.name: f - for f in fields(self.training_args) - }.get(action_group.dest) - if value is not None: - action_group.default = value - elif 'cfg_node' in f.metadata: - cfg_node = f.metadata['cfg_node'] - if isinstance(cfg_node, str): - cfg_node = [cfg_node] - - assert isinstance(cfg_node, (list, tuple)) - if isinstance(cfg_node[0], str): - action_group.default = config.safe_get(cfg_node[0]) - else: - action_group.default = cfg_node[0](config) - return super().print_help(file) - - def define_args(self): - if self.training_args is not None: - for f in fields(self.training_args): - arg_name = f.name - arg_attr = getattr(self.training_args, f.name) - name = f'--{arg_name}' - kwargs = dict(type=f.type, help=f.metadata['help']) - kwargs['default'] = arg_attr - - if 'choices' in f.metadata: - kwargs['choices'] = f.metadata['choices'] - - kwargs['action'] = SingleAction - self.add_argument(name, **kwargs) - - -class DictAction(Action): - """ - argparse action to split an argument into KEY=VALUE form - on the first = and append to a dictionary. List options can - be passed as comma separated values, i.e 'KEY=V1,V2,V3', or with explicit - brackets, i.e. 'KEY=[V1,V2,V3]'. It also support nested brackets to build - list/tuple values. e.g. 'KEY=[(V1,V2),(V3,V4)]' - """ - - @staticmethod - def parse_int_float_bool_str(val): - try: - return int(val) - except ValueError: - pass - try: - return float(val) - except ValueError: - pass - if val.lower() in ['true', 'false']: - return val.lower() == 'true' - if val == 'None': - return None - return val - - @staticmethod - def parse_iterable(val): - """Parse iterable values in the string. - All elements inside '()' or '[]' are treated as iterable values. - Args: - val (str): Value string. - Returns: - list | tuple: The expanded list or tuple from the string. - Examples: - >>> DictAction._parse_iterable('1,2,3') - [1, 2, 3] - >>> DictAction._parse_iterable('[a, b, c]') - ['a', 'b', 'c'] - >>> DictAction._parse_iterable('[(1, 2, 3), [a, b], c]') - [(1, 2, 3), ['a', 'b'], 'c'] - """ - - def find_next_comma(string): - """Find the position of next comma in the string. - If no ',' is found in the string, return the string length. All - chars inside '()' and '[]' are treated as one element and thus ',' - inside these brackets are ignored. 
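# --- Editor's aside (illustrative only, not part of the patch) ----------------
# `DictAction.parse_iterable` in the surrounding removed code does bracket-aware
# splitting, so nested strings such as '[(1,2),(3,4)]' become real tuples and
# lists, with scalars coerced by `parse_int_float_bool_str`. A hedged usage
# sketch, assuming the removed `SingleAction`/`DictAction` classes are in scope
# and that `DictAction` is registered with `nargs='+'` (its loop over `values`
# implies multiple tokens); the `--topk`/`--column_map` option names are made
# up for the example:
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--topk', action=SingleAction, type=str)
parser.add_argument('--column_map', action=DictAction, nargs='+')

ns = parser.parse_args(['--topk', '(1,5)', '--column_map', 'img=image', 'text=query'])
assert ns.topk == (1, 5)                                   # tuple rebuilt from '(1,5)'
assert ns.column_map == {'img': 'image', 'text': 'query'}  # KEY=VALUE pairs -> dict
# -----------------------------------------------------------------------------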
- """ - assert (string.count('(') == string.count(')')) and ( - string.count('[') - == string.count(']')), f'Imbalanced brackets exist in {string}' - end = len(string) - for idx, char in enumerate(string): - pre = string[:idx] - # The string before this ',' is balanced - if ((char == ',') and (pre.count('(') == pre.count(')')) - and (pre.count('[') == pre.count(']'))): - end = idx - break - return end - - # Strip ' and " characters and replace whitespace. - val = val.strip('\'\"').replace(' ', '') - is_tuple = False - if val.startswith('(') and val.endswith(')'): - is_tuple = True - val = val[1:-1] - elif val.startswith('[') and val.endswith(']'): - val = val[1:-1] - elif ',' not in val: - # val is a single value - return DictAction.parse_int_float_bool_str(val) - - values = [] - while len(val) > 0: - comma_idx = find_next_comma(val) - element = DictAction.parse_iterable(val[:comma_idx]) - values.append(element) - val = val[comma_idx + 1:] - if is_tuple: - values = tuple(values) - return values - - def __call__(self, parser, namespace, values, option_string): - options = {} - for kv in values: - key, val = kv.split('=', maxsplit=1) - options[key] = self.parse_iterable(val) - setattr(namespace, self.dest, options) - - -class SingleAction(DictAction): - """ Argparse action to convert value to tuple or list or nested structure of - list and tuple, i.e 'V1,V2,V3', or with explicit brackets, i.e. '[V1,V2,V3]'. - It also support nested brackets to build list/tuple values. e.g. '[(V1,V2),(V3,V4)]' - """ - - def __call__(self, parser, namespace, value, option_string): - if isinstance(value, str): - setattr(namespace, self.dest, self.parse_iterable(value)) - else: - setattr(namespace, self.dest, value) diff --git a/modelscope/utils/ast_index_file.py b/modelscope/utils/ast_index_file.py new file mode 100644 index 00000000..5aedf1bb --- /dev/null +++ b/modelscope/utils/ast_index_file.py @@ -0,0 +1 @@ +{"index": {"('MODELS', 'protein-structure', 'unifold')": {"filepath": "TEMPLATE_PATH/models/science/unifold/model.py", "imports": ["torch", "os", "typing", "argparse"], "module": "modelscope.models.science.unifold.model"}, "('MODELS', 'acoustic-noise-suppression', 'speech_dfsmn_ans')": {"filepath": "TEMPLATE_PATH/models/audio/ans/denoise_net.py", "imports": ["torch"], "module": "modelscope.models.audio.ans.denoise_net"}, "('MODELS', 'acoustic-noise-suppression', 'speech_frcrn_ans_cirm_16k')": {"filepath": "TEMPLATE_PATH/models/audio/ans/frcrn.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.audio.ans.frcrn"}, "('MODELS', 'speaker-verification', 'ecapa-tdnn-sv')": {"filepath": "TEMPLATE_PATH/models/audio/sv/ecapa_tdnn.py", "imports": ["torch", "torchaudio", "math", "os", "typing"], "module": "modelscope.models.audio.sv.ecapa_tdnn"}, "('MODELS', 'speaker-verification', 'eres2net-sv')": {"filepath": "TEMPLATE_PATH/models/audio/sv/ERes2Net.py", "imports": ["torch", "torchaudio", "math", "os", "typing"], "module": "modelscope.models.audio.sv.ERes2Net"}, "('MODELS', 'speaker-verification', 'cam++-sv')": {"filepath": "TEMPLATE_PATH/models/audio/sv/DTDNN.py", "imports": ["torch", "torchaudio", "collections", "os", "typing"], "module": "modelscope.models.audio.sv.DTDNN"}, "('MODELS', 'speaker-verification', 'generic-sv')": {"filepath": "TEMPLATE_PATH/models/audio/sv/generic_speaker_verification.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.sv.generic_speaker_verification"}, "('MODELS', 'speaker-diarization', 'generic-sv')": {"filepath": 
"TEMPLATE_PATH/models/audio/sv/generic_speaker_verification.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.sv.generic_speaker_verification"}, "('MODELS', 'speaker-diarization', 'scl-sd')": {"filepath": "TEMPLATE_PATH/models/audio/sv/speaker_change_locator.py", "imports": ["numpy", "torch", "torchaudio", "collections", "os", "typing"], "module": "modelscope.models.audio.sv.speaker_change_locator"}, "('MODELS', 'speaker-verification', 'rdino_ecapa-tdnn-sv')": {"filepath": "TEMPLATE_PATH/models/audio/sv/rdino.py", "imports": ["torch", "torchaudio", "math", "os", "typing"], "module": "modelscope.models.audio.sv.rdino"}, "('MODELS', 'inverse-text-processing', 'generic-itn')": {"filepath": "TEMPLATE_PATH/models/audio/itn/generic_inverse_text_processing.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.itn.generic_inverse_text_processing"}, "('MODELS', 'auto-speech-recognition', 'wenet-asr')": {"filepath": "TEMPLATE_PATH/models/audio/asr/wenet_automatic_speech_recognition.py", "imports": ["json", "os", "wenetruntime", "typing"], "module": "modelscope.models.audio.asr.wenet_automatic_speech_recognition"}, "('MODELS', 'auto-speech-recognition', 'generic-asr')": {"filepath": "TEMPLATE_PATH/models/audio/asr/generic_automatic_speech_recognition.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.asr.generic_automatic_speech_recognition"}, "('MODELS', 'voice-activity-detection', 'generic-asr')": {"filepath": "TEMPLATE_PATH/models/audio/asr/generic_automatic_speech_recognition.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.asr.generic_automatic_speech_recognition"}, "('MODELS', 'language-score-prediction', 'generic-asr')": {"filepath": "TEMPLATE_PATH/models/audio/asr/generic_automatic_speech_recognition.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.asr.generic_automatic_speech_recognition"}, "('MODELS', 'speech-timestamp', 'generic-asr')": {"filepath": "TEMPLATE_PATH/models/audio/asr/generic_automatic_speech_recognition.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.asr.generic_automatic_speech_recognition"}, "('MODELS', 'punctuation', 'generic-punc')": {"filepath": "TEMPLATE_PATH/models/audio/punc/generic_punctuation.py", "imports": ["os", "typing"], "module": "modelscope.models.audio.punc.generic_punctuation"}, "('MODELS', 'text-to-speech', 'sambert-hifigan')": {"filepath": "TEMPLATE_PATH/models/audio/tts/sambert_hifi.py", "imports": ["shutil", "numpy", "json", "__future__", "wave", "matplotlib", "datetime", "yaml", "os", "zipfile"], "module": "modelscope.models.audio.tts.sambert_hifi"}, "('MODELS', 'speech-separation', 'speech_mossformer_separation_temporal_8k')": {"filepath": "TEMPLATE_PATH/models/audio/separation/mossformer.py", "imports": ["torch", "os", "copy", "typing"], "module": "modelscope.models.audio.separation.mossformer"}, "('MODELS', 'keyword-spotting', 'speech_dfsmn_kws_char_farfield')": {"filepath": "TEMPLATE_PATH/models/audio/kws/farfield/model.py", "imports": ["os", "typing", "tempfile"], "module": "modelscope.models.audio.kws.farfield.model"}, "('MODELS', 'keyword-spotting', 'speech_dfsmn_kws_char_farfield_iot')": {"filepath": "TEMPLATE_PATH/models/audio/kws/farfield/model.py", "imports": ["os", "typing", "tempfile"], "module": "modelscope.models.audio.kws.farfield.model"}, "('MODELS', 'keyword-spotting', 'kws-kwsbp')": {"filepath": "TEMPLATE_PATH/models/audio/kws/generic_key_word_spotting.py", "imports": ["os", "typing"], "module": 
"modelscope.models.audio.kws.generic_key_word_spotting"}, "('MODELS', 'keyword-spotting', 'speech_kws_fsmn_char_ctc_nearfield')": {"filepath": "TEMPLATE_PATH/models/audio/kws/nearfield/model.py", "imports": ["torch", "tempfile", "sys", "os", "typing"], "module": "modelscope.models.audio.kws.nearfield.model"}, "('MODELS', 'image-captioning', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'ocr-recognition', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'visual-grounding', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'visual-question-answering', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'visual-entailment', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'image-classification', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'text-summarization', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'text-classification', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'auto-speech-recognition', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'sudoku', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'text2sql', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py", "imports": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_all_tasks"}, "('MODELS', 'multi-modal-embedding', 'clip-multi-modal-embedding')": {"filepath": "TEMPLATE_PATH/models/multi_modal/clip/model.py", "imports": ["numpy", "json", "torch", "collections", "os", "typing"], "module": "modelscope.models.multi_modal.clip.model"}, "('MODELS', 'visual-question-answering', 'mplug')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mplug_for_all_tasks.py", "imports": ["os", "typing"], "module": 
"modelscope.models.multi_modal.mplug_for_all_tasks"}, "('MODELS', 'image-captioning', 'mplug')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mplug_for_all_tasks.py", "imports": ["os", "typing"], "module": "modelscope.models.multi_modal.mplug_for_all_tasks"}, "('MODELS', 'image-text-retrieval', 'mplug')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mplug_for_all_tasks.py", "imports": ["os", "typing"], "module": "modelscope.models.multi_modal.mplug_for_all_tasks"}, "('MODELS', 'video-question-answering', 'hitea')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mplug_for_all_tasks.py", "imports": ["os", "typing"], "module": "modelscope.models.multi_modal.mplug_for_all_tasks"}, "('MODELS', 'video-captioning', 'hitea')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mplug_for_all_tasks.py", "imports": ["os", "typing"], "module": "modelscope.models.multi_modal.mplug_for_all_tasks"}, "('MODELS', 'text-to-image-synthesis', 'multi-stage-diffusion-text-to-image-synthesis')": {"filepath": "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/model.py", "imports": ["PIL", "numpy", "json", "torch", "math", "os", "typing"], "module": "modelscope.models.multi_modal.multi_stage_diffusion.model"}, "('MODELS', 'text-to-image-synthesis', 'diffusion-text-to-image-synthesis')": {"filepath": "TEMPLATE_PATH/models/multi_modal/diffusion/model.py", "imports": ["numpy", "json", "torch", "os", "typing"], "module": "modelscope.models.multi_modal.diffusion.model"}, "('MODELS', 'efficient-diffusion-tuning', 'efficient-diffusion-tuning')": {"filepath": "TEMPLATE_PATH/models/multi_modal/efficient_diffusion_tuning/efficient_stable_diffusion.py", "imports": ["transformers", "torch", "functools", "diffusers", "os", "typing"], "module": "modelscope.models.multi_modal.efficient_diffusion_tuning.efficient_stable_diffusion"}, "('MODELS', 'generative-multi-modal-embedding', 'gemm-generative-multi-modal')": {"filepath": "TEMPLATE_PATH/models/multi_modal/gemm/gemm_model.py", "imports": ["PIL", "numpy", "json", "torch", "torchvision", "os", "typing"], "module": "modelscope.models.multi_modal.gemm.gemm_model"}, "('MODELS', 'video-multi-modal-embedding', 'video-clip-multi-modal-embedding')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py", "imports": ["urllib", "PIL", "random", "numpy", "json", "torch", "decord", "tempfile", "os", "typing", "uuid"], "module": "modelscope.models.multi_modal.mmr.models.clip_for_mm_video_embedding"}, "('MODELS', 'multi-modal-similarity', 'team-multi-modal-similarity')": {"filepath": "TEMPLATE_PATH/models/multi_modal/team/team_model.py", "imports": ["PIL", "numpy", "torch", "cv2", "tokenizers", "torchvision", "typing"], "module": "modelscope.models.multi_modal.team.team_model"}, "('MODELS', 'document-vl-embedding', 'vldoc')": {"filepath": "TEMPLATE_PATH/models/multi_modal/vldoc/model.py", "imports": ["json", "torch", "logging", "re", "math", "sys", "copy", "torchvision", "os"], "module": "modelscope.models.multi_modal.vldoc.model"}, "('MODELS', 'video-temporal-grounding', 'soonet')": {"filepath": "TEMPLATE_PATH/models/multi_modal/soonet/model.py", "imports": ["torch", "os"], "module": "modelscope.models.multi_modal.soonet.model"}, "('MODELS', 'text-ranking', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/text_ranking.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.text_ranking"}, "('MODELS', 'backbone', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/backbone.py", "imports": ["dataclasses", 
"transformers", "random", "torch", "math", "warnings", "os", "typing"], "module": "modelscope.models.multi_modal.mgeo.backbone"}, "('MODELS', 'text-classification', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/text_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.text_classification"}, "('MODELS', 'nli', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/text_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.text_classification"}, "('MODELS', 'sentiment-classification', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/text_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.text_classification"}, "('MODELS', 'sentence-similarity', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/text_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.text_classification"}, "('MODELS', 'zero-shot-classification', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/text_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.text_classification"}, "('MODELS', 'token-classification', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/token_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.token_classification"}, "('MODELS', 'part-of-speech', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/token_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.token_classification"}, "('MODELS', 'word-segmentation', 'mgeo')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mgeo/token_classification.py", "imports": ["torch"], "module": "modelscope.models.multi_modal.mgeo.token_classification"}, "('MODELS', 'multimodal-dialogue', 'mplug-owl')": {"filepath": "TEMPLATE_PATH/models/multi_modal/mplug_owl/modeling_mplug_owl.py", "imports": ["dataclasses", "transformers", "random", "torch", "logging", "math", "copy", "io", "os", "typing"], "module": "modelscope.models.multi_modal.mplug_owl.modeling_mplug_owl"}, "('MODELS', 'text-to-image-synthesis', 'ofa')": {"filepath": "TEMPLATE_PATH/models/multi_modal/ofa_for_text_to_image_synthesis_model.py", "imports": ["PIL", "pkg_resources", "numpy", "json", "torch", "taming", "torchvision", "os", "typing"], "module": "modelscope.models.multi_modal.ofa_for_text_to_image_synthesis_model"}, "('MODELS', 'text-to-video-synthesis', 'latent-text-to-video-synthesis')": {"filepath": "TEMPLATE_PATH/models/multi_modal/video_synthesis/text_to_video_synthesis_model.py", "imports": ["open_clip", "torch", "einops", "os", "typing"], "module": "modelscope.models.multi_modal.video_synthesis.text_to_video_synthesis_model"}, "('MODELS', 'image-captioning', 'clip-interrogator')": {"filepath": "TEMPLATE_PATH/models/multi_modal/clip_interrogator/model.py", "imports": ["PIL", "hashlib", "numpy", "open_clip", "torch", "dataclasses", "os", "typing", "requests", "transformers", "safetensors", "tqdm", "math", "time", "torchvision"], "module": "modelscope.models.multi_modal.clip_interrogator.model"}, "('MODELS', 'generative-multi-modal-embedding', 'rleg-generative-multi-modal')": {"filepath": "TEMPLATE_PATH/models/multi_modal/rleg/rleg.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.models.multi_modal.rleg.rleg"}, "('MODELS', 'translation-evaluation', 'unite')": {"filepath": "TEMPLATE_PATH/models/nlp/unite/translation_evaluation.py", "imports": 
["transformers", "numpy", "torch", "math", "warnings", "packaging", "dataclasses", "typing"], "module": "modelscope.models.nlp.unite.translation_evaluation"}, "('MODELS', 'text-generation', 'palm-v2')": {"filepath": "TEMPLATE_PATH/models/nlp/palm_v2/text_generation.py", "imports": ["dataclasses", "subprocess", "codecs", "transformers", "numpy", "json", "torch", "math", "copy", "os", "typing"], "module": "modelscope.models.nlp.palm_v2.text_generation"}, "('MODELS', 'fill-mask', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/fill_mask.py", "imports": ["torch", "transformers"], "module": "modelscope.models.nlp.structbert.fill_mask"}, "('MODELS', 'backbone', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/backbone.py", "imports": ["transformers", "torch", "math", "packaging", "dataclasses", "typing"], "module": "modelscope.models.nlp.structbert.backbone"}, "('MODELS', 'faq-question-answering', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/faq_question_answering.py", "imports": ["torch", "math", "collections", "os", "typing"], "module": "modelscope.models.nlp.structbert.faq_question_answering"}, "('MODELS', 'text-classification', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.text_classification"}, "('MODELS', 'nli', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.text_classification"}, "('MODELS', 'sentiment-classification', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.text_classification"}, "('MODELS', 'sentence-similarity', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.text_classification"}, "('MODELS', 'zero-shot-classification', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.text_classification"}, "('MODELS', 'token-classification', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/token_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.token_classification"}, "('MODELS', 'word-segmentation', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/token_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.token_classification"}, "('MODELS', 'part-of-speech', 'structbert')": {"filepath": "TEMPLATE_PATH/models/nlp/structbert/token_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.structbert.token_classification"}, "('MODELS', 'backbone', 'transformers')": {"filepath": "TEMPLATE_PATH/models/nlp/hf_transformers/backbone.py", "imports": ["transformers"], "module": "modelscope.models.nlp.hf_transformers.backbone"}, "('MODELS', 'fill-mask', 'fill-mask')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/fill_mask.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.models.nlp.task_models.fill_mask"}, "('MODELS', 'text-ranking', 'text-ranking')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/text_ranking.py", "imports": ["typing", "numpy"], "module": "modelscope.models.nlp.task_models.text_ranking"}, "('MODELS', 'feature-extraction', 'feature-extraction')": 
{"filepath": "TEMPLATE_PATH/models/nlp/task_models/feature_extraction.py", "imports": ["typing", "numpy"], "module": "modelscope.models.nlp.task_models.feature_extraction"}, "('MODELS', 'text-classification', 'text-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/text_classification.py", "imports": ["typing", "numpy"], "module": "modelscope.models.nlp.task_models.text_classification"}, "('MODELS', 'text-generation', 'text-generation')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/text_generation.py", "imports": ["torch", "typing", "transformers", "numpy"], "module": "modelscope.models.nlp.task_models.text_generation"}, "('MODELS', 'information-extraction', 'information-extraction')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/information_extraction.py", "imports": ["typing", "numpy"], "module": "modelscope.models.nlp.task_models.information_extraction"}, "('MODELS', 'relation-extraction', 'information-extraction')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/information_extraction.py", "imports": ["typing", "numpy"], "module": "modelscope.models.nlp.task_models.information_extraction"}, "('MODELS', 'token-classification', 'token-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'part-of-speech', 'token-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'named-entity-recognition', 'token-classification-for-ner')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'transformer-crf', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'token-classification', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'token-classification', 'transformer-crf-for-word-segmentation')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'named-entity-recognition', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'part-of-speech', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'word-segmentation', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, "('MODELS', 'word-segmentation', 'transformer-crf-for-word-segmentation')": {"filepath": "TEMPLATE_PATH/models/nlp/task_models/token_classification.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.task_models.token_classification"}, 
"('MODELS', 'fill-mask', 'veco')": {"filepath": "TEMPLATE_PATH/models/nlp/veco/fill_mask.py", "imports": ["transformers"], "module": "modelscope.models.nlp.veco.fill_mask"}, "('MODELS', 'backbone', 'veco')": {"filepath": "TEMPLATE_PATH/models/nlp/veco/backbone.py", "imports": ["transformers"], "module": "modelscope.models.nlp.veco.backbone"}, "('MODELS', 'nli', 'veco')": {"filepath": "TEMPLATE_PATH/models/nlp/veco/text_classification.py", "imports": ["transformers"], "module": "modelscope.models.nlp.veco.text_classification"}, "('MODELS', 'sentiment-classification', 'veco')": {"filepath": "TEMPLATE_PATH/models/nlp/veco/text_classification.py", "imports": ["transformers"], "module": "modelscope.models.nlp.veco.text_classification"}, "('MODELS', 'sentence-similarity', 'veco')": {"filepath": "TEMPLATE_PATH/models/nlp/veco/text_classification.py", "imports": ["transformers"], "module": "modelscope.models.nlp.veco.text_classification"}, "('MODELS', 'text-classification', 'veco')": {"filepath": "TEMPLATE_PATH/models/nlp/veco/text_classification.py", "imports": ["transformers"], "module": "modelscope.models.nlp.veco.text_classification"}, "('MODELS', 'token-classification', 'veco')": {"filepath": "TEMPLATE_PATH/models/nlp/veco/token_classification.py", "imports": ["torch", "transformers"], "module": "modelscope.models.nlp.veco.token_classification"}, "('MODELS', 'text-generation', 'glm130b')": {"filepath": "TEMPLATE_PATH/models/nlp/glm_130b/text_generation.py", "imports": ["random", "stat", "torch", "SwissArmyTransformer", "re", "functools", "sys", "copy", "time", "os", "typing"], "module": "modelscope.models.nlp.glm_130b.text_generation"}, "('MODELS', 'text-summarization', 'mglm')": {"filepath": "TEMPLATE_PATH/models/nlp/mglm/mglm_for_text_summarization.py", "imports": ["random", "numpy", "torch", "megatron_util", "os", "typing"], "module": "modelscope.models.nlp.mglm.mglm_for_text_summarization"}, "('MODELS', 'backbone', 'plug-mental')": {"filepath": "TEMPLATE_PATH/models/nlp/plug_mental/backbone.py", "imports": ["transformers", "torch", "math", "packaging", "dataclasses", "typing"], "module": "modelscope.models.nlp.plug_mental.backbone"}, "('MODELS', 'text-classification', 'plug-mental')": {"filepath": "TEMPLATE_PATH/models/nlp/plug_mental/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.plug_mental.text_classification"}, "('MODELS', 'nli', 'plug-mental')": {"filepath": "TEMPLATE_PATH/models/nlp/plug_mental/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.plug_mental.text_classification"}, "('MODELS', 'sentiment-classification', 'plug-mental')": {"filepath": "TEMPLATE_PATH/models/nlp/plug_mental/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.plug_mental.text_classification"}, "('MODELS', 'sentence-similarity', 'plug-mental')": {"filepath": "TEMPLATE_PATH/models/nlp/plug_mental/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.plug_mental.text_classification"}, "('MODELS', 'zero-shot-classification', 'plug-mental')": {"filepath": "TEMPLATE_PATH/models/nlp/plug_mental/text_classification.py", "imports": ["torch"], "module": "modelscope.models.nlp.plug_mental.text_classification"}, "('MODELS', 'text-generation', 'gpt-moe')": {"filepath": "TEMPLATE_PATH/models/nlp/gpt_moe/text_generation.py", "imports": ["typing", "transformers"], "module": "modelscope.models.nlp.gpt_moe.text_generation"}, "('MODELS', 'translation', 'csanmt-translation')": {"filepath": 
"TEMPLATE_PATH/models/nlp/csanmt/translation.py", "imports": ["tensorflow", "typing", "math", "collections"], "module": "modelscope.models.nlp.csanmt.translation"}, "('MODELS', 'text2text-generation', 'T5')": {"filepath": "TEMPLATE_PATH/models/nlp/T5/text2text_generation.py", "imports": ["transformers", "torch", "copy", "warnings", "typing"], "module": "modelscope.models.nlp.T5.text2text_generation"}, "('MODELS', 'backbone', 'T5')": {"filepath": "TEMPLATE_PATH/models/nlp/T5/backbone.py", "imports": ["transformers", "torch", "math", "copy", "warnings", "os", "typing"], "module": "modelscope.models.nlp.T5.backbone"}, "('HEADS', 'text-classification', 'text-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/text_classification_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.text_classification_head"}, "('HEADS', 'sentence-similarity', 'text-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/text_classification_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.text_classification_head"}, "('HEADS', 'nli', 'text-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/text_classification_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.text_classification_head"}, "('HEADS', 'sentiment-classification', 'text-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/text_classification_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.text_classification_head"}, "('HEADS', 'information-extraction', 'information-extraction')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/infromation_extraction_head.py", "imports": ["torch"], "module": "modelscope.models.nlp.heads.infromation_extraction_head"}, "('HEADS', 'relation-extraction', 'information-extraction')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/infromation_extraction_head.py", "imports": ["torch"], "module": "modelscope.models.nlp.heads.infromation_extraction_head"}, "('HEADS', 'token-classification', 'token-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/token_classification_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.token_classification_head"}, "('HEADS', 'named-entity-recognition', 'token-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/token_classification_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.token_classification_head"}, "('HEADS', 'part-of-speech', 'token-classification')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/token_classification_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.token_classification_head"}, "('HEADS', 'text-generation', 'text-generation')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/text_generation_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.text_generation_head"}, "('HEADS', 'token-classification', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'named-entity-recognition', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'word-segmentation', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": 
"modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'part-of-speech', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'transformer-crf', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'token-classification', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'named-entity-recognition', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'word-segmentation', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'part-of-speech', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/crf_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.crf_head"}, "('HEADS', 'fill-mask', 'roberta-mlm')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/torch_pretrain_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.torch_pretrain_head"}, "('HEADS', 'fill-mask', 'bert-mlm')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/fill_mask_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.fill_mask_head"}, "('HEADS', 'fill-mask', 'fill-mask')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/fill_mask_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.fill_mask_head"}, "('HEADS', 'fill-mask', 'xlm-roberta-mlm')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/fill_mask_head.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.heads.fill_mask_head"}, "('HEADS', 'text-ranking', 'text-ranking')": {"filepath": "TEMPLATE_PATH/models/nlp/heads/text_ranking_head.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.heads.text_ranking_head"}, "('BACKBONES', 'backbone', 'bloom')": {"filepath": "TEMPLATE_PATH/models/nlp/bloom/backbone.py", "imports": ["transformers"], "module": "modelscope.models.nlp.bloom.backbone"}, "('MODELS', 'backbone', 'xlm-roberta')": {"filepath": "TEMPLATE_PATH/models/nlp/xlm_roberta/backbone.py", "imports": ["torch", "transformers", "math", "packaging"], "module": "modelscope.models.nlp.xlm_roberta.backbone"}, "('MODELS', 'text-classification', 'peer')": {"filepath": "TEMPLATE_PATH/models/nlp/peer/text_classification.py", "imports": ["copy", "torch"], "module": "modelscope.models.nlp.peer.text_classification"}, "('MODELS', 'nli', 'peer')": {"filepath": "TEMPLATE_PATH/models/nlp/peer/text_classification.py", "imports": ["copy", "torch"], "module": "modelscope.models.nlp.peer.text_classification"}, "('MODELS', 'sentiment-classification', 'peer')": {"filepath": "TEMPLATE_PATH/models/nlp/peer/text_classification.py", "imports": ["copy", "torch"], "module": "modelscope.models.nlp.peer.text_classification"}, "('MODELS', 'sentence-similarity', 'peer')": {"filepath": "TEMPLATE_PATH/models/nlp/peer/text_classification.py", "imports": ["copy", "torch"], "module": 
"modelscope.models.nlp.peer.text_classification"}, "('MODELS', 'zero-shot-classification', 'peer')": {"filepath": "TEMPLATE_PATH/models/nlp/peer/text_classification.py", "imports": ["copy", "torch"], "module": "modelscope.models.nlp.peer.text_classification"}, "('MODELS', 'fid-dialogue', 'fid-T5')": {"filepath": "TEMPLATE_PATH/models/nlp/fid_T5/text_generation.py", "imports": ["torch", "os", "io", "transformers"], "module": "modelscope.models.nlp.fid_T5.text_generation"}, "('MODELS', 'table-question-answering', 'space-T-en')": {"filepath": "TEMPLATE_PATH/models/nlp/space_T_en/text_to_sql.py", "imports": ["torch", "os", "typing", "text2sql_lgesql"], "module": "modelscope.models.nlp.space_T_en.text_to_sql"}, "('MODELS', 'competency-aware-translation', 'canmt')": {"filepath": "TEMPLATE_PATH/models/nlp/canmt/canmt_translation.py", "imports": ["numpy", "torch", "math", "os", "typing"], "module": "modelscope.models.nlp.canmt.canmt_translation"}, "('MODELS', 'text-error-correction', 'bart')": {"filepath": "TEMPLATE_PATH/models/nlp/bart/text_error_correction.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.nlp.bart.text_error_correction"}, "('MODELS', 'text-classification', 'user-satisfaction-estimation')": {"filepath": "TEMPLATE_PATH/models/nlp/use/user_satisfaction_estimation.py", "imports": ["transformers", "numpy", "torch", "os", "typing"], "module": "modelscope.models.nlp.use.user_satisfaction_estimation"}, "('BACKBONES', 'backbone', 'gpt-neo')": {"filepath": "TEMPLATE_PATH/models/nlp/gpt_neo/backbone.py", "imports": ["transformers"], "module": "modelscope.models.nlp.gpt_neo.backbone"}, "('MODELS', 'siamese-uie', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/siamese_uie.py", "imports": ["torch", "copy"], "module": "modelscope.models.nlp.bert.siamese_uie"}, "('MODELS', 'fill-mask', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/fill_mask.py", "imports": [], "module": "modelscope.models.nlp.bert.fill_mask"}, "('MODELS', 'word-alignment', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/word_alignment.py", "imports": ["torch"], "module": "modelscope.models.nlp.bert.word_alignment"}, "('MODELS', 'text-ranking', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/text_ranking.py", "imports": [], "module": "modelscope.models.nlp.bert.text_ranking"}, "('MODELS', 'backbone', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/backbone.py", "imports": ["torch", "transformers", "math", "packaging"], "module": "modelscope.models.nlp.bert.backbone"}, "('MODELS', 'text-classification', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/text_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.text_classification"}, "('MODELS', 'nli', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/text_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.text_classification"}, "('MODELS', 'sentiment-classification', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/text_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.text_classification"}, "('MODELS', 'sentence-similarity', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/text_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.text_classification"}, "('MODELS', 'zero-shot-classification', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/text_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.text_classification"}, "('MODELS', 'sentence-embedding', 'bert')": {"filepath": 
"TEMPLATE_PATH/models/nlp/bert/sentence_embedding.py", "imports": ["torch"], "module": "modelscope.models.nlp.bert.sentence_embedding"}, "('MODELS', 'document-segmentation', 'bert-for-document-segmentation')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/document_segmentation.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.bert.document_segmentation"}, "('MODELS', 'token-classification', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/token_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.token_classification"}, "('MODELS', 'part-of-speech', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/token_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.token_classification"}, "('MODELS', 'word-segmentation', 'bert')": {"filepath": "TEMPLATE_PATH/models/nlp/bert/token_classification.py", "imports": [], "module": "modelscope.models.nlp.bert.token_classification"}, "('MODELS', 'document-grounded-dialog-rerank', 'doc2bot')": {"filepath": "TEMPLATE_PATH/models/nlp/dgds/document_grounded_dialog_rerank.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.nlp.dgds.document_grounded_dialog_rerank"}, "('MODELS', 'document-grounded-dialog-generate', 'doc2bot')": {"filepath": "TEMPLATE_PATH/models/nlp/dgds/document_grounded_dialog_generate.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.nlp.dgds.document_grounded_dialog_generate"}, "('MODELS', 'document-grounded-dialog-retrieval', 'doc2bot')": {"filepath": "TEMPLATE_PATH/models/nlp/dgds/document_grounded_dialog_retrieval.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.nlp.dgds.document_grounded_dialog_retrieval"}, "('MODELS', 'text-generation', 'gpt3')": {"filepath": "TEMPLATE_PATH/models/nlp/gpt3/text_generation.py", "imports": ["torch", "typing", "transformers", "collections"], "module": "modelscope.models.nlp.gpt3.text_generation"}, "('MODELS', 'fill-mask', 'deberta_v2')": {"filepath": "TEMPLATE_PATH/models/nlp/deberta_v2/fill_mask.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.deberta_v2.fill_mask"}, "('MODELS', 'backbone', 'deberta_v2')": {"filepath": "TEMPLATE_PATH/models/nlp/deberta_v2/backbone.py", "imports": ["torch", "typing", "transformers", "collections"], "module": "modelscope.models.nlp.deberta_v2.backbone"}, "('MODELS', 'code-translation', 'codegeex')": {"filepath": "TEMPLATE_PATH/models/nlp/codegeex/codegeex_for_code_translation.py", "imports": ["torch", "copy", "typing"], "module": "modelscope.models.nlp.codegeex.codegeex_for_code_translation"}, "('MODELS', 'code-generation', 'codegeex')": {"filepath": "TEMPLATE_PATH/models/nlp/codegeex/codegeex_for_code_generation.py", "imports": ["torch", "copy", "typing"], "module": "modelscope.models.nlp.codegeex.codegeex_for_code_generation"}, "('MODELS', 'task-oriented-conversation', 'space-modeling')": {"filepath": "TEMPLATE_PATH/models/nlp/space/dialog_modeling.py", "imports": ["os", "typing"], "module": "modelscope.models.nlp.space.dialog_modeling"}, "('MODELS', 'task-oriented-conversation', 'space-dst')": {"filepath": "TEMPLATE_PATH/models/nlp/space/dialog_state_tracking.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.models.nlp.space.dialog_state_tracking"}, "('MODELS', 'task-oriented-conversation', 'space-intent')": {"filepath": "TEMPLATE_PATH/models/nlp/space/dialog_intent_prediction.py", "imports": ["os", "typing"], "module": "modelscope.models.nlp.space.dialog_intent_prediction"}, 
"('MODELS', 'fid-dialogue', 'fid-plug')": {"filepath": "TEMPLATE_PATH/models/nlp/fid_plug/text_generation.py", "imports": ["torch", "os", "io", "transformers"], "module": "modelscope.models.nlp.fid_plug.text_generation"}, "('BACKBONES', 'backbone', 'gpt2')": {"filepath": "TEMPLATE_PATH/models/nlp/gpt2/backbone.py", "imports": ["transformers"], "module": "modelscope.models.nlp.gpt2.backbone"}, "('MODELS', 'fill-mask', 'megatron-bert')": {"filepath": "TEMPLATE_PATH/models/nlp/megatron_bert/fill_mask.py", "imports": ["torch", "transformers"], "module": "modelscope.models.nlp.megatron_bert.fill_mask"}, "('MODELS', 'backbone', 'megatron-bert')": {"filepath": "TEMPLATE_PATH/models/nlp/megatron_bert/backbone.py", "imports": ["torch", "transformers", "math"], "module": "modelscope.models.nlp.megatron_bert.backbone"}, "('MODELS', 'table-question-answering', 'space-T-cn')": {"filepath": "TEMPLATE_PATH/models/nlp/space_T_cn/table_question_answering.py", "imports": ["transformers", "numpy", "torch", "os", "typing"], "module": "modelscope.models.nlp.space_T_cn.table_question_answering"}, "('MODELS', 'fill-mask', 'ponet')": {"filepath": "TEMPLATE_PATH/models/nlp/ponet/fill_mask.py", "imports": ["torch", "transformers"], "module": "modelscope.models.nlp.ponet.fill_mask"}, "('MODELS', 'backbone', 'ponet')": {"filepath": "TEMPLATE_PATH/models/nlp/ponet/backbone.py", "imports": ["distutils", "transformers", "torch", "math", "packaging"], "module": "modelscope.models.nlp.ponet.backbone"}, "('MODELS', 'document-segmentation', 'ponet-for-document-segmentation')": {"filepath": "TEMPLATE_PATH/models/nlp/ponet/document_segmentation.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.ponet.document_segmentation"}, "('MODELS', 'extractive-summarization', 'ponet-for-document-segmentation')": {"filepath": "TEMPLATE_PATH/models/nlp/ponet/document_segmentation.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.ponet.document_segmentation"}, "('MODELS', 'backbone', 'llama')": {"filepath": "TEMPLATE_PATH/models/nlp/llama/backbone.py", "imports": ["torch", "typing", "transformers", "math"], "module": "modelscope.models.nlp.llama.backbone"}, "('MODELS', 'text-generation', 'llama')": {"filepath": "TEMPLATE_PATH/models/nlp/llama/text_generation.py", "imports": ["torch", "typing"], "module": "modelscope.models.nlp.llama.text_generation"}, "('MODELS', 'backbone', 'lstm')": {"filepath": "TEMPLATE_PATH/models/nlp/lstm/backbone.py", "imports": ["torch"], "module": "modelscope.models.nlp.lstm.backbone"}, "('MODELS', 'token-classification', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/lstm/token_classification.py", "imports": [], "module": "modelscope.models.nlp.lstm.token_classification"}, "('MODELS', 'named-entity-recognition', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/lstm/token_classification.py", "imports": [], "module": "modelscope.models.nlp.lstm.token_classification"}, "('MODELS', 'part-of-speech', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/lstm/token_classification.py", "imports": [], "module": "modelscope.models.nlp.lstm.token_classification"}, "('MODELS', 'word-segmentation', 'lstm-crf')": {"filepath": "TEMPLATE_PATH/models/nlp/lstm/token_classification.py", "imports": [], "module": "modelscope.models.nlp.lstm.token_classification"}, "('MODELS', 'word-segmentation', 'lstm-crf-for-word-segmentation')": {"filepath": "TEMPLATE_PATH/models/nlp/lstm/token_classification.py", "imports": [], "module": "modelscope.models.nlp.lstm.token_classification"}, 
"('MODELS', 'image-deblurring', 'nafnet')": {"filepath": "TEMPLATE_PATH/models/cv/image_deblur/nafnet_for_image_deblur.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_deblur.nafnet_for_image_deblur"}, "('MODELS', 'image-segmentation', 'vision-middleware')": {"filepath": "TEMPLATE_PATH/models/cv/vision_middleware/model.py", "imports": ["json", "torch", "typing", "os"], "module": "modelscope.models.cv.vision_middleware.model"}, "('MODELS', 'image-quality-assessment-mos', 'image-quality-assessment-man')": {"filepath": "TEMPLATE_PATH/models/cv/image_quality_assessment_man/image_quality_assessment_man.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_quality_assessment_man.image_quality_assessment_man"}, "('MODELS', 'product-retrieval-embedding', 'product-retrieval-embedding')": {"filepath": "TEMPLATE_PATH/models/cv/product_retrieval_embedding/item_model.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.models.cv.product_retrieval_embedding.item_model"}, "('MODELS', 'body-2d-keypoints', 'body-2d-keypoints')": {"filepath": "TEMPLATE_PATH/models/cv/body_2d_keypoints/hrnet_v2.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.body_2d_keypoints.hrnet_v2"}, "('MODELS', 'indoor-layout-estimation', 'panovit-layout-estimation')": {"filepath": "TEMPLATE_PATH/models/cv/indoor_layout_estimation/panovit.py", "imports": ["torch", "os", "yacs", "numpy"], "module": "modelscope.models.cv.indoor_layout_estimation.panovit"}, "('MODELS', 'semantic-segmentation', 'detection')": {"filepath": "TEMPLATE_PATH/models/cv/salient_detection/salient_model.py", "imports": ["PIL", "torch", "cv2", "torchvision", "os"], "module": "modelscope.models.cv.salient_detection.salient_model"}, "('MODELS', 'image-quality-assessment-degradation', 'image-quality-assessment-degradation')": {"filepath": "TEMPLATE_PATH/models/cv/image_quality_assessment_degradation/image_quality_assessment_degradation.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_quality_assessment_degradation.image_quality_assessment_degradation"}, "('MODELS', 'image-portrait-enhancement', 'gpen')": {"filepath": "TEMPLATE_PATH/models/cv/image_portrait_enhancement/image_portrait_enhancement.py", "imports": ["torch", "os", "typing", "math"], "module": "modelscope.models.cv.image_portrait_enhancement.image_portrait_enhancement"}, "('HEADS', 'default', 'MaskScoringNRoIHead')": {"filepath": "TEMPLATE_PATH/models/cv/abnormal_object_detection/mmdet_ms/roi_head/mask_scoring_roi_head.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.abnormal_object_detection.mmdet_ms.roi_head.mask_scoring_roi_head"}, "('ROI_EXTRACTORS', 'default', 'SingleRoINExtractor')": {"filepath": "TEMPLATE_PATH/models/cv/abnormal_object_detection/mmdet_ms/roi_head/roi_extractors/single_level_roi_extractor.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.abnormal_object_detection.mmdet_ms.roi_head.roi_extractors.single_level_roi_extractor"}, "('MODELS', 'image-object-detection', 'MaskScoring')": {"filepath": "TEMPLATE_PATH/models/cv/abnormal_object_detection/mmdet_model.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.abnormal_object_detection.mmdet_model"}, "('MODELS', 'image-classification', 'image-probing-model')": {"filepath": "TEMPLATE_PATH/models/cv/image_probing_model/model.py", "imports": ["json", "torch", "typing", "os"], "module": "modelscope.models.cv.image_probing_model.model"}, 
"('MODELS', 'video-human-matting', 'video-human-matting')": {"filepath": "TEMPLATE_PATH/models/cv/video_human_matting/model.py", "imports": ["numpy", "torch", "torchvision", "os", "typing"], "module": "modelscope.models.cv.video_human_matting.model"}, "('MODELS', 'language-guided-video-summarization', 'clip-it-language-guided-video-summarization')": {"filepath": "TEMPLATE_PATH/models/cv/language_guided_video_summarization/summarizer.py", "imports": ["numpy", "videofeatures_clipit", "torch", "bmt_clipit", "os", "typing", "argparse"], "module": "modelscope.models.cv.language_guided_video_summarization.summarizer"}, "('MODELS', 'face-2d-keypoints', 'flc')": {"filepath": "TEMPLATE_PATH/models/cv/facial_landmark_confidence/flc/facial_landmark_confidence.py", "imports": ["PIL", "numpy", "torch", "cv2", "os"], "module": "modelscope.models.cv.facial_landmark_confidence.flc.facial_landmark_confidence"}, "('MODELS', 'image-body-reshaping', 'image-body-reshaping')": {"filepath": "TEMPLATE_PATH/models/cv/image_body_reshaping/image_body_reshaping.py", "imports": ["numpy", "cv2", "torch", "os", "typing"], "module": "modelscope.models.cv.image_body_reshaping.image_body_reshaping"}, "('MODELS', 'image-segmentation', 'm2fp')": {"filepath": "TEMPLATE_PATH/models/cv/image_human_parsing/m2fp_net.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_human_parsing.m2fp_net"}, "('PREPROCESSORS', 'cv', 'image-sky-change-preprocessor')": {"filepath": "TEMPLATE_PATH/models/cv/image_skychange/preprocessor.py", "imports": ["numbers", "pdb", "numpy", "cv2", "json", "torch", "torchvision", "typing"], "module": "modelscope.models.cv.image_skychange.preprocessor"}, "('MODELS', 'image-skychange', 'image-skychange')": {"filepath": "TEMPLATE_PATH/models/cv/image_skychange/skychange_model.py", "imports": ["pdb", "cv2", "torch", "json", "math", "collections", "time", "os", "typing"], "module": "modelscope.models.cv.image_skychange.skychange_model"}, "('MODELS', 'video-object-segmentation', 'video-object-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/video_object_segmentation/model.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.video_object_segmentation.model"}, "('MODELS', 'face-reconstruction', 'face_reconstruction')": {"filepath": "TEMPLATE_PATH/models/cv/face_reconstruction/models/facerecon_model.py", "imports": ["numpy", "cv2", "torch", "collections", "os"], "module": "modelscope.models.cv.face_reconstruction.models.facerecon_model"}, "('MODELS', 'facial-expression-recognition', 'fer')": {"filepath": "TEMPLATE_PATH/models/cv/facial_expression_recognition/fer/facial_expression_recognition.py", "imports": ["PIL", "numpy", "torch", "cv2", "os"], "module": "modelscope.models.cv.facial_expression_recognition.fer.facial_expression_recognition"}, "('MODELS', 'face-recognition', 'rts-backbone')": {"filepath": "TEMPLATE_PATH/models/cv/face_recognition/torchkit/rts_backbone.py", "imports": ["torch", "os", "math", "collections"], "module": "modelscope.models.cv.face_recognition.torchkit.rts_backbone"}, "('MODELS', 'shop-segmentation', 'shop-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/shop_segmentation/shop_seg_model.py", "imports": ["PIL", "numpy", "torch", "json", "os", "typing"], "module": "modelscope.models.cv.shop_segmentation.shop_seg_model"}, "('MODELS', 'image-segmentation', 'fastinst')": {"filepath": "TEMPLATE_PATH/models/cv/image_instance_segmentation/fastinst_model.py", "imports": ["torch", "os", "typing"], "module": 
"modelscope.models.cv.image_instance_segmentation.fastinst_model"}, "('MODELS', 'image-segmentation', 'cascade_mask_rcnn_swin')": {"filepath": "TEMPLATE_PATH/models/cv/image_instance_segmentation/model.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_instance_segmentation.model"}, "('MODELS', 'image-segmentation', 'maskdino_swin')": {"filepath": "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino_model.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_instance_segmentation.maskdino_model"}, "('MODELS', 'video-text-retrieval', 'vop-retrieval-model')": {"filepath": "TEMPLATE_PATH/models/cv/vop_retrieval/model.py", "imports": ["torch", "os"], "module": "modelscope.models.cv.vop_retrieval.model"}, "('MODELS', 'video-text-retrieval', 'vop-retrieval-model-se')": {"filepath": "TEMPLATE_PATH/models/cv/vop_retrieval/model_se.py", "imports": ["torch", "os"], "module": "modelscope.models.cv.vop_retrieval.model_se"}, "('HEADS', 'default', 'KernelUpdateHeadVideo')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/track/kernel_update_head.py", "imports": ["torch", "mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.video_instance_segmentation.track.kernel_update_head"}, "('MATCH_COST', 'default', 'MaskCost')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/track/mask_hungarian_assigner.py", "imports": ["torch", "scipy", "mmdet", "numpy"], "module": "modelscope.models.cv.video_instance_segmentation.track.mask_hungarian_assigner"}, "('BBOX_ASSIGNERS', 'default', 'MaskHungarianAssignerVideo')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/track/mask_hungarian_assigner.py", "imports": ["torch", "scipy", "mmdet", "numpy"], "module": "modelscope.models.cv.video_instance_segmentation.track.mask_hungarian_assigner"}, "('MODELS', 'video-instance-segmentation', 'swinb-video-instance-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/video_knet.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.video_instance_segmentation.video_knet"}, "('TRANSFORMER_LAYER', 'default', 'KernelUpdator')": {"filepath": "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/kernel_updator.py", "imports": ["torch", "mmcv"], "module": "modelscope.models.cv.video_panoptic_segmentation.head.kernel_updator"}, "('HEADS', 'default', 'KernelUpdateHead')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_update_head.py", "imports": ["torch", "mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.video_instance_segmentation.head.kernel_update_head"}, "('HEADS', 'default', 'KernelFrameIterHeadVideo')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_frame_iter_head.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.video_instance_segmentation.head.kernel_frame_iter_head"}, "('HEADS', 'default', 'ConvKernelHeadVideo')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_head.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.video_instance_segmentation.head.kernel_head"}, "('HEADS', 'default', 'KernelIterHeadVideo')": {"filepath": "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_iter_head.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.video_instance_segmentation.head.kernel_iter_head"}, "('NECKS', 'default', 'MSDeformAttnPixelDecoder')": {"filepath": 
"TEMPLATE_PATH/models/cv/video_instance_segmentation/neck/msdeformattn_decoder.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.video_instance_segmentation.neck.msdeformattn_decoder"}, "('MODELS', 'image-super-resolution', 'ecbsr')": {"filepath": "TEMPLATE_PATH/models/cv/super_resolution/ecbsr_model.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.super_resolution.ecbsr_model"}, "('PREPROCESSORS', 'cv', 'ocr-detection')": {"filepath": "TEMPLATE_PATH/models/cv/ocr_detection/preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "module": "modelscope.models.cv.ocr_detection.preprocessor"}, "('MODELS', 'ocr-detection', 'OCRDetection')": {"filepath": "TEMPLATE_PATH/models/cv/ocr_detection/model.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.models.cv.ocr_detection.model"}, "('MODELS', 'panorama-depth-estimation', 'unifuse-depth-estimation')": {"filepath": "TEMPLATE_PATH/models/cv/panorama_depth_estimation/unifuse_model.py", "imports": ["torch", "os", "torchvision", "numpy"], "module": "modelscope.models.cv.panorama_depth_estimation.unifuse_model"}, "('MODELS', 'video-object-detection', 'realtime-video-object-detection')": {"filepath": "TEMPLATE_PATH/models/cv/stream_yolo/realtime_video_detector.py", "imports": ["numpy", "cv2", "torch", "logging", "json", "tqdm", "time", "os", "argparse"], "module": "modelscope.models.cv.stream_yolo.realtime_video_detector"}, "('MODELS', 'bad-image-detecting', 'bad-image-detecting')": {"filepath": "TEMPLATE_PATH/models/cv/bad_image_detecting/bad_image_detecting.py", "imports": ["numpy", "torch", "torchvision", "os", "typing"], "module": "modelscope.models.cv.bad_image_detecting.bad_image_detecting"}, "('MODELS', 'human-reconstruction', 'human-reconstruction')": {"filepath": "TEMPLATE_PATH/models/cv/human_reconstruction/Reconstruction.py", "imports": ["PIL", "skimage", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.models.cv.human_reconstruction.Reconstruction"}, "('PREPROCESSORS', 'cv', 'image-driving-perception-preprocessor')": {"filepath": "TEMPLATE_PATH/models/cv/image_driving_perception/preprocessor.py", "imports": ["cv2", "torch", "typing", "numpy"], "module": "modelscope.models.cv.image_driving_perception.preprocessor"}, "('MODELS', 'image-driving-perception', 'yolopv2')": {"filepath": "TEMPLATE_PATH/models/cv/image_driving_perception/image_driving_percetion_model.py", "imports": ["numpy", "cv2", "torch", "os", "typing"], "module": "modelscope.models.cv.image_driving_perception.image_driving_percetion_model"}, "('MODELS', 'video-object-detection', 'longshortnet')": {"filepath": "TEMPLATE_PATH/models/cv/video_streaming_perception/longshortnet/longshortnet.py", "imports": ["numpy", "cv2", "torch", "logging", "json", "tqdm", "time", "os", "argparse"], "module": "modelscope.models.cv.video_streaming_perception.longshortnet.longshortnet"}, "('MODELS', 'image-paintbyexample', 'Stablediffusion-Paintbyexample')": {"filepath": "TEMPLATE_PATH/models/cv/image_paintbyexample/model.py", "imports": ["torch", "paint_ldm", "omegaconf", "os", "typing"], "module": "modelscope.models.cv.image_paintbyexample.model"}, "('MODELS', 'image-inpainting', 'FFTInpainting')": {"filepath": "TEMPLATE_PATH/models/cv/image_inpainting/model.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_inpainting.model"}, "('BBOX_CODERS', 'default', 'NMSFreeCoder')": {"filepath": 
"TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.core.bbox.coders.nms_free_coder"}, "('MATCH_COST', 'default', 'BBox3DL1Cost')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/match_costs/match_cost.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.core.bbox.match_costs.match_cost"}, "('BBOX_ASSIGNERS', 'default', 'HungarianAssigner3D')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py", "imports": ["torch", "scipy", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.core.bbox.assigners.hungarian_assigner_3d"}, "('DATASETS', 'default', 'CustomNuScenesDataset')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/nuscenes_dataset.py", "imports": ["mmdet3d", "mmdet", "numpy"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.nuscenes_dataset"}, "('PIPELINES', 'default', 'LoadMultiViewImageFromMultiSweepsFiles')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/loading.py", "imports": ["mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.pipelines.loading"}, "('PIPELINES', 'default', 'PadMultiViewImage')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/transform_3d.py", "imports": ["PIL", "copy", "mmcv", "numpy", "torch", "mmdet3d", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.pipelines.transform_3d"}, "('PIPELINES', 'default', 'NormalizeMultiviewImage')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/transform_3d.py", "imports": ["PIL", "copy", "mmcv", "numpy", "torch", "mmdet3d", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.pipelines.transform_3d"}, "('PIPELINES', 'default', 'ResizeCropFlipImage')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/transform_3d.py", "imports": ["PIL", "copy", "mmcv", "numpy", "torch", "mmdet3d", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.pipelines.transform_3d"}, "('HEADS', 'default', 'PETRv2DEDNHead')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/dense_heads/petrv2_dednhead.py", "imports": ["mmcv", "numpy", "torch", "math", "copy", "mmdet3d", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.dense_heads.petrv2_dednhead"}, "('NECKS', 'default', 'CPFPN')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/necks/cp_fpn.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.necks.cp_fpn"}, "('TRANSFORMER', 'default', 'PETRDNTransformer')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/petr_transformer.py", "imports": ["copy", "warnings", "mmcv", "torch", "mmdet", "typing", "math"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.petr_transformer"}, "('TRANSFORMER_LAYER', 'default', 
'PETRTransformerDecoderLayer')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/petr_transformer.py", "imports": ["copy", "warnings", "mmcv", "torch", "mmdet", "typing", "math"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.petr_transformer"}, "('ATTENTION', 'default', 'PETRMultiheadAttention')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/petr_transformer.py", "imports": ["copy", "warnings", "mmcv", "torch", "mmdet", "typing", "math"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.petr_transformer"}, "('TRANSFORMER_LAYER_SEQUENCE', 'default', 'PETRTransformerEncoder')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/petr_transformer.py", "imports": ["copy", "warnings", "mmcv", "torch", "mmdet", "typing", "math"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.petr_transformer"}, "('TRANSFORMER_LAYER_SEQUENCE', 'default', 'PETRTransformerDecoder')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/petr_transformer.py", "imports": ["copy", "warnings", "mmcv", "torch", "mmdet", "typing", "math"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.petr_transformer"}, "('POSITIONAL_ENCODING', 'default', 'SinePositionalEncoding3D')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/positional_encoding.py", "imports": ["torch", "mmcv", "math"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.positional_encoding"}, "('BACKBONES', 'default', 'VoVNet')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/backbones/vovnet.py", "imports": ["torch", "mmdet", "mmcv", "collections"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.backbones.vovnet"}, "('DETECTORS', 'default', 'Petr3D')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/detectors/petr3d.py", "imports": ["mmcv", "numpy", "torch", "mmdet3d", "mmdet"], "module": "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.detectors.petr3d"}, "('MODELS', 'object-detection-3d', 'depe')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection_3d/depe/depe_detect.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.models.cv.object_detection_3d.depe.depe_detect"}, "('MODELS', 'image-quality-assessment-mos', 'image-quality-assessment-mos')": {"filepath": "TEMPLATE_PATH/models/cv/image_quality_assessment_mos/image_quality_assessment_mos.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_quality_assessment_mos.image_quality_assessment_mos"}, "('MODELS', 'image-debanding', 'rrdb')": {"filepath": "TEMPLATE_PATH/models/cv/image_debanding/rrdb/rrdb_image_debanding.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_debanding.rrdb.rrdb_image_debanding"}, "('MODELS', 'image-demoireing', 'image-restoration')": {"filepath": "TEMPLATE_PATH/models/cv/image_restoration/image_restoration_model.py", "imports": ["cv2", "torch", "os", "numpy"], "module": "modelscope.models.cv.image_restoration.image_restoration_model"}, "('MODELS', 'vision-efficient-tuning', 'vision-efficient-tuning')": {"filepath": "TEMPLATE_PATH/models/cv/vision_efficient_tuning/model.py", "imports": 
["torch", "typing"], "module": "modelscope.models.cv.vision_efficient_tuning.model"}, "('MODELS', 'movie-scene-segmentation', 'resnet50-bert')": {"filepath": "TEMPLATE_PATH/models/cv/movie_scene_segmentation/model.py", "imports": ["PIL", "numpy", "torch", "einops", "tqdm", "math", "shotdetect_scenedetect_lgss", "torchvision", "os", "typing"], "module": "modelscope.models.cv.movie_scene_segmentation.model"}, "('MODELS', 'video-summarization', 'pgl-video-summarization')": {"filepath": "TEMPLATE_PATH/models/cv/video_summarization/summarizer.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.models.cv.video_summarization.summarizer"}, "('MODELS', 'lineless-table-recognition', 'LoreModel')": {"filepath": "TEMPLATE_PATH/models/cv/table_recognition/model_lore.py", "imports": ["numpy", "torch", "math", "copy", "os", "typing"], "module": "modelscope.models.cv.table_recognition.model_lore"}, "('MODELS', 'image-matching', 'quadtree-attention-image-matching')": {"filepath": "TEMPLATE_PATH/models/cv/image_matching/quadtree_attention_model.py", "imports": ["numpy", "cv2", "torch", "pathlib", "os"], "module": "modelscope.models.cv.image_matching.quadtree_attention_model"}, "('MODELS', 'image-object-detection', 'tinynas-detection')": {"filepath": "TEMPLATE_PATH/models/cv/tinynas_detection/tinynas_detector.py", "imports": [], "module": "modelscope.models.cv.tinynas_detection.tinynas_detector"}, "('MODELS', 'domain-specific-object-detection', 'tinynas-damoyolo')": {"filepath": "TEMPLATE_PATH/models/cv/tinynas_detection/tinynas_damoyolo.py", "imports": [], "module": "modelscope.models.cv.tinynas_detection.tinynas_damoyolo"}, "('MODELS', 'image-object-detection', 'tinynas-damoyolo')": {"filepath": "TEMPLATE_PATH/models/cv/tinynas_detection/tinynas_damoyolo.py", "imports": [], "module": "modelscope.models.cv.tinynas_detection.tinynas_damoyolo"}, "('MODELS', 'nerf-recon-acc', 'nerf-recon-acc')": {"filepath": "TEMPLATE_PATH/models/cv/nerf_recon_acc/nerf_recon_acc.py", "imports": ["numpy", "cv2", "torch", "tqdm", "time", "os", "glob"], "module": "modelscope.models.cv.nerf_recon_acc.nerf_recon_acc"}, "('PREPROCESSORS', 'cv', 'nerf-recon-acc-preprocessor')": {"filepath": "TEMPLATE_PATH/models/cv/nerf_recon_acc/nerf_preprocess.py", "imports": ["subprocess", "tensorflow", "numpy", "cv2", "glob", "os", "typing"], "module": "modelscope.models.cv.nerf_recon_acc.nerf_preprocess"}, "('MODELS', 'video-deinterlace', 'video-deinterlace')": {"filepath": "TEMPLATE_PATH/models/cv/video_deinterlace/UNet_for_video_deinterlace.py", "imports": ["torch", "os", "copy", "typing"], "module": "modelscope.models.cv.video_deinterlace.UNet_for_video_deinterlace"}, "('MODELS', 'image-depth-estimation', 'bts-depth-estimation')": {"filepath": "TEMPLATE_PATH/models/cv/image_depth_estimation_bts/depth_estimation_bts_model.py", "imports": ["torch", "os"], "module": "modelscope.models.cv.image_depth_estimation_bts.depth_estimation_bts_model"}, "('MODELS', 'image-fewshot-detection', 'defrcn')": {"filepath": "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/defrcn_for_fewshot.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_defrcn_fewshot.defrcn_for_fewshot"}, "('PREPROCESSORS', 'cv', 'ocr-recognition')": {"filepath": "TEMPLATE_PATH/models/cv/ocr_recognition/preprocessor.py", "imports": ["PIL", "numpy", "torch", "cv2", "os"], "module": "modelscope.models.cv.ocr_recognition.preprocessor"}, "('MODELS', 'ocr-recognition', 'OCRRecognition')": {"filepath": 
"TEMPLATE_PATH/models/cv/ocr_recognition/model.py", "imports": ["torch", "os"], "module": "modelscope.models.cv.ocr_recognition.model"}, "('TRACKERS', 'default', 'QuasiDenseEmbedTracker')": {"filepath": "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/track/quasi_dense_embed_tracker.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.video_panoptic_segmentation.track.quasi_dense_embed_tracker"}, "('HEADS', 'default', 'VideoKernelUpdateHead')": {"filepath": "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/kernel_update_head.py", "imports": ["torch", "mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.video_panoptic_segmentation.head.kernel_update_head"}, "('NECKS', 'default', 'SemanticFPNWrapper')": {"filepath": "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/semantic_fpn_wrapper.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.video_panoptic_segmentation.head.semantic_fpn_wrapper"}, "('HEADS', 'default', 'VideoKernelIterHead')": {"filepath": "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/kernel_iter_head.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.video_panoptic_segmentation.head.kernel_iter_head"}, "('MODELS', 'video-panoptic-segmentation', 'swinb-video-panoptic-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/video_k_net.py", "imports": ["torch", "mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.video_panoptic_segmentation.video_k_net"}, "('MODELS', 'open-vocabulary-detection', 'open-vocabulary-detection-vild')": {"filepath": "TEMPLATE_PATH/models/cv/open_vocabulary_detection_vild/vild.py", "imports": ["tensorflow", "numpy", "torch", "scipy", "clip", "os", "typing"], "module": "modelscope.models.cv.open_vocabulary_detection_vild.vild"}, "('MODELS', 'image-reid-person', 'passvitb')": {"filepath": "TEMPLATE_PATH/models/cv/image_reid_person/pass_model.py", "imports": ["torch", "os", "enum"], "module": "modelscope.models.cv.image_reid_person.pass_model"}, "('MODELS', 'image-face-fusion', 'image-face-fusion')": {"filepath": "TEMPLATE_PATH/models/cv/image_face_fusion/image_face_fusion.py", "imports": ["PIL", "numpy", "torch", "cv2", "collections", "torchvision", "os", "typing"], "module": "modelscope.models.cv.image_face_fusion.image_face_fusion"}, "('MODELS', 'product-segmentation', 'product-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/product_segmentation/seg_infer.py", "imports": ["PIL", "torch", "cv2", "numpy"], "module": "modelscope.models.cv.product_segmentation.seg_infer"}, "('MODELS', 'controllable-image-generation', 'controllable-image-generation')": {"filepath": "TEMPLATE_PATH/models/cv/controllable_image_generation/controlnet.py", "imports": ["PIL", "random", "numpy", "cv2", "torch", "einops", "tempfile", "sys", "math", "control_ldm", "os", "typing"], "module": "modelscope.models.cv.controllable_image_generation.controlnet"}, "('MODELS', 'video-inpainting', 'video-inpainting')": {"filepath": "TEMPLATE_PATH/models/cv/video_inpainting/inpainting_model.py", "imports": ["torch", "torchvision", "math", "numpy"], "module": "modelscope.models.cv.video_inpainting.inpainting_model"}, "('MODELS', 'image-multi-view-depth-estimation', 'image-casmvs-depth-estimation')": {"filepath": "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/casmvs_model.py", "imports": ["numpy", "cv2", "torch", "os", "easydict"], "module": "modelscope.models.cv.image_mvs_depth_estimation.casmvs_model"}, "('MODELS', 'image-classification', 'bnext')": 
{"filepath": "TEMPLATE_PATH/models/cv/image_binary_quant_classification/binary_quant_model.py", "imports": ["torch", "os", "collections"], "module": "modelscope.models.cv.image_binary_quant_classification.binary_quant_model"}, "('MODELS', 'body-3d-keypoints', 'hdformer')": {"filepath": "TEMPLATE_PATH/models/cv/body_3d_keypoints/hdformer/hdformer_detector.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.models.cv.body_3d_keypoints.hdformer.hdformer_detector"}, "('MODELS', 'body-3d-keypoints', 'body-3d-keypoints')": {"filepath": "TEMPLATE_PATH/models/cv/body_3d_keypoints/cannonical_pose/body_3d_pose.py", "imports": ["numpy", "torch", "logging", "os", "typing"], "module": "modelscope.models.cv.body_3d_keypoints.cannonical_pose.body_3d_pose"}, "('MODELS', 'video-frame-interpolation', 'video-frame-interpolation')": {"filepath": "TEMPLATE_PATH/models/cv/video_frame_interpolation/VFINet_for_video_frame_interpolation.py", "imports": ["torch", "os", "copy", "typing"], "module": "modelscope.models.cv.video_frame_interpolation.VFINet_for_video_frame_interpolation"}, "('HEADS', 'default', 'RPNNHead')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/dense_heads/rpn_head.py", "imports": ["torch", "copy", "mmcv", "mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.dense_heads.rpn_head"}, "('HEADS', 'default', 'AnchorNHead')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/dense_heads/anchor_head.py", "imports": ["mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.dense_heads.anchor_head"}, "('NECKS', 'default', 'FPNF')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/necks/fpn.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.necks.fpn"}, "('BACKBONES', 'default', 'ViT')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/backbones/vit.py", "imports": ["timm", "torch", "functools", "math", "mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.backbones.vit"}, "('HEADS', 'default', 'ConvFCBBoxNHead')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.roi_heads.bbox_heads.convfc_bbox_head"}, "('HEADS', 'default', 'Shared2FCBBoxNHead')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.roi_heads.bbox_heads.convfc_bbox_head"}, "('HEADS', 'default', 'Shared4Conv1FCBBoxNHead')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.roi_heads.bbox_heads.convfc_bbox_head"}, "('HEADS', 'default', 'FCNMaskNHead')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/fcn_mask_head.py", "imports": ["mmcv", "numpy", "torch", "warnings", "mmdet"], "module": "modelscope.models.cv.object_detection.mmdet_ms.roi_heads.mask_heads.fcn_mask_head"}, "('MODELS', 'human-detection', 'detection')": {"filepath": "TEMPLATE_PATH/models/cv/object_detection/mmdet_model.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.object_detection.mmdet_model"}, "('MODELS', 'image-object-detection', 'detection')": {"filepath": 
"TEMPLATE_PATH/models/cv/object_detection/mmdet_model.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.object_detection.mmdet_model"}, "('MODELS', 'pedestrian-attribute-recognition', 'pedestrian-attribute-recognition')": {"filepath": "TEMPLATE_PATH/models/cv/pedestrian_attribute_recognition/model.py", "imports": ["torch", "os", "torchvision", "numpy"], "module": "modelscope.models.cv.pedestrian_attribute_recognition.model"}, "('MODELS', 'pointcloud-sceneflow-estimation', 'rcp-sceneflow-estimation')": {"filepath": "TEMPLATE_PATH/models/cv/pointcloud_sceneflow_estimation/rcp_model.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.pointcloud_sceneflow_estimation.rcp_model"}, "('MODELS', 'video-stabilization', 'video-stabilization')": {"filepath": "TEMPLATE_PATH/models/cv/video_stabilization/DUTRAFTStabilizer.py", "imports": ["numpy", "cv2", "torch", "tempfile", "sys", "math", "os", "typing"], "module": "modelscope.models.cv.video_stabilization.DUTRAFTStabilizer"}, "('MODELS', 'video-depth-estimation', 'dro-resnet18-depth-estimation')": {"filepath": "TEMPLATE_PATH/models/cv/video_depth_estimation/dro_model.py", "imports": ["numpy", "cv2", "torch", "tqdm", "os", "glob"], "module": "modelscope.models.cv.video_depth_estimation.dro_model"}, "('MODELS', 'image-object-detection', 'vidt')": {"filepath": "TEMPLATE_PATH/models/cv/vidt/model.py", "imports": ["torch", "os"], "module": "modelscope.models.cv.vidt.model"}, "('MODELS', 'face-human-hand-detection', 'face-human-hand-detection')": {"filepath": "TEMPLATE_PATH/models/cv/face_human_hand_detection/det_infer.py", "imports": ["cv2", "torch", "numpy"], "module": "modelscope.models.cv.face_human_hand_detection.det_infer"}, "('MODELS', 'referring-video-object-segmentation', 'swinT-referring-video-object-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/model.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.referring_video_object_segmentation.model"}, "('MODELS', 'hand-static', 'hand-static')": {"filepath": "TEMPLATE_PATH/models/cv/hand_static/hand_model.py", "imports": ["PIL", "numpy", "torch", "cv2", "sys", "torchvision", "os"], "module": "modelscope.models.cv.hand_static.hand_model"}, "('MODELS', 'image-depth-estimation', 'newcrfs-depth-estimation')": {"filepath": "TEMPLATE_PATH/models/cv/image_depth_estimation/newcrfs_model.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.image_depth_estimation.newcrfs_model"}, "('MODELS', 'image-colorization', 'ddcolor')": {"filepath": "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/ddcolor_for_image_colorization.py", "imports": ["numpy", "torch", "copy", "os", "typing"], "module": "modelscope.models.cv.image_colorization.ddcolor.ddcolor_for_image_colorization"}, "('MODELS', 'face-detection', 'retinaface')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/retinaface/detection.py", "imports": ["cv2", "torch", "numpy"], "module": "modelscope.models.cv.face_detection.retinaface.detection"}, "('MODELS', 'face-detection', 'mtcnn')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/mtcnn/models/detector.py", "imports": ["PIL", "torch", "os", "numpy"], "module": "modelscope.models.cv.face_detection.mtcnn.models.detector"}, "('MODELS', 'face-detection', 'ulfd')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/detection.py", "imports": ["cv2", "torch", "os", "numpy"], "module": "modelscope.models.cv.face_detection.ulfd_slim.detection"}, "('MODELS', 
'face-detection', 'scrfd')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/scrfd_detect.py", "imports": ["numpy", "torch", "copy", "os", "typing"], "module": "modelscope.models.cv.face_detection.scrfd.scrfd_detect"}, "('MODELS', 'card-detection', 'scrfd')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/scrfd_detect.py", "imports": ["numpy", "torch", "copy", "os", "typing"], "module": "modelscope.models.cv.face_detection.scrfd.scrfd_detect"}, "('DATASETS', 'default', 'RetinaFaceDataset')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/retinaface.py", "imports": ["mmdet", "numpy"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.retinaface"}, "('PIPELINES', 'default', 'RotateV2')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/auto_augment.py", "imports": ["copy", "mmcv", "numpy", "cv2", "mmdet"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.auto_augment"}, "('PIPELINES', 'default', 'ResizeV2')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/transforms.py", "imports": ["mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.transforms"}, "('PIPELINES', 'default', 'RandomFlipV2')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/transforms.py", "imports": ["mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.transforms"}, "('PIPELINES', 'default', 'RandomSquareCrop')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/transforms.py", "imports": ["mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.transforms"}, "('PIPELINES', 'default', 'LoadAnnotationsV2')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/loading.py", "imports": ["os", "mmdet", "numpy", "pycocotools"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.loading"}, "('PIPELINES', 'default', 'DefaultFormatBundleV2')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/formating.py", "imports": ["torch", "mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.formating"}, "('HEADS', 'default', 'SCRFDHead')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/dense_heads/scrfd_head.py", "imports": ["torch", "mmcv", "mmdet", "numpy"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.dense_heads.scrfd_head"}, "('BACKBONES', 'default', 'MasterNet')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/master_net.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.backbones.master_net"}, "('BACKBONES', 'default', 'MobileNetV1')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/mobilenet.py", "imports": ["torch", "mmcv", "mmdet"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.backbones.mobilenet"}, "('BACKBONES', 'default', 'ResNetV1e')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/resnet.py", "imports": ["torch", "mmcv", "mmdet"], "module": 
"modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.backbones.resnet"}, "('DETECTORS', 'default', 'SCRFD')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/scrfd.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors.scrfd"}, "('DETECTORS', 'default', 'CustomSingleStageDetector')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/single_stage.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors.single_stage"}, "('DETECTORS', 'default', 'TinyMog')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/tinymog.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors.tinymog"}, "('MODELS', 'face-detection', 'tinymog')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/tinymog_detect.py", "imports": ["torch", "os", "copy", "typing"], "module": "modelscope.models.cv.face_detection.scrfd.tinymog_detect"}, "('PREPROCESSORS', 'cv', 'object-detection-scrfd')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/preprocessor.py", "imports": ["PIL", "typing", "numpy"], "module": "modelscope.models.cv.face_detection.scrfd.preprocessor"}, "('MODELS', 'face-detection', 'damofd')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/scrfd/damofd_detect.py", "imports": ["torch", "os", "copy", "typing"], "module": "modelscope.models.cv.face_detection.scrfd.damofd_detect"}, "('MODELS', 'face-detection', 'mogface')": {"filepath": "TEMPLATE_PATH/models/cv/face_detection/mogface/models/detectors.py", "imports": ["cv2", "torch", "os", "numpy"], "module": "modelscope.models.cv.face_detection.mogface.models.detectors"}, "('MODELS', 'image-classification', 'EasyRobustModel')": {"filepath": "TEMPLATE_PATH/models/cv/robust_image_classification/easyrobust_model.py", "imports": ["torch", "os"], "module": "modelscope.models.cv.robust_image_classification.easyrobust_model"}, "('MODELS', 'semantic-segmentation', 'ddpm')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/ddpm_segmentation_model.py", "imports": ["torch", "os", "typing", "ddpm_guided_diffusion"], "module": "modelscope.models.cv.image_semantic_segmentation.ddpm_segmentation_model"}, "('PIPELINES', 'default', 'ResizeToMultiple')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/utils/data_process_func.py", "imports": ["mmcv", "mmdet"], "module": "modelscope.models.cv.image_semantic_segmentation.vit_adapter.utils.data_process_func"}, "('BACKBONES', 'default', 'BEiTAdapter')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/beit_adapter.py", "imports": ["timm", "torch", "logging", "math", "mmdet"], "module": "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.backbone.beit_adapter"}, "('BACKBONES', 'default', 'BASEBEiT')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/base/beit.py", "imports": ["timm", "mmcv", "torch", "mmdet", "functools", "math"], "module": "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.backbone.base.beit"}, "('DETECTORS', 'default', 'EncoderDecoderMask2Former')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/encoder_decoder_mask2former.py", "imports": ["torch", "mmdet"], "module": 
"modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.segmentors.encoder_decoder_mask2former"}, "('HEADS', 'default', 'Mask2FormerHeadFromMMSeg')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/mask2former_head_from_mmseg.py", "imports": ["torch", "copy", "mmcv", "mmdet"], "module": "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.decode_heads.mask2former_head_from_mmseg"}, "('MODELS', 'image-segmentation', 'swinL-semantic-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/semantic_seg_model.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.image_semantic_segmentation.semantic_seg_model"}, "('MODELS', 'image-segmentation', 'vitadapter-semantic-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/semantic_seg_model.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.models.cv.image_semantic_segmentation.semantic_seg_model"}, "('HEADS', 'default', 'MaskFormerSemanticHead')": {"filepath": "TEMPLATE_PATH/models/cv/image_semantic_segmentation/pan_merge/maskformer_semantic_head.py", "imports": ["torch", "mmdet"], "module": "modelscope.models.cv.image_semantic_segmentation.pan_merge.maskformer_semantic_head"}, "('MODELS', 'text-driven-segmentation', 'text-driven-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/text_driven_segmentation/lseg_model.py", "imports": ["PIL", "numpy", "torch", "json", "os", "typing"], "module": "modelscope.models.cv.text_driven_segmentation.lseg_model"}, "('MODELS', 'crowd-counting', 'HRNetCrowdCounting')": {"filepath": "TEMPLATE_PATH/models/cv/crowd_counting/cc_model.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.crowd_counting.cc_model"}, "('MODELS', 'image-segmentation', 'swinL-panoptic-segmentation')": {"filepath": "TEMPLATE_PATH/models/cv/image_panoptic_segmentation/panseg_model.py", "imports": ["torch", "os"], "module": "modelscope.models.cv.image_panoptic_segmentation.panseg_model"}, "('MODELS', 'face-emotion', 'face-emotion')": {"filepath": "TEMPLATE_PATH/models/cv/face_emotion/emotion_model.py", "imports": ["torch", "os", "sys"], "module": "modelscope.models.cv.face_emotion.emotion_model"}, "('MODELS', 'video-super-resolution', 'msrresnet-lite')": {"filepath": "TEMPLATE_PATH/models/cv/video_super_resolution/msrresnet_lite_model.py", "imports": ["torch", "os", "functools", "typing"], "module": "modelscope.models.cv.video_super_resolution.msrresnet_lite_model"}, "('MODELS', 'video-super-resolution', 'real-basicvsr')": {"filepath": "TEMPLATE_PATH/models/cv/video_super_resolution/real_basicvsr_for_video_super_resolution.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.video_super_resolution.real_basicvsr_for_video_super_resolution"}, "('MODELS', 'face-attribute-recognition', 'fairface')": {"filepath": "TEMPLATE_PATH/models/cv/face_attribute_recognition/fair_face/face_attribute_recognition.py", "imports": ["PIL", "numpy", "torch", "cv2", "torchvision", "os"], "module": "modelscope.models.cv.face_attribute_recognition.fair_face.face_attribute_recognition"}, "('MODELS', 'image-denoising', 'nafnet')": {"filepath": "TEMPLATE_PATH/models/cv/image_denoise/nafnet_for_image_denoise.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_denoise.nafnet_for_image_denoise"}, "('MODELS', 'image-classification', 'ClassificationModel')": {"filepath": "TEMPLATE_PATH/models/cv/image_classification/mmcls_model.py", 
"imports": ["os"], "module": "modelscope.models.cv.image_classification.mmcls_model"}, "('BACKBONES', 'default', 'BEiTv2')": {"filepath": "TEMPLATE_PATH/models/cv/image_classification/backbones/beit_v2.py", "imports": ["itertools", "mmcv", "torch", "einops", "functools", "mmcls", "math", "collections", "warnings", "os", "typing"], "module": "modelscope.models.cv.image_classification.backbones.beit_v2"}, "('BACKBONES', 'default', 'NextViT')": {"filepath": "TEMPLATE_PATH/models/cv/image_classification/backbones/nextvit.py", "imports": ["itertools", "mmcv", "torch", "einops", "functools", "mmcls", "math", "collections", "warnings", "os", "typing"], "module": "modelscope.models.cv.image_classification.backbones.nextvit"}, "('MODELS', 'image-classification', 'content-check')": {"filepath": "TEMPLATE_PATH/models/cv/image_classification/resnet50_cc.py", "imports": ["torch", "math", "collections", "torchvision", "os"], "module": "modelscope.models.cv.image_classification.resnet50_cc"}, "('MODELS', 'image-color-enhancement', 'deeplpfnet')": {"filepath": "TEMPLATE_PATH/models/cv/image_color_enhance/deeplpf/deeplpf_image_color_enhance.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_color_enhance.deeplpf.deeplpf_image_color_enhance"}, "('MODELS', 'image-color-enhancement', 'csrnet')": {"filepath": "TEMPLATE_PATH/models/cv/image_color_enhance/image_color_enhance.py", "imports": ["torch", "os", "typing"], "module": "modelscope.models.cv.image_color_enhance.image_color_enhance"}, "('MODELS', 'image-color-enhancement', 'adaint')": {"filepath": "TEMPLATE_PATH/models/cv/image_color_enhance/adaint/adaint.py", "imports": ["numbers", "torch", "torchvision", "os", "typing"], "module": "modelscope.models.cv.image_color_enhance.adaint.adaint"}, "('METRICS', 'default', 'image-quality-assessment-degradation-metric')": {"filepath": "TEMPLATE_PATH/metrics/image_quality_assessment_degradation_metric.py", "imports": ["numpy", "cv2", "torch", "scipy", "tempfile", "sys", "collections", "tqdm", "os", "typing"], "module": "modelscope.metrics.image_quality_assessment_degradation_metric"}, "('METRICS', 'default', 'prediction-saving-wrapper')": {"filepath": "TEMPLATE_PATH/metrics/prediction_saving_wrapper.py", "imports": ["typing", "sklearn", "numpy"], "module": "modelscope.metrics.prediction_saving_wrapper"}, "('METRICS', 'default', 'video-stabilization-metric')": {"filepath": "TEMPLATE_PATH/metrics/video_stabilization_metric.py", "imports": ["numpy", "cv2", "tqdm", "tempfile", "sys", "os", "typing"], "module": "modelscope.metrics.video_stabilization_metric"}, "('METRICS', 'default', 'ppl')": {"filepath": "TEMPLATE_PATH/metrics/ppl_metric.py", "imports": ["torch", "typing", "math", "numpy"], "module": "modelscope.metrics.ppl_metric"}, "('METRICS', 'default', 'inbatch_recall')": {"filepath": "TEMPLATE_PATH/metrics/inbatch_recall_metric.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.metrics.inbatch_recall_metric"}, "('METRICS', 'default', 'loss-metric')": {"filepath": "TEMPLATE_PATH/metrics/loss_metric.py", "imports": ["typing", "sklearn", "numpy"], "module": "modelscope.metrics.loss_metric"}, "('METRICS', 'default', 'ocr-recognition-metric')": {"filepath": "TEMPLATE_PATH/metrics/ocr_recognition_metric.py", "imports": ["torch", "edit_distance", "typing", "numpy"], "module": "modelscope.metrics.ocr_recognition_metric"}, "('METRICS', 'default', 'mAP')": {"filepath": "TEMPLATE_PATH/metrics/map_metric.py", "imports": ["typing", "numpy"], "module": 
"modelscope.metrics.map_metric"}, "('METRICS', 'default', 'image-colorization-metric')": {"filepath": "TEMPLATE_PATH/metrics/image_colorization_metric.py", "imports": ["numpy", "cv2", "torch", "scipy", "torchvision", "typing"], "module": "modelscope.metrics.image_colorization_metric"}, "('METRICS', 'default', 'seq-cls-metric')": {"filepath": "TEMPLATE_PATH/metrics/sequence_classification_metric.py", "imports": ["typing", "sklearn", "numpy"], "module": "modelscope.metrics.sequence_classification_metric"}, "('METRICS', 'default', 'audio-noise-metric')": {"filepath": "TEMPLATE_PATH/metrics/audio_noise_metric.py", "imports": ["typing"], "module": "modelscope.metrics.audio_noise_metric"}, "('METRICS', 'default', 'translation-evaluation-metric')": {"filepath": "TEMPLATE_PATH/metrics/translation_evaluation_metric.py", "imports": ["pandas", "typing", "importlib"], "module": "modelscope.metrics.translation_evaluation_metric"}, "('METRICS', 'default', 'video-frame-interpolation-metric')": {"filepath": "TEMPLATE_PATH/metrics/video_frame_interpolation_metric.py", "imports": ["numpy", "torch", "lpips", "math", "typing"], "module": "modelscope.metrics.video_frame_interpolation_metric"}, "('METRICS', 'default', 'image-inpainting-metric')": {"filepath": "TEMPLATE_PATH/metrics/image_inpainting_metric.py", "imports": ["torch", "scipy", "typing", "numpy"], "module": "modelscope.metrics.image_inpainting_metric"}, "('METRICS', 'default', 'image-denoise-metric')": {"filepath": "TEMPLATE_PATH/metrics/image_denoise_metric.py", "imports": ["cv2", "torch", "typing", "numpy"], "module": "modelscope.metrics.image_denoise_metric"}, "('METRICS', 'default', 'referring-video-object-segmentation-metric')": {"filepath": "TEMPLATE_PATH/metrics/referring_video_object_segmentation_metric.py", "imports": ["numpy", "pycocotools", "torch", "tqdm", "typing"], "module": "modelscope.metrics.referring_video_object_segmentation_metric"}, "('METRICS', 'default', 'token-cls-metric')": {"filepath": "TEMPLATE_PATH/metrics/token_classification_metric.py", "imports": ["typing", "numpy", "importlib"], "module": "modelscope.metrics.token_classification_metric"}, "('METRICS', 'default', 'video-summarization-metric')": {"filepath": "TEMPLATE_PATH/metrics/video_summarization_metric.py", "imports": ["typing", "numpy"], "module": "modelscope.metrics.video_summarization_metric"}, "('METRICS', 'default', 'image-quality-assessment-mos-metric')": {"filepath": "TEMPLATE_PATH/metrics/image_quality_assessment_mos_metric.py", "imports": ["numpy", "cv2", "torch", "scipy", "tempfile", "sys", "tqdm", "os", "typing"], "module": "modelscope.metrics.image_quality_assessment_mos_metric"}, "('METRICS', 'default', 'ned')": {"filepath": "TEMPLATE_PATH/metrics/ned_metric.py", "imports": ["typing", "numpy"], "module": "modelscope.metrics.ned_metric"}, "('METRICS', 'default', 'text-ranking-metric')": {"filepath": "TEMPLATE_PATH/metrics/text_ranking_metric.py", "imports": ["typing", "numpy"], "module": "modelscope.metrics.text_ranking_metric"}, "('METRICS', 'default', 'movie-scene-segmentation-metric')": {"filepath": "TEMPLATE_PATH/metrics/movie_scene_segmentation_metric.py", "imports": ["typing", "numpy"], "module": "modelscope.metrics.movie_scene_segmentation_metric"}, "('METRICS', 'default', 'accuracy')": {"filepath": "TEMPLATE_PATH/metrics/accuracy_metric.py", "imports": ["typing", "numpy"], "module": "modelscope.metrics.accuracy_metric"}, "('METRICS', 'default', 'image-ins-seg-coco-metric')": {"filepath": 
"TEMPLATE_PATH/metrics/image_instance_segmentation_metric.py", "imports": ["numpy", "pycocotools", "tempfile", "collections", "os", "typing"], "module": "modelscope.metrics.image_instance_segmentation_metric"}, "('METRICS', 'default', 'video-super-resolution-metric')": {"filepath": "TEMPLATE_PATH/metrics/video_super_resolution_metric/video_super_resolution_metric.py", "imports": ["typing", "numpy"], "module": "modelscope.metrics.video_super_resolution_metric.video_super_resolution_metric"}, "('METRICS', 'default', 'image-color-enhance-metric')": {"filepath": "TEMPLATE_PATH/metrics/image_color_enhance_metric.py", "imports": ["cv2", "typing", "numpy"], "module": "modelscope.metrics.image_color_enhance_metric"}, "('METRICS', 'default', 'image-portrait-enhancement-metric')": {"filepath": "TEMPLATE_PATH/metrics/image_portrait_enhancement_metric.py", "imports": ["cv2", "typing", "numpy"], "module": "modelscope.metrics.image_portrait_enhancement_metric"}, "('METRICS', 'default', 'bleu')": {"filepath": "TEMPLATE_PATH/metrics/bleu_metric.py", "imports": ["typing", "itertools", "sacrebleu"], "module": "modelscope.metrics.bleu_metric"}, "('METRICS', 'default', 'text-gen-metric')": {"filepath": "TEMPLATE_PATH/metrics/text_generation_metric.py", "imports": ["nltk", "rouge", "typing"], "module": "modelscope.metrics.text_generation_metric"}, "('PIPELINES', 'protein-structure', 'unifold-protein-structure')": {"filepath": "TEMPLATE_PATH/pipelines/science/protein_structure_pipeline.py", "imports": ["numpy", "json", "torch", "unicore", "time", "os", "typing"], "module": "modelscope.pipelines.science.protein_structure_pipeline"}, "('PIPELINES', 'task-template', 'pipeline-template')": {"filepath": "TEMPLATE_PATH/pipelines/pipeline_template.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.pipeline_template"}, "('PIPELINES', 'speech-timestamp', 'speech-timestamp-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/timestamp_pipeline.py", "imports": ["json", "typing", "yaml", "os", "funasr"], "module": "modelscope.pipelines.audio.timestamp_pipeline"}, "('PIPELINES', 'keyword-spotting', 'speech_dfsmn_kws_char_farfield')": {"filepath": "TEMPLATE_PATH/pipelines/audio/kws_farfield_pipeline.py", "imports": ["numpy", "wave", "soundfile", "io", "typing"], "module": "modelscope.pipelines.audio.kws_farfield_pipeline"}, "('PIPELINES', 'speaker-verification', 'sv-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/speaker_verification_pipeline.py", "imports": ["os", "typing", "shutil", "yaml"], "module": "modelscope.pipelines.audio.speaker_verification_pipeline"}, "('PIPELINES', 'inverse-text-processing', 'itn-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/inverse_text_processing_pipeline.py", "imports": ["os", "typing", "shutil", "yaml"], "module": "modelscope.pipelines.audio.inverse_text_processing_pipeline"}, "('PIPELINES', 'speech-separation', 'speech-separation')": {"filepath": "TEMPLATE_PATH/pipelines/audio/separation_pipeline.py", "imports": ["numpy", "torch", "soundfile", "io", "typing"], "module": "modelscope.pipelines.audio.separation_pipeline"}, "('PIPELINES', 'voice-activity-detection', 'vad-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/voice_activity_detection_pipeline.py", "imports": ["json", "typing", "yaml", "os", "funasr"], "module": "modelscope.pipelines.audio.voice_activity_detection_pipeline"}, "('PIPELINES', 'text-to-speech', 'sambert-hifigan-tts')": {"filepath": "TEMPLATE_PATH/pipelines/audio/text_to_speech_pipeline.py", "imports": ["typing", 
"numpy"], "module": "modelscope.pipelines.audio.text_to_speech_pipeline"}, "('PIPELINES', 'keyword-spotting', 'kws-kwsbp')": {"filepath": "TEMPLATE_PATH/pipelines/audio/kws_kwsbp_pipeline.py", "imports": ["json", "os", "typing"], "module": "modelscope.pipelines.audio.kws_kwsbp_pipeline"}, "('PIPELINES', 'acoustic-echo-cancellation', 'speech-dfsmn-aec-psm-16k')": {"filepath": "TEMPLATE_PATH/pipelines/audio/linear_aec_pipeline.py", "imports": ["numpy", "torch", "scipy", "yaml", "importlib", "os", "typing"], "module": "modelscope.pipelines.audio.linear_aec_pipeline"}, "('PIPELINES', 'acoustic-noise-suppression', 'speech_frcrn_ans_cirm_16k')": {"filepath": "TEMPLATE_PATH/pipelines/audio/ans_pipeline.py", "imports": ["numpy", "torch", "librosa", "soundfile", "io", "typing"], "module": "modelscope.pipelines.audio.ans_pipeline"}, "('PIPELINES', 'speaker-verification', 'speaker-verification-eres2net')": {"filepath": "TEMPLATE_PATH/pipelines/audio/speaker_verification_eres2net_pipeline.py", "imports": ["torch", "io", "typing", "soundfile"], "module": "modelscope.pipelines.audio.speaker_verification_eres2net_pipeline"}, "('PIPELINES', 'language-score-prediction', 'language-score-prediction')": {"filepath": "TEMPLATE_PATH/pipelines/audio/lm_infer_pipeline.py", "imports": ["os", "typing"], "module": "modelscope.pipelines.audio.lm_infer_pipeline"}, "('PIPELINES', 'acoustic-noise-suppression', 'speech_dfsmn_ans_psm_48k_causal')": {"filepath": "TEMPLATE_PATH/pipelines/audio/ans_dfsmn_pipeline.py", "imports": ["numpy", "torch", "sys", "collections", "librosa", "soundfile", "io", "os", "typing"], "module": "modelscope.pipelines.audio.ans_dfsmn_pipeline"}, "('PIPELINES', 'auto-speech-recognition', 'asr-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/asr_inference_pipeline.py", "imports": ["json", "os", "typing", "yaml"], "module": "modelscope.pipelines.audio.asr_inference_pipeline"}, "('PIPELINES', 'speaker-diarization', 'speaker-diarization-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/speaker_diarization_pipeline.py", "imports": ["shutil", "numpy", "json", "yaml", "os", "typing"], "module": "modelscope.pipelines.audio.speaker_diarization_pipeline"}, "('PIPELINES', 'speaker-verification', 'speaker-verification-rdino')": {"filepath": "TEMPLATE_PATH/pipelines/audio/speaker_verification_rdino_pipeline.py", "imports": ["torch", "io", "typing", "soundfile"], "module": "modelscope.pipelines.audio.speaker_verification_rdino_pipeline"}, "('PIPELINES', 'punctuation', 'punc-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/punctuation_processing_pipeline.py", "imports": ["os", "typing", "shutil", "yaml"], "module": "modelscope.pipelines.audio.punctuation_processing_pipeline"}, "('PIPELINES', 'speaker-verification', 'speaker-verification')": {"filepath": "TEMPLATE_PATH/pipelines/audio/speaker_verification_light_pipeline.py", "imports": ["torch", "io", "typing", "soundfile"], "module": "modelscope.pipelines.audio.speaker_verification_light_pipeline"}, "('PIPELINES', 'speaker-diarization', 'speaker-change-locating')": {"filepath": "TEMPLATE_PATH/pipelines/audio/speaker_change_locating_pipeline.py", "imports": ["numpy", "torch", "soundfile", "io", "typing"], "module": "modelscope.pipelines.audio.speaker_change_locating_pipeline"}, "('PIPELINES', 'auto-speech-recognition', 'asr-wenet-inference')": {"filepath": "TEMPLATE_PATH/pipelines/audio/asr_wenet_inference_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.audio.asr_wenet_inference_pipeline"}, "('PIPELINES', 
'auto-speech-recognition', 'ofa-asr')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/asr_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.asr_pipeline"}, "('PIPELINES', 'image-captioning', 'image-captioning')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/image_captioning_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.multi_modal.image_captioning_pipeline"}, "('PIPELINES', 'text-to-video-synthesis', 'latent-text-to-video-synthesis')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/text_to_video_synthesis_pipeline.py", "imports": ["cv2", "torch", "einops", "tempfile", "os", "typing"], "module": "modelscope.pipelines.multi_modal.text_to_video_synthesis_pipeline"}, "('PIPELINES', 'text-ranking', 'mgeo-ranking')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/mgeo_ranking_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.multi_modal.mgeo_ranking_pipeline"}, "('PIPELINES', 'generative-multi-modal-embedding', 'generative-multi-modal-embedding')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/generative_multi_modal_embedding_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.multi_modal.generative_multi_modal_embedding_pipeline"}, "('PIPELINES', 'multimodal-dialogue', 'multimodal-dialogue')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/multimodal_dialogue_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.multimodal_dialogue_pipeline"}, "('PIPELINES', 'text-to-image-synthesis', 'text-to-image-synthesis')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/text_to_image_synthesis_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.text_to_image_synthesis_pipeline"}, "('PIPELINES', 'text2sql', 'ofa-text2sql')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/text2sql_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.text2sql_pipeline"}, "('PIPELINES', 'visual-entailment', 'visual-entailment')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/visual_entailment_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.visual_entailment_pipeline"}, "('PIPELINES', 'text-to-image-synthesis', 'disco_guided_diffusion')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/disco_guided_diffusion_pipeline/disco_guided_diffusion.py", "imports": ["PIL", "gc", "numpy", "cv2", "json", "torch", "math", "clip", "importlib", "torchvision", "os"], "module": "modelscope.pipelines.multi_modal.disco_guided_diffusion_pipeline.disco_guided_diffusion"}, "('PIPELINES', 'visual-question-answering', 'visual-question-answering')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/visual_question_answering_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.visual_question_answering_pipeline"}, "('PIPELINES', 'video-question-answering', 'video-question-answering')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/video_question_answering_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.video_question_answering_pipeline"}, "('PIPELINES', 'video-captioning', 'video-captioning')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/video_captioning_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.video_captioning_pipeline"}, "('PIPELINES', 'video-multi-modal-embedding', 'video-multi-modal-embedding')": 
{"filepath": "TEMPLATE_PATH/pipelines/multi_modal/video_multi_modal_embedding_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.multi_modal.video_multi_modal_embedding_pipeline"}, "('PIPELINES', 'efficient-diffusion-tuning', 'efficient-diffusion-tuning')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/efficient_diffusion_tuning_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "torchvision", "typing"], "module": "modelscope.pipelines.multi_modal.efficient_diffusion_tuning_pipeline"}, "('PIPELINES', 'multi-modal-similarity', 'multi-modal-similarity')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/team_multi_modal_similarity_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.multi_modal.team_multi_modal_similarity_pipeline"}, "('PIPELINES', 'text-to-image-synthesis', 'diffusers-stable-diffusion')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/stable_diffusion_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "diffusers", "typing"], "module": "modelscope.pipelines.multi_modal.diffusers_wrapped.stable_diffusion.stable_diffusion_pipeline"}, "('PIPELINES', 'text-to-image-synthesis', 'chinese-stable-diffusion')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/chinese_stable_diffusion_pipeline.py", "imports": ["PIL", "transformers", "numpy", "cv2", "torch", "diffusers", "typing"], "module": "modelscope.pipelines.multi_modal.diffusers_wrapped.stable_diffusion.chinese_stable_diffusion_pipeline"}, "('PIPELINES', 'image-text-retrieval', 'multi-modal-embedding')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/multi_modal_embedding_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.multi_modal.multi_modal_embedding_pipeline"}, "('PIPELINES', 'multi-modal-embedding', 'multi-modal-embedding')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/multi_modal_embedding_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.multi_modal.multi_modal_embedding_pipeline"}, "('PIPELINES', 'ocr-recognition', 'ofa-ocr-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/ocr_recognition_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.ocr_recognition_pipeline"}, "('PIPELINES', 'document-vl-embedding', 'document-vl-embedding')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/document_vl_embedding_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.document_vl_embedding_pipeline"}, "('PIPELINES', 'image-text-retrieval', 'image-text-retrieval')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/image_text_retrieval_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.image_text_retrieval_pipeline"}, "('PIPELINES', 'visual-question-answering', 'gridvlp-multi-modal-classification')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/gridvlp_pipeline.py", "imports": ["PIL", "transformers", "numpy", "json", "torch", "time", "os", "traceback", "typing"], "module": "modelscope.pipelines.multi_modal.gridvlp_pipeline"}, "('PIPELINES', 'multi-modal-embedding', 'gridvlp-multi-modal-embedding')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/gridvlp_pipeline.py", "imports": ["PIL", "transformers", "numpy", "json", "torch", "time", "os", "traceback", "typing"], "module": "modelscope.pipelines.multi_modal.gridvlp_pipeline"}, "('PIPELINES', 'visual-grounding', 'visual-grounding')": {"filepath": 
"TEMPLATE_PATH/pipelines/multi_modal/visual_grounding_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.visual_grounding_pipeline"}, "('PIPELINES', 'video-temporal-grounding', 'soonet-video-temporal-grounding')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/soonet_video_temporal_grounding_pipeline.py", "imports": ["numpy", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.multi_modal.soonet_video_temporal_grounding_pipeline"}, "('PIPELINES', 'sudoku', 'ofa-sudoku')": {"filepath": "TEMPLATE_PATH/pipelines/multi_modal/sudoku_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.multi_modal.sudoku_pipeline"}, "('PIPELINES', 'translation-evaluation', 'translation-evaluation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/translation_evaluation_pipeline.py", "imports": ["numpy", "enum", "torch", "os", "typing"], "module": "modelscope.pipelines.nlp.translation_evaluation_pipeline"}, "('PIPELINES', 'text-generation', 'glm130b-text-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/glm130b_text_generation_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.glm130b_text_generation_pipeline"}, "('PIPELINES', 'faq-question-answering', 'faq-question-answering')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/faq_question_answering_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.faq_question_answering_pipeline"}, "('PIPELINES', 'document-grounded-dialog-generate', 'document-grounded-dialog-generate')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/document_grounded_dialog_generate_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.document_grounded_dialog_generate_pipeline"}, "('PIPELINES', 'translation', 'automatic-post-editing')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/automatic_post_editing_pipeline.py", "imports": ["tensorflow", "sacremoses", "numpy", "jieba", "sentencepiece", "os", "typing", "html"], "module": "modelscope.pipelines.nlp.automatic_post_editing_pipeline"}, "('PIPELINES', 'named-entity-recognition', 'named-entity-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/named_entity_recognition_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.named_entity_recognition_pipeline"}, "('PIPELINES', 'named-entity-recognition', 'named-entity-recognition-thai')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/named_entity_recognition_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.named_entity_recognition_pipeline"}, "('PIPELINES', 'named-entity-recognition', 'named-entity-recognition-viet')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/named_entity_recognition_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.named_entity_recognition_pipeline"}, "('PIPELINES', 'translation', 'interactive-translation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/interactive_translation_pipeline.py", "imports": ["tensorflow", "sacremoses", "numpy", "jieba", "subword_nmt", "os", "typing"], "module": "modelscope.pipelines.nlp.interactive_translation_pipeline"}, "('PIPELINES', 'text-summarization', 'text-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/summarization_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.summarization_pipeline"}, "('PIPELINES', 'document-grounded-dialog-retrieval', 'document-grounded-dialog-retrieval')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/document_grounded_dialog_retrieval_pipeline.py", "imports": ["numpy", "json", 
"faiss", "os", "typing"], "module": "modelscope.pipelines.nlp.document_grounded_dialog_retrieval_pipeline"}, "('PIPELINES', 'text-classification', 'domain-classification')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/fasttext_text_classification_pipeline.py", "imports": ["numpy", "fasttext", "sentencepiece", "os", "typing"], "module": "modelscope.pipelines.nlp.fasttext_text_classification_pipeline"}, "('PIPELINES', 'word-alignment', 'word-alignment')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/word_alignment_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.nlp.word_alignment_pipeline"}, "('PIPELINES', 'feature-extraction', 'feature-extraction')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/feature_extraction_pipeline.py", "imports": ["torch", "os", "typing"], "module": "modelscope.pipelines.nlp.feature_extraction_pipeline"}, "('PIPELINES', 'text-ranking', 'text-ranking')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_ranking_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.nlp.text_ranking_pipeline"}, "('PIPELINES', 'fid-dialogue', 'fid-dialogue')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/fid_dialogue_pipeline.py", "imports": ["torch", "re", "typing"], "module": "modelscope.pipelines.nlp.fid_dialogue_pipeline"}, "('PIPELINES', 'text-classification', 'sentiment-analysis')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.text_classification_pipeline"}, "('PIPELINES', 'nli', 'nli')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.text_classification_pipeline"}, "('PIPELINES', 'sentence-similarity', 'sentence-similarity')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.text_classification_pipeline"}, "('PIPELINES', 'text-classification', 'text-classification')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.text_classification_pipeline"}, "('PIPELINES', 'text-classification', 'sentiment-classification')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.text_classification_pipeline"}, "('PIPELINES', 'text-classification', 'sentence-similarity')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.text_classification_pipeline"}, "('PIPELINES', 'sentiment-classification', 'sentiment-classification')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.text_classification_pipeline"}, "('PIPELINES', 'code-generation', 'codegeex-code-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/codegeex_code_generation_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.codegeex_code_generation_pipeline"}, "('PIPELINES', 'sentence-similarity', 'translation-quality-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/translation_quality_estimation_pipeline.py", "imports": ["transformers", "torch", "io", "os", "typing"], "module": "modelscope.pipelines.nlp.translation_quality_estimation_pipeline"}, 
"('PIPELINES', 'fill-mask', 'fill-mask')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/fill_mask_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.nlp.fill_mask_pipeline"}, "('PIPELINES', 'fill-mask', 'fill-mask-ponet')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/fill_mask_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.nlp.fill_mask_pipeline"}, "('PIPELINES', 'text-generation', 'plug-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/distributed_plug_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.distributed_plug_pipeline"}, "('PIPELINES', 'table-question-answering', 'conversational-text-to-sql')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/conversational_text_to_sql_pipeline.py", "imports": ["torch", "typing", "text2sql_lgesql"], "module": "modelscope.pipelines.nlp.conversational_text_to_sql_pipeline"}, "('PIPELINES', 'text-generation', 'gpt3-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/distributed_gpt3_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.distributed_gpt3_pipeline"}, "('PIPELINES', 'information-extraction', 'relation-extraction')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/information_extraction_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.information_extraction_pipeline"}, "('PIPELINES', 'relation-extraction', 'relation-extraction')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/information_extraction_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.information_extraction_pipeline"}, "('PIPELINES', 'table-question-answering', 'table-question-answering-pipeline')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/table_question_answering_pipeline.py", "imports": ["transformers", "json", "torch", "os", "typing"], "module": "modelscope.pipelines.nlp.table_question_answering_pipeline"}, "('PIPELINES', 'text-classification', 'user-satisfaction-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/user_satisfaction_estimation_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.user_satisfaction_estimation_pipeline"}, "('PIPELINES', 'task-oriented-conversation', 'dialog-modeling')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/dialog_modeling_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.dialog_modeling_pipeline"}, "('PIPELINES', 'competency-aware-translation', 'canmt-translation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/canmt_translation_pipeline.py", "imports": ["torch", "os", "sacremoses", "typing"], "module": "modelscope.pipelines.nlp.canmt_translation_pipeline"}, "('PIPELINES', 'word-segmentation', 'word-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/word_segmentation_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.word_segmentation_pipeline"}, "('PIPELINES', 'word-segmentation', 'multilingual-word-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/word_segmentation_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.word_segmentation_pipeline"}, "('PIPELINES', 'word-segmentation', 'word-segmentation-thai')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/word_segmentation_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.word_segmentation_pipeline"}, "('PIPELINES', 'document-segmentation', 'document-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/document_segmentation_pipeline.py", 
"imports": ["datasets", "numpy", "torch", "re", "typing"], "module": "modelscope.pipelines.nlp.document_segmentation_pipeline"}, "('PIPELINES', 'text-generation', 'gpt-moe-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/distributed_gpt_moe_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.distributed_gpt_moe_pipeline"}, "('PIPELINES', 'extractive-summarization', 'extractive-summarization')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/extractive_summarization_pipeline.py", "imports": ["datasets", "numpy", "torch", "re", "typing"], "module": "modelscope.pipelines.nlp.extractive_summarization_pipeline"}, "('PIPELINES', 'text-error-correction', 'text-error-correction')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_error_correction_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.nlp.text_error_correction_pipeline"}, "('PIPELINES', 'task-oriented-conversation', 'dialog-state-tracking')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/dialog_state_tracking_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.dialog_state_tracking_pipeline"}, "('PIPELINES', 'text-summarization', 'mglm-text-summarization')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/mglm_text_summarization_pipeline.py", "imports": ["os", "typing"], "module": "modelscope.pipelines.nlp.mglm_text_summarization_pipeline"}, "('PIPELINES', 'translation', 'csanmt-translation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/translation_pipeline.py", "imports": ["tensorflow", "sacremoses", "numpy", "jieba", "subword_nmt", "os", "typing"], "module": "modelscope.pipelines.nlp.translation_pipeline"}, "('PIPELINES', 'siamese-uie', 'siamese-uie')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/siamese_uie_pipeline.py", "imports": ["json", "torch", "logging", "scipy", "tqdm", "math", "copy", "time", "pathlib", "os", "typing"], "module": "modelscope.pipelines.nlp.siamese_uie_pipeline"}, "('PIPELINES', 'task-oriented-conversation', 'dialog-intent-prediction')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/dialog_intent_prediction_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.dialog_intent_prediction_pipeline"}, "('PIPELINES', 'sentence-embedding', 'sentence-embedding')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/sentence_embedding_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.sentence_embedding_pipeline"}, "('PIPELINES', 'document-grounded-dialog-rerank', 'document-grounded-dialog-rerank')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/document_grounded_dialog_rerank_pipeline.py", "imports": ["ujson", "transformers", "random", "numpy", "torch", "re", "sys", "collections", "time", "os", "typing", "pprint"], "module": "modelscope.pipelines.nlp.document_grounded_dialog_rerank_pipeline"}, "('PIPELINES', 'zero-shot-classification', 'zero-shot-classification')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/zero_shot_classification_pipeline.py", "imports": ["torch", "scipy", "typing"], "module": "modelscope.pipelines.nlp.zero_shot_classification_pipeline"}, "('PIPELINES', 'text-generation', 'text-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_generation_pipeline.py", "imports": ["torch", "os", "typing"], "module": "modelscope.pipelines.nlp.text_generation_pipeline"}, "('PIPELINES', 'text2text-generation', 'translation_en_to_de')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_generation_pipeline.py", "imports": ["torch", "os", "typing"], "module": 
"modelscope.pipelines.nlp.text_generation_pipeline"}, "('PIPELINES', 'text2text-generation', 'translation_en_to_ro')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_generation_pipeline.py", "imports": ["torch", "os", "typing"], "module": "modelscope.pipelines.nlp.text_generation_pipeline"}, "('PIPELINES', 'text2text-generation', 'translation_en_to_fr')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_generation_pipeline.py", "imports": ["torch", "os", "typing"], "module": "modelscope.pipelines.nlp.text_generation_pipeline"}, "('PIPELINES', 'text2text-generation', 'text2text-generation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/text_generation_pipeline.py", "imports": ["torch", "os", "typing"], "module": "modelscope.pipelines.nlp.text_generation_pipeline"}, "('PIPELINES', 'text-classification', 'language_identification')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/language_identification_pipline.py", "imports": ["tensorflow", "numpy", "re", "os", "typing"], "module": "modelscope.pipelines.nlp.language_identification_pipline"}, "('PIPELINES', 'token-classification', 'token-classification')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/token_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.token_classification_pipeline"}, "('PIPELINES', 'token-classification', 'part-of-speech')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/token_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.token_classification_pipeline"}, "('PIPELINES', 'token-classification', 'word-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/token_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.token_classification_pipeline"}, "('PIPELINES', 'token-classification', 'named-entity-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/token_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.token_classification_pipeline"}, "('PIPELINES', 'part-of-speech', 'part-of-speech')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/token_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.nlp.token_classification_pipeline"}, "('PIPELINES', 'code-translation', 'codegeex-code-translation')": {"filepath": "TEMPLATE_PATH/pipelines/nlp/codegeex_code_translation_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.nlp.codegeex_code_translation_pipeline"}, "('PIPELINES', 'bad-image-detecting', 'bad-image-detecting')": {"filepath": "TEMPLATE_PATH/pipelines/cv/bad_image_detecting_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.bad_image_detecting_pipeline"}, "('PIPELINES', 'image-portrait-stylization', 'unet-person-image-cartoon')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_cartoon_pipeline.py", "imports": ["tensorflow", "numpy", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.image_cartoon_pipeline"}, "('PIPELINES', 'image-to-image-generation', 'image-to-image-generation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_to_image_generate_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.image_to_image_generate_pipeline"}, "('PIPELINES', 'facial-expression-recognition', 'vgg19-facial-expression-recognition-fer')": {"filepath": "TEMPLATE_PATH/pipelines/cv/facial_expression_recognition_pipeline.py", "imports": ["PIL", "numpy", 
"torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.facial_expression_recognition_pipeline"}, "('PIPELINES', 'face-detection', 'resnet50-face-detection-retinaface')": {"filepath": "TEMPLATE_PATH/pipelines/cv/retina_face_detection_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.retina_face_detection_pipeline"}, "('PIPELINES', 'image-style-transfer', 'AAMS-style-transfer')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_style_transfer_pipeline.py", "imports": ["cv2", "os", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_style_transfer_pipeline"}, "('PIPELINES', 'image-face-fusion', 'image-face-fusion')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_face_fusion_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.cv.image_face_fusion_pipeline"}, "('PIPELINES', 'face-detection', 'manual-face-detection-ulfd')": {"filepath": "TEMPLATE_PATH/pipelines/cv/ulfd_face_detection_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.ulfd_face_detection_pipeline"}, "('PIPELINES', 'pedestrian-attribute-recognition', 'resnet50_pedestrian-attribute-recognition_image')": {"filepath": "TEMPLATE_PATH/pipelines/cv/pedestrian_attribute_recognition_pipeline.py", "imports": ["PIL", "numpy", "cv2", "json", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.pedestrian_attribute_recognition_pipeline"}, "('PIPELINES', 'image-denoising', 'nafnet-image-denoise')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_denoise_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_denoise_pipeline"}, "('PIPELINES', 'video-text-retrieval', 'vop-video-text-retrieval-se')": {"filepath": "TEMPLATE_PATH/pipelines/cv/vop_retrieval_se_pipeline.py", "imports": ["numpy", "torch", "gzip", "os", "typing"], "module": "modelscope.pipelines.cv.vop_retrieval_se_pipeline"}, "('PIPELINES', 'portrait-matting', 'unet-image-matting')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_matting_pipeline.py", "imports": ["tensorflow", "numpy", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.image_matting_pipeline"}, "('PIPELINES', 'universal-matting', 'unet-universal-matting')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_matting_pipeline.py", "imports": ["tensorflow", "numpy", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.image_matting_pipeline"}, "('PIPELINES', 'image-deblurring', 'nafnet-image-deblur')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_deblur_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_deblur_pipeline"}, "('PIPELINES', 'video-human-matting', 'video-human-matting')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_human_matting_pipeline.py", "imports": ["numpy", "cv2", "torch", "moviepy", "os", "typing"], "module": "modelscope.pipelines.cv.video_human_matting_pipeline"}, "('PIPELINES', 'live-category', 'live-category')": {"filepath": "TEMPLATE_PATH/pipelines/cv/live_category_pipeline.py", "imports": ["PIL", "numpy", "torch", "decord", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.live_category_pipeline"}, "('PIPELINES', 'image-classification', 'image-structured-model-probing')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_structured_model_probing_pipeline.py", "imports": ["mmcv", "numpy", "torch", "math", "torchvision", "os", "typing"], "module": 
"modelscope.pipelines.cv.image_structured_model_probing_pipeline"}, "('PIPELINES', 'face-quality-assessment', 'manual-face-quality-assessment-fqa')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_quality_assessment_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "module": "modelscope.pipelines.cv.face_quality_assessment_pipeline"}, "('PIPELINES', 'image-portrait-enhancement', 'gpen-image-portrait-enhancement')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_portrait_enhancement_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "scipy", "math", "typing"], "module": "modelscope.pipelines.cv.image_portrait_enhancement_pipeline"}, "('PIPELINES', 'image-color-enhancement', 'adaint-image-color-enhance')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_color_enhance_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_color_enhance_pipeline"}, "('PIPELINES', 'image-color-enhancement', 'deeplpf-image-color-enhance')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_color_enhance_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_color_enhance_pipeline"}, "('PIPELINES', 'image-color-enhancement', 'csrnet-image-color-enhance')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_color_enhance_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_color_enhance_pipeline"}, "('PIPELINES', 'vision-efficient-tuning', 'vision-efficient-tuning')": {"filepath": "TEMPLATE_PATH/pipelines/cv/vision_efficient_tuning_pipeline.py", "imports": ["torch", "torchvision", "typing", "numpy"], "module": "modelscope.pipelines.cv.vision_efficient_tuning_pipeline"}, "('PIPELINES', 'video-object-segmentation', 'video-object-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_object_segmentation_pipeline.py", "imports": ["PIL", "numpy", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.video_object_segmentation_pipeline"}, "('PIPELINES', 'face-detection', 'resnet-face-detection-scrfd10gkps')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_detection_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.face_detection_pipeline"}, "('PIPELINES', 'body-3d-keypoints', 'canonical_body-3d-keypoints_video')": {"filepath": "TEMPLATE_PATH/pipelines/cv/body_3d_keypoints_pipeline.py", "imports": ["numpy", "cv2", "torch", "tempfile", "matplotlib", "datetime", "mpl_toolkits", "os", "typing"], "module": "modelscope.pipelines.cv.body_3d_keypoints_pipeline"}, "('PIPELINES', 'image-paintbyexample', 'stablediffusion-paintbyexample')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_paintbyexample_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "einops", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_paintbyexample_pipeline"}, "('PIPELINES', 'face-recognition', 'ir-face-recognition-rts')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_recognition_ood_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.face_recognition_ood_pipeline"}, "('PIPELINES', 'image-classification', 'image-classification')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'image-classification', 'vit-base_image-classification_ImageNet-labels')": 
{"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'image-classification', 'vit-base_image-classification_Dailylife-labels')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'image-classification', 'nextvit-small_image-classification_Dailylife-labels')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'image-classification', 'convnext-base_image-classification_garbage')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'image-classification', 'common-image-classification')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'image-classification', 'easyrobust-classification')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'image-classification', 'bnext-small_image-classification_ImageNet-labels')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_classification_pipeline"}, "('PIPELINES', 'card-detection', 'resnet-card-detection-scrfd34gkps')": {"filepath": "TEMPLATE_PATH/pipelines/cv/card_detection_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.card_detection_pipeline"}, "('PIPELINES', 'table-recognition', 'dla34-table-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/cv/table_recognition_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "module": "modelscope.pipelines.cv.table_recognition_pipeline"}, "('PIPELINES', 'image-to-image-translation', 'image-to-image-translation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_to_image_translation_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "sys", "io", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.image_to_image_translation_pipeline"}, "('PIPELINES', 'face-attribute-recognition', 'resnet34-face-attribute-recognition-fairface')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_attribute_recognition_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.face_attribute_recognition_pipeline"}, "('PIPELINES', 'image-debanding', 'rrdb-image-debanding')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_debanding_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_debanding_pipeline"}, "('PIPELINES', 'video-instance-segmentation', 'video-instance-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_instance_segmentation_pipeline.py", "imports": ["mmcv", "numpy", "cv2", "torch", "tqdm", "os", "typing"], "module": "modelscope.pipelines.cv.video_instance_segmentation_pipeline"}, "('PIPELINES', 'image-classification', 'tinynas-classification')": 
{"filepath": "TEMPLATE_PATH/pipelines/cv/tinynas_classification_pipeline.py", "imports": ["torch", "math", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.tinynas_classification_pipeline"}, "('PIPELINES', 'human-reconstruction', 'human-reconstruction')": {"filepath": "TEMPLATE_PATH/pipelines/cv/human_reconstruction_pipeline.py", "imports": ["trimesh", "shutil", "numpy", "torch", "os", "typing"], "module": "modelscope.pipelines.cv.human_reconstruction_pipeline"}, "('PIPELINES', 'video-multi-object-tracking', 'video-multi-object-tracking')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_multi_object_tracking_pipeline.py", "imports": ["torch", "os", "typing"], "module": "modelscope.pipelines.cv.video_multi_object_tracking_pipeline"}, "('PIPELINES', 'controllable-image-generation', 'controllable-image-generation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/controllable_image_generation_pipeline.py", "imports": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "glob", "os", "typing"], "module": "modelscope.pipelines.cv.controllable_image_generation_pipeline"}, "('PIPELINES', 'image-fewshot-detection', 'image-fewshot-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_defrcn_fewshot_pipeline.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_defrcn_fewshot_pipeline"}, "('PIPELINES', 'semantic-segmentation', 'ddpm-image-semantic-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/ddpm_semantic_segmentation_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.ddpm_semantic_segmentation_pipeline"}, "('PIPELINES', 'image-classification', 'resnet50-image-classification-cc')": {"filepath": "TEMPLATE_PATH/pipelines/cv/content_check_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.content_check_pipeline"}, "('PIPELINES', 'video-text-retrieval', 'vop-video-text-retrieval')": {"filepath": "TEMPLATE_PATH/pipelines/cv/vop_retrieval_pipeline.py", "imports": ["random", "numpy", "torch", "tqdm", "math", "collections", "gzip", "os", "typing", "pickle"], "module": "modelscope.pipelines.cv.vop_retrieval_pipeline"}, "('PIPELINES', 'object-detection-3d', 'object-detection-3d-depe')": {"filepath": "TEMPLATE_PATH/pipelines/cv/object_detection_3d_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "tempfile", "os", "typing"], "module": "modelscope.pipelines.cv.object_detection_3d_pipeline"}, "('PIPELINES', 'lineless-table-recognition', 'lore-lineless-table-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/cv/lineless_table_recognition_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "module": "modelscope.pipelines.cv.lineless_table_recognition_pipeline"}, "('PIPELINES', 'video-embedding', 'cmdssl-r2p1d_video_embedding')": {"filepath": "TEMPLATE_PATH/pipelines/cv/cmdssl_video_embedding_pipeline.py", "imports": ["PIL", "numpy", "torch", "decord", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.cmdssl_video_embedding_pipeline"}, "('PIPELINES', 'domain-specific-object-detection', 'tinynas-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/tinynas_detection_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.tinynas_detection_pipeline"}, "('PIPELINES', 'image-object-detection', 'tinynas-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/tinynas_detection_pipeline.py", "imports": ["typing"], "module": 
"modelscope.pipelines.cv.tinynas_detection_pipeline"}, "('PIPELINES', 'video-deinterlace', 'video-deinterlace')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_deinterlace_pipeline.py", "imports": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.video_deinterlace_pipeline"}, "('PIPELINES', 'open-vocabulary-detection', 'open-vocabulary-detection-vild')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_open_vocabulary_detection_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.image_open_vocabulary_detection_pipeline"}, "('PIPELINES', 'language-guided-video-summarization', 'clip-it-video-summarization')": {"filepath": "TEMPLATE_PATH/pipelines/cv/language_guided_video_summarization_pipeline.py", "imports": ["PIL", "shutil", "random", "numpy", "cv2", "torch", "tempfile", "clip", "os", "typing"], "module": "modelscope.pipelines.cv.language_guided_video_summarization_pipeline"}, "('PIPELINES', 'body-2d-keypoints', 'hrnetv2w32_body-2d-keypoints_image')": {"filepath": "TEMPLATE_PATH/pipelines/cv/body_2d_keypoints_pipeline.py", "imports": ["PIL", "numpy", "cv2", "json", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.body_2d_keypoints_pipeline"}, "('PIPELINES', 'face-human-hand-detection', 'face-human-hand-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_human_hand_detection_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.cv.face_human_hand_detection_pipeline"}, "('PIPELINES', 'video-embedding', 'hicossl-s3dg-video_embedding')": {"filepath": "TEMPLATE_PATH/pipelines/cv/hicossl_video_embedding_pipeline.py", "imports": ["torch", "os", "typing", "math"], "module": "modelscope.pipelines.cv.hicossl_video_embedding_pipeline"}, "('PIPELINES', 'face-recognition', 'ir101-face-recognition-cfglint')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_recognition_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.face_recognition_pipeline"}, "('PIPELINES', 'image-body-reshaping', 'flow-based-body-reshaping')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_body_reshaping_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.image_body_reshaping_pipeline"}, "('PIPELINES', 'image-inpainting', 'fft-inpainting')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_inpainting_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "typing"], "module": "modelscope.pipelines.cv.image_inpainting_pipeline"}, "('PIPELINES', 'face-recognition', 'manual-face-recognition-frfm')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_recognition_onnx_fm_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "module": "modelscope.pipelines.cv.face_recognition_onnx_fm_pipeline"}, "('PIPELINES', 'image-driving-perception', 'yolopv2_image-driving-percetion_bdd100k')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_driving_perception_pipeline.py", "imports": ["cv2", "os", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_driving_perception_pipeline"}, "('PIPELINES', 'video-stabilization', 'video-stabilization')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_stabilization_pipeline.py", "imports": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "glob", "os", "typing"], "module": "modelscope.pipelines.cv.video_stabilization_pipeline"}, "('PIPELINES', 'indoor-layout-estimation', 'indoor-layout-estimation')": 
{"filepath": "TEMPLATE_PATH/pipelines/cv/indoor_layout_estimation_pipeline.py", "imports": ["cv2", "typing", "numpy"], "module": "modelscope.pipelines.cv.indoor_layout_estimation_pipeline"}, "('PIPELINES', 'image-colorization', 'ddcolor-image-colorization')": {"filepath": "TEMPLATE_PATH/pipelines/cv/ddcolor_image_colorization_pipeline.py", "imports": ["numpy", "cv2", "torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.ddcolor_image_colorization_pipeline"}, "('PIPELINES', 'face-emotion', 'face-emotion')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_emotion_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.cv.face_emotion_pipeline"}, "('PIPELINES', 'face-detection', 'manual-face-detection-mtcnn')": {"filepath": "TEMPLATE_PATH/pipelines/cv/mtcnn_face_detection_pipeline.py", "imports": ["torch", "os", "typing"], "module": "modelscope.pipelines.cv.mtcnn_face_detection_pipeline"}, "('PIPELINES', 'nerf-recon-acc', 'nerf-recon-acc')": {"filepath": "TEMPLATE_PATH/pipelines/cv/nerf_recon_acc_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.nerf_recon_acc_pipeline"}, "('PIPELINES', 'image-depth-estimation', 'image-bts-depth-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_bts_depth_estimation_pipeline.py", "imports": ["albumentations", "numpy", "torch", "cv2", "typing"], "module": "modelscope.pipelines.cv.image_bts_depth_estimation_pipeline"}, "('PIPELINES', 'face-2d-keypoints', 'manual-facial-landmark-confidence-flcm')": {"filepath": "TEMPLATE_PATH/pipelines/cv/facial_landmark_confidence_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.facial_landmark_confidence_pipeline"}, "('PIPELINES', 'face-reconstruction', 'resnet50-face-reconstruction')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_reconstruction_pipeline.py", "imports": ["PIL", "tensorflow", "shutil", "numpy", "cv2", "torch", "scipy", "io", "face_alignment", "os", "typing"], "module": "modelscope.pipelines.cv.face_reconstruction_pipeline"}, "('PIPELINES', 'face-detection', 'resnet101-face-detection-cvpr22papermogface')": {"filepath": "TEMPLATE_PATH/pipelines/cv/mog_face_detection_pipeline.py", "imports": ["os", "typing", "numpy"], "module": "modelscope.pipelines.cv.mog_face_detection_pipeline"}, "('PIPELINES', 'skin-retouching', 'unet-skin-retouching')": {"filepath": "TEMPLATE_PATH/pipelines/cv/skin_retouching_pipeline.py", "imports": ["PIL", "tensorflow", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.skin_retouching_pipeline"}, "('PIPELINES', 'image-segmentation', 'vision-middleware-multi-task')": {"filepath": "TEMPLATE_PATH/pipelines/cv/vision_middleware_pipeline.py", "imports": ["mmcv", "numpy", "torch", "math", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.vision_middleware_pipeline"}, "('PIPELINES', 'face-liveness', 'manual-face-liveness-flir')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_liveness_ir_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "module": "modelscope.pipelines.cv.face_liveness_ir_pipeline"}, "('PIPELINES', 'human-detection', 'resnet18-human-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_detection_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.cv.image_detection_pipeline"}, "('PIPELINES', 'image-object-detection', 'vit-object-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_detection_pipeline.py", "imports": 
["typing", "numpy"], "module": "modelscope.pipelines.cv.image_detection_pipeline"}, "('PIPELINES', 'image-object-detection', 'abnormal-object-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_detection_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.cv.image_detection_pipeline"}, "('PIPELINES', 'video-object-detection', 'cspnet_realtime-video-object-detection_streamyolo')": {"filepath": "TEMPLATE_PATH/pipelines/cv/realtime_video_object_detection_pipeline.py", "imports": ["PIL", "numpy", "cv2", "json", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.realtime_video_object_detection_pipeline"}, "('PIPELINES', 'video-panoptic-segmentation', 'video-panoptic-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_panoptic_segmentation_pipeline.py", "imports": ["mmcv", "numpy", "cv2", "torch", "tqdm", "os", "typing"], "module": "modelscope.pipelines.cv.video_panoptic_segmentation_pipeline"}, "('PIPELINES', 'action-detection', 'ResNetC3D-action-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/action_detection_pipeline.py", "imports": ["os", "typing", "math"], "module": "modelscope.pipelines.cv.action_detection_pipeline"}, "('PIPELINES', 'product-segmentation', 'product-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/product_segmentation_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.cv.product_segmentation_pipeline"}, "('PIPELINES', 'image-object-detection', 'tbs-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/tbs_detection_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "colorsys", "os", "typing"], "module": "modelscope.pipelines.cv.tbs_detection_pipeline"}, "('PIPELINES', 'image-matching', 'image-matching')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_matching_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "typing"], "module": "modelscope.pipelines.cv.image_matching_pipeline"}, "('PIPELINES', 'video-category', 'video-category')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_category_pipeline.py", "imports": ["PIL", "numpy", "json", "torch", "decord", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.video_category_pipeline"}, "('PIPELINES', 'hand-static', 'hand-static')": {"filepath": "TEMPLATE_PATH/pipelines/cv/hand_static_pipeline.py", "imports": ["typing", "numpy"], "module": "modelscope.pipelines.cv.hand_static_pipeline"}, "('PIPELINES', 'animal-recognition', 'resnet101-animal-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/cv/animal_recognition_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.animal_recognition_pipeline"}, "('PIPELINES', 'pointcloud-sceneflow-estimation', 'pointcloud-sceneflow-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/pointcloud_sceneflow_estimation_pipeline.py", "imports": ["torch", "typing", "plyfile", "numpy"], "module": "modelscope.pipelines.cv.pointcloud_sceneflow_estimation_pipeline"}, "('PIPELINES', 'image-segmentation', 'cascade-mask-rcnn-swin-image-instance-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_instance_segmentation_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.image_instance_segmentation_pipeline"}, "('PIPELINES', 'video-frame-interpolation', 'video-frame-interpolation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_frame_interpolation_pipeline.py", "imports": ["subprocess", "numpy", "cv2", "torch", 
"tempfile", "math", "glob", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.video_frame_interpolation_pipeline"}, "('PIPELINES', 'image-quality-assessment-mos', 'image-quality-assessment-mos')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_quality_assessment_mos_pipeline.py", "imports": ["numpy", "cv2", "torch", "tempfile", "math", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_quality_assessment_mos_pipeline"}, "('PIPELINES', 'video-summarization', 'googlenet_pgl_video_summarization')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_summarization_pipeline.py", "imports": ["numpy", "cv2", "torch", "tqdm", "os", "typing"], "module": "modelscope.pipelines.cv.video_summarization_pipeline"}, "('PIPELINES', 'panorama-depth-estimation', 'panorama-depth-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/panorama_depth_estimation_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "typing"], "module": "modelscope.pipelines.cv.panorama_depth_estimation_pipeline"}, "('PIPELINES', 'image-segmentation', 'fast-instance-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/fast_instance_segmentation_pipeline.py", "imports": ["torch", "torchvision", "typing", "numpy"], "module": "modelscope.pipelines.cv.fast_instance_segmentation_pipeline"}, "('PIPELINES', 'image-object-detection', 'vidt')": {"filepath": "TEMPLATE_PATH/pipelines/cv/vidt_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.vidt_pipeline"}, "('PIPELINES', 'image-skychange', 'image-skychange')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_skychange_pipeline.py", "imports": ["PIL", "pdb", "numpy", "cv2", "time", "typing"], "module": "modelscope.pipelines.cv.image_skychange_pipeline"}, "('PIPELINES', 'image-quality-assessment-mos', 'image-quality-assessment-man')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_quality_assessment_man_pipeline.py", "imports": ["numpy", "cv2", "torch", "tempfile", "math", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_quality_assessment_man_pipeline"}, "('PIPELINES', 'image-demoireing', 'uhdm-image-demoireing')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_restoration_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.image_restoration_pipeline"}, "('PIPELINES', 'video-inpainting', 'video-inpainting')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_inpainting_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.video_inpainting_pipeline"}, "('PIPELINES', 'face-image-generation', 'gan-face-image-generation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_image_generation_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.face_image_generation_pipeline"}, "('PIPELINES', 'video-super-resolution', 'realbasicvsr-video-super-resolution')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_super_resolution_pipeline.py", "imports": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.video_super_resolution_pipeline"}, "('PIPELINES', 'referring-video-object-segmentation', 'referring-video-object-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/referring_video_object_segmentation_pipeline.py", "imports": ["PIL", "numpy", "torch", "einops", "tqdm", "tempfile", "moviepy", "torchvision", "typing"], "module": "modelscope.pipelines.cv.referring_video_object_segmentation_pipeline"}, "('PIPELINES', 'virtual-try-on', 
'virtual-try-on')": {"filepath": "TEMPLATE_PATH/pipelines/cv/virtual_try_on_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.virtual_try_on_pipeline"}, "('PIPELINES', 'ocr-recognition', 'convnextTiny-ocr-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/cv/ocr_recognition_pipeline.py", "imports": [], "module": "modelscope.pipelines.cv.ocr_recognition_pipeline"}, "('PIPELINES', 'ocr-detection', 'resnet18-ocr-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/ocr_detection_pipeline.py", "imports": ["tensorflow", "tf_slim", "numpy", "cv2", "torch", "math", "os", "typing"], "module": "modelscope.pipelines.cv.ocr_detection_pipeline"}, "('PIPELINES', 'movie-scene-segmentation', 'resnet50-bert-movie-scene-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/movie_scene_segmentation_pipeline.py", "imports": ["torch", "typing"], "module": "modelscope.pipelines.cv.movie_scene_segmentation_pipeline"}, "('PIPELINES', 'image-segmentation', 'maskdino-swin-image-instance-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/maskdino_instance_segmentation_pipeline.py", "imports": ["torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.maskdino_instance_segmentation_pipeline"}, "('PIPELINES', 'video-colorization', 'video-colorization')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_colorization_pipeline.py", "imports": ["PIL", "subprocess", "numpy", "cv2", "torch", "tempfile", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.video_colorization_pipeline"}, "('PIPELINES', 'image-segmentation', 'm2fp-image-human-parsing')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_human_parsing_pipeline.py", "imports": ["torch", "torchvision", "typing", "numpy"], "module": "modelscope.pipelines.cv.image_human_parsing_pipeline"}, "('PIPELINES', 'face-liveness', 'manual-face-liveness-flxc')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_liveness_xc_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "module": "modelscope.pipelines.cv.face_liveness_xc_pipeline"}, "('PIPELINES', 'crowd-counting', 'hrnet-crowd-counting')": {"filepath": "TEMPLATE_PATH/pipelines/cv/crowd_counting_pipeline.py", "imports": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "module": "modelscope.pipelines.cv.crowd_counting_pipeline"}, "('PIPELINES', 'video-depth-estimation', 'video-depth-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_depth_estimation_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.video_depth_estimation_pipeline"}, "('PIPELINES', 'image-colorization', 'unet-image-colorization')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_colorization_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_colorization_pipeline"}, "('PIPELINES', 'face-recognition', 'ir50-face-recognition-arcface')": {"filepath": "TEMPLATE_PATH/pipelines/cv/arc_face_recognition_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "module": "modelscope.pipelines.cv.arc_face_recognition_pipeline"}, "('PIPELINES', 'image-quality-assessment-degradation', 'image-quality-assessment-degradation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_quality_assessment_degradation_pipeline.py", "imports": ["numpy", "cv2", "torch", "tempfile", "math", "torchvision", "typing"], "module": "modelscope.pipelines.cv.image_quality_assessment_degradation_pipeline"}, "('PIPELINES', 
'image-inpainting', 'image-inpainting-sdv2')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_inpainting_sdv2_pipeline.py", "imports": ["numpy", "cv2", "torch", "tempfile", "sys", "math", "diffusers", "os", "typing"], "module": "modelscope.pipelines.cv.image_inpainting_sdv2_pipeline"}, "('PIPELINES', 'image-super-resolution', 'rrdb-image-super-resolution')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_super_resolution_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "typing"], "module": "modelscope.pipelines.cv.image_super_resolution_pipeline"}, "('PIPELINES', 'semantic-segmentation', 'u2net-salient-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_salient_detection_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.image_salient_detection_pipeline"}, "('PIPELINES', 'semantic-segmentation', 'res2net-salient-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_salient_detection_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.image_salient_detection_pipeline"}, "('PIPELINES', 'semantic-segmentation', 'res2net-camouflaged-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_salient_detection_pipeline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.image_salient_detection_pipeline"}, "('PIPELINES', 'video-single-object-tracking', 'procontext-vitb-video-single-object-tracking')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_single_object_tracking_pipeline.py", "imports": ["cv2", "os", "typing"], "module": "modelscope.pipelines.cv.video_single_object_tracking_pipeline"}, "('PIPELINES', 'video-single-object-tracking', 'ostrack-vitb-video-single-object-tracking')": {"filepath": "TEMPLATE_PATH/pipelines/cv/video_single_object_tracking_pipeline.py", "imports": ["cv2", "os", "typing"], "module": "modelscope.pipelines.cv.video_single_object_tracking_pipeline"}, "('PIPELINES', 'face-recognition', 'manual-face-recognition-frir')": {"filepath": "TEMPLATE_PATH/pipelines/cv/face_recognition_onnx_ir_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "module": "modelscope.pipelines.cv.face_recognition_onnx_ir_pipeline"}, "('PIPELINES', 'product-retrieval-embedding', 'resnet50-product-retrieval-embedding')": {"filepath": "TEMPLATE_PATH/pipelines/cv/product_retrieval_embedding_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.product_retrieval_embedding_pipeline"}, "('PIPELINES', 'face-recognition', 'resnet-face-recognition-facemask')": {"filepath": "TEMPLATE_PATH/pipelines/cv/mask_face_recognition_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "collections", "os", "typing"], "module": "modelscope.pipelines.cv.mask_face_recognition_pipeline"}, "('PIPELINES', 'image-super-resolution', 'mobile-image-super-resolution')": {"filepath": "TEMPLATE_PATH/pipelines/cv/mobile_image_super_resolution_pipeline.py", "imports": ["skimage", "numpy", "torch", "torchvision", "typing"], "module": "modelscope.pipelines.cv.mobile_image_super_resolution_pipeline"}, "('PIPELINES', 'license-plate-detection', 'resnet18-license-plate-detection')": {"filepath": "TEMPLATE_PATH/pipelines/cv/license_plate_detection_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "module": "modelscope.pipelines.cv.license_plate_detection_pipeline"}, "('PIPELINES', 'image-segmentation', 'image-semantic-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_semantic_segmentation_pipeline.py", 
"imports": ["PIL", "numpy", "torch", "cv2", "typing"], "module": "modelscope.pipelines.cv.image_semantic_segmentation_pipeline"}, "('PIPELINES', 'text-driven-segmentation', 'text-driven-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/text_driven_segmentation_pipleline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.text_driven_segmentation_pipleline"}, "('PIPELINES', 'motion-generation', 'mdm-motion-generation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/motion_generation_pipeline.py", "imports": ["numpy", "torch", "tempfile", "os", "typing"], "module": "modelscope.pipelines.cv.motion_generation_pipeline"}, "('PIPELINES', 'image-multi-view-depth-estimation', 'image-multi-view-depth-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_mvs_depth_estimation_pipeline.py", "imports": ["os", "typing", "tempfile", "shutil"], "module": "modelscope.pipelines.cv.image_mvs_depth_estimation_pipeline"}, "('PIPELINES', 'image-depth-estimation', 'image-depth-estimation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_depth_estimation_pipeline.py", "imports": ["PIL", "numpy", "torch", "cv2", "typing"], "module": "modelscope.pipelines.cv.image_depth_estimation_pipeline"}, "('PIPELINES', 'action-recognition', 'TAdaConv_action-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/cv/action_recognition_pipeline.py", "imports": ["torch", "os", "typing", "math"], "module": "modelscope.pipelines.cv.action_recognition_pipeline"}, "('PIPELINES', 'action-recognition', 'patchshift-action-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/cv/action_recognition_pipeline.py", "imports": ["torch", "os", "typing", "math"], "module": "modelscope.pipelines.cv.action_recognition_pipeline"}, "('PIPELINES', 'image-reid-person', 'passvitb-image-reid-person')": {"filepath": "TEMPLATE_PATH/pipelines/cv/image_reid_person_pipeline.py", "imports": ["PIL", "torch", "math", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.image_reid_person_pipeline"}, "('PIPELINES', 'general-recognition', 'resnet101-general-recognition')": {"filepath": "TEMPLATE_PATH/pipelines/cv/general_recognition_pipeline.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.pipelines.cv.general_recognition_pipeline"}, "('PIPELINES', 'shop-segmentation', 'shop-segmentation')": {"filepath": "TEMPLATE_PATH/pipelines/cv/shop_segmentation_pipleline.py", "imports": ["typing"], "module": "modelscope.pipelines.cv.shop_segmentation_pipleline"}, "('PREPROCESSORS', 'audio', 'wav-to-lists')": {"filepath": "TEMPLATE_PATH/preprocessors/kws.py", "imports": ["os", "typing", "yaml"], "module": "modelscope.preprocessors.kws"}, "('PREPROCESSORS', 'multi-modal', 'diffusion-image-generation-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, "('PREPROCESSORS', 'multi-modal', 'ofa-tasks-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, "('PREPROCESSORS', 'multi-modal', 'clip-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, 
"('PREPROCESSORS', 'multi-modal', 'mplug-tasks-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, "('PREPROCESSORS', 'multi-modal', 'vldoc-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, "('PREPROCESSORS', 'multi-modal', 'hitea-tasks-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, "('PREPROCESSORS', 'multi-modal', 'mplug-owl-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, "('PREPROCESSORS', 'multi-modal', 'image-captioning-clip-interrogator-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/multi_modal.py", "imports": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.multi_modal"}, "('PREPROCESSORS', 'science', 'unifold-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/science/uni_fold.py", "imports": ["unittest", "hashlib", "ipdb", "random", "numpy", "torch", "json", "tarfile", "pathlib", "os", "typing", "requests", "logging", "re", "tqdm", "time", "gzip", "pickle"], "module": "modelscope.preprocessors.science.uni_fold"}, "('PREPROCESSORS', 'text-to-speech', 'kantts-data-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/tts.py", "imports": ["os", "kantts", "typing"], "module": "modelscope.preprocessors.tts"}, "('PREPROCESSORS', 'audio', 'wav-to-scp')": {"filepath": "TEMPLATE_PATH/preprocessors/asr.py", "imports": ["os", "typing"], "module": "modelscope.preprocessors.asr"}, "('PREPROCESSORS', 'default', 'Compose')": {"filepath": "TEMPLATE_PATH/preprocessors/common.py", "imports": ["numpy", "torch", "collections", "time", "typing"], "module": "modelscope.preprocessors.common"}, "('PREPROCESSORS', 'default', 'ToTensor')": {"filepath": "TEMPLATE_PATH/preprocessors/common.py", "imports": ["numpy", "torch", "collections", "time", "typing"], "module": "modelscope.preprocessors.common"}, "('PREPROCESSORS', 'default', 'Filter')": {"filepath": "TEMPLATE_PATH/preprocessors/common.py", "imports": ["numpy", "torch", "collections", "time", "typing"], "module": "modelscope.preprocessors.common"}, "('PREPROCESSORS', 'default', 'ToNumpy')": {"filepath": "TEMPLATE_PATH/preprocessors/common.py", "imports": ["numpy", "torch", "collections", "time", "typing"], "module": "modelscope.preprocessors.common"}, "('PREPROCESSORS', 'default', 'Rename')": {"filepath": "TEMPLATE_PATH/preprocessors/common.py", "imports": ["numpy", "torch", "collections", "time", "typing"], "module": "modelscope.preprocessors.common"}, "('PREPROCESSORS', 'default', 'Identity')": {"filepath": "TEMPLATE_PATH/preprocessors/common.py", "imports": ["numpy", "torch", "collections", "time", "typing"], "module": "modelscope.preprocessors.common"}, "('PREPROCESSORS', 'nlp', 'word-segment-text-to-label-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/token_classification_preprocessor.py", "imports": 
["torch", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.token_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'ner-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/token_classification_preprocessor.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.token_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'token-cls-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/token_classification_preprocessor.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.token_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'sequence-labeling-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/token_classification_preprocessor.py", "imports": ["torch", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.token_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'siamese-uie-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/siamese_uie_preprocessor.py", "imports": ["typing", "transformers"], "module": "modelscope.preprocessors.nlp.siamese_uie_preprocessor"}, "('PREPROCESSORS', 'nlp', 're-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/relation_extraction_preprocessor.py", "imports": ["typing", "transformers"], "module": "modelscope.preprocessors.nlp.relation_extraction_preprocessor"}, "('PREPROCESSORS', 'nlp', 'viet-ner-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/token_classification_viet_preprocessor.py", "imports": ["torch", "typing"], "module": "modelscope.preprocessors.nlp.token_classification_viet_preprocessor"}, "('PREPROCESSORS', 'nlp', 'translation-evaluation-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/translation_evaluation_preprocessor.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.preprocessors.nlp.translation_evaluation_preprocessor"}, "('PREPROCESSORS', 'nlp', 'nli-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_classification_preprocessor.py", "imports": ["typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'sen-sim-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_classification_preprocessor.py", "imports": ["typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'bert-seq-cls-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_classification_preprocessor.py", "imports": ["typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'sen-cls-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_classification_preprocessor.py", "imports": ["typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'document-grounded-dialog-retrieval')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/document_grounded_dialog_retrieval_preprocessor.py", "imports": ["torch", "os", "typing", "transformers"], "module": "modelscope.preprocessors.nlp.document_grounded_dialog_retrieval_preprocessor"}, "('PREPROCESSORS', 'nlp', 'zero-shot-cls-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/zero_shot_classification_preprocessor.py", "imports": ["typing"], "module": "modelscope.preprocessors.nlp.zero_shot_classification_preprocessor"}, "('PREPROCESSORS', 'nlp', 'canmt-translation')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/canmt_translation.py", "imports": ["sacremoses", 
"jieba", "torch", "subword_nmt", "os", "typing"], "module": "modelscope.preprocessors.nlp.canmt_translation"}, "('PREPROCESSORS', 'nlp', 'fill-mask')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/fill_mask_preprocessor.py", "imports": ["numpy", "torch", "abc", "re", "os", "typing"], "module": "modelscope.preprocessors.nlp.fill_mask_preprocessor"}, "('PREPROCESSORS', 'nlp', 'fill-mask-ponet')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/fill_mask_preprocessor.py", "imports": ["numpy", "torch", "abc", "re", "os", "typing"], "module": "modelscope.preprocessors.nlp.fill_mask_preprocessor"}, "('PREPROCESSORS', 'nlp', 'word-alignment')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/word_alignment_preprocessor.py", "imports": ["itertools", "numpy", "torch", "os", "typing"], "module": "modelscope.preprocessors.nlp.word_alignment_preprocessor"}, "('PREPROCESSORS', 'nlp', 'conversational-text-to-sql')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/space_T_en/conversational_text_to_sql_preprocessor.py", "imports": ["json", "torch", "text2sql_lgesql", "os", "typing"], "module": "modelscope.preprocessors.nlp.space_T_en.conversational_text_to_sql_preprocessor"}, "('PREPROCESSORS', 'nlp', 'document-grounded-dialog-generate')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/document_grounded_dialog_generate_preprocessor.py", "imports": ["torch", "os", "typing", "transformers"], "module": "modelscope.preprocessors.nlp.document_grounded_dialog_generate_preprocessor"}, "('PREPROCESSORS', 'nlp', 'text-error-correction')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_error_correction.py", "imports": ["torch", "os", "typing", "transformers"], "module": "modelscope.preprocessors.nlp.text_error_correction"}, "('PREPROCESSORS', 'nlp', 'text-ranking')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_ranking_preprocessor.py", "imports": ["typing", "transformers"], "module": "modelscope.preprocessors.nlp.text_ranking_preprocessor"}, "('PREPROCESSORS', 'nlp', 'Tokenize')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/bert_seq_cls_tokenizer.py", "imports": ["typing", "transformers"], "module": "modelscope.preprocessors.nlp.bert_seq_cls_tokenizer"}, "('PREPROCESSORS', 'nlp', 'document-segmentation')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/document_segmentation_preprocessor.py", "imports": ["typing"], "module": "modelscope.preprocessors.nlp.document_segmentation_preprocessor"}, "('PREPROCESSORS', 'nlp', 'sentence-embedding')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/sentence_embedding_preprocessor.py", "imports": ["typing"], "module": "modelscope.preprocessors.nlp.sentence_embedding_preprocessor"}, "('PREPROCESSORS', 'nlp', 'mglm-summarization')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/mglm_summarization_preprocessor.py", "imports": ["os", "re", "typing"], "module": "modelscope.preprocessors.nlp.mglm_summarization_preprocessor"}, "('PREPROCESSORS', 'nlp', 'thai-ner-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/token_classification_thai_preprocessor.py", "imports": ["typing"], "module": "modelscope.preprocessors.nlp.token_classification_thai_preprocessor"}, "('PREPROCESSORS', 'nlp', 'thai-wseg-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/token_classification_thai_preprocessor.py", "imports": ["typing"], "module": "modelscope.preprocessors.nlp.token_classification_thai_preprocessor"}, "('PREPROCESSORS', 'nlp', 'mgeo-ranking')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/mgeo_ranking_preprocessor.py", "imports": ["torch", "typing", "transformers"], 
"module": "modelscope.preprocessors.nlp.mgeo_ranking_preprocessor"}, "('PREPROCESSORS', 'nlp', 'dialog-intent-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/space/dialog_intent_prediction_preprocessor.py", "imports": ["json", "os", "typing"], "module": "modelscope.preprocessors.nlp.space.dialog_intent_prediction_preprocessor"}, "('PREPROCESSORS', 'nlp', 'dialog-state-tracking-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/space/dialog_state_tracking_preprocessor.py", "imports": ["typing"], "module": "modelscope.preprocessors.nlp.space.dialog_state_tracking_preprocessor"}, "('PREPROCESSORS', 'nlp', 'dialog-modeling-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/space/dialog_modeling_preprocessor.py", "imports": ["os", "typing"], "module": "modelscope.preprocessors.nlp.space.dialog_modeling_preprocessor"}, "('PREPROCESSORS', 'nlp', 'dialog-use-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/dialog_classification_use_preprocessor.py", "imports": ["torch", "typing", "transformers"], "module": "modelscope.preprocessors.nlp.dialog_classification_use_preprocessor"}, "('PREPROCESSORS', 'nlp', 'text-gen-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_generation_preprocessor.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_generation_preprocessor"}, "('PREPROCESSORS', 'nlp', 'text-gen-jieba-tokenizer')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_generation_preprocessor.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_generation_preprocessor"}, "('PREPROCESSORS', 'nlp', 'sentence-piece')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_generation_preprocessor.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_generation_preprocessor"}, "('PREPROCESSORS', 'nlp', 'text2text-gen-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/text_generation_preprocessor.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.preprocessors.nlp.text_generation_preprocessor"}, "('PREPROCESSORS', 'nlp', 'table-question-answering-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/space_T_cn/table_question_answering_preprocessor.py", "imports": ["torch", "os", "typing", "transformers"], "module": "modelscope.preprocessors.nlp.space_T_cn.table_question_answering_preprocessor"}, "('PREPROCESSORS', 'nlp', 'document-grounded-dialog-rerank')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/document_grounded_dialog_rerank_preprocessor.py", "imports": ["transformers", "torch", "copy", "os", "typing"], "module": "modelscope.preprocessors.nlp.document_grounded_dialog_rerank_preprocessor"}, "('PREPROCESSORS', 'nlp', 'feature-extraction')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/feature_extraction_preprocessor.py", "imports": ["typing", "numpy"], "module": "modelscope.preprocessors.nlp.feature_extraction_preprocessor"}, "('PREPROCESSORS', 'nlp', 'faq-question-answering-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/nlp/faq_question_answering_preprocessor.py", "imports": ["torch", "typing"], "module": "modelscope.preprocessors.nlp.faq_question_answering_preprocessor"}, "('PREPROCESSORS', 'audio', 'LinearAECAndFbank')": {"filepath": "TEMPLATE_PATH/preprocessors/audio.py", "imports": ["numpy", "torch", "scipy", "io", "os", "typing"], "module": "modelscope.preprocessors.audio"}, "('PREPROCESSORS', 'cv', 'RandomCrop')": {"filepath": 
"TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'RandomResizedCrop')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'Resize')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'CenterCrop')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'RandomHorizontalFlip')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'Normalize')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'ImageToTensor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'image-classification-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py", "imports": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.image_classification_preprocessor"}, "('PREPROCESSORS', 'cv', 'bad-image-detecting-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/bad_image_detecting_preprocessor.py", "imports": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "module": "modelscope.preprocessors.cv.bad_image_detecting_preprocessor"}, "('PREPROCESSORS', 'cv', 'image-classification-mmcv-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/mmcls_preprocessor.py", "imports": ["os", "typing", "numpy"], "module": "modelscope.preprocessors.cv.mmcls_preprocessor"}, "('PREPROCESSORS', 'cv', 'controllable-image-generation-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/controllable_image_generation.py", "imports": ["PIL", "numpy", "cv2", "torch", "math", "torchvision", "os", "typing"], "module": "modelscope.preprocessors.cv.controllable_image_generation"}, "('PREPROCESSORS', 'cv', 'image-quality_assessment-mos-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_quality_assessment_mos.py", "imports": ["numpy", "cv2", "math", "torchvision", "typing"], "module": "modelscope.preprocessors.cv.image_quality_assessment_mos"}, "('PREPROCESSORS', 'cv', 'image-demoire-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_restoration_preprocessor.py", "imports": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "module": 
"modelscope.preprocessors.cv.image_restoration_preprocessor"}, "('PREPROCESSORS', 'cv', 'image-quality_assessment-man-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/cv/image_quality_assessment_man.py", "imports": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "module": "modelscope.preprocessors.cv.image_quality_assessment_man"}, "('PREPROCESSORS', 'cv', 'movie-scene-segmentation-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/video.py", "imports": ["urllib", "numpy", "random", "torch", "decord", "tempfile", "math", "torchvision", "os", "uuid"], "module": "modelscope.preprocessors.video"}, "('PREPROCESSORS', 'cv', 'load-image')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'object-detection-tinynas-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'image-color-enhance-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'image-denoise-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'image-deblur-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'image-portrait-enhancement-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'image-instance-segmentation-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'video-summarization-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PREPROCESSORS', 'cv', 'image-classification-bypass-preprocessor')": {"filepath": "TEMPLATE_PATH/preprocessors/image.py", "imports": ["PIL", "numpy", "cv2", "io", "typing"], "module": "modelscope.preprocessors.image"}, "('PARALLEL', 'default', 'DistributedDataParallel')": {"filepath": "TEMPLATE_PATH/trainers/parallel/builder.py", "imports": ["torch"], "module": "modelscope.trainers.parallel.builder"}, "('OPTIMIZERS', 'default', 'ChildTuningAdamW')": {"filepath": "TEMPLATE_PATH/trainers/optimizer/child_tuning_adamw_optimizer.py", "imports": ["numpy", "torch", "types", "math", "typing"], "module": "modelscope.trainers.optimizer.child_tuning_adamw_optimizer"}, "('LR_SCHEDULER', 'default', 'ConstantWarmup')": {"filepath": "TEMPLATE_PATH/trainers/lrscheduler/warmup/warmup.py", "imports": [], "module": "modelscope.trainers.lrscheduler.warmup.warmup"}, "('LR_SCHEDULER', 'default', 'LinearWarmup')": {"filepath": "TEMPLATE_PATH/trainers/lrscheduler/warmup/warmup.py", "imports": [], "module": "modelscope.trainers.lrscheduler.warmup.warmup"}, "('LR_SCHEDULER', 'default', 'ExponentialWarmup')": {"filepath": "TEMPLATE_PATH/trainers/lrscheduler/warmup/warmup.py", "imports": [], "module": 
"modelscope.trainers.lrscheduler.warmup.warmup"}, "('TRAINERS', 'default', 'nlp-base-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp_trainer.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.trainers.nlp_trainer"}, "('TRAINERS', 'default', 'nlp-veco-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp_trainer.py", "imports": ["torch", "os", "typing", "numpy"], "module": "modelscope.trainers.nlp_trainer"}, "('TRAINERS', 'default', 'speech_kws_fsmn_char_ctc_nearfield')": {"filepath": "TEMPLATE_PATH/trainers/audio/kws_nearfield_trainer.py", "imports": ["torch", "re", "tensorboardX", "copy", "datetime", "yaml", "os", "typing"], "module": "modelscope.trainers.audio.kws_nearfield_trainer"}, "('TRAINERS', 'default', 'speech_dfsmn_kws_char_farfield')": {"filepath": "TEMPLATE_PATH/trainers/audio/kws_farfield_trainer.py", "imports": ["numpy", "torch", "math", "datetime", "glob", "os", "typing", "pickle"], "module": "modelscope.trainers.audio.kws_farfield_trainer"}, "('TRAINERS', 'default', 'speech-separation')": {"filepath": "TEMPLATE_PATH/trainers/audio/separation_trainer.py", "imports": ["numpy", "torch", "torchaudio", "tqdm", "csv", "os", "speechbrain", "typing"], "module": "modelscope.trainers.audio.separation_trainer"}, "('TRAINERS', 'default', 'speech-asr-trainer')": {"filepath": "TEMPLATE_PATH/trainers/audio/asr_trainer.py", "imports": ["shutil", "json", "typing", "tempfile", "os", "funasr"], "module": "modelscope.trainers.audio.asr_trainer"}, "('TRAINERS', 'default', 'speech-kantts-trainer')": {"filepath": "TEMPLATE_PATH/trainers/audio/tts_trainer.py", "imports": ["shutil", "json", "tempfile", "os", "typing", "zipfile"], "module": "modelscope.trainers.audio.tts_trainer"}, "('TRAINERS', 'default', 'speech_frcrn_ans_cirm_16k')": {"filepath": "TEMPLATE_PATH/trainers/audio/ans_trainer.py", "imports": [], "module": "modelscope.trainers.audio.ans_trainer"}, "('HOOKS', 'default', 'CheckpointHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/checkpoint/checkpoint_hook.py", "imports": ["random", "numpy", "torch", "time", "os", "typing"], "module": "modelscope.trainers.hooks.checkpoint.checkpoint_hook"}, "('HOOKS', 'default', 'BestCkptSaverHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/checkpoint/checkpoint_hook.py", "imports": ["random", "numpy", "torch", "time", "os", "typing"], "module": "modelscope.trainers.hooks.checkpoint.checkpoint_hook"}, "('HOOKS', 'default', 'LoadCheckpointHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/checkpoint/load_checkpoint_hook.py", "imports": ["random", "numpy", "torch", "packaging", "typing"], "module": "modelscope.trainers.hooks.checkpoint.load_checkpoint_hook"}, "('HOOKS', 'default', 'TextLoggerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/logger/text_logger_hook.py", "imports": ["json", "torch", "collections", "datetime", "os"], "module": "modelscope.trainers.hooks.logger.text_logger_hook"}, "('HOOKS', 'default', 'TensorboardHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/logger/tensorboard_hook.py", "imports": ["torch", "os", "numpy"], "module": "modelscope.trainers.hooks.logger.tensorboard_hook"}, "('HOOKS', 'default', 'ApexAMPOptimizerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/optimizer/apex_optimizer_hook.py", "imports": ["torch", "logging", "packaging"], "module": "modelscope.trainers.hooks.optimizer.apex_optimizer_hook"}, "('HOOKS', 'default', 'TorchAMPOptimizerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/optimizer/torch_optimizer_hook.py", "imports": ["logging"], "module": 
"modelscope.trainers.hooks.optimizer.torch_optimizer_hook"}, "('HOOKS', 'default', 'OptimizerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/optimizer/base.py", "imports": ["torch", "logging"], "module": "modelscope.trainers.hooks.optimizer.base"}, "('HOOKS', 'default', 'NoneOptimizerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/optimizer/base.py", "imports": ["torch", "logging"], "module": "modelscope.trainers.hooks.optimizer.base"}, "('HOOKS', 'default', 'MegatronHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/distributed/megatron_hook.py", "imports": ["torch", "os", "shutil", "megatron_util"], "module": "modelscope.trainers.hooks.distributed.megatron_hook"}, "('HOOKS', 'default', 'DeepspeedHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/distributed/deepspeed_hook.py", "imports": ["shutil", "torch", "megatron_util", "deepspeed", "os"], "module": "modelscope.trainers.hooks.distributed.deepspeed_hook"}, "('HOOKS', 'default', 'DDPHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/distributed/ddp_hook.py", "imports": [], "module": "modelscope.trainers.hooks.distributed.ddp_hook"}, "('HOOKS', 'default', 'LrSchedulerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/lr_scheduler_hook.py", "imports": [], "module": "modelscope.trainers.hooks.lr_scheduler_hook"}, "('HOOKS', 'default', 'PlateauLrSchedulerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/lr_scheduler_hook.py", "imports": [], "module": "modelscope.trainers.hooks.lr_scheduler_hook"}, "('HOOKS', 'default', 'NoneLrSchedulerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/lr_scheduler_hook.py", "imports": [], "module": "modelscope.trainers.hooks.lr_scheduler_hook"}, "('HOOKS', 'default', 'EarlyStopHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/early_stop_hook.py", "imports": ["numpy"], "module": "modelscope.trainers.hooks.early_stop_hook"}, "('HOOKS', 'default', 'ClipClampLogitScaleHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/clip_clamp_logit_scale_hook.py", "imports": ["torch"], "module": "modelscope.trainers.hooks.clip_clamp_logit_scale_hook"}, "('HOOKS', 'default', 'SparsityHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/compression/sparsity_hook.py", "imports": ["os"], "module": "modelscope.trainers.hooks.compression.sparsity_hook"}, "('HOOKS', 'default', 'IterTimerHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/iter_timer_hook.py", "imports": ["time"], "module": "modelscope.trainers.hooks.iter_timer_hook"}, "('HOOKS', 'default', 'EvaluationHook')": {"filepath": "TEMPLATE_PATH/trainers/hooks/evaluation_hook.py", "imports": ["typing", "collections"], "module": "modelscope.trainers.hooks.evaluation_hook"}, "('TRAINERS', 'default', 'clip-multi-modal-embedding')": {"filepath": "TEMPLATE_PATH/trainers/multi_modal/clip/clip_trainer.py", "imports": ["torch", "os", "typing", "math"], "module": "modelscope.trainers.multi_modal.clip.clip_trainer"}, "('TRAINERS', 'default', 'efficient-diffusion-tuning')": {"filepath": "TEMPLATE_PATH/trainers/multi_modal/efficient_diffusion_tuning/efficient_diffusion_tuning_trainer.py", "imports": ["torch", "typing"], "module": "modelscope.trainers.multi_modal.efficient_diffusion_tuning.efficient_diffusion_tuning_trainer"}, "('TRAINERS', 'default', 'mplug')": {"filepath": "TEMPLATE_PATH/trainers/multi_modal/mplug/mplug_trainer.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.trainers.multi_modal.mplug.mplug_trainer"}, "('TRAINERS', 'default', 'image-classification-team')": {"filepath": 
"TEMPLATE_PATH/trainers/multi_modal/team/team_trainer.py", "imports": ["numpy", "torch", "collections", "sklearn", "os", "typing"], "module": "modelscope.trainers.multi_modal.team.team_trainer"}, "('TRAINERS', 'default', 'mgeo-ranking-trainer')": {"filepath": "TEMPLATE_PATH/trainers/multi_modal/mgeo_ranking_trainer.py", "imports": ["torch", "dataclasses", "typing"], "module": "modelscope.trainers.multi_modal.mgeo_ranking_trainer"}, "('TRAINERS', 'default', 'ofa')": {"filepath": "TEMPLATE_PATH/trainers/multi_modal/ofa/ofa_trainer.py", "imports": ["shutil", "json", "torch", "functools", "tempfile", "math", "os", "typing"], "module": "modelscope.trainers.multi_modal.ofa.ofa_trainer"}, "('TRAINERS', 'default', 'nlp-gpt-moe-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/gpt_moe_trainer.py", "imports": ["torch", "collections", "megatron_util", "os", "typing"], "module": "modelscope.trainers.nlp.gpt_moe_trainer"}, "('TRAINERS', 'default', 'nlp-plug-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/plug_trainer.py", "imports": ["torch", "megatron_util", "deepspeed", "os", "typing"], "module": "modelscope.trainers.nlp.plug_trainer"}, "('TRAINERS', 'default', 'text-generation-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/text_generation_trainer.py", "imports": ["torch", "collections"], "module": "modelscope.trainers.nlp.text_generation_trainer"}, "('TRAINERS', 'default', 'document-grounded-dialog-rerank-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/document_grounded_dialog_rerank_trainer.py", "imports": ["transformers", "numpy", "random", "torch", "time", "os", "typing"], "module": "modelscope.trainers.nlp.document_grounded_dialog_rerank_trainer"}, "('TRAINERS', 'default', 'csanmt-translation')": {"filepath": "TEMPLATE_PATH/trainers/nlp/csanmt_translation_trainer.py", "imports": ["os", "tensorflow", "typing", "time"], "module": "modelscope.trainers.nlp.csanmt_translation_trainer"}, "('TRAINERS', 'default', 'translation-evaluation-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/translation_evaluation_trainer.py", "imports": ["transformers", "random", "torch", "tqdm", "math", "pandas", "os", "typing"], "module": "modelscope.trainers.nlp.translation_evaluation_trainer"}, "('TRAINERS', 'default', 'faq-question-answering-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/faq_question_answering_trainer.py", "imports": ["distutils", "contextlib", "numpy", "torch", "functools", "collections", "dataclasses", "typing"], "module": "modelscope.trainers.nlp.faq_question_answering_trainer"}, "('TRAINERS', 'default', 'table-question-answering-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/table_question_answering_trainer.py", "imports": ["numpy", "json", "torch", "tqdm", "time", "os", "typing"], "module": "modelscope.trainers.nlp.table_question_answering_trainer"}, "('TRAINERS', 'default', 'bert-sentiment-analysis')": {"filepath": "TEMPLATE_PATH/trainers/nlp/sequence_classification_trainer.py", "imports": ["time", "typing", "numpy"], "module": "modelscope.trainers.nlp.sequence_classification_trainer"}, "('TRAINERS', 'default', 'nlp-sentence-embedding-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/sentence_embedding_trainer.py", "imports": ["transformers", "numpy", "torch", "tqdm", "time", "dataclasses", "typing"], "module": "modelscope.trainers.nlp.sentence_embedding_trainer"}, "('TRAINERS', 'default', 'nlp-gpt3-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/gpt3_trainer.py", "imports": ["torch", "os", "copy", "typing"], "module": 
"modelscope.trainers.nlp.gpt3_trainer"}, "('TRAINERS', 'default', 'nlp-text-ranking-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/text_ranking_trainer.py", "imports": ["numpy", "torch", "tqdm", "time", "dataclasses", "typing"], "module": "modelscope.trainers.nlp.text_ranking_trainer"}, "('TRAINERS', 'default', 'siamese-uie-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/siamese_uie_trainer.py", "imports": ["random", "numpy", "json", "torch", "collections", "math", "time", "os", "typing"], "module": "modelscope.trainers.nlp.siamese_uie_trainer"}, "('TRAINERS', 'default', 'dialog-intent-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/space/dialog_intent_trainer.py", "imports": ["os", "typing", "numpy"], "module": "modelscope.trainers.nlp.space.dialog_intent_trainer"}, "('TRAINERS', 'default', 'dialog-modeling-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/space/dialog_modeling_trainer.py", "imports": ["os", "time", "typing", "numpy"], "module": "modelscope.trainers.nlp.space.dialog_modeling_trainer"}, "('TRAINERS', 'default', 'document-grounded-dialog-retrieval-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/document_grounded_dialog_retrieval_trainer.py", "imports": ["transformers", "numpy", "json", "torch", "tqdm", "faiss", "os"], "module": "modelscope.trainers.nlp.document_grounded_dialog_retrieval_trainer"}, "('TRAINERS', 'default', 'document-grounded-dialog-generate-trainer')": {"filepath": "TEMPLATE_PATH/trainers/nlp/document_grounded_dialog_generate_trainer.py", "imports": ["string", "transformers", "json", "torch", "rouge", "re", "tqdm", "collections", "os", "sacrebleu"], "module": "modelscope.trainers.nlp.document_grounded_dialog_generate_trainer"}, "('TRAINERS', 'default', 'ocr-recognition')": {"filepath": "TEMPLATE_PATH/trainers/cv/ocr_recognition_trainer.py", "imports": ["torch", "time", "collections"], "module": "modelscope.trainers.cv.ocr_recognition_trainer"}, "('TRAINERS', 'default', 'image-instance-segmentation')": {"filepath": "TEMPLATE_PATH/trainers/cv/image_instance_segmentation_trainer.py", "imports": [], "module": "modelscope.trainers.cv.image_instance_segmentation_trainer"}, "('TRAINERS', 'default', 'referring-video-object-segmentation')": {"filepath": "TEMPLATE_PATH/trainers/cv/referring_video_object_segmentation_trainer.py", "imports": ["torch", "os"], "module": "modelscope.trainers.cv.referring_video_object_segmentation_trainer"}, "('TRAINERS', 'default', 'vision-efficient-tuning')": {"filepath": "TEMPLATE_PATH/trainers/cv/vision_efficient_tuning_trainer.py", "imports": ["torch", "typing"], "module": "modelscope.trainers.cv.vision_efficient_tuning_trainer"}, "('TRAINERS', 'default', 'movie-scene-segmentation')": {"filepath": "TEMPLATE_PATH/trainers/cv/movie_scene_segmentation_trainer.py", "imports": [], "module": "modelscope.trainers.cv.movie_scene_segmentation_trainer"}, "('TRAINERS', 'default', 'nerf-recon-acc')": {"filepath": "TEMPLATE_PATH/trainers/cv/nerf_recon_acc_trainer.py", "imports": ["random", "numpy", "cv2", "torch", "tqdm", "time", "datetime", "glob", "os", "typing"], "module": "modelscope.trainers.cv.nerf_recon_acc_trainer"}, "('TRAINERS', 'default', 'tinynas-damoyolo')": {"filepath": "TEMPLATE_PATH/trainers/cv/image_detection_damoyolo_trainer.py", "imports": ["torch", "math", "datetime", "time", "os", "easydict", "typing"], "module": "modelscope.trainers.cv.image_detection_damoyolo_trainer"}, "('TRAINERS', 'default', 'image-classification')": {"filepath": "TEMPLATE_PATH/trainers/cv/image_classifition_trainer.py", "imports": 
["numpy", "torch", "copy", "time", "os", "typing"], "module": "modelscope.trainers.cv.image_classifition_trainer"}, "('TRAINERS', 'default', 'cartoon-translation')": {"filepath": "TEMPLATE_PATH/trainers/cv/cartoon_translation_trainer.py", "imports": ["tensorflow", "numpy", "tqdm", "packaging", "os", "typing"], "module": "modelscope.trainers.cv.cartoon_translation_trainer"}, "('TRAINERS', 'default', 'ocr-detection-db')": {"filepath": "TEMPLATE_PATH/trainers/cv/ocr_detection_db_trainer.py", "imports": ["numpy", "torch", "tqdm", "math", "copy", "datetime", "time", "os", "easydict", "typing"], "module": "modelscope.trainers.cv.ocr_detection_db_trainer"}, "('TRAINERS', 'default', 'card-detection-scrfd')": {"filepath": "TEMPLATE_PATH/trainers/cv/card_detection_scrfd_trainer.py", "imports": [], "module": "modelscope.trainers.cv.card_detection_scrfd_trainer"}, "('TRAINERS', 'default', 'face-detection-scrfd')": {"filepath": "TEMPLATE_PATH/trainers/cv/face_detection_scrfd_trainer.py", "imports": ["copy", "time", "typing", "os"], "module": "modelscope.trainers.cv.face_detection_scrfd_trainer"}, "('TRAINERS', 'default', 'image-inpainting')": {"filepath": "TEMPLATE_PATH/trainers/cv/image_inpainting_trainer.py", "imports": ["torch", "time", "collections"], "module": "modelscope.trainers.cv.image_inpainting_trainer"}, "('TRAINERS', 'default', 'image-portrait-enhancement')": {"filepath": "TEMPLATE_PATH/trainers/cv/image_portrait_enhancement_trainer.py", "imports": ["torch", "collections"], "module": "modelscope.trainers.cv.image_portrait_enhancement_trainer"}, "('TRAINERS', 'default', 'action-detection')": {"filepath": "TEMPLATE_PATH/trainers/cv/action_detection_trainer.py", "imports": ["torch", "fvcore", "os", "typing", "detectron2"], "module": "modelscope.trainers.cv.action_detection_trainer"}, "('TRAINERS', 'default', 'image-fewshot-detection')": {"filepath": "TEMPLATE_PATH/trainers/cv/image_defrcn_fewshot_detection_trainer.py", "imports": ["torch", "collections", "os", "typing", "detectron2"], "module": "modelscope.trainers.cv.image_defrcn_fewshot_detection_trainer"}, "('TRAINERS', 'default', 'trainer')": {"filepath": "TEMPLATE_PATH/trainers/trainer.py", "imports": ["distutils", "json", "torch", "functools", "collections", "copy", "inspect", "os", "typing"], "module": "modelscope.trainers.trainer"}, "('TRAINERS', 'default', 'dummy')": {"filepath": "TEMPLATE_PATH/trainers/base.py", "imports": ["os", "abc", "typing", "time"], "module": "modelscope.trainers.base"}, "('CUSTOM_DATASETS', 'image-quality-assessment-degradation', 'image-quality-assessment-degradation')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_quality_assessment_degradation/image_quality_assessment_degradation_dataset.py", "imports": ["torchvision"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.image_quality_assessment_degradation.image_quality_assessment_degradation_dataset"}, "('CUSTOM_DATASETS', 'image-portrait-enhancement', 'PairedDataset')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_portrait_enhancement/image_portrait_enhancement_dataset.py", "imports": ["cv2", "numpy"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.image_portrait_enhancement.image_portrait_enhancement_dataset"}, "('CUSTOM_DATASETS', 'nli', 'veco')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/veco_dataset.py", "imports": ["datasets", "typing", "numpy"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.veco_dataset"}, "('CUSTOM_DATASETS', 
'image-segmentation', 'cascade_mask_rcnn_swin')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_instance_segmentation_coco_dataset.py", "imports": ["os", "numpy", "pycocotools"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.image_instance_segmentation_coco_dataset"}, "('CUSTOM_DATASETS', 'ocr-recognition', 'OCRRecognition')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_recognition_dataset.py", "imports": ["PIL", "numpy", "cv2", "json", "torch", "six", "lmdb", "os"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_recognition_dataset"}, "('CUSTOM_DATASETS', 'bad-image-detecting', 'bad-image-detecting')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/bad_image_detecting/bad_image_detecting_dataset.py", "imports": [], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.bad_image_detecting.bad_image_detecting_dataset"}, "('CUSTOM_DATASETS', 'image-inpainting', 'FFTInpainting')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_inpainting/image_inpainting_dataset.py", "imports": ["albumentations", "numpy", "enum", "cv2", "os", "glob"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.image_inpainting.image_inpainting_dataset"}, "('CUSTOM_DATASETS', 'language-guided-video-summarization', 'clip-it-language-guided-video-summarization')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/language_guided_video_summarization_dataset.py", "imports": ["numpy", "json", "torch", "h5py", "os"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.language_guided_video_summarization_dataset"}, "('CUSTOM_DATASETS', 'movie-scene-segmentation', 'resnet50-bert')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py", "imports": ["random", "json", "torch", "copy", "torchvision", "os"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.movie_scene_segmentation.movie_scene_segmentation_dataset"}, "('CUSTOM_DATASETS', 'text-ranking', 'bert')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/text_ranking_dataset.py", "imports": ["torch", "typing", "random"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.text_ranking_dataset"}, "('CUSTOM_DATASETS', 'sentence-embedding', 'bert')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/text_ranking_dataset.py", "imports": ["torch", "typing", "random"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.text_ranking_dataset"}, "('CUSTOM_DATASETS', 'image-denoising', 'SiddDataset')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/sidd_image_denoising/sidd_image_denoising_dataset.py", "imports": ["cv2", "numpy"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.sidd_image_denoising.sidd_image_denoising_dataset"}, "('CUSTOM_DATASETS', 'image-deblurring', 'RedsDataset')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/reds_image_deblurring_dataset.py", "imports": ["cv2", "numpy"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.reds_image_deblurring_dataset"}, "('CUSTOM_DATASETS', 'video-frame-interpolation', 'video-frame-interpolation')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_frame_interpolation/video_frame_interpolation_dataset.py", "imports": ["cv2", "torch", "numpy"], "module": 
"modelscope.msdatasets.dataset_cls.custom_datasets.video_frame_interpolation.video_frame_interpolation_dataset"}, "('CUSTOM_DATASETS', 'image-quality-assessment-mos', 'image-quality-assessment-mos')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_quality_assmessment_mos/image_quality_assessment_mos_dataset.py", "imports": [], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.image_quality_assmessment_mos.image_quality_assessment_mos_dataset"}, "('CUSTOM_DATASETS', 'text-ranking', 'mgeo')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/mgeo_ranking_dataset.py", "imports": ["json", "torch", "typing", "random"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.mgeo_ranking_dataset"}, "('CUSTOM_DATASETS', 'video-stabilization', 'video-stabilization')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_stabilization/video_stabilization_dataset.py", "imports": [], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.video_stabilization.video_stabilization_dataset"}, "('CUSTOM_DATASETS', 'image-deblurring', 'GoproDataset')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/gopro_image_deblurring_dataset.py", "imports": ["cv2", "numpy"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.gopro_image_deblurring_dataset"}, "('CUSTOM_DATASETS', 'referring-video-object-segmentation', 'swinT-referring-video-object-segmentation')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py", "imports": ["numpy", "pycocotools", "json", "torch", "tqdm", "h5py", "glob", "torchvision", "pandas", "os"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.referring_video_object_segmentation.referring_video_object_segmentation_dataset"}, "('CUSTOM_DATASETS', 'image-colorization', 'ddcolor')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_colorization/image_colorization_dataset.py", "imports": ["cv2", "torch", "numpy"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.image_colorization.image_colorization_dataset"}, "('CUSTOM_DATASETS', 'video-super-resolution', 'real-basicvsr')": {"filepath": "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_super_resolution/video_super_resolution_dataset.py", "imports": ["cv2", "torch", "collections", "numpy"], "module": "modelscope.msdatasets.dataset_cls.custom_datasets.video_super_resolution.video_super_resolution_dataset"}, "('EXPORTERS', 'acoustic-noise-suppression', 'speech_dfsmn_ans')": {"filepath": "TEMPLATE_PATH/exporters/audio/ans_dfsmn_exporter.py", "imports": ["torch", "os"], "module": "modelscope.exporters.audio.ans_dfsmn_exporter"}, "('EXPORTERS', 'translation', 'csanmt-translation')": {"filepath": "TEMPLATE_PATH/exporters/nlp/csanmt_for_translation_exporter.py", "imports": ["os", "typing", "tensorflow"], "module": "modelscope.exporters.nlp.csanmt_for_translation_exporter"}, "('EXPORTERS', 'transformer-crf', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/exporters/nlp/model_for_token_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.model_for_token_classification_exporter"}, "('EXPORTERS', 'token-classification', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/exporters/nlp/model_for_token_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": 
"modelscope.exporters.nlp.model_for_token_classification_exporter"}, "('EXPORTERS', 'named-entity-recognition', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/exporters/nlp/model_for_token_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.model_for_token_classification_exporter"}, "('EXPORTERS', 'part-of-speech', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/exporters/nlp/model_for_token_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.model_for_token_classification_exporter"}, "('EXPORTERS', 'word-segmentation', 'transformer-crf')": {"filepath": "TEMPLATE_PATH/exporters/nlp/model_for_token_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.model_for_token_classification_exporter"}, "('EXPORTERS', 'text-classification', 'bert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'text-classification', 'structbert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'sentence-similarity', 'bert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'sentiment-classification', 'bert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'nli', 'bert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'sentence-similarity', 'structbert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'sentiment-classification', 'structbert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'nli', 'structbert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py", "imports": ["torch", "typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter"}, "('EXPORTERS', 'zero-shot-classification', 'bert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_zero_shot_classification_exporter.py", "imports": ["typing", "collections"], "module": "modelscope.exporters.nlp.sbert_for_zero_shot_classification_exporter"}, "('EXPORTERS', 'zero-shot-classification', 'structbert')": {"filepath": "TEMPLATE_PATH/exporters/nlp/sbert_for_zero_shot_classification_exporter.py", "imports": ["typing", "collections"], "module": 
"modelscope.exporters.nlp.sbert_for_zero_shot_classification_exporter"}, "('EXPORTERS', 'image-object-detection', 'tinynas-damoyolo')": {"filepath": "TEMPLATE_PATH/exporters/cv/object_detection_damoyolo_exporter.py", "imports": ["numpy", "torch", "functools", "onnx", "os", "typing"], "module": "modelscope.exporters.cv.object_detection_damoyolo_exporter"}, "('EXPORTERS', 'face-detection', 'scrfd')": {"filepath": "TEMPLATE_PATH/exporters/cv/face_detection_scrfd_exporter.py", "imports": ["numpy", "torch", "functools", "onnx", "os", "typing"], "module": "modelscope.exporters.cv.face_detection_scrfd_exporter"}, "('EXPORTERS', 'default', 'cartoon-translation')": {"filepath": "TEMPLATE_PATH/exporters/cv/cartoon_translation_exporter.py", "imports": ["os", "tensorflow", "typing", "packaging"], "module": "modelscope.exporters.cv.cartoon_translation_exporter"}}, "requirements": {"modelscope.models.science.unifold.config": ["copy", "typing", "ml_collections"], "modelscope.models.science.unifold.msa.tools.hmmsearch": ["os", "subprocess", "absl", "typing"], "modelscope.models.science.unifold.msa.tools.hhblits": ["subprocess", "absl", "glob", "os", "typing"], "modelscope.models.science.unifold.msa.tools.kalign": ["os", "subprocess", "absl", "typing"], "modelscope.models.science.unifold.msa.tools.utils": ["time", "contextlib", "shutil", "absl", "typing", "tempfile"], "modelscope.models.science.unifold.msa.tools.hmmbuild": ["os", "subprocess", "absl", "re"], "modelscope.models.science.unifold.msa.tools.jackhmmer": ["urllib", "subprocess", "absl", "glob", "os", "concurrent", "typing"], "modelscope.models.science.unifold.msa.tools.hhsearch": ["subprocess", "absl", "glob", "os", "typing"], "modelscope.models.science.unifold.msa.mmcif": ["io", "dataclasses", "absl", "functools", "typing", "collections", "Bio"], "modelscope.models.science.unifold.msa.msa_identifiers": ["dataclasses", "re", "typing"], "modelscope.models.science.unifold.msa.parsers": ["string", "itertools", "dataclasses", "re", "typing", "collections"], "modelscope.models.science.unifold.msa.templates": ["numpy", "abc", "absl", "functools", "re", "datetime", "glob", "os", "dataclasses", "typing"], "modelscope.models.science.unifold.msa.utils": ["json", "os", "absl", "typing"], "modelscope.models.science.unifold.msa.pipeline": ["os", "absl", "typing", "numpy"], "modelscope.models.science.unifold.model": ["torch", "os", "typing", "argparse"], "modelscope.models.science.unifold.dataset": ["numpy", "json", "torch", "logging", "unicore", "copy", "ml_collections", "os", "typing"], "modelscope.models.science.unifold.modules.confidence": ["torch", "typing"], "modelscope.models.science.unifold.modules.alphafold": ["torch", "unicore"], "modelscope.models.science.unifold.modules.evoformer": ["torch", "functools", "typing", "unicore"], "modelscope.models.science.unifold.modules.auxillary_heads": ["torch", "typing", "unicore"], "modelscope.models.science.unifold.modules.attentions": ["torch", "functools", "typing", "unicore"], "modelscope.models.science.unifold.modules.embedders": ["torch", "typing", "unicore"], "modelscope.models.science.unifold.modules.structure_module": ["torch", "typing", "math", "unicore"], "modelscope.models.science.unifold.modules.common": ["torch", "functools", "typing", "unicore"], "modelscope.models.science.unifold.modules.frame": ["torch", "__future__", "typing", "numpy"], "modelscope.models.science.unifold.modules.template": ["torch", "functools", "math", "unicore", "typing"], 
"modelscope.models.science.unifold.modules.triangle_multiplication": ["torch", "functools", "typing", "unicore"], "modelscope.models.science.unifold.modules.featurization": ["torch", "typing", "unicore"], "modelscope.models.science.unifold.data.process_multimer": ["typing", "collections", "numpy"], "modelscope.models.science.unifold.data.protein": ["numpy", "Bio", "io", "dataclasses", "typing"], "modelscope.models.science.unifold.data.residue_constants": ["numpy", "os", "functools", "typing", "collections", "unicore"], "modelscope.models.science.unifold.data.utils": ["numpy", "json", "functools", "scipy", "copy", "gzip", "typing", "pickle"], "modelscope.models.science.unifold.data.process": ["torch", "typing", "numpy"], "modelscope.models.science.unifold.data.msa_pairing": ["numpy", "scipy", "collections", "pandas", "typing"], "modelscope.models.science.unifold.data.data_ops": ["itertools", "numpy", "torch", "functools", "operator", "unicore", "typing"], "modelscope.models.builder": [], "modelscope.models.audio.ans.layers.activations": ["torch"], "modelscope.models.audio.ans.layers.layer_base": ["six", "abc", "torch", "numpy"], "modelscope.models.audio.ans.layers.affine_transform": ["torch"], "modelscope.models.audio.ans.layers.uni_deep_fsmn": ["torch", "numpy"], "modelscope.models.audio.ans.unet": ["torch"], "modelscope.models.audio.ans.conv_stft": ["torch", "scipy", "numpy"], "modelscope.models.audio.ans.denoise_net": ["torch"], "modelscope.models.audio.ans.complex_nn": ["torch"], "modelscope.models.audio.ans.se_module_complex": ["torch"], "modelscope.models.audio.ans.frcrn": ["torch", "os", "typing"], "modelscope.models.audio.sv.DTDNN_layers": ["torch"], "modelscope.models.audio.sv.ecapa_tdnn": ["torch", "torchaudio", "math", "os", "typing"], "modelscope.models.audio.sv.ERes2Net": ["torch", "torchaudio", "math", "os", "typing"], "modelscope.models.audio.sv.pooling_layers": ["torch"], "modelscope.models.audio.sv.DTDNN": ["torch", "torchaudio", "collections", "os", "typing"], "modelscope.models.audio.sv.fusion": ["torch"], "modelscope.models.audio.sv.generic_speaker_verification": ["os", "typing"], "modelscope.models.audio.sv.speaker_change_locator": ["numpy", "torch", "torchaudio", "collections", "os", "typing"], "modelscope.models.audio.sv.rdino": ["torch", "torchaudio", "math", "os", "typing"], "modelscope.models.audio.itn.generic_inverse_text_processing": ["os", "typing"], "modelscope.models.audio.aec.layers.activations": ["torch"], "modelscope.models.audio.aec.layers.layer_base": ["torch", "abc", "re", "numpy"], "modelscope.models.audio.aec.layers.deep_fsmn": ["torch", "numpy"], "modelscope.models.audio.aec.layers.affine_transform": ["torch", "numpy"], "modelscope.models.audio.aec.layers.uni_deep_fsmn": ["torch", "numpy"], "modelscope.models.audio.aec.network.se_net": ["torch"], "modelscope.models.audio.aec.network.loss": ["torch"], "modelscope.models.audio.aec.network.modulation_loss": ["torch", "torchaudio", "math"], "modelscope.models.audio.asr.wenet_automatic_speech_recognition": ["json", "os", "wenetruntime", "typing"], "modelscope.models.audio.asr.generic_automatic_speech_recognition": ["os", "typing"], "modelscope.models.audio.punc.generic_punctuation": ["os", "typing"], "modelscope.models.audio.tts.voice": ["numpy", "json", "torch", "kantts", "collections", "time", "yaml", "os", "threading", "pickle"], "modelscope.models.audio.tts.sambert_hifi": ["shutil", "numpy", "json", "__future__", "wave", "matplotlib", "datetime", "yaml", "os", "zipfile"], 
"modelscope.models.audio.separation.mossformer": ["torch", "os", "copy", "typing"], "modelscope.models.audio.separation.mossformer_conv_module": ["torch"], "modelscope.models.audio.separation.mossformer_block": ["torch"], "modelscope.models.audio.separation.layer_norm": ["torch", "__future__"], "modelscope.models.audio.kws.farfield.fsmn": ["torch", "numpy"], "modelscope.models.audio.kws.farfield.fsmn_sele_v2": ["torch"], "modelscope.models.audio.kws.farfield.fsmn_sele_v3": ["torch"], "modelscope.models.audio.kws.farfield.model_def": ["math", "struct", "enum"], "modelscope.models.audio.kws.farfield.model": ["os", "typing", "tempfile"], "modelscope.models.audio.kws.generic_key_word_spotting": ["os", "typing"], "modelscope.models.audio.kws.nearfield.fsmn": ["torch", "typing", "numpy"], "modelscope.models.audio.kws.nearfield.model": ["torch", "tempfile", "sys", "os", "typing"], "modelscope.models.audio.kws.nearfield.cmvn": ["torch", "re", "numpy"], "modelscope.models.multi_modal.ofa_for_all_tasks": ["string", "json", "torch", "re", "functools", "math", "os", "typing"], "modelscope.models.multi_modal.clip.configuration_bert": ["__future__", "logging"], "modelscope.models.multi_modal.clip.bert_tokenizer": ["six", "unicodedata", "__future__", "re", "os", "collections"], "modelscope.models.multi_modal.clip.model": ["numpy", "json", "torch", "collections", "os", "typing"], "modelscope.models.multi_modal.clip.modeling_bert": ["json", "torch", "logging", "__future__", "sys", "math", "io", "os"], "modelscope.models.multi_modal.mplug_for_all_tasks": ["os", "typing"], "modelscope.models.multi_modal.multi_stage_diffusion.decoder": ["torch", "math"], "modelscope.models.multi_modal.multi_stage_diffusion.prior": ["torch", "math"], "modelscope.models.multi_modal.multi_stage_diffusion.upsampler": ["torch", "math"], "modelscope.models.multi_modal.multi_stage_diffusion.model": ["PIL", "numpy", "json", "torch", "math", "os", "typing"], "modelscope.models.multi_modal.multi_stage_diffusion.tokenizer": ["transformers", "gzip", "torch", "regex", "functools", "ftfy", "html"], "modelscope.models.multi_modal.multi_stage_diffusion.xglm": ["torch", "math"], "modelscope.models.multi_modal.multi_stage_diffusion.gaussian_diffusion": ["torch", "math"], "modelscope.models.multi_modal.multi_stage_diffusion.clip": ["torch", "math"], "modelscope.models.multi_modal.diffusion.structbert": ["copy", "six", "numpy", "json", "torch", "__future__", "math"], "modelscope.models.multi_modal.diffusion.diffusion": ["torch", "math"], "modelscope.models.multi_modal.diffusion.unet_generator": ["torch", "math"], "modelscope.models.multi_modal.diffusion.model": ["numpy", "json", "torch", "os", "typing"], "modelscope.models.multi_modal.diffusion.tokenizer": ["unicodedata", "six", "__future__", "collections"], "modelscope.models.multi_modal.diffusion.unet_upsampler_256": ["torch", "functools", "math"], "modelscope.models.multi_modal.diffusion.unet_upsampler_1024": ["torch", "math"], "modelscope.models.multi_modal.efficient_diffusion_tuning.efficient_stable_diffusion": ["transformers", "torch", "functools", "diffusers", "os", "typing"], "modelscope.models.multi_modal.gemm.gemm_base": ["numpy", "json", "torch", "collections", "os", "typing"], "modelscope.models.multi_modal.gemm.gemm_model": ["PIL", "numpy", "json", "torch", "torchvision", "os", "typing"], "modelscope.models.multi_modal.gemm.tokenizer": ["gzip", "torch", "os", "regex", "functools", "ftfy", "html"], "modelscope.models.multi_modal.mmr.dataloaders.rawvideo_util": ["PIL", "numpy", 
"torch", "cv2", "torchvision"], "modelscope.models.multi_modal.mmr.models.module_clip": ["urllib", "hashlib", "torch", "tqdm", "collections", "warnings", "os", "typing"], "modelscope.models.multi_modal.mmr.models.clip_for_mm_video_embedding": ["urllib", "PIL", "random", "numpy", "json", "torch", "decord", "tempfile", "os", "typing", "uuid"], "modelscope.models.multi_modal.mmr.models.module_cross": ["json", "torch", "logging", "collections", "__future__"], "modelscope.models.multi_modal.mmr.models.until_module": ["torch", "logging", "math", "numpy"], "modelscope.models.multi_modal.mmr.models.tokenization_clip": ["gzip", "os", "regex", "functools", "ftfy", "html"], "modelscope.models.multi_modal.mmr.models.modeling": ["torch", "types", "collections", "platform", "os"], "modelscope.models.multi_modal.mmr.models.dynamic_inverted_softmax": ["numpy"], "modelscope.models.multi_modal.mplug.predictor": ["torch", "__future__"], "modelscope.models.multi_modal.mplug.clip.clip": ["torch", "typing", "collections"], "modelscope.models.multi_modal.mplug.modeling_mplug": ["transformers", "torch", "math", "os", "typing"], "modelscope.models.multi_modal.mplug.mvit": ["timm", "numpy", "torch", "functools", "fairscale", "collections"], "modelscope.models.multi_modal.mplug.configuration_mplug": ["os", "typing", "transformers", "yaml"], "modelscope.models.multi_modal.team.team_model": ["PIL", "numpy", "torch", "cv2", "tokenizers", "torchvision", "typing"], "modelscope.models.multi_modal.team.utils": ["transformers", "numpy", "torch", "typing", "collections"], "modelscope.models.multi_modal.guided_diffusion.respace": ["torch", "numpy"], "modelscope.models.multi_modal.guided_diffusion.unet": ["transformers", "numpy", "torch", "abc", "math"], "modelscope.models.multi_modal.guided_diffusion.gaussian_diffusion": ["torch", "math", "numpy", "enum"], "modelscope.models.multi_modal.guided_diffusion.script": [], "modelscope.models.multi_modal.vldoc.tokenization": ["os", "transformers"], "modelscope.models.multi_modal.vldoc.model": ["json", "torch", "logging", "re", "math", "sys", "copy", "torchvision", "os"], "modelscope.models.multi_modal.vldoc.conv_fpn_trans": ["timm", "random", "torch", "collections", "apex"], "modelscope.models.multi_modal.vldoc.transformer_local": ["copy", "torch"], "modelscope.models.multi_modal.vldoc.modeling_layout_roberta": ["transformers", "packaging", "torch", "os", "math"], "modelscope.models.multi_modal.vldoc.processing": ["PIL", "timm", "numpy", "cv2", "torch", "collections", "torchvision", "typing"], "modelscope.models.multi_modal.vldoc.convnext": ["torch", "os", "timm"], "modelscope.models.multi_modal.soonet.model": ["torch", "os"], "modelscope.models.multi_modal.soonet.tokenizer": ["gzip", "torch", "regex", "functools", "ftfy", "html"], "modelscope.models.multi_modal.soonet.utils": ["copy", "decord", "numpy", "tqdm"], "modelscope.models.multi_modal.soonet.blocks": ["torch", "math"], "modelscope.models.multi_modal.soonet.swin_transformer": ["torch", "numpy"], "modelscope.models.multi_modal.soonet.clip": ["warnings", "numpy", "torch", "typing", "collections"], "modelscope.models.multi_modal.mgeo.text_ranking": ["torch"], "modelscope.models.multi_modal.mgeo.backbone": ["dataclasses", "transformers", "random", "torch", "math", "warnings", "os", "typing"], "modelscope.models.multi_modal.mgeo.text_classification": ["torch"], "modelscope.models.multi_modal.mgeo.token_classification": ["torch"], "modelscope.models.multi_modal.mplug_owl.configuration_mplug_owl": ["copy", "os", "typing", 
"transformers"], "modelscope.models.multi_modal.mplug_owl.modeling_mplug_owl": ["dataclasses", "transformers", "random", "torch", "logging", "math", "copy", "io", "os", "typing"], "modelscope.models.multi_modal.ofa_for_text_to_image_synthesis_model": ["PIL", "pkg_resources", "numpy", "json", "torch", "taming", "torchvision", "os", "typing"], "modelscope.models.multi_modal.video_synthesis.diffusion": ["torch"], "modelscope.models.multi_modal.video_synthesis.text_to_video_synthesis_model": ["open_clip", "torch", "einops", "os", "typing"], "modelscope.models.multi_modal.video_synthesis.autoencoder": ["torch", "numpy"], "modelscope.models.multi_modal.video_synthesis.unet_sd": ["torch", "einops", "math"], "modelscope.models.multi_modal.clip_interrogator.model": ["PIL", "hashlib", "numpy", "open_clip", "torch", "dataclasses", "os", "typing", "requests", "transformers", "safetensors", "tqdm", "math", "time", "torchvision"], "modelscope.models.multi_modal.rleg.model": ["json", "os", "torch"], "modelscope.models.multi_modal.rleg.rleg": ["torch", "torchvision", "typing"], "modelscope.models.multi_modal.dpm_solver_pytorch": ["torch", "math"], "modelscope.models.multi_modal.ofa.modeling_ofa": ["transformers", "random", "torch", "math", "packaging", "apex", "dataclasses", "typing"], "modelscope.models.multi_modal.ofa.utils.utils": ["torch", "typing"], "modelscope.models.multi_modal.ofa.utils.constant": [], "modelscope.models.multi_modal.ofa.vit": ["torch", "collections", "fairseq"], "modelscope.models.multi_modal.ofa.modeling_mmspeech": ["transformers", "numpy", "torch", "math", "fairseq", "packaging", "apex", "dataclasses", "typing"], "modelscope.models.multi_modal.ofa.resnet": ["torch"], "modelscope.models.multi_modal.ofa.tokenization_ofa": ["os", "typing", "transformers", "collections"], "modelscope.models.multi_modal.ofa.generate.multihead_attention": ["torch", "typing", "math", "fairseq"], "modelscope.models.multi_modal.ofa.generate.ngram_repeat_block": ["warnings", "torch", "typing", "math", "fairseq"], "modelscope.models.multi_modal.ofa.generate.sequence_generator": ["torch", "math", "typing", "sys"], "modelscope.models.multi_modal.ofa.generate.incremental_decoding_utils": ["torch", "typing", "uuid"], "modelscope.models.multi_modal.ofa.generate.utils": ["amp_C", "itertools", "torch_xla", "torch", "collections"], "modelscope.models.multi_modal.ofa.generate.search": ["torch", "typing", "math"], "modelscope.models.multi_modal.ofa.generate.token_generation_constraints": ["torch", "typing", "collections"], "modelscope.models.multi_modal.ofa.tokenization_ofa_fast": ["json", "typing", "transformers", "tokenizers"], "modelscope.models.multi_modal.ofa.configuration_mmspeech": ["warnings", "transformers"], "modelscope.models.multi_modal.ofa.configuration_ofa": ["warnings", "transformers"], "modelscope.models.nlp.unite.configuration": ["enum"], "modelscope.models.nlp.unite.translation_evaluation": ["transformers", "numpy", "torch", "math", "warnings", "packaging", "dataclasses", "typing"], "modelscope.models.nlp.palm_v2.configuration": ["transformers"], "modelscope.models.nlp.palm_v2.dureader_eval": ["zipfile", "numpy", "json", "rouge", "re", "sys", "math", "collections", "copy", "argparse"], "modelscope.models.nlp.palm_v2.text_generation": ["dataclasses", "subprocess", "codecs", "transformers", "numpy", "json", "torch", "math", "copy", "os", "typing"], "modelscope.models.nlp.structbert.configuration": ["transformers"], "modelscope.models.nlp.structbert.fill_mask": ["torch", "transformers"], 
"modelscope.models.nlp.structbert.backbone": ["transformers", "torch", "math", "packaging", "dataclasses", "typing"], "modelscope.models.nlp.structbert.faq_question_answering": ["torch", "math", "collections", "os", "typing"], "modelscope.models.nlp.structbert.adv_utils": ["torch"], "modelscope.models.nlp.structbert.text_classification": ["torch"], "modelscope.models.nlp.structbert.token_classification": ["torch"], "modelscope.models.nlp.hf_transformers.backbone": ["transformers"], "modelscope.models.nlp.task_models.fill_mask": ["torch", "typing", "numpy"], "modelscope.models.nlp.task_models.text_ranking": ["typing", "numpy"], "modelscope.models.nlp.task_models.feature_extraction": ["typing", "numpy"], "modelscope.models.nlp.task_models.text_classification": ["typing", "numpy"], "modelscope.models.nlp.task_models.task_model": ["torch", "abc", "re", "collections", "os", "typing"], "modelscope.models.nlp.task_models.text_generation": ["torch", "typing", "transformers", "numpy"], "modelscope.models.nlp.task_models.information_extraction": ["typing", "numpy"], "modelscope.models.nlp.task_models.token_classification": ["torch", "typing"], "modelscope.models.nlp.veco.configuration": ["transformers"], "modelscope.models.nlp.veco.fill_mask": ["transformers"], "modelscope.models.nlp.veco.backbone": ["transformers"], "modelscope.models.nlp.veco.text_classification": ["transformers"], "modelscope.models.nlp.veco.token_classification": ["torch", "transformers"], "modelscope.models.nlp.glm_130b.initialize": ["torch", "time", "argparse", "SwissArmyTransformer"], "modelscope.models.nlp.glm_130b.quantization.functional": ["torch"], "modelscope.models.nlp.glm_130b.quantization.layers": ["torch", "SwissArmyTransformer"], "modelscope.models.nlp.glm_130b.text_generation": ["random", "stat", "torch", "SwissArmyTransformer", "re", "functools", "sys", "copy", "time", "os", "typing"], "modelscope.models.nlp.glm_130b.generation.strategies": ["torch", "numpy", "SwissArmyTransformer"], "modelscope.models.nlp.mglm.tasks.superglue.pvp": ["string", "tasks", "random", "numpy", "abc", "utils", "math", "collections", "copy", "typing"], "modelscope.models.nlp.mglm.tasks.superglue.dataset": ["random", "numpy", "json", "abc", "torch", "collections", "os", "typing", "re", "tqdm", "utils", "csv", "copy", "glob", "pandas", "data_utils"], "modelscope.models.nlp.mglm.tasks.superglue.evaluate": ["string", "tasks", "__future__", "functools", "typing", "re", "collections"], "modelscope.models.nlp.mglm.tasks.superglue.finetune": ["tasks", "collections", "finetune_glm"], "modelscope.models.nlp.mglm.tasks.data_utils": ["numpy", "json", "torch", "re", "copy", "megatron_util", "typing", "pickle"], "modelscope.models.nlp.mglm.tasks.seq2seq.dataset": ["tasks", "random", "numpy", "json", "torch", "tqdm", "utils", "os", "data_utils"], "modelscope.models.nlp.mglm.tasks.seq2seq.evaluate": ["string", "rouge_score", "datetime", "random", "megatron_util", "torch", "generation_utils"], "modelscope.models.nlp.mglm.tasks.seq2seq.finetune": ["tasks", "pretrain_glm", "megatron_util", "torch", "functools", "collections", "finetune_glm"], "modelscope.models.nlp.mglm.tasks.language_model.detokenizer": ["re"], "modelscope.models.nlp.mglm.tasks.language_model.dataset": ["tasks", "itertools", "numpy", "json", "torch", "utils", "math", "bisect"], "modelscope.models.nlp.mglm.tasks.language_model.finetune": ["tasks", "pretrain_glm", "megatron_util", "torch", "functools", "math", "finetune_glm"], "modelscope.models.nlp.mglm.tasks.eval_utils": ["tasks", 
"random", "torch", "utils", "collections", "finetune_glm", "datetime", "time", "sklearn", "megatron_util", "os", "typing"], "modelscope.models.nlp.mglm.blocklm_utils": ["copy", "numpy", "random", "torch", "megatron_util", "scipy", "math"], "modelscope.models.nlp.mglm.train_utils": ["torch", "apex", "deepspeed", "megatron_util"], "modelscope.models.nlp.mglm.test.test_block": ["numpy", "argparse", "blocklm_utils", "random"], "modelscope.models.nlp.mglm.test.test_rel_shift": ["torch", "learning_rates", "numpy", "matplotlib"], "modelscope.models.nlp.mglm.arguments": ["json", "torch", "deepspeed", "os", "argparse"], "modelscope.models.nlp.mglm.data_utils.tokenization_gpt2": ["json", "logging", "__future__", "sys", "functools", "io", "os", "regex"], "modelscope.models.nlp.mglm.data_utils.lazy_loader": ["time", "itertools", "mmap", "numpy", "torch", "os", "pickle"], "modelscope.models.nlp.mglm.data_utils.wordpiece": ["logging", "collections", "io", "unicodedata", "__future__", "os"], "modelscope.models.nlp.mglm.data_utils.datasets": ["random", "numpy", "json", "torch", "operator", "nltk", "bisect", "os", "itertools", "tqdm", "math", "csv", "time", "pandas"], "modelscope.models.nlp.mglm.data_utils.tokenization": ["itertools", "random", "torch", "collections", "csv", "sentencepiece", "nltk", "os", "regex"], "modelscope.models.nlp.mglm.data_utils.extraction": ["os", "glob", "json", "nltk"], "modelscope.models.nlp.mglm.data_utils.file_utils": ["urllib", "hashlib", "json", "botocore", "sys", "io", "pathlib", "os", "requests", "shutil", "logging", "functools", "tempfile", "tqdm", "boto3", "__future__"], "modelscope.models.nlp.mglm.data_utils.sp_tokenizer": ["os"], "modelscope.models.nlp.mglm.data_utils.corpora": ["multiprocessing", "random", "json", "torch", "tqdm", "collections", "queue", "os"], "modelscope.models.nlp.mglm.data_utils.samplers": ["numpy", "torch", "math", "os", "sys"], "modelscope.models.nlp.mglm.mglm_for_text_summarization": ["random", "numpy", "torch", "megatron_util", "os", "typing"], "modelscope.models.nlp.mglm.process_grid": ["os", "json", "glob", "statistics", "sys"], "modelscope.models.nlp.mglm.generation_utils": ["torch", "abc", "typing", "collections"], "modelscope.models.nlp.mglm.utils": ["subprocess", "random", "numpy", "json", "torch", "time", "megatron_util", "os"], "modelscope.models.nlp.mglm.configure_data": ["itertools", "random", "numpy", "torch", "copy", "bisect", "megatron_util", "os"], "modelscope.models.nlp.mglm.model.distributed": ["torch", "megatron_util"], "modelscope.models.nlp.mglm.model.transformer": ["apex", "deepspeed", "megatron_util", "torch", "math"], "modelscope.models.nlp.mglm.model.modeling_bert": ["shutil", "json", "torch", "logging", "__future__", "tempfile", "math", "copy", "apex", "megatron_util", "tarfile", "os", "data_utils"], "modelscope.models.nlp.mglm.model.prompt": ["torch", "random"], "modelscope.models.nlp.mglm.model.modeling_glm": ["torch", "megatron_util"], "modelscope.models.nlp.mglm.model.downstream": ["torch"], "modelscope.models.nlp.mglm.run_test": ["sys", "test"], "modelscope.models.nlp.plug_mental.configuration": ["transformers"], "modelscope.models.nlp.plug_mental.backbone": ["transformers", "torch", "math", "packaging", "dataclasses", "typing"], "modelscope.models.nlp.plug_mental.adv_utils": ["torch"], "modelscope.models.nlp.plug_mental.text_classification": ["torch"], "modelscope.models.nlp.gpt_moe.configuration": ["torch", "transformers"], "modelscope.models.nlp.gpt_moe.backbone": ["transformers", "torch", "typing", "math", 
"os", "addict"], "modelscope.models.nlp.gpt_moe.tokenizer": ["tokenizers"], "modelscope.models.nlp.gpt_moe.distributed_gpt_moe": ["torch", "transformers", "math", "megatron_util"], "modelscope.models.nlp.gpt_moe.text_generation": ["typing", "transformers"], "modelscope.models.nlp.gpt_moe.moe.sharded_moe": ["tutel", "torch", "scipy", "math", "apex", "megatron_util", "typing"], "modelscope.models.nlp.gpt_moe.moe.utils": ["torch", "typing"], "modelscope.models.nlp.gpt_moe.moe.layer": ["torch", "typing", "megatron_util"], "modelscope.models.nlp.gpt_moe.moe.experts": ["copy", "torch"], "modelscope.models.nlp.gpt_moe.moe.mappings": ["torch", "megatron_util"], "modelscope.models.nlp.gpt_moe.checkpointing": ["torch", "os", "megatron_util"], "modelscope.models.nlp.csanmt.translation": ["tensorflow", "typing", "math", "collections"], "modelscope.models.nlp.T5.text2text_generation": ["transformers", "torch", "copy", "warnings", "typing"], "modelscope.models.nlp.T5.configuration": ["typing", "transformers"], "modelscope.models.nlp.T5.backbone": ["transformers", "torch", "math", "copy", "warnings", "os", "typing"], "modelscope.models.nlp.heads.text_classification_head": ["torch", "typing"], "modelscope.models.nlp.heads.infromation_extraction_head": ["torch"], "modelscope.models.nlp.heads.token_classification_head": ["torch", "typing"], "modelscope.models.nlp.heads.text_generation_head": ["torch", "typing"], "modelscope.models.nlp.heads.crf_head": ["torch", "typing", "transformers"], "modelscope.models.nlp.heads.torch_pretrain_head": ["torch", "typing", "transformers"], "modelscope.models.nlp.heads.fill_mask_head": ["torch", "typing", "transformers"], "modelscope.models.nlp.heads.text_ranking_head": ["torch", "typing"], "modelscope.models.nlp.bloom.backbone": ["transformers"], "modelscope.models.nlp.xlm_roberta.configuration": ["typing", "transformers", "collections"], "modelscope.models.nlp.xlm_roberta.backbone": ["torch", "transformers", "math", "packaging"], "modelscope.models.nlp.peer.configuration": ["transformers"], "modelscope.models.nlp.peer.sas_utils": ["numpy", "nltk", "torch", "random"], "modelscope.models.nlp.peer.backbone": ["transformers", "torch", "math", "dataclasses", "typing"], "modelscope.models.nlp.peer.text_classification": ["copy", "torch"], "modelscope.models.nlp.fid_T5.text_generation": ["torch", "os", "io", "transformers"], "modelscope.models.nlp.space_T_en.text_to_sql": ["torch", "os", "typing", "text2sql_lgesql"], "modelscope.models.nlp.canmt.sequence_generator": ["numpy", "torch", "math", "typing", "sys", "fairseq"], "modelscope.models.nlp.canmt.canmt_translation": ["numpy", "torch", "math", "os", "typing"], "modelscope.models.nlp.canmt.canmt_model": ["numpy", "torch", "typing", "math", "fairseq"], "modelscope.models.nlp.bart.text_error_correction": ["torch", "os", "typing"], "modelscope.models.nlp.use.transformer": ["torch", "math"], "modelscope.models.nlp.use.user_satisfaction_estimation": ["transformers", "numpy", "torch", "os", "typing"], "modelscope.models.nlp.gpt_neo.backbone": ["transformers"], "modelscope.models.nlp.bert.configuration": ["typing", "transformers", "collections"], "modelscope.models.nlp.bert.siamese_uie": ["torch", "copy"], "modelscope.models.nlp.bert.fill_mask": [], "modelscope.models.nlp.bert.word_alignment": ["torch"], "modelscope.models.nlp.bert.text_ranking": [], "modelscope.models.nlp.bert.backbone": ["torch", "transformers", "math", "packaging"], "modelscope.models.nlp.bert.text_classification": [], 
"modelscope.models.nlp.bert.sentence_embedding": ["torch"], "modelscope.models.nlp.bert.document_segmentation": ["torch", "typing"], "modelscope.models.nlp.bert.token_classification": [], "modelscope.models.nlp.dgds.backbone": ["torch", "__future__", "os", "transformers"], "modelscope.models.nlp.dgds.document_grounded_dialog_rerank": ["torch", "os", "typing"], "modelscope.models.nlp.dgds.document_grounded_dialog_generate": ["torch", "os", "typing"], "modelscope.models.nlp.dgds.document_grounded_dialog_retrieval": ["torch", "os", "typing"], "modelscope.models.nlp.gpt3.configuration": ["torch", "transformers"], "modelscope.models.nlp.gpt3.backbone": ["transformers", "torch", "typing", "math", "os", "addict"], "modelscope.models.nlp.gpt3.tokenizer": ["typing", "tokenizers"], "modelscope.models.nlp.gpt3.distributed_gpt3": ["transformers", "torch", "math", "collections", "megatron_util", "os", "typing"], "modelscope.models.nlp.gpt3.text_generation": ["torch", "typing", "transformers", "collections"], "modelscope.models.nlp.deberta_v2.configuration": ["transformers"], "modelscope.models.nlp.deberta_v2.fill_mask": ["torch", "typing", "transformers"], "modelscope.models.nlp.deberta_v2.backbone": ["torch", "typing", "transformers", "collections"], "modelscope.models.nlp.deberta_v2.tokenization": ["transformers", "unicodedata", "sentencepiece", "typing", "os"], "modelscope.models.nlp.deberta_v2.tokenization_fast": ["os", "typing", "transformers", "shutil"], "modelscope.models.nlp.codegeex.codegeex_for_code_translation": ["torch", "copy", "typing"], "modelscope.models.nlp.codegeex.tokenizer": ["torch", "typing", "transformers"], "modelscope.models.nlp.codegeex.codegeex_for_code_generation": ["torch", "copy", "typing"], "modelscope.models.nlp.codegeex.inference": ["torch", "typing"], "modelscope.models.nlp.codegeex.codegeex": ["torch", "math"], "modelscope.models.nlp.space.configuration": [], "modelscope.models.nlp.space.dialog_modeling": ["os", "typing"], "modelscope.models.nlp.space.dialog_state_tracking": ["torch", "typing", "transformers"], "modelscope.models.nlp.space.model.intent_unified_transformer": ["torch"], "modelscope.models.nlp.space.model.tokenization_space": ["transformers"], "modelscope.models.nlp.space.model.unified_transformer": ["torch", "numpy"], "modelscope.models.nlp.space.model.model_base": ["torch", "os"], "modelscope.models.nlp.space.model.generator": ["torch", "math", "numpy"], "modelscope.models.nlp.space.model.gen_unified_transformer": ["torch"], "modelscope.models.nlp.space.dialog_intent_prediction": ["os", "typing"], "modelscope.models.nlp.space.modules.transformer_block": ["torch"], "modelscope.models.nlp.space.modules.functions": ["torch", "numpy"], "modelscope.models.nlp.space.modules.multihead_attention": ["torch"], "modelscope.models.nlp.space.modules.feedforward": ["torch"], "modelscope.models.nlp.space.modules.embedder": ["torch"], "modelscope.models.nlp.fid_plug.configuration": ["transformers"], "modelscope.models.nlp.fid_plug.backbone": ["dataclasses", "transformers", "numpy", "torch", "math", "copy", "os", "typing"], "modelscope.models.nlp.fid_plug.text_generation": ["torch", "os", "io", "transformers"], "modelscope.models.nlp.gpt2.backbone": ["transformers"], "modelscope.models.nlp.plug.distributed_plug": ["torch", "typing", "megatron_util"], "modelscope.models.nlp.plug.configuration": ["copy", "json", "transformers"], "modelscope.models.nlp.plug.backbone": ["torch", "logging", "math", "megatron_util", "__future__"], "modelscope.models.nlp.plug.AnnealingLR": 
["torch", "math"], "modelscope.models.nlp.plug.generator": ["torch"], "modelscope.models.nlp.megatron_bert.configuration": ["typing", "transformers", "collections"], "modelscope.models.nlp.megatron_bert.fill_mask": ["torch", "transformers"], "modelscope.models.nlp.megatron_bert.backbone": ["torch", "transformers", "math"], "modelscope.models.nlp.space_T_cn.configuration": ["copy", "__future__", "logging", "json"], "modelscope.models.nlp.space_T_cn.backbone": ["shutil", "numpy", "torch", "__future__", "tempfile", "math", "copy", "tarfile", "os"], "modelscope.models.nlp.space_T_cn.table_question_answering": ["transformers", "numpy", "torch", "os", "typing"], "modelscope.models.nlp.ponet.configuration": ["transformers"], "modelscope.models.nlp.ponet.fill_mask": ["torch", "transformers"], "modelscope.models.nlp.ponet.backbone": ["distutils", "transformers", "torch", "math", "packaging"], "modelscope.models.nlp.ponet.tokenization": ["typing", "transformers"], "modelscope.models.nlp.ponet.document_segmentation": ["torch", "typing"], "modelscope.models.nlp.llama.configuration": ["transformers"], "modelscope.models.nlp.llama.convert_llama_weights_to_hf": ["shutil", "gc", "json", "torch", "math", "os", "argparse"], "modelscope.models.nlp.llama.backbone": ["torch", "typing", "transformers", "math"], "modelscope.models.nlp.llama.tokenization": ["transformers", "shutil", "sentencepiece", "os", "typing"], "modelscope.models.nlp.llama.tokenization_fast": ["os", "typing", "transformers", "shutil"], "modelscope.models.nlp.llama.text_generation": ["torch", "typing"], "modelscope.models.nlp.lstm.backbone": ["torch"], "modelscope.models.nlp.lstm.token_classification": [], "modelscope.models.cv.image_deblur.nafnet_for_image_deblur": ["torch", "os", "typing"], "modelscope.models.cv.vision_middleware.backbone": ["numpy", "torch", "math", "collections", "os", "typing"], "modelscope.models.cv.vision_middleware.model": ["json", "torch", "typing", "os"], "modelscope.models.cv.vision_middleware.head": ["torch", "abc", "mmcv", "numpy"], "modelscope.models.cv.vision_middleware.vim": ["torch", "einops", "math"], "modelscope.models.cv.image_quality_assessment_man.swin": ["warnings", "itertools", "torch", "einops", "math", "collections"], "modelscope.models.cv.image_quality_assessment_man.maniqa": ["timm", "torch", "einops"], "modelscope.models.cv.image_quality_assessment_man.image_quality_assessment_man": ["torch", "os", "typing"], "modelscope.models.cv.product_retrieval_embedding.item_detection": ["cv2", "numpy"], "modelscope.models.cv.product_retrieval_embedding.item_model": ["torch", "os", "typing", "numpy"], "modelscope.models.cv.product_retrieval_embedding.item_embedding": ["cv2", "torch", "numpy"], "modelscope.models.cv.body_2d_keypoints.w48": [], "modelscope.models.cv.body_2d_keypoints.hrnet_v2": ["torch", "os", "numpy"], "modelscope.models.cv.body_2d_keypoints.hrnet_basic_modules": ["torch"], "modelscope.models.cv.indoor_layout_estimation.panovit": ["torch", "os", "yacs", "numpy"], "modelscope.models.cv.indoor_layout_estimation.networks.modality.layout": ["numpy", "torch", "scipy", "math", "shapely"], "modelscope.models.cv.indoor_layout_estimation.networks.misc.panostretch": ["functools", "scipy", "numpy"], "modelscope.models.cv.indoor_layout_estimation.networks.misc.fourier": ["PIL", "scipy", "numpy"], "modelscope.models.cv.indoor_layout_estimation.networks.misc.post_proc": ["scipy", "sklearn", "numpy"], "modelscope.models.cv.indoor_layout_estimation.networks.panovit": ["torch", "numpy"], 
"modelscope.models.cv.indoor_layout_estimation.networks.utils": ["torch", "numpy"], "modelscope.models.cv.indoor_layout_estimation.networks.backbone.vit_horizon_pry_image": ["timm", "torch", "numpy"], "modelscope.models.cv.indoor_layout_estimation.networks.backbone.resnet_DA": ["torch", "torchvision"], "modelscope.models.cv.salient_detection.salient_model": ["PIL", "torch", "cv2", "torchvision", "os"], "modelscope.models.cv.salient_detection.models.senet": ["torch"], "modelscope.models.cv.salient_detection.models.utils": ["torch"], "modelscope.models.cv.salient_detection.models.modules": ["torch"], "modelscope.models.cv.salient_detection.models.u2net": ["torch"], "modelscope.models.cv.salient_detection.models.backbone.Res2Net_v1b": ["torch", "math"], "modelscope.models.cv.image_quality_assessment_degradation.degradation_model": ["time", "torchvision", "json", "numpy", "cv2", "torch", "collections"], "modelscope.models.cv.image_quality_assessment_degradation.image_quality_assessment_degradation": ["torch", "os", "typing"], "modelscope.models.cv.image_portrait_enhancement.losses.model_irse": ["torch"], "modelscope.models.cv.image_portrait_enhancement.losses.losses": ["torch"], "modelscope.models.cv.image_portrait_enhancement.losses.helpers": ["torch", "collections"], "modelscope.models.cv.image_portrait_enhancement.retinaface.detection": ["cv2", "torch", "os", "numpy"], "modelscope.models.cv.image_portrait_enhancement.retinaface.models.retinaface": ["torch", "torchvision", "collections"], "modelscope.models.cv.image_portrait_enhancement.retinaface.models.net": ["torch", "time", "torchvision"], "modelscope.models.cv.image_portrait_enhancement.retinaface.utils": ["torch", "itertools", "math", "numpy"], "modelscope.models.cv.image_portrait_enhancement.gpen": ["itertools", "random", "torch", "functools", "operator", "math"], "modelscope.models.cv.image_portrait_enhancement.image_portrait_enhancement": ["torch", "os", "typing", "math"], "modelscope.models.cv.image_portrait_enhancement.align_faces": ["cv2", "skimage", "numpy"], "modelscope.models.cv.image_portrait_enhancement.eqface.fqa": ["cv2", "torch", "os", "numpy"], "modelscope.models.cv.image_portrait_enhancement.eqface.model_resnet": ["torch"], "modelscope.models.cv.abnormal_object_detection.mmdet_ms.roi_head.mask_scoring_roi_head": ["torch", "mmdet"], "modelscope.models.cv.abnormal_object_detection.mmdet_ms.roi_head.roi_extractors.single_level_roi_extractor": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.abnormal_object_detection.mmdet_model": ["torch", "os", "numpy"], "modelscope.models.cv.image_probing_model.backbone": ["PIL", "numpy", "torch", "functools", "operator", "sys", "math", "collections", "torchvision"], "modelscope.models.cv.image_probing_model.model": ["json", "torch", "typing", "os"], "modelscope.models.cv.image_probing_model.utils": ["torch", "re"], "modelscope.models.cv.tinynas_classfication.super_res_kxkx": ["torch", "uuid"], "modelscope.models.cv.tinynas_classfication.super_res_k1kxk1": ["torch", "uuid"], "modelscope.models.cv.tinynas_classfication.model_zoo": [], "modelscope.models.cv.tinynas_classfication.super_blocks": ["torch", "uuid"], "modelscope.models.cv.tinynas_classfication.basic_blocks": ["torch", "uuid", "numpy"], "modelscope.models.cv.tinynas_classfication.master_net": ["torch"], "modelscope.models.cv.tinynas_classfication.plain_net_utils": ["torch"], "modelscope.models.cv.tinynas_classfication.super_res_idwexkx": ["torch", "uuid"], "modelscope.models.cv.tinynas_classfication.global_utils": [], 
"modelscope.models.cv.image_to_image_translation.model_translation": ["torch", "math"], "modelscope.models.cv.image_to_image_translation.models.autoencoder": ["torch", "math"], "modelscope.models.cv.image_to_image_translation.models.clip": ["torch", "math"], "modelscope.models.cv.image_to_image_translation.ops.metrics": ["torch", "scipy", "numpy"], "modelscope.models.cv.image_to_image_translation.ops.diffusion": ["torch", "math"], "modelscope.models.cv.image_to_image_translation.ops.apps": ["PIL", "numpy", "torch", "artist", "torchvision", "os"], "modelscope.models.cv.image_to_image_translation.ops.svd": ["torch"], "modelscope.models.cv.image_to_image_translation.ops.random_mask": ["cv2", "numpy"], "modelscope.models.cv.image_to_image_translation.ops.degradation": ["random", "numpy", "cv2", "torch", "scipy", "os", "math"], "modelscope.models.cv.image_to_image_translation.ops.random_color": ["colorsys", "random"], "modelscope.models.cv.image_to_image_translation.ops.utils": ["PIL", "hashlib", "multiprocessing", "base64", "numpy", "cv2", "json", "torch", "math", "io", "binascii", "os", "zipfile"], "modelscope.models.cv.image_to_image_translation.ops.losses": ["torch", "math"], "modelscope.models.cv.image_to_image_translation.data.transforms": ["torchvision", "PIL", "math", "random"], "modelscope.models.cv.video_human_matting.models.decoder": ["torch", "typing"], "modelscope.models.cv.video_human_matting.models.effv2": ["torch"], "modelscope.models.cv.video_human_matting.models.lraspp": ["torch"], "modelscope.models.cv.video_human_matting.models.matting": ["torch", "typing"], "modelscope.models.cv.video_human_matting.models.deep_guided_filter": ["torch"], "modelscope.models.cv.video_human_matting.model": ["numpy", "torch", "torchvision", "os", "typing"], "modelscope.models.cv.language_guided_video_summarization.transformer.models": ["torch", "numpy"], "modelscope.models.cv.language_guided_video_summarization.transformer.modules": ["torch"], "modelscope.models.cv.language_guided_video_summarization.transformer.sub_layers": ["torch", "numpy"], "modelscope.models.cv.language_guided_video_summarization.transformer.layers": ["torch"], "modelscope.models.cv.language_guided_video_summarization.summarizer": ["numpy", "videofeatures_clipit", "torch", "bmt_clipit", "os", "typing", "argparse"], "modelscope.models.cv.facial_landmark_confidence.flc.facial_landmark_confidence": ["PIL", "numpy", "torch", "cv2", "os"], "modelscope.models.cv.facial_landmark_confidence.flc.manual_landmark_net": ["torch", "math"], "modelscope.models.cv.image_to_image_generation.models.autoencoder": ["torch", "math"], "modelscope.models.cv.image_to_image_generation.models.clip": ["torch", "math"], "modelscope.models.cv.image_to_image_generation.model": ["torch", "math"], "modelscope.models.cv.image_to_image_generation.ops.diffusion": ["torch", "math"], "modelscope.models.cv.image_to_image_generation.ops.losses": ["torch", "math"], "modelscope.models.cv.image_to_image_generation.data.transforms": ["torchvision", "PIL", "math", "random"], "modelscope.models.cv.image_body_reshaping.person_info": ["torch", "cv2", "copy", "numpy"], "modelscope.models.cv.image_body_reshaping.model": ["torch"], "modelscope.models.cv.image_body_reshaping.slim_utils": ["random", "numpy", "cv2", "torch", "os", "math", "numba"], "modelscope.models.cv.image_body_reshaping.pose_estimator.body": ["numpy", "cv2", "torch", "scipy", "math"], "modelscope.models.cv.image_body_reshaping.pose_estimator.util": ["numpy"], 
"modelscope.models.cv.image_body_reshaping.pose_estimator.model": ["torch", "collections"], "modelscope.models.cv.image_body_reshaping.image_body_reshaping": ["numpy", "cv2", "torch", "os", "typing"], "modelscope.models.cv.image_human_parsing.m2fp_net": ["torch", "os", "typing"], "modelscope.models.cv.image_human_parsing.m2fp.m2fp_decoder": ["torch"], "modelscope.models.cv.image_human_parsing.m2fp.m2fp_encoder": ["torch", "typing", "numpy"], "modelscope.models.cv.image_human_parsing.parsing_utils": ["copy", "torch", "PIL", "numpy"], "modelscope.models.cv.image_human_parsing.backbone.deeplab_resnet": ["torch", "numpy"], "modelscope.models.cv.image_skychange.ptsemseg.hrnet_super_and_ocr": ["torch", "__future__", "numpy"], "modelscope.models.cv.image_skychange.ptsemseg.BlockModules": ["torch"], "modelscope.models.cv.image_skychange.ptsemseg.unet": ["torch"], "modelscope.models.cv.image_skychange.ptsemseg.hrnet_backnone": ["torch", "os", "logging", "numpy"], "modelscope.models.cv.image_skychange.skychange": ["numbers", "PIL", "pdb", "numpy", "cv2", "json", "torch", "collections", "torchvision", "os"], "modelscope.models.cv.image_skychange.preprocessor": ["numbers", "pdb", "numpy", "cv2", "json", "torch", "torchvision", "typing"], "modelscope.models.cv.image_skychange.skychange_model": ["pdb", "cv2", "torch", "json", "math", "collections", "time", "os", "typing"], "modelscope.models.cv.video_object_segmentation.aggregate": ["torch"], "modelscope.models.cv.video_object_segmentation.inference_memory_bank": ["torch", "math"], "modelscope.models.cv.video_object_segmentation.inference_core": ["torch"], "modelscope.models.cv.video_object_segmentation.model": ["torch", "os", "typing"], "modelscope.models.cv.video_object_segmentation.eval_network": ["torch"], "modelscope.models.cv.video_object_segmentation.mod_resnet": ["torch", "math", "collections"], "modelscope.models.cv.video_object_segmentation.network": ["torch", "math"], "modelscope.models.cv.video_object_segmentation.modules": ["torch", "torchvision"], "modelscope.models.cv.video_object_segmentation.cbam": ["torch"], "modelscope.models.cv.face_reconstruction.models.nv_diffrast": ["nvdiffrast", "numpy", "torch", "warnings", "typing"], "modelscope.models.cv.face_reconstruction.models.renderer": ["torch", "imageio", "skimage", "numpy"], "modelscope.models.cv.face_reconstruction.models.unet": ["torch", "warnings"], "modelscope.models.cv.face_reconstruction.models.bfm": ["torch", "os", "scipy", "numpy"], "modelscope.models.cv.face_reconstruction.models.opt": [], "modelscope.models.cv.face_reconstruction.models.networks": ["torch", "os", "typing", "kornia"], "modelscope.models.cv.face_reconstruction.models.de_retouching_module": ["torch"], "modelscope.models.cv.face_reconstruction.models.losses": ["torch", "numpy", "kornia"], "modelscope.models.cv.face_reconstruction.models.pix2pix.pix2pix_options": [], "modelscope.models.cv.face_reconstruction.models.pix2pix.pix2pix_model": ["torch"], "modelscope.models.cv.face_reconstruction.models.pix2pix.networks": ["torch", "functools"], "modelscope.models.cv.face_reconstruction.models.facelandmark.nets.large_eyeball_net": ["torch"], "modelscope.models.cv.face_reconstruction.models.facelandmark.nets.large_base_lmks_net": ["torch"], "modelscope.models.cv.face_reconstruction.models.facelandmark.large_base_lmks_infer": ["torch", "numpy"], "modelscope.models.cv.face_reconstruction.models.facerecon_model": ["numpy", "cv2", "torch", "collections", "os"], "modelscope.models.cv.face_reconstruction.utils": ["PIL", 
"numpy", "array", "cv2", "torch", "scipy", "math", "numba", "os", "argparse"], "modelscope.models.cv.facial_expression_recognition.fer.transforms": ["numbers", "PIL", "numpy", "torch", "types"], "modelscope.models.cv.facial_expression_recognition.fer.vgg": ["torch"], "modelscope.models.cv.facial_expression_recognition.fer.facial_expression_recognition": ["PIL", "numpy", "torch", "cv2", "os"], "modelscope.models.cv.face_recognition.align_face": ["cv2", "skimage", "numpy"], "modelscope.models.cv.face_recognition.torchkit.rts_backbone": ["torch", "os", "math", "collections"], "modelscope.models.cv.face_recognition.torchkit.backbone.facemask_backbone": ["torch", "collections"], "modelscope.models.cv.face_recognition.torchkit.backbone.model_irse": ["torch", "collections"], "modelscope.models.cv.face_recognition.torchkit.backbone.model_resnet": ["torch"], "modelscope.models.cv.face_recognition.torchkit.backbone.common": ["torch"], "modelscope.models.cv.face_recognition.torchkit.backbone.arcface_backbone": ["torch"], "modelscope.models.cv.face_generation.stylegan2": ["random", "torch", "functools", "operator", "math"], "modelscope.models.cv.face_generation.op.fused_act": ["torch", "os"], "modelscope.models.cv.face_generation.op.upfirdn2d": ["torch", "os", "collections"], "modelscope.models.cv.face_generation.op.conv2d_gradfix": ["torch", "warnings", "contextlib"], "modelscope.models.cv.shop_segmentation.head_fpn": ["timm", "torch", "mmcv", "numpy"], "modelscope.models.cv.shop_segmentation.models": ["torch", "timm", "math", "collections"], "modelscope.models.cv.shop_segmentation.common": ["torch", "warnings"], "modelscope.models.cv.shop_segmentation.utils": ["torch", "functools", "ftfy", "gzip", "os", "regex", "typing", "html"], "modelscope.models.cv.shop_segmentation.shop_seg_base": ["torch"], "modelscope.models.cv.shop_segmentation.neck_fpn": ["torch", "mmcv", "timm"], "modelscope.models.cv.shop_segmentation.shop_seg_model": ["PIL", "numpy", "torch", "json", "os", "typing"], "modelscope.models.cv.image_instance_segmentation.maskdino.ms_deform_attn": ["warnings", "mmcv", "torch", "__future__", "math"], "modelscope.models.cv.image_instance_segmentation.maskdino.position_encoding": ["torch", "math"], "modelscope.models.cv.image_instance_segmentation.maskdino.dino_decoder": ["torch", "typing"], "modelscope.models.cv.image_instance_segmentation.maskdino.maskdino_encoder": ["torch", "typing", "numpy"], "modelscope.models.cv.image_instance_segmentation.maskdino.utils": ["copy", "torch", "math"], "modelscope.models.cv.image_instance_segmentation.maskdino.maskdino_decoder": ["torch"], "modelscope.models.cv.image_instance_segmentation.maskdino_swin": ["torch", "os"], "modelscope.models.cv.image_instance_segmentation.datasets.transforms": ["os", "numpy"], "modelscope.models.cv.image_instance_segmentation.fastinst.fastinst_encoder": ["torch", "logging", "typing"], "modelscope.models.cv.image_instance_segmentation.fastinst.fastinst_decoder": ["torch", "math"], "modelscope.models.cv.image_instance_segmentation.cascade_mask_rcnn_swin": ["torch", "os", "collections"], "modelscope.models.cv.image_instance_segmentation.fastinst_model": ["torch", "os", "typing"], "modelscope.models.cv.image_instance_segmentation.model": ["torch", "os", "typing"], "modelscope.models.cv.image_instance_segmentation.postprocess_utils": ["itertools", "numpy", "pycocotools", "cv2", "torch"], "modelscope.models.cv.image_instance_segmentation.backbones.resnet": ["torch"], 
"modelscope.models.cv.image_instance_segmentation.backbones.swin_transformer": ["torch", "timm", "numpy"], "modelscope.models.cv.image_instance_segmentation.maskdino_model": ["torch", "os", "typing"], "modelscope.models.cv.action_detection.modules.resnet": ["torch", "detectron2"], "modelscope.models.cv.action_detection.modules.action_detection_pytorch": ["torch", "fvcore", "logging", "typing", "detectron2"], "modelscope.models.cv.action_detection.action_detection_onnx": ["urllib", "subprocess", "shutil", "numpy", "cv2", "tempfile", "onnxruntime", "os", "uuid"], "modelscope.models.cv.vop_retrieval.backbone": ["urllib", "hashlib", "numpy", "torch", "tqdm", "collections", "warnings", "os", "typing"], "modelscope.models.cv.vop_retrieval.basic_utils": ["PIL", "ujson", "shutil", "random", "numpy", "cv2", "torch", "collections", "torchvision", "os", "pickle", "zipfile"], "modelscope.models.cv.vop_retrieval.model": ["torch", "os"], "modelscope.models.cv.vop_retrieval.tokenization_clip": ["gzip", "torch", "os", "regex", "functools", "ftfy", "html"], "modelscope.models.cv.vop_retrieval.model_se": ["torch", "os"], "modelscope.models.cv.video_instance_segmentation.track.kernel_update_head": ["torch", "mmcv", "mmdet", "numpy"], "modelscope.models.cv.video_instance_segmentation.track.mask_hungarian_assigner": ["torch", "scipy", "mmdet", "numpy"], "modelscope.models.cv.video_instance_segmentation.video_knet": ["torch", "mmdet"], "modelscope.models.cv.video_instance_segmentation.head.kernel_updator": ["torch", "mmcv"], "modelscope.models.cv.video_instance_segmentation.head.kernel_update_head": ["torch", "mmcv", "mmdet", "numpy"], "modelscope.models.cv.video_instance_segmentation.head.kernel_frame_iter_head": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.video_instance_segmentation.head.kernel_head": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.video_instance_segmentation.head.kernel_iter_head": ["torch", "mmdet"], "modelscope.models.cv.video_instance_segmentation.utils": ["torch", "mmdet", "numpy"], "modelscope.models.cv.video_instance_segmentation.neck.msdeformattn_decoder": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.super_resolution.ecb": ["torch"], "modelscope.models.cv.super_resolution.ecbsr_model": ["torch", "os", "typing"], "modelscope.models.cv.super_resolution.rrdbnet_arch": ["torch"], "modelscope.models.cv.super_resolution.arch_util": ["torchvision", "warnings", "itertools", "torch", "math", "collections"], "modelscope.models.cv.ocr_detection.preprocessor": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "modelscope.models.cv.ocr_detection.model": ["torch", "os", "typing", "numpy"], "modelscope.models.cv.ocr_detection.utils": ["cv2", "pyclipper", "shapely", "numpy"], "modelscope.models.cv.ocr_detection.modules.dbnet": ["torch", "math", "os", "sys", "collections"], "modelscope.models.cv.ocr_detection.modules.seg_detector_loss": ["torch", "sys"], "modelscope.models.cv.panorama_depth_estimation.networks.util": ["cv2", "scipy", "numpy"], "modelscope.models.cv.panorama_depth_estimation.networks.mobilenet": ["torch"], "modelscope.models.cv.panorama_depth_estimation.networks.equi": ["torch", "__future__", "collections", "numpy"], "modelscope.models.cv.panorama_depth_estimation.networks.resnet": ["torch"], "modelscope.models.cv.panorama_depth_estimation.networks.unifuse": ["torch", "__future__", "collections", "numpy"], "modelscope.models.cv.panorama_depth_estimation.networks.layers": ["torch", "numpy"], "modelscope.models.cv.panorama_depth_estimation.unifuse_model": ["torch", 
"os", "torchvision", "numpy"], "modelscope.models.cv.stream_yolo.utils.format": ["math"], "modelscope.models.cv.stream_yolo.utils.boxes": ["torch", "torchvision"], "modelscope.models.cv.stream_yolo.models.tal_head": ["torch"], "modelscope.models.cv.stream_yolo.models.dfp_pafpn": ["torch"], "modelscope.models.cv.stream_yolo.models.streamyolo": ["torch"], "modelscope.models.cv.stream_yolo.models.network_blocks": ["torch"], "modelscope.models.cv.stream_yolo.models.darknet": ["torch"], "modelscope.models.cv.stream_yolo.realtime_video_detector": ["numpy", "cv2", "torch", "logging", "json", "tqdm", "time", "os", "argparse"], "modelscope.models.cv.stream_yolo.exp.build": ["os", "sys"], "modelscope.models.cv.stream_yolo.exp.base_exp": ["torch", "abc"], "modelscope.models.cv.stream_yolo.exp.default.streamyolo": ["torch", "os", "sys"], "modelscope.models.cv.stream_yolo.exp.yolox_base": ["torch", "os", "random"], "modelscope.models.cv.stream_yolo.data.data_augment": ["cv2", "math", "random", "numpy"], "modelscope.models.cv.virual_tryon.sdafnet": ["torch", "random", "numpy"], "modelscope.models.cv.bad_image_detecting.bad_image_detecting": ["numpy", "torch", "torchvision", "os", "typing"], "modelscope.models.cv.human_reconstruction.Reconstruction": ["PIL", "skimage", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "modelscope.models.cv.human_reconstruction.models.Surface_head": ["torch"], "modelscope.models.cv.human_reconstruction.models.Res_backbone": ["torch", "numpy"], "modelscope.models.cv.human_reconstruction.models.Embedding": ["torch"], "modelscope.models.cv.human_reconstruction.models.PixToMesh": ["torch"], "modelscope.models.cv.human_reconstruction.models.networks": ["torch", "functools", "numpy"], "modelscope.models.cv.human_reconstruction.models.human_segmenter": ["cv2", "tensorflow", "numpy"], "modelscope.models.cv.human_reconstruction.models.geometry": ["torch"], "modelscope.models.cv.human_reconstruction.models.detectors": ["torch", "numpy"], "modelscope.models.cv.human_reconstruction.utils": ["mcubes", "os", "torch", "numpy"], "modelscope.models.cv.image_driving_perception.preprocessor": ["cv2", "torch", "typing", "numpy"], "modelscope.models.cv.image_driving_perception.utils": ["torch", "time", "torchvision", "numpy"], "modelscope.models.cv.image_driving_perception.image_driving_percetion_model": ["numpy", "cv2", "torch", "os", "typing"], "modelscope.models.cv.video_streaming_perception.longshortnet.longshortnet": ["numpy", "cv2", "torch", "logging", "json", "tqdm", "time", "os", "argparse"], "modelscope.models.cv.video_streaming_perception.longshortnet.models.longshort_backbone_neck": ["torch"], "modelscope.models.cv.video_streaming_perception.longshortnet.models.longshort": ["torch"], "modelscope.models.cv.video_streaming_perception.longshortnet.models.dfp_pafpn_short": ["torch", "collections"], "modelscope.models.cv.video_streaming_perception.longshortnet.models.dfp_pafpn_long": ["torch", "collections"], "modelscope.models.cv.video_streaming_perception.longshortnet.exp.longshortnet_base": [], "modelscope.models.cv.image_paintbyexample.model": ["torch", "paint_ldm", "omegaconf", "os", "typing"], "modelscope.models.cv.image_inpainting.refinement": ["numpy", "cv2", "torch", "tqdm", "kornia"], "modelscope.models.cv.image_inpainting.model": ["torch", "os", "typing"], "modelscope.models.cv.image_inpainting.default": ["torch", "bisect"], "modelscope.models.cv.image_inpainting.modules.ade20k.resnet": ["torch", "os", "math"], 
"modelscope.models.cv.image_inpainting.modules.ade20k.base": ["torch", "os"], "modelscope.models.cv.image_inpainting.modules.adversarial": ["torch", "typing"], "modelscope.models.cv.image_inpainting.modules.perceptual": ["torch", "torchvision"], "modelscope.models.cv.image_inpainting.modules.inception": ["torch", "torchvision"], "modelscope.models.cv.image_inpainting.modules.ffc": ["torch", "numpy", "kornia"], "modelscope.models.cv.image_inpainting.modules.pix2pixhd": ["numpy", "torch", "logging", "functools", "collections"], "modelscope.models.cv.image_inpainting.modules.feature_matching": ["torch", "typing"], "modelscope.models.cv.image_inpainting.base": ["torch", "typing"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.core.bbox.coders.nms_free_coder": ["torch", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.core.bbox.util": ["torch", "mmdet3d", "numpy"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.core.bbox.match_costs.match_cost": ["torch", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.core.bbox.assigners.hungarian_assigner_3d": ["torch", "scipy", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.nuscenes_dataset": ["mmdet3d", "mmdet", "numpy"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.pipelines.loading": ["mmcv", "mmdet", "numpy"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.datasets.pipelines.transform_3d": ["PIL", "copy", "mmcv", "numpy", "torch", "mmdet3d", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.dense_heads.petrv2_dednhead": ["mmcv", "numpy", "torch", "math", "copy", "mmdet3d", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.dense_heads.depth_net": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.necks.cp_fpn": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.petr_transformer": ["copy", "warnings", "mmcv", "torch", "mmdet", "typing", "math"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.utils.positional_encoding": ["torch", "mmcv", "math"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.backbones.vovnet": ["torch", "mmdet", "mmcv", "collections"], "modelscope.models.cv.object_detection_3d.depe.mmdet3d_plugin.models.detectors.petr3d": ["mmcv", "numpy", "torch", "mmdet3d", "mmdet"], "modelscope.models.cv.object_detection_3d.depe.result_vis": ["numpy", "cv2", "json", "pyquaternion", "os", "mmdet3d", "pickle", "argparse"], "modelscope.models.cv.object_detection_3d.depe.depe_detect": ["torch", "os", "typing", "numpy"], "modelscope.models.cv.image_quality_assessment_mos.image_quality_assessment_mos": ["torch", "os", "typing"], "modelscope.models.cv.image_quality_assessment_mos.heads.simple_head": ["torch"], "modelscope.models.cv.image_quality_assessment_mos.backbones.resnet": ["torch", "os"], "modelscope.models.cv.image_quality_assessment_mos.censeo_ivqa_model": ["torch"], "modelscope.models.cv.image_debanding.rrdb.rrdb_image_debanding": ["torch", "os", "typing"], "modelscope.models.cv.image_restoration.demoire_models.nets": ["torch"], "modelscope.models.cv.image_restoration.image_restoration_model": ["cv2", "torch", "os", "numpy"], "modelscope.models.cv.cartoon.model_tf": ["tensorflow", "typing"], "modelscope.models.cv.cartoon.facelib.facer": ["cv2", "time", "numpy"], 
"modelscope.models.cv.cartoon.facelib.config": ["os", "easydict", "numpy"], "modelscope.models.cv.cartoon.facelib.LK.lk": ["numpy"], "modelscope.models.cv.cartoon.facelib.face_detector": ["cv2", "tensorflow", "numpy", "time"], "modelscope.models.cv.cartoon.facelib.face_landmark": ["cv2", "tensorflow", "numpy"], "modelscope.models.cv.cartoon.loss": ["tensorflow", "joblib", "skimage", "numpy", "scipy", "os"], "modelscope.models.cv.cartoon.utils": ["tensorflow", "random", "numpy", "cv2", "os"], "modelscope.models.cv.cartoon.mtcnn_pytorch.src.align_trans": ["cv2", "numpy"], "modelscope.models.cv.cartoon.mtcnn_pytorch.src.matlab_cp2tform": ["numpy"], "modelscope.models.cv.cartoon.network": ["tensorflow"], "modelscope.models.cv.vision_efficient_tuning.vision_efficient_tuning": ["torch", "os", "collections"], "modelscope.models.cv.vision_efficient_tuning.timm_vision_transformer": ["itertools", "torch", "logging", "functools", "math", "collections"], "modelscope.models.cv.vision_efficient_tuning.backbone": ["torch", "functools"], "modelscope.models.cv.vision_efficient_tuning.timm_weight_init": ["torch", "warnings", "math"], "modelscope.models.cv.vision_efficient_tuning.timm_helpers": ["torch", "typing", "itertools", "math"], "modelscope.models.cv.vision_efficient_tuning.petl": ["torch", "torchvision", "math", "collections"], "modelscope.models.cv.vision_efficient_tuning.model": ["torch", "typing"], "modelscope.models.cv.vision_efficient_tuning.head": ["torch"], "modelscope.models.cv.movie_scene_segmentation.utils.save_op": ["subprocess", "numpy", "cv2", "os", "tqdm"], "modelscope.models.cv.movie_scene_segmentation.utils.shot_encoder": ["torch", "typing"], "modelscope.models.cv.movie_scene_segmentation.utils.trn": ["torch", "transformers"], "modelscope.models.cv.movie_scene_segmentation.utils.head": ["torch"], "modelscope.models.cv.movie_scene_segmentation.model": ["PIL", "numpy", "torch", "einops", "tqdm", "math", "shotdetect_scenedetect_lgss", "torchvision", "os", "typing"], "modelscope.models.cv.movie_scene_segmentation.get_model": [], "modelscope.models.cv.video_summarization.pgl_sum": ["torch", "math"], "modelscope.models.cv.video_summarization.base_model": ["cv2", "torch", "numpy"], "modelscope.models.cv.video_summarization.summarizer": ["torch", "os", "typing", "numpy"], "modelscope.models.cv.video_summarization.kts.cpd_auto": ["numpy"], "modelscope.models.cv.video_summarization.kts.cpd_nonlin": ["numpy"], "modelscope.models.cv.table_recognition.lineless_table_process": ["cv2", "torch", "shapely", "numpy"], "modelscope.models.cv.table_recognition.model_lore": ["numpy", "torch", "math", "copy", "os", "typing"], "modelscope.models.cv.table_recognition.modules.lore_processor": ["copy", "numpy", "torch", "os", "math"], "modelscope.models.cv.table_recognition.modules.lore_detector": ["copy", "numpy", "torch", "os", "math"], "modelscope.models.cv.image_matching.quadtree_attention_model": ["numpy", "cv2", "torch", "pathlib", "os"], "modelscope.models.cv.image_matching.config.default": ["yacs"], "modelscope.models.cv.image_matching.utils.misc": ["yacs"], "modelscope.models.cv.image_matching.loftr_quadtree.loftr": ["torch", "einops"], "modelscope.models.cv.image_matching.loftr_quadtree.utils.position_encoding": ["torch", "math"], "modelscope.models.cv.image_matching.loftr_quadtree.utils.coarse_matching": ["torch", "einops"], "modelscope.models.cv.image_matching.loftr_quadtree.utils.fine_matching": ["torch", "math", "kornia"], 
"modelscope.models.cv.image_matching.loftr_quadtree.loftr_module.quadtree_attention": ["torch", "timm"], "modelscope.models.cv.image_matching.loftr_quadtree.loftr_module.fine_preprocess": ["torch", "einops"], "modelscope.models.cv.image_matching.loftr_quadtree.loftr_module.transformer": ["timm", "torch", "einops", "math", "copy"], "modelscope.models.cv.image_matching.loftr_quadtree.loftr_module.linear_attention": ["torch"], "modelscope.models.cv.image_matching.loftr_quadtree.backbone.resnet_fpn": ["torch"], "modelscope.models.cv.tinynas_detection.detector": ["torch", "os", "torchvision", "pickle"], "modelscope.models.cv.tinynas_detection.tinynas_detector": [], "modelscope.models.cv.tinynas_detection.damo.apis.detector_evaluater": ["torch", "os"], "modelscope.models.cv.tinynas_detection.damo.apis.detector_inference": ["torch", "os", "tqdm"], "modelscope.models.cv.tinynas_detection.damo.structures.boxlist_ops": ["torch"], "modelscope.models.cv.tinynas_detection.damo.structures.bounding_box": ["torch"], "modelscope.models.cv.tinynas_detection.damo.structures.image_list": ["torch", "__future__"], "modelscope.models.cv.tinynas_detection.damo.utils.model_utils": ["copy", "time", "torch", "thop", "math"], "modelscope.models.cv.tinynas_detection.damo.utils.boxes": ["torch", "torchvision", "numpy"], "modelscope.models.cv.tinynas_detection.damo.utils.scheduler": ["math"], "modelscope.models.cv.tinynas_detection.damo.augmentations.box_level_augs.box_level_augs": ["random", "numpy"], "modelscope.models.cv.tinynas_detection.damo.augmentations.box_level_augs.gaussian_maps": ["torch", "math"], "modelscope.models.cv.tinynas_detection.damo.augmentations.box_level_augs.color_augs": ["torch", "random"], "modelscope.models.cv.tinynas_detection.damo.augmentations.box_level_augs.geometric_augs": ["torch", "torchvision", "copy", "random"], "modelscope.models.cv.tinynas_detection.damo.augmentations.scale_aware_aug": ["copy"], "modelscope.models.cv.tinynas_detection.damo.detectors.detector": ["torch"], "modelscope.models.cv.tinynas_detection.damo.base_models.losses.distill_loss": ["torch"], "modelscope.models.cv.tinynas_detection.damo.base_models.losses.gfocal_loss": ["torch", "functools"], "modelscope.models.cv.tinynas_detection.damo.base_models.core.ota_assigner": ["torch", "warnings"], "modelscope.models.cv.tinynas_detection.damo.base_models.core.base_ops": ["torch", "math"], "modelscope.models.cv.tinynas_detection.damo.base_models.core.repvgg_block": ["torch", "numpy"], "modelscope.models.cv.tinynas_detection.damo.base_models.core.neck_ops": ["torch", "numpy"], "modelscope.models.cv.tinynas_detection.damo.base_models.core.ops": ["torch", "numpy"], "modelscope.models.cv.tinynas_detection.damo.base_models.core.utils": ["torch", "functools"], "modelscope.models.cv.tinynas_detection.damo.base_models.core.weight_init": ["torch", "numpy"], "modelscope.models.cv.tinynas_detection.damo.base_models.necks.giraffe_config": ["collections", "networkx"], "modelscope.models.cv.tinynas_detection.damo.base_models.necks.giraffe_fpn": ["timm", "numpy", "torch", "functools", "math", "collections", "typing"], "modelscope.models.cv.tinynas_detection.damo.base_models.necks.giraffe_fpn_btn": ["torch"], "modelscope.models.cv.tinynas_detection.damo.base_models.heads.gfocal_v2_tiny": ["torch", "functools", "numpy"], "modelscope.models.cv.tinynas_detection.damo.base_models.heads.zero_head": ["torch"], "modelscope.models.cv.tinynas_detection.damo.base_models.backbones.tinynas_csp": ["torch"], 
"modelscope.models.cv.tinynas_detection.damo.base_models.backbones.tinynas_res": ["torch"], "modelscope.models.cv.tinynas_detection.damo.base_models.backbones.darknet": ["torch"], "modelscope.models.cv.tinynas_detection.utils": ["shutil", "importlib", "os", "easydict", "tempfile", "sys"], "modelscope.models.cv.tinynas_detection.tinynas_damoyolo": [], "modelscope.models.cv.video_multi_object_tracking.utils.visualization": ["cv2", "numpy"], "modelscope.models.cv.video_multi_object_tracking.utils.utils": ["cv2", "torch", "numpy"], "modelscope.models.cv.video_multi_object_tracking.utils.kalman_filter": ["scipy", "numpy"], "modelscope.models.cv.video_multi_object_tracking.utils.image": ["cv2", "numpy"], "modelscope.models.cv.video_multi_object_tracking.tracker.matching": ["scipy", "lap", "numpy"], "modelscope.models.cv.video_multi_object_tracking.tracker.multitracker": ["torch", "collections", "numpy"], "modelscope.models.cv.video_multi_object_tracking.tracker.basetrack": ["collections", "numpy"], "modelscope.models.cv.video_multi_object_tracking.models.model": ["torch"], "modelscope.models.cv.video_multi_object_tracking.models.common": ["torch"], "modelscope.models.cv.video_multi_object_tracking.models.yolo": ["torch", "copy", "math"], "modelscope.models.cv.video_multi_object_tracking.models.decode": ["torch"], "modelscope.models.cv.nerf_recon_acc.nerf_recon_acc": ["numpy", "cv2", "torch", "tqdm", "time", "os", "glob"], "modelscope.models.cv.nerf_recon_acc.network.nerf": ["torch", "nerfacc", "numpy", "tinycudann"], "modelscope.models.cv.nerf_recon_acc.network.utils": ["mcubes", "numpy", "gc", "torch", "collections", "tinycudann"], "modelscope.models.cv.nerf_recon_acc.network.segmenter": ["tensorflow", "numpy"], "modelscope.models.cv.nerf_recon_acc.nerf_preprocess": ["subprocess", "tensorflow", "numpy", "cv2", "glob", "os", "typing"], "modelscope.models.cv.nerf_recon_acc.dataloader.nerf_dataset": ["PIL", "numpy", "json", "torch", "math", "torchvision", "os"], "modelscope.models.cv.nerf_recon_acc.dataloader.read_write_model": ["struct", "numpy", "os", "collections", "argparse"], "modelscope.models.cv.video_deinterlace.UNet_for_video_deinterlace": ["torch", "os", "copy", "typing"], "modelscope.models.cv.video_deinterlace.deinterlace_arch": ["torch"], "modelscope.models.cv.video_deinterlace.models.deep_fourier_upsampling": ["torch", "numpy"], "modelscope.models.cv.video_deinterlace.models.fre": ["torch"], "modelscope.models.cv.video_deinterlace.models.utils": ["torch"], "modelscope.models.cv.video_deinterlace.models.archs": ["torch", "numpy"], "modelscope.models.cv.video_deinterlace.models.enh": ["torch"], "modelscope.models.cv.cmdssl_video_embedding.resnet3d": ["torch"], "modelscope.models.cv.cmdssl_video_embedding.resnet2p1d": ["torch"], "modelscope.models.cv.cmdssl_video_embedding.c3d": ["torch"], "modelscope.models.cv.image_depth_estimation_bts.depth_estimation_bts_model": ["torch", "os"], "modelscope.models.cv.image_depth_estimation_bts.networks.decoder": ["torch"], "modelscope.models.cv.image_depth_estimation_bts.networks.bts_model": ["torch"], "modelscope.models.cv.image_depth_estimation_bts.networks.encoder": ["torch", "torchvision"], "modelscope.models.cv.image_depth_estimation_bts.networks.utils": ["torch", "math"], "modelscope.models.cv.motion_generation.model": [], "modelscope.models.cv.motion_generation.modules.rotation2xyz": ["torch"], "modelscope.models.cv.motion_generation.modules.respace": ["torch", "numpy"], "modelscope.models.cv.motion_generation.modules.smpl": ["contextlib", 
"numpy", "torch", "os", "smplx"], "modelscope.models.cv.motion_generation.modules.mdm": ["torch", "numpy", "clip"], "modelscope.models.cv.motion_generation.modules.gaussian_diffusion": ["copy", "numpy", "enum", "torch", "math"], "modelscope.models.cv.motion_generation.modules.cfg_sampler": ["torch", "copy"], "modelscope.models.cv.image_defrcn_fewshot.utils.requirements_check": ["importlib_metadata", "sys", "collections", "packaging", "importlib"], "modelscope.models.cv.image_defrcn_fewshot.utils.voc_register": ["numpy", "os", "fvcore", "xml", "detectron2"], "modelscope.models.cv.image_defrcn_fewshot.utils.configuration_mapper": ["detectron2"], "modelscope.models.cv.image_defrcn_fewshot.utils.model_surgery_op": ["torch", "os", "argparse"], "modelscope.models.cv.image_defrcn_fewshot.utils.coco_register": ["io", "contextlib", "pycocotools", "os", "fvcore", "detectron2"], "modelscope.models.cv.image_defrcn_fewshot.utils.register_data": [], "modelscope.models.cv.image_defrcn_fewshot.models.fast_rcnn": ["torch", "fvcore", "detectron2", "numpy"], "modelscope.models.cv.image_defrcn_fewshot.models.defrcn": ["torch", "os", "typing", "detectron2"], "modelscope.models.cv.image_defrcn_fewshot.models.resnet": ["torch", "torchvision"], "modelscope.models.cv.image_defrcn_fewshot.models.calibration_layer": ["cv2", "torch", "sklearn", "detectron2"], "modelscope.models.cv.image_defrcn_fewshot.models.gdl": ["torch"], "modelscope.models.cv.image_defrcn_fewshot.models.roi_heads": ["torch", "detectron2"], "modelscope.models.cv.image_defrcn_fewshot.defrcn_for_fewshot": ["torch", "os", "typing"], "modelscope.models.cv.image_defrcn_fewshot.evaluation.coco_evaluation": ["contextlib", "itertools", "numpy", "pycocotools", "json", "torch", "logging", "fvcore", "tabulate", "collections", "copy", "io", "os", "detectron2"], "modelscope.models.cv.image_defrcn_fewshot.evaluation.pascal_voc_evaluation": ["detectron2", "numpy", "os", "tempfile", "collections"], "modelscope.models.cv.image_defrcn_fewshot.evaluation.evaluator": ["torch", "logging", "datetime", "time", "detectron2"], "modelscope.models.cv.ocr_recognition.preprocessor": ["PIL", "numpy", "torch", "cv2", "os"], "modelscope.models.cv.ocr_recognition.model": ["torch", "os"], "modelscope.models.cv.ocr_recognition.modules.convnextvit": ["torch"], "modelscope.models.cv.ocr_recognition.modules.crnn": ["torch"], "modelscope.models.cv.ocr_recognition.modules.vitstr": ["torch", "logging", "functools", "copy", "__future__"], "modelscope.models.cv.ocr_recognition.modules.timm_tinyc": ["copy", "itertools", "torch", "logging", "functools", "math", "collections"], "modelscope.models.cv.ocr_recognition.modules.convnext": ["torch"], "modelscope.models.cv.video_panoptic_segmentation.track.quasi_dense_embed_tracker": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.video_panoptic_segmentation.head.mask": ["numpy", "pycocotools", "cv2", "torch", "__future__"], "modelscope.models.cv.video_panoptic_segmentation.head.kernel_updator": ["torch", "mmcv"], "modelscope.models.cv.video_panoptic_segmentation.head.kernel_update_head": ["torch", "mmcv", "mmdet", "numpy"], "modelscope.models.cv.video_panoptic_segmentation.head.semantic_fpn_wrapper": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.video_panoptic_segmentation.head.kernel_head": ["torch", "mmcv"], "modelscope.models.cv.video_panoptic_segmentation.head.kernel_iter_head": ["torch", "mmdet"], "modelscope.models.cv.video_panoptic_segmentation.head.track_heads": ["torch", "mmcv", "numpy"], 
"modelscope.models.cv.video_panoptic_segmentation.neck.fpn": ["torch", "mmcv"], "modelscope.models.cv.video_panoptic_segmentation.video_k_net": ["torch", "mmcv", "mmdet", "numpy"], "modelscope.models.cv.video_panoptic_segmentation.backbone.swin_checkpoint": ["pkgutil", "torchvision", "importlib", "torch", "os", "collections"], "modelscope.models.cv.video_panoptic_segmentation.backbone.swin_transformer": ["timm", "torch", "mmdet", "numpy"], "modelscope.models.cv.video_panoptic_segmentation.visualizer": ["cv2", "hashlib", "numpy"], "modelscope.models.cv.open_vocabulary_detection_vild.vild": ["tensorflow", "numpy", "torch", "scipy", "clip", "os", "typing"], "modelscope.models.cv.image_reid_person.pass_model": ["torch", "os", "enum"], "modelscope.models.cv.image_reid_person.transreid_model": ["torch", "functools", "itertools", "collections"], "modelscope.models.cv.image_face_fusion.facelib.align_trans": ["cv2", "numpy"], "modelscope.models.cv.image_face_fusion.facelib.matlab_cp2tform": ["numpy"], "modelscope.models.cv.image_face_fusion.network.aad_layer": ["torch"], "modelscope.models.cv.image_face_fusion.network.dense_motion": ["torch"], "modelscope.models.cv.image_face_fusion.network.model_irse": ["torch", "collections"], "modelscope.models.cv.image_face_fusion.network.bfm": ["torch", "os", "scipy", "numpy"], "modelscope.models.cv.image_face_fusion.network.ops": ["torch"], "modelscope.models.cv.image_face_fusion.network.aei_flow_net": ["torch"], "modelscope.models.cv.image_face_fusion.network.facerecon_model": ["torch", "os", "typing"], "modelscope.models.cv.image_face_fusion.image_face_fusion": ["PIL", "numpy", "torch", "cv2", "collections", "torchvision", "os", "typing"], "modelscope.models.cv.image_face_fusion.facegan.gan_wrap": ["PIL", "numpy", "torch", "cv2", "torchvision", "os"], "modelscope.models.cv.image_face_fusion.facegan.op.fused_act": ["torch"], "modelscope.models.cv.image_face_fusion.facegan.op.upfirdn2d": ["torch", "collections"], "modelscope.models.cv.image_face_fusion.facegan.op.conv2d_gradfix": ["torch", "warnings", "contextlib"], "modelscope.models.cv.image_face_fusion.facegan.model": ["torch", "math", "random"], "modelscope.models.cv.product_segmentation.net": ["torch"], "modelscope.models.cv.product_segmentation.seg_infer": ["PIL", "torch", "cv2", "numpy"], "modelscope.models.cv.controllable_image_generation.controlnet": ["PIL", "random", "numpy", "cv2", "torch", "einops", "tempfile", "sys", "math", "control_ldm", "os", "typing"], "modelscope.models.cv.controllable_image_generation.annotator.openpose.body": ["numpy", "cv2", "torch", "scipy", "math", "matplotlib", "time", "torchvision"], "modelscope.models.cv.controllable_image_generation.annotator.openpose.util": ["cv2", "math", "numpy", "matplotlib"], "modelscope.models.cv.controllable_image_generation.annotator.openpose.model": ["torch", "collections"], "modelscope.models.cv.controllable_image_generation.annotator.openpose.hand": ["skimage", "numpy", "cv2", "json", "torch", "scipy", "math", "matplotlib", "time"], "modelscope.models.cv.controllable_image_generation.annotator.annotator": ["mmcv", "mmseg", "numpy", "cv2", "torch", "einops", "os"], "modelscope.models.cv.controllable_image_generation.annotator.midas.api": ["cv2", "torch", "torchvision", "os"], "modelscope.models.cv.controllable_image_generation.annotator.midas.midas.midas_net_custom": ["torch"], "modelscope.models.cv.controllable_image_generation.annotator.midas.midas.transforms": ["cv2", "math", "numpy"], 
"modelscope.models.cv.controllable_image_generation.annotator.midas.midas.midas_net": ["torch"], "modelscope.models.cv.controllable_image_generation.annotator.midas.midas.dpt_depth": ["torch"], "modelscope.models.cv.controllable_image_generation.annotator.midas.midas.base_model": ["torch"], "modelscope.models.cv.controllable_image_generation.annotator.midas.midas.vit": ["timm", "torch", "types", "math"], "modelscope.models.cv.controllable_image_generation.annotator.midas.midas.blocks": ["torch"], "modelscope.models.cv.controllable_image_generation.annotator.midas.utils": ["numpy", "cv2", "torch", "re", "sys"], "modelscope.models.cv.controllable_image_generation.annotator.mlsd.utils": ["cv2", "os", "torch", "numpy"], "modelscope.models.cv.controllable_image_generation.annotator.mlsd.mbv2_mlsd_large": ["torch", "os", "sys"], "modelscope.models.cv.video_inpainting.inpainting": ["PIL", "time", "torchvision", "numpy", "cv2", "torch", "os"], "modelscope.models.cv.video_inpainting.inpainting_model": ["torch", "torchvision", "math", "numpy"], "modelscope.models.cv.image_mvs_depth_estimation.casmvs_model": ["numpy", "cv2", "torch", "os", "easydict"], "modelscope.models.cv.image_mvs_depth_estimation.colmap2mvsnet": ["multiprocessing", "struct", "shutil", "numpy", "cv2", "__future__", "functools", "collections", "os"], "modelscope.models.cv.image_mvs_depth_estimation.cas_mvsnet": ["torch"], "modelscope.models.cv.image_mvs_depth_estimation.utils": ["torch", "numpy", "torchvision", "random"], "modelscope.models.cv.image_mvs_depth_estimation.depth_filter": ["PIL", "numpy", "cv2", "plyfile", "os"], "modelscope.models.cv.image_mvs_depth_estimation.module": ["torch"], "modelscope.models.cv.image_mvs_depth_estimation.general_eval_dataset": ["PIL", "numpy", "cv2", "torch", "re", "os", "sys"], "modelscope.models.cv.image_binary_quant_classification.binary_quant_model": ["torch", "os", "collections"], "modelscope.models.cv.image_binary_quant_classification.bnext": ["torch", "numpy"], "modelscope.models.cv.skin_retouching.detection_model.detection_unet_in": ["torch"], "modelscope.models.cv.skin_retouching.detection_model.detection_module": ["torch"], "modelscope.models.cv.skin_retouching.retinaface.net": ["torch", "typing"], "modelscope.models.cv.skin_retouching.retinaface.prior_box": ["torch", "itertools", "math"], "modelscope.models.cv.skin_retouching.retinaface.box_utils": ["torch", "typing", "numpy"], "modelscope.models.cv.skin_retouching.retinaface.utils": ["pathlib", "numpy", "cv2", "torch", "re", "typing"], "modelscope.models.cv.skin_retouching.retinaface.network": ["torch", "torchvision", "typing"], "modelscope.models.cv.skin_retouching.retinaface.predict_single": ["albumentations", "numpy", "torch", "torchvision", "typing"], "modelscope.models.cv.skin_retouching.unet_deploy": ["torch", "warnings"], "modelscope.models.cv.skin_retouching.weights_init": ["torch"], "modelscope.models.cv.skin_retouching.utils": ["time", "numpy", "cv2", "torch", "einops", "typing"], "modelscope.models.cv.skin_retouching.inpainting_model.gconv": ["torch"], "modelscope.models.cv.skin_retouching.inpainting_model.inpainting_unet": ["torch"], "modelscope.models.cv.body_3d_keypoints.hdformer.directed_graph": ["typing", "sys", "numpy"], "modelscope.models.cv.body_3d_keypoints.hdformer.hdformer_detector": ["torch", "os", "typing", "numpy"], "modelscope.models.cv.body_3d_keypoints.hdformer.skeleton": ["numpy"], "modelscope.models.cv.body_3d_keypoints.hdformer.backbone": ["torch"], 
"modelscope.models.cv.body_3d_keypoints.hdformer.hdformer": ["torch"], "modelscope.models.cv.body_3d_keypoints.hdformer.block": ["torch", "einops", "math"], "modelscope.models.cv.body_3d_keypoints.cannonical_pose.canonical_pose_modules": ["torch"], "modelscope.models.cv.body_3d_keypoints.cannonical_pose.body_3d_pose": ["numpy", "torch", "logging", "os", "typing"], "modelscope.models.cv.action_recognition.models": ["torch"], "modelscope.models.cv.action_recognition.s3dg": ["torch"], "modelscope.models.cv.action_recognition.tada_convnext": ["torch", "math"], "modelscope.models.cv.action_recognition.temporal_patch_shift_transformer": ["timm", "numpy", "torch", "abc", "einops", "functools", "operator", "torchvision"], "modelscope.models.cv.video_frame_interpolation.interp_model.flow_reversal": ["torch"], "modelscope.models.cv.video_frame_interpolation.interp_model.UNet": ["torch"], "modelscope.models.cv.video_frame_interpolation.interp_model.IFNet_swin": ["torch", "timm", "numpy"], "modelscope.models.cv.video_frame_interpolation.interp_model.refinenet_arch": ["torch", "numpy"], "modelscope.models.cv.video_frame_interpolation.interp_model.transformer_layers": ["timm", "torch", "math", "functools", "sys"], "modelscope.models.cv.video_frame_interpolation.utils.utils": ["torch", "scipy", "numpy"], "modelscope.models.cv.video_frame_interpolation.utils.scene_change_detection": ["torch", "numpy"], "modelscope.models.cv.video_frame_interpolation.VFINet_for_video_frame_interpolation": ["torch", "os", "copy", "typing"], "modelscope.models.cv.video_frame_interpolation.VFINet_arch": ["torch"], "modelscope.models.cv.video_frame_interpolation.flow_model.update": ["torch"], "modelscope.models.cv.video_frame_interpolation.flow_model.corr": ["torch"], "modelscope.models.cv.video_frame_interpolation.flow_model.extractor": ["torch"], "modelscope.models.cv.video_frame_interpolation.flow_model.raft": ["torch", "numpy"], "modelscope.models.cv.object_detection.mmdet_ms.dense_heads.rpn_head": ["torch", "copy", "mmcv", "mmdet"], "modelscope.models.cv.object_detection.mmdet_ms.dense_heads.anchor_head": ["mmdet"], "modelscope.models.cv.object_detection.mmdet_ms.necks.fpn": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.object_detection.mmdet_ms.utils.convModule_norm": ["mmcv"], "modelscope.models.cv.object_detection.mmdet_ms.utils.checkpoint": ["mmcv", "torch", "tempfile", "collections", "time", "pkgutil", "io", "warnings", "importlib", "torchvision", "os"], "modelscope.models.cv.object_detection.mmdet_ms.backbones.vit": ["timm", "torch", "functools", "math", "mmdet"], "modelscope.models.cv.object_detection.mmdet_ms.roi_heads.bbox_heads.convfc_bbox_head": ["torch", "mmdet"], "modelscope.models.cv.object_detection.mmdet_ms.roi_heads.mask_heads.fcn_mask_head": ["mmcv", "numpy", "torch", "warnings", "mmdet"], "modelscope.models.cv.object_detection.mmdet_model": ["torch", "os", "numpy"], "modelscope.models.cv.pedestrian_attribute_recognition.model": ["torch", "os", "torchvision", "numpy"], "modelscope.models.cv.pointcloud_sceneflow_estimation.sf_rcp": ["torch"], "modelscope.models.cv.pointcloud_sceneflow_estimation.rcp_model": ["torch", "os", "numpy"], "modelscope.models.cv.pointcloud_sceneflow_estimation.common": ["torch", "math"], "modelscope.models.cv.pointcloud_sceneflow_estimation.pointnet2_utils": ["torch", "typing", "pointnet2_cuda"], "modelscope.models.cv.animal_recognition.splat": ["torch"], "modelscope.models.cv.animal_recognition.resnet": ["torch", "math"], 
"modelscope.models.cv.video_stabilization.utils.image_utils": ["torch", "skimage"], "modelscope.models.cv.video_stabilization.utils.RAFTUtils": ["torch", "scipy", "numpy"], "modelscope.models.cv.video_stabilization.utils.math_utils": ["torch", "numpy"], "modelscope.models.cv.video_stabilization.utils.ProjectionUtils": ["cv2", "torch", "math", "numpy"], "modelscope.models.cv.video_stabilization.utils.WarpUtils": ["torch", "tqdm", "numpy"], "modelscope.models.cv.video_stabilization.utils.MedianFilter": ["cv2", "torch", "math", "numpy"], "modelscope.models.cv.video_stabilization.utils.IterativeSmooth": ["torch", "os", "math", "numpy"], "modelscope.models.cv.video_stabilization.DUTRAFTStabilizer": ["numpy", "cv2", "torch", "tempfile", "sys", "math", "os", "typing"], "modelscope.models.cv.video_stabilization.DUT.config": ["__future__", "easydict"], "modelscope.models.cv.video_stabilization.DUT.rf_det_so": ["torch"], "modelscope.models.cv.video_stabilization.DUT.Smoother": ["torch", "math", "numpy"], "modelscope.models.cv.video_stabilization.DUT.DUT_raft": ["cv2", "torch", "sys", "numpy"], "modelscope.models.cv.video_stabilization.DUT.MotionPro": ["numpy", "cv2", "torch", "math", "os"], "modelscope.models.cv.video_stabilization.DUT.RAFT.update": ["torch"], "modelscope.models.cv.video_stabilization.DUT.RAFT.corr": ["torch", "alt_cuda_corr"], "modelscope.models.cv.video_stabilization.DUT.RAFT.extractor": ["torch"], "modelscope.models.cv.video_stabilization.DUT.RAFT.raft": ["torch", "numpy"], "modelscope.models.cv.video_stabilization.DUT.rf_det_module": ["torch"], "modelscope.models.cv.video_depth_estimation.dro_model": ["numpy", "cv2", "torch", "tqdm", "os", "glob"], "modelscope.models.cv.video_depth_estimation.utils.misc": ["termcolor"], "modelscope.models.cv.video_depth_estimation.utils.config": ["torch", "datetime", "yacs", "os"], "modelscope.models.cv.video_depth_estimation.utils.horovod": ["horovod"], "modelscope.models.cv.video_depth_estimation.utils.image_gt": ["PIL", "torch", "cv2", "functools"], "modelscope.models.cv.video_depth_estimation.utils.types": ["torch", "yacs", "numpy"], "modelscope.models.cv.video_depth_estimation.utils.depth": ["torch", "torchvision", "numpy", "matplotlib"], "modelscope.models.cv.video_depth_estimation.utils.load": ["torch", "logging", "collections", "inspect", "warnings", "importlib", "os"], "modelscope.models.cv.video_depth_estimation.utils.image": ["PIL", "numpy", "torch", "cv2", "functools", "os"], "modelscope.models.cv.video_depth_estimation.utils.augmentations": ["PIL", "random", "numpy", "cv2", "torchvision"], "modelscope.models.cv.video_depth_estimation.models.model_utils": [], "modelscope.models.cv.video_depth_estimation.models.sfm_model_mf": ["torch", "random"], "modelscope.models.cv.video_depth_estimation.models.model_checkpoint": ["torch", "os", "re", "numpy"], "modelscope.models.cv.video_depth_estimation.models.model_wrapper": ["random", "numpy", "torch", "collections", "importlib"], "modelscope.models.cv.video_depth_estimation.models.sup_model_mf": [], "modelscope.models.cv.video_depth_estimation.networks.layers.resnet.pose_decoder": ["torch", "__future__", "collections"], "modelscope.models.cv.video_depth_estimation.networks.layers.resnet.resnet_encoder": ["torch", "__future__", "torchvision", "numpy"], "modelscope.models.cv.video_depth_estimation.networks.layers.resnet.layers": ["torch", "__future__"], "modelscope.models.cv.video_depth_estimation.networks.layers.resnet.depth_decoder": ["torch", "__future__", "collections", "numpy"], 
"modelscope.models.cv.video_depth_estimation.networks.optim.update": ["torch"], "modelscope.models.cv.video_depth_estimation.networks.optim.extractor": ["torch", "torchvision"], "modelscope.models.cv.video_depth_estimation.networks.depth_pose.depth_pose_net": ["torch", "functools"], "modelscope.models.cv.video_depth_estimation.configs.default_config": ["os", "yacs"], "modelscope.models.cv.video_depth_estimation.geometry.pose_utils": ["torch", "numpy"], "modelscope.models.cv.video_depth_estimation.geometry.camera_utils": ["torch"], "modelscope.models.cv.video_depth_estimation.geometry.camera": ["torch", "functools"], "modelscope.models.cv.video_depth_estimation.geometry.pose": ["torch"], "modelscope.models.cv.vidt.backbone": ["timm", "numpy", "torch", "os", "math"], "modelscope.models.cv.vidt.model": ["torch", "os"], "modelscope.models.cv.vidt.head": ["copy", "torch", "math"], "modelscope.models.cv.vidt.fpn_fusion": ["torch"], "modelscope.models.cv.vidt.deformable_transformer": ["timm", "copy", "warnings", "torch", "math"], "modelscope.models.cv.face_human_hand_detection.shufflenetv2": ["torch"], "modelscope.models.cv.face_human_hand_detection.one_stage_detector": ["torch"], "modelscope.models.cv.face_human_hand_detection.nanodet_plus_head": ["numpy", "cv2", "torch", "math", "torchvision"], "modelscope.models.cv.face_human_hand_detection.det_infer": ["cv2", "torch", "numpy"], "modelscope.models.cv.face_human_hand_detection.ghost_pan": ["torch", "math"], "modelscope.models.cv.face_human_hand_detection.utils": ["torch"], "modelscope.models.cv.referring_video_object_segmentation.utils.misc": ["torch", "torchvision", "typing", "pickle"], "modelscope.models.cv.referring_video_object_segmentation.utils.mttr": ["torch", "einops"], "modelscope.models.cv.referring_video_object_segmentation.utils.multimodal_transformer": ["transformers", "torch", "einops", "copy", "os", "typing"], "modelscope.models.cv.referring_video_object_segmentation.utils.matcher": ["torch", "scipy"], "modelscope.models.cv.referring_video_object_segmentation.utils.backbone": ["torch", "torchvision", "einops"], "modelscope.models.cv.referring_video_object_segmentation.utils.position_encoding_2d": ["torch", "math"], "modelscope.models.cv.referring_video_object_segmentation.utils.postprocessing": ["torch", "einops", "numpy", "pycocotools"], "modelscope.models.cv.referring_video_object_segmentation.utils.criterion": ["torch"], "modelscope.models.cv.referring_video_object_segmentation.utils.swin_transformer": ["timm", "numpy", "torch", "einops", "functools", "operator"], "modelscope.models.cv.referring_video_object_segmentation.utils.segmentation": ["torch", "typing"], "modelscope.models.cv.referring_video_object_segmentation.model": ["torch", "os", "typing"], "modelscope.models.cv.hand_static.networks": ["torch", "os", "torchvision"], "modelscope.models.cv.hand_static.hand_model": ["PIL", "numpy", "torch", "cv2", "sys", "torchvision", "os"], "modelscope.models.cv.image_depth_estimation.newcrfs_model": ["torch", "os", "numpy"], "modelscope.models.cv.image_depth_estimation.networks.uper_crf_head": ["torch", "mmcv"], "modelscope.models.cv.image_depth_estimation.networks.newcrf_layers": ["torch", "timm", "numpy"], "modelscope.models.cv.image_depth_estimation.networks.newcrf_depth": ["torch"], "modelscope.models.cv.image_depth_estimation.networks.newcrf_utils": ["pkgutil", "warnings", "torchvision", "importlib", "torch", "os", "collections"], "modelscope.models.cv.image_depth_estimation.networks.swin_transformer": ["torch", "timm", 
"numpy"], "modelscope.models.cv.image_colorization.unet.unet": ["torch", "numpy"], "modelscope.models.cv.image_colorization.unet.utils": ["torch", "functools", "enum"], "modelscope.models.cv.image_colorization.ddcolor.ddcolor_for_image_colorization": ["numpy", "torch", "copy", "os", "typing"], "modelscope.models.cv.image_colorization.ddcolor.ddcolor": ["torch"], "modelscope.models.cv.image_colorization.ddcolor.utils.vgg": ["torch", "os", "torchvision", "collections"], "modelscope.models.cv.image_colorization.ddcolor.utils.unet": ["torch", "collections", "enum"], "modelscope.models.cv.image_colorization.ddcolor.utils.transformer_utils": ["torch", "typing"], "modelscope.models.cv.image_colorization.ddcolor.utils.position_encoding": ["torch", "math"], "modelscope.models.cv.image_colorization.ddcolor.utils.convnext": ["torch", "timm"], "modelscope.models.cv.image_colorization.ddcolor.loss": ["torch"], "modelscope.models.cv.face_detection.retinaface.detection": ["cv2", "torch", "numpy"], "modelscope.models.cv.face_detection.retinaface.models.retinaface": ["torch", "torchvision", "collections"], "modelscope.models.cv.face_detection.retinaface.models.net": ["torch", "time", "torchvision"], "modelscope.models.cv.face_detection.retinaface.utils": ["torch", "itertools", "math", "numpy"], "modelscope.models.cv.face_detection.mtcnn.models.detector": ["PIL", "torch", "os", "numpy"], "modelscope.models.cv.face_detection.mtcnn.models.get_nets": ["torch", "collections", "numpy"], "modelscope.models.cv.face_detection.mtcnn.models.box_utils": ["PIL", "numpy"], "modelscope.models.cv.face_detection.mtcnn.models.first_stage": ["PIL", "torch", "math", "numpy"], "modelscope.models.cv.face_detection.ulfd_slim.detection": ["cv2", "torch", "os", "numpy"], "modelscope.models.cv.face_detection.ulfd_slim.vision.ssd.predictor": ["torch"], "modelscope.models.cv.face_detection.ulfd_slim.vision.ssd.data_preprocessing": [], "modelscope.models.cv.face_detection.ulfd_slim.vision.ssd.ssd": ["torch", "typing", "collections", "numpy"], "modelscope.models.cv.face_detection.ulfd_slim.vision.ssd.mb_tiny_fd": ["torch"], "modelscope.models.cv.face_detection.ulfd_slim.vision.ssd.fd_config": ["numpy"], "modelscope.models.cv.face_detection.ulfd_slim.vision.transforms": ["cv2", "torch", "types", "numpy"], "modelscope.models.cv.face_detection.ulfd_slim.vision.box_utils": ["torch", "math"], "modelscope.models.cv.face_detection.ulfd_slim.vision.mb_tiny": ["torch"], "modelscope.models.cv.face_detection.peppa_pig_face.facer": ["cv2", "numpy"], "modelscope.models.cv.face_detection.peppa_pig_face.LK.lk": ["numpy"], "modelscope.models.cv.face_detection.peppa_pig_face.face_detector": ["cv2", "tensorflow", "numpy"], "modelscope.models.cv.face_detection.peppa_pig_face.face_landmark": ["cv2", "tensorflow", "numpy"], "modelscope.models.cv.face_detection.scrfd.scrfd_detect": ["numpy", "torch", "copy", "os", "typing"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.core.post_processing.bbox_nms": ["torch"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.core.bbox.transforms": ["torch", "numpy"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.retinaface": ["mmdet", "numpy"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.auto_augment": ["copy", "mmcv", "numpy", "cv2", "mmdet"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.transforms": ["mmcv", "mmdet", "numpy"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.loading": ["os", "mmdet", 
"numpy", "pycocotools"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.datasets.pipelines.formating": ["torch", "mmcv", "mmdet", "numpy"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.dense_heads.scrfd_head": ["torch", "mmcv", "mmdet", "numpy"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.backbones.master_net": ["torch", "mmdet"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.backbones.mobilenet": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.backbones.resnet": ["torch", "mmcv", "mmdet"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors.scrfd": ["torch", "mmdet"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors.single_stage": ["torch", "mmdet"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors.base": ["mmcv", "numpy", "torch", "abc", "mmdet", "collections"], "modelscope.models.cv.face_detection.scrfd.mmdet_patch.models.detectors.tinymog": ["torch", "mmdet"], "modelscope.models.cv.face_detection.scrfd.tinymog_detect": ["torch", "os", "copy", "typing"], "modelscope.models.cv.face_detection.scrfd.preprocessor": ["PIL", "typing", "numpy"], "modelscope.models.cv.face_detection.scrfd.damofd_detect": ["torch", "os", "copy", "typing"], "modelscope.models.cv.face_detection.mogface.models.mogprednet": ["torch", "math"], "modelscope.models.cv.face_detection.mogface.models.resnet": ["torch"], "modelscope.models.cv.face_detection.mogface.models.utils": ["torch", "itertools", "math", "numpy"], "modelscope.models.cv.face_detection.mogface.models.detectors": ["cv2", "torch", "os", "numpy"], "modelscope.models.cv.face_detection.mogface.models.mogface": ["torch"], "modelscope.models.cv.robust_image_classification.easyrobust_model": ["torch", "os"], "modelscope.models.cv.image_semantic_segmentation.ddpm_segmentation_model": ["torch", "os", "typing", "ddpm_guided_diffusion"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.utils.data_process_func": ["mmcv", "mmdet"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.utils.builder": ["mmcv"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.utils.seg_func": ["torch", "warnings"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.backbone.beit_adapter": ["timm", "torch", "logging", "math", "mmdet"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.backbone.adapter_modules": ["timm", "torch", "logging", "functools", "mmdet"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.backbone.base.beit": ["timm", "mmcv", "torch", "mmdet", "functools", "math"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.segmentors.encoder_decoder_mask2former": ["torch", "mmdet"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.segmentors.base_segmentor": ["warnings", "mmcv", "numpy", "torch", "abc", "collections"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.decode_heads.mask2former_head_from_mmseg": ["torch", "copy", "mmcv", "mmdet"], "modelscope.models.cv.image_semantic_segmentation.vit_adapter.models.decode_heads.base_decode_head": ["torch", "abc", "mmcv", "mmdet"], "modelscope.models.cv.image_semantic_segmentation.semantic_seg_model": ["torch", "os", "numpy"], "modelscope.models.cv.image_semantic_segmentation.pan_merge.base_panoptic_fusion_head": ["abc", "mmcv", "mmdet"], 
"modelscope.models.cv.image_semantic_segmentation.pan_merge.maskformer_semantic_head": ["torch", "mmdet"], "modelscope.models.cv.image_semantic_segmentation.ddpm_seg.data_util": [], "modelscope.models.cv.image_semantic_segmentation.ddpm_seg.utils": ["torch", "numpy", "PIL", "random"], "modelscope.models.cv.image_semantic_segmentation.ddpm_seg.feature_extractors": ["torch", "typing"], "modelscope.models.cv.image_semantic_segmentation.ddpm_seg.pixel_classifier": ["PIL", "numpy", "torch", "collections", "os"], "modelscope.models.cv.video_single_object_tracking.config.ostrack": ["easydict"], "modelscope.models.cv.video_single_object_tracking.utils.utils": ["numpy", "cv2", "torch", "typing", "math"], "modelscope.models.cv.video_single_object_tracking.tracker.procontext": ["torch", "copy"], "modelscope.models.cv.video_single_object_tracking.tracker.ostrack": ["torch"], "modelscope.models.cv.video_single_object_tracking.models.layers.attn_blocks": ["torch", "math", "timm"], "modelscope.models.cv.video_single_object_tracking.models.layers.head": ["torch"], "modelscope.models.cv.video_single_object_tracking.models.layers.patch_embed": ["torch", "timm"], "modelscope.models.cv.video_single_object_tracking.models.layers.attn": ["torch"], "modelscope.models.cv.video_single_object_tracking.models.ostrack.base_backbone": ["torch", "timm"], "modelscope.models.cv.video_single_object_tracking.models.ostrack.ostrack": ["torch"], "modelscope.models.cv.video_single_object_tracking.models.ostrack.utils": ["torch"], "modelscope.models.cv.video_single_object_tracking.models.ostrack.vit_ce": ["timm", "torch", "functools"], "modelscope.models.cv.video_single_object_tracking.models.procontext.procontext": ["torch"], "modelscope.models.cv.video_single_object_tracking.models.procontext.utils": ["torch"], "modelscope.models.cv.video_single_object_tracking.models.procontext.vit_ce": ["timm", "torch", "functools"], "modelscope.models.cv.text_driven_segmentation.lseg_net": ["torch", "numpy"], "modelscope.models.cv.text_driven_segmentation.lseg_blocks": ["torch"], "modelscope.models.cv.text_driven_segmentation.lseg_model": ["PIL", "numpy", "torch", "json", "os", "typing"], "modelscope.models.cv.text_driven_segmentation.model": ["torch", "typing", "collections", "numpy"], "modelscope.models.cv.text_driven_segmentation.lseg_vit": ["timm", "torch", "types", "math"], "modelscope.models.cv.text_driven_segmentation.clip": ["urllib", "hashlib", "PIL", "pkg_resources", "torch", "tqdm", "warnings", "torchvision", "os", "typing"], "modelscope.models.cv.text_driven_segmentation.simple_tokenizer": ["gzip", "os", "regex", "functools", "ftfy", "html"], "modelscope.models.cv.text_driven_segmentation.lseg_base": ["torch"], "modelscope.models.cv.crowd_counting.hrnet_aspp_relu": ["numpy", "torch", "logging", "functools", "os"], "modelscope.models.cv.crowd_counting.cc_model": ["torch", "os", "typing"], "modelscope.models.cv.image_panoptic_segmentation.panseg_model": ["torch", "os"], "modelscope.models.cv.face_emotion.emotion_model": ["torch", "os", "sys"], "modelscope.models.cv.face_emotion.emotion_infer": ["PIL", "torch", "torchvision"], "modelscope.models.cv.face_emotion.face_alignment.face_align": ["PIL", "numpy", "cv2", "sys", "os"], "modelscope.models.cv.face_emotion.face_alignment.face": ["cv2", "os", "numpy", "tensorflow"], "modelscope.models.cv.face_emotion.efficient.model": ["torch"], "modelscope.models.cv.face_emotion.efficient.utils": ["functools", "torch", "re", "math", "collections"], 
"modelscope.models.cv.video_super_resolution.real_basicvsr_net": ["torch"], "modelscope.models.cv.video_super_resolution.msrresnet_lite_model": ["torch", "os", "functools", "typing"], "modelscope.models.cv.video_super_resolution.common": ["torch"], "modelscope.models.cv.video_super_resolution.real_basicvsr_for_video_super_resolution": ["torch", "os", "typing"], "modelscope.models.cv.video_super_resolution.basicvsr_net": ["torch"], "modelscope.models.cv.face_attribute_recognition.fair_face.face_attribute_recognition": ["PIL", "numpy", "torch", "cv2", "torchvision", "os"], "modelscope.models.cv.image_denoise.nafnet.NAFNet_arch": ["torch", "numpy"], "modelscope.models.cv.image_denoise.nafnet.arch_util": ["torch"], "modelscope.models.cv.image_denoise.nafnet_for_image_denoise": ["torch", "os", "typing"], "modelscope.models.cv.image_classification.mmcls_model": ["os"], "modelscope.models.cv.image_classification.utils": ["itertools", "numpy", "torch", "os", "mmcls", "math", "collections"], "modelscope.models.cv.image_classification.backbones.beit_v2": ["itertools", "mmcv", "torch", "einops", "functools", "mmcls", "math", "collections", "warnings", "os", "typing"], "modelscope.models.cv.image_classification.backbones.nextvit": ["itertools", "mmcv", "torch", "einops", "functools", "mmcls", "math", "collections", "warnings", "os", "typing"], "modelscope.models.cv.image_classification.resnet50_cc": ["torch", "math", "collections", "torchvision", "os"], "modelscope.models.cv.image_color_enhance.csrnet": ["torch", "functools", "math"], "modelscope.models.cv.image_color_enhance.deeplpf.deeplpfnet": ["torch", "math", "matplotlib"], "modelscope.models.cv.image_color_enhance.deeplpf.deeplpf_image_color_enhance": ["torch", "os", "typing"], "modelscope.models.cv.image_color_enhance.image_color_enhance": ["torch", "os", "typing"], "modelscope.models.cv.image_color_enhance.adaint.adaint": ["numbers", "torch", "torchvision", "os", "typing"], "modelscope.models.base.base_torch_head": ["torch", "typing"], "modelscope.models.base.base_model": ["os", "abc", "typing"], "modelscope.models.base.base_torch_model": ["torch", "functools", "copy", "packaging", "os", "typing"], "modelscope.models.base.base_head": ["abc", "typing"], "modelscope.metrics.image_quality_assessment_degradation_metric": ["numpy", "cv2", "torch", "scipy", "tempfile", "sys", "collections", "tqdm", "os", "typing"], "modelscope.metrics.prediction_saving_wrapper": ["typing", "sklearn", "numpy"], "modelscope.metrics.video_stabilization_metric": ["numpy", "cv2", "tqdm", "tempfile", "sys", "os", "typing"], "modelscope.metrics.ppl_metric": ["torch", "typing", "math", "numpy"], "modelscope.metrics.inbatch_recall_metric": ["torch", "typing", "numpy"], "modelscope.metrics.loss_metric": ["typing", "sklearn", "numpy"], "modelscope.metrics.ocr_recognition_metric": ["torch", "edit_distance", "typing", "numpy"], "modelscope.metrics.map_metric": ["typing", "numpy"], "modelscope.metrics.image_colorization_metric": ["numpy", "cv2", "torch", "scipy", "torchvision", "typing"], "modelscope.metrics.sequence_classification_metric": ["typing", "sklearn", "numpy"], "modelscope.metrics.audio_noise_metric": ["typing"], "modelscope.metrics.translation_evaluation_metric": ["pandas", "typing", "importlib"], "modelscope.metrics.video_frame_interpolation_metric": ["numpy", "torch", "lpips", "math", "typing"], "modelscope.metrics.image_inpainting_metric": ["torch", "scipy", "typing", "numpy"], "modelscope.metrics.image_denoise_metric": ["cv2", "torch", "typing", "numpy"], 
"modelscope.metrics.referring_video_object_segmentation_metric": ["numpy", "pycocotools", "torch", "tqdm", "typing"], "modelscope.metrics.token_classification_metric": ["typing", "numpy", "importlib"], "modelscope.metrics.video_summarization_metric": ["typing", "numpy"], "modelscope.metrics.builder": ["typing"], "modelscope.metrics.image_quality_assessment_mos_metric": ["numpy", "cv2", "torch", "scipy", "tempfile", "sys", "tqdm", "os", "typing"], "modelscope.metrics.ned_metric": ["typing", "numpy"], "modelscope.metrics.text_ranking_metric": ["typing", "numpy"], "modelscope.metrics.movie_scene_segmentation_metric": ["typing", "numpy"], "modelscope.metrics.accuracy_metric": ["typing", "numpy"], "modelscope.metrics.image_instance_segmentation_metric": ["numpy", "pycocotools", "tempfile", "collections", "os", "typing"], "modelscope.metrics.video_super_resolution_metric.metric_util": ["numpy"], "modelscope.metrics.video_super_resolution_metric.video_super_resolution_metric": ["typing", "numpy"], "modelscope.metrics.video_super_resolution_metric.niqe": ["cv2", "scipy", "math", "numpy"], "modelscope.metrics.video_super_resolution_metric.matlab_functions": ["torch", "math", "numpy"], "modelscope.metrics.ciderD.ciderD": ["__future__"], "modelscope.metrics.ciderD.ciderD_scorer": ["pdb", "numpy", "__future__", "math", "collections", "copy", "six", "os"], "modelscope.metrics.action_detection_evaluator": ["numpy", "logging", "scipy", "pandas", "collections", "copy", "os", "detectron2"], "modelscope.metrics.image_color_enhance_metric": ["cv2", "typing", "numpy"], "modelscope.metrics.image_portrait_enhancement_metric": ["cv2", "typing", "numpy"], "modelscope.metrics.bleu_metric": ["typing", "itertools", "sacrebleu"], "modelscope.metrics.text_generation_metric": ["nltk", "rouge", "typing"], "modelscope.metrics.base": ["abc", "typing"], "modelscope.pipelines.util": ["os", "typing"], "modelscope.pipelines.science.protein_structure_pipeline": ["numpy", "json", "torch", "unicore", "time", "os", "typing"], "modelscope.pipelines.builder": ["os", "typing"], "modelscope.pipelines.pipeline_template": ["typing", "numpy"], "modelscope.pipelines.audio.timestamp_pipeline": ["json", "typing", "yaml", "os", "funasr"], "modelscope.pipelines.audio.kws_farfield_pipeline": ["numpy", "wave", "soundfile", "io", "typing"], "modelscope.pipelines.audio.speaker_verification_pipeline": ["os", "typing", "shutil", "yaml"], "modelscope.pipelines.audio.inverse_text_processing_pipeline": ["os", "typing", "shutil", "yaml"], "modelscope.pipelines.audio.separation_pipeline": ["numpy", "torch", "soundfile", "io", "typing"], "modelscope.pipelines.audio.voice_activity_detection_pipeline": ["json", "typing", "yaml", "os", "funasr"], "modelscope.pipelines.audio.text_to_speech_pipeline": ["typing", "numpy"], "modelscope.pipelines.audio.kws_kwsbp_pipeline": ["json", "os", "typing"], "modelscope.pipelines.audio.linear_aec_pipeline": ["numpy", "torch", "scipy", "yaml", "importlib", "os", "typing"], "modelscope.pipelines.audio.ans_pipeline": ["numpy", "torch", "librosa", "soundfile", "io", "typing"], "modelscope.pipelines.audio.speaker_verification_eres2net_pipeline": ["torch", "io", "typing", "soundfile"], "modelscope.pipelines.audio.lm_infer_pipeline": ["os", "typing"], "modelscope.pipelines.audio.ans_dfsmn_pipeline": ["numpy", "torch", "sys", "collections", "librosa", "soundfile", "io", "os", "typing"], "modelscope.pipelines.audio.asr_inference_pipeline": ["json", "os", "typing", "yaml"], 
"modelscope.pipelines.audio.speaker_diarization_pipeline": ["shutil", "numpy", "json", "yaml", "os", "typing"], "modelscope.pipelines.audio.speaker_verification_rdino_pipeline": ["torch", "io", "typing", "soundfile"], "modelscope.pipelines.audio.punctuation_processing_pipeline": ["os", "typing", "shutil", "yaml"], "modelscope.pipelines.audio.speaker_verification_light_pipeline": ["torch", "io", "typing", "soundfile"], "modelscope.pipelines.audio.speaker_change_locating_pipeline": ["numpy", "torch", "soundfile", "io", "typing"], "modelscope.pipelines.audio.asr_wenet_inference_pipeline": ["typing"], "modelscope.pipelines.multi_modal.asr_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.image_captioning_pipeline": ["torch", "typing", "numpy"], "modelscope.pipelines.multi_modal.text_to_video_synthesis_pipeline": ["cv2", "torch", "einops", "tempfile", "os", "typing"], "modelscope.pipelines.multi_modal.mgeo_ranking_pipeline": ["torch", "typing", "numpy"], "modelscope.pipelines.multi_modal.generative_multi_modal_embedding_pipeline": ["typing"], "modelscope.pipelines.multi_modal.multimodal_dialogue_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.text_to_image_synthesis_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.text2sql_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.visual_entailment_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.disco_guided_diffusion_pipeline.disco_guided_diffusion": ["PIL", "gc", "numpy", "cv2", "json", "torch", "math", "clip", "importlib", "torchvision", "os"], "modelscope.pipelines.multi_modal.disco_guided_diffusion_pipeline.utils": ["fractions", "warnings", "numpy", "torch", "math"], "modelscope.pipelines.multi_modal.visual_question_answering_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.video_question_answering_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.video_captioning_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.video_multi_modal_embedding_pipeline": ["typing"], "modelscope.pipelines.multi_modal.efficient_diffusion_tuning_pipeline": ["PIL", "numpy", "torch", "cv2", "torchvision", "typing"], "modelscope.pipelines.multi_modal.team_multi_modal_similarity_pipeline": ["typing"], "modelscope.pipelines.multi_modal.diffusers_wrapped.diffusers_pipeline": ["os", "typing"], "modelscope.pipelines.multi_modal.diffusers_wrapped.stable_diffusion.stable_diffusion_pipeline": ["PIL", "numpy", "torch", "cv2", "diffusers", "typing"], "modelscope.pipelines.multi_modal.diffusers_wrapped.stable_diffusion.chinese_stable_diffusion_pipeline": ["PIL", "transformers", "numpy", "cv2", "torch", "diffusers", "typing"], "modelscope.pipelines.multi_modal.multi_modal_embedding_pipeline": ["typing"], "modelscope.pipelines.multi_modal.ocr_recognition_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.document_vl_embedding_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.image_text_retrieval_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.gridvlp_pipeline": ["PIL", "transformers", "numpy", "json", "torch", "time", "os", "traceback", "typing"], "modelscope.pipelines.multi_modal.visual_grounding_pipeline": ["torch", "typing"], "modelscope.pipelines.multi_modal.soonet_video_temporal_grounding_pipeline": ["numpy", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.multi_modal.sudoku_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.translation_evaluation_pipeline": ["numpy", "enum", 
"torch", "os", "typing"], "modelscope.pipelines.nlp.glm130b_text_generation_pipeline": ["typing"], "modelscope.pipelines.nlp.faq_question_answering_pipeline": ["typing"], "modelscope.pipelines.nlp.document_grounded_dialog_generate_pipeline": ["typing"], "modelscope.pipelines.nlp.automatic_post_editing_pipeline": ["tensorflow", "sacremoses", "numpy", "jieba", "sentencepiece", "os", "typing", "html"], "modelscope.pipelines.nlp.named_entity_recognition_pipeline": ["typing"], "modelscope.pipelines.nlp.interactive_translation_pipeline": ["tensorflow", "sacremoses", "numpy", "jieba", "subword_nmt", "os", "typing"], "modelscope.pipelines.nlp.summarization_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.document_grounded_dialog_retrieval_pipeline": ["numpy", "json", "faiss", "os", "typing"], "modelscope.pipelines.nlp.fasttext_text_classification_pipeline": ["numpy", "fasttext", "sentencepiece", "os", "typing"], "modelscope.pipelines.nlp.word_alignment_pipeline": ["typing", "numpy"], "modelscope.pipelines.nlp.feature_extraction_pipeline": ["torch", "os", "typing"], "modelscope.pipelines.nlp.text_ranking_pipeline": ["typing", "numpy"], "modelscope.pipelines.nlp.fid_dialogue_pipeline": ["torch", "re", "typing"], "modelscope.pipelines.nlp.text_classification_pipeline": ["torch", "typing", "numpy"], "modelscope.pipelines.nlp.codegeex_code_generation_pipeline": ["typing"], "modelscope.pipelines.nlp.translation_quality_estimation_pipeline": ["transformers", "torch", "io", "os", "typing"], "modelscope.pipelines.nlp.fill_mask_pipeline": ["typing", "numpy"], "modelscope.pipelines.nlp.distributed_plug_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.conversational_text_to_sql_pipeline": ["torch", "typing", "text2sql_lgesql"], "modelscope.pipelines.nlp.distributed_gpt3_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.information_extraction_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.table_question_answering_pipeline": ["transformers", "json", "torch", "os", "typing"], "modelscope.pipelines.nlp.user_satisfaction_estimation_pipeline": ["torch", "typing", "numpy"], "modelscope.pipelines.nlp.dialog_modeling_pipeline": ["typing"], "modelscope.pipelines.nlp.canmt_translation_pipeline": ["torch", "os", "sacremoses", "typing"], "modelscope.pipelines.nlp.word_segmentation_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.document_segmentation_pipeline": ["datasets", "numpy", "torch", "re", "typing"], "modelscope.pipelines.nlp.distributed_gpt_moe_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.extractive_summarization_pipeline": ["datasets", "numpy", "torch", "re", "typing"], "modelscope.pipelines.nlp.text_error_correction_pipeline": ["torch", "typing"], "modelscope.pipelines.nlp.dialog_state_tracking_pipeline": ["typing"], "modelscope.pipelines.nlp.mglm_text_summarization_pipeline": ["os", "typing"], "modelscope.pipelines.nlp.translation_pipeline": ["tensorflow", "sacremoses", "numpy", "jieba", "subword_nmt", "os", "typing"], "modelscope.pipelines.nlp.siamese_uie_pipeline": ["json", "torch", "logging", "scipy", "tqdm", "math", "copy", "time", "pathlib", "os", "typing"], "modelscope.pipelines.nlp.dialog_intent_prediction_pipeline": ["typing"], "modelscope.pipelines.nlp.sentence_embedding_pipeline": ["torch", "typing", "numpy"], "modelscope.pipelines.nlp.document_grounded_dialog_rerank_pipeline": ["ujson", "transformers", "random", "numpy", "torch", "re", "sys", "collections", "time", "os", "typing", "pprint"], 
"modelscope.pipelines.nlp.zero_shot_classification_pipeline": ["torch", "scipy", "typing"], "modelscope.pipelines.nlp.text_generation_pipeline": ["torch", "os", "typing"], "modelscope.pipelines.nlp.language_identification_pipline": ["tensorflow", "numpy", "re", "os", "typing"], "modelscope.pipelines.nlp.token_classification_pipeline": ["torch", "typing", "numpy"], "modelscope.pipelines.nlp.codegeex_code_translation_pipeline": ["typing"], "modelscope.pipelines.cv.bad_image_detecting_pipeline": ["torch", "typing", "numpy"], "modelscope.pipelines.cv.image_cartoon_pipeline": ["tensorflow", "numpy", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_to_image_generate_pipeline": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.facial_expression_recognition_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.retina_face_detection_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_style_transfer_pipeline": ["cv2", "os", "typing", "numpy"], "modelscope.pipelines.cv.image_face_fusion_pipeline": ["typing", "numpy"], "modelscope.pipelines.cv.ulfd_face_detection_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.pedestrian_attribute_recognition_pipeline": ["PIL", "numpy", "cv2", "json", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.image_denoise_pipeline": ["torch", "torchvision", "typing"], "modelscope.pipelines.cv.vop_retrieval_se_pipeline": ["numpy", "torch", "gzip", "os", "typing"], "modelscope.pipelines.cv.image_matting_pipeline": ["tensorflow", "numpy", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_deblur_pipeline": ["torch", "torchvision", "typing"], "modelscope.pipelines.cv.video_human_matting_pipeline": ["numpy", "cv2", "torch", "moviepy", "os", "typing"], "modelscope.pipelines.cv.live_category_pipeline": ["PIL", "numpy", "torch", "decord", "torchvision", "os", "typing"], "modelscope.pipelines.cv.image_structured_model_probing_pipeline": ["mmcv", "numpy", "torch", "math", "torchvision", "os", "typing"], "modelscope.pipelines.cv.face_quality_assessment_pipeline": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "modelscope.pipelines.cv.face_processing_base_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_portrait_enhancement_pipeline": ["PIL", "numpy", "cv2", "torch", "scipy", "math", "typing"], "modelscope.pipelines.cv.image_color_enhance_pipeline": ["torch", "torchvision", "typing"], "modelscope.pipelines.cv.vision_efficient_tuning_pipeline": ["torch", "torchvision", "typing", "numpy"], "modelscope.pipelines.cv.tbs_detection_utils.utils": ["PIL", "numpy", "torch", "__future__", "colorsys", "pandas", "matplotlib", "torchvision", "os"], "modelscope.pipelines.cv.video_object_segmentation_pipeline": ["PIL", "numpy", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.face_detection_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.body_3d_keypoints_pipeline": ["numpy", "cv2", "torch", "tempfile", "matplotlib", "datetime", "mpl_toolkits", "os", "typing"], "modelscope.pipelines.cv.image_paintbyexample_pipeline": ["PIL", "numpy", "cv2", "torch", "einops", "torchvision", "typing"], "modelscope.pipelines.cv.face_recognition_ood_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_classification_pipeline": ["torch", "typing", "numpy"], 
"modelscope.pipelines.cv.card_detection_pipeline": ["typing"], "modelscope.pipelines.cv.table_recognition_pipeline": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "modelscope.pipelines.cv.image_to_image_translation_pipeline": ["PIL", "numpy", "cv2", "torch", "sys", "io", "torchvision", "os", "typing"], "modelscope.pipelines.cv.face_attribute_recognition_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_debanding_pipeline": ["torch", "torchvision", "typing"], "modelscope.pipelines.cv.video_instance_segmentation_pipeline": ["mmcv", "numpy", "cv2", "torch", "tqdm", "os", "typing"], "modelscope.pipelines.cv.tinynas_classification_pipeline": ["torch", "math", "torchvision", "os", "typing"], "modelscope.pipelines.cv.human_reconstruction_pipeline": ["trimesh", "shutil", "numpy", "torch", "os", "typing"], "modelscope.pipelines.cv.video_multi_object_tracking_pipeline": ["torch", "os", "typing"], "modelscope.pipelines.cv.controllable_image_generation_pipeline": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "glob", "os", "typing"], "modelscope.pipelines.cv.image_defrcn_fewshot_pipeline": ["torch", "os", "typing", "numpy"], "modelscope.pipelines.cv.ddpm_semantic_segmentation_pipeline": ["torch", "torchvision", "typing"], "modelscope.pipelines.cv.content_check_pipeline": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.vop_retrieval_pipeline": ["random", "numpy", "torch", "tqdm", "math", "collections", "gzip", "os", "typing", "pickle"], "modelscope.pipelines.cv.object_detection_3d_pipeline": ["PIL", "numpy", "cv2", "torch", "tempfile", "os", "typing"], "modelscope.pipelines.cv.lineless_table_recognition_pipeline": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "modelscope.pipelines.cv.cmdssl_video_embedding_pipeline": ["PIL", "numpy", "torch", "decord", "torchvision", "os", "typing"], "modelscope.pipelines.cv.tinynas_detection_pipeline": ["typing"], "modelscope.pipelines.cv.video_deinterlace_pipeline": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "torchvision", "os", "typing"], "modelscope.pipelines.cv.image_open_vocabulary_detection_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.language_guided_video_summarization_pipeline": ["PIL", "shutil", "random", "numpy", "cv2", "torch", "tempfile", "clip", "os", "typing"], "modelscope.pipelines.cv.body_2d_keypoints_pipeline": ["PIL", "numpy", "cv2", "json", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.face_human_hand_detection_pipeline": ["typing", "numpy"], "modelscope.pipelines.cv.hicossl_video_embedding_pipeline": ["torch", "os", "typing", "math"], "modelscope.pipelines.cv.face_recognition_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_body_reshaping_pipeline": ["typing"], "modelscope.pipelines.cv.image_inpainting_pipeline": ["PIL", "numpy", "torch", "cv2", "typing"], "modelscope.pipelines.cv.face_recognition_onnx_fm_pipeline": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "modelscope.pipelines.cv.image_driving_perception_pipeline": ["cv2", "os", "typing", "numpy"], "modelscope.pipelines.cv.video_stabilization_pipeline": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "glob", "os", "typing"], "modelscope.pipelines.cv.indoor_layout_estimation_pipeline": ["cv2", "typing", "numpy"], "modelscope.pipelines.cv.ddcolor_image_colorization_pipeline": ["numpy", "cv2", "torch", "torchvision", "typing"], 
"modelscope.pipelines.cv.face_emotion_pipeline": ["typing", "numpy"], "modelscope.pipelines.cv.mtcnn_face_detection_pipeline": ["torch", "os", "typing"], "modelscope.pipelines.cv.nerf_recon_acc_pipeline": ["typing"], "modelscope.pipelines.cv.image_bts_depth_estimation_pipeline": ["albumentations", "numpy", "torch", "cv2", "typing"], "modelscope.pipelines.cv.facial_landmark_confidence_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.face_reconstruction_pipeline": ["PIL", "tensorflow", "shutil", "numpy", "cv2", "torch", "scipy", "io", "face_alignment", "os", "typing"], "modelscope.pipelines.cv.mog_face_detection_pipeline": ["os", "typing", "numpy"], "modelscope.pipelines.cv.skin_retouching_pipeline": ["PIL", "tensorflow", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.vision_middleware_pipeline": ["mmcv", "numpy", "torch", "math", "torchvision", "os", "typing"], "modelscope.pipelines.cv.face_liveness_ir_pipeline": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "modelscope.pipelines.cv.image_detection_pipeline": ["typing", "numpy"], "modelscope.pipelines.cv.realtime_video_object_detection_pipeline": ["PIL", "numpy", "cv2", "json", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.video_panoptic_segmentation_pipeline": ["mmcv", "numpy", "cv2", "torch", "tqdm", "os", "typing"], "modelscope.pipelines.cv.action_detection_pipeline": ["os", "typing", "math"], "modelscope.pipelines.cv.product_segmentation_pipeline": ["typing", "numpy"], "modelscope.pipelines.cv.tbs_detection_pipeline": ["PIL", "numpy", "cv2", "torch", "colorsys", "os", "typing"], "modelscope.pipelines.cv.image_matching_pipeline": ["PIL", "numpy", "torch", "cv2", "typing"], "modelscope.pipelines.cv.video_category_pipeline": ["PIL", "numpy", "json", "torch", "decord", "torchvision", "os", "typing"], "modelscope.pipelines.cv.hand_static_pipeline": ["typing", "numpy"], "modelscope.pipelines.cv.animal_recognition_pipeline": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.pointcloud_sceneflow_estimation_pipeline": ["torch", "typing", "plyfile", "numpy"], "modelscope.pipelines.cv.image_instance_segmentation_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.video_frame_interpolation_pipeline": ["subprocess", "numpy", "cv2", "torch", "tempfile", "math", "glob", "torchvision", "os", "typing"], "modelscope.pipelines.cv.image_quality_assessment_mos_pipeline": ["numpy", "cv2", "torch", "tempfile", "math", "torchvision", "typing"], "modelscope.pipelines.cv.video_summarization_pipeline": ["numpy", "cv2", "torch", "tqdm", "os", "typing"], "modelscope.pipelines.cv.panorama_depth_estimation_pipeline": ["PIL", "numpy", "torch", "cv2", "typing"], "modelscope.pipelines.cv.fast_instance_segmentation_pipeline": ["torch", "torchvision", "typing", "numpy"], "modelscope.pipelines.cv.vidt_pipeline": ["torch", "torchvision", "typing"], "modelscope.pipelines.cv.image_skychange_pipeline": ["PIL", "pdb", "numpy", "cv2", "time", "typing"], "modelscope.pipelines.cv.image_quality_assessment_man_pipeline": ["numpy", "cv2", "torch", "tempfile", "math", "torchvision", "typing"], "modelscope.pipelines.cv.image_restoration_pipeline": ["typing"], "modelscope.pipelines.cv.video_inpainting_pipeline": ["typing"], "modelscope.pipelines.cv.face_image_generation_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.video_super_resolution_pipeline": ["subprocess", 
"numpy", "cv2", "torch", "tempfile", "math", "torchvision", "os", "typing"], "modelscope.pipelines.cv.referring_video_object_segmentation_pipeline": ["PIL", "numpy", "torch", "einops", "tqdm", "tempfile", "moviepy", "torchvision", "typing"], "modelscope.pipelines.cv.virtual_try_on_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.ocr_recognition_pipeline": [], "modelscope.pipelines.cv.ocr_detection_pipeline": ["tensorflow", "tf_slim", "numpy", "cv2", "torch", "math", "os", "typing"], "modelscope.pipelines.cv.movie_scene_segmentation_pipeline": ["torch", "typing"], "modelscope.pipelines.cv.maskdino_instance_segmentation_pipeline": ["torch", "torchvision", "typing"], "modelscope.pipelines.cv.video_colorization_pipeline": ["PIL", "subprocess", "numpy", "cv2", "torch", "tempfile", "torchvision", "os", "typing"], "modelscope.pipelines.cv.image_human_parsing_pipeline": ["torch", "torchvision", "typing", "numpy"], "modelscope.pipelines.cv.face_liveness_xc_pipeline": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "modelscope.pipelines.cv.crowd_counting_pipeline": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "modelscope.pipelines.cv.video_depth_estimation_pipeline": ["typing"], "modelscope.pipelines.cv.image_colorization_pipeline": ["PIL", "numpy", "torch", "cv2", "torchvision", "typing"], "modelscope.pipelines.cv.arc_face_recognition_pipeline": ["PIL", "numpy", "torch", "cv2", "os", "typing"], "modelscope.pipelines.cv.image_quality_assessment_degradation_pipeline": ["numpy", "cv2", "torch", "tempfile", "math", "torchvision", "typing"], "modelscope.pipelines.cv.ocr_utils.model_convnext_transformer": ["torch"], "modelscope.pipelines.cv.ocr_utils.model_resnet18_half": ["torch", "os"], "modelscope.pipelines.cv.ocr_utils.resnet18_v1": ["tensorflow", "tf_slim"], "modelscope.pipelines.cv.ocr_utils.model_dla34": ["torch", "os", "math", "numpy"], "modelscope.pipelines.cv.ocr_utils.ocr_modules.vitstr": ["torch", "logging", "functools", "copy", "__future__"], "modelscope.pipelines.cv.ocr_utils.ocr_modules.timm_tinyc": ["copy", "itertools", "torch", "logging", "functools", "math", "collections"], "modelscope.pipelines.cv.ocr_utils.ocr_modules.convnext": ["torch"], "modelscope.pipelines.cv.ocr_utils.table_process": ["copy", "numpy", "random", "cv2", "torch", "math"], "modelscope.pipelines.cv.ocr_utils.resnet_utils": ["tensorflow", "collections", "tf_slim"], "modelscope.pipelines.cv.ocr_utils.ops": ["tensorflow", "shutil", "numpy", "cv2", "absl", "sys", "math", "os", "uuid"], "modelscope.pipelines.cv.ocr_utils.utils": ["cv2", "pyclipper", "shapely", "numpy"], "modelscope.pipelines.cv.ocr_utils.model_vlpt": ["torch", "os", "sys", "math"], "modelscope.pipelines.cv.ocr_utils.model_resnet_mutex_v4_linewithchar": ["tensorflow", "tf_slim"], "modelscope.pipelines.cv.image_inpainting_sdv2_pipeline": ["numpy", "cv2", "torch", "tempfile", "sys", "math", "diffusers", "os", "typing"], "modelscope.pipelines.cv.image_super_resolution_pipeline": ["PIL", "numpy", "torch", "cv2", "typing"], "modelscope.pipelines.cv.image_salient_detection_pipeline": ["typing"], "modelscope.pipelines.cv.video_single_object_tracking_pipeline": ["cv2", "os", "typing"], "modelscope.pipelines.cv.face_recognition_onnx_ir_pipeline": ["PIL", "numpy", "torch", "cv2", "onnxruntime", "os", "typing"], "modelscope.pipelines.cv.product_retrieval_embedding_pipeline": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.mask_face_recognition_pipeline": 
["PIL", "numpy", "torch", "cv2", "collections", "os", "typing"], "modelscope.pipelines.cv.mobile_image_super_resolution_pipeline": ["skimage", "numpy", "torch", "torchvision", "typing"], "modelscope.pipelines.cv.license_plate_detection_pipeline": ["PIL", "numpy", "cv2", "torch", "math", "os", "typing"], "modelscope.pipelines.cv.image_semantic_segmentation_pipeline": ["PIL", "numpy", "torch", "cv2", "typing"], "modelscope.pipelines.cv.text_driven_segmentation_pipleline": ["typing"], "modelscope.pipelines.cv.motion_generation_pipeline": ["numpy", "torch", "tempfile", "os", "typing"], "modelscope.pipelines.cv.image_mvs_depth_estimation_pipeline": ["os", "typing", "tempfile", "shutil"], "modelscope.pipelines.cv.image_depth_estimation_pipeline": ["PIL", "numpy", "torch", "cv2", "typing"], "modelscope.pipelines.cv.action_recognition_pipeline": ["torch", "os", "typing", "math"], "modelscope.pipelines.cv.image_reid_person_pipeline": ["PIL", "torch", "math", "torchvision", "os", "typing"], "modelscope.pipelines.cv.general_recognition_pipeline": ["PIL", "numpy", "cv2", "torch", "torchvision", "os", "typing"], "modelscope.pipelines.cv.shop_segmentation_pipleline": ["typing"], "modelscope.pipelines.base": ["multiprocessing", "numpy", "random", "torch", "abc", "functools", "packaging", "os", "threading", "typing"], "modelscope.preprocessors.kws": ["os", "typing", "yaml"], "modelscope.preprocessors.multi_modal": ["PIL", "timm", "numpy", "json", "torch", "decord", "re", "io", "torchvision", "os", "typing"], "modelscope.preprocessors.science.uni_fold": ["unittest", "hashlib", "ipdb", "random", "numpy", "torch", "json", "tarfile", "pathlib", "os", "typing", "requests", "logging", "re", "tqdm", "time", "gzip", "pickle"], "modelscope.preprocessors.tts": ["os", "kantts", "typing"], "modelscope.preprocessors.asr": ["os", "typing"], "modelscope.preprocessors.builder": [], "modelscope.preprocessors.movie_scene_segmentation.transforms": ["numbers", "PIL", "random", "numpy", "torch", "torchvision", "os", "typing"], "modelscope.preprocessors.common": ["numpy", "torch", "collections", "time", "typing"], "modelscope.preprocessors.nlp.token_classification_preprocessor": ["torch", "typing", "numpy"], "modelscope.preprocessors.nlp.siamese_uie_preprocessor": ["typing", "transformers"], "modelscope.preprocessors.nlp.relation_extraction_preprocessor": ["typing", "transformers"], "modelscope.preprocessors.nlp.token_classification_viet_preprocessor": ["torch", "typing"], "modelscope.preprocessors.nlp.translation_evaluation_preprocessor": ["torch", "typing", "transformers"], "modelscope.preprocessors.nlp.text_classification_preprocessor": ["typing", "numpy"], "modelscope.preprocessors.nlp.document_grounded_dialog_retrieval_preprocessor": ["torch", "os", "typing", "transformers"], "modelscope.preprocessors.nlp.zero_shot_classification_preprocessor": ["typing"], "modelscope.preprocessors.nlp.canmt_translation": ["sacremoses", "jieba", "torch", "subword_nmt", "os", "typing"], "modelscope.preprocessors.nlp.fill_mask_preprocessor": ["numpy", "torch", "abc", "re", "os", "typing"], "modelscope.preprocessors.nlp.word_alignment_preprocessor": ["itertools", "numpy", "torch", "os", "typing"], "modelscope.preprocessors.nlp.space_T_en.fields.preprocess_dataset": ["text2sql_lgesql"], "modelscope.preprocessors.nlp.space_T_en.fields.parse": [], "modelscope.preprocessors.nlp.space_T_en.fields.common_utils": ["itertools", "numpy", "nltk", "sqlite3", "text2sql_lgesql", "os"], "modelscope.preprocessors.nlp.space_T_en.fields.process_dataset": 
["os", "sys", "pickle", "text2sql_lgesql"], "modelscope.preprocessors.nlp.space_T_en.conversational_text_to_sql_preprocessor": ["json", "torch", "text2sql_lgesql", "os", "typing"], "modelscope.preprocessors.nlp.document_grounded_dialog_generate_preprocessor": ["torch", "os", "typing", "transformers"], "modelscope.preprocessors.nlp.text_error_correction": ["torch", "os", "typing", "transformers"], "modelscope.preprocessors.nlp.text_ranking_preprocessor": ["typing", "transformers"], "modelscope.preprocessors.nlp.transformers_tokenizer": ["json", "os", "transformers", "collections"], "modelscope.preprocessors.nlp.bert_seq_cls_tokenizer": ["typing", "transformers"], "modelscope.preprocessors.nlp.text_clean": ["re", "codecs", "sys"], "modelscope.preprocessors.nlp.utils": ["transformers", "numpy", "json", "collections", "os", "typing"], "modelscope.preprocessors.nlp.document_segmentation_preprocessor": ["typing"], "modelscope.preprocessors.nlp.sentence_embedding_preprocessor": ["typing"], "modelscope.preprocessors.nlp.mglm_summarization_preprocessor": ["os", "re", "typing"], "modelscope.preprocessors.nlp.token_classification_thai_preprocessor": ["typing"], "modelscope.preprocessors.nlp.mgeo_ranking_preprocessor": ["torch", "typing", "transformers"], "modelscope.preprocessors.nlp.space.dialog_intent_prediction_preprocessor": ["json", "os", "typing"], "modelscope.preprocessors.nlp.space.lazy_dataset": ["json"], "modelscope.preprocessors.nlp.space.dialog_state_tracking_preprocessor": ["typing"], "modelscope.preprocessors.nlp.space.preprocess": ["os", "glob"], "modelscope.preprocessors.nlp.space.data_loader": ["os", "math", "numpy"], "modelscope.preprocessors.nlp.space.batch": [], "modelscope.preprocessors.nlp.space.dialog_modeling_preprocessor": ["os", "typing"], "modelscope.preprocessors.nlp.space.tokenizer": ["json", "logging", "__future__", "functools", "sys", "collections", "unicodedata", "os", "regex"], "modelscope.preprocessors.nlp.space.dst_processors": ["six", "numpy", "json", "logging", "re", "tqdm"], "modelscope.preprocessors.nlp.space.args": ["json", "argparse"], "modelscope.preprocessors.nlp.space.fields.gen_field": ["itertools", "random", "numpy", "json", "collections", "asyncio", "os"], "modelscope.preprocessors.nlp.space.fields.intent_field": ["multiprocessing", "itertools", "random", "numpy", "json", "re", "tqdm", "collections", "time", "glob", "os"], "modelscope.preprocessors.nlp.space.sampler": ["numpy"], "modelscope.preprocessors.nlp.space.tensorlistdataset": ["torch"], "modelscope.preprocessors.nlp.dialog_classification_use_preprocessor": ["torch", "typing", "transformers"], "modelscope.preprocessors.nlp.text_generation_preprocessor": ["torch", "os", "typing", "numpy"], "modelscope.preprocessors.nlp.space_T_cn.table_question_answering_preprocessor": ["torch", "os", "typing", "transformers"], "modelscope.preprocessors.nlp.space_T_cn.fields.database": ["json", "sqlite3", "tqdm"], "modelscope.preprocessors.nlp.space_T_cn.fields.schema_link": ["re"], "modelscope.preprocessors.nlp.space_T_cn.fields.struct": [], "modelscope.preprocessors.nlp.document_grounded_dialog_rerank_preprocessor": ["transformers", "torch", "copy", "os", "typing"], "modelscope.preprocessors.nlp.feature_extraction_preprocessor": ["typing", "numpy"], "modelscope.preprocessors.nlp.faq_question_answering_preprocessor": ["torch", "typing"], "modelscope.preprocessors.audio": ["numpy", "torch", "scipy", "io", "os", "typing"], "modelscope.preprocessors.cv.image_classification_preprocessor": ["PIL", "numpy", "cv2", 
"torch", "torchvision", "os", "typing"], "modelscope.preprocessors.cv.util": ["os", "sys", "shutil", "collections"], "modelscope.preprocessors.cv.timer": ["time"], "modelscope.preprocessors.cv.bad_image_detecting_preprocessor": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "modelscope.preprocessors.cv.mmcls_preprocessor": ["os", "typing", "numpy"], "modelscope.preprocessors.cv.controllable_image_generation": ["PIL", "numpy", "cv2", "torch", "math", "torchvision", "os", "typing"], "modelscope.preprocessors.cv.image_quality_assessment_mos": ["numpy", "cv2", "math", "torchvision", "typing"], "modelscope.preprocessors.cv.image_restoration_preprocessor": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "modelscope.preprocessors.cv.cv2_transforms": ["numbers", "random", "numpy", "cv2", "torch", "math", "collections"], "modelscope.preprocessors.cv.video_super_resolution": ["cv2", "os", "collections"], "modelscope.preprocessors.cv.image_quality_assessment_man": ["PIL", "numpy", "torch", "math", "torchvision", "typing"], "modelscope.preprocessors.cv.action_detection_mapper": ["copy", "numpy", "random", "torch", "decord", "scipy", "detectron2"], "modelscope.preprocessors.cv.video_stabilization": ["cv2", "torch", "numpy"], "modelscope.preprocessors.video": ["urllib", "numpy", "random", "torch", "decord", "tempfile", "math", "torchvision", "os", "uuid"], "modelscope.preprocessors.image": ["PIL", "numpy", "cv2", "io", "typing"], "modelscope.preprocessors.base": ["os", "abc", "typing"], "modelscope.preprocessors.ofa.visual_question_answering": ["PIL", "torch", "torchvision", "typing"], "modelscope.preprocessors.ofa.image_classification": ["PIL", "timm", "torch", "functools", "torchvision", "typing"], "modelscope.preprocessors.ofa.utils.transforms": ["PIL", "torchvision", "numpy", "random", "torch"], "modelscope.preprocessors.ofa.utils.bridge_content_encoder": ["sqlite3", "difflib", "rapidfuzz", "functools", "typing"], "modelscope.preprocessors.ofa.utils.collate": ["torch", "typing", "numpy"], "modelscope.preprocessors.ofa.utils.get_tables": ["traceback", "sqlite3", "sys"], "modelscope.preprocessors.ofa.utils.text2phone": [], "modelscope.preprocessors.ofa.utils.audio_helper": ["torch", "typing", "numpy"], "modelscope.preprocessors.ofa.utils.random_help": ["torch", "torch_xla"], "modelscope.preprocessors.ofa.utils.vision_helper": ["cv2", "numpy"], "modelscope.preprocessors.ofa.utils.constant": [], "modelscope.preprocessors.ofa.asr": ["random", "torch", "librosa", "fairseq", "soundfile", "pathlib", "os", "typing"], "modelscope.preprocessors.ofa.text2sql": ["random", "torch", "re", "os", "typing"], "modelscope.preprocessors.ofa.text_classification": ["torch", "typing"], "modelscope.preprocessors.ofa.image_captioning": ["torch", "torchvision", "typing"], "modelscope.preprocessors.ofa.ocr_recognition": ["torch", "unicodedata2", "torchvision", "typing", "zhconv"], "modelscope.preprocessors.ofa.visual_entailment": ["PIL", "torch", "torchvision", "typing"], "modelscope.preprocessors.ofa.visual_grounding": ["PIL", "numpy", "torch", "torchvision", "typing"], "modelscope.preprocessors.ofa.summarization": ["torch", "typing"], "modelscope.preprocessors.ofa.text_to_image_synthesis": ["torch", "typing"], "modelscope.preprocessors.ofa.sudoku": ["torch", "typing", "numpy"], "modelscope.preprocessors.ofa.base": ["PIL", "string", "numpy", "json", "torch", "torchaudio", "re", "io", "os"], "modelscope.trainers.parallel.builder": ["torch"], "modelscope.trainers.parallel.utils": [], 
"modelscope.trainers.optimizer.builder": ["torch", "inspect", "typing"], "modelscope.trainers.optimizer.child_tuning_adamw_optimizer": ["numpy", "torch", "types", "math", "typing"], "modelscope.trainers.lrscheduler.builder": ["torch", "inspect", "packaging"], "modelscope.trainers.lrscheduler.warmup.warmup": [], "modelscope.trainers.lrscheduler.warmup.base": ["torch"], "modelscope.trainers.nlp_trainer": ["torch", "os", "typing", "numpy"], "modelscope.trainers.utils.inference": ["shutil", "torch", "logging", "tqdm", "collections", "os", "pickle"], "modelscope.trainers.utils.log_buffer": ["collections", "numpy"], "modelscope.trainers.training_args": ["json", "re", "addict", "copy", "dataclasses", "typing"], "modelscope.trainers.builder": [], "modelscope.trainers.audio.kws_nearfield_trainer": ["torch", "re", "tensorboardX", "copy", "datetime", "yaml", "os", "typing"], "modelscope.trainers.audio.kws_utils.model_utils": ["shutil", "numpy", "torch", "re", "glob", "yaml", "os"], "modelscope.trainers.audio.kws_utils.runtime_utils": ["codecs", "shutil", "json", "re", "sys", "collections", "os", "stat"], "modelscope.trainers.audio.kws_utils.det_utils": ["kaldiio", "numpy", "json", "torch", "matplotlib", "os", "glob", "threading"], "modelscope.trainers.audio.kws_utils.batch_utils": ["numpy", "torch", "sys", "math", "collections", "datetime", "os", "typing"], "modelscope.trainers.audio.kws_utils.file_utils": ["re"], "modelscope.trainers.audio.kws_farfield_trainer": ["numpy", "torch", "math", "datetime", "glob", "os", "typing", "pickle"], "modelscope.trainers.audio.separation_trainer": ["numpy", "torch", "torchaudio", "tqdm", "csv", "os", "speechbrain", "typing"], "modelscope.trainers.audio.asr_trainer": ["shutil", "json", "typing", "tempfile", "os", "funasr"], "modelscope.trainers.audio.tts_trainer": ["shutil", "json", "tempfile", "os", "typing", "zipfile"], "modelscope.trainers.audio.ans_trainer": [], "modelscope.trainers.hooks.checkpoint.checkpoint_hook": ["random", "numpy", "torch", "time", "os", "typing"], "modelscope.trainers.hooks.checkpoint.checkpoint_processor": ["os", "re", "shutil"], "modelscope.trainers.hooks.checkpoint.load_checkpoint_hook": ["random", "numpy", "torch", "packaging", "typing"], "modelscope.trainers.hooks.logger.text_logger_hook": ["json", "torch", "collections", "datetime", "os"], "modelscope.trainers.hooks.logger.tensorboard_hook": ["torch", "os", "numpy"], "modelscope.trainers.hooks.logger.base": ["numbers", "torch", "abc", "numpy"], "modelscope.trainers.hooks.optimizer.apex_optimizer_hook": ["torch", "logging", "packaging"], "modelscope.trainers.hooks.optimizer.torch_optimizer_hook": ["logging"], "modelscope.trainers.hooks.optimizer.base": ["torch", "logging"], "modelscope.trainers.hooks.distributed.megatron_hook": ["torch", "os", "shutil", "megatron_util"], "modelscope.trainers.hooks.distributed.deepspeed_hook": ["shutil", "torch", "megatron_util", "deepspeed", "os"], "modelscope.trainers.hooks.distributed.ddp_hook": [], "modelscope.trainers.hooks.lr_scheduler_hook": [], "modelscope.trainers.hooks.early_stop_hook": ["numpy"], "modelscope.trainers.hooks.hook": ["functools"], "modelscope.trainers.hooks.priority": ["typing", "enum"], "modelscope.trainers.hooks.builder": [], "modelscope.trainers.hooks.clip_clamp_logit_scale_hook": ["torch"], "modelscope.trainers.hooks.compression.sparsity_hook": ["os"], "modelscope.trainers.hooks.compression.utils": ["torch"], "modelscope.trainers.hooks.iter_timer_hook": ["time"], "modelscope.trainers.hooks.evaluation_hook": ["typing", 
"collections"], "modelscope.trainers.multi_modal.clip.clip_trainer": ["torch", "os", "typing", "math"], "modelscope.trainers.multi_modal.clip.clip_trainer_utils": ["torch", "functools", "math", "inspect", "os"], "modelscope.trainers.multi_modal.efficient_diffusion_tuning.efficient_diffusion_tuning_trainer": ["torch", "typing"], "modelscope.trainers.multi_modal.mplug.mplug_trainer": ["torch", "typing", "collections"], "modelscope.trainers.multi_modal.team.team_trainer": ["numpy", "torch", "collections", "sklearn", "os", "typing"], "modelscope.trainers.multi_modal.team.team_trainer_utils": ["torch", "torchvision", "PIL"], "modelscope.trainers.multi_modal.mgeo_ranking_trainer": ["torch", "dataclasses", "typing"], "modelscope.trainers.multi_modal.ofa.ofa_trainer": ["shutil", "json", "torch", "functools", "tempfile", "math", "os", "typing"], "modelscope.trainers.multi_modal.ofa.ofa_trainer_utils": ["transformers", "shutil", "numpy", "torch", "os", "math"], "modelscope.trainers.default_config": ["typing"], "modelscope.trainers.nlp.gpt_moe_trainer": ["torch", "collections", "megatron_util", "os", "typing"], "modelscope.trainers.nlp.plug_trainer": ["torch", "megatron_util", "deepspeed", "os", "typing"], "modelscope.trainers.nlp.text_generation_trainer": ["torch", "collections"], "modelscope.trainers.nlp.document_grounded_dialog_rerank_trainer": ["transformers", "numpy", "random", "torch", "time", "os", "typing"], "modelscope.trainers.nlp.csanmt_translation_trainer": ["os", "tensorflow", "typing", "time"], "modelscope.trainers.nlp.translation_evaluation_trainer": ["transformers", "random", "torch", "tqdm", "math", "pandas", "os", "typing"], "modelscope.trainers.nlp.faq_question_answering_trainer": ["distutils", "contextlib", "numpy", "torch", "functools", "collections", "dataclasses", "typing"], "modelscope.trainers.nlp.table_question_answering_trainer": ["numpy", "json", "torch", "tqdm", "time", "os", "typing"], "modelscope.trainers.nlp.sequence_classification_trainer": ["time", "typing", "numpy"], "modelscope.trainers.nlp.sentence_embedding_trainer": ["transformers", "numpy", "torch", "tqdm", "time", "dataclasses", "typing"], "modelscope.trainers.nlp.gpt3_trainer": ["torch", "os", "copy", "typing"], "modelscope.trainers.nlp.text_ranking_trainer": ["numpy", "torch", "tqdm", "time", "dataclasses", "typing"], "modelscope.trainers.nlp.siamese_uie_trainer": ["random", "numpy", "json", "torch", "collections", "math", "time", "os", "typing"], "modelscope.trainers.nlp.space.metrics.metrics_tracker": ["math", "collections"], "modelscope.trainers.nlp.space.dialog_intent_trainer": ["os", "typing", "numpy"], "modelscope.trainers.nlp.space.eval": ["numpy", "json", "math", "collections", "nltk", "sklearn"], "modelscope.trainers.nlp.space.trainer.intent_trainer": ["transformers", "numpy", "json", "torch", "tqdm", "collections", "time", "os"], "modelscope.trainers.nlp.space.trainer.gen_trainer": ["transformers", "numpy", "json", "torch", "tqdm", "collections", "time", "os"], "modelscope.trainers.nlp.space.dialog_modeling_trainer": ["os", "time", "typing", "numpy"], "modelscope.trainers.nlp.document_grounded_dialog_retrieval_trainer": ["transformers", "numpy", "json", "torch", "tqdm", "faiss", "os"], "modelscope.trainers.nlp.document_grounded_dialog_generate_trainer": ["string", "transformers", "json", "torch", "rouge", "re", "tqdm", "collections", "os", "sacrebleu"], "modelscope.trainers.cli_argument_parser": ["dataclasses", "typing", "argparse"], "modelscope.trainers.cv.ocr_recognition_trainer": ["torch", 
"time", "collections"], "modelscope.trainers.cv.image_instance_segmentation_trainer": [], "modelscope.trainers.cv.referring_video_object_segmentation_trainer": ["torch", "os"], "modelscope.trainers.cv.vision_efficient_tuning_trainer": ["torch", "typing"], "modelscope.trainers.cv.movie_scene_segmentation_trainer": [], "modelscope.trainers.cv.nerf_recon_acc_trainer": ["random", "numpy", "cv2", "torch", "tqdm", "time", "datetime", "glob", "os", "typing"], "modelscope.trainers.cv.image_detection_damoyolo_trainer": ["torch", "math", "datetime", "time", "os", "easydict", "typing"], "modelscope.trainers.cv.image_classifition_trainer": ["numpy", "torch", "copy", "time", "os", "typing"], "modelscope.trainers.cv.cartoon_translation_trainer": ["tensorflow", "numpy", "tqdm", "packaging", "os", "typing"], "modelscope.trainers.cv.ocr_detection_db_trainer": ["numpy", "torch", "tqdm", "math", "copy", "datetime", "time", "os", "easydict", "typing"], "modelscope.trainers.cv.card_detection_scrfd_trainer": [], "modelscope.trainers.cv.face_detection_scrfd_trainer": ["copy", "time", "typing", "os"], "modelscope.trainers.cv.image_inpainting_trainer": ["torch", "time", "collections"], "modelscope.trainers.cv.image_portrait_enhancement_trainer": ["torch", "collections"], "modelscope.trainers.cv.action_detection_trainer": ["torch", "fvcore", "os", "typing", "detectron2"], "modelscope.trainers.cv.image_defrcn_fewshot_detection_trainer": ["torch", "collections", "os", "typing", "detectron2"], "modelscope.trainers.trainer": ["distutils", "json", "torch", "functools", "collections", "copy", "inspect", "os", "typing"], "modelscope.trainers.base": ["os", "abc", "typing", "time"], "modelscope.msdatasets.ms_dataset": ["datasets", "numpy", "warnings", "os", "typing"], "modelscope.msdatasets.context.dataset_context_config": ["typing"], "modelscope.msdatasets.auth.auth_config": ["http", "typing"], "modelscope.msdatasets.meta.data_meta_config": [], "modelscope.msdatasets.meta.data_meta_manager": ["datasets", "shutil", "json", "collections", "os"], "modelscope.msdatasets.utils.oss_utils": ["multiprocessing", "datasets", "__future__", "oss2", "os"], "modelscope.msdatasets.utils.maxcompute_utils": ["pandas", "math"], "modelscope.msdatasets.utils.dataset_utils": ["os", "typing", "collections"], "modelscope.msdatasets.utils.delete_utils": [], "modelscope.msdatasets.utils.upload_utils": ["os", "tqdm", "multiprocessing"], "modelscope.msdatasets.task_datasets.video_summarization_dataset": [], "modelscope.msdatasets.task_datasets.sidd_image_denoising": [], "modelscope.msdatasets.task_datasets.torch_base_dataset": [], "modelscope.msdatasets.task_datasets.reds_image_deblurring_dataset": [], "modelscope.msdatasets.task_datasets.gopro_image_deblurring_dataset": [], "modelscope.msdatasets.data_files.data_files_manager": ["os", "datasets", "typing"], "modelscope.msdatasets.audio.asr_dataset": [], "modelscope.msdatasets.download.download_config": ["datasets", "typing"], "modelscope.msdatasets.download.download_manager": ["datasets"], "modelscope.msdatasets.download.dataset_builder": ["datasets", "pandas", "pyarrow", "os", "typing"], "modelscope.msdatasets.dataset_cls.dataset": ["copy", "pandas", "datasets", "os"], "modelscope.msdatasets.dataset_cls.custom_datasets.image_quality_assessment_degradation.image_quality_assessment_degradation_dataset": ["torchvision"], "modelscope.msdatasets.dataset_cls.custom_datasets.image_portrait_enhancement.data_utils": ["cv2", "torch"], 
"modelscope.msdatasets.dataset_cls.custom_datasets.image_portrait_enhancement.image_portrait_enhancement_dataset": ["cv2", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.veco_dataset": ["datasets", "typing", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.image_instance_segmentation_coco_dataset": ["os", "numpy", "pycocotools"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_recognition_dataset": ["PIL", "numpy", "cv2", "json", "torch", "six", "lmdb", "os"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.image_dataset": ["numpy", "cv2", "torch", "logging", "functools", "math", "bisect", "os", "glob"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.processes.make_border_map": ["cv2", "pyclipper", "shapely", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.processes.normalize_image": ["torch", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.processes.make_icdar_data": ["cv2", "torch", "collections", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.processes.make_seg_detection_data": ["cv2", "pyclipper", "shapely", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.processes.data_process": [], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.processes.augment_data": ["cv2", "imgaug", "math", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.processes.random_crop_data": ["cv2", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.data_loader": ["numpy", "torch", "math", "imgaug", "bisect"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.measures.quad_measurer": ["numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.measures.iou_evaluator": ["numpy", "shapely", "collections"], "modelscope.msdatasets.dataset_cls.custom_datasets.ocr_detection.augmenter": ["imgaug"], "modelscope.msdatasets.dataset_cls.custom_datasets.bad_image_detecting.bad_image_detecting_dataset": [], "modelscope.msdatasets.dataset_cls.custom_datasets.video_summarization_dataset": ["numpy", "json", "torch", "h5py", "os"], "modelscope.msdatasets.dataset_cls.custom_datasets.image_inpainting.image_inpainting_dataset": ["albumentations", "numpy", "enum", "cv2", "os", "glob"], "modelscope.msdatasets.dataset_cls.custom_datasets.image_inpainting.aug": ["albumentations", "imgaug"], "modelscope.msdatasets.dataset_cls.custom_datasets.language_guided_video_summarization_dataset": ["numpy", "json", "torch", "h5py", "os"], "modelscope.msdatasets.dataset_cls.custom_datasets.builder": [], "modelscope.msdatasets.dataset_cls.custom_datasets.movie_scene_segmentation.movie_scene_segmentation_dataset": ["random", "json", "torch", "copy", "torchvision", "os"], "modelscope.msdatasets.dataset_cls.custom_datasets.movie_scene_segmentation.sampler": ["random", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.text_ranking_dataset": ["torch", "typing", "random"], "modelscope.msdatasets.dataset_cls.custom_datasets.audio.kws_nearfield_dataset": ["torch", "random"], "modelscope.msdatasets.dataset_cls.custom_datasets.audio.kws_farfield_dataset": ["numpy", "torch", "math", "queue", "os", "threading"], "modelscope.msdatasets.dataset_cls.custom_datasets.audio.kws_nearfield_processor": ["kaldiio", "numpy", "random", "json", "torch", "torchaudio"], "modelscope.msdatasets.dataset_cls.custom_datasets.audio.asr_dataset": ["os"], 
"modelscope.msdatasets.dataset_cls.custom_datasets.sidd_image_denoising.transforms": ["random"], "modelscope.msdatasets.dataset_cls.custom_datasets.sidd_image_denoising.sidd_image_denoising_dataset": ["cv2", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.sidd_image_denoising.data_utils": ["cv2", "torch"], "modelscope.msdatasets.dataset_cls.custom_datasets.reds_image_deblurring_dataset": ["cv2", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.video_frame_interpolation.data_utils": ["cv2", "torch"], "modelscope.msdatasets.dataset_cls.custom_datasets.video_frame_interpolation.video_frame_interpolation_dataset": ["cv2", "torch", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.image_quality_assmessment_mos.image_quality_assessment_mos_dataset": [], "modelscope.msdatasets.dataset_cls.custom_datasets.mgeo_ranking_dataset": ["json", "torch", "typing", "random"], "modelscope.msdatasets.dataset_cls.custom_datasets.video_stabilization.video_stabilization_dataset": [], "modelscope.msdatasets.dataset_cls.custom_datasets.gopro_image_deblurring_dataset": ["cv2", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.referring_video_object_segmentation.transformers": ["PIL", "torch", "torchvision", "random"], "modelscope.msdatasets.dataset_cls.custom_datasets.referring_video_object_segmentation.referring_video_object_segmentation_dataset": ["numpy", "pycocotools", "json", "torch", "tqdm", "h5py", "glob", "torchvision", "pandas", "os"], "modelscope.msdatasets.dataset_cls.custom_datasets.image_colorization.image_colorization_dataset": ["cv2", "torch", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.build": ["torch", "copy", "bisect", "math"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.datasets.coco": ["cv2", "torch", "torchvision", "numpy"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.datasets.mosaic_wrapper": ["random", "numpy", "cv2", "torch", "math"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.transforms.build": [], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.transforms.transforms": ["torchvision", "numpy", "random", "cv2", "torch"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.collate_batch": [], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.samplers.grouped_batch_sampler": ["torch", "itertools"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.samplers.distributed": ["torch", "math"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.samplers.iteration_based_batch_sampler": ["torch"], "modelscope.msdatasets.dataset_cls.custom_datasets.damoyolo.evaluation.coco.coco_eval": ["torch", "os", "tempfile", "collections"], "modelscope.msdatasets.dataset_cls.custom_datasets.easycv_base": ["os"], "modelscope.msdatasets.dataset_cls.custom_datasets.torch_custom_dataset": ["torch", "typing"], "modelscope.msdatasets.dataset_cls.custom_datasets.video_super_resolution.video_super_resolution_dataset": ["cv2", "torch", "collections", "numpy"], "modelscope.msdatasets.data_loader.data_loader_manager": ["os", "abc", "datasets", "enum"], "modelscope.msdatasets.data_loader.data_loader": ["os", "abc", "datasets", "typing"], "modelscope.exporters.torch_model_exporter": ["itertools", "contextlib", "torch", "os", "typing"], "modelscope.exporters.builder": [], "modelscope.exporters.audio.ans_dfsmn_exporter": ["torch", "os"], "modelscope.exporters.nlp.csanmt_for_translation_exporter": ["os", "typing", "tensorflow"], 
"modelscope.exporters.nlp.model_for_token_classification_exporter": ["torch", "typing", "collections"], "modelscope.exporters.nlp.sbert_for_sequence_classification_exporter": ["torch", "typing", "collections"], "modelscope.exporters.nlp.sbert_for_zero_shot_classification_exporter": ["typing", "collections"], "modelscope.exporters.cv.object_detection_damoyolo_exporter": ["numpy", "torch", "functools", "onnx", "os", "typing"], "modelscope.exporters.cv.face_detection_scrfd_exporter": ["numpy", "torch", "functools", "onnx", "os", "typing"], "modelscope.exporters.cv.cartoon_translation_exporter": ["os", "tensorflow", "typing", "packaging"], "modelscope.exporters.tf_model_exporter": ["os", "tensorflow", "typing"], "modelscope.exporters.base": ["os", "abc", "typing"]}, "version": "1.6.0", "md5": "5e46ad1c70848d28c7aeafd9db9c3aac", "files_mtime": {"TEMPLATE_PATH/models/science/unifold/config.py": 1666778289.6766584, "TEMPLATE_PATH/models/science/unifold/msa/tools/hmmsearch.py": 1666778289.6888485, "TEMPLATE_PATH/models/science/unifold/msa/tools/hhblits.py": 1666778289.6881094, "TEMPLATE_PATH/models/science/unifold/msa/tools/kalign.py": 1666778289.689356, "TEMPLATE_PATH/models/science/unifold/msa/tools/utils.py": 1666778289.6895845, "TEMPLATE_PATH/models/science/unifold/msa/tools/hmmbuild.py": 1666778289.6885293, "TEMPLATE_PATH/models/science/unifold/msa/tools/jackhmmer.py": 1666778289.6891205, "TEMPLATE_PATH/models/science/unifold/msa/tools/hhsearch.py": 1666778289.6883202, "TEMPLATE_PATH/models/science/unifold/msa/mmcif.py": 1666778289.6854372, "TEMPLATE_PATH/models/science/unifold/msa/msa_identifiers.py": 1666778289.6857276, "TEMPLATE_PATH/models/science/unifold/msa/parsers.py": 1666778289.6860957, "TEMPLATE_PATH/models/science/unifold/msa/templates.py": 1684246001.5188344, "TEMPLATE_PATH/models/science/unifold/msa/utils.py": 1666778289.6898172, "TEMPLATE_PATH/models/science/unifold/msa/pipeline.py": 1669108798.6335008, "TEMPLATE_PATH/models/science/unifold/model.py": 1669108798.6326127, "TEMPLATE_PATH/models/science/unifold/dataset.py": 1669108798.63184, "TEMPLATE_PATH/models/science/unifold/modules/confidence.py": 1666778289.6826582, "TEMPLATE_PATH/models/science/unifold/modules/alphafold.py": 1666778289.6816177, "TEMPLATE_PATH/models/science/unifold/modules/evoformer.py": 1666778289.683275, "TEMPLATE_PATH/models/science/unifold/modules/auxillary_heads.py": 1666778289.682163, "TEMPLATE_PATH/models/science/unifold/modules/attentions.py": 1678345974.7664688, "TEMPLATE_PATH/models/science/unifold/modules/embedders.py": 1666778289.6829705, "TEMPLATE_PATH/models/science/unifold/modules/structure_module.py": 1669108798.6331663, "TEMPLATE_PATH/models/science/unifold/modules/common.py": 1666778289.6823854, "TEMPLATE_PATH/models/science/unifold/modules/frame.py": 1666778289.683827, "TEMPLATE_PATH/models/science/unifold/modules/template.py": 1666778289.6844184, "TEMPLATE_PATH/models/science/unifold/modules/triangle_multiplication.py": 1666778289.6846595, "TEMPLATE_PATH/models/science/unifold/modules/featurization.py": 1666778289.6835535, "TEMPLATE_PATH/models/science/unifold/data/process_multimer.py": 1666778289.6789792, "TEMPLATE_PATH/models/science/unifold/data/protein.py": 1666778289.6792727, "TEMPLATE_PATH/models/science/unifold/data/residue_constants.py": 1669108798.6314445, "TEMPLATE_PATH/models/science/unifold/data/utils.py": 1666778289.6802187, "TEMPLATE_PATH/models/science/unifold/data/process.py": 1666778289.6786027, "TEMPLATE_PATH/models/science/unifold/data/msa_pairing.py": 
1684246001.5181975, "TEMPLATE_PATH/models/science/unifold/data/data_ops.py": 1678345974.7659872, "TEMPLATE_PATH/models/builder.py": 1678695526.2830884, "TEMPLATE_PATH/models/audio/ans/layers/activations.py": 1678695526.2749481, "TEMPLATE_PATH/models/audio/ans/layers/layer_base.py": 1678695526.2758405, "TEMPLATE_PATH/models/audio/ans/layers/affine_transform.py": 1678695526.2755635, "TEMPLATE_PATH/models/audio/ans/layers/uni_deep_fsmn.py": 1678695526.2761767, "TEMPLATE_PATH/models/audio/ans/unet.py": 1666757257.138642, "TEMPLATE_PATH/models/audio/ans/conv_stft.py": 1684246001.4631696, "TEMPLATE_PATH/models/audio/ans/denoise_net.py": 1678695526.2738411, "TEMPLATE_PATH/models/audio/ans/complex_nn.py": 1678695526.273521, "TEMPLATE_PATH/models/audio/ans/se_module_complex.py": 1666757257.1383736, "TEMPLATE_PATH/models/audio/ans/frcrn.py": 1678695526.2743342, "TEMPLATE_PATH/models/audio/sv/DTDNN_layers.py": 1683889954.4686143, "TEMPLATE_PATH/models/audio/sv/ecapa_tdnn.py": 1678345974.1721325, "TEMPLATE_PATH/models/audio/sv/ERes2Net.py": 1684247769.663397, "TEMPLATE_PATH/models/audio/sv/pooling_layers.py": 1684247769.6642458, "TEMPLATE_PATH/models/audio/sv/DTDNN.py": 1684246001.4648209, "TEMPLATE_PATH/models/audio/sv/fusion.py": 1684247769.6637704, "TEMPLATE_PATH/models/audio/sv/generic_speaker_verification.py": 1678695526.2780309, "TEMPLATE_PATH/models/audio/sv/speaker_change_locator.py": 1684246001.4659781, "TEMPLATE_PATH/models/audio/sv/rdino.py": 1684246001.4655278, "TEMPLATE_PATH/models/audio/itn/generic_inverse_text_processing.py": 1678345974.1680963, "TEMPLATE_PATH/models/audio/aec/layers/activations.py": 1666757257.1350431, "TEMPLATE_PATH/models/audio/aec/layers/layer_base.py": 1666757257.1358142, "TEMPLATE_PATH/models/audio/aec/layers/deep_fsmn.py": 1666757257.1355417, "TEMPLATE_PATH/models/audio/aec/layers/affine_transform.py": 1666757257.1352675, "TEMPLATE_PATH/models/audio/aec/layers/uni_deep_fsmn.py": 1666757257.1360576, "TEMPLATE_PATH/models/audio/aec/network/se_net.py": 1666757257.1370454, "TEMPLATE_PATH/models/audio/aec/network/loss.py": 1666757257.1365721, "TEMPLATE_PATH/models/audio/aec/network/modulation_loss.py": 1666757257.136794, "TEMPLATE_PATH/models/audio/asr/wenet_automatic_speech_recognition.py": 1678345974.1674347, "TEMPLATE_PATH/models/audio/asr/generic_automatic_speech_recognition.py": 1684246001.463599, "TEMPLATE_PATH/models/audio/punc/generic_punctuation.py": 1678345974.1698205, "TEMPLATE_PATH/models/audio/tts/voice.py": 1684246001.466414, "TEMPLATE_PATH/models/audio/tts/sambert_hifi.py": 1678695526.2786689, "TEMPLATE_PATH/models/audio/separation/mossformer.py": 1678345974.1705601, "TEMPLATE_PATH/models/audio/separation/mossformer_conv_module.py": 1678345974.1713047, "TEMPLATE_PATH/models/audio/separation/mossformer_block.py": 1678345974.1711044, "TEMPLATE_PATH/models/audio/separation/layer_norm.py": 1678345974.1702788, "TEMPLATE_PATH/models/audio/kws/farfield/fsmn.py": 1666757257.1401393, "TEMPLATE_PATH/models/audio/kws/farfield/fsmn_sele_v2.py": 1683889954.4661622, "TEMPLATE_PATH/models/audio/kws/farfield/fsmn_sele_v3.py": 1684246001.4639575, "TEMPLATE_PATH/models/audio/kws/farfield/model_def.py": 1666757257.140835, "TEMPLATE_PATH/models/audio/kws/farfield/model.py": 1684246001.4643233, "TEMPLATE_PATH/models/audio/kws/generic_key_word_spotting.py": 1666757257.1410184, "TEMPLATE_PATH/models/audio/kws/nearfield/fsmn.py": 1683889954.4674246, "TEMPLATE_PATH/models/audio/kws/nearfield/model.py": 1683889954.4677804, "TEMPLATE_PATH/models/audio/kws/nearfield/cmvn.py": 
1678345974.1689863, "TEMPLATE_PATH/models/multi_modal/ofa_for_all_tasks.py": 1678345974.6520555, "TEMPLATE_PATH/models/multi_modal/clip/configuration_bert.py": 1666757257.302656, "TEMPLATE_PATH/models/multi_modal/clip/bert_tokenizer.py": 1669108798.597482, "TEMPLATE_PATH/models/multi_modal/clip/model.py": 1678345974.6126437, "TEMPLATE_PATH/models/multi_modal/clip/modeling_bert.py": 1678345974.6139398, "TEMPLATE_PATH/models/multi_modal/mplug_for_all_tasks.py": 1678345974.6332867, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/decoder.py": 1666757257.3277674, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/prior.py": 1666757257.3294334, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/upsampler.py": 1666757257.3308744, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/model.py": 1678345974.6361222, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/tokenizer.py": 1678695526.5035207, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/xglm.py": 1678695526.5041978, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/gaussian_diffusion.py": 1678695526.502133, "TEMPLATE_PATH/models/multi_modal/multi_stage_diffusion/clip.py": 1678695526.5006785, "TEMPLATE_PATH/models/multi_modal/diffusion/structbert.py": 1678345974.617392, "TEMPLATE_PATH/models/multi_modal/diffusion/diffusion.py": 1681714768.8946908, "TEMPLATE_PATH/models/multi_modal/diffusion/unet_generator.py": 1678695526.4923015, "TEMPLATE_PATH/models/multi_modal/diffusion/model.py": 1678345974.6162271, "TEMPLATE_PATH/models/multi_modal/diffusion/tokenizer.py": 1678345974.6183596, "TEMPLATE_PATH/models/multi_modal/diffusion/unet_upsampler_256.py": 1678695526.494478, "TEMPLATE_PATH/models/multi_modal/diffusion/unet_upsampler_1024.py": 1678695526.4934785, "TEMPLATE_PATH/models/multi_modal/efficient_diffusion_tuning/efficient_stable_diffusion.py": 1683889954.5002546, "TEMPLATE_PATH/models/multi_modal/gemm/gemm_base.py": 1669108798.5997014, "TEMPLATE_PATH/models/multi_modal/gemm/gemm_model.py": 1666757257.312342, "TEMPLATE_PATH/models/multi_modal/gemm/tokenizer.py": 1666757257.3128963, "TEMPLATE_PATH/models/multi_modal/mmr/dataloaders/rawvideo_util.py": 1666757257.3151526, "TEMPLATE_PATH/models/multi_modal/mmr/models/module_clip.py": 1666757257.3185143, "TEMPLATE_PATH/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py": 1684246001.5114832, "TEMPLATE_PATH/models/multi_modal/mmr/models/module_cross.py": 1666757257.319208, "TEMPLATE_PATH/models/multi_modal/mmr/models/until_module.py": 1666757257.3205154, "TEMPLATE_PATH/models/multi_modal/mmr/models/tokenization_clip.py": 1666757257.3197618, "TEMPLATE_PATH/models/multi_modal/mmr/models/modeling.py": 1666757257.3177187, "TEMPLATE_PATH/models/multi_modal/mmr/models/dynamic_inverted_softmax.py": 1666757257.3171651, "TEMPLATE_PATH/models/multi_modal/mplug/predictor.py": 1666757257.3251338, "TEMPLATE_PATH/models/multi_modal/mplug/clip/clip.py": 1666757257.322875, "TEMPLATE_PATH/models/multi_modal/mplug/modeling_mplug.py": 1678345974.631151, "TEMPLATE_PATH/models/multi_modal/mplug/mvit.py": 1678345974.632255, "TEMPLATE_PATH/models/multi_modal/mplug/configuration_mplug.py": 1678345974.629229, "TEMPLATE_PATH/models/multi_modal/team/team_model.py": 1666757257.344364, "TEMPLATE_PATH/models/multi_modal/team/utils.py": 1666757257.3448434, "TEMPLATE_PATH/models/multi_modal/guided_diffusion/respace.py": 1681714768.8972325, "TEMPLATE_PATH/models/multi_modal/guided_diffusion/unet.py": 1681714768.8985677, 
"TEMPLATE_PATH/models/multi_modal/guided_diffusion/gaussian_diffusion.py": 1681714768.896804, "TEMPLATE_PATH/models/multi_modal/guided_diffusion/script.py": 1681714768.8976767, "TEMPLATE_PATH/models/multi_modal/vldoc/tokenization.py": 1678345974.6881084, "TEMPLATE_PATH/models/multi_modal/vldoc/model.py": 1678345974.6853945, "TEMPLATE_PATH/models/multi_modal/vldoc/conv_fpn_trans.py": 1678345974.6839283, "TEMPLATE_PATH/models/multi_modal/vldoc/transformer_local.py": 1678345974.6888812, "TEMPLATE_PATH/models/multi_modal/vldoc/modeling_layout_roberta.py": 1678345974.6863377, "TEMPLATE_PATH/models/multi_modal/vldoc/processing.py": 1678345974.6873274, "TEMPLATE_PATH/models/multi_modal/vldoc/convnext.py": 1678345974.6846218, "TEMPLATE_PATH/models/multi_modal/soonet/model.py": 1681714768.901114, "TEMPLATE_PATH/models/multi_modal/soonet/tokenizer.py": 1681714768.9022171, "TEMPLATE_PATH/models/multi_modal/soonet/utils.py": 1681714768.9025855, "TEMPLATE_PATH/models/multi_modal/soonet/blocks.py": 1681714768.9001248, "TEMPLATE_PATH/models/multi_modal/soonet/swin_transformer.py": 1681714768.90172, "TEMPLATE_PATH/models/multi_modal/soonet/clip.py": 1681714768.90064, "TEMPLATE_PATH/models/multi_modal/mgeo/text_ranking.py": 1678345974.626834, "TEMPLATE_PATH/models/multi_modal/mgeo/backbone.py": 1678345974.6254547, "TEMPLATE_PATH/models/multi_modal/mgeo/text_classification.py": 1678345974.6262727, "TEMPLATE_PATH/models/multi_modal/mgeo/token_classification.py": 1678345974.6274276, "TEMPLATE_PATH/models/multi_modal/mplug_owl/configuration_mplug_owl.py": 1684246001.513214, "TEMPLATE_PATH/models/multi_modal/mplug_owl/modeling_mplug_owl.py": 1684246001.5142066, "TEMPLATE_PATH/models/multi_modal/ofa_for_text_to_image_synthesis_model.py": 1678345974.6531072, "TEMPLATE_PATH/models/multi_modal/video_synthesis/diffusion.py": 1681714768.9037023, "TEMPLATE_PATH/models/multi_modal/video_synthesis/text_to_video_synthesis_model.py": 1678695526.5113559, "TEMPLATE_PATH/models/multi_modal/video_synthesis/autoencoder.py": 1678695526.510036, "TEMPLATE_PATH/models/multi_modal/video_synthesis/unet_sd.py": 1678695526.5119526, "TEMPLATE_PATH/models/multi_modal/clip_interrogator/model.py": 1684246001.5105355, "TEMPLATE_PATH/models/multi_modal/rleg/model.py": 1678695526.5053334, "TEMPLATE_PATH/models/multi_modal/rleg/rleg.py": 1678695526.5057476, "TEMPLATE_PATH/models/multi_modal/dpm_solver_pytorch.py": 1678695526.4955242, "TEMPLATE_PATH/models/multi_modal/ofa/modeling_ofa.py": 1678345974.6454003, "TEMPLATE_PATH/models/multi_modal/ofa/utils/utils.py": 1678345974.6500447, "TEMPLATE_PATH/models/multi_modal/ofa/utils/constant.py": 1678345974.649251, "TEMPLATE_PATH/models/multi_modal/ofa/vit.py": 1678345974.6508958, "TEMPLATE_PATH/models/multi_modal/ofa/modeling_mmspeech.py": 1678345974.6442, "TEMPLATE_PATH/models/multi_modal/ofa/resnet.py": 1678345974.6463652, "TEMPLATE_PATH/models/multi_modal/ofa/tokenization_ofa.py": 1678345974.6473439, "TEMPLATE_PATH/models/multi_modal/ofa/generate/multihead_attention.py": 1666757257.335432, "TEMPLATE_PATH/models/multi_modal/ofa/generate/ngram_repeat_block.py": 1666757257.335963, "TEMPLATE_PATH/models/multi_modal/ofa/generate/sequence_generator.py": 1678345974.642128, "TEMPLATE_PATH/models/multi_modal/ofa/generate/incremental_decoding_utils.py": 1666757257.3349085, "TEMPLATE_PATH/models/multi_modal/ofa/generate/utils.py": 1678345974.6431253, "TEMPLATE_PATH/models/multi_modal/ofa/generate/search.py": 1678345974.6410236, "TEMPLATE_PATH/models/multi_modal/ofa/generate/token_generation_constraints.py": 
1666757257.3377285, "TEMPLATE_PATH/models/multi_modal/ofa/tokenization_ofa_fast.py": 1678345974.6482744, "TEMPLATE_PATH/models/multi_modal/ofa/configuration_mmspeech.py": 1678345974.6392608, "TEMPLATE_PATH/models/multi_modal/ofa/configuration_ofa.py": 1678345974.640075, "TEMPLATE_PATH/models/nlp/unite/configuration.py": 1684246001.5170493, "TEMPLATE_PATH/models/nlp/unite/translation_evaluation.py": 1684246001.5173905, "TEMPLATE_PATH/models/nlp/palm_v2/configuration.py": 1678345974.7403622, "TEMPLATE_PATH/models/nlp/palm_v2/dureader_eval.py": 1666757257.3743646, "TEMPLATE_PATH/models/nlp/palm_v2/text_generation.py": 1681714768.9220717, "TEMPLATE_PATH/models/nlp/structbert/configuration.py": 1678345974.7552435, "TEMPLATE_PATH/models/nlp/structbert/fill_mask.py": 1678345974.7563565, "TEMPLATE_PATH/models/nlp/structbert/backbone.py": 1678345974.7548847, "TEMPLATE_PATH/models/nlp/structbert/faq_question_answering.py": 1678345974.7559564, "TEMPLATE_PATH/models/nlp/structbert/adv_utils.py": 1678695526.531147, "TEMPLATE_PATH/models/nlp/structbert/text_classification.py": 1678345974.7566974, "TEMPLATE_PATH/models/nlp/structbert/token_classification.py": 1678345974.7570403, "TEMPLATE_PATH/models/nlp/hf_transformers/backbone.py": 1678695526.5259144, "TEMPLATE_PATH/models/nlp/task_models/fill_mask.py": 1678345974.7579868, "TEMPLATE_PATH/models/nlp/task_models/text_ranking.py": 1678345974.7599752, "TEMPLATE_PATH/models/nlp/task_models/feature_extraction.py": 1678345974.7576537, "TEMPLATE_PATH/models/nlp/task_models/text_classification.py": 1678345974.7593715, "TEMPLATE_PATH/models/nlp/task_models/task_model.py": 1683889954.520566, "TEMPLATE_PATH/models/nlp/task_models/text_generation.py": 1683889954.521497, "TEMPLATE_PATH/models/nlp/task_models/information_extraction.py": 1678345974.758443, "TEMPLATE_PATH/models/nlp/task_models/token_classification.py": 1678345974.7602658, "TEMPLATE_PATH/models/nlp/veco/configuration.py": 1678345974.76297, "TEMPLATE_PATH/models/nlp/veco/fill_mask.py": 1678345974.7633657, "TEMPLATE_PATH/models/nlp/veco/backbone.py": 1678345974.762673, "TEMPLATE_PATH/models/nlp/veco/text_classification.py": 1678345974.7637107, "TEMPLATE_PATH/models/nlp/veco/token_classification.py": 1678345974.7641091, "TEMPLATE_PATH/models/nlp/glm_130b/initialize.py": 1683889954.5107641, "TEMPLATE_PATH/models/nlp/glm_130b/quantization/functional.py": 1683889954.512782, "TEMPLATE_PATH/models/nlp/glm_130b/quantization/layers.py": 1683889954.5129745, "TEMPLATE_PATH/models/nlp/glm_130b/text_generation.py": 1683889954.5132122, "TEMPLATE_PATH/models/nlp/glm_130b/generation/strategies.py": 1683889954.5105143, "TEMPLATE_PATH/models/nlp/mglm/tasks/superglue/pvp.py": 1678345974.739084, "TEMPLATE_PATH/models/nlp/mglm/tasks/superglue/dataset.py": 1669108798.6253061, "TEMPLATE_PATH/models/nlp/mglm/tasks/superglue/evaluate.py": 1669108798.6255116, "TEMPLATE_PATH/models/nlp/mglm/tasks/superglue/finetune.py": 1669108798.6256893, "TEMPLATE_PATH/models/nlp/mglm/tasks/data_utils.py": 1678345974.737032, "TEMPLATE_PATH/models/nlp/mglm/tasks/seq2seq/dataset.py": 1669108798.6240597, "TEMPLATE_PATH/models/nlp/mglm/tasks/seq2seq/evaluate.py": 1678345974.738028, "TEMPLATE_PATH/models/nlp/mglm/tasks/seq2seq/finetune.py": 1678345974.7383432, "TEMPLATE_PATH/models/nlp/mglm/tasks/language_model/detokenizer.py": 1669108798.6234415, "TEMPLATE_PATH/models/nlp/mglm/tasks/language_model/dataset.py": 1669108798.6232784, "TEMPLATE_PATH/models/nlp/mglm/tasks/language_model/finetune.py": 1678345974.7376661, 
"TEMPLATE_PATH/models/nlp/mglm/tasks/eval_utils.py": 1678345974.7373278, "TEMPLATE_PATH/models/nlp/mglm/blocklm_utils.py": 1684246001.5148673, "TEMPLATE_PATH/models/nlp/mglm/train_utils.py": 1678345974.7394702, "TEMPLATE_PATH/models/nlp/mglm/test/test_block.py": 1684246001.515939, "TEMPLATE_PATH/models/nlp/mglm/test/test_rel_shift.py": 1684246001.5163302, "TEMPLATE_PATH/models/nlp/mglm/arguments.py": 1669108798.609255, "TEMPLATE_PATH/models/nlp/mglm/data_utils/tokenization_gpt2.py": 1669108798.6142075, "TEMPLATE_PATH/models/nlp/mglm/data_utils/lazy_loader.py": 1678345974.7336698, "TEMPLATE_PATH/models/nlp/mglm/data_utils/wordpiece.py": 1678345974.7343767, "TEMPLATE_PATH/models/nlp/mglm/data_utils/datasets.py": 1684246001.515576, "TEMPLATE_PATH/models/nlp/mglm/data_utils/tokenization.py": 1669108798.613975, "TEMPLATE_PATH/models/nlp/mglm/data_utils/extraction.py": 1678345974.733312, "TEMPLATE_PATH/models/nlp/mglm/data_utils/file_utils.py": 1669108798.6124434, "TEMPLATE_PATH/models/nlp/mglm/data_utils/sp_tokenizer.py": 1669108798.6136456, "TEMPLATE_PATH/models/nlp/mglm/data_utils/corpora.py": 1669108798.6114604, "TEMPLATE_PATH/models/nlp/mglm/data_utils/samplers.py": 1669108798.6129339, "TEMPLATE_PATH/models/nlp/mglm/mglm_for_text_summarization.py": 1678345974.7347617, "TEMPLATE_PATH/models/nlp/mglm/process_grid.py": 1669108798.6219385, "TEMPLATE_PATH/models/nlp/mglm/generation_utils.py": 1669108798.6156476, "TEMPLATE_PATH/models/nlp/mglm/utils.py": 1678345974.739819, "TEMPLATE_PATH/models/nlp/mglm/configure_data.py": 1678345974.7326682, "TEMPLATE_PATH/models/nlp/mglm/model/distributed.py": 1678345974.735162, "TEMPLATE_PATH/models/nlp/mglm/model/transformer.py": 1678345974.7363741, "TEMPLATE_PATH/models/nlp/mglm/model/modeling_bert.py": 1678345974.735637, "TEMPLATE_PATH/models/nlp/mglm/model/prompt.py": 1669108798.617736, "TEMPLATE_PATH/models/nlp/mglm/model/modeling_glm.py": 1678345974.7359483, "TEMPLATE_PATH/models/nlp/mglm/model/downstream.py": 1669108798.6170213, "TEMPLATE_PATH/models/nlp/mglm/run_test.py": 1669108798.6222408, "TEMPLATE_PATH/models/nlp/plug_mental/configuration.py": 1678345974.7478015, "TEMPLATE_PATH/models/nlp/plug_mental/backbone.py": 1678345974.7475746, "TEMPLATE_PATH/models/nlp/plug_mental/adv_utils.py": 1678345974.7465599, "TEMPLATE_PATH/models/nlp/plug_mental/text_classification.py": 1678345974.747977, "TEMPLATE_PATH/models/nlp/gpt_moe/configuration.py": 1678345974.7217705, "TEMPLATE_PATH/models/nlp/gpt_moe/backbone.py": 1678345974.7208388, "TEMPLATE_PATH/models/nlp/gpt_moe/tokenizer.py": 1678345974.725533, "TEMPLATE_PATH/models/nlp/gpt_moe/distributed_gpt_moe.py": 1678695526.523395, "TEMPLATE_PATH/models/nlp/gpt_moe/text_generation.py": 1678345974.7252653, "TEMPLATE_PATH/models/nlp/gpt_moe/moe/sharded_moe.py": 1678345974.7245455, "TEMPLATE_PATH/models/nlp/gpt_moe/moe/utils.py": 1678345974.7249217, "TEMPLATE_PATH/models/nlp/gpt_moe/moe/layer.py": 1678345974.7238333, "TEMPLATE_PATH/models/nlp/gpt_moe/moe/experts.py": 1678345974.7235267, "TEMPLATE_PATH/models/nlp/gpt_moe/moe/mappings.py": 1678345974.7241268, "TEMPLATE_PATH/models/nlp/gpt_moe/checkpointing.py": 1678695526.5199594, "TEMPLATE_PATH/models/nlp/csanmt/translation.py": 1678345974.710362, "TEMPLATE_PATH/models/nlp/T5/text2text_generation.py": 1678345974.6919267, "TEMPLATE_PATH/models/nlp/T5/configuration.py": 1678345974.6909628, "TEMPLATE_PATH/models/nlp/T5/backbone.py": 1683889954.5021315, "TEMPLATE_PATH/models/nlp/heads/text_classification_head.py": 1678345974.727904, 
"TEMPLATE_PATH/models/nlp/heads/infromation_extraction_head.py": 1678345974.7273557, "TEMPLATE_PATH/models/nlp/heads/token_classification_head.py": 1678345974.728869, "TEMPLATE_PATH/models/nlp/heads/text_generation_head.py": 1678345974.7283216, "TEMPLATE_PATH/models/nlp/heads/crf_head.py": 1678695526.5250702, "TEMPLATE_PATH/models/nlp/heads/torch_pretrain_head.py": 1666757257.3713884, "TEMPLATE_PATH/models/nlp/heads/fill_mask_head.py": 1683889954.5144427, "TEMPLATE_PATH/models/nlp/heads/text_ranking_head.py": 1678345974.7285597, "TEMPLATE_PATH/models/nlp/bloom/backbone.py": 1669108798.6061795, "TEMPLATE_PATH/models/nlp/xlm_roberta/configuration.py": 1678345974.7653904, "TEMPLATE_PATH/models/nlp/xlm_roberta/backbone.py": 1678345974.7651584, "TEMPLATE_PATH/models/nlp/peer/configuration.py": 1678695526.529261, "TEMPLATE_PATH/models/nlp/peer/sas_utils.py": 1678695526.5296216, "TEMPLATE_PATH/models/nlp/peer/backbone.py": 1678695526.5284507, "TEMPLATE_PATH/models/nlp/peer/text_classification.py": 1678695526.5302649, "TEMPLATE_PATH/models/nlp/fid_T5/text_generation.py": 1683889954.5068686, "TEMPLATE_PATH/models/nlp/space_T_en/text_to_sql.py": 1666757257.3954694, "TEMPLATE_PATH/models/nlp/canmt/sequence_generator.py": 1683889954.5052524, "TEMPLATE_PATH/models/nlp/canmt/canmt_translation.py": 1683889954.5044076, "TEMPLATE_PATH/models/nlp/canmt/canmt_model.py": 1683889954.5040576, "TEMPLATE_PATH/models/nlp/bart/text_error_correction.py": 1678345974.693962, "TEMPLATE_PATH/models/nlp/use/transformer.py": 1678345974.7618728, "TEMPLATE_PATH/models/nlp/use/user_satisfaction_estimation.py": 1678345974.7620804, "TEMPLATE_PATH/models/nlp/gpt_neo/backbone.py": 1666757257.3668969, "TEMPLATE_PATH/models/nlp/bert/configuration.py": 1678345974.6969304, "TEMPLATE_PATH/models/nlp/bert/siamese_uie.py": 1678695526.5135634, "TEMPLATE_PATH/models/nlp/bert/fill_mask.py": 1678345974.6990001, "TEMPLATE_PATH/models/nlp/bert/word_alignment.py": 1678695526.5139036, "TEMPLATE_PATH/models/nlp/bert/text_ranking.py": 1678345974.703262, "TEMPLATE_PATH/models/nlp/bert/backbone.py": 1678345974.6959348, "TEMPLATE_PATH/models/nlp/bert/text_classification.py": 1678345974.7023563, "TEMPLATE_PATH/models/nlp/bert/sentence_embedding.py": 1678345974.7002544, "TEMPLATE_PATH/models/nlp/bert/document_segmentation.py": 1678345974.6980228, "TEMPLATE_PATH/models/nlp/bert/token_classification.py": 1678345974.7041605, "TEMPLATE_PATH/models/nlp/dgds/backbone.py": 1683889954.5060863, "TEMPLATE_PATH/models/nlp/dgds/document_grounded_dialog_rerank.py": 1678345974.7150524, "TEMPLATE_PATH/models/nlp/dgds/document_grounded_dialog_generate.py": 1678345974.71487, "TEMPLATE_PATH/models/nlp/dgds/document_grounded_dialog_retrieval.py": 1678345974.715238, "TEMPLATE_PATH/models/nlp/gpt3/configuration.py": 1678695526.5156965, "TEMPLATE_PATH/models/nlp/gpt3/backbone.py": 1681714768.9173203, "TEMPLATE_PATH/models/nlp/gpt3/tokenizer.py": 1678695526.5179377, "TEMPLATE_PATH/models/nlp/gpt3/distributed_gpt3.py": 1683889954.51408, "TEMPLATE_PATH/models/nlp/gpt3/text_generation.py": 1681714768.9190643, "TEMPLATE_PATH/models/nlp/deberta_v2/configuration.py": 1678345974.7123609, "TEMPLATE_PATH/models/nlp/deberta_v2/fill_mask.py": 1678345974.7131743, "TEMPLATE_PATH/models/nlp/deberta_v2/backbone.py": 1678345974.7115374, "TEMPLATE_PATH/models/nlp/deberta_v2/tokenization.py": 1666757257.361269, "TEMPLATE_PATH/models/nlp/deberta_v2/tokenization_fast.py": 1678345974.7137625, "TEMPLATE_PATH/models/nlp/codegeex/codegeex_for_code_translation.py": 1678345974.707734, 
"TEMPLATE_PATH/models/nlp/codegeex/tokenizer.py": 1678345974.7089796, "TEMPLATE_PATH/models/nlp/codegeex/codegeex_for_code_generation.py": 1678345974.7071722, "TEMPLATE_PATH/models/nlp/codegeex/inference.py": 1678345974.7083764, "TEMPLATE_PATH/models/nlp/codegeex/codegeex.py": 1678345974.706545, "TEMPLATE_PATH/models/nlp/space/configuration.py": 1678345974.7504852, "TEMPLATE_PATH/models/nlp/space/dialog_modeling.py": 1678345974.7508473, "TEMPLATE_PATH/models/nlp/space/dialog_state_tracking.py": 1666757257.3844292, "TEMPLATE_PATH/models/nlp/space/model/intent_unified_transformer.py": 1666757257.386785, "TEMPLATE_PATH/models/nlp/space/model/tokenization_space.py": 1678345974.7516365, "TEMPLATE_PATH/models/nlp/space/model/unified_transformer.py": 1678345974.7521238, "TEMPLATE_PATH/models/nlp/space/model/model_base.py": 1678345974.7511904, "TEMPLATE_PATH/models/nlp/space/model/generator.py": 1666757257.3862689, "TEMPLATE_PATH/models/nlp/space/model/gen_unified_transformer.py": 1666757257.3857656, "TEMPLATE_PATH/models/nlp/space/dialog_intent_prediction.py": 1666757257.3833244, "TEMPLATE_PATH/models/nlp/space/modules/transformer_block.py": 1666757257.391351, "TEMPLATE_PATH/models/nlp/space/modules/functions.py": 1666757257.3904216, "TEMPLATE_PATH/models/nlp/space/modules/multihead_attention.py": 1666757257.3908985, "TEMPLATE_PATH/models/nlp/space/modules/feedforward.py": 1666757257.3899465, "TEMPLATE_PATH/models/nlp/space/modules/embedder.py": 1666757257.3894768, "TEMPLATE_PATH/models/nlp/fid_plug/configuration.py": 1683889954.50833, "TEMPLATE_PATH/models/nlp/fid_plug/backbone.py": 1683889954.507869, "TEMPLATE_PATH/models/nlp/fid_plug/text_generation.py": 1683889954.5088115, "TEMPLATE_PATH/models/nlp/gpt2/backbone.py": 1678345974.7169101, "TEMPLATE_PATH/models/nlp/plug/distributed_plug.py": 1678345974.7456992, "TEMPLATE_PATH/models/nlp/plug/configuration.py": 1678345974.7445607, "TEMPLATE_PATH/models/nlp/plug/backbone.py": 1678345974.7441673, "TEMPLATE_PATH/models/nlp/plug/AnnealingLR.py": 1678345974.7434573, "TEMPLATE_PATH/models/nlp/plug/generator.py": 1678345974.7459483, "TEMPLATE_PATH/models/nlp/megatron_bert/configuration.py": 1678345974.7317162, "TEMPLATE_PATH/models/nlp/megatron_bert/fill_mask.py": 1678345974.7319267, "TEMPLATE_PATH/models/nlp/megatron_bert/backbone.py": 1678345974.731479, "TEMPLATE_PATH/models/nlp/space_T_cn/configuration.py": 1666757257.3935158, "TEMPLATE_PATH/models/nlp/space_T_cn/backbone.py": 1678345974.752695, "TEMPLATE_PATH/models/nlp/space_T_cn/table_question_answering.py": 1678345974.7536259, "TEMPLATE_PATH/models/nlp/ponet/configuration.py": 1678345974.7491364, "TEMPLATE_PATH/models/nlp/ponet/fill_mask.py": 1678345974.7497096, "TEMPLATE_PATH/models/nlp/ponet/backbone.py": 1678345974.7488022, "TEMPLATE_PATH/models/nlp/ponet/tokenization.py": 1678345974.7501063, "TEMPLATE_PATH/models/nlp/ponet/document_segmentation.py": 1678345974.749312, "TEMPLATE_PATH/models/nlp/llama/configuration.py": 1683889954.5161562, "TEMPLATE_PATH/models/nlp/llama/convert_llama_weights_to_hf.py": 1683889954.5163944, "TEMPLATE_PATH/models/nlp/llama/backbone.py": 1683889954.5156515, "TEMPLATE_PATH/models/nlp/llama/tokenization.py": 1683889954.517054, "TEMPLATE_PATH/models/nlp/llama/tokenization_fast.py": 1683889954.5174031, "TEMPLATE_PATH/models/nlp/llama/text_generation.py": 1683889954.5166035, "TEMPLATE_PATH/models/nlp/lstm/backbone.py": 1678345974.7302816, "TEMPLATE_PATH/models/nlp/lstm/token_classification.py": 1678345974.7304647, 
"TEMPLATE_PATH/models/cv/image_deblur/nafnet_for_image_deblur.py": 1678345974.289103, "TEMPLATE_PATH/models/cv/vision_middleware/backbone.py": 1678345974.6052146, "TEMPLATE_PATH/models/cv/vision_middleware/model.py": 1678345974.6064956, "TEMPLATE_PATH/models/cv/vision_middleware/head.py": 1678345974.605873, "TEMPLATE_PATH/models/cv/vision_middleware/vim.py": 1678345974.607082, "TEMPLATE_PATH/models/cv/image_quality_assessment_man/swin.py": 1678695526.3478003, "TEMPLATE_PATH/models/cv/image_quality_assessment_man/maniqa.py": 1678695526.3473833, "TEMPLATE_PATH/models/cv/image_quality_assessment_man/image_quality_assessment_man.py": 1678695526.3470078, "TEMPLATE_PATH/models/cv/product_retrieval_embedding/item_detection.py": 1666757257.2308764, "TEMPLATE_PATH/models/cv/product_retrieval_embedding/item_model.py": 1666757257.231389, "TEMPLATE_PATH/models/cv/product_retrieval_embedding/item_embedding.py": 1666757257.2311432, "TEMPLATE_PATH/models/cv/body_2d_keypoints/w48.py": 1666757257.1529067, "TEMPLATE_PATH/models/cv/body_2d_keypoints/hrnet_v2.py": 1684246001.4672918, "TEMPLATE_PATH/models/cv/body_2d_keypoints/hrnet_basic_modules.py": 1666757257.1524448, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/panovit.py": 1678345974.3350315, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/modality/layout.py": 1678345974.3345408, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/misc/panostretch.py": 1678345974.3337135, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/misc/fourier.py": 1678345974.3334966, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/misc/post_proc.py": 1678345974.3339539, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/panovit.py": 1678345974.3347096, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/utils.py": 1678345974.3348787, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/backbone/vit_horizon_pry_image.py": 1678345974.3330128, "TEMPLATE_PATH/models/cv/indoor_layout_estimation/networks/backbone/resnet_DA.py": 1678345974.332792, "TEMPLATE_PATH/models/cv/salient_detection/salient_model.py": 1678345974.3975854, "TEMPLATE_PATH/models/cv/salient_detection/models/senet.py": 1678345974.3712454, "TEMPLATE_PATH/models/cv/salient_detection/models/utils.py": 1678695526.4316845, "TEMPLATE_PATH/models/cv/salient_detection/models/modules.py": 1678345974.3710551, "TEMPLATE_PATH/models/cv/salient_detection/models/u2net.py": 1666757257.241171, "TEMPLATE_PATH/models/cv/salient_detection/models/backbone/Res2Net_v1b.py": 1678695526.4297223, "TEMPLATE_PATH/models/cv/image_quality_assessment_degradation/degradation_model.py": 1678345974.321454, "TEMPLATE_PATH/models/cv/image_quality_assessment_degradation/image_quality_assessment_degradation.py": 1678345974.3216996, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/losses/model_irse.py": 1666757257.2036955, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/losses/losses.py": 1666757257.203465, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/losses/helpers.py": 1666757257.203164, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/retinaface/detection.py": 1673508904.826248, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/retinaface/models/retinaface.py": 1666757257.2049234, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/retinaface/models/net.py": 1666757257.2047052, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/retinaface/utils.py": 1666757257.2051783, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/gpen.py": 1666757257.2019858, 
"TEMPLATE_PATH/models/cv/image_portrait_enhancement/image_portrait_enhancement.py": 1678345974.3197925, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/align_faces.py": 1666757257.2006574, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/eqface/fqa.py": 1666757257.201287, "TEMPLATE_PATH/models/cv/image_portrait_enhancement/eqface/model_resnet.py": 1666757257.2015626, "TEMPLATE_PATH/models/cv/abnormal_object_detection/mmdet_ms/roi_head/mask_scoring_roi_head.py": 1678695526.2852845, "TEMPLATE_PATH/models/cv/abnormal_object_detection/mmdet_ms/roi_head/roi_extractors/single_level_roi_extractor.py": 1678695526.2864377, "TEMPLATE_PATH/models/cv/abnormal_object_detection/mmdet_model.py": 1678345974.1796575, "TEMPLATE_PATH/models/cv/image_probing_model/backbone.py": 1678345974.3205512, "TEMPLATE_PATH/models/cv/image_probing_model/model.py": 1678345974.320754, "TEMPLATE_PATH/models/cv/image_probing_model/utils.py": 1678345974.320998, "TEMPLATE_PATH/models/cv/tinynas_classfication/super_res_kxkx.py": 1666757257.268841, "TEMPLATE_PATH/models/cv/tinynas_classfication/super_res_k1kxk1.py": 1666757257.2682607, "TEMPLATE_PATH/models/cv/tinynas_classfication/model_zoo.py": 1666757257.265972, "TEMPLATE_PATH/models/cv/tinynas_classfication/super_blocks.py": 1666757257.267099, "TEMPLATE_PATH/models/cv/tinynas_classfication/basic_blocks.py": 1666757257.2643123, "TEMPLATE_PATH/models/cv/tinynas_classfication/master_net.py": 1666757257.2654593, "TEMPLATE_PATH/models/cv/tinynas_classfication/plain_net_utils.py": 1669108798.596394, "TEMPLATE_PATH/models/cv/tinynas_classfication/super_res_idwexkx.py": 1666757257.267718, "TEMPLATE_PATH/models/cv/tinynas_classfication/global_utils.py": 1666757257.264953, "TEMPLATE_PATH/models/cv/image_to_image_translation/model_translation.py": 1666757257.2173638, "TEMPLATE_PATH/models/cv/image_to_image_translation/models/autoencoder.py": 1666757257.2180924, "TEMPLATE_PATH/models/cv/image_to_image_translation/models/clip.py": 1678695526.3520553, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/metrics.py": 1666757257.2199914, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/diffusion.py": 1678695526.3534672, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/apps.py": 1666757257.2189667, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/svd.py": 1666757257.2207708, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/random_mask.py": 1666757257.2205741, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/degradation.py": 1666757257.2193289, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/random_color.py": 1666757257.2203503, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/utils.py": 1666757257.2211437, "TEMPLATE_PATH/models/cv/image_to_image_translation/ops/losses.py": 1666757257.2197845, "TEMPLATE_PATH/models/cv/image_to_image_translation/data/transforms.py": 1666757257.217096, "TEMPLATE_PATH/models/cv/video_human_matting/models/decoder.py": 1678345974.4896257, "TEMPLATE_PATH/models/cv/video_human_matting/models/effv2.py": 1678345974.4909832, "TEMPLATE_PATH/models/cv/video_human_matting/models/lraspp.py": 1678345974.4915452, "TEMPLATE_PATH/models/cv/video_human_matting/models/matting.py": 1678345974.4921389, "TEMPLATE_PATH/models/cv/video_human_matting/models/deep_guided_filter.py": 1678345974.4903216, "TEMPLATE_PATH/models/cv/video_human_matting/model.py": 1678345974.488256, "TEMPLATE_PATH/models/cv/language_guided_video_summarization/transformer/models.py": 1673508904.8344479, 
"TEMPLATE_PATH/models/cv/language_guided_video_summarization/transformer/modules.py": 1673508904.8346016, "TEMPLATE_PATH/models/cv/language_guided_video_summarization/transformer/sub_layers.py": 1673508904.8347619, "TEMPLATE_PATH/models/cv/language_guided_video_summarization/transformer/layers.py": 1673508904.8342712, "TEMPLATE_PATH/models/cv/language_guided_video_summarization/summarizer.py": 1678345974.3353753, "TEMPLATE_PATH/models/cv/facial_landmark_confidence/flc/facial_landmark_confidence.py": 1678345974.2760296, "TEMPLATE_PATH/models/cv/facial_landmark_confidence/flc/manual_landmark_net.py": 1678345974.276284, "TEMPLATE_PATH/models/cv/image_to_image_generation/models/autoencoder.py": 1666757257.2149377, "TEMPLATE_PATH/models/cv/image_to_image_generation/models/clip.py": 1678695526.3506653, "TEMPLATE_PATH/models/cv/image_to_image_generation/model.py": 1666757257.2143965, "TEMPLATE_PATH/models/cv/image_to_image_generation/ops/diffusion.py": 1666757257.215869, "TEMPLATE_PATH/models/cv/image_to_image_generation/ops/losses.py": 1666757257.2161045, "TEMPLATE_PATH/models/cv/image_to_image_generation/data/transforms.py": 1666757257.2141688, "TEMPLATE_PATH/models/cv/image_body_reshaping/person_info.py": 1666757257.1893692, "TEMPLATE_PATH/models/cv/image_body_reshaping/model.py": 1666757257.1891172, "TEMPLATE_PATH/models/cv/image_body_reshaping/slim_utils.py": 1666757257.1903415, "TEMPLATE_PATH/models/cv/image_body_reshaping/pose_estimator/body.py": 1666757257.1897807, "TEMPLATE_PATH/models/cv/image_body_reshaping/pose_estimator/util.py": 1666757257.1901324, "TEMPLATE_PATH/models/cv/image_body_reshaping/pose_estimator/model.py": 1666757257.1899562, "TEMPLATE_PATH/models/cv/image_body_reshaping/image_body_reshaping.py": 1666757257.188921, "TEMPLATE_PATH/models/cv/image_human_parsing/m2fp_net.py": 1678345974.3071952, "TEMPLATE_PATH/models/cv/image_human_parsing/m2fp/m2fp_decoder.py": 1678345974.3068166, "TEMPLATE_PATH/models/cv/image_human_parsing/m2fp/m2fp_encoder.py": 1678345974.3069928, "TEMPLATE_PATH/models/cv/image_human_parsing/parsing_utils.py": 1678345974.307423, "TEMPLATE_PATH/models/cv/image_human_parsing/backbone/deeplab_resnet.py": 1678345974.3061016, "TEMPLATE_PATH/models/cv/image_skychange/ptsemseg/hrnet_super_and_ocr.py": 1684246001.4751763, "TEMPLATE_PATH/models/cv/image_skychange/ptsemseg/BlockModules.py": 1678345974.3284485, "TEMPLATE_PATH/models/cv/image_skychange/ptsemseg/unet.py": 1678345974.3294759, "TEMPLATE_PATH/models/cv/image_skychange/ptsemseg/hrnet_backnone.py": 1684246001.4747965, "TEMPLATE_PATH/models/cv/image_skychange/skychange.py": 1678345974.330122, "TEMPLATE_PATH/models/cv/image_skychange/preprocessor.py": 1678345974.3279777, "TEMPLATE_PATH/models/cv/image_skychange/skychange_model.py": 1678345974.3304164, "TEMPLATE_PATH/models/cv/video_object_segmentation/aggregate.py": 1678345974.5072932, "TEMPLATE_PATH/models/cv/video_object_segmentation/inference_memory_bank.py": 1678345974.5102427, "TEMPLATE_PATH/models/cv/video_object_segmentation/inference_core.py": 1678345974.5094788, "TEMPLATE_PATH/models/cv/video_object_segmentation/model.py": 1678345974.51162, "TEMPLATE_PATH/models/cv/video_object_segmentation/eval_network.py": 1678345974.5086596, "TEMPLATE_PATH/models/cv/video_object_segmentation/mod_resnet.py": 1678345974.5108964, "TEMPLATE_PATH/models/cv/video_object_segmentation/network.py": 1678345974.5134938, "TEMPLATE_PATH/models/cv/video_object_segmentation/modules.py": 1678345974.5123272, "TEMPLATE_PATH/models/cv/video_object_segmentation/cbam.py": 
1678345974.5079415, "TEMPLATE_PATH/models/cv/face_reconstruction/models/nv_diffrast.py": 1681714768.8716514, "TEMPLATE_PATH/models/cv/face_reconstruction/models/renderer.py": 1681714768.8736632, "TEMPLATE_PATH/models/cv/face_reconstruction/models/unet.py": 1681714768.873916, "TEMPLATE_PATH/models/cv/face_reconstruction/models/bfm.py": 1681714768.8695195, "TEMPLATE_PATH/models/cv/face_reconstruction/models/opt.py": 1681714768.8720403, "TEMPLATE_PATH/models/cv/face_reconstruction/models/networks.py": 1678345974.2734904, "TEMPLATE_PATH/models/cv/face_reconstruction/models/de_retouching_module.py": 1681714768.8699348, "TEMPLATE_PATH/models/cv/face_reconstruction/models/losses.py": 1681714768.8712077, "TEMPLATE_PATH/models/cv/face_reconstruction/models/pix2pix/pix2pix_options.py": 1681714768.8733847, "TEMPLATE_PATH/models/cv/face_reconstruction/models/pix2pix/pix2pix_model.py": 1681714768.873153, "TEMPLATE_PATH/models/cv/face_reconstruction/models/pix2pix/networks.py": 1681714768.8728101, "TEMPLATE_PATH/models/cv/face_reconstruction/models/facelandmark/nets/large_eyeball_net.py": 1678345974.2724826, "TEMPLATE_PATH/models/cv/face_reconstruction/models/facelandmark/nets/large_base_lmks_net.py": 1678345974.2721982, "TEMPLATE_PATH/models/cv/face_reconstruction/models/facelandmark/large_base_lmks_infer.py": 1678345974.2711725, "TEMPLATE_PATH/models/cv/face_reconstruction/models/facerecon_model.py": 1681714768.870774, "TEMPLATE_PATH/models/cv/face_reconstruction/utils.py": 1681714768.8743782, "TEMPLATE_PATH/models/cv/facial_expression_recognition/fer/transforms.py": 1666757257.186491, "TEMPLATE_PATH/models/cv/facial_expression_recognition/fer/vgg.py": 1666757257.1866848, "TEMPLATE_PATH/models/cv/facial_expression_recognition/fer/facial_expression_recognition.py": 1673508904.8041663, "TEMPLATE_PATH/models/cv/face_recognition/align_face.py": 1678695526.3292472, "TEMPLATE_PATH/models/cv/face_recognition/torchkit/rts_backbone.py": 1678345974.2696226, "TEMPLATE_PATH/models/cv/face_recognition/torchkit/backbone/facemask_backbone.py": 1678345974.2694073, "TEMPLATE_PATH/models/cv/face_recognition/torchkit/backbone/model_irse.py": 1666757257.1852279, "TEMPLATE_PATH/models/cv/face_recognition/torchkit/backbone/model_resnet.py": 1666757257.1854684, "TEMPLATE_PATH/models/cv/face_recognition/torchkit/backbone/common.py": 1666757257.1850357, "TEMPLATE_PATH/models/cv/face_recognition/torchkit/backbone/arcface_backbone.py": 1678345974.269119, "TEMPLATE_PATH/models/cv/face_generation/stylegan2.py": 1666757257.1819198, "TEMPLATE_PATH/models/cv/face_generation/op/fused_act.py": 1666757257.181432, "TEMPLATE_PATH/models/cv/face_generation/op/upfirdn2d.py": 1666757257.18165, "TEMPLATE_PATH/models/cv/face_generation/op/conv2d_gradfix.py": 1666757257.1812036, "TEMPLATE_PATH/models/cv/shop_segmentation/head_fpn.py": 1666757257.242077, "TEMPLATE_PATH/models/cv/shop_segmentation/models.py": 1666757257.2425845, "TEMPLATE_PATH/models/cv/shop_segmentation/common.py": 1666757257.241814, "TEMPLATE_PATH/models/cv/shop_segmentation/utils.py": 1666757257.2446902, "TEMPLATE_PATH/models/cv/shop_segmentation/shop_seg_base.py": 1666757257.243692, "TEMPLATE_PATH/models/cv/shop_segmentation/neck_fpn.py": 1666757257.2431688, "TEMPLATE_PATH/models/cv/shop_segmentation/shop_seg_model.py": 1666757257.2441843, "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino/ms_deform_attn.py": 1678345974.3103385, "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino/position_encoding.py": 1678345974.310542, 
"TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino/dino_decoder.py": 1678345974.3097205, "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino/maskdino_encoder.py": 1678345974.3101413, "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino/utils.py": 1678345974.3107386, "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino/maskdino_decoder.py": 1678345974.3099248, "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino_swin.py": 1678345974.3111138, "TEMPLATE_PATH/models/cv/image_instance_segmentation/datasets/transforms.py": 1666757257.1984863, "TEMPLATE_PATH/models/cv/image_instance_segmentation/fastinst/fastinst_encoder.py": 1684246001.4722662, "TEMPLATE_PATH/models/cv/image_instance_segmentation/fastinst/fastinst_decoder.py": 1684246001.4720163, "TEMPLATE_PATH/models/cv/image_instance_segmentation/cascade_mask_rcnn_swin.py": 1678345974.3089857, "TEMPLATE_PATH/models/cv/image_instance_segmentation/fastinst_model.py": 1684246001.472576, "TEMPLATE_PATH/models/cv/image_instance_segmentation/model.py": 1666757257.198652, "TEMPLATE_PATH/models/cv/image_instance_segmentation/postprocess_utils.py": 1684246001.4729404, "TEMPLATE_PATH/models/cv/image_instance_segmentation/backbones/resnet.py": 1684246001.4712603, "TEMPLATE_PATH/models/cv/image_instance_segmentation/backbones/swin_transformer.py": 1678345974.3086588, "TEMPLATE_PATH/models/cv/image_instance_segmentation/maskdino_model.py": 1678345974.3109038, "TEMPLATE_PATH/models/cv/action_detection/modules/resnet.py": 1678695526.2903874, "TEMPLATE_PATH/models/cv/action_detection/modules/action_detection_pytorch.py": 1678695526.288069, "TEMPLATE_PATH/models/cv/action_detection/action_detection_onnx.py": 1678345974.1868067, "TEMPLATE_PATH/models/cv/vop_retrieval/backbone.py": 1678695526.4892921, "TEMPLATE_PATH/models/cv/vop_retrieval/basic_utils.py": 1678345974.6089652, "TEMPLATE_PATH/models/cv/vop_retrieval/model.py": 1678345974.6095595, "TEMPLATE_PATH/models/cv/vop_retrieval/tokenization_clip.py": 1678695526.4906054, "TEMPLATE_PATH/models/cv/vop_retrieval/model_se.py": 1678695526.489979, "TEMPLATE_PATH/models/cv/video_instance_segmentation/track/kernel_update_head.py": 1681714768.8891828, "TEMPLATE_PATH/models/cv/video_instance_segmentation/track/mask_hungarian_assigner.py": 1681714768.8895793, "TEMPLATE_PATH/models/cv/video_instance_segmentation/video_knet.py": 1681714768.8901427, "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_updator.py": 1681714768.8875823, "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_update_head.py": 1681714768.887322, "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_frame_iter_head.py": 1681714768.8861332, "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_head.py": 1681714768.8865519, "TEMPLATE_PATH/models/cv/video_instance_segmentation/head/kernel_iter_head.py": 1681714768.88694, "TEMPLATE_PATH/models/cv/video_instance_segmentation/utils.py": 1681714768.8898368, "TEMPLATE_PATH/models/cv/video_instance_segmentation/neck/msdeformattn_decoder.py": 1681714768.888164, "TEMPLATE_PATH/models/cv/super_resolution/ecb.py": 1678345974.436123, "TEMPLATE_PATH/models/cv/super_resolution/ecbsr_model.py": 1678345974.4364467, "TEMPLATE_PATH/models/cv/super_resolution/rrdbnet_arch.py": 1666757257.2570488, "TEMPLATE_PATH/models/cv/super_resolution/arch_util.py": 1666757257.2563787, "TEMPLATE_PATH/models/cv/ocr_detection/preprocessor.py": 1684246001.5023808, "TEMPLATE_PATH/models/cv/ocr_detection/model.py": 1678695526.3969364, 
"TEMPLATE_PATH/models/cv/ocr_detection/utils.py": 1678695526.3993652, "TEMPLATE_PATH/models/cv/ocr_detection/modules/dbnet.py": 1678695526.3981876, "TEMPLATE_PATH/models/cv/ocr_detection/modules/seg_detector_loss.py": 1678695526.3986294, "TEMPLATE_PATH/models/cv/panorama_depth_estimation/networks/util.py": 1678345974.3670025, "TEMPLATE_PATH/models/cv/panorama_depth_estimation/networks/mobilenet.py": 1678345974.3609436, "TEMPLATE_PATH/models/cv/panorama_depth_estimation/networks/equi.py": 1678345974.360484, "TEMPLATE_PATH/models/cv/panorama_depth_estimation/networks/resnet.py": 1678345974.3611743, "TEMPLATE_PATH/models/cv/panorama_depth_estimation/networks/unifuse.py": 1678345974.3667643, "TEMPLATE_PATH/models/cv/panorama_depth_estimation/networks/layers.py": 1684246001.5053837, "TEMPLATE_PATH/models/cv/panorama_depth_estimation/unifuse_model.py": 1678345974.3672006, "TEMPLATE_PATH/models/cv/stream_yolo/utils/format.py": 1678345974.4351218, "TEMPLATE_PATH/models/cv/stream_yolo/utils/boxes.py": 1678345974.4347887, "TEMPLATE_PATH/models/cv/stream_yolo/models/tal_head.py": 1678345974.4333599, "TEMPLATE_PATH/models/cv/stream_yolo/models/dfp_pafpn.py": 1678345974.4322102, "TEMPLATE_PATH/models/cv/stream_yolo/models/streamyolo.py": 1678345974.4329953, "TEMPLATE_PATH/models/cv/stream_yolo/models/network_blocks.py": 1678345974.4326873, "TEMPLATE_PATH/models/cv/stream_yolo/models/darknet.py": 1678345974.4318306, "TEMPLATE_PATH/models/cv/stream_yolo/realtime_video_detector.py": 1678345974.433779, "TEMPLATE_PATH/models/cv/stream_yolo/exp/build.py": 1678345974.4007049, "TEMPLATE_PATH/models/cv/stream_yolo/exp/base_exp.py": 1678345974.4003, "TEMPLATE_PATH/models/cv/stream_yolo/exp/default/streamyolo.py": 1678345974.4303985, "TEMPLATE_PATH/models/cv/stream_yolo/exp/yolox_base.py": 1678345974.4308836, "TEMPLATE_PATH/models/cv/stream_yolo/data/data_augment.py": 1678345974.3993874, "TEMPLATE_PATH/models/cv/virual_tryon/sdafnet.py": 1666757257.299963, "TEMPLATE_PATH/models/cv/bad_image_detecting/bad_image_detecting.py": 1678695526.2924836, "TEMPLATE_PATH/models/cv/human_reconstruction/Reconstruction.py": 1681714768.874672, "TEMPLATE_PATH/models/cv/human_reconstruction/models/Surface_head.py": 1681714768.8764422, "TEMPLATE_PATH/models/cv/human_reconstruction/models/Res_backbone.py": 1681714768.87622, "TEMPLATE_PATH/models/cv/human_reconstruction/models/Embedding.py": 1681714768.8756416, "TEMPLATE_PATH/models/cv/human_reconstruction/models/PixToMesh.py": 1681714768.875951, "TEMPLATE_PATH/models/cv/human_reconstruction/models/networks.py": 1681714768.877559, "TEMPLATE_PATH/models/cv/human_reconstruction/models/human_segmenter.py": 1684246001.4695294, "TEMPLATE_PATH/models/cv/human_reconstruction/models/geometry.py": 1681714768.8770833, "TEMPLATE_PATH/models/cv/human_reconstruction/models/detectors.py": 1681714768.876841, "TEMPLATE_PATH/models/cv/human_reconstruction/utils.py": 1684246001.4699862, "TEMPLATE_PATH/models/cv/image_driving_perception/preprocessor.py": 1678695526.3451977, "TEMPLATE_PATH/models/cv/image_driving_perception/utils.py": 1678695526.3456447, "TEMPLATE_PATH/models/cv/image_driving_perception/image_driving_percetion_model.py": 1678695526.3447573, "TEMPLATE_PATH/models/cv/video_streaming_perception/longshortnet/longshortnet.py": 1678695526.4507868, "TEMPLATE_PATH/models/cv/video_streaming_perception/longshortnet/models/longshort_backbone_neck.py": 1678695526.4542763, "TEMPLATE_PATH/models/cv/video_streaming_perception/longshortnet/models/longshort.py": 1678695526.4533079, 
"TEMPLATE_PATH/models/cv/video_streaming_perception/longshortnet/models/dfp_pafpn_short.py": 1678695526.45279, "TEMPLATE_PATH/models/cv/video_streaming_perception/longshortnet/models/dfp_pafpn_long.py": 1678695526.452267, "TEMPLATE_PATH/models/cv/video_streaming_perception/longshortnet/exp/longshortnet_base.py": 1678695526.4501612, "TEMPLATE_PATH/models/cv/image_paintbyexample/model.py": 1678345974.3189397, "TEMPLATE_PATH/models/cv/image_inpainting/refinement.py": 1666757257.1970024, "TEMPLATE_PATH/models/cv/image_inpainting/model.py": 1666757257.1947935, "TEMPLATE_PATH/models/cv/image_inpainting/default.py": 1666757257.1946204, "TEMPLATE_PATH/models/cv/image_inpainting/modules/ade20k/resnet.py": 1666757257.1956348, "TEMPLATE_PATH/models/cv/image_inpainting/modules/ade20k/base.py": 1666757257.1954472, "TEMPLATE_PATH/models/cv/image_inpainting/modules/adversarial.py": 1666757257.1958177, "TEMPLATE_PATH/models/cv/image_inpainting/modules/perceptual.py": 1666757257.196634, "TEMPLATE_PATH/models/cv/image_inpainting/modules/inception.py": 1666757257.1964645, "TEMPLATE_PATH/models/cv/image_inpainting/modules/ffc.py": 1666757257.1962402, "TEMPLATE_PATH/models/cv/image_inpainting/modules/pix2pixhd.py": 1666757257.1967993, "TEMPLATE_PATH/models/cv/image_inpainting/modules/feature_matching.py": 1666757257.196007, "TEMPLATE_PATH/models/cv/image_inpainting/base.py": 1666757257.1944175, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py": 1678695526.3792994, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/util.py": 1678695526.381083, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/match_costs/match_cost.py": 1678695526.3804727, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py": 1678695526.37819, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/nuscenes_dataset.py": 1678695526.3820806, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/loading.py": 1678695526.383117, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/transform_3d.py": 1678695526.3837686, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/dense_heads/petrv2_dednhead.py": 1678695526.3906348, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/dense_heads/depth_net.py": 1678695526.3890011, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/necks/cp_fpn.py": 1678695526.3925595, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/petr_transformer.py": 1678695526.393985, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/positional_encoding.py": 1678695526.3945107, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/backbones/vovnet.py": 1678695526.3856297, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/detectors/petr3d.py": 1678695526.3916428, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/result_vis.py": 1684246001.4759786, "TEMPLATE_PATH/models/cv/object_detection_3d/depe/depe_detect.py": 1678345974.347357, "TEMPLATE_PATH/models/cv/image_quality_assessment_mos/image_quality_assessment_mos.py": 1678345974.3244548, "TEMPLATE_PATH/models/cv/image_quality_assessment_mos/heads/simple_head.py": 1678345974.3242753, "TEMPLATE_PATH/models/cv/image_quality_assessment_mos/backbones/resnet.py": 1678345974.3235202, 
"TEMPLATE_PATH/models/cv/image_quality_assessment_mos/censeo_ivqa_model.py": 1678345974.3237197, "TEMPLATE_PATH/models/cv/image_debanding/rrdb/rrdb_image_debanding.py": 1678345974.2884033, "TEMPLATE_PATH/models/cv/image_restoration/demoire_models/nets.py": 1678345974.3251178, "TEMPLATE_PATH/models/cv/image_restoration/image_restoration_model.py": 1678345974.325296, "TEMPLATE_PATH/models/cv/cartoon/model_tf.py": 1678695526.2941835, "TEMPLATE_PATH/models/cv/cartoon/facelib/facer.py": 1683889954.472153, "TEMPLATE_PATH/models/cv/cartoon/facelib/config.py": 1666757257.1560297, "TEMPLATE_PATH/models/cv/cartoon/facelib/LK/lk.py": 1666757257.1556334, "TEMPLATE_PATH/models/cv/cartoon/facelib/face_detector.py": 1666757257.1562476, "TEMPLATE_PATH/models/cv/cartoon/facelib/face_landmark.py": 1684246001.4677038, "TEMPLATE_PATH/models/cv/cartoon/loss.py": 1678695526.2937913, "TEMPLATE_PATH/models/cv/cartoon/utils.py": 1678695526.295007, "TEMPLATE_PATH/models/cv/cartoon/mtcnn_pytorch/src/align_trans.py": 1666757257.1581075, "TEMPLATE_PATH/models/cv/cartoon/mtcnn_pytorch/src/matlab_cp2tform.py": 1666757257.158323, "TEMPLATE_PATH/models/cv/cartoon/network.py": 1678695526.2945373, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/vision_efficient_tuning.py": 1678695526.463801, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/timm_vision_transformer.py": 1678345974.6026883, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/backbone.py": 1678695526.4603705, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/timm_weight_init.py": 1678345974.6033437, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/timm_helpers.py": 1678345974.601856, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/petl.py": 1678695526.4632218, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/model.py": 1678695526.4610043, "TEMPLATE_PATH/models/cv/vision_efficient_tuning/head.py": 1678345974.5989482, "TEMPLATE_PATH/models/cv/movie_scene_segmentation/utils/save_op.py": 1684121077.52684, "TEMPLATE_PATH/models/cv/movie_scene_segmentation/utils/shot_encoder.py": 1666757257.2231948, "TEMPLATE_PATH/models/cv/movie_scene_segmentation/utils/trn.py": 1666757257.2234836, "TEMPLATE_PATH/models/cv/movie_scene_segmentation/utils/head.py": 1678695526.3568585, "TEMPLATE_PATH/models/cv/movie_scene_segmentation/model.py": 1684121077.525873, "TEMPLATE_PATH/models/cv/movie_scene_segmentation/get_model.py": 1666757257.2217636, "TEMPLATE_PATH/models/cv/video_summarization/pgl_sum.py": 1666757257.297918, "TEMPLATE_PATH/models/cv/video_summarization/base_model.py": 1666757257.2957783, "TEMPLATE_PATH/models/cv/video_summarization/summarizer.py": 1678345974.5919068, "TEMPLATE_PATH/models/cv/video_summarization/kts/cpd_auto.py": 1666757257.2969224, "TEMPLATE_PATH/models/cv/video_summarization/kts/cpd_nonlin.py": 1666757257.2974133, "TEMPLATE_PATH/models/cv/table_recognition/lineless_table_process.py": 1678695526.4324372, "TEMPLATE_PATH/models/cv/table_recognition/model_lore.py": 1678695526.4331207, "TEMPLATE_PATH/models/cv/table_recognition/modules/lore_processor.py": 1678695526.434709, "TEMPLATE_PATH/models/cv/table_recognition/modules/lore_detector.py": 1678695526.4340818, "TEMPLATE_PATH/models/cv/image_matching/quadtree_attention_model.py": 1678345974.3155432, "TEMPLATE_PATH/models/cv/image_matching/config/default.py": 1678345974.3125448, "TEMPLATE_PATH/models/cv/image_matching/utils/misc.py": 1678345974.315888, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/loftr.py": 1678345974.3134868, 
"TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/utils/position_encoding.py": 1678345974.3152256, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/utils/coarse_matching.py": 1678345974.3148923, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/utils/fine_matching.py": 1678345974.3150685, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/loftr_module/quadtree_attention.py": 1678345974.3143134, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/loftr_module/fine_preprocess.py": 1678345974.3139389, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/loftr_module/transformer.py": 1678345974.3145041, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/loftr_module/linear_attention.py": 1678345974.3141232, "TEMPLATE_PATH/models/cv/image_matching/loftr_quadtree/backbone/resnet_fpn.py": 1678345974.3133032, "TEMPLATE_PATH/models/cv/tinynas_detection/detector.py": 1678695526.4374578, "TEMPLATE_PATH/models/cv/tinynas_detection/tinynas_detector.py": 1678345974.460455, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/apis/detector_evaluater.py": 1681714768.8838654, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/apis/detector_inference.py": 1681714768.8841915, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/structures/boxlist_ops.py": 1678345974.4569457, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/structures/bounding_box.py": 1678345974.4566479, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/structures/image_list.py": 1678345974.4573236, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/utils/model_utils.py": 1678345974.4585514, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/utils/boxes.py": 1678345974.4581728, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/utils/scheduler.py": 1678345974.4589145, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/augmentations/box_level_augs/box_level_augs.py": 1678345974.4441965, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/augmentations/box_level_augs/gaussian_maps.py": 1678345974.4447448, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/augmentations/box_level_augs/color_augs.py": 1678345974.4444985, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/augmentations/box_level_augs/geometric_augs.py": 1683889954.4839153, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/augmentations/scale_aware_aug.py": 1678345974.4453552, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/detectors/detector.py": 1678345974.4558744, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/losses/distill_loss.py": 1678345974.4532282, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/losses/gfocal_loss.py": 1678345974.4535718, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/core/ota_assigner.py": 1678345974.4496946, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/core/base_ops.py": 1678345974.4481623, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/core/repvgg_block.py": 1678345974.4501693, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/core/neck_ops.py": 1678345974.4485013, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/core/ops.py": 1678345974.4491763, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/core/utils.py": 1678345974.450523, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/core/weight_init.py": 1678345974.4508731, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/necks/giraffe_config.py": 1678345974.4543374, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/necks/giraffe_fpn.py": 1678345974.4547024, 
"TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/necks/giraffe_fpn_btn.py": 1678345974.4552062, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/heads/gfocal_v2_tiny.py": 1678345974.4517708, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/heads/zero_head.py": 1678345974.45238, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/backbones/tinynas_csp.py": 1678345974.4469912, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/backbones/tinynas_res.py": 1678345974.447312, "TEMPLATE_PATH/models/cv/tinynas_detection/damo/base_models/backbones/darknet.py": 1678345974.4466953, "TEMPLATE_PATH/models/cv/tinynas_detection/utils.py": 1678345974.4609265, "TEMPLATE_PATH/models/cv/tinynas_detection/tinynas_damoyolo.py": 1678345974.4599845, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/utils/visualization.py": 1678345974.5058522, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/utils/utils.py": 1678345974.5049293, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/utils/kalman_filter.py": 1678345974.5041819, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/utils/image.py": 1678345974.503496, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/tracker/matching.py": 1684246001.5073156, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/tracker/multitracker.py": 1684246001.507944, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/tracker/basetrack.py": 1678345974.501055, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/models/model.py": 1678345974.4990714, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/models/common.py": 1678345974.497754, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/models/yolo.py": 1678345974.4996593, "TEMPLATE_PATH/models/cv/video_multi_object_tracking/models/decode.py": 1678345974.4984286, "TEMPLATE_PATH/models/cv/nerf_recon_acc/nerf_recon_acc.py": 1678695526.3603125, "TEMPLATE_PATH/models/cv/nerf_recon_acc/network/nerf.py": 1678345974.340633, "TEMPLATE_PATH/models/cv/nerf_recon_acc/network/utils.py": 1678345974.340971, "TEMPLATE_PATH/models/cv/nerf_recon_acc/network/segmenter.py": 1684246001.4756348, "TEMPLATE_PATH/models/cv/nerf_recon_acc/nerf_preprocess.py": 1678695526.359052, "TEMPLATE_PATH/models/cv/nerf_recon_acc/dataloader/nerf_dataset.py": 1678345974.3388634, "TEMPLATE_PATH/models/cv/nerf_recon_acc/dataloader/read_write_model.py": 1678345974.3391125, "TEMPLATE_PATH/models/cv/video_deinterlace/UNet_for_video_deinterlace.py": 1678345974.461295, "TEMPLATE_PATH/models/cv/video_deinterlace/deinterlace_arch.py": 1678345974.4619946, "TEMPLATE_PATH/models/cv/video_deinterlace/models/deep_fourier_upsampling.py": 1678345974.4627986, "TEMPLATE_PATH/models/cv/video_deinterlace/models/fre.py": 1678345974.4634838, "TEMPLATE_PATH/models/cv/video_deinterlace/models/utils.py": 1678345974.464179, "TEMPLATE_PATH/models/cv/video_deinterlace/models/archs.py": 1678345974.4625406, "TEMPLATE_PATH/models/cv/video_deinterlace/models/enh.py": 1678345974.4631467, "TEMPLATE_PATH/models/cv/cmdssl_video_embedding/resnet3d.py": 1666757257.1593952, "TEMPLATE_PATH/models/cv/cmdssl_video_embedding/resnet2p1d.py": 1666757257.1591942, "TEMPLATE_PATH/models/cv/cmdssl_video_embedding/c3d.py": 1666757257.1590006, "TEMPLATE_PATH/models/cv/image_depth_estimation_bts/depth_estimation_bts_model.py": 1678695526.3416724, "TEMPLATE_PATH/models/cv/image_depth_estimation_bts/networks/decoder.py": 1678695526.3428533, "TEMPLATE_PATH/models/cv/image_depth_estimation_bts/networks/bts_model.py": 1678695526.3423235, 
"TEMPLATE_PATH/models/cv/image_depth_estimation_bts/networks/encoder.py": 1678695526.3433921, "TEMPLATE_PATH/models/cv/image_depth_estimation_bts/networks/utils.py": 1678695526.343739, "TEMPLATE_PATH/models/cv/motion_generation/model.py": 1678345974.3359873, "TEMPLATE_PATH/models/cv/motion_generation/modules/rotation2xyz.py": 1678345974.3373065, "TEMPLATE_PATH/models/cv/motion_generation/modules/respace.py": 1678345974.3370926, "TEMPLATE_PATH/models/cv/motion_generation/modules/smpl.py": 1678345974.3374798, "TEMPLATE_PATH/models/cv/motion_generation/modules/mdm.py": 1678345974.33691, "TEMPLATE_PATH/models/cv/motion_generation/modules/gaussian_diffusion.py": 1678345974.3366945, "TEMPLATE_PATH/models/cv/motion_generation/modules/cfg_sampler.py": 1678345974.3364377, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/utils/requirements_check.py": 1678345974.2937963, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/utils/voc_register.py": 1684246001.4703872, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/utils/configuration_mapper.py": 1678345974.2932599, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/utils/model_surgery_op.py": 1678345974.293452, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/utils/coco_register.py": 1678345974.2930408, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/utils/register_data.py": 1678345974.293619, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/models/fast_rcnn.py": 1678345974.2918143, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/models/defrcn.py": 1678345974.2915351, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/models/resnet.py": 1678345974.2922988, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/models/calibration_layer.py": 1678345974.2913256, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/models/gdl.py": 1678345974.2920313, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/models/roi_heads.py": 1678345974.2925265, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/defrcn_for_fewshot.py": 1678345974.289662, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/evaluation/coco_evaluation.py": 1678345974.2904465, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/evaluation/pascal_voc_evaluation.py": 1678345974.2908285, "TEMPLATE_PATH/models/cv/image_defrcn_fewshot/evaluation/evaluator.py": 1678345974.2906368, "TEMPLATE_PATH/models/cv/ocr_recognition/preprocessor.py": 1681714768.8827155, "TEMPLATE_PATH/models/cv/ocr_recognition/model.py": 1684246001.5035024, "TEMPLATE_PATH/models/cv/ocr_recognition/modules/convnextvit.py": 1681714768.881167, "TEMPLATE_PATH/models/cv/ocr_recognition/modules/crnn.py": 1681714768.8814888, "TEMPLATE_PATH/models/cv/ocr_recognition/modules/vitstr.py": 1681714768.8823054, "TEMPLATE_PATH/models/cv/ocr_recognition/modules/timm_tinyc.py": 1678345974.3579545, "TEMPLATE_PATH/models/cv/ocr_recognition/modules/convnext.py": 1678345974.3574538, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/track/quasi_dense_embed_tracker.py": 1678345974.5580919, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/mask.py": 1678345974.5271971, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/kernel_updator.py": 1678345974.5264003, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/kernel_update_head.py": 1678345974.5256743, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/semantic_fpn_wrapper.py": 1681714768.8905349, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/kernel_head.py": 1678345974.519709, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/kernel_iter_head.py": 1678345974.5233328, 
"TEMPLATE_PATH/models/cv/video_panoptic_segmentation/head/track_heads.py": 1678345974.5286357, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/neck/fpn.py": 1678345974.5311077, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/video_k_net.py": 1678345974.5597517, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/backbone/swin_checkpoint.py": 1678345974.516286, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/backbone/swin_transformer.py": 1678345974.517262, "TEMPLATE_PATH/models/cv/video_panoptic_segmentation/visualizer.py": 1678345974.5604084, "TEMPLATE_PATH/models/cv/open_vocabulary_detection_vild/vild.py": 1684246001.5045104, "TEMPLATE_PATH/models/cv/image_reid_person/pass_model.py": 1666757257.2059593, "TEMPLATE_PATH/models/cv/image_reid_person/transreid_model.py": 1666757257.2062182, "TEMPLATE_PATH/models/cv/image_face_fusion/facelib/align_trans.py": 1678345974.3027532, "TEMPLATE_PATH/models/cv/image_face_fusion/facelib/matlab_cp2tform.py": 1678345974.3029947, "TEMPLATE_PATH/models/cv/image_face_fusion/network/aad_layer.py": 1678345974.3037808, "TEMPLATE_PATH/models/cv/image_face_fusion/network/dense_motion.py": 1678345974.3045554, "TEMPLATE_PATH/models/cv/image_face_fusion/network/model_irse.py": 1678345974.3051307, "TEMPLATE_PATH/models/cv/image_face_fusion/network/bfm.py": 1678345974.3042998, "TEMPLATE_PATH/models/cv/image_face_fusion/network/ops.py": 1678345974.3053207, "TEMPLATE_PATH/models/cv/image_face_fusion/network/aei_flow_net.py": 1678345974.3040216, "TEMPLATE_PATH/models/cv/image_face_fusion/network/facerecon_model.py": 1678345974.3048775, "TEMPLATE_PATH/models/cv/image_face_fusion/image_face_fusion.py": 1678345974.3033106, "TEMPLATE_PATH/models/cv/image_face_fusion/facegan/gan_wrap.py": 1678345974.3008904, "TEMPLATE_PATH/models/cv/image_face_fusion/facegan/op/fused_act.py": 1678345974.3021884, "TEMPLATE_PATH/models/cv/image_face_fusion/facegan/op/upfirdn2d.py": 1678345974.3023663, "TEMPLATE_PATH/models/cv/image_face_fusion/facegan/op/conv2d_gradfix.py": 1678345974.3019848, "TEMPLATE_PATH/models/cv/image_face_fusion/facegan/model.py": 1678345974.3014028, "TEMPLATE_PATH/models/cv/product_segmentation/net.py": 1678695526.4043183, "TEMPLATE_PATH/models/cv/product_segmentation/seg_infer.py": 1666778289.670906, "TEMPLATE_PATH/models/cv/controllable_image_generation/controlnet.py": 1678695526.3069751, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/openpose/body.py": 1678695526.3047397, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/openpose/util.py": 1678695526.3063028, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/openpose/model.py": 1678695526.3059084, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/openpose/hand.py": 1678695526.305337, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/annotator.py": 1678695526.296671, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/api.py": 1678695526.2974072, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/midas/midas_net_custom.py": 1678695526.299506, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/midas/transforms.py": 1678695526.2998872, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/midas/midas_net.py": 1678695526.2992017, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/midas/dpt_depth.py": 1678695526.298864, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/midas/base_model.py": 1678695526.2981143, 
"TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/midas/vit.py": 1678695526.300227, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/midas/blocks.py": 1678695526.298546, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/midas/utils.py": 1678695526.3005583, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/mlsd/utils.py": 1678695526.3033025, "TEMPLATE_PATH/models/cv/controllable_image_generation/annotator/mlsd/mbv2_mlsd_large.py": 1678695526.3022327, "TEMPLATE_PATH/models/cv/video_inpainting/inpainting.py": 1678695526.438486, "TEMPLATE_PATH/models/cv/video_inpainting/inpainting_model.py": 1678695526.438962, "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/casmvs_model.py": 1678345974.3164253, "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/colmap2mvsnet.py": 1684246001.4733398, "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/cas_mvsnet.py": 1678345974.3162477, "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/utils.py": 1678345974.317991, "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/depth_filter.py": 1684246001.4736886, "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/module.py": 1678345974.31774, "TEMPLATE_PATH/models/cv/image_mvs_depth_estimation/general_eval_dataset.py": 1678345974.3174586, "TEMPLATE_PATH/models/cv/image_binary_quant_classification/binary_quant_model.py": 1678345974.2778409, "TEMPLATE_PATH/models/cv/image_binary_quant_classification/bnext.py": 1678345974.2784865, "TEMPLATE_PATH/models/cv/skin_retouching/detection_model/detection_unet_in.py": 1666757257.24693, "TEMPLATE_PATH/models/cv/skin_retouching/detection_model/detection_module.py": 1666757257.2464738, "TEMPLATE_PATH/models/cv/skin_retouching/retinaface/net.py": 1666757257.2504349, "TEMPLATE_PATH/models/cv/skin_retouching/retinaface/prior_box.py": 1666757257.2523744, "TEMPLATE_PATH/models/cv/skin_retouching/retinaface/box_utils.py": 1666757257.249882, "TEMPLATE_PATH/models/cv/skin_retouching/retinaface/utils.py": 1666757257.2532027, "TEMPLATE_PATH/models/cv/skin_retouching/retinaface/network.py": 1666757257.2511058, "TEMPLATE_PATH/models/cv/skin_retouching/retinaface/predict_single.py": 1666757257.251693, "TEMPLATE_PATH/models/cv/skin_retouching/unet_deploy.py": 1666757257.2537475, "TEMPLATE_PATH/models/cv/skin_retouching/weights_init.py": 1666757257.2549121, "TEMPLATE_PATH/models/cv/skin_retouching/utils.py": 1666757257.2543528, "TEMPLATE_PATH/models/cv/skin_retouching/inpainting_model/gconv.py": 1666757257.2480178, "TEMPLATE_PATH/models/cv/skin_retouching/inpainting_model/inpainting_unet.py": 1666757257.248478, "TEMPLATE_PATH/models/cv/body_3d_keypoints/hdformer/directed_graph.py": 1683889954.471591, "TEMPLATE_PATH/models/cv/body_3d_keypoints/hdformer/hdformer_detector.py": 1678345974.191, "TEMPLATE_PATH/models/cv/body_3d_keypoints/hdformer/skeleton.py": 1678345974.1912234, "TEMPLATE_PATH/models/cv/body_3d_keypoints/hdformer/backbone.py": 1678345974.190157, "TEMPLATE_PATH/models/cv/body_3d_keypoints/hdformer/hdformer.py": 1678345974.19077, "TEMPLATE_PATH/models/cv/body_3d_keypoints/hdformer/block.py": 1678345974.1903841, "TEMPLATE_PATH/models/cv/body_3d_keypoints/cannonical_pose/canonical_pose_modules.py": 1678345974.189541, "TEMPLATE_PATH/models/cv/body_3d_keypoints/cannonical_pose/body_3d_pose.py": 1683889954.4713006, "TEMPLATE_PATH/models/cv/action_recognition/models.py": 1666757257.1497922, "TEMPLATE_PATH/models/cv/action_recognition/s3dg.py": 1666757257.1501145, 
"TEMPLATE_PATH/models/cv/action_recognition/tada_convnext.py": 1666757257.1504557, "TEMPLATE_PATH/models/cv/action_recognition/temporal_patch_shift_transformer.py": 1683889954.4706383, "TEMPLATE_PATH/models/cv/video_frame_interpolation/interp_model/flow_reversal.py": 1678345974.4836097, "TEMPLATE_PATH/models/cv/video_frame_interpolation/interp_model/UNet.py": 1678345974.4825158, "TEMPLATE_PATH/models/cv/video_frame_interpolation/interp_model/IFNet_swin.py": 1678345974.481858, "TEMPLATE_PATH/models/cv/video_frame_interpolation/interp_model/refinenet_arch.py": 1678345974.4842384, "TEMPLATE_PATH/models/cv/video_frame_interpolation/interp_model/transformer_layers.py": 1678345974.4849417, "TEMPLATE_PATH/models/cv/video_frame_interpolation/utils/utils.py": 1678345974.4869297, "TEMPLATE_PATH/models/cv/video_frame_interpolation/utils/scene_change_detection.py": 1684246001.5067093, "TEMPLATE_PATH/models/cv/video_frame_interpolation/VFINet_for_video_frame_interpolation.py": 1678345974.4789994, "TEMPLATE_PATH/models/cv/video_frame_interpolation/VFINet_arch.py": 1678345974.4787207, "TEMPLATE_PATH/models/cv/video_frame_interpolation/flow_model/update.py": 1678345974.4809961, "TEMPLATE_PATH/models/cv/video_frame_interpolation/flow_model/corr.py": 1678345974.4801412, "TEMPLATE_PATH/models/cv/video_frame_interpolation/flow_model/extractor.py": 1678345974.480411, "TEMPLATE_PATH/models/cv/video_frame_interpolation/flow_model/raft.py": 1678345974.4806812, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/dense_heads/rpn_head.py": 1678695526.3648705, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/dense_heads/anchor_head.py": 1678695526.3625498, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/necks/fpn.py": 1678695526.3662295, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/utils/convModule_norm.py": 1678695526.3763406, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/utils/checkpoint.py": 1678695526.375762, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/backbones/vit.py": 1666757257.2256925, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py": 1678695526.3729281, "TEMPLATE_PATH/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/fcn_mask_head.py": 1678695526.3743196, "TEMPLATE_PATH/models/cv/object_detection/mmdet_model.py": 1666757257.2243414, "TEMPLATE_PATH/models/cv/pedestrian_attribute_recognition/model.py": 1683889954.4834628, "TEMPLATE_PATH/models/cv/pointcloud_sceneflow_estimation/sf_rcp.py": 1678345974.3682785, "TEMPLATE_PATH/models/cv/pointcloud_sceneflow_estimation/rcp_model.py": 1678345974.3680415, "TEMPLATE_PATH/models/cv/pointcloud_sceneflow_estimation/common.py": 1678345974.367603, "TEMPLATE_PATH/models/cv/pointcloud_sceneflow_estimation/pointnet2_utils.py": 1678345974.36784, "TEMPLATE_PATH/models/cv/animal_recognition/splat.py": 1666757257.151845, "TEMPLATE_PATH/models/cv/animal_recognition/resnet.py": 1666757257.1516247, "TEMPLATE_PATH/models/cv/video_stabilization/utils/image_utils.py": 1678345974.5841804, "TEMPLATE_PATH/models/cv/video_stabilization/utils/RAFTUtils.py": 1678345974.5826185, "TEMPLATE_PATH/models/cv/video_stabilization/utils/math_utils.py": 1678345974.5846765, "TEMPLATE_PATH/models/cv/video_stabilization/utils/ProjectionUtils.py": 1678345974.5819445, "TEMPLATE_PATH/models/cv/video_stabilization/utils/WarpUtils.py": 1678345974.5831873, "TEMPLATE_PATH/models/cv/video_stabilization/utils/MedianFilter.py": 1678345974.5813267, "TEMPLATE_PATH/models/cv/video_stabilization/utils/IterativeSmooth.py": 1678345974.5807827, 
"TEMPLATE_PATH/models/cv/video_stabilization/DUTRAFTStabilizer.py": 1678345974.5794287, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/config.py": 1678345974.5730486, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/rf_det_so.py": 1678345974.5783482, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/Smoother.py": 1678345974.572002, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/DUT_raft.py": 1678345974.5681, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/MotionPro.py": 1678345974.568633, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/RAFT/update.py": 1678345974.5714862, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/RAFT/corr.py": 1678345974.5699692, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/RAFT/extractor.py": 1678345974.5704808, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/RAFT/raft.py": 1678345974.5709665, "TEMPLATE_PATH/models/cv/video_stabilization/DUT/rf_det_module.py": 1678345974.573552, "TEMPLATE_PATH/models/cv/video_depth_estimation/dro_model.py": 1678345974.4664078, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/misc.py": 1678345974.4781265, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/config.py": 1678345974.4763255, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/horovod.py": 1678345974.4769518, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/image_gt.py": 1678345974.477618, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/types.py": 1678345974.4784274, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/depth.py": 1684246001.5061839, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/load.py": 1678345974.4778461, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/image.py": 1678345974.4773502, "TEMPLATE_PATH/models/cv/video_depth_estimation/utils/augmentations.py": 1678345974.4760456, "TEMPLATE_PATH/models/cv/video_depth_estimation/models/model_utils.py": 1678345974.4694006, "TEMPLATE_PATH/models/cv/video_depth_estimation/models/sfm_model_mf.py": 1678345974.4700265, "TEMPLATE_PATH/models/cv/video_depth_estimation/models/model_checkpoint.py": 1678345974.4691477, "TEMPLATE_PATH/models/cv/video_depth_estimation/models/model_wrapper.py": 1678345974.469756, "TEMPLATE_PATH/models/cv/video_depth_estimation/models/sup_model_mf.py": 1678345974.4702911, "TEMPLATE_PATH/models/cv/video_depth_estimation/networks/layers/resnet/pose_decoder.py": 1678345974.4731765, "TEMPLATE_PATH/models/cv/video_depth_estimation/networks/layers/resnet/resnet_encoder.py": 1678345974.473565, "TEMPLATE_PATH/models/cv/video_depth_estimation/networks/layers/resnet/layers.py": 1678345974.4729247, "TEMPLATE_PATH/models/cv/video_depth_estimation/networks/layers/resnet/depth_decoder.py": 1678345974.4725597, "TEMPLATE_PATH/models/cv/video_depth_estimation/networks/optim/update.py": 1678345974.4748883, "TEMPLATE_PATH/models/cv/video_depth_estimation/networks/optim/extractor.py": 1678345974.4745526, "TEMPLATE_PATH/models/cv/video_depth_estimation/networks/depth_pose/depth_pose_net.py": 1678345974.4713385, "TEMPLATE_PATH/models/cv/video_depth_estimation/configs/default_config.py": 1678345974.4657435, "TEMPLATE_PATH/models/cv/video_depth_estimation/geometry/pose_utils.py": 1678345974.4684362, "TEMPLATE_PATH/models/cv/video_depth_estimation/geometry/camera_utils.py": 1678345974.467637, "TEMPLATE_PATH/models/cv/video_depth_estimation/geometry/camera.py": 1678345974.4673057, "TEMPLATE_PATH/models/cv/video_depth_estimation/geometry/pose.py": 1678345974.4680007, "TEMPLATE_PATH/models/cv/vidt/backbone.py": 1681714768.8921459, "TEMPLATE_PATH/models/cv/vidt/model.py": 
1681714768.8937347, "TEMPLATE_PATH/models/cv/vidt/head.py": 1681714768.8931575, "TEMPLATE_PATH/models/cv/vidt/fpn_fusion.py": 1681714768.8928485, "TEMPLATE_PATH/models/cv/vidt/deformable_transformer.py": 1681714768.8925443, "TEMPLATE_PATH/models/cv/face_human_hand_detection/shufflenetv2.py": 1678695526.3277714, "TEMPLATE_PATH/models/cv/face_human_hand_detection/one_stage_detector.py": 1678695526.3271508, "TEMPLATE_PATH/models/cv/face_human_hand_detection/nanodet_plus_head.py": 1678695526.326374, "TEMPLATE_PATH/models/cv/face_human_hand_detection/det_infer.py": 1666778289.6696548, "TEMPLATE_PATH/models/cv/face_human_hand_detection/ghost_pan.py": 1678695526.3257587, "TEMPLATE_PATH/models/cv/face_human_hand_detection/utils.py": 1678695526.328504, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/misc.py": 1666757257.2392309, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/mttr.py": 1673508904.8399704, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/multimodal_transformer.py": 1673508904.8402708, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/matcher.py": 1669108798.5943944, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/backbone.py": 1666757257.2390404, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/position_encoding_2d.py": 1666757257.239805, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/postprocessing.py": 1666757257.239986, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/criterion.py": 1669108798.5941, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/swin_transformer.py": 1669108798.5960565, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/utils/segmentation.py": 1666757257.240216, "TEMPLATE_PATH/models/cv/referring_video_object_segmentation/model.py": 1673508904.8396866, "TEMPLATE_PATH/models/cv/hand_static/networks.py": 1678695526.3304467, "TEMPLATE_PATH/models/cv/hand_static/hand_model.py": 1666778289.6702523, "TEMPLATE_PATH/models/cv/image_depth_estimation/newcrfs_model.py": 1678345974.297565, "TEMPLATE_PATH/models/cv/image_depth_estimation/networks/uper_crf_head.py": 1678345974.2973852, "TEMPLATE_PATH/models/cv/image_depth_estimation/networks/newcrf_layers.py": 1678345974.2962215, "TEMPLATE_PATH/models/cv/image_depth_estimation/networks/newcrf_depth.py": 1678345974.2958264, "TEMPLATE_PATH/models/cv/image_depth_estimation/networks/newcrf_utils.py": 1678345974.2965019, "TEMPLATE_PATH/models/cv/image_depth_estimation/networks/swin_transformer.py": 1678345974.2970595, "TEMPLATE_PATH/models/cv/image_colorization/unet/unet.py": 1678345974.287222, "TEMPLATE_PATH/models/cv/image_colorization/unet/utils.py": 1678345974.287506, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/ddcolor_for_image_colorization.py": 1681714768.8788333, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/ddcolor.py": 1678345974.284877, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/utils/vgg.py": 1681714768.8797908, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/utils/unet.py": 1678345974.2865462, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/utils/transformer_utils.py": 1678345974.2863536, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/utils/position_encoding.py": 1678345974.2861621, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/utils/convnext.py": 1678345974.285967, "TEMPLATE_PATH/models/cv/image_colorization/ddcolor/loss.py": 1681714768.879511, 
"TEMPLATE_PATH/models/cv/face_detection/retinaface/detection.py": 1673508904.7995956, "TEMPLATE_PATH/models/cv/face_detection/retinaface/models/retinaface.py": 1666757257.1662319, "TEMPLATE_PATH/models/cv/face_detection/retinaface/models/net.py": 1666757257.1660082, "TEMPLATE_PATH/models/cv/face_detection/retinaface/utils.py": 1666757257.166439, "TEMPLATE_PATH/models/cv/face_detection/mtcnn/models/detector.py": 1673508904.7983325, "TEMPLATE_PATH/models/cv/face_detection/mtcnn/models/get_nets.py": 1666757257.1649437, "TEMPLATE_PATH/models/cv/face_detection/mtcnn/models/box_utils.py": 1666757257.1642718, "TEMPLATE_PATH/models/cv/face_detection/mtcnn/models/first_stage.py": 1666757257.1647036, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/detection.py": 1673508904.8020747, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/ssd/predictor.py": 1678345974.263985, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/ssd/data_preprocessing.py": 1666757257.1760805, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/ssd/ssd.py": 1666757257.1772814, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/ssd/mb_tiny_fd.py": 1666757257.1766155, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/ssd/fd_config.py": 1666757257.1763618, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/transforms.py": 1666757257.1775296, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/box_utils.py": 1666757257.1750665, "TEMPLATE_PATH/models/cv/face_detection/ulfd_slim/vision/mb_tiny.py": 1666757257.1753407, "TEMPLATE_PATH/models/cv/face_detection/peppa_pig_face/facer.py": 1678345974.2030537, "TEMPLATE_PATH/models/cv/face_detection/peppa_pig_face/LK/lk.py": 1678345974.202424, "TEMPLATE_PATH/models/cv/face_detection/peppa_pig_face/face_detector.py": 1678345974.2027018, "TEMPLATE_PATH/models/cv/face_detection/peppa_pig_face/face_landmark.py": 1684246001.4691453, "TEMPLATE_PATH/models/cv/face_detection/scrfd/scrfd_detect.py": 1678345974.262096, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/core/post_processing/bbox_nms.py": 1678695526.308986, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/core/bbox/transforms.py": 1678695526.3082, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/retinaface.py": 1678695526.3129826, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/auto_augment.py": 1678695526.3106256, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/transforms.py": 1678695526.312393, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/loading.py": 1678695526.3117406, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/formating.py": 1678695526.311245, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/dense_heads/scrfd_head.py": 1678695526.316167, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/master_net.py": 1683889954.473481, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/mobilenet.py": 1678695526.3142238, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/resnet.py": 1678695526.3147054, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/scrfd.py": 1678695526.31966, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/single_stage.py": 1678695526.3215094, "TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/base.py": 1678695526.3180547, 
"TEMPLATE_PATH/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/tinymog.py": 1678695526.3232183, "TEMPLATE_PATH/models/cv/face_detection/scrfd/tinymog_detect.py": 1678345974.263417, "TEMPLATE_PATH/models/cv/face_detection/scrfd/preprocessor.py": 1678345974.2381473, "TEMPLATE_PATH/models/cv/face_detection/scrfd/damofd_detect.py": 1683889954.4730425, "TEMPLATE_PATH/models/cv/face_detection/mogface/models/mogprednet.py": 1666757257.1628885, "TEMPLATE_PATH/models/cv/face_detection/mogface/models/resnet.py": 1666757257.1630945, "TEMPLATE_PATH/models/cv/face_detection/mogface/models/utils.py": 1666757257.1633208, "TEMPLATE_PATH/models/cv/face_detection/mogface/models/detectors.py": 1673508904.7980537, "TEMPLATE_PATH/models/cv/face_detection/mogface/models/mogface.py": 1666757257.162678, "TEMPLATE_PATH/models/cv/robust_image_classification/easyrobust_model.py": 1678345974.369159, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/ddpm_segmentation_model.py": 1678695526.3499827, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/utils/data_process_func.py": 1666757257.213139, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/utils/builder.py": 1666757257.212899, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/utils/seg_func.py": 1678345974.3271163, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/beit_adapter.py": 1666757257.2102795, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/adapter_modules.py": 1666757257.2093182, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/base/beit.py": 1666757257.210009, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/encoder_decoder_mask2former.py": 1666757257.2121763, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/base_segmentor.py": 1666757257.2117958, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/mask2former_head_from_mmseg.py": 1666757257.2112045, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/base_decode_head.py": 1666757257.2109008, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/semantic_seg_model.py": 1684246001.4743931, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/pan_merge/base_panoptic_fusion_head.py": 1666757257.2071388, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/pan_merge/maskformer_semantic_head.py": 1666757257.2074032, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/ddpm_seg/data_util.py": 1678695526.3485208, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/ddpm_seg/utils.py": 1678345974.3266795, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/ddpm_seg/feature_extractors.py": 1678695526.3490577, "TEMPLATE_PATH/models/cv/image_semantic_segmentation/ddpm_seg/pixel_classifier.py": 1678695526.3495007, "TEMPLATE_PATH/models/cv/video_single_object_tracking/config/ostrack.py": 1666757257.2861888, "TEMPLATE_PATH/models/cv/video_single_object_tracking/utils/utils.py": 1666757257.294515, "TEMPLATE_PATH/models/cv/video_single_object_tracking/tracker/procontext.py": 1678695526.4486487, "TEMPLATE_PATH/models/cv/video_single_object_tracking/tracker/ostrack.py": 1666757257.2933815, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/layers/attn_blocks.py": 1678695526.4463873, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/layers/head.py": 1678695526.446728, 
"TEMPLATE_PATH/models/cv/video_single_object_tracking/models/layers/patch_embed.py": 1666757257.2896674, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/layers/attn.py": 1666757257.2881665, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/ostrack/base_backbone.py": 1666757257.290771, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/ostrack/ostrack.py": 1678695526.44714, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/ostrack/utils.py": 1666757257.291744, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/ostrack/vit_ce.py": 1666757257.292233, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/procontext/procontext.py": 1678695526.4476662, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/procontext/utils.py": 1678695526.447871, "TEMPLATE_PATH/models/cv/video_single_object_tracking/models/procontext/vit_ce.py": 1678695526.4480832, "TEMPLATE_PATH/models/cv/text_driven_segmentation/lseg_net.py": 1666757257.2610106, "TEMPLATE_PATH/models/cv/text_driven_segmentation/lseg_blocks.py": 1666757257.2598405, "TEMPLATE_PATH/models/cv/text_driven_segmentation/lseg_model.py": 1666757257.2603793, "TEMPLATE_PATH/models/cv/text_driven_segmentation/model.py": 1666757257.2622228, "TEMPLATE_PATH/models/cv/text_driven_segmentation/lseg_vit.py": 1666757257.2616358, "TEMPLATE_PATH/models/cv/text_driven_segmentation/clip.py": 1666757257.258649, "TEMPLATE_PATH/models/cv/text_driven_segmentation/simple_tokenizer.py": 1666757257.2628324, "TEMPLATE_PATH/models/cv/text_driven_segmentation/lseg_base.py": 1666757257.2592633, "TEMPLATE_PATH/models/cv/crowd_counting/hrnet_aspp_relu.py": 1684246001.4687607, "TEMPLATE_PATH/models/cv/crowd_counting/cc_model.py": 1666757257.1599863, "TEMPLATE_PATH/models/cv/image_panoptic_segmentation/panseg_model.py": 1666757257.1997347, "TEMPLATE_PATH/models/cv/face_emotion/emotion_model.py": 1666757257.1793659, "TEMPLATE_PATH/models/cv/face_emotion/emotion_infer.py": 1678345974.266066, "TEMPLATE_PATH/models/cv/face_emotion/face_alignment/face_align.py": 1666757257.1801705, "TEMPLATE_PATH/models/cv/face_emotion/face_alignment/face.py": 1666757257.1799781, "TEMPLATE_PATH/models/cv/face_emotion/efficient/model.py": 1678695526.3245037, "TEMPLATE_PATH/models/cv/face_emotion/efficient/utils.py": 1678695526.3249586, "TEMPLATE_PATH/models/cv/video_super_resolution/real_basicvsr_net.py": 1678345974.5962114, "TEMPLATE_PATH/models/cv/video_super_resolution/msrresnet_lite_model.py": 1678345974.5949175, "TEMPLATE_PATH/models/cv/video_super_resolution/common.py": 1678345974.5942144, "TEMPLATE_PATH/models/cv/video_super_resolution/real_basicvsr_for_video_super_resolution.py": 1678345974.5955362, "TEMPLATE_PATH/models/cv/video_super_resolution/basicvsr_net.py": 1678345974.5935404, "TEMPLATE_PATH/models/cv/face_attribute_recognition/fair_face/face_attribute_recognition.py": 1678345974.201238, "TEMPLATE_PATH/models/cv/image_denoise/nafnet/NAFNet_arch.py": 1666757257.1934595, "TEMPLATE_PATH/models/cv/image_denoise/nafnet/arch_util.py": 1666757257.1938126, "TEMPLATE_PATH/models/cv/image_denoise/nafnet_for_image_denoise.py": 1678345974.2943788, "TEMPLATE_PATH/models/cv/image_classification/mmcls_model.py": 1678345974.2806082, "TEMPLATE_PATH/models/cv/image_classification/utils.py": 1678345974.2812235, "TEMPLATE_PATH/models/cv/image_classification/backbones/beit_v2.py": 1678345974.279836, "TEMPLATE_PATH/models/cv/image_classification/backbones/nextvit.py": 1678345974.2801979, 
"TEMPLATE_PATH/models/cv/image_classification/resnet50_cc.py": 1678345974.2809508, "TEMPLATE_PATH/models/cv/image_color_enhance/csrnet.py": 1666757257.1914177, "TEMPLATE_PATH/models/cv/image_color_enhance/deeplpf/deeplpfnet.py": 1678345974.2835712, "TEMPLATE_PATH/models/cv/image_color_enhance/deeplpf/deeplpf_image_color_enhance.py": 1678345974.2831166, "TEMPLATE_PATH/models/cv/image_color_enhance/image_color_enhance.py": 1678345974.283899, "TEMPLATE_PATH/models/cv/image_color_enhance/adaint/adaint.py": 1678345974.2824776, "TEMPLATE_PATH/models/base/base_torch_head.py": 1678345974.176039, "TEMPLATE_PATH/models/base/base_model.py": 1681714768.8640296, "TEMPLATE_PATH/models/base/base_torch_model.py": 1681714768.8644474, "TEMPLATE_PATH/models/base/base_head.py": 1678695526.2807148, "TEMPLATE_PATH/metrics/image_quality_assessment_degradation_metric.py": 1678345974.1558983, "TEMPLATE_PATH/metrics/prediction_saving_wrapper.py": 1678345974.1587963, "TEMPLATE_PATH/metrics/video_stabilization_metric.py": 1678345974.161685, "TEMPLATE_PATH/metrics/ppl_metric.py": 1678345974.158113, "TEMPLATE_PATH/metrics/inbatch_recall_metric.py": 1678345974.1564841, "TEMPLATE_PATH/metrics/loss_metric.py": 1678345974.1567907, "TEMPLATE_PATH/metrics/ocr_recognition_metric.py": 1681714768.859318, "TEMPLATE_PATH/metrics/map_metric.py": 1678695526.2701354, "TEMPLATE_PATH/metrics/image_colorization_metric.py": 1681714768.8586574, "TEMPLATE_PATH/metrics/sequence_classification_metric.py": 1678345974.159533, "TEMPLATE_PATH/metrics/audio_noise_metric.py": 1678345974.1516218, "TEMPLATE_PATH/metrics/translation_evaluation_metric.py": 1684246001.462436, "TEMPLATE_PATH/metrics/video_frame_interpolation_metric.py": 1678345974.1614027, "TEMPLATE_PATH/metrics/image_inpainting_metric.py": 1678345974.1546395, "TEMPLATE_PATH/metrics/image_denoise_metric.py": 1678345974.1542509, "TEMPLATE_PATH/metrics/referring_video_object_segmentation_metric.py": 1678345974.1591957, "TEMPLATE_PATH/metrics/token_classification_metric.py": 1678345974.1608303, "TEMPLATE_PATH/metrics/video_summarization_metric.py": 1678345974.1620147, "TEMPLATE_PATH/metrics/builder.py": 1684246001.462111, "TEMPLATE_PATH/metrics/image_quality_assessment_mos_metric.py": 1678345974.1561337, "TEMPLATE_PATH/metrics/ned_metric.py": 1678345974.1578484, "TEMPLATE_PATH/metrics/text_ranking_metric.py": 1678345974.1604652, "TEMPLATE_PATH/metrics/movie_scene_segmentation_metric.py": 1678345974.1574643, "TEMPLATE_PATH/metrics/accuracy_metric.py": 1678345974.151063, "TEMPLATE_PATH/metrics/image_instance_segmentation_metric.py": 1678345974.1552966, "TEMPLATE_PATH/metrics/video_super_resolution_metric/metric_util.py": 1678345974.1631625, "TEMPLATE_PATH/metrics/video_super_resolution_metric/video_super_resolution_metric.py": 1678345974.163586, "TEMPLATE_PATH/metrics/video_super_resolution_metric/niqe.py": 1678695526.272421, "TEMPLATE_PATH/metrics/video_super_resolution_metric/matlab_functions.py": 1678695526.2713144, "TEMPLATE_PATH/metrics/ciderD/ciderD.py": 1666757257.1302783, "TEMPLATE_PATH/metrics/ciderD/ciderD_scorer.py": 1678695526.2677228, "TEMPLATE_PATH/metrics/action_detection_evaluator.py": 1678695526.2662494, "TEMPLATE_PATH/metrics/image_color_enhance_metric.py": 1678345974.153906, "TEMPLATE_PATH/metrics/image_portrait_enhancement_metric.py": 1678345974.1556726, "TEMPLATE_PATH/metrics/bleu_metric.py": 1678345974.1524482, "TEMPLATE_PATH/metrics/text_generation_metric.py": 1678345974.1598558, "TEMPLATE_PATH/metrics/base.py": 1678345974.152117, "TEMPLATE_PATH/pipelines/util.py": 
1678345974.9337575, "TEMPLATE_PATH/pipelines/science/protein_structure_pipeline.py": 1678345974.9334872, "TEMPLATE_PATH/pipelines/builder.py": 1681714768.9746857, "TEMPLATE_PATH/pipelines/pipeline_template.py": 1684246001.5603435, "TEMPLATE_PATH/pipelines/audio/timestamp_pipeline.py": 1684246001.539448, "TEMPLATE_PATH/pipelines/audio/kws_farfield_pipeline.py": 1678695526.592166, "TEMPLATE_PATH/pipelines/audio/speaker_verification_pipeline.py": 1684246001.538074, "TEMPLATE_PATH/pipelines/audio/inverse_text_processing_pipeline.py": 1678345974.833208, "TEMPLATE_PATH/pipelines/audio/separation_pipeline.py": 1678345974.835587, "TEMPLATE_PATH/pipelines/audio/voice_activity_detection_pipeline.py": 1684246001.5403378, "TEMPLATE_PATH/pipelines/audio/text_to_speech_pipeline.py": 1678345974.837081, "TEMPLATE_PATH/pipelines/audio/kws_kwsbp_pipeline.py": 1678345974.8338838, "TEMPLATE_PATH/pipelines/audio/linear_aec_pipeline.py": 1678345974.8341885, "TEMPLATE_PATH/pipelines/audio/ans_pipeline.py": 1678695526.5817752, "TEMPLATE_PATH/pipelines/audio/speaker_verification_eres2net_pipeline.py": 1684247769.6647675, "TEMPLATE_PATH/pipelines/audio/lm_infer_pipeline.py": 1684246001.5343251, "TEMPLATE_PATH/pipelines/audio/ans_dfsmn_pipeline.py": 1678695526.5813322, "TEMPLATE_PATH/pipelines/audio/asr_inference_pipeline.py": 1684246001.5326667, "TEMPLATE_PATH/pipelines/audio/speaker_diarization_pipeline.py": 1684246001.537162, "TEMPLATE_PATH/pipelines/audio/speaker_verification_rdino_pipeline.py": 1684246001.5384402, "TEMPLATE_PATH/pipelines/audio/punctuation_processing_pipeline.py": 1684246001.5355213, "TEMPLATE_PATH/pipelines/audio/speaker_verification_light_pipeline.py": 1678345974.8364737, "TEMPLATE_PATH/pipelines/audio/speaker_change_locating_pipeline.py": 1684246001.5362113, "TEMPLATE_PATH/pipelines/audio/asr_wenet_inference_pipeline.py": 1678345974.8329349, "TEMPLATE_PATH/pipelines/multi_modal/asr_pipeline.py": 1678345974.9102848, "TEMPLATE_PATH/pipelines/multi_modal/image_captioning_pipeline.py": 1684246001.5449712, "TEMPLATE_PATH/pipelines/multi_modal/text_to_video_synthesis_pipeline.py": 1684246001.54651, "TEMPLATE_PATH/pipelines/multi_modal/mgeo_ranking_pipeline.py": 1678345974.913822, "TEMPLATE_PATH/pipelines/multi_modal/generative_multi_modal_embedding_pipeline.py": 1666757257.5339417, "TEMPLATE_PATH/pipelines/multi_modal/multimodal_dialogue_pipeline.py": 1684246001.5457838, "TEMPLATE_PATH/pipelines/multi_modal/text_to_image_synthesis_pipeline.py": 1683891255.6625693, "TEMPLATE_PATH/pipelines/multi_modal/text2sql_pipeline.py": 1678345974.9150336, "TEMPLATE_PATH/pipelines/multi_modal/visual_entailment_pipeline.py": 1678345974.916273, "TEMPLATE_PATH/pipelines/multi_modal/disco_guided_diffusion_pipeline/disco_guided_diffusion.py": 1681714768.987968, "TEMPLATE_PATH/pipelines/multi_modal/disco_guided_diffusion_pipeline/utils.py": 1681714768.988303, "TEMPLATE_PATH/pipelines/multi_modal/visual_question_answering_pipeline.py": 1678345974.916901, "TEMPLATE_PATH/pipelines/multi_modal/video_question_answering_pipeline.py": 1678345974.9160104, "TEMPLATE_PATH/pipelines/multi_modal/video_captioning_pipeline.py": 1678345974.915723, "TEMPLATE_PATH/pipelines/multi_modal/video_multi_modal_embedding_pipeline.py": 1666757257.5376425, "TEMPLATE_PATH/pipelines/multi_modal/efficient_diffusion_tuning_pipeline.py": 1683889954.550607, "TEMPLATE_PATH/pipelines/multi_modal/team_multi_modal_similarity_pipeline.py": 1666757257.5365796, "TEMPLATE_PATH/pipelines/multi_modal/diffusers_wrapped/diffusers_pipeline.py": 
1684121077.5650501, "TEMPLATE_PATH/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/stable_diffusion_pipeline.py": 1684121077.566238, "TEMPLATE_PATH/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/chinese_stable_diffusion_pipeline.py": 1684121077.5659308, "TEMPLATE_PATH/pipelines/multi_modal/multi_modal_embedding_pipeline.py": 1678345974.9142408, "TEMPLATE_PATH/pipelines/multi_modal/ocr_recognition_pipeline.py": 1678345974.914581, "TEMPLATE_PATH/pipelines/multi_modal/document_vl_embedding_pipeline.py": 1678345974.9124298, "TEMPLATE_PATH/pipelines/multi_modal/image_text_retrieval_pipeline.py": 1678345974.913492, "TEMPLATE_PATH/pipelines/multi_modal/gridvlp_pipeline.py": 1678345974.9127157, "TEMPLATE_PATH/pipelines/multi_modal/visual_grounding_pipeline.py": 1678345974.9165354, "TEMPLATE_PATH/pipelines/multi_modal/soonet_video_temporal_grounding_pipeline.py": 1681714768.9888954, "TEMPLATE_PATH/pipelines/multi_modal/sudoku_pipeline.py": 1678345974.9148157, "TEMPLATE_PATH/pipelines/nlp/translation_evaluation_pipeline.py": 1684246001.5580392, "TEMPLATE_PATH/pipelines/nlp/glm130b_text_generation_pipeline.py": 1683889954.5535533, "TEMPLATE_PATH/pipelines/nlp/faq_question_answering_pipeline.py": 1678345974.9225557, "TEMPLATE_PATH/pipelines/nlp/document_grounded_dialog_generate_pipeline.py": 1684246001.5480056, "TEMPLATE_PATH/pipelines/nlp/automatic_post_editing_pipeline.py": 1666757257.5406618, "TEMPLATE_PATH/pipelines/nlp/named_entity_recognition_pipeline.py": 1684246001.5530941, "TEMPLATE_PATH/pipelines/nlp/interactive_translation_pipeline.py": 1678345974.9250765, "TEMPLATE_PATH/pipelines/nlp/summarization_pipeline.py": 1678345974.9273708, "TEMPLATE_PATH/pipelines/nlp/document_grounded_dialog_retrieval_pipeline.py": 1684246001.5499012, "TEMPLATE_PATH/pipelines/nlp/fasttext_text_classification_pipeline.py": 1678345974.9229462, "TEMPLATE_PATH/pipelines/nlp/word_alignment_pipeline.py": 1678695526.6479418, "TEMPLATE_PATH/pipelines/nlp/feature_extraction_pipeline.py": 1684246001.5515616, "TEMPLATE_PATH/pipelines/nlp/text_ranking_pipeline.py": 1684246001.5570047, "TEMPLATE_PATH/pipelines/nlp/fid_dialogue_pipeline.py": 1684246001.552004, "TEMPLATE_PATH/pipelines/nlp/text_classification_pipeline.py": 1684246001.5557013, "TEMPLATE_PATH/pipelines/nlp/codegeex_code_generation_pipeline.py": 1678345974.9174054, "TEMPLATE_PATH/pipelines/nlp/translation_quality_estimation_pipeline.py": 1678345974.931919, "TEMPLATE_PATH/pipelines/nlp/fill_mask_pipeline.py": 1684246001.5525877, "TEMPLATE_PATH/pipelines/nlp/distributed_plug_pipeline.py": 1678345974.91977, "TEMPLATE_PATH/pipelines/nlp/conversational_text_to_sql_pipeline.py": 1678345974.9181793, "TEMPLATE_PATH/pipelines/nlp/distributed_gpt3_pipeline.py": 1681714768.9907482, "TEMPLATE_PATH/pipelines/nlp/information_extraction_pipeline.py": 1678345974.9244976, "TEMPLATE_PATH/pipelines/nlp/table_question_answering_pipeline.py": 1684246001.555219, "TEMPLATE_PATH/pipelines/nlp/user_satisfaction_estimation_pipeline.py": 1684246001.5591247, "TEMPLATE_PATH/pipelines/nlp/dialog_modeling_pipeline.py": 1678345974.9186983, "TEMPLATE_PATH/pipelines/nlp/canmt_translation_pipeline.py": 1683889954.5525997, "TEMPLATE_PATH/pipelines/nlp/word_segmentation_pipeline.py": 1678345974.9326284, "TEMPLATE_PATH/pipelines/nlp/document_segmentation_pipeline.py": 1684246001.5505779, "TEMPLATE_PATH/pipelines/nlp/distributed_gpt_moe_pipeline.py": 1678345974.9194465, "TEMPLATE_PATH/pipelines/nlp/extractive_summarization_pipeline.py": 1684246001.5509684, 
"TEMPLATE_PATH/pipelines/nlp/text_error_correction_pipeline.py": 1678695526.6476424, "TEMPLATE_PATH/pipelines/nlp/dialog_state_tracking_pipeline.py": 1684246001.5474644, "TEMPLATE_PATH/pipelines/nlp/mglm_text_summarization_pipeline.py": 1678695526.6446507, "TEMPLATE_PATH/pipelines/nlp/translation_pipeline.py": 1678345974.9313443, "TEMPLATE_PATH/pipelines/nlp/siamese_uie_pipeline.py": 1684246001.5545502, "TEMPLATE_PATH/pipelines/nlp/dialog_intent_prediction_pipeline.py": 1684246001.5471377, "TEMPLATE_PATH/pipelines/nlp/sentence_embedding_pipeline.py": 1684246001.5536666, "TEMPLATE_PATH/pipelines/nlp/document_grounded_dialog_rerank_pipeline.py": 1684246001.5493042, "TEMPLATE_PATH/pipelines/nlp/zero_shot_classification_pipeline.py": 1684246001.559703, "TEMPLATE_PATH/pipelines/nlp/text_generation_pipeline.py": 1684246001.5563328, "TEMPLATE_PATH/pipelines/nlp/language_identification_pipline.py": 1678695526.6442416, "TEMPLATE_PATH/pipelines/nlp/token_classification_pipeline.py": 1684246001.5576875, "TEMPLATE_PATH/pipelines/nlp/codegeex_code_translation_pipeline.py": 1678345974.9175882, "TEMPLATE_PATH/pipelines/cv/bad_image_detecting_pipeline.py": 1678345974.84121, "TEMPLATE_PATH/pipelines/cv/image_cartoon_pipeline.py": 1666757257.498452, "TEMPLATE_PATH/pipelines/cv/image_to_image_generate_pipeline.py": 1666757257.5083926, "TEMPLATE_PATH/pipelines/cv/facial_expression_recognition_pipeline.py": 1683889954.5460215, "TEMPLATE_PATH/pipelines/cv/retina_face_detection_pipeline.py": 1666757257.525595, "TEMPLATE_PATH/pipelines/cv/image_style_transfer_pipeline.py": 1684246001.5433357, "TEMPLATE_PATH/pipelines/cv/image_face_fusion_pipeline.py": 1678345974.8556206, "TEMPLATE_PATH/pipelines/cv/ulfd_face_detection_pipeline.py": 1666757257.5294175, "TEMPLATE_PATH/pipelines/cv/pedestrian_attribute_recognition_pipeline.py": 1683889954.5479586, "TEMPLATE_PATH/pipelines/cv/image_denoise_pipeline.py": 1678345974.8544917, "TEMPLATE_PATH/pipelines/cv/vop_retrieval_se_pipeline.py": 1678695526.6399703, "TEMPLATE_PATH/pipelines/cv/image_matting_pipeline.py": 1684246001.542891, "TEMPLATE_PATH/pipelines/cv/image_deblur_pipeline.py": 1678345974.8534613, "TEMPLATE_PATH/pipelines/cv/video_human_matting_pipeline.py": 1678345974.9057999, "TEMPLATE_PATH/pipelines/cv/live_category_pipeline.py": 1666757257.5096319, "TEMPLATE_PATH/pipelines/cv/image_structured_model_probing_pipeline.py": 1678345974.890545, "TEMPLATE_PATH/pipelines/cv/face_quality_assessment_pipeline.py": 1683889954.5438397, "TEMPLATE_PATH/pipelines/cv/face_processing_base_pipeline.py": 1683889954.5433195, "TEMPLATE_PATH/pipelines/cv/image_portrait_enhancement_pipeline.py": 1678345974.8593307, "TEMPLATE_PATH/pipelines/cv/image_color_enhance_pipeline.py": 1678345974.852745, "TEMPLATE_PATH/pipelines/cv/vision_efficient_tuning_pipeline.py": 1678695526.6396506, "TEMPLATE_PATH/pipelines/cv/tbs_detection_utils/utils.py": 1681714768.9856553, "TEMPLATE_PATH/pipelines/cv/video_object_segmentation_pipeline.py": 1678345974.9070742, "TEMPLATE_PATH/pipelines/cv/face_detection_pipeline.py": 1678345974.8464031, "TEMPLATE_PATH/pipelines/cv/body_3d_keypoints_pipeline.py": 1678345974.8419, "TEMPLATE_PATH/pipelines/cv/image_paintbyexample_pipeline.py": 1678695526.6043956, "TEMPLATE_PATH/pipelines/cv/face_recognition_ood_pipeline.py": 1683889954.5451627, "TEMPLATE_PATH/pipelines/cv/image_classification_pipeline.py": 1678345974.8523827, "TEMPLATE_PATH/pipelines/cv/card_detection_pipeline.py": 1678345974.8422294, "TEMPLATE_PATH/pipelines/cv/table_recognition_pipeline.py": 
1678345974.9024644, "TEMPLATE_PATH/pipelines/cv/image_to_image_translation_pipeline.py": 1666757257.5090609, "TEMPLATE_PATH/pipelines/cv/face_attribute_recognition_pipeline.py": 1683889954.5421839, "TEMPLATE_PATH/pipelines/cv/image_debanding_pipeline.py": 1678345974.8531418, "TEMPLATE_PATH/pipelines/cv/video_instance_segmentation_pipeline.py": 1681714768.9862943, "TEMPLATE_PATH/pipelines/cv/tinynas_classification_pipeline.py": 1669108798.6442235, "TEMPLATE_PATH/pipelines/cv/human_reconstruction_pipeline.py": 1681714768.9829588, "TEMPLATE_PATH/pipelines/cv/video_multi_object_tracking_pipeline.py": 1678345974.906372, "TEMPLATE_PATH/pipelines/cv/controllable_image_generation_pipeline.py": 1678695526.600098, "TEMPLATE_PATH/pipelines/cv/image_defrcn_fewshot_pipeline.py": 1678345974.8541288, "TEMPLATE_PATH/pipelines/cv/ddpm_semantic_segmentation_pipeline.py": 1678345974.8439617, "TEMPLATE_PATH/pipelines/cv/content_check_pipeline.py": 1678345974.8424017, "TEMPLATE_PATH/pipelines/cv/vop_retrieval_pipeline.py": 1678345974.9095361, "TEMPLATE_PATH/pipelines/cv/object_detection_3d_pipeline.py": 1678695526.6065028, "TEMPLATE_PATH/pipelines/cv/lineless_table_recognition_pipeline.py": 1678695526.6051717, "TEMPLATE_PATH/pipelines/cv/cmdssl_video_embedding_pipeline.py": 1666757257.487139, "TEMPLATE_PATH/pipelines/cv/tinynas_detection_pipeline.py": 1678345974.9044118, "TEMPLATE_PATH/pipelines/cv/video_deinterlace_pipeline.py": 1678695526.6368866, "TEMPLATE_PATH/pipelines/cv/image_open_vocabulary_detection_pipeline.py": 1678345974.8585114, "TEMPLATE_PATH/pipelines/cv/language_guided_video_summarization_pipeline.py": 1678345974.891131, "TEMPLATE_PATH/pipelines/cv/body_2d_keypoints_pipeline.py": 1666757257.4853406, "TEMPLATE_PATH/pipelines/cv/face_human_hand_detection_pipeline.py": 1666778289.6917272, "TEMPLATE_PATH/pipelines/cv/hicossl_video_embedding_pipeline.py": 1666757257.4973748, "TEMPLATE_PATH/pipelines/cv/face_recognition_pipeline.py": 1678345974.8498085, "TEMPLATE_PATH/pipelines/cv/image_body_reshaping_pipeline.py": 1666757257.497916, "TEMPLATE_PATH/pipelines/cv/image_inpainting_pipeline.py": 1666757257.5020847, "TEMPLATE_PATH/pipelines/cv/face_recognition_onnx_fm_pipeline.py": 1683889954.5441782, "TEMPLATE_PATH/pipelines/cv/image_driving_perception_pipeline.py": 1678695526.6034508, "TEMPLATE_PATH/pipelines/cv/video_stabilization_pipeline.py": 1678345974.9080534, "TEMPLATE_PATH/pipelines/cv/indoor_layout_estimation_pipeline.py": 1678345974.8907528, "TEMPLATE_PATH/pipelines/cv/ddcolor_image_colorization_pipeline.py": 1678345974.8437521, "TEMPLATE_PATH/pipelines/cv/face_emotion_pipeline.py": 1666778289.691363, "TEMPLATE_PATH/pipelines/cv/mtcnn_face_detection_pipeline.py": 1666757257.5116644, "TEMPLATE_PATH/pipelines/cv/nerf_recon_acc_pipeline.py": 1678695526.6060696, "TEMPLATE_PATH/pipelines/cv/image_bts_depth_estimation_pipeline.py": 1681714768.983773, "TEMPLATE_PATH/pipelines/cv/facial_landmark_confidence_pipeline.py": 1683889954.5463324, "TEMPLATE_PATH/pipelines/cv/face_reconstruction_pipeline.py": 1684246001.5418012, "TEMPLATE_PATH/pipelines/cv/mog_face_detection_pipeline.py": 1666757257.5102239, "TEMPLATE_PATH/pipelines/cv/skin_retouching_pipeline.py": 1684246001.5436969, "TEMPLATE_PATH/pipelines/cv/vision_middleware_pipeline.py": 1678345974.9092615, "TEMPLATE_PATH/pipelines/cv/face_liveness_ir_pipeline.py": 1683889954.542443, "TEMPLATE_PATH/pipelines/cv/image_detection_pipeline.py": 1678345974.8551383, "TEMPLATE_PATH/pipelines/cv/realtime_video_object_detection_pipeline.py": 1678695526.631697, 
"TEMPLATE_PATH/pipelines/cv/video_panoptic_segmentation_pipeline.py": 1678345974.9074109, "TEMPLATE_PATH/pipelines/cv/action_detection_pipeline.py": 1678345974.8401477, "TEMPLATE_PATH/pipelines/cv/product_segmentation_pipeline.py": 1666778289.692797, "TEMPLATE_PATH/pipelines/cv/tbs_detection_pipeline.py": 1684246001.544016, "TEMPLATE_PATH/pipelines/cv/image_matching_pipeline.py": 1678345974.857486, "TEMPLATE_PATH/pipelines/cv/video_category_pipeline.py": 1669108798.6445787, "TEMPLATE_PATH/pipelines/cv/hand_static_pipeline.py": 1666778289.6920865, "TEMPLATE_PATH/pipelines/cv/animal_recognition_pipeline.py": 1678345974.840479, "TEMPLATE_PATH/pipelines/cv/pointcloud_sceneflow_estimation_pipeline.py": 1678345974.9002383, "TEMPLATE_PATH/pipelines/cv/image_instance_segmentation_pipeline.py": 1678345974.8571947, "TEMPLATE_PATH/pipelines/cv/video_frame_interpolation_pipeline.py": 1678345974.9055316, "TEMPLATE_PATH/pipelines/cv/image_quality_assessment_mos_pipeline.py": 1678345974.8893554, "TEMPLATE_PATH/pipelines/cv/video_summarization_pipeline.py": 1666757257.5319543, "TEMPLATE_PATH/pipelines/cv/panorama_depth_estimation_pipeline.py": 1678345974.8999748, "TEMPLATE_PATH/pipelines/cv/fast_instance_segmentation_pipeline.py": 1684246001.5421734, "TEMPLATE_PATH/pipelines/cv/vidt_pipeline.py": 1681714768.9865973, "TEMPLATE_PATH/pipelines/cv/image_skychange_pipeline.py": 1678345974.8903258, "TEMPLATE_PATH/pipelines/cv/image_quality_assessment_man_pipeline.py": 1678695526.6047776, "TEMPLATE_PATH/pipelines/cv/image_restoration_pipeline.py": 1678345974.8897073, "TEMPLATE_PATH/pipelines/cv/video_inpainting_pipeline.py": 1666757257.530707, "TEMPLATE_PATH/pipelines/cv/face_image_generation_pipeline.py": 1666757257.4936557, "TEMPLATE_PATH/pipelines/cv/video_super_resolution_pipeline.py": 1678695526.6385, "TEMPLATE_PATH/pipelines/cv/referring_video_object_segmentation_pipeline.py": 1678695526.633713, "TEMPLATE_PATH/pipelines/cv/virtual_try_on_pipeline.py": 1678345974.9087186, "TEMPLATE_PATH/pipelines/cv/ocr_recognition_pipeline.py": 1681714768.98453, "TEMPLATE_PATH/pipelines/cv/ocr_detection_pipeline.py": 1678695526.607303, "TEMPLATE_PATH/pipelines/cv/movie_scene_segmentation_pipeline.py": 1684121077.5641353, "TEMPLATE_PATH/pipelines/cv/maskdino_instance_segmentation_pipeline.py": 1678345974.892828, "TEMPLATE_PATH/pipelines/cv/video_colorization_pipeline.py": 1678345974.904686, "TEMPLATE_PATH/pipelines/cv/image_human_parsing_pipeline.py": 1678345974.8562174, "TEMPLATE_PATH/pipelines/cv/face_liveness_xc_pipeline.py": 1683889954.5426972, "TEMPLATE_PATH/pipelines/cv/crowd_counting_pipeline.py": 1666757257.4877608, "TEMPLATE_PATH/pipelines/cv/video_depth_estimation_pipeline.py": 1678345974.9052026, "TEMPLATE_PATH/pipelines/cv/image_colorization_pipeline.py": 1666757257.5002234, "TEMPLATE_PATH/pipelines/cv/arc_face_recognition_pipeline.py": 1683889954.5418775, "TEMPLATE_PATH/pipelines/cv/image_quality_assessment_degradation_pipeline.py": 1678345974.8601525, "TEMPLATE_PATH/pipelines/cv/ocr_utils/model_convnext_transformer.py": 1666757257.5147195, "TEMPLATE_PATH/pipelines/cv/ocr_utils/model_resnet18_half.py": 1678345974.8975644, "TEMPLATE_PATH/pipelines/cv/ocr_utils/resnet18_v1.py": 1666757257.5203307, "TEMPLATE_PATH/pipelines/cv/ocr_utils/model_dla34.py": 1678345974.897257, "TEMPLATE_PATH/pipelines/cv/ocr_utils/ocr_modules/vitstr.py": 1666757257.5185978, "TEMPLATE_PATH/pipelines/cv/ocr_utils/ocr_modules/timm_tinyc.py": 1666757257.517882, "TEMPLATE_PATH/pipelines/cv/ocr_utils/ocr_modules/convnext.py": 
1666757257.5171049, "TEMPLATE_PATH/pipelines/cv/ocr_utils/table_process.py": 1678345974.8992608, "TEMPLATE_PATH/pipelines/cv/ocr_utils/resnet_utils.py": 1666757257.520979, "TEMPLATE_PATH/pipelines/cv/ocr_utils/ops.py": 1678345974.898596, "TEMPLATE_PATH/pipelines/cv/ocr_utils/utils.py": 1678345974.8997033, "TEMPLATE_PATH/pipelines/cv/ocr_utils/model_vlpt.py": 1678345974.8979936, "TEMPLATE_PATH/pipelines/cv/ocr_utils/model_resnet_mutex_v4_linewithchar.py": 1666757257.5153735, "TEMPLATE_PATH/pipelines/cv/image_inpainting_sdv2_pipeline.py": 1678345974.8565032, "TEMPLATE_PATH/pipelines/cv/image_super_resolution_pipeline.py": 1666757257.5076354, "TEMPLATE_PATH/pipelines/cv/image_salient_detection_pipeline.py": 1678345974.890104, "TEMPLATE_PATH/pipelines/cv/video_single_object_tracking_pipeline.py": 1678695526.637953, "TEMPLATE_PATH/pipelines/cv/face_recognition_onnx_ir_pipeline.py": 1683889954.5448508, "TEMPLATE_PATH/pipelines/cv/product_retrieval_embedding_pipeline.py": 1666757257.5221663, "TEMPLATE_PATH/pipelines/cv/mask_face_recognition_pipeline.py": 1678345974.8923888, "TEMPLATE_PATH/pipelines/cv/mobile_image_super_resolution_pipeline.py": 1678345974.893134, "TEMPLATE_PATH/pipelines/cv/license_plate_detection_pipeline.py": 1678345974.8913991, "TEMPLATE_PATH/pipelines/cv/image_semantic_segmentation_pipeline.py": 1666757257.5062222, "TEMPLATE_PATH/pipelines/cv/text_driven_segmentation_pipleline.py": 1666757257.5275502, "TEMPLATE_PATH/pipelines/cv/motion_generation_pipeline.py": 1678345974.8933938, "TEMPLATE_PATH/pipelines/cv/image_mvs_depth_estimation_pipeline.py": 1678345974.8579566, "TEMPLATE_PATH/pipelines/cv/image_depth_estimation_pipeline.py": 1678345974.854762, "TEMPLATE_PATH/pipelines/cv/action_recognition_pipeline.py": 1666757257.4842403, "TEMPLATE_PATH/pipelines/cv/image_reid_person_pipeline.py": 1666757257.5051024, "TEMPLATE_PATH/pipelines/cv/general_recognition_pipeline.py": 1678345974.851133, "TEMPLATE_PATH/pipelines/cv/shop_segmentation_pipleline.py": 1666757257.5262067, "TEMPLATE_PATH/pipelines/base.py": 1684246001.5408666, "TEMPLATE_PATH/preprocessors/kws.py": 1669108798.6509876, "TEMPLATE_PATH/preprocessors/multi_modal.py": 1684246001.5620222, "TEMPLATE_PATH/preprocessors/science/uni_fold.py": 1678345974.9713385, "TEMPLATE_PATH/preprocessors/tts.py": 1678695526.6618354, "TEMPLATE_PATH/preprocessors/asr.py": 1684246001.5611215, "TEMPLATE_PATH/preprocessors/builder.py": 1666757257.563367, "TEMPLATE_PATH/preprocessors/movie_scene_segmentation/transforms.py": 1678695526.6574507, "TEMPLATE_PATH/preprocessors/common.py": 1678695526.6514163, "TEMPLATE_PATH/preprocessors/nlp/token_classification_preprocessor.py": 1684246001.562926, "TEMPLATE_PATH/preprocessors/nlp/siamese_uie_preprocessor.py": 1678695526.6586974, "TEMPLATE_PATH/preprocessors/nlp/relation_extraction_preprocessor.py": 1678345974.9533129, "TEMPLATE_PATH/preprocessors/nlp/token_classification_viet_preprocessor.py": 1678345974.962513, "TEMPLATE_PATH/preprocessors/nlp/translation_evaluation_preprocessor.py": 1684246001.5633366, "TEMPLATE_PATH/preprocessors/nlp/text_classification_preprocessor.py": 1678345974.957994, "TEMPLATE_PATH/preprocessors/nlp/document_grounded_dialog_retrieval_preprocessor.py": 1678345974.9482706, "TEMPLATE_PATH/preprocessors/nlp/zero_shot_classification_preprocessor.py": 1678345974.9639843, "TEMPLATE_PATH/preprocessors/nlp/canmt_translation.py": 1683889954.5585697, "TEMPLATE_PATH/preprocessors/nlp/fill_mask_preprocessor.py": 1678345974.952453, 
"TEMPLATE_PATH/preprocessors/nlp/word_alignment_preprocessor.py": 1678695526.6615062, "TEMPLATE_PATH/preprocessors/nlp/space_T_en/fields/preprocess_dataset.py": 1666757257.5921733, "TEMPLATE_PATH/preprocessors/nlp/space_T_en/fields/parse.py": 1666757257.5916936, "TEMPLATE_PATH/preprocessors/nlp/space_T_en/fields/common_utils.py": 1666757257.5911734, "TEMPLATE_PATH/preprocessors/nlp/space_T_en/fields/process_dataset.py": 1666757257.5927129, "TEMPLATE_PATH/preprocessors/nlp/space_T_en/conversational_text_to_sql_preprocessor.py": 1669108798.6572416, "TEMPLATE_PATH/preprocessors/nlp/document_grounded_dialog_generate_preprocessor.py": 1678345974.946995, "TEMPLATE_PATH/preprocessors/nlp/text_error_correction.py": 1678695526.6594443, "TEMPLATE_PATH/preprocessors/nlp/text_ranking_preprocessor.py": 1678345974.9603443, "TEMPLATE_PATH/preprocessors/nlp/transformers_tokenizer.py": 1683889954.5599382, "TEMPLATE_PATH/preprocessors/nlp/bert_seq_cls_tokenizer.py": 1666757257.5692148, "TEMPLATE_PATH/preprocessors/nlp/text_clean.py": 1683889954.5587656, "TEMPLATE_PATH/preprocessors/nlp/utils.py": 1678345974.9635713, "TEMPLATE_PATH/preprocessors/nlp/document_segmentation_preprocessor.py": 1678345974.94956, "TEMPLATE_PATH/preprocessors/nlp/sentence_embedding_preprocessor.py": 1678345974.9542353, "TEMPLATE_PATH/preprocessors/nlp/mglm_summarization_preprocessor.py": 1669108798.653473, "TEMPLATE_PATH/preprocessors/nlp/token_classification_thai_preprocessor.py": 1678345974.9620914, "TEMPLATE_PATH/preprocessors/nlp/mgeo_ranking_preprocessor.py": 1678345974.9527726, "TEMPLATE_PATH/preprocessors/nlp/space/dialog_intent_prediction_preprocessor.py": 1669108798.6542664, "TEMPLATE_PATH/preprocessors/nlp/space/lazy_dataset.py": 1666757257.582808, "TEMPLATE_PATH/preprocessors/nlp/space/dialog_state_tracking_preprocessor.py": 1666757257.579098, "TEMPLATE_PATH/preprocessors/nlp/space/preprocess.py": 1666757257.583339, "TEMPLATE_PATH/preprocessors/nlp/space/data_loader.py": 1666757257.5771036, "TEMPLATE_PATH/preprocessors/nlp/space/batch.py": 1678345974.9554622, "TEMPLATE_PATH/preprocessors/nlp/space/dialog_modeling_preprocessor.py": 1666757257.5782604, "TEMPLATE_PATH/preprocessors/nlp/space/tokenizer.py": 1678345974.9572399, "TEMPLATE_PATH/preprocessors/nlp/space/dst_processors.py": 1669108798.6548202, "TEMPLATE_PATH/preprocessors/nlp/space/args.py": 1666757257.5759423, "TEMPLATE_PATH/preprocessors/nlp/space/fields/gen_field.py": 1678345974.95627, "TEMPLATE_PATH/preprocessors/nlp/space/fields/intent_field.py": 1666757257.5822835, "TEMPLATE_PATH/preprocessors/nlp/space/sampler.py": 1666757257.5839186, "TEMPLATE_PATH/preprocessors/nlp/space/tensorlistdataset.py": 1666757257.5844374, "TEMPLATE_PATH/preprocessors/nlp/dialog_classification_use_preprocessor.py": 1678345974.9462962, "TEMPLATE_PATH/preprocessors/nlp/text_generation_preprocessor.py": 1681714768.996753, "TEMPLATE_PATH/preprocessors/nlp/space_T_cn/table_question_answering_preprocessor.py": 1666757257.5885906, "TEMPLATE_PATH/preprocessors/nlp/space_T_cn/fields/database.py": 1669108798.6561291, "TEMPLATE_PATH/preprocessors/nlp/space_T_cn/fields/schema_link.py": 1666757257.587568, "TEMPLATE_PATH/preprocessors/nlp/space_T_cn/fields/struct.py": 1678345974.9575932, "TEMPLATE_PATH/preprocessors/nlp/document_grounded_dialog_rerank_preprocessor.py": 1678345974.9476607, "TEMPLATE_PATH/preprocessors/nlp/feature_extraction_preprocessor.py": 1678345974.9510474, "TEMPLATE_PATH/preprocessors/nlp/faq_question_answering_preprocessor.py": 1678345974.950317, 
"TEMPLATE_PATH/preprocessors/audio.py": 1678345974.9349864, "TEMPLATE_PATH/preprocessors/cv/image_classification_preprocessor.py": 1678695526.6556287, "TEMPLATE_PATH/preprocessors/cv/util.py": 1678345974.9410372, "TEMPLATE_PATH/preprocessors/cv/timer.py": 1678345974.940564, "TEMPLATE_PATH/preprocessors/cv/bad_image_detecting_preprocessor.py": 1678345974.9370506, "TEMPLATE_PATH/preprocessors/cv/mmcls_preprocessor.py": 1678345974.9400585, "TEMPLATE_PATH/preprocessors/cv/controllable_image_generation.py": 1678695526.6533623, "TEMPLATE_PATH/preprocessors/cv/image_quality_assessment_mos.py": 1678345974.939592, "TEMPLATE_PATH/preprocessors/cv/image_restoration_preprocessor.py": 1678345974.939753, "TEMPLATE_PATH/preprocessors/cv/cv2_transforms.py": 1678695526.6544669, "TEMPLATE_PATH/preprocessors/cv/video_super_resolution.py": 1678345974.941883, "TEMPLATE_PATH/preprocessors/cv/image_quality_assessment_man.py": 1678695526.6563368, "TEMPLATE_PATH/preprocessors/cv/action_detection_mapper.py": 1678695526.6529279, "TEMPLATE_PATH/preprocessors/cv/video_stabilization.py": 1678345974.9413676, "TEMPLATE_PATH/preprocessors/video.py": 1678345974.971809, "TEMPLATE_PATH/preprocessors/image.py": 1678695526.6569033, "TEMPLATE_PATH/preprocessors/base.py": 1683889954.557044, "TEMPLATE_PATH/preprocessors/ofa/visual_question_answering.py": 1678345974.97092, "TEMPLATE_PATH/preprocessors/ofa/image_classification.py": 1678345974.9656992, "TEMPLATE_PATH/preprocessors/ofa/utils/transforms.py": 1666757257.6044796, "TEMPLATE_PATH/preprocessors/ofa/utils/bridge_content_encoder.py": 1678345974.9681842, "TEMPLATE_PATH/preprocessors/ofa/utils/collate.py": 1678345974.968443, "TEMPLATE_PATH/preprocessors/ofa/utils/get_tables.py": 1678345974.9691057, "TEMPLATE_PATH/preprocessors/ofa/utils/text2phone.py": 1678345974.9699347, "TEMPLATE_PATH/preprocessors/ofa/utils/audio_helper.py": 1678345974.967928, "TEMPLATE_PATH/preprocessors/ofa/utils/random_help.py": 1678345974.969357, "TEMPLATE_PATH/preprocessors/ofa/utils/vision_helper.py": 1666757257.6050525, "TEMPLATE_PATH/preprocessors/ofa/utils/constant.py": 1678345974.9688995, "TEMPLATE_PATH/preprocessors/ofa/asr.py": 1678345974.964469, "TEMPLATE_PATH/preprocessors/ofa/text2sql.py": 1678345974.9668753, "TEMPLATE_PATH/preprocessors/ofa/text_classification.py": 1678345974.9671476, "TEMPLATE_PATH/preprocessors/ofa/image_captioning.py": 1678345974.9651005, "TEMPLATE_PATH/preprocessors/ofa/ocr_recognition.py": 1678345974.9659903, "TEMPLATE_PATH/preprocessors/ofa/visual_entailment.py": 1678345974.9702795, "TEMPLATE_PATH/preprocessors/ofa/visual_grounding.py": 1678345974.970591, "TEMPLATE_PATH/preprocessors/ofa/summarization.py": 1678345974.96643, "TEMPLATE_PATH/preprocessors/ofa/text_to_image_synthesis.py": 1678345974.967411, "TEMPLATE_PATH/preprocessors/ofa/sudoku.py": 1678345974.966176, "TEMPLATE_PATH/preprocessors/ofa/base.py": 1678345974.9648006, "TEMPLATE_PATH/trainers/parallel/builder.py": 1666757257.6517034, "TEMPLATE_PATH/trainers/parallel/utils.py": 1666757257.652214, "TEMPLATE_PATH/trainers/optimizer/builder.py": 1678345975.0087109, "TEMPLATE_PATH/trainers/optimizer/child_tuning_adamw_optimizer.py": 1678345975.0091202, "TEMPLATE_PATH/trainers/lrscheduler/builder.py": 1681714769.013421, "TEMPLATE_PATH/trainers/lrscheduler/warmup/warmup.py": 1666757257.6361334, "TEMPLATE_PATH/trainers/lrscheduler/warmup/base.py": 1666757257.635629, "TEMPLATE_PATH/trainers/nlp_trainer.py": 1681714769.015515, "TEMPLATE_PATH/trainers/utils/inference.py": 1681714769.069791, 
"TEMPLATE_PATH/trainers/utils/log_buffer.py": 1666757257.6546545, "TEMPLATE_PATH/trainers/training_args.py": 1684246001.5734115, "TEMPLATE_PATH/trainers/builder.py": 1683889954.5614784, "TEMPLATE_PATH/trainers/audio/kws_nearfield_trainer.py": 1683889954.5606887, "TEMPLATE_PATH/trainers/audio/kws_utils/model_utils.py": 1678345974.9777398, "TEMPLATE_PATH/trainers/audio/kws_utils/runtime_utils.py": 1678345974.9782813, "TEMPLATE_PATH/trainers/audio/kws_utils/det_utils.py": 1681714769.0015252, "TEMPLATE_PATH/trainers/audio/kws_utils/batch_utils.py": 1683889954.5611897, "TEMPLATE_PATH/trainers/audio/kws_utils/file_utils.py": 1681714769.0070894, "TEMPLATE_PATH/trainers/audio/kws_farfield_trainer.py": 1681714768.9980917, "TEMPLATE_PATH/trainers/audio/separation_trainer.py": 1678345974.9789073, "TEMPLATE_PATH/trainers/audio/asr_trainer.py": 1678345974.974677, "TEMPLATE_PATH/trainers/audio/tts_trainer.py": 1678695526.6909325, "TEMPLATE_PATH/trainers/audio/ans_trainer.py": 1666757257.6118267, "TEMPLATE_PATH/trainers/hooks/checkpoint/checkpoint_hook.py": 1684246001.5663064, "TEMPLATE_PATH/trainers/hooks/checkpoint/checkpoint_processor.py": 1684246001.5666258, "TEMPLATE_PATH/trainers/hooks/checkpoint/load_checkpoint_hook.py": 1684246001.5668476, "TEMPLATE_PATH/trainers/hooks/logger/text_logger_hook.py": 1683889954.5632632, "TEMPLATE_PATH/trainers/hooks/logger/tensorboard_hook.py": 1678695526.7020135, "TEMPLATE_PATH/trainers/hooks/logger/base.py": 1666757257.6279666, "TEMPLATE_PATH/trainers/hooks/optimizer/apex_optimizer_hook.py": 1684246001.5697649, "TEMPLATE_PATH/trainers/hooks/optimizer/torch_optimizer_hook.py": 1684246001.5703554, "TEMPLATE_PATH/trainers/hooks/optimizer/base.py": 1684246001.5700371, "TEMPLATE_PATH/trainers/hooks/distributed/megatron_hook.py": 1684246001.5681868, "TEMPLATE_PATH/trainers/hooks/distributed/deepspeed_hook.py": 1684246001.5679266, "TEMPLATE_PATH/trainers/hooks/distributed/ddp_hook.py": 1684246001.567703, "TEMPLATE_PATH/trainers/hooks/lr_scheduler_hook.py": 1684246001.569458, "TEMPLATE_PATH/trainers/hooks/early_stop_hook.py": 1684246001.5685089, "TEMPLATE_PATH/trainers/hooks/hook.py": 1684246001.5691583, "TEMPLATE_PATH/trainers/hooks/priority.py": 1666757257.6328363, "TEMPLATE_PATH/trainers/hooks/builder.py": 1666757257.6225636, "TEMPLATE_PATH/trainers/hooks/clip_clamp_logit_scale_hook.py": 1669108798.683138, "TEMPLATE_PATH/trainers/hooks/compression/sparsity_hook.py": 1684246001.567191, "TEMPLATE_PATH/trainers/hooks/compression/utils.py": 1678345974.9935489, "TEMPLATE_PATH/trainers/hooks/iter_timer_hook.py": 1666757257.6266162, "TEMPLATE_PATH/trainers/hooks/evaluation_hook.py": 1684246001.5688426, "TEMPLATE_PATH/trainers/multi_modal/clip/clip_trainer.py": 1684246001.571492, "TEMPLATE_PATH/trainers/multi_modal/clip/clip_trainer_utils.py": 1669108798.6861904, "TEMPLATE_PATH/trainers/multi_modal/efficient_diffusion_tuning/efficient_diffusion_tuning_trainer.py": 1683889954.5644114, "TEMPLATE_PATH/trainers/multi_modal/mplug/mplug_trainer.py": 1678345975.001256, "TEMPLATE_PATH/trainers/multi_modal/team/team_trainer.py": 1678345975.0027127, "TEMPLATE_PATH/trainers/multi_modal/team/team_trainer_utils.py": 1669108798.690418, "TEMPLATE_PATH/trainers/multi_modal/mgeo_ranking_trainer.py": 1678345975.0009506, "TEMPLATE_PATH/trainers/multi_modal/ofa/ofa_trainer.py": 1678345975.0016596, "TEMPLATE_PATH/trainers/multi_modal/ofa/ofa_trainer_utils.py": 1678345975.002343, "TEMPLATE_PATH/trainers/default_config.py": 1684246001.5650253, "TEMPLATE_PATH/trainers/nlp/gpt_moe_trainer.py": 
1678345975.0055368, "TEMPLATE_PATH/trainers/nlp/plug_trainer.py": 1678695526.7082524, "TEMPLATE_PATH/trainers/nlp/text_generation_trainer.py": 1681714769.0152323, "TEMPLATE_PATH/trainers/nlp/document_grounded_dialog_rerank_trainer.py": 1678345975.004579, "TEMPLATE_PATH/trainers/nlp/csanmt_translation_trainer.py": 1678345975.0036387, "TEMPLATE_PATH/trainers/nlp/translation_evaluation_trainer.py": 1684246001.5721004, "TEMPLATE_PATH/trainers/nlp/faq_question_answering_trainer.py": 1678345975.0051053, "TEMPLATE_PATH/trainers/nlp/table_question_answering_trainer.py": 1678345975.0076075, "TEMPLATE_PATH/trainers/nlp/sequence_classification_trainer.py": 1678345975.0066545, "TEMPLATE_PATH/trainers/nlp/sentence_embedding_trainer.py": 1678695526.7085762, "TEMPLATE_PATH/trainers/nlp/gpt3_trainer.py": 1681714769.014518, "TEMPLATE_PATH/trainers/nlp/text_ranking_trainer.py": 1666757257.6478848, "TEMPLATE_PATH/trainers/nlp/siamese_uie_trainer.py": 1681714769.014841, "TEMPLATE_PATH/trainers/nlp/space/metrics/metrics_tracker.py": 1666757257.645518, "TEMPLATE_PATH/trainers/nlp/space/dialog_intent_trainer.py": 1666757257.6433034, "TEMPLATE_PATH/trainers/nlp/space/eval.py": 1669108798.6920927, "TEMPLATE_PATH/trainers/nlp/space/trainer/intent_trainer.py": 1666757257.6473625, "TEMPLATE_PATH/trainers/nlp/space/trainer/gen_trainer.py": 1666757257.6467648, "TEMPLATE_PATH/trainers/nlp/space/dialog_modeling_trainer.py": 1666757257.64378, "TEMPLATE_PATH/trainers/nlp/document_grounded_dialog_retrieval_trainer.py": 1678345975.004814, "TEMPLATE_PATH/trainers/nlp/document_grounded_dialog_generate_trainer.py": 1678345975.0042856, "TEMPLATE_PATH/trainers/cli_argument_parser.py": 1684246001.5641959, "TEMPLATE_PATH/trainers/cv/ocr_recognition_trainer.py": 1681714769.0097866, "TEMPLATE_PATH/trainers/cv/image_instance_segmentation_trainer.py": 1666757257.6163688, "TEMPLATE_PATH/trainers/cv/referring_video_object_segmentation_trainer.py": 1678695526.6955135, "TEMPLATE_PATH/trainers/cv/vision_efficient_tuning_trainer.py": 1678695526.696203, "TEMPLATE_PATH/trainers/cv/movie_scene_segmentation_trainer.py": 1666757257.617418, "TEMPLATE_PATH/trainers/cv/nerf_recon_acc_trainer.py": 1678695526.694501, "TEMPLATE_PATH/trainers/cv/image_detection_damoyolo_trainer.py": 1681714769.008786, "TEMPLATE_PATH/trainers/cv/image_classifition_trainer.py": 1684246001.5647185, "TEMPLATE_PATH/trainers/cv/cartoon_translation_trainer.py": 1678695526.6927238, "TEMPLATE_PATH/trainers/cv/ocr_detection_db_trainer.py": 1681714769.0095627, "TEMPLATE_PATH/trainers/cv/card_detection_scrfd_trainer.py": 1666757257.6147146, "TEMPLATE_PATH/trainers/cv/face_detection_scrfd_trainer.py": 1666757257.6152842, "TEMPLATE_PATH/trainers/cv/image_inpainting_trainer.py": 1666757257.6158333, "TEMPLATE_PATH/trainers/cv/image_portrait_enhancement_trainer.py": 1666757257.6168902, "TEMPLATE_PATH/trainers/cv/action_detection_trainer.py": 1678695526.6920478, "TEMPLATE_PATH/trainers/cv/image_defrcn_fewshot_detection_trainer.py": 1678345974.9814935, "TEMPLATE_PATH/trainers/trainer.py": 1684246001.5725896, "TEMPLATE_PATH/trainers/base.py": 1681714769.0076036, "TEMPLATE_PATH/msdatasets/ms_dataset.py": 1684317777.8670049, "TEMPLATE_PATH/msdatasets/context/dataset_context_config.py": 1684246001.5200734, "TEMPLATE_PATH/msdatasets/auth/auth_config.py": 1684121077.5437593, "TEMPLATE_PATH/msdatasets/meta/data_meta_config.py": 1681714768.9622037, "TEMPLATE_PATH/msdatasets/meta/data_meta_manager.py": 1684246001.5257208, "TEMPLATE_PATH/msdatasets/utils/oss_utils.py": 1678345974.7962904, 
"TEMPLATE_PATH/msdatasets/utils/maxcompute_utils.py": 1684246001.5273504, "TEMPLATE_PATH/msdatasets/utils/dataset_utils.py": 1681789723.9124653, "TEMPLATE_PATH/msdatasets/utils/delete_utils.py": 1669108798.6375175, "TEMPLATE_PATH/msdatasets/utils/upload_utils.py": 1678345974.7967587, "TEMPLATE_PATH/msdatasets/task_datasets/video_summarization_dataset.py": 1681714768.9676905, "TEMPLATE_PATH/msdatasets/task_datasets/sidd_image_denoising.py": 1681714768.966313, "TEMPLATE_PATH/msdatasets/task_datasets/torch_base_dataset.py": 1681714768.9668994, "TEMPLATE_PATH/msdatasets/task_datasets/reds_image_deblurring_dataset.py": 1681714768.9657562, "TEMPLATE_PATH/msdatasets/task_datasets/gopro_image_deblurring_dataset.py": 1681714768.9650407, "TEMPLATE_PATH/msdatasets/data_files/data_files_manager.py": 1681714541.6009839, "TEMPLATE_PATH/msdatasets/audio/asr_dataset.py": 1681714768.9251826, "TEMPLATE_PATH/msdatasets/download/download_config.py": 1678345974.7719202, "TEMPLATE_PATH/msdatasets/download/download_manager.py": 1678345974.772169, "TEMPLATE_PATH/msdatasets/download/dataset_builder.py": 1684246001.5244808, "TEMPLATE_PATH/msdatasets/dataset_cls/dataset.py": 1684246001.5233805, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_quality_assessment_degradation/image_quality_assessment_degradation_dataset.py": 1681714769.3278096, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_portrait_enhancement/data_utils.py": 1681714769.329037, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_portrait_enhancement/image_portrait_enhancement_dataset.py": 1681714769.3286672, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/veco_dataset.py": 1681714769.3091025, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_instance_segmentation_coco_dataset.py": 1681714769.3300066, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_recognition_dataset.py": 1684246001.5225265, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/image_dataset.py": 1681714769.3208869, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/make_border_map.py": 1681714769.316306, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/normalize_image.py": 1681714769.3145473, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/make_icdar_data.py": 1681714769.3156052, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/make_seg_detection_data.py": 1681714769.3150744, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/data_process.py": 1681714769.3170214, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/augment_data.py": 1681714769.3176525, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/processes/random_crop_data.py": 1681714769.3139958, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/data_loader.py": 1681714769.3214602, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/measures/quad_measurer.py": 1681714769.319075, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/measures/iou_evaluator.py": 1681714769.3196485, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/ocr_detection/augmenter.py": 1681714769.3220074, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/bad_image_detecting/bad_image_detecting_dataset.py": 1681714769.3407733, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_summarization_dataset.py": 1681714768.9606102, 
"TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_inpainting/image_inpainting_dataset.py": 1681714769.3304625, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_inpainting/aug.py": 1681714769.33086, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/language_guided_video_summarization_dataset.py": 1681714769.3258283, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/builder.py": 1681714769.3403647, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py": 1681714769.324932, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/movie_scene_segmentation/sampler.py": 1681714769.32447, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/text_ranking_dataset.py": 1681714769.3096716, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/audio/kws_nearfield_dataset.py": 1681714769.3418102, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/audio/kws_farfield_dataset.py": 1681714769.3421595, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/audio/kws_nearfield_processor.py": 1681714769.3414555, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/audio/asr_dataset.py": 1681714768.928494, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/sidd_image_denoising/transforms.py": 1681714769.3102627, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/sidd_image_denoising/sidd_image_denoising_dataset.py": 1681714769.3109276, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/sidd_image_denoising/data_utils.py": 1681714769.3115368, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/reds_image_deblurring_dataset.py": 1681714768.9551075, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_frame_interpolation/data_utils.py": 1681714769.3079662, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_frame_interpolation/video_frame_interpolation_dataset.py": 1681714769.3073726, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_quality_assmessment_mos/image_quality_assessment_mos_dataset.py": 1681714769.3270853, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/mgeo_ranking_dataset.py": 1681714769.32538, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_stabilization/video_stabilization_dataset.py": 1681714769.3063674, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/gopro_image_deblurring_dataset.py": 1681714768.9389687, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/referring_video_object_segmentation/transformers.py": 1681714769.3127193, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py": 1681714769.3133628, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/image_colorization/image_colorization_dataset.py": 1681714768.9425967, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/build.py": 1681714769.3396943, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/datasets/coco.py": 1681714769.3387377, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/datasets/mosaic_wrapper.py": 1681714769.338384, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/transforms/build.py": 1681714769.3349338, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/transforms/transforms.py": 1681714769.334567, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/collate_batch.py": 1681714769.3393688, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/samplers/grouped_batch_sampler.py": 1681714769.3360593, 
"TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/samplers/distributed.py": 1681714769.336487, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/samplers/iteration_based_batch_sampler.py": 1681714769.3356428, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/damoyolo/evaluation/coco/coco_eval.py": 1681714769.3372462, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/easycv_base.py": 1681714769.3340495, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/torch_custom_dataset.py": 1681714768.9585028, "TEMPLATE_PATH/msdatasets/dataset_cls/custom_datasets/video_super_resolution/video_super_resolution_dataset.py": 1681714769.305487, "TEMPLATE_PATH/msdatasets/data_loader/data_loader_manager.py": 1684121077.5480983, "TEMPLATE_PATH/msdatasets/data_loader/data_loader.py": 1684246001.5212934, "TEMPLATE_PATH/exporters/torch_model_exporter.py": 1678695526.18393, "TEMPLATE_PATH/exporters/builder.py": 1666757257.1189609, "TEMPLATE_PATH/exporters/audio/ans_dfsmn_exporter.py": 1684246001.4573822, "TEMPLATE_PATH/exporters/nlp/csanmt_for_translation_exporter.py": 1681714768.8512428, "TEMPLATE_PATH/exporters/nlp/model_for_token_classification_exporter.py": 1683889954.460512, "TEMPLATE_PATH/exporters/nlp/sbert_for_sequence_classification_exporter.py": 1678345974.1415546, "TEMPLATE_PATH/exporters/nlp/sbert_for_zero_shot_classification_exporter.py": 1678345974.1418796, "TEMPLATE_PATH/exporters/cv/object_detection_damoyolo_exporter.py": 1678695526.1809118, "TEMPLATE_PATH/exporters/cv/face_detection_scrfd_exporter.py": 1678695526.1807334, "TEMPLATE_PATH/exporters/cv/cartoon_translation_exporter.py": 1678695526.1803331, "TEMPLATE_PATH/exporters/tf_model_exporter.py": 1678695526.1826663, "TEMPLATE_PATH/exporters/base.py": 1678345974.1376836}, "modelscope_path": "TEMPLATE_PATH"} \ No newline at end of file diff --git a/modelscope/utils/ast_utils.py b/modelscope/utils/ast_utils.py index 374ada20..5cee374d 100644 --- a/modelscope/utils/ast_utils.py +++ b/modelscope/utils/ast_utils.py @@ -1,7 +1,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import ast -import contextlib import hashlib import os import os.path as osp @@ -9,12 +8,11 @@ import time import traceback from functools import reduce from pathlib import Path -from typing import Generator, Union +from typing import Union import gast import json -from modelscope import __version__ from modelscope.fileio.file import LocalStorage from modelscope.metainfo import (CustomDatasets, Heads, Hooks, LR_Schedulers, Metrics, Models, Optimizers, Pipelines, @@ -574,6 +572,7 @@ file_scanner = FilesAstScanning() def _save_index(index, file_path, file_list=None, with_template=False): # convert tuple key to str key index[INDEX_KEY] = {str(k): v for k, v in index[INDEX_KEY].items()} + from modelscope.version import __version__ index[VERSION_KEY] = __version__ index[MD5_KEY], index[FILES_MTIME_KEY] = file_scanner.files_mtime_md5( file_list=file_list) @@ -682,6 +681,7 @@ def load_index( if not force_rebuild and os.path.exists(file_path): wrapped_index = _load_index(file_path) md5, files_mtime = file_scanner.files_mtime_md5(file_list=file_list) + from modelscope.version import __version__ if (wrapped_index[VERSION_KEY] == __version__): index = wrapped_index if (wrapped_index[MD5_KEY] != md5): diff --git a/modelscope/utils/audio/audio_utils.py b/modelscope/utils/audio/audio_utils.py index 3336af06..562769b8 100644 --- a/modelscope/utils/audio/audio_utils.py +++ b/modelscope/utils/audio/audio_utils.py @@ -11,6 +11,7 @@ from urllib.parse import urlparse import numpy as np from modelscope.fileio.file import HTTPStorage +from modelscope.hub.utils.utils import get_cache_dir from modelscope.utils.hub import snapshot_download from modelscope.utils.logger import get_logger @@ -323,34 +324,31 @@ def generate_sd_scp_from_url(urls: Union[tuple, list]): def update_local_model(model_config, model_path, extra_args): + if 'update_model' in extra_args and not extra_args['update_model']: + return + model_revision = None if 'update_model' in extra_args: if extra_args['update_model'] == 'latest': model_revision = None else: model_revision = extra_args['update_model'] - if model_config.__contains__('model'): - model_name = model_config['model'] - if isinstance(model_path, str) and os.path.exists(model_path): - try: - logger.info( - 'Download the model to local path {0} ...'.format( - model_path)) - src_path = snapshot_download( - model_name, revision=model_revision) - # cp to model_path - if src_path == model_path: - logger.warning('src_path is the same with model_path') - return - for filename in os.listdir(src_path): - src_file = os.path.join(src_path, filename) - dst_file = os.path.join(model_path, filename) - if os.path.isfile(src_file): - shutil.copy2(src_file, model_path) - elif os.path.isdir(src_file): - if os.path.exists(dst_file): - shutil.rmtree(dst_file) - shutil.copytree(src_file, dst_file) - except Exception as e: - logger.warning(str(e)) - else: - logger.warning('Can not find model name in configuration') + if model_config.__contains__('model'): + model_name = model_config['model'] + dst_dir_root = get_cache_dir() + if isinstance(model_path, str) and os.path.exists( + model_path) and not model_path.startswith(dst_dir_root): + try: + dst = os.path.join(dst_dir_root, '.cache/' + model_name) + dst_dir = os.path.dirname(dst) + os.makedirs(dst_dir, exist_ok=True) + if not os.path.exists(dst): + os.symlink(os.path.abspath(model_path), dst) + + snapshot_download( + model_name, + cache_dir=dst_dir_root, + revision=model_revision) + except Exception as e: + logger.warning(str(e)) + else: + 
logger.warning('Can not find model name in configuration') diff --git a/modelscope/utils/checkpoint.py b/modelscope/utils/checkpoint.py index 64681db4..bbde6034 100644 --- a/modelscope/utils/checkpoint.py +++ b/modelscope/utils/checkpoint.py @@ -5,7 +5,6 @@ import os import re import time from collections import OrderedDict -from functools import partial from shutil import copytree, ignore_patterns, rmtree from typing import Callable, Dict, Optional, Union @@ -15,7 +14,6 @@ from torch import nn from torch.optim import Optimizer from torch.optim.lr_scheduler import _LRScheduler -from modelscope import __version__ from modelscope.fileio import File, LocalStorage from modelscope.utils.config import Config, JSONIteratorEncoder from modelscope.utils.constant import ConfigFields, ModelFile @@ -76,6 +74,7 @@ def save_checkpoint(model: torch.nn.Module, elif not isinstance(meta, dict): raise TypeError( f'meta must be a dict or None, but got {type(meta)}') + from modelscope import __version__ meta.update(modelscope=__version__, time=time.asctime()) if isinstance(model, torch.nn.parallel.DistributedDataParallel): diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index 2382825a..1f44fc01 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -247,6 +247,7 @@ class MultiModalTasks(object): video_temporal_grounding = 'video-temporal-grounding' text_to_video_synthesis = 'text-to-video-synthesis' efficient_diffusion_tuning = 'efficient-diffusion-tuning' + multimodal_dialogue = 'multimodal-dialogue' class ScienceTasks(object): @@ -277,6 +278,7 @@ class Tasks(CVTasks, NLPTasks, AudioTasks, MultiModalTasks, ScienceTasks): This should be used to register models, pipelines, trainers. """ reverse_field_index = {} + task_template = 'task-template' @staticmethod def find_field_by_task(task_name): @@ -327,6 +329,7 @@ class Hubs(enum.Enum): """ modelscope = 'modelscope' huggingface = 'huggingface' + virgo = 'virgo' class DownloadMode(enum.Enum): @@ -539,3 +542,37 @@ class DistributedParallelType(object): class DatasetTensorflowConfig: BATCH_SIZE = 'batch_size' DEFAULT_BATCH_SIZE_VALUE = 5 + + +class VirgoDatasetConfig: + + default_virgo_namespace = 'default_namespace' + + default_dataset_version = '1' + + env_virgo_endpoint = 'VIRGO_ENDPOINT' + + # Columns for meta request + meta_content = 'metaContent' + sampling_type = 'samplingType' + + # Columns for meta content + col_id = 'id' + col_meta_info = 'meta_info' + col_analysis_result = 'analysis_result' + col_external_info = 'external_info' + col_cache_file = 'cache_file' + + +DEFAULT_MAXCOMPUTE_ENDPOINT = 'http://service-corp.odps.aliyun-inc.com/api' + + +class MaxComputeEnvs: + + ACCESS_ID = 'ODPS_ACCESS_ID' + + ACCESS_SECRET_KEY = 'ODPS_ACCESS_SECRET_KEY' + + PROJECT_NAME = 'ODPS_PROJECT_NAME' + + ENDPOINT = 'ODPS_ENDPOINT' diff --git a/modelscope/utils/demo_utils.py b/modelscope/utils/demo_utils.py deleted file mode 100644 index 99e61d45..00000000 --- a/modelscope/utils/demo_utils.py +++ /dev/null @@ -1,275 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
- -import io - -import json - -from modelscope.outputs import OutputKeys -from modelscope.pipelines import pipeline -from modelscope.utils.constant import Tasks, TasksIODescriptions -from modelscope.utils.service_utils import NumpyEncoder - -TASKS_INPUT_TEMPLATES = { - # vision tasks - Tasks.image_portrait_stylization: TasksIODescriptions.image_to_image, - Tasks.portrait_matting: TasksIODescriptions.image_to_image, - Tasks.skin_retouching: TasksIODescriptions.image_to_image, - Tasks.image_captioning: TasksIODescriptions.image_to_text, - Tasks.image_denoising: TasksIODescriptions.image_to_image, - Tasks.image_portrait_enhancement: TasksIODescriptions.image_to_image, - Tasks.image_super_resolution: TasksIODescriptions.image_to_image, - Tasks.image_colorization: TasksIODescriptions.image_to_image, - Tasks.image_color_enhancement: TasksIODescriptions.image_to_image, - Tasks.face_image_generation: TasksIODescriptions.seed_to_image, - Tasks.image_style_transfer: TasksIODescriptions.images_to_image, - Tasks.image_segmentation: TasksIODescriptions.image_to_text, - Tasks.image_object_detection: TasksIODescriptions.image_to_text, - - # not tested - Tasks.image_classification: TasksIODescriptions.image_to_text, - Tasks.ocr_detection: TasksIODescriptions.image_to_text, - Tasks.ocr_recognition: TasksIODescriptions.image_to_text, - Tasks.body_2d_keypoints: TasksIODescriptions.image_to_text, - Tasks.vision_efficient_tuning: TasksIODescriptions.image_to_text, - - # nlp tasks - Tasks.text_classification: TasksIODescriptions.text_to_text, - Tasks.text_generation: TasksIODescriptions.text_to_text, - Tasks.word_segmentation: TasksIODescriptions.text_to_text, - Tasks.text_error_correction: TasksIODescriptions.text_to_text, - Tasks.named_entity_recognition: TasksIODescriptions.text_to_text, - Tasks.sentiment_classification: TasksIODescriptions.text_to_text, - - # audio tasks - Tasks.text_to_speech: TasksIODescriptions.text_to_speech, - Tasks.auto_speech_recognition: TasksIODescriptions.speech_to_text, - Tasks.keyword_spotting: TasksIODescriptions.speech_to_text, - Tasks.acoustic_noise_suppression: TasksIODescriptions.speech_to_speech, - Tasks.acoustic_echo_cancellation: TasksIODescriptions.speeches_to_speech, - - # multi-modal - Tasks.visual_grounding: TasksIODescriptions.visual_grounding, - Tasks.visual_question_answering: - TasksIODescriptions.visual_question_answering, - Tasks.visual_entailment: TasksIODescriptions.visual_entailment, - Tasks.generative_multi_modal_embedding: - TasksIODescriptions.generative_multi_modal_embedding, - - # new tasks - Tasks.virtual_try_on: TasksIODescriptions.images_to_image, - - # TODO(lingcai.wl): support more tasks and implement corresponding example -} - -INPUT_EXAMPLES = { - # Must align with task schema defined in the Widget section of model card= - # cv - TasksIODescriptions.image_to_image: { - 'inputs': [ - 'https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/image_cartoon.png' - ], - 'urlPaths': { - 'outUrls': [{ - 'outputKey': OutputKeys.OUTPUT_IMG, - 'fileType': 'png' - }] - } - }, - TasksIODescriptions.images_to_image: { - 'inputs': [ - 'https://modelscope.oss-cn-beijing.aliyuncs.com/demo/image-style-transfer/style_transfer_content.jpg', - 'https://modelscope.oss-cn-beijing.aliyuncs.com/demo/image-style-transfer/style_transfer_style.jpg' - ], - 'urlPaths': { - 'outUrls': [{ - 'outputKey': OutputKeys.OUTPUT_IMG, - 'fileType': 'png' - }] - } - }, - TasksIODescriptions.image_to_text: { - 'inputs': [ - 
'https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/image_cartoon.png' - ], - 'urlPaths': {} - }, - # nlp - TasksIODescriptions.text_to_text: { - 'inputs': ['test'], - 'urlPaths': {} - }, - - # audio - TasksIODescriptions.speech_to_text: { - 'inputs': [ - 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example.wav' - ], - 'urlPaths': {} - }, - TasksIODescriptions.text_to_speech: { - 'inputs': ['北京今天天气怎么样'], - 'urlPaths': { - 'outUrls': [{ - 'outputKey': OutputKeys.OUTPUT_PCM, - 'fileType': 'pcm' - }] - } - }, - TasksIODescriptions.speeches_to_speech: { - 'inputs': [ - 'http://225252-file.oss-cn-hangzhou-zmf.aliyuncs.com/maas_demo/nearend_mic.wav', - 'http://225252-file.oss-cn-hangzhou-zmf.aliyuncs.com/maas_demo/nearend_speech.wav' - ], - 'urlPaths': { - 'outUrls': [{ - 'outputKey': OutputKeys.OUTPUT_PCM, - 'fileType': 'pcm' - }] - } - }, - TasksIODescriptions.speech_to_speech: { - 'inputs': [ - 'http://225252-file.oss-cn-hangzhou-zmf.aliyuncs.com/maas_demo/speech_with_noise.wav' - ], - 'urlPaths': { - 'outUrls': [{ - 'outputKey': OutputKeys.OUTPUT_PCM, - 'fileType': 'pcm' - }] - } - }, - - # multi modal - TasksIODescriptions.visual_grounding: { - 'task': - Tasks.visual_grounding, - 'inputs': [ - 'http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/visual-grounding/visual_grounding.png', - 'a blue turtle-like pokemon with round head' - ], - 'urlPaths': { - 'inUrls': [{ - 'name': 'image' - }, { - 'name': 'text' - }] - } - }, - TasksIODescriptions.visual_question_answering: { - 'task': - Tasks.visual_question_answering, - 'inputs': [ - 'http://225252-file.oss-cn-hangzhou-zmf.aliyuncs.com/maas_demo/visual_question_answering.png', - 'what is grown on the plant?' - ], - 'urlPaths': { - 'inUrls': [{ - 'name': 'image' - }, { - 'name': 'text' - }], - 'outUrls': [{ - 'outputKey': 'text' - }] - } - }, - TasksIODescriptions.visual_entailment: { - 'task': - Tasks.visual_entailment, - 'inputs': [ - 'http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/visual-entailment/visual_entailment.jpg', - 'there are two birds.', 'test' - ], - 'urlPaths': { - 'inUrls': [{ - 'name': 'image' - }, { - 'name': 'text' - }], - 'outUrls': [{}] - } - }, - TasksIODescriptions.generative_multi_modal_embedding: { - 'task': - Tasks.generative_multi_modal_embedding, - 'inputs': [ - 'http://clip-multimodal.oss-cn-beijing.aliyuncs.com/lingchen/demo/dogs.jpg', - 'dogs playing in the grass' - ], - 'urlPaths': { - 'inUrls': [{ - 'name': 'image' - }, { - 'name': 'text' - }], - 'outUrls': [{}] - } - }, -} - - -class DemoCompatibilityCheck(object): - - def compatibility_check(self): - if self.task not in TASKS_INPUT_TEMPLATES: - print('task is not supported in demo service so far') - return False - if TASKS_INPUT_TEMPLATES[self.task] not in INPUT_EXAMPLES: - print('no example input for this task') - return False - - print('testing demo: ', self.task, self.model_id) - test_pipline = pipeline(self.task, self.model_id) - req = INPUT_EXAMPLES[TASKS_INPUT_TEMPLATES[self.task]] - inputs = preprocess(req) - params = req.get('parameters', {}) - # modelscope inference - if params != {}: - output = test_pipline(inputs, **params) - else: - output = test_pipline(inputs) - json.dumps(output, cls=NumpyEncoder) - result = postprocess(req, output) - print(result) - return True - - -def preprocess(req): - in_urls = req.get('urlPaths').get('inUrls') - if len(req['inputs']) == 1: - inputs = req['inputs'][0] - else: - inputs = tuple(req['inputs']) - if in_urls is None or len(in_urls) == 0: - return inputs - - 
inputs_dict = {} - for i, in_url in enumerate(in_urls): - input_name = in_url.get('name') - if input_name is None or input_name == '': - return inputs - inputs_dict[input_name] = req['inputs'][i] - return inputs_dict - - -def postprocess(req, resp): - out_urls = req.get('urlPaths').get('outUrls') - if out_urls is None or len(out_urls) == 0: - return resp - new_resp = resp - if isinstance(resp, str): - new_resp = json.loads(resp) - for out_url in out_urls: - output_key = out_url['outputKey'] - file_type = out_url['fileType'] - new_resp.get(output_key) - if file_type == 'png' or file_type == 'jpg': - content = new_resp.get(output_key) - import cv2 - _, img_encode = cv2.imencode('.' + file_type, content) - img_bytes = img_encode.tobytes() - return type(img_bytes) - else: - out_mem_file = io.BytesIO() - out_mem_file.write(new_resp.get(output_key)) - return type(out_mem_file) diff --git a/modelscope/utils/input_output.py b/modelscope/utils/input_output.py new file mode 100644 index 00000000..b2c9cd5b --- /dev/null +++ b/modelscope/utils/input_output.py @@ -0,0 +1,756 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import ast +import base64 +import importlib +import inspect +from io import BytesIO +from typing import Any +from urllib.parse import urlparse + +import numpy as np + +from modelscope.hub.api import HubApi +from modelscope.hub.errors import NotExistError +from modelscope.hub.file_download import model_file_download +from modelscope.outputs.outputs import (TASK_OUTPUTS, OutputKeys, OutputTypes, + OutputTypeSchema) +from modelscope.pipeline_inputs import (INPUT_TYPE, INPUT_TYPE_SCHEMA, + TASK_INPUTS, InputType) +from modelscope.pipelines import pipeline +from modelscope.pipelines.base import Pipeline +from modelscope.utils.config import Config +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +logger = get_logger() +"""Support webservice integration pipeline。 + +This module provides a support library when webservice uses pipeline, +converts webservice input into pipeline input, and converts pipeline +output into webservice output, which automatically encodes and +decodes relevant fields. + +Example: + # create pipeine instance and pipeline information, save it to app + pipeline_instance = create_pipeline('damo/cv_gpen_image-portrait-enhancement', 'v1.0.0') + pipeline_info = get_pipeline_information_by_pipeline(pipeline_instance) + app.state.pipeline = pipeline_instance + app.state.pipeline_info = pipeline_info + + # for service schema request. + pipeline_info = request.app.state.pipeline_info + return pipeline_info.schema + + # for service call request. + def inference(request: Request): + pipeline_service = request.app.state.pipeline + pipeline_info = request.app.state.pipeline_info + request_json = await request.json() + result = call_pipeline_with_json(pipeline_info, + pipeline_service, + request_json) + # convert output to json, if binary field, we need encoded. + output = pipeline_output_to_service_base64_output(pipeline_info.task_name, result) + return output +Todo: + * Support more service input type, such as form. 
+ +""" + + +def create_pipeline(model_id: str, revision: str): + model_configuration_file = model_file_download( + model_id=model_id, + file_path=ModelFile.CONFIGURATION, + revision=revision) + cfg = Config.from_file(model_configuration_file) + return pipeline(task=cfg.task, model=model_id, model_revision=revision) + + +def get_class_user_attributes(cls): + attributes = inspect.getmembers(cls, lambda a: not (inspect.isroutine(a))) + user_attributes = [ + a for a in attributes + if (not (a[0].startswith('__') and a[0].endswith('__'))) + ] + return user_attributes + + +def get_input_type(task_inputs: Any): + """Get task input schema. + + Args: + task_name (str): The task name. + """ + if isinstance(task_inputs, str): # no input key + input_type = INPUT_TYPE[task_inputs] + return input_type + elif isinstance(task_inputs, tuple) or isinstance(task_inputs, list): + for item in task_inputs: + if isinstance(item, + dict): # for list, server only support dict format. + return get_input_type(item) + else: + continue + elif isinstance(task_inputs, dict): + input_info = {} # key input key, value input type + for k, v in task_inputs.items(): + input_info[k] = get_input_type(v) + return input_info + else: + raise ValueError(f'invalid input_type definition {task_inputs}') + + +def get_input_schema(task_name: str, input_type: type): + """Get task input schema. + + Args: + task_name (str): The task name. + input_type (type): The input type + """ + if input_type is None: + task_inputs = TASK_INPUTS[task_name] + if isinstance(task_inputs, + str): # only one input field, key is task_inputs + return { + 'type': 'object', + 'properties': { + task_inputs: INPUT_TYPE_SCHEMA[task_inputs] + } + } + else: + task_inputs = input_type + + if isinstance(task_inputs, str): # no input key + return INPUT_TYPE_SCHEMA[task_inputs] + elif input_type is None and isinstance(task_inputs, list): + for item in task_inputs: + # for list, server only support dict format. + if isinstance(item, dict): + return get_input_schema(None, item) + elif isinstance(task_inputs, tuple) or isinstance(task_inputs, list): + input_schema = {'type': 'array', 'items': {}} + for item in task_inputs: + if isinstance(item, dict): + item_schema = get_input_schema(None, item) + input_schema['items']['type'] = item_schema + return input_schema + else: + input_schema['items'] = INPUT_TYPE_SCHEMA[item] + return input_schema + + elif isinstance(task_inputs, dict): + input_schema = { + 'type': 'object', + 'properties': {} + } # key input key, value input type + for k, v in task_inputs.items(): + input_schema['properties'][k] = get_input_schema(None, v) + return input_schema + else: + raise ValueError(f'invalid input_type definition {task_inputs}') + + +def get_output_schema(task_name: str): + """Get task output schema. + + Args: + task_name (str): The task name. + """ + task_outputs = TASK_OUTPUTS[task_name] + output_schema = {'type': 'object', 'properties': {}} + if not isinstance(task_outputs, list): + raise ValueError('TASK_OUTPUTS for %s is not list.' 
% task_name) + else: + for output_key in task_outputs: + output_schema['properties'][output_key] = OutputTypeSchema[ + output_key] + return output_schema + + +def get_input_info(task_name: str): + task_inputs = TASK_INPUTS[task_name] + if isinstance(task_inputs, str): # no input key default input key input + input_type = INPUT_TYPE[task_inputs] + return input_type + elif isinstance(task_inputs, tuple): + return task_inputs + elif isinstance(task_inputs, list): + for item in task_inputs: + if isinstance(item, + dict): # for list, server only support dict format. + return {'input': get_input_type(item)} + else: + continue + elif isinstance(task_inputs, dict): + input_info = {} # key input key, value input type + for k, v in task_inputs.items(): + input_info[k] = get_input_type(v) + return {'input': input_info} + else: + raise ValueError(f'invalid input_type definition {task_inputs}') + + +def get_output_info(task_name: str): + output_keys = TASK_OUTPUTS[task_name] + output_type = {} + if not isinstance(output_keys, list): + raise ValueError('TASK_OUTPUTS for %s is not list.' % task_name) + else: + for output_key in output_keys: + output_type[output_key] = OutputTypes[output_key] + return output_type + + +def get_task_io_info(task_name: str): + """Get task input output schema. + + Args: + task_name (str): The task name. + """ + tasks = get_class_user_attributes(Tasks) + task_exist = False + for key, value in tasks: + if key == task_name or value == task_name: + task_exist = True + break + if not task_exist: + return None, None + + task_inputs = get_input_info(task_name) + task_outputs = get_output_info(task_name) + + return task_inputs, task_outputs + + +def process_arg_type_annotation(arg, default_value): + if arg.annotation is not None: + if isinstance(arg.annotation, ast.Subscript): + return arg.arg, arg.annotation.value.id + elif isinstance(arg.annotation, ast.Name): + return arg.arg, arg.annotation.id + elif isinstance(arg.annotation, ast.Attribute): + return arg.arg, arg.annotation.attr + else: + raise Exception('Invalid annotation: %s' % arg.annotation) + else: + if default_value is not None: + return arg.arg, type(default_value).__name__ + # Irregular, assuming no type hint no default value type is object + logger.warning('arg: %s has no data type annotation, use default!' % + (arg.arg)) + return arg.arg, 'object' + + +def process_args(args): + arguments = [] + # name, type, has_default, default + n_args = len(args.args) + n_args_default = len(args.defaults) + # no default + for arg in args.args[0:n_args - n_args_default]: + if arg.arg == 'self': + continue + else: + arg_name, arg_type = process_arg_type_annotation(arg, None) + arguments.append((arg_name, arg_type, False, None)) + + # process defaults arg. + for arg, dft in zip(args.args[n_args - n_args_default:], args.defaults): + # compatible with python3.7 ast.Num no value. 
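+        # Python 3.8+ parses literal defaults as ast.Constant (with .value); older versions use ast.Num, which exposes the number via .n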
+ value = dft.value if hasattr(dft, 'value') else dft.n + arg_name, arg_type = process_arg_type_annotation(arg, value) + arguments.append((arg_name, arg_type, True, value)) + + # kwargs + n_kwargs = len(args.kwonlyargs) + n_kwargs_default = len(args.kw_defaults) + for kwarg in args.kwonlyargs[0:n_kwargs - n_kwargs_default]: + arg_name, arg_type = process_arg_type_annotation(kwarg) + arguments.append((arg_name, arg_type, False, None)) + + for kwarg, dft in zip(args.kwonlyargs[n_kwargs - n_kwargs_default:], + args.kw_defaults): + arg_name, arg_type = process_arg_type_annotation(kwarg) + arguments.append((arg_name, arg_type, True, dft.value)) + return arguments + + +class PipelineClassAnalyzer(ast.NodeVisitor): + """Analysis pipeline class define get inputs and parameters. + """ + + def __init__(self) -> None: + super().__init__() + self.parameters = [] + self.has_call = False + self.preprocess_parameters = [] + self.has_preprocess = False + self.has_postprocess = False + self.has_forward = False + self.forward_parameters = [] + self.postprocess_parameters = [] + self.lineno = 0 + self.end_lineno = 0 + + def visit_FunctionDef(self, node: ast.FunctionDef) -> Any: + if node.name == '__call__': + self.parameters = process_args(node.args) + self.has_call = True + if node.name == 'preprocess': + self.preprocess_parameters = process_args(node.args) + self.has_preprocess = True + elif node.name == 'postprocess': + self.postprocess_parameters = process_args(node.args) + self.has_postprocess = True + elif node.name == 'forward': + self.forward_parameters = process_args(node.args) + self.has_forward = True + + def get_input_parameters(self): + if self.has_call: + # custom define __call__ inputs and parameter are control by the + # custom __call__, all parameter is input. + return self.parameters, None + parameters = [] + if self.has_preprocess: + parameters.extend(self.preprocess_parameters[1:]) + if self.has_forward: + parameters.extend(self.forward_parameters[1:]) + if self.has_postprocess: + parameters.extend(self.postprocess_parameters[1:]) + + if len(parameters) > 0: + return None, parameters + else: + return None, [] + + +class AnalysisSourceFileRegisterModules(ast.NodeVisitor): + """Get register_module call of the python source file. + + + Args: + ast (NodeVisitor): The ast node. + + Examples: + >>> with open(source_file_path, "rb") as f: + >>> src = f.read() + >>> analyzer = AnalysisSourceFileRegisterModules(source_file_path) + >>> analyzer.visit(ast.parse(src, filename=source_file_path)) + """ + + def __init__(self, source_file_path, class_name) -> None: + super().__init__() + self.source_file_path = source_file_path + self.class_name = class_name + self.class_define = None + + def visit_ClassDef(self, node: ast.ClassDef): + if node.name == self.class_name: + self.class_define = node + + +def get_pipeline_input_parameters( + source_file_path: str, + class_name: str, +): + """Get pipeline input and parameter + + Args: + source_file_path (str): The pipeline source code path + class_name (str): The pipeline class name + """ + with open(source_file_path, 'rb') as f: + src = f.read() + analyzer = AnalysisSourceFileRegisterModules(source_file_path, + class_name) + analyzer.visit( + ast.parse( + src, + filename=source_file_path, + # python3.7 no type_comments parameter . + # type_comments=True + )) + clz = PipelineClassAnalyzer() + clz.visit(analyzer.class_define) + input, pipeline_parameters = clz.get_input_parameters() + # remove the first input parameter, the input is defined by task. 
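+    # (when there is no custom __call__, get_input_parameters strips it via the [1:] slices above)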
+ return input, pipeline_parameters + + +meta_type_schema_map = { + # For parameters, current only support types. + 'str': 'string', + 'int': 'integer', + 'float': 'number', + 'bool': 'boolean', + 'Dict': 'object', + 'dict': 'object', + 'list': 'array', + 'List': 'array', + 'Union': 'object', + 'Input': 'object', + 'object': 'object', +} + + +def generate_pipeline_parameters_schema(parameters): + parameters_schema = {'type': 'object', 'properties': {}} + if len(parameters) == 0: + return {} + for param in parameters: + name, param_type, has_default, default_value = param + # 'max_length': ('int', True, 1024) + prop = {'type': meta_type_schema_map[param_type]} + if has_default: + prop['default'] = default_value + parameters_schema['properties'][name] = prop + return parameters_schema + + +def get_pipeline_information_by_pipeline(pipeline: Pipeline, ): + """Get pipeline input output schema. + + Args: + pipeline (Pipeline): The pipeline object. + """ + task_name = pipeline.group_key + pipeline_class = pipeline.__class__.__name__ + spec = importlib.util.find_spec(pipeline.__module__) + pipeline_file_path = spec.origin + info = PipelineInfomation(task_name, pipeline_class, pipeline_file_path) + return info + + +class PipelineInfomation(): + """Analyze pipeline information, task_name, schema. + """ + + def __init__(self, task_name: str, class_name, source_path): + self._task_name = task_name + self._class_name = class_name + self._source_path = source_path + self._is_custom_call_method = False + self._analyze() + + def _analyze(self): + input, parameters = get_pipeline_input_parameters( + self._source_path, self._class_name) + if input is not None: # custom pipeline __call__ asr_inferrnce_pipeline + self._is_custom_call_method = True + self._input_schema = generate_pipeline_parameters_schema(input) + self._input_schema[ + 'description'] = 'For binary input such as image audio video, only url is supported.' + self._parameters_schema = {} + self._output_schema = { + 'type': 'object', + } + if self._task_name in TASK_OUTPUTS: + self._output_schema = get_output_schema(self._task_name) + else: + # use base pipeline __call__ + if self._task_name in TASK_INPUTS and self._task_name in TASK_OUTPUTS: + # delete the first default input which is defined by task. 
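+                # `parameters` was already stripped of the task-defined input by get_pipeline_input_parameters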
+ self._parameters_schema = generate_pipeline_parameters_schema( + parameters) + self._input_schema = get_input_schema(self._task_name, None) + self._output_schema = get_output_schema(self._task_name) + else: + logger.warning( + 'Task: %s input is defined: %s, output is defined: %s which is not completed' + % (self._task_name, self._task_name + in TASK_INPUTS, self._task_name in TASK_OUTPUTS)) + self._input_schema = None + self._output_schema = None + if self._task_name in TASK_INPUTS: + self._input_schema = get_input_schema( + self._task_name, None) + if self._task_name in TASK_OUTPUTS: + self._output_schema = get_output_schema(self._task_name) + self._parameters_schema = generate_pipeline_parameters_schema( + parameters) + + @property + def task_name(self): + return self._task_name + + @property + def is_custom_call(self): + return self._is_custom_call_method + + @property + def input_schema(self): + return self._input_schema + + @property + def output_schema(self): + return self._output_schema + + @property + def parameters_schema(self): + return self._parameters_schema + + @property + def schema(self): + return { + 'input': self._input_schema if self._input_schema else + self._parameters_schema, # all parameter is input + 'parameters': + self._parameters_schema if self._input_schema else {}, + 'output': self._output_schema if self._output_schema else { + 'type': 'object', + }, + } + + +def is_url(url: str): + """Check the input url is valid url. + + Args: + url (str): The url + + Returns: + bool: If is url return True, otherwise False. + """ + url_parsed = urlparse(url) + if url_parsed.scheme in ('http', 'https', 'oss'): + return True + else: + return False + + +def decode_base64_to_image(content): + if content.startswith('http') or content.startswith('oss'): + return content + + from PIL import Image + image_file_content = base64.b64decode(content) + return Image.open(BytesIO(image_file_content)) + + +def decode_base64_to_audio(content): + if content.startswith('http') or content.startswith('oss'): + return content + + file_content = base64.b64decode(content) + return file_content + + +def decode_base64_to_video(content): + if content.startswith('http') or content.startswith('oss'): + return content + + file_content = base64.b64decode(content) + return file_content + + +def return_origin(content): + return content + + +def decode_box(content): + pass + + +def service_multipart_input_to_pipeline_input(body): + """Convert multipart data to pipeline input. + + Args: + body (dict): The multipart data body + """ + pass + + +def pipeline_output_to_service_multipart_output(output): + """Convert multipart data to service multipart output. + + Args: + output (dict): Multipart body. + """ + pass + + +base64_decoder_map = { + InputType.IMAGE: decode_base64_to_image, + InputType.TEXT: return_origin, + InputType.AUDIO: decode_base64_to_audio, + InputType.VIDEO: decode_base64_to_video, + InputType.BOX: decode_box, + InputType.DICT: return_origin, + InputType.LIST: return_origin, + InputType.NUMBER: return_origin, +} + + +def call_pipeline_with_json(pipeline_info: PipelineInfomation, + pipeline: Pipeline, body: str): + """Call pipeline with json input. + + Args: + pipeline_info (PipelineInfomation): The pipeline information object. + pipeline (Pipeline): The pipeline object. 
+ body (Dict): The input object, include input and parameters + """ + if pipeline_info.is_custom_call: + pipeline_inputs = body['input'] + result = pipeline(**pipeline_inputs) + else: + pipeline_inputs, parameters = service_base64_input_to_pipeline_input( + pipeline_info.task_name, body) + result = pipeline(pipeline_inputs, **parameters) + + return result + + +def service_base64_input_to_pipeline_input(task_name, body): + """Convert service base64 input to pipeline input and parameters + + Args: + task_name (str): The task name. + body (Dict): The input object, include input and parameters + """ + if 'input' not in body: + raise ValueError('No input data!') + service_input = body['input'] + if 'parameters' in body: + parameters = body['parameters'] + else: + parameters = {} + pipeline_input = {} + + task_input_info = TASK_INPUTS[task_name] + if isinstance(task_input_info, str): # no input key default + return base64_decoder_map[task_input_info](list( + service_input.values())[0]), parameters + elif isinstance(task_input_info, tuple): + pipeline_input = tuple(service_input) + return pipeline_input, parameters + elif isinstance(task_input_info, dict): + for key, value in service_input.items( + ): # task input has no nesting field. + # get input filed type + input_type = task_input_info[key] + # TODO recursion for list, dict if need. + if not isinstance(input_type, str): + pipeline_input[key] = value + continue + if input_type not in INPUT_TYPE: + raise ValueError('Invalid input field: %s' % input_type) + pipeline_input[key] = base64_decoder_map[input_type](value) + return pipeline_input, parameters + elif isinstance(task_input_info, + list): # one of input format, we use dict. + for item in task_input_info: + if isinstance(item, dict): + for key, value in service_input.items( + ): # task input has no nesting field. + # get input filed type + input_type = item[key] + if input_type not in INPUT_TYPE: + raise ValueError('Invalid input field: %s' + % input_type) + pipeline_input[key] = base64_decoder_map[input_type](value) + return pipeline_input, parameters + else: + raise IndexError('Task %s input invalid: %s' % + (task_name, task_input_info)) + + +def encode_numpy_image_to_base64(image): + from PIL import Image + with BytesIO() as output_bytes: + pil_image = Image.fromarray(image.astype(np.uint8)) + pil_image.save(output_bytes, 'PNG') + bytes_data = output_bytes.getvalue() + base64_str = str(base64.b64encode(bytes_data), 'utf-8') + return base64_str + + +def encode_video_to_base64(video): + return str(base64.b64encode(video), 'utf-8') + + +def encode_pcm_to_base64(pcm): + return str(base64.b64encode(pcm), 'utf-8') + + +def encode_wav_to_base64(wav): + return str(base64.b64encode(wav), 'utf-8') + + +def encode_bytes_to_base64(bts): + return str(base64.b64encode(bts), 'utf-8') + + +base64_encoder_map = { + 'image': encode_numpy_image_to_base64, + 'video': encode_video_to_base64, + 'pcm': encode_pcm_to_base64, + 'wav': encode_wav_to_base64, + 'bytes': encode_bytes_to_base64, +} + +# convert numpy etc type to python type. 
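+# e.g. np.int64 -> int, so that pipeline outputs remain JSON serializable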
+type_to_python_type = { + np.int64: int, +} + + +def _convert_to_python_type(inputs): + if isinstance(inputs, (list, tuple)): + res = [] + for item in inputs: + res.append(_convert_to_python_type(item)) + return res + elif isinstance(inputs, dict): + res = {} + for k, v in inputs.items(): + if type(v) in type_to_python_type: + res[k] = type_to_python_type[type(v)](v) + else: + res[k] = _convert_to_python_type(v) + return res + else: + return inputs + + +def pipeline_output_to_service_base64_output(task_name, pipeline_output): + """Convert pipeline output to service output, + convert binary fields to base64 encoding。 + + Args: + task_name (str): The output task name. + pipeline_output (object): The pipeline output. + """ + json_serializable_output = {} + task_outputs = [] + if task_name in TASK_OUTPUTS: + task_outputs = TASK_OUTPUTS[task_name] + for key, value in pipeline_output.items(): + if key not in task_outputs: + continue # skip the output not defined. + if key in [ + OutputKeys.OUTPUT_IMG, OutputKeys.OUTPUT_IMGS, + OutputKeys.OUTPUT_VIDEO, OutputKeys.OUTPUT_PCM, + OutputKeys.OUTPUT_PCM_LIST, OutputKeys.OUTPUT_WAV + ]: + if isinstance(value, list): + items = [] + if key == OutputKeys.OUTPUT_IMGS: + output_item_type = OutputKeys.OUTPUT_IMG + else: + output_item_type = OutputKeys.OUTPUT_PCM + for item in value: + items.append(base64_encoder_map[output_item_type](item)) + json_serializable_output[key] = items + else: + json_serializable_output[key] = base64_encoder_map[ + OutputTypes[key]]( + value) + elif OutputTypes[key] in [np.ndarray]: + json_serializable_output[key] = value.tolist() + else: + json_serializable_output[key] = value + + return _convert_to_python_type(json_serializable_output) diff --git a/modelscope/utils/megatron_utils.py b/modelscope/utils/megatron_utils.py index 922cb53d..53b5aacb 100644 --- a/modelscope/utils/megatron_utils.py +++ b/modelscope/utils/megatron_utils.py @@ -96,15 +96,16 @@ def convert_megatron_checkpoint( log_master( f'origin_num_partitions: {origin_num_partitions}, target_num_partitions: {target_num_partitions}' ) - os.makedirs(target_dir, exist_ok=True) if origin_num_partitions < target_num_partitions: + os.makedirs(target_dir, exist_ok=True) state_dict = _split_checkpoint( model, checkpoint_dir, target_num_partitions // origin_num_partitions) _save_converted_checkpoint(state_dict, target_dir) log_master('Split checkpoints succeeded.') elif origin_num_partitions > target_num_partitions: + os.makedirs(target_dir, exist_ok=True) state_dict = _merge_checkpoint( model, checkpoint_dir, origin_num_partitions // target_num_partitions) diff --git a/modelscope/utils/plugins.py b/modelscope/utils/plugins.py index a83ca03c..9d238e7d 100644 --- a/modelscope/utils/plugins.py +++ b/modelscope/utils/plugins.py @@ -263,12 +263,11 @@ def import_module_and_submodules(package_name: str, def install_module_from_requirements(requirement_path, ): - """ + """ install module from requirements Args: requirement_path: The path of requirement file - Returns: - + No returns, raise error if failed """ install_list = [] @@ -292,6 +291,15 @@ def install_module_from_requirements(requirement_path, ): def import_module_from_file(module_name, file_path): + """ install module by name with file path + + Args: + module_name: the module name need to be import + file_path: the related file path that matched with the module name + + Returns: return the module class + + """ spec = importlib.util.spec_from_file_location(module_name, file_path) module = importlib.util.module_from_spec(spec) 
spec.loader.exec_module(module) @@ -299,6 +307,14 @@ def import_module_from_file(module_name, file_path): def import_module_from_model_dir(model_dir): + """ import all the necessary module from a model dir + + Args: + model_dir: model file location + + No returns, raise error if failed + + """ from pathlib import Path file_scanner = FilesAstScanning() file_scanner.traversal_files(model_dir) @@ -317,6 +333,14 @@ def import_module_from_model_dir(model_dir): def install_requirements_by_names(plugins: List[str]): + """ install the requirements by names + + Args: + plugins: name of plugins (pai-easyscv, transformers) + + No returns, raise error if failed + + """ plugins_manager = PluginsManager() uninstalled_plugins = [] for plugin in plugins: @@ -333,6 +357,14 @@ def install_requirements_by_names(plugins: List[str]): def install_requirements_by_files(requirements: List[str]): + """ install the requriements by files + + Args: + requirements: a list of files including requirements info (requirements.txt) + + No returns, raise error if failed + + """ for requirement in requirements: install_module_from_requirements(requirement) @@ -343,7 +375,8 @@ def register_plugins_repo(plugins: List[str]) -> None: install_requirements_by_names(plugins) modules = [] for plugin in plugins: - modules.extend(get_modules_from_package(plugin)) + module_name, module_version, _ = get_modules_from_package(plugin) + modules.extend(module_name) import_plugins(modules) @@ -362,12 +395,15 @@ DEFAULT_INDEX = 'https://pypi.org/simple/' def get_modules_from_package(package): - """ to get the modules from a installed package + """ to get the modules from an installed package Args: package: The distribution name or package name Returns: + import_names: The modules that in the package distribution + import_version: The version of those modules, should be same and identical + package_name: The package name, if installed by whl file, the package is unknown, should be passed """ from zipfile import ZipFile @@ -378,8 +414,6 @@ def get_modules_from_package(package): from urllib.parse import urlparse from urllib import request as urllib2 from pip._internal.utils.packaging import get_requirement - req = get_requirement(package) - package = req.name def urlretrieve(url, filename, data=None, auth=None): if auth is not None: @@ -591,24 +625,58 @@ def get_modules_from_package(package): return result def discover_import_names(whl_file): + import re logger.debug('finding import names') zipfile = ZipFile(file=whl_file) namelist = zipfile.namelist() [top_level_fname ] = [x for x in namelist if x.endswith('top_level.txt')] + [metadata_fname + ] = [x for x in namelist if x.endswith('.dist-info/METADATA')] all_names = zipfile.read(top_level_fname).decode( 'utf-8').strip().splitlines() + metadata = zipfile.read(metadata_fname).decode('utf-8') public_names = [n for n in all_names if not n.startswith('_')] - return public_names + + version_pattern = re.compile(r'^Version: (?P.+)$', + re.MULTILINE) + name_pattern = re.compile(r'^Name: (?P.+)$', re.MULTILINE) + + version_match = version_pattern.search(metadata) + name_match = name_pattern.search(metadata) + + module_version = version_match.group('version') + module_name = name_match.group('name') + + return public_names, module_version, module_name tmpdir = mkdtemp() - data = get(package, tmpdir=tmpdir) - import_names = discover_import_names(data['path']) + if package.endswith('.whl'): + """if user using .whl file then parse the whl to get the module name""" + if not os.path.isfile(package): + 
file_name = os.path.basename(package) + file_path = os.path.join(tmpdir, file_name) + whl_file, _ = _download_dist(package, file_path, None, None) + else: + whl_file = package + else: + """if user using package name then generate whl file and parse the file to get the module name by + the discover_import_names method + """ + req = get_requirement(package) + package = req.name + data = get(package, tmpdir=tmpdir) + whl_file = data['path'] + import_names, import_version, package_name = discover_import_names( + whl_file) shutil.rmtree(tmpdir) - return import_names + return import_names, import_version, package_name class PluginsManager(object): + """ + plugins manager class + """ def __init__(self, cache_dir=MODELSCOPE_FILE_DIR, @@ -633,12 +701,26 @@ class PluginsManager(object): package: the package name need to be installed Returns: + if_installed: True if installed + version: the version of installed or None if not installed """ if package.split('.')[-1] == 'whl': - return False, '' + # install from whl should test package name instead of module name + _, module_version, package_name = get_modules_from_package(package) + local_installed, version = PluginsManager._check_plugin_installed( + package_name) + if local_installed and module_version != version: + return False, version + elif not local_installed: + return False, version + return True, module_version + else: + return PluginsManager._check_plugin_installed(package) + @staticmethod + def _check_plugin_installed(package, verified_version=None): from pip._internal.utils.packaging import get_requirement, specifiers req = get_requirement(package) @@ -656,11 +738,15 @@ class PluginsManager(object): if not installed_valid_version: installed = False break + except KeyError: version = '' installed = False - return installed, version + if installed and verified_version is not None and verified_version != version: + return False, verified_version + else: + return installed, version @staticmethod def pip_command( @@ -675,6 +761,9 @@ class PluginsManager(object): such as ['-r', 'requirements'] Returns: + status_code: The pip command status code, 0 if success, else is failed + options: parsed option from system args by pip command + args: the unknown args that could be parsed by pip command """ from pip._internal.commands import create_command @@ -702,6 +791,7 @@ class PluginsManager(object): Args: install_args (list): List of arguments passed to `pip install`. index_url (str, optional): The pypi index url. 
+ force_update: If force update on or off """ if len(install_args) == 0: @@ -730,6 +820,16 @@ class PluginsManager(object): return status_code, install_args def parse_args_info(self, args: List[str], options): + """ + parse arguments input info + Args: + args: the list of args from pip command output + options: the options that parsed from system args by pip command method + + Returns: + installed_package: generate installed package info in order to store in the file + the info includes: name, url and desc of the package + """ installed_package = [] # the case of install with requirements @@ -781,6 +881,15 @@ class PluginsManager(object): def uninstall_plugins(self, uninstall_args: Union[str, List], is_yes=False): + """ + uninstall plugins + Args: + uninstall_args: args used to uninstall by pip command + is_yes: force yes without verified + + Returns: status code, and uninstall args + + """ if is_yes is not None: uninstall_args += ['-y'] @@ -862,6 +971,7 @@ class PluginsManager(object): show_all: show installed and official supported if True, else only those installed Returns: + local_plugins_info: show the list of plugins info """ local_plugins_info = self._get_plugins_from_file() @@ -901,6 +1011,7 @@ class PluginsManager(object): override: Override the file by the list if True, else only update. Returns: + local_plugins_info_json: the json version of updated plugins info """ local_plugins_info = self._get_plugins_from_file() @@ -921,12 +1032,12 @@ class PluginsManager(object): self, package_names: Union[str, list], ): - """ - + """remove the plugins from file Args: package_names: package name Returns: + local_plugins_info_json: the json version of updated plugins info """ local_plugins_info = self._get_plugins_from_file() @@ -1012,4 +1123,5 @@ class EnvsManager(object): if __name__ == '__main__': install_requirements_by_files(['adaseq']) - import_name = get_modules_from_package('pai-easycv') + import_name, import_version, package_name = get_modules_from_package( + 'pai-easycv') diff --git a/modelscope/utils/regress_test_utils.py b/modelscope/utils/regress_test_utils.py index 0f10c1ce..e03b3a7c 100644 --- a/modelscope/utils/regress_test_utils.py +++ b/modelscope/utils/regress_test_utils.py @@ -483,9 +483,9 @@ def numpify_tensor_nested(tensors, reduction=None, clip_value=10000): t = np.where(t > clip_value, clip_value, t) t = np.where(t < -clip_value, -clip_value, t) if reduction == 'sum': - return t.sum(dtype=np.float) + return t.sum(dtype=float) elif reduction == 'mean': - return t.mean(dtype=np.float) + return t.mean(dtype=float) return t return tensors diff --git a/modelscope/utils/service_utils.py b/modelscope/utils/service_utils.py index 6e7c0fc1..8f7ca42d 100644 --- a/modelscope/utils/service_utils.py +++ b/modelscope/utils/service_utils.py @@ -8,6 +8,7 @@ import requests from modelscope.outputs import TASK_OUTPUTS, OutputKeys from modelscope.pipeline_inputs import TASK_INPUTS, InputType +from modelscope.utils.url_utils import valid_url # service data decoder func decodes data from network and convert it to pipeline's input @@ -82,12 +83,16 @@ def get_mimetype(filename): def decode_base64_to_binary(encoding): + if valid_url(encoding): + return encoding, '' extension = get_extension(encoding) data = encoding.split(',')[1] return base64.b64decode(data), extension def decode_base64_to_image(encoding): + if valid_url(encoding): + return encoding from PIL import Image content = encoding.split(';')[1] image_encoded = content.split(',')[1] @@ -151,6 +156,7 @@ def service_data_decoder(task, 
data): return input_data elif isinstance(input_type, dict): input_data = {} + data = json.loads(data) for key, val in input_type.items(): if val == InputType.IMAGE: input_data[key] = decode_base64_to_image(data[key]) @@ -158,6 +164,8 @@ def service_data_decoder(task, data): input_data[key] = decode_base64_to_binary(data[key])[0] elif val == InputType.TEXT: input_data[key] = data[key] + else: + return data return input_data diff --git a/modelscope/utils/test_utils.py b/modelscope/utils/test_utils.py index b4ce7299..03d293ec 100644 --- a/modelscope/utils/test_utils.py +++ b/modelscope/utils/test_utils.py @@ -150,7 +150,7 @@ def compare_arguments_nested(print_content, if arg1 is None: return True - elif isinstance(arg1, (int, str, bool, np.bool, np.integer, np.str)): + elif isinstance(arg1, (int, str, bool, np.bool_, np.integer, np.str_)): if arg1 != arg2: if print_content is not None: print(f'{print_content}, arg1:{arg1}, arg2:{arg2}') @@ -201,10 +201,8 @@ def compare_arguments_nested(print_content, return False return True elif isinstance(arg1, np.ndarray): - arg1 = np.where(np.equal(arg1, None), np.NaN, - arg1).astype(dtype=np.float) - arg2 = np.where(np.equal(arg2, None), np.NaN, - arg2).astype(dtype=np.float) + arg1 = np.where(np.equal(arg1, None), np.NaN, arg1).astype(dtype=float) + arg2 = np.where(np.equal(arg2, None), np.NaN, arg2).astype(dtype=float) if not all( np.isclose(arg1, arg2, rtol=rtol, atol=atol, equal_nan=True).flatten()): diff --git a/modelscope/utils/url_utils.py b/modelscope/utils/url_utils.py new file mode 100644 index 00000000..59cc2efd --- /dev/null +++ b/modelscope/utils/url_utils.py @@ -0,0 +1,36 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from urllib.parse import urlparse + +import pandas as pd + +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +def valid_url(url) -> bool: + try: + result = urlparse(url) + return all([result.scheme, result.netloc]) + except ValueError as e: + logger.warning(e) + return False + + +def fetch_csv_with_url(csv_url: str) -> pd.DataFrame: + """Fetch the csv content from url. + + Args: + csv_url (str): The input url of csv data. + + Returns: + A pandas DataFrame object which contains the csv content. + """ + try: + df = pd.read_csv(csv_url) + except Exception as e: + logger.error(f'Failed to fetch csv from url: {csv_url}') + raise e + + return df diff --git a/modelscope/version.py b/modelscope/version.py index 81c35379..cf9bbe98 100644 --- a/modelscope/version.py +++ b/modelscope/version.py @@ -1,5 +1,5 @@ # Make sure to modify __release_datetime__ to release time when making official release. 
-__version__ = '1.5.0' +__version__ = '1.6.0' # default release datetime for branches under active development is set # to be a time far-far-away-into-the-future -__release_datetime__ = '2099-10-13 08:56:12' +__release_datetime__ = '2023-05-18 23:59:00' diff --git a/requirements/audio/audio_asr.txt b/requirements/audio/audio_asr.txt index 1ecbe421..7725a0dd 100644 --- a/requirements/audio/audio_asr.txt +++ b/requirements/audio/audio_asr.txt @@ -1,2 +1,2 @@ easyasr>=0.0.2 -funasr>=0.4.0 +funasr>=0.5.0 diff --git a/requirements/audio/audio_signal.txt b/requirements/audio/audio_signal.txt index 61e688f3..16a18e67 100644 --- a/requirements/audio/audio_signal.txt +++ b/requirements/audio/audio_signal.txt @@ -1,11 +1,11 @@ hyperpyyaml -librosa<=0.9.2 +librosa==0.9.2 MinDAEC mir_eval>=0.7 numpy rotary_embedding_torch>=0.1.5 scipy SoundFile>0.10 -speechbrain>=0.5.7 +speechbrain>=0.5.12 torchaudio tqdm diff --git a/requirements/audio/audio_tts.txt b/requirements/audio/audio_tts.txt index b1a85faf..81a5c6f4 100644 --- a/requirements/audio/audio_tts.txt +++ b/requirements/audio/audio_tts.txt @@ -3,7 +3,7 @@ greenlet>=1.1.2 inflect jedi>=0.18.1 kantts -librosa<=0.9.2 +librosa==0.9.2 lxml matplotlib msgpack>=1.0.4 diff --git a/requirements/cv.txt b/requirements/cv.txt index 7d09a60b..0cec3659 100644 --- a/requirements/cv.txt +++ b/requirements/cv.txt @@ -6,7 +6,7 @@ chumpy clip>=1.0 control_ldm ddpm_guided_diffusion -diffusers +diffusers>=0.13.1,<0.15.0 easydict easyrobust edit_distance @@ -25,7 +25,7 @@ lmdb lpips ml_collections mmcls>=0.21.0 -mmdet>=2.25.0 +mmdet>=2.25.0,<=2.28.2 # mmdet3d-1.0.0rc6 remove networkx and numba version restriction mmdet3d==1.0.0a1 mmsegmentation<=0.30.0 @@ -39,7 +39,6 @@ onnxruntime>=1.10 onnxsim open-clip-torch>=2.7.0 opencv-python -pai-easycv>=0.8,<0.10.0 paint_ldm pandas panopticapi @@ -51,7 +50,7 @@ regex scikit-image>=0.19.3 scikit-learn>=0.20.1 shapely -shotdetect_scenedetect_lgss +shotdetect_scenedetect_lgss>=0.0.4 smplx tensorflow-estimator>=1.15.1 tf_slim diff --git a/requirements/framework.txt b/requirements/framework.txt index e15e95eb..e763ae63 100644 --- a/requirements/framework.txt +++ b/requirements/framework.txt @@ -4,9 +4,11 @@ datasets>=2.7.0,<=2.8.0 einops filelock>=3.3.0 gast>=0.2.2 -mmdet<=2.28.2 -numpy<1.24.0 +# for python3.7 python3.8 compatible +numpy<=1.22.0 oss2 +# for datasets compatible +pandas<=1.5.3 Pillow>=6.2.0 # pyarrow 9.0.0 introduced event_loop core dump pyarrow>=6.0.0,!=9.0.0 @@ -14,7 +16,7 @@ python-dateutil>=2.1 pyyaml requests scipy -setuptools==59.8.0 +setuptools simplejson>=3.3.0 sortedcontainers>=1.5.9 tqdm>=4.64.0 diff --git a/requirements/multi-modal.txt b/requirements/multi-modal.txt index 3d75f9b6..9d2c3448 100644 --- a/requirements/multi-modal.txt +++ b/requirements/multi-modal.txt @@ -1,7 +1,7 @@ accelerate diffusers>=0.13.1,<0.15.0 ftfy>=6.0.3 -librosa<=0.9.2 +librosa==0.9.2 opencv-python pycocoevalcap>=1.2 pycocotools>=2.0.4 @@ -12,13 +12,14 @@ rapidfuzz # which introduced compatability issues that are being investigated rouge_score<=0.0.4 sacrebleu +safetensors # scikit-video soundfile taming-transformers-rom1504 timm tokenizers torchvision -transformers>=4.12.0 +transformers>=4.27.1 # triton==2.0.0.dev20221120 unicodedata2 zhconv diff --git a/setup.py b/setup.py index 9affe028..98b12888 100644 --- a/setup.py +++ b/setup.py @@ -197,11 +197,12 @@ if __name__ == '__main__': setup( name='modelscope', version=get_version(), - description='', + description= + 'ModelScope: bring the notion of Model-as-a-Service to life.', 
long_description=readme(), long_description_content_type='text/markdown', - author='Alibaba ModelScope team', - author_email='modelscope@list.alibaba-inc.com', + author='ModelScope team', + author_email='contact@modelscope.cn', keywords='python,nlp,science,cv,speech,multi-modal', url='https://github.com/modelscope/modelscope', packages=find_packages(exclude=('configs', 'demo')), diff --git a/tests/cli/test_download_cmd.py b/tests/cli/test_download_cmd.py index 53cfdadd..6059fa12 100644 --- a/tests/cli/test_download_cmd.py +++ b/tests/cli/test_download_cmd.py @@ -17,7 +17,6 @@ DEFAULT_GIT_PATH = 'git' download_model_file_name = 'test.bin' -@unittest.skip('temporarily skip') class DownloadCMDTest(unittest.TestCase): def setUp(self): diff --git a/tests/export/test_export_speech_signal_process.py b/tests/export/test_export_speech_signal_process.py new file mode 100644 index 00000000..d3f6fe14 --- /dev/null +++ b/tests/export/test_export_speech_signal_process.py @@ -0,0 +1,83 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os +import pickle +import shutil +import tempfile +import unittest + +import torch + +from modelscope.exporters import Exporter +from modelscope.models import Model +from modelscope.utils.logger import get_logger +from modelscope.utils.regress_test_utils import (compare_arguments_nested, + numpify_tensor_nested) +from modelscope.utils.test_utils import test_level + +INPUT_PKL = 'data/test/audios/input.pkl' + +INPUT_NAME = 'input' +OUTPUT_NAME = 'output' + +logger = get_logger() + + +class ExportSpeechSignalProcessTest(unittest.TestCase): + + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_export_ans_dfsmn(self): + model_id = 'damo/speech_dfsmn_ans_psm_48k_causal' + model = Model.from_pretrained(model_id) + onnx_info = Exporter.from_model(model).export_onnx( + output_dir=self.tmp_dir) + + with open(os.path.join(os.getcwd(), INPUT_PKL), 'rb') as f: + fbank_input = pickle.load(f).cpu() + self.assertTrue( + self._validate_onnx_model(fbank_input, model, onnx_info['model']), + 'export onnx failed because of validation error.') + + @staticmethod + def _validate_onnx_model(dummy_inputs, model, output): + try: + import onnx + import onnxruntime as ort + except ImportError: + logger.warning( + 'Cannot validate the exported onnx file, because ' + 'the installation of onnx or onnxruntime cannot be found') + return + onnx_model = onnx.load(output) + onnx.checker.check_model(onnx_model) + ort_session = ort.InferenceSession(output) + with torch.no_grad(): + model.eval() + outputs_origin = model.forward(dummy_inputs) + outputs_origin = numpify_tensor_nested(outputs_origin) + + input_feed = {INPUT_NAME: dummy_inputs.numpy()} + outputs = ort_session.run( + None, + input_feed, + ) + outputs = numpify_tensor_nested(outputs[0]) + + print(outputs) + print(outputs_origin) + return compare_arguments_nested('Onnx model output match failed', + outputs, outputs_origin) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/hub/test_hub_operation.py b/tests/hub/test_hub_operation.py index 82752869..a22aaa64 100644 --- a/tests/hub/test_hub_operation.py +++ b/tests/hub/test_hub_operation.py @@ -22,7 +22,6 @@ DEFAULT_GIT_PATH = 'git' 
download_model_file_name = 'test.bin' -@unittest.skip('temporarily skip') class HubOperationTest(unittest.TestCase): def setUp(self): diff --git a/tests/hub/test_hub_private_files.py b/tests/hub/test_hub_private_files.py index b79c11cd..a343808f 100644 --- a/tests/hub/test_hub_private_files.py +++ b/tests/hub/test_hub_private_files.py @@ -21,7 +21,6 @@ from modelscope.utils.test_utils import (TEST_ACCESS_TOKEN1, download_model_file_name = 'test.bin' -@unittest.skip('temporarily skip') class HubPrivateFileDownloadTest(unittest.TestCase): def setUp(self): diff --git a/tests/hub/test_hub_private_repository.py b/tests/hub/test_hub_private_repository.py index bd2984cf..1b7c41cd 100644 --- a/tests/hub/test_hub_private_repository.py +++ b/tests/hub/test_hub_private_repository.py @@ -17,7 +17,6 @@ from modelscope.utils.test_utils import (TEST_ACCESS_TOKEN1, DEFAULT_GIT_PATH = 'git' -@unittest.skip('temporarily skip') class HubPrivateRepositoryTest(unittest.TestCase): def setUp(self): diff --git a/tests/hub/test_hub_repository.py b/tests/hub/test_hub_repository.py index a006d7c2..7631f5db 100644 --- a/tests/hub/test_hub_repository.py +++ b/tests/hub/test_hub_repository.py @@ -26,7 +26,6 @@ DEFAULT_GIT_PATH = 'git' download_model_file_name = 'test.bin' -@unittest.skip('temporarily skip') class HubRepositoryTest(unittest.TestCase): def setUp(self): @@ -81,6 +80,20 @@ class HubRepositoryTest(unittest.TestCase): assert lfs_file1 in lfs_files assert lfs_file2 in lfs_files + def test_add_lfs_file_type(self): + repo = Repository(self.model_dir, clone_from=self.model_id) + assert os.path.exists(os.path.join(self.model_dir, ModelFile.README)) + os.chdir(self.model_dir) + lfs_file = 'test.safetensors' + os.system("echo 'safttensor'>%s" + % os.path.join(self.model_dir, lfs_file)) + repo.add_lfs_type('*.safetensors') + repo.push('test') + # check lfs files. 
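+        # after push, files matching *.safetensors (here test.safetensors) should be listed as LFS objects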
+ git_wrapper = GitCommandWrapper() + lfs_files = git_wrapper.list_lfs_files(self.model_dir) + assert lfs_file in lfs_files + if __name__ == '__main__': unittest.main() diff --git a/tests/hub/test_hub_retry.py b/tests/hub/test_hub_retry.py index 7f47f119..e294cb68 100644 --- a/tests/hub/test_hub_retry.py +++ b/tests/hub/test_hub_retry.py @@ -12,7 +12,6 @@ from modelscope.hub.api import HubApi from modelscope.hub.file_download import http_get_file -@unittest.skip('temporarily skip') class HubOperationTest(unittest.TestCase): def setUp(self): diff --git a/tests/hub/test_hub_revision.py b/tests/hub/test_hub_revision.py index e97422ad..00d5d53d 100644 --- a/tests/hub/test_hub_revision.py +++ b/tests/hub/test_hub_revision.py @@ -23,7 +23,6 @@ download_model_file_name = 'test.bin' download_model_file_name2 = 'test2.bin' -@unittest.skip('temporarily skip') class HubRevisionTest(unittest.TestCase): def setUp(self): diff --git a/tests/hub/test_hub_revision_release_mode.py b/tests/hub/test_hub_revision_release_mode.py index 49a83371..3b8416db 100644 --- a/tests/hub/test_hub_revision_release_mode.py +++ b/tests/hub/test_hub_revision_release_mode.py @@ -26,7 +26,6 @@ download_model_file_name = 'test.bin' download_model_file_name2 = 'test2.bin' -@unittest.skip('temporarily skip') class HubRevisionTest(unittest.TestCase): def setUp(self): diff --git a/tests/hub/test_hub_upload.py b/tests/hub/test_hub_upload.py index 8e439aeb..2a66cb8b 100644 --- a/tests/hub/test_hub_upload.py +++ b/tests/hub/test_hub_upload.py @@ -19,7 +19,6 @@ from modelscope.utils.test_utils import (TEST_ACCESS_TOKEN1, TEST_MODEL_ORG, logger = get_logger() -@unittest.skip('temporarily skip') class HubUploadTest(unittest.TestCase): def setUp(self): @@ -38,6 +37,7 @@ class HubUploadTest(unittest.TestCase): os.mkdir(self.finetune_path) os.system("echo '{}'>%s" % os.path.join(self.finetune_path, ModelFile.CONFIGURATION)) + os.environ['MODELSCOPE_TRAIN_ID'] = 'test-id' def tearDown(self): logger.info('TearDown') diff --git a/tests/metrics/test_translation_evaluation_metrics.py b/tests/metrics/test_translation_evaluation_metrics.py new file mode 100644 index 00000000..801f742b --- /dev/null +++ b/tests/metrics/test_translation_evaluation_metrics.py @@ -0,0 +1,30 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+ +import unittest + +from modelscope.metrics.translation_evaluation_metric import \ + TranslationEvaluationMetric +from modelscope.models.nlp.unite.configuration import InputFormat +from modelscope.utils.test_utils import test_level + + +class TestTranslationEvaluationMetrics(unittest.TestCase): + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_value(self): + metric = TranslationEvaluationMetric(gap_threshold=25.0) + + outputs = {'score': [0.25, 0.22, 0.30, 0.78, 1.11, 0.95, 1.00, 0.86]} + inputs = { + 'lp': ['zh-en'] * 8, + 'segment_id': [0, 0, 0, 1, 1, 2, 2, 2], + 'raw_score': [94.0, 60.0, 25.0, 59.5, 90.0, 100.0, 80.0, 60.0], + 'input_format': [InputFormat.SRC_REF] * 8, + } + metric.add(outputs, inputs) + result = metric.evaluate() + print(result) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/msdatasets/test_ms_dataset.py b/tests/msdatasets/test_ms_dataset.py index 8ded9a46..ddb84b45 100644 --- a/tests/msdatasets/test_ms_dataset.py +++ b/tests/msdatasets/test_ms_dataset.py @@ -195,18 +195,7 @@ class MsDatasetTest(unittest.TestCase): ) print(next(iter(tf_dataset))) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_streaming_load_coco(self): - small_coco_for_test = MsDataset.load( - dataset_name='EasyCV/small_coco_for_test', - split='train', - use_streaming=True, - download_mode=DownloadMode.FORCE_REDOWNLOAD) - dataset_sample_dict = next(iter(small_coco_for_test)) - print(dataset_sample_dict) - assert dataset_sample_dict.values() - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_streaming_load_uni_fold(self): """Test case for loading large scale datasets.""" dataset = MsDataset.load( @@ -269,7 +258,7 @@ class MsDatasetTest(unittest.TestCase): def test_to_custom_dataset_movie_scene_toydata(self): from modelscope.msdatasets.dataset_cls.custom_datasets.movie_scene_segmentation import \ MovieSceneSegmentationDataset - from modelscope.msdatasets.dataset_cls.dataset import ExternalDataset + from modelscope.msdatasets.dataset_cls import ExternalDataset model_id = 'damo/cv_resnet50-bert_video-scene-segmentation_movienet' cache_path = snapshot_download(model_id) diff --git a/tests/msdatasets/test_virgo_dataset.py b/tests/msdatasets/test_virgo_dataset.py new file mode 100644 index 00000000..96f7f25b --- /dev/null +++ b/tests/msdatasets/test_virgo_dataset.py @@ -0,0 +1,96 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os +import unittest + +from modelscope.hub.api import HubApi +from modelscope.msdatasets import MsDataset +from modelscope.msdatasets.dataset_cls.dataset import VirgoDataset +from modelscope.utils.constant import DownloadMode, Hubs, VirgoDatasetConfig +from modelscope.utils.logger import get_logger + +logger = get_logger() + +# Please use your own access token for buc account. +YOUR_ACCESS_TOKEN = 'your_access_token' +# Please use your own virgo dataset id and ensure you have access to it. 
+VIRGO_DATASET_ID = 'your_virgo_dataset_id' + + +class TestVirgoDataset(unittest.TestCase): + + def setUp(self): + self.api = HubApi() + self.api.login(YOUR_ACCESS_TOKEN) + + @unittest.skip('to be used for local test only') + def test_download_virgo_dataset_meta(self): + ds = MsDataset.load(dataset_name=VIRGO_DATASET_ID, hub=Hubs.virgo) + ds_one = next(iter(ds)) + logger.info(ds_one) + + self.assertTrue(ds_one) + self.assertIsInstance(ds, VirgoDataset) + self.assertIn(VirgoDatasetConfig.col_id, ds_one) + self.assertIn(VirgoDatasetConfig.col_meta_info, ds_one) + self.assertIn(VirgoDatasetConfig.col_analysis_result, ds_one) + self.assertIn(VirgoDatasetConfig.col_external_info, ds_one) + + @unittest.skip('to be used for local test only') + def test_download_virgo_dataset_files(self): + ds = MsDataset.load( + dataset_name=VIRGO_DATASET_ID, + hub=Hubs.virgo, + download_virgo_files=True) + + ds_one = next(iter(ds)) + logger.info(ds_one) + + self.assertTrue(ds_one) + self.assertIsInstance(ds, VirgoDataset) + self.assertTrue(ds.download_virgo_files) + self.assertIn(VirgoDatasetConfig.col_cache_file, ds_one) + cache_file_path = ds_one[VirgoDatasetConfig.col_cache_file] + self.assertTrue(os.path.exists(cache_file_path)) + + @unittest.skip('to be used for local test only') + def test_force_download_virgo_dataset_files(self): + ds = MsDataset.load( + dataset_name=VIRGO_DATASET_ID, + hub=Hubs.virgo, + download_mode=DownloadMode.FORCE_REDOWNLOAD, + download_virgo_files=True) + + ds_one = next(iter(ds)) + logger.info(ds_one) + + self.assertTrue(ds_one) + self.assertIsInstance(ds, VirgoDataset) + self.assertTrue(ds.download_virgo_files) + self.assertIn(VirgoDatasetConfig.col_cache_file, ds_one) + cache_file_path = ds_one[VirgoDatasetConfig.col_cache_file] + self.assertTrue(os.path.exists(cache_file_path)) + + @unittest.skip('to be used for local test only') + def test_download_virgo_dataset_odps(self): + # Note: the samplingType must be 1, which means to get the dataset from MaxCompute(ODPS). + import pandas as pd + + ds = MsDataset.load( + dataset_name=VIRGO_DATASET_ID, + hub=Hubs.virgo, + odps_batch_size=100, + odps_limit=2000, + odps_drop_last=True) + + ds_one = next(iter(ds)) + logger.info(ds_one) + + self.assertTrue(ds_one) + self.assertIsInstance(ds, VirgoDataset) + self.assertTrue(ds_one, pd.DataFrame) + logger.info(f'The shape of sample: {ds_one.shape}') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/easycv_pipelines/test_panoptic_segmentation_pipeline.py b/tests/pipelines/easycv_pipelines/test_panoptic_segmentation_pipeline.py deleted file mode 100644 index 49e01251..00000000 --- a/tests/pipelines/easycv_pipelines/test_panoptic_segmentation_pipeline.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import unittest - -import cv2 - -from modelscope.outputs import OutputKeys -from modelscope.pipelines import pipeline -from modelscope.utils.constant import Tasks -from modelscope.utils.cv.image_utils import panoptic_seg_masks_to_image -from modelscope.utils.demo_utils import DemoCompatibilityCheck -from modelscope.utils.test_utils import test_level - - -class EasyCVPanopticSegmentationPipelineTest(unittest.TestCase, - DemoCompatibilityCheck): - img_path = 'data/test/images/image_semantic_segmentation.jpg' - - def setUp(self) -> None: - self.task = Tasks.image_segmentation - self.model_id = 'damo/cv_r50_panoptic-segmentation_cocopan' - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_r50(self): - segmentor = pipeline(task=self.task, model=self.model_id) - outputs = segmentor(self.img_path) - draw_img = panoptic_seg_masks_to_image(outputs[OutputKeys.MASKS]) - cv2.imwrite('result.jpg', draw_img) - print('print ' + self.model_id + ' success') - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/pipelines/easycv_pipelines/test_segmentation_pipeline.py b/tests/pipelines/easycv_pipelines/test_segmentation_pipeline.py deleted file mode 100644 index 5f6dac4b..00000000 --- a/tests/pipelines/easycv_pipelines/test_segmentation_pipeline.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import unittest -from distutils.version import LooseVersion - -import cv2 -import easycv -import numpy as np -from PIL import Image - -from modelscope.outputs import OutputKeys -from modelscope.pipelines import pipeline -from modelscope.utils.constant import Tasks -from modelscope.utils.cv.image_utils import semantic_seg_masks_to_image -from modelscope.utils.demo_utils import DemoCompatibilityCheck -from modelscope.utils.test_utils import test_level - - -class EasyCVSegmentationPipelineTest(unittest.TestCase, - DemoCompatibilityCheck): - img_path = 'data/test/images/image_segmentation.jpg' - - def setUp(self) -> None: - self.task = Tasks.image_segmentation - self.model_id = 'damo/cv_segformer-b0_image_semantic-segmentation_coco-stuff164k' - - def _internal_test_(self, model_id): - semantic_seg = pipeline(task=Tasks.image_segmentation, model=model_id) - outputs = semantic_seg(self.img_path) - - draw_img = semantic_seg_masks_to_image(outputs[OutputKeys.MASKS]) - cv2.imwrite('result.jpg', draw_img) - print('test ' + model_id + ' DONE') - - def _internal_test_batch_(self, model_id, num_samples=2, batch_size=2): - # TODO: support in the future - img = np.asarray(Image.open(self.img_path)) - num_samples = num_samples - batch_size = batch_size - semantic_seg = pipeline( - task=Tasks.image_segmentation, - model=model_id, - batch_size=batch_size) - outputs = semantic_seg([self.img_path] * num_samples) - - self.assertEqual(semantic_seg.predict_op.batch_size, batch_size) - self.assertEqual(len(outputs), num_samples) - - for output in outputs: - self.assertListEqual( - list(img.shape)[:2], list(output['seg_pred'].shape)) - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_segformer_b0(self): - model_id = 'damo/cv_segformer-b0_image_semantic-segmentation_coco-stuff164k' - self._internal_test_(model_id) - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_segformer_b1(self): - model_id = 
'damo/cv_segformer-b1_image_semantic-segmentation_coco-stuff164k' - self._internal_test_(model_id) - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_segformer_b2(self): - model_id = 'damo/cv_segformer-b2_image_semantic-segmentation_coco-stuff164k' - self._internal_test_(model_id) - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_segformer_b3(self): - model_id = 'damo/cv_segformer-b3_image_semantic-segmentation_coco-stuff164k' - self._internal_test_(model_id) - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_segformer_b4(self): - model_id = 'damo/cv_segformer-b4_image_semantic-segmentation_coco-stuff164k' - self._internal_test_(model_id) - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_segformer_b5(self): - model_id = 'damo/cv_segformer-b5_image_semantic-segmentation_coco-stuff164k' - self._internal_test_(model_id) - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/pipelines/plugin_remote_pipelines/test_plugin_model.py b/tests/pipelines/plugin_remote_pipelines/test_plugin_model.py index 43d840ea..71b9e64f 100644 --- a/tests/pipelines/plugin_remote_pipelines/test_plugin_model.py +++ b/tests/pipelines/plugin_remote_pipelines/test_plugin_model.py @@ -3,12 +3,11 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.plugins import PluginsManager from modelscope.utils.test_utils import test_level -class PluginModelTest(unittest.TestCase, DemoCompatibilityCheck): +class PluginModelTest(unittest.TestCase): def setUp(self): self.package = 'adaseq' diff --git a/tests/pipelines/test_abnormal_object_detection.py b/tests/pipelines/test_abnormal_object_detection.py index fbce51c6..c6264069 100644 --- a/tests/pipelines/test_abnormal_object_detection.py +++ b/tests/pipelines/test_abnormal_object_detection.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class ObjectDetectionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_object_detection @@ -20,10 +19,6 @@ class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): result = object_detect(input_location) print(result) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_action_detection.py b/tests/pipelines/test_action_detection.py index ae7e60b1..d724c81a 100644 --- a/tests/pipelines/test_action_detection.py +++ b/tests/pipelines/test_action_detection.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ActionDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class ActionDetectionTest(unittest.TestCase): def setUp(self) -> None: self.task = 
Tasks.action_detection @@ -20,10 +19,6 @@ class ActionDetectionTest(unittest.TestCase, DemoCompatibilityCheck): 'data/test/videos/action_detection_test_video.mp4') print('action detection results:', result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_action_recognition.py b/tests/pipelines/test_action_recognition.py index 292eb238..9d0c6175 100644 --- a/tests/pipelines/test_action_recognition.py +++ b/tests/pipelines/test_action_recognition.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ActionRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class ActionRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.action_recognition @@ -37,10 +36,6 @@ class ActionRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): 'data/test/videos/action_recognition_test_video.mp4') print('pst recognition results:', result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_adaint_image_color_enhance.py b/tests/pipelines/test_adaint_image_color_enhance.py index e36a85ec..f0efef5f 100644 --- a/tests/pipelines/test_adaint_image_color_enhance.py +++ b/tests/pipelines/test_adaint_image_color_enhance.py @@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class AdaIntImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck): +class AdaIntImageColorEnhanceTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_adaint_image-color-enhance-models' @@ -40,11 +39,6 @@ class AdaIntImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck): self.pipeline_inference(img_color_enhance, 'data/test/images/image_color_enhance.png') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - @unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest only') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_addr_mgeo.py b/tests/pipelines/test_addr_mgeo.py index d630b857..e678d285 100644 --- a/tests/pipelines/test_addr_mgeo.py +++ b/tests/pipelines/test_addr_mgeo.py @@ -8,12 +8,11 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import TextClassificationPipeline from modelscope.preprocessors import TextClassificationTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool from modelscope.utils.test_utils import test_level -class MGeoTest(unittest.TestCase, DemoCompatibilityCheck): +class MGeoTest(unittest.TestCase): multi_modal_inputs = { 'source_sentence': ['杭州余杭东方未来学校附近世纪华联商场(金家渡北苑店)'], @@ -117,10 +116,6 @@ class MGeoTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = 
pipeline(task=task, model=model) print(pipeline_ins(input=inputs)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_addr_similarity.py b/tests/pipelines/test_addr_similarity.py index 8c1f93c9..ecc879eb 100644 --- a/tests/pipelines/test_addr_similarity.py +++ b/tests/pipelines/test_addr_similarity.py @@ -8,12 +8,11 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import TextClassificationPipeline from modelscope.preprocessors import TextClassificationTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool from modelscope.utils.test_utils import test_level -class AddrSimilarityTest(unittest.TestCase, DemoCompatibilityCheck): +class AddrSimilarityTest(unittest.TestCase): sentence1 = '阿里巴巴西溪园区' sentence2 = '文一西路阿里巴巴' @@ -37,10 +36,6 @@ class AddrSimilarityTest(unittest.TestCase, DemoCompatibilityCheck): task=Tasks.text_classification, model=self.model_id) print(pipeline_ins(input=(self.sentence1, self.sentence2))) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_animal_recognition.py b/tests/pipelines/test_animal_recognition.py index eb9f92e6..57937770 100644 --- a/tests/pipelines/test_animal_recognition.py +++ b/tests/pipelines/test_animal_recognition.py @@ -4,11 +4,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class AnimalRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class AnimalRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.animal_recognition @@ -21,10 +20,6 @@ class AnimalRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): result = animal_recognition('data/test/images/dogs.jpg') print(result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_arc_face_recognition.py b/tests/pipelines/test_arc_face_recognition.py index fa17dd91..481b9f33 100644 --- a/tests/pipelines/test_arc_face_recognition.py +++ b/tests/pipelines/test_arc_face_recognition.py @@ -6,11 +6,10 @@ import numpy as np from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class FaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class FaceRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.face_recognition @@ -31,10 +30,6 @@ class FaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): sim = np.dot(emb1[0], emb2[0]) print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git 
a/tests/pipelines/test_automatic_post_editing.py b/tests/pipelines/test_automatic_post_editing.py index da09851c..190ff788 100644 --- a/tests/pipelines/test_automatic_post_editing.py +++ b/tests/pipelines/test_automatic_post_editing.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class AutomaticPostEditingTest(unittest.TestCase, DemoCompatibilityCheck): +class AutomaticPostEditingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.translation @@ -21,10 +20,6 @@ class AutomaticPostEditingTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(self.task, model=self.model_id) print(pipeline_ins(input=inputs)) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_automatic_speech_recognition.py b/tests/pipelines/test_automatic_speech_recognition.py index dc624f29..6014438e 100644 --- a/tests/pipelines/test_automatic_speech_recognition.py +++ b/tests/pipelines/test_automatic_speech_recognition.py @@ -10,7 +10,6 @@ import soundfile from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import ColorCodes, Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import download_and_untar, test_level @@ -26,8 +25,7 @@ TFRECORD_TESTSETS_FILE = 'tfrecord.tar.gz' TFRECORD_TESTSETS_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/datasets/tfrecord.tar.gz' -class AutomaticSpeechRecognitionTest(unittest.TestCase, - DemoCompatibilityCheck): +class AutomaticSpeechRecognitionTest(unittest.TestCase): action_info = { 'test_run_with_wav_pytorch': { 'checking_item': OutputKeys.TEXT, @@ -457,10 +455,6 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase, logger.info(ColorCodes.MAGENTA + str(rec_result) + ColorCodes.END) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_bad_image_detecting.py b/tests/pipelines/test_bad_image_detecting.py index 728da8d1..05954f7b 100644 --- a/tests/pipelines/test_bad_image_detecting.py +++ b/tests/pipelines/test_bad_image_detecting.py @@ -7,11 +7,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import BadImageDetecingPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class BadImageDetectingTest(unittest.TestCase, DemoCompatibilityCheck): +class BadImageDetectingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.bad_image_detecting @@ -58,10 +57,6 @@ class BadImageDetectingTest(unittest.TestCase, DemoCompatibilityCheck): print('pipeline: the out_label is {}'.format(labels)) print('pipeline: the out_score is {}'.format(scores)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git 
a/tests/pipelines/test_body_2d_keypoints.py b/tests/pipelines/test_body_2d_keypoints.py index 5d90cbf0..25d8fa55 100644 --- a/tests/pipelines/test_body_2d_keypoints.py +++ b/tests/pipelines/test_body_2d_keypoints.py @@ -8,11 +8,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import draw_keypoints -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class Body2DKeypointsTest(unittest.TestCase, DemoCompatibilityCheck): +class Body2DKeypointsTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.body_2d_keypoints @@ -34,10 +33,6 @@ class Body2DKeypointsTest(unittest.TestCase, DemoCompatibilityCheck): body_2d_keypoints = pipeline(self.task, model=self.model_id) self.pipeline_inference(body_2d_keypoints, Image.open(self.test_image)) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_body_3d_keypoints.py b/tests/pipelines/test_body_3d_keypoints.py index 6f73a243..33228022 100644 --- a/tests/pipelines/test_body_3d_keypoints.py +++ b/tests/pipelines/test_body_3d_keypoints.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class Body3DKeypointsTest(unittest.TestCase, DemoCompatibilityCheck): +class Body3DKeypointsTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_canonical_body-3d-keypoints_video' @@ -41,10 +40,6 @@ class Body3DKeypointsTest(unittest.TestCase, DemoCompatibilityCheck): % (self.test_video)) self.pipeline_inference(body_3d_keypoints, pipeline_input=cap) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_body_3d_keypoints_hdformer.py b/tests/pipelines/test_body_3d_keypoints_hdformer.py index 2ebbc95b..e86f247f 100644 --- a/tests/pipelines/test_body_3d_keypoints_hdformer.py +++ b/tests/pipelines/test_body_3d_keypoints_hdformer.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class Body3DKeypointsHDFormerTest(unittest.TestCase, DemoCompatibilityCheck): +class Body3DKeypointsHDFormerTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_hdformer_body-3d-keypoints_video' @@ -41,10 +40,6 @@ class Body3DKeypointsHDFormerTest(unittest.TestCase, DemoCompatibilityCheck): % (self.test_video)) self.pipeline_inference(body_3d_keypoints, pipeline_input=cap) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_canmt_translation.py b/tests/pipelines/test_canmt_translation.py index e3bce5d9..31e57040 100644 --- 
a/tests/pipelines/test_canmt_translation.py +++ b/tests/pipelines/test_canmt_translation.py @@ -8,11 +8,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import CanmtTranslationPipeline from modelscope.preprocessors import CanmtTranslationPreprocessor, Preprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class CanmtTranslationTest(unittest.TestCase, DemoCompatibilityCheck): +class CanmtTranslationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.competency_aware_translation @@ -59,10 +58,6 @@ class CanmtTranslationTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(task=self.task) print(pipeline_ins(self.input)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_card_detection.py b/tests/pipelines/test_card_detection.py index d913f494..676fb786 100644 --- a/tests/pipelines/test_card_detection.py +++ b/tests/pipelines/test_card_detection.py @@ -8,11 +8,10 @@ from modelscope.msdatasets import MsDataset from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import draw_card_detection_result -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class CardDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class CardDetectionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.card_detection @@ -57,10 +56,6 @@ class CardDetectionTest(unittest.TestCase, DemoCompatibilityCheck): result = card_detection(img_path) self.show_result(img_path, result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_cartoon_stable_diffusion.py b/tests/pipelines/test_cartoon_stable_diffusion.py index 751c7ea8..6a91ed44 100644 --- a/tests/pipelines/test_cartoon_stable_diffusion.py +++ b/tests/pipelines/test_cartoon_stable_diffusion.py @@ -6,11 +6,10 @@ import cv2 from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class CartoonStableDiffusionTest(unittest.TestCase, DemoCompatibilityCheck): +class CartoonStableDiffusionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_to_image_synthesis diff --git a/tests/pipelines/test_chinese_stable_diffusion.py b/tests/pipelines/test_chinese_stable_diffusion.py index bd6d74aa..05207ddb 100644 --- a/tests/pipelines/test_chinese_stable_diffusion.py +++ b/tests/pipelines/test_chinese_stable_diffusion.py @@ -6,11 +6,10 @@ import cv2 from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ChineseStableDiffusionTest(unittest.TestCase, DemoCompatibilityCheck): +class ChineseStableDiffusionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_to_image_synthesis diff --git a/tests/pipelines/test_clip_interrogator.py b/tests/pipelines/test_clip_interrogator.py new file mode 100644 index 
00000000..615aef3c --- /dev/null +++ b/tests/pipelines/test_clip_interrogator.py @@ -0,0 +1,34 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +from modelscope.models import Model +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.test_utils import test_level + + +class CLIPInterrogatorTest(unittest.TestCase): + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_run_with_image_captioning_with_model(self): + model = Model.from_pretrained('damo/cv_clip-interrogator') + pipeline_caption = pipeline( + task=Tasks.image_captioning, + model=model, + ) + image = 'data/test/images/image_mplug_vqa.jpg' + result = pipeline_caption(image) + print(result[OutputKeys.CAPTION]) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_image_captioning_with_name(self): + pipeline_caption = pipeline( + Tasks.image_captioning, model='damo/cv_clip-interrogator') + image = 'data/test/images/image_mplug_vqa.jpg' + result = pipeline_caption(image) + print(result[OutputKeys.CAPTION]) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_cmdssl_video_embedding.py b/tests/pipelines/test_cmdssl_video_embedding.py index 5807c075..9e176cf2 100644 --- a/tests/pipelines/test_cmdssl_video_embedding.py +++ b/tests/pipelines/test_cmdssl_video_embedding.py @@ -4,11 +4,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class CMDSSLVideoEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): +class CMDSSLVideoEmbeddingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_embedding @@ -22,10 +21,6 @@ class CMDSSLVideoEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): print(f'video embedding output: {result}.') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_content_check.py b/tests/pipelines/test_content_check.py index c68af257..39a791a0 100644 --- a/tests/pipelines/test_content_check.py +++ b/tests/pipelines/test_content_check.py @@ -4,11 +4,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ContentCheckTest(unittest.TestCase, DemoCompatibilityCheck): +class ContentCheckTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_classification @@ -20,10 +19,6 @@ class ContentCheckTest(unittest.TestCase, DemoCompatibilityCheck): result = content_check_func('data/test/images/content_check.jpg') print(result) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_controllable_image_generation.py b/tests/pipelines/test_controllable_image_generation.py index a5cff66c..7d6b03ce 100644 --- a/tests/pipelines/test_controllable_image_generation.py +++ b/tests/pipelines/test_controllable_image_generation.py @@ -10,12 +10,10 @@ from modelscope.outputs import OutputKeys from 
modelscope.pipelines import pipeline from modelscope.pipelines.cv import ControllableImageGenerationPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ControllableImageGenerationTest(unittest.TestCase, - DemoCompatibilityCheck): +class ControllableImageGenerationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.controllable_image_generation @@ -68,10 +66,6 @@ class ControllableImageGenerationTest(unittest.TestCase, print( 'pipeline: the output image path is {}'.format(output_image_path)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_conversational_text_to_sql.py b/tests/pipelines/test_conversational_text_to_sql.py index 17fffcaf..a7e15dcc 100644 --- a/tests/pipelines/test_conversational_text_to_sql.py +++ b/tests/pipelines/test_conversational_text_to_sql.py @@ -8,13 +8,12 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import ConversationalTextToSqlPipeline from modelscope.preprocessors import ConversationalTextToSqlPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.nlp.space_T_en.utils import \ text2sql_tracking_and_print_results from modelscope.utils.test_utils import test_level -class ConversationalTextToSql(unittest.TestCase, DemoCompatibilityCheck): +class ConversationalTextToSql(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.table_question_answering @@ -67,10 +66,6 @@ class ConversationalTextToSql(unittest.TestCase, DemoCompatibilityCheck): pipelines = [pipeline(task=self.task, model=self.model_id)] text2sql_tracking_and_print_results(self.test_case, pipelines) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_crowd_counting.py b/tests/pipelines/test_crowd_counting.py index 4e15cfca..be14f29e 100644 --- a/tests/pipelines/test_crowd_counting.py +++ b/tests/pipelines/test_crowd_counting.py @@ -8,14 +8,13 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import numpy_to_cv2img -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import test_level logger = get_logger() -class CrowdCountingTest(unittest.TestCase, DemoCompatibilityCheck): +class CrowdCountingTest(unittest.TestCase): def setUp(self) -> None: self.input_location = 'data/test/images/crowd_counting.jpg' @@ -56,10 +55,6 @@ class CrowdCountingTest(unittest.TestCase, DemoCompatibilityCheck): else: raise ValueError('process error') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_csanmt_translation.py b/tests/pipelines/test_csanmt_translation.py index d989a6c4..03545fc5 100644 --- a/tests/pipelines/test_csanmt_translation.py +++ b/tests/pipelines/test_csanmt_translation.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import 
pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TranslationTest(unittest.TestCase, DemoCompatibilityCheck): +class TranslationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.translation @@ -90,10 +89,6 @@ class TranslationTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(self.task) print(pipeline_ins(input=inputs)) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_damo_face_detection.py b/tests/pipelines/test_damo_face_detection.py index 8bd1e009..44578c3e 100644 --- a/tests/pipelines/test_damo_face_detection.py +++ b/tests/pipelines/test_damo_face_detection.py @@ -7,11 +7,10 @@ import cv2 from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import draw_face_detection_result -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class FaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class FaceDetectionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.face_detection @@ -36,10 +35,6 @@ class FaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck): result = face_detection(img_path) self.show_result(img_path, result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_ddcolor_image_colorization.py b/tests/pipelines/test_ddcolor_image_colorization.py index e1876329..5d752452 100644 --- a/tests/pipelines/test_ddcolor_image_colorization.py +++ b/tests/pipelines/test_ddcolor_image_colorization.py @@ -11,11 +11,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.pipelines.cv import DDColorImageColorizationPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class DDColorImageColorizationTest(unittest.TestCase, DemoCompatibilityCheck): +class DDColorImageColorizationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_colorization @@ -52,10 +51,6 @@ class DDColorImageColorizationTest(unittest.TestCase, DemoCompatibilityCheck): image_colorization = pipeline(Tasks.image_colorization) self.pipeline_inference(image_colorization, self.test_image) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_ddpm_semantic_segmentation.py b/tests/pipelines/test_ddpm_semantic_segmentation.py index a5303098..ad35e069 100644 --- a/tests/pipelines/test_ddpm_semantic_segmentation.py +++ b/tests/pipelines/test_ddpm_semantic_segmentation.py @@ -5,12 +5,10 @@ import torch from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class DDPMImageSemanticSegmentationTest(unittest.TestCase, - DemoCompatibilityCheck): +class 
DDPMImageSemanticSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_segmentation @@ -28,10 +26,6 @@ class DDPMImageSemanticSegmentationTest(unittest.TestCase, else: raise ValueError('process error') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_deeplpf_image_color_enhance.py b/tests/pipelines/test_deeplpf_image_color_enhance.py index 08b1a357..87b709fe 100644 --- a/tests/pipelines/test_deeplpf_image_color_enhance.py +++ b/tests/pipelines/test_deeplpf_image_color_enhance.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class DeepLPFImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck): +class DeepLPFImageColorEnhanceTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_deeplpfnet_image-color-enhance-models' @@ -37,10 +36,6 @@ class DeepLPFImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck): self.pipeline_inference(img_color_enhance, 'data/test/images/image_color_enhance.png') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_dialog_intent_prediction.py b/tests/pipelines/test_dialog_intent_prediction.py index 2ee46388..5a1729b8 100644 --- a/tests/pipelines/test_dialog_intent_prediction.py +++ b/tests/pipelines/test_dialog_intent_prediction.py @@ -8,11 +8,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import DialogIntentPredictionPipeline from modelscope.preprocessors import DialogIntentPredictionPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class DialogIntentPredictionTest(unittest.TestCase, DemoCompatibilityCheck): +class DialogIntentPredictionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.task_oriented_conversation @@ -68,10 +67,6 @@ class DialogIntentPredictionTest(unittest.TestCase, DemoCompatibilityCheck): for my_pipeline, item in list(zip(pipelines, self.test_case)): print(my_pipeline(item)) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_dialog_modeling.py b/tests/pipelines/test_dialog_modeling.py index 6b6259ce..202951a5 100644 --- a/tests/pipelines/test_dialog_modeling.py +++ b/tests/pipelines/test_dialog_modeling.py @@ -10,11 +10,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import DialogModelingPipeline from modelscope.preprocessors import DialogModelingPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class DialogModelingTest(unittest.TestCase, DemoCompatibilityCheck): +class DialogModelingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.task_oriented_conversation @@ -148,10 +147,6 @@ 
class DialogModelingTest(unittest.TestCase, DemoCompatibilityCheck): pipelines = [pipeline(task=self.task)] self.generate_and_print_dialog_response(pipelines) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_dialog_state_tracking.py b/tests/pipelines/test_dialog_state_tracking.py index 6cdd5ee7..e7f72b83 100644 --- a/tests/pipelines/test_dialog_state_tracking.py +++ b/tests/pipelines/test_dialog_state_tracking.py @@ -8,13 +8,12 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import DialogStateTrackingPipeline from modelscope.preprocessors import DialogStateTrackingPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.nlp.space.utils_dst import \ tracking_and_print_dialog_states from modelscope.utils.test_utils import test_level -class DialogStateTrackingTest(unittest.TestCase, DemoCompatibilityCheck): +class DialogStateTrackingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.task_oriented_conversation @@ -119,10 +118,6 @@ class DialogStateTrackingTest(unittest.TestCase, DemoCompatibilityCheck): pipelines = [pipeline(task=self.task, model=self.model_id)] tracking_and_print_dialog_states(self.test_case, pipelines) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_diffusers_stable_diffusion.py b/tests/pipelines/test_diffusers_stable_diffusion.py index 98c4862a..eef677fc 100644 --- a/tests/pipelines/test_diffusers_stable_diffusion.py +++ b/tests/pipelines/test_diffusers_stable_diffusion.py @@ -6,11 +6,10 @@ import cv2 from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class DiffusersStableDiffusionTest(unittest.TestCase, DemoCompatibilityCheck): +class DiffusersStableDiffusionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_to_image_synthesis diff --git a/tests/pipelines/test_disco_guided_diffusion.py b/tests/pipelines/test_disco_guided_diffusion.py index d7be7292..f3fd668b 100644 --- a/tests/pipelines/test_disco_guided_diffusion.py +++ b/tests/pipelines/test_disco_guided_diffusion.py @@ -5,11 +5,10 @@ import cv2 from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class DiscoGuidedDiffusionTest(unittest.TestCase, DemoCompatibilityCheck): +class DiscoGuidedDiffusionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_to_image_synthesis diff --git a/tests/pipelines/test_document_grounded_dialog_generate.py b/tests/pipelines/test_document_grounded_dialog_generate.py index da23fe19..b08a07fa 100644 --- a/tests/pipelines/test_document_grounded_dialog_generate.py +++ b/tests/pipelines/test_document_grounded_dialog_generate.py @@ -9,12 +9,10 @@ from modelscope.pipelines import pipeline from modelscope.preprocessors.nlp import \ DocumentGroundedDialogGeneratePreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from 
modelscope.utils.test_utils import test_level -class DocumentGroundedDialogGenerateTest(unittest.TestCase, - DemoCompatibilityCheck): +class DocumentGroundedDialogGenerateTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.document_grounded_dialog_generate diff --git a/tests/pipelines/test_document_grounded_dialog_retrieval.py b/tests/pipelines/test_document_grounded_dialog_retrieval.py index 6bcca369..48a63087 100644 --- a/tests/pipelines/test_document_grounded_dialog_retrieval.py +++ b/tests/pipelines/test_document_grounded_dialog_retrieval.py @@ -9,12 +9,10 @@ from modelscope.pipelines import pipeline from modelscope.preprocessors.nlp import \ DocumentGroundedDialogRetrievalPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class DocumentGroundedDialogRetrievalTest(unittest.TestCase, - DemoCompatibilityCheck): +class DocumentGroundedDialogRetrievalTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.document_grounded_dialog_retrieval diff --git a/tests/pipelines/test_document_segmentation.py b/tests/pipelines/test_document_segmentation.py index 41c490d2..09ce5756 100644 --- a/tests/pipelines/test_document_segmentation.py +++ b/tests/pipelines/test_document_segmentation.py @@ -6,14 +6,13 @@ from typing import Any, Dict from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import test_level logger = get_logger() -class DocumentSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): +class DocumentSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.document_segmentation @@ -64,10 +63,6 @@ class DocumentSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): for document in documents_list: print(document) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_document_vl_embedding.py b/tests/pipelines/test_document_vl_embedding.py index f8d2d5a3..349547d1 100644 --- a/tests/pipelines/test_document_vl_embedding.py +++ b/tests/pipelines/test_document_vl_embedding.py @@ -10,11 +10,10 @@ from modelscope.models import Model from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class DocumentVLEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): +class DocumentVLEmbeddingTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/multi-modal_convnext-roberta-base_vldoc-embedding' @@ -51,10 +50,6 @@ class DocumentVLEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): vldoc_doc_VL_emb_pipeline = pipeline(self.task) self.pipeline_inference(vldoc_doc_VL_emb_pipeline) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_domain_classification.py b/tests/pipelines/test_domain_classification.py index 8e5bfa7f..006daa65 100644 --- 
a/tests/pipelines/test_domain_classification.py +++ b/tests/pipelines/test_domain_classification.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class DomainClassificationTest(unittest.TestCase, DemoCompatibilityCheck): +class DomainClassificationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_classification @@ -36,10 +35,6 @@ class DomainClassificationTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(self.task, model=model_id) print(pipeline_ins(input=inputs)) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_efficient_diffusion_tuning.py b/tests/pipelines/test_efficient_diffusion_tuning.py index 9dc5e412..e33b2bf2 100644 --- a/tests/pipelines/test_efficient_diffusion_tuning.py +++ b/tests/pipelines/test_efficient_diffusion_tuning.py @@ -5,11 +5,10 @@ from modelscope.models import Model from modelscope.models.multi_modal import EfficientStableDiffusion from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class EfficientDiffusionTuningTest(unittest.TestCase, DemoCompatibilityCheck): +class EfficientDiffusionTuningTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.efficient_diffusion_tuning @@ -28,13 +27,9 @@ class EfficientDiffusionTuningTest(unittest.TestCase, DemoCompatibilityCheck): model = Model.from_pretrained(model_id) self.assertTrue(model.__class__ == EfficientStableDiffusion) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_efficient_diffusion_tuning_lora_demo_compatibility(self): - self.model_id = 'damo/multi-modal_efficient-diffusion-tuning-lora' - self.compatibility_check() - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_efficient_diffusion_tuning_control_lora_run_pipeline(self): + # TODO: to be fixed in the future model_id = 'damo/multi-modal_efficient-diffusion-tuning-control-lora' inputs = { 'prompt': @@ -53,11 +48,6 @@ class EfficientDiffusionTuningTest(unittest.TestCase, DemoCompatibilityCheck): model = Model.from_pretrained(model_id) self.assertTrue(model.__class__ == EfficientStableDiffusion) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_efficient_diffusion_tuning_control_lora_demo_compatibility(self): - self.model_id = 'damo/multi-modal_efficient-diffusion-tuning-control-lora' - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_extractive_summarization.py b/tests/pipelines/test_extractive_summarization.py index 26ac508c..a7f12d14 100644 --- a/tests/pipelines/test_extractive_summarization.py +++ b/tests/pipelines/test_extractive_summarization.py @@ -6,14 +6,13 @@ from typing import Any, Dict from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from 
modelscope.utils.test_utils import test_level logger = get_logger() -class ExtractiveSummarizationTest(unittest.TestCase, DemoCompatibilityCheck): +class ExtractiveSummarizationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.extractive_summarization @@ -46,10 +45,6 @@ class ExtractiveSummarizationTest(unittest.TestCase, DemoCompatibilityCheck): model_id=self.ponet_topic_model_id, documents=self.sentences) print(result[OutputKeys.TEXT]) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_face_2d_keypoints.py b/tests/pipelines/test_face_2d_keypoints.py index 7ccc8a59..875a0e11 100644 --- a/tests/pipelines/test_face_2d_keypoints.py +++ b/tests/pipelines/test_face_2d_keypoints.py @@ -10,7 +10,7 @@ from modelscope.utils.test_utils import test_level class EasyCVFace2DKeypointsPipelineTest(unittest.TestCase): - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip('skip easycv related cases') def test_face_2d_keypoints(self): img_path = 'data/test/images/face_detection.png' model_id = 'damo/cv_mobilenet_face-2d-keypoints_alignment' diff --git a/tests/pipelines/test_face_detection.py b/tests/pipelines/test_face_detection.py index 7e35cdbb..86e0f702 100644 --- a/tests/pipelines/test_face_detection.py +++ b/tests/pipelines/test_face_detection.py @@ -8,11 +8,10 @@ from modelscope.msdatasets import MsDataset from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import draw_face_detection_result -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class FaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class FaceDetectionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.face_detection @@ -42,10 +41,6 @@ class FaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck): result = face_detection(img_path) self.show_result(img_path, result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_face_image_generation.py b/tests/pipelines/test_face_image_generation.py index 21d8e835..fbd7e3b5 100644 --- a/tests/pipelines/test_face_image_generation.py +++ b/tests/pipelines/test_face_image_generation.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class FaceGenerationTest(unittest.TestCase, DemoCompatibilityCheck): +class FaceGenerationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.face_image_generation @@ -39,10 +38,6 @@ class FaceGenerationTest(unittest.TestCase, DemoCompatibilityCheck): face_generation = pipeline(self.task) self.pipeline_inference(face_generation, seed) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_face_recognition.py b/tests/pipelines/test_face_recognition.py index d3451f5d..7b84590c 
100644 --- a/tests/pipelines/test_face_recognition.py +++ b/tests/pipelines/test_face_recognition.py @@ -6,11 +6,10 @@ import numpy as np from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class FaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class FaceRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.face_recognition @@ -28,10 +27,6 @@ class FaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): sim = np.dot(emb1[0], emb2[0]) print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_face_recognition_onnx_fm.py b/tests/pipelines/test_face_recognition_onnx_fm.py index 8478b3bf..b60bec93 100644 --- a/tests/pipelines/test_face_recognition_onnx_fm.py +++ b/tests/pipelines/test_face_recognition_onnx_fm.py @@ -6,11 +6,10 @@ import numpy as np from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class FmFaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class FmFaceRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.face_recognition @@ -31,10 +30,6 @@ class FmFaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): sim = np.dot(emb1[0], emb2[0]) print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_face_recognition_onnx_ir.py b/tests/pipelines/test_face_recognition_onnx_ir.py index c45042be..a7cf008c 100644 --- a/tests/pipelines/test_face_recognition_onnx_ir.py +++ b/tests/pipelines/test_face_recognition_onnx_ir.py @@ -6,11 +6,10 @@ import numpy as np from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class IrFaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class IrFaceRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.face_recognition @@ -31,10 +30,6 @@ class IrFaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): sim = np.dot(emb1[0], emb2[0]) print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_face_recognition_onnx_transface.py b/tests/pipelines/test_face_recognition_onnx_transface.py new file mode 100644 index 00000000..183257f0 --- /dev/null +++ b/tests/pipelines/test_face_recognition_onnx_transface.py @@ -0,0 +1,35 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import unittest + +import numpy as np + +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.test_utils import test_level + + +class TransFaceRecognitionTest(unittest.TestCase): + + def setUp(self) -> None: + self.task = Tasks.face_recognition + self.model_id = 'damo/cv_vit_face-recognition' + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_face_compare(self): + img1 = 'data/test/images/face_recognition_1.png' + img2 = 'data/test/images/face_recognition_2.png' + + face_recognition = pipeline( + Tasks.face_recognition, model=self.model_id) + emb1 = face_recognition(img1)[OutputKeys.IMG_EMBEDDING] + emb2 = face_recognition(img2)[OutputKeys.IMG_EMBEDDING] + if emb1 is None or emb2 is None: + print('No Detected Face.') + else: + sim = np.dot(emb1[0], emb2[0]) + print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_face_recognition_ood.py b/tests/pipelines/test_face_recognition_ood.py index 8a6fb444..68cf1f0b 100644 --- a/tests/pipelines/test_face_recognition_ood.py +++ b/tests/pipelines/test_face_recognition_ood.py @@ -6,11 +6,10 @@ import numpy as np from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class FaceRecognitionOodTest(unittest.TestCase, DemoCompatibilityCheck): +class FaceRecognitionOodTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.face_recognition @@ -39,10 +38,6 @@ class FaceRecognitionOodTest(unittest.TestCase, DemoCompatibilityCheck): print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}') print(f'OOD score: img1:{score1:.3f} img2:{score2:.3f}') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_face_reconstruction.py b/tests/pipelines/test_face_reconstruction.py index b35482fb..06950487 100644 --- a/tests/pipelines/test_face_reconstruction.py +++ b/tests/pipelines/test_face_reconstruction.py @@ -14,13 +14,12 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level sys.path.append('.') -class FaceReconstructionTest(unittest.TestCase, DemoCompatibilityCheck): +class FaceReconstructionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.face_reconstruction @@ -60,7 +59,7 @@ class FaceReconstructionTest(unittest.TestCase, DemoCompatibilityCheck): Tasks.face_reconstruction, model=model_dir) self.pipeline_inference(face_reconstruction, self.test_image) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_modelhub(self): face_reconstruction = pipeline( Tasks.face_reconstruction, @@ -68,10 +67,6 @@ class FaceReconstructionTest(unittest.TestCase, DemoCompatibilityCheck): model_revision='v2.0.0-HRN') self.pipeline_inference(face_reconstruction, self.test_image) - @unittest.skip('demo compatibility test is only enabled 
on a needed-basis')
-    def test_demo_compatibility(self):
-        self.compatibility_check()
-
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/pipelines/test_faq_question_answering.py b/tests/pipelines/test_faq_question_answering.py
index 31680095..89f95162 100644
--- a/tests/pipelines/test_faq_question_answering.py
+++ b/tests/pipelines/test_faq_question_answering.py
@@ -12,11 +12,10 @@ from modelscope.pipelines.nlp import FaqQuestionAnsweringPipeline
 from modelscope.preprocessors import \
     FaqQuestionAnsweringTransformersPreprocessor
 from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
 from modelscope.utils.test_utils import test_level
 
 
-class FaqQuestionAnsweringTest(unittest.TestCase, DemoCompatibilityCheck):
+class FaqQuestionAnsweringTest(unittest.TestCase):
 
     def setUp(self) -> None:
         self.task = Tasks.faq_question_answering
@@ -103,10 +102,6 @@ class FaqQuestionAnsweringTest(unittest.TestCase, DemoCompatibilityCheck):
             ['今天星期六', '明天星期几明天星期几'])
         print(np.shape(sentence_vec))
 
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
-    def test_demo_compatibility(self):
-        self.compatibility_check()
-
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/pipelines/test_fast_instance_segmentation.py b/tests/pipelines/test_fast_instance_segmentation.py
new file mode 100644
index 00000000..d5789150
--- /dev/null
+++ b/tests/pipelines/test_fast_instance_segmentation.py
@@ -0,0 +1,34 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest
+
+from modelscope.models import Model
+from modelscope.outputs import OutputKeys
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+from modelscope.utils.test_utils import test_level
+
+
+class FastInstanceSegmentationTest(unittest.TestCase):
+
+    def setUp(self) -> None:
+        self.task = Tasks.image_segmentation
+        self.model_id = 'damo/cv_resnet50_fast-instance-segmentation_coco'
+
+    image = 'data/test/images/image_instance_segmentation.jpg'
+
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    def test_run_with_model_name(self):
+        pipeline_parsing = pipeline(
+            task=Tasks.image_segmentation, model=self.model_id)
+        print(pipeline_parsing(input=self.image)[OutputKeys.LABELS])
+
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    def test_run_with_model_from_modelhub(self):
+        model = Model.from_pretrained(self.model_id)
+        pipeline_parsing = pipeline(
+            task=Tasks.image_segmentation, model=model, preprocessor=None)
+        print(pipeline_parsing(input=self.image)[OutputKeys.LABELS])
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/pipelines/test_feature_extraction.py b/tests/pipelines/test_feature_extraction.py
index da6be1c0..8e365eab 100644
--- a/tests/pipelines/test_feature_extraction.py
+++ b/tests/pipelines/test_feature_extraction.py
@@ -11,12 +11,10 @@ from modelscope.pipelines import pipeline
 from modelscope.pipelines.nlp import FeatureExtractionPipeline
 from modelscope.preprocessors import FillMaskTransformersPreprocessor
 from modelscope.utils.constant import Tasks
-from modelscope.utils.demo_utils import DemoCompatibilityCheck
 from modelscope.utils.test_utils import test_level
 
 
-class FeatureExtractionTaskModelTest(unittest.TestCase,
-                                     DemoCompatibilityCheck):
+class FeatureExtractionTaskModelTest(unittest.TestCase):
 
     def setUp(self) -> None:
         self.task = Tasks.feature_extraction
diff --git a/tests/pipelines/test_fid_dialogue.py
b/tests/pipelines/test_fid_dialogue.py index c69823ce..96d7c919 100644 --- a/tests/pipelines/test_fid_dialogue.py +++ b/tests/pipelines/test_fid_dialogue.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class FidDialogueTest(unittest.TestCase, DemoCompatibilityCheck): +class FidDialogueTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.fid_dialogue @@ -49,8 +48,7 @@ class FidDialogueTest(unittest.TestCase, DemoCompatibilityCheck): 'forward_params': forward_params } - # @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') - @unittest.skip('temporarily skip') + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_240m_pipeline(self): pipeline_ins = pipeline( task=self.task, @@ -59,8 +57,7 @@ class FidDialogueTest(unittest.TestCase, DemoCompatibilityCheck): result = pipeline_ins(self.input, **self.kwargs) print(result) - # @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') - @unittest.skip('temporarily skip') + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_3_7b_pipeline(self): pipeline_ins = pipeline( task=self.task, @@ -69,10 +66,6 @@ class FidDialogueTest(unittest.TestCase, DemoCompatibilityCheck): result = pipeline_ins(self.input, **self.kwargs) print(result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_fill_mask.py b/tests/pipelines/test_fill_mask.py index 0e427464..450ada15 100644 --- a/tests/pipelines/test_fill_mask.py +++ b/tests/pipelines/test_fill_mask.py @@ -10,12 +10,11 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import FillMaskPipeline from modelscope.preprocessors import FillMaskTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool from modelscope.utils.test_utils import test_level -class FillMaskTest(unittest.TestCase, DemoCompatibilityCheck): +class FillMaskTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.fill_mask @@ -134,8 +133,9 @@ class FillMaskTest(unittest.TestCase, DemoCompatibilityCheck): f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: ' f'{pipeline_ins(test_input)}\n') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_model_name(self): + # TODO: to be fixed in the future # veco pipeline_ins = pipeline(task=Tasks.fill_mask, model=self.model_id_veco) for language in ['zh', 'en']: @@ -176,10 +176,6 @@ class FillMaskTest(unittest.TestCase, DemoCompatibilityCheck): print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: ' f'{pipeline_ins(test_input)}\n') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_general_image_classification.py b/tests/pipelines/test_general_image_classification.py index 978c474a..df036fa1 100644 --- a/tests/pipelines/test_general_image_classification.py 
+++ b/tests/pipelines/test_general_image_classification.py @@ -4,13 +4,11 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.regress_test_utils import MsRegressTool from modelscope.utils.test_utils import test_level -class GeneralImageClassificationTest(unittest.TestCase, - DemoCompatibilityCheck): +class GeneralImageClassificationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_classification @@ -83,10 +81,6 @@ class GeneralImageClassificationTest(unittest.TestCase, result = general_image_classification('data/test/images/bird.JPEG') print(result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_general_recognition.py b/tests/pipelines/test_general_recognition.py index ba713bbe..873aaa02 100644 --- a/tests/pipelines/test_general_recognition.py +++ b/tests/pipelines/test_general_recognition.py @@ -4,11 +4,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class GeneralRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class GeneralRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.general_recognition @@ -22,10 +21,6 @@ class GeneralRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): result = general_recognition('data/test/images/dogs.jpg') print(result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_generative_multi_modal_embedding.py b/tests/pipelines/test_generative_multi_modal_embedding.py index 18b96f65..3a853725 100644 --- a/tests/pipelines/test_generative_multi_modal_embedding.py +++ b/tests/pipelines/test_generative_multi_modal_embedding.py @@ -5,11 +5,10 @@ import unittest from modelscope.models import Model from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class GEMMMultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): +class GEMMMultiModalEmbeddingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.generative_multi_modal_embedding @@ -68,10 +67,6 @@ class GEMMMultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): output = generative_multi_modal_embedding_pipeline(test_input) print(output) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_gridvlp_classification.py b/tests/pipelines/test_gridvlp_classification.py index 18c6c582..7479d0fa 100644 --- a/tests/pipelines/test_gridvlp_classification.py +++ b/tests/pipelines/test_gridvlp_classification.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines.multi_modal.gridvlp_pipeline import ( GridVlpClassificationPipeline, GridVlpEmbeddingPipeline) -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import 
test_level -class GridVlpClassificationTest(unittest.TestCase, DemoCompatibilityCheck): +class GridVlpClassificationTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'rgtjf1/multi-modal_gridvlp_classification_chinese-base-ecom-cate' @@ -62,10 +61,6 @@ class GridVlpClassificationTest(unittest.TestCase, DemoCompatibilityCheck): print(f'text: {self.text}\nimage: {self.image}\n' f'outputs shape: {outputs.shape}') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_hand_2d_keypoints.py b/tests/pipelines/test_hand_2d_keypoints.py index 43b569d0..a243a478 100644 --- a/tests/pipelines/test_hand_2d_keypoints.py +++ b/tests/pipelines/test_hand_2d_keypoints.py @@ -23,7 +23,7 @@ class Hand2DKeypointsPipelineTest(unittest.TestCase): self.assertEqual(results[OutputKeys.KEYPOINTS].shape[2], 3) self.assertEqual(results[OutputKeys.BOXES].shape[1], 4) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip('skip test in current test level: no pipeline implemented') def test_hand_2d_keypoints_with_default_model(self): img_path = 'data/test/images/hand_keypoints.jpg' diff --git a/tests/pipelines/test_hand_detection.py b/tests/pipelines/test_hand_detection.py index 8a6bbd5a..9ea192a1 100644 --- a/tests/pipelines/test_hand_detection.py +++ b/tests/pipelines/test_hand_detection.py @@ -3,17 +3,16 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class ObjectDetectionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.domain_specific_object_detection self.model_id = 'damo/cv_yolox-pai_hand-detection' - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip('skip test in current test level: no pipeline implemented') def test_hand_detection_pipeline(self): test_image = 'data/test/images/hand_detection.jpg' diff --git a/tests/pipelines/test_hicossl_video_embedding.py b/tests/pipelines/test_hicossl_video_embedding.py index 8a7de1fa..a367457f 100644 --- a/tests/pipelines/test_hicossl_video_embedding.py +++ b/tests/pipelines/test_hicossl_video_embedding.py @@ -4,11 +4,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class HICOSSLVideoEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): +class HICOSSLVideoEmbeddingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_embedding @@ -23,10 +22,6 @@ class HICOSSLVideoEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): print(f'video embedding output: {result}.') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_hitea_tasks.py b/tests/pipelines/test_hitea_tasks.py index 50efdfbd..60cd206d 100644 --- a/tests/pipelines/test_hitea_tasks.py +++ b/tests/pipelines/test_hitea_tasks.py @@ -5,11 +5,10 @@ from modelscope.models import Model from modelscope.outputs import OutputKeys from 
modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class HiTeATasksTest(unittest.TestCase, DemoCompatibilityCheck): +class HiTeATasksTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_video_captioning_with_model(self): @@ -55,10 +54,6 @@ class HiTeATasksTest(unittest.TestCase, DemoCompatibilityCheck): result = pipeline_vqa(input) print(result) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_human_wholebody_keypoint.py b/tests/pipelines/test_human_wholebody_keypoint.py index 7c5946cc..e0052f77 100644 --- a/tests/pipelines/test_human_wholebody_keypoint.py +++ b/tests/pipelines/test_human_wholebody_keypoint.py @@ -11,7 +11,7 @@ from modelscope.utils.test_utils import test_level class EasyCVFace2DKeypointsPipelineTest(unittest.TestCase): - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip('skip easycv related cases') def test_human_wholebody_keypoint(self): img_path = 'data/test/images/keypoints_detect/img_test_wholebody.jpg' model_id = 'damo/cv_hrnetw48_human-wholebody-keypoint_image' diff --git a/tests/pipelines/test_image_body_reshaping.py b/tests/pipelines/test_image_body_reshaping.py index e1955e94..5a0ec0e7 100644 --- a/tests/pipelines/test_image_body_reshaping.py +++ b/tests/pipelines/test_image_body_reshaping.py @@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageBodyReshapingTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageBodyReshapingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_body_reshaping @@ -49,10 +48,6 @@ class ImageBodyReshapingTest(unittest.TestCase, DemoCompatibilityCheck): image_body_reshaping = pipeline(Tasks.image_body_reshaping) self.pipeline_inference(image_body_reshaping, self.test_image) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_color_enhance.py b/tests/pipelines/test_image_color_enhance.py index 7c3ae8c0..5e222776 100644 --- a/tests/pipelines/test_image_color_enhance.py +++ b/tests/pipelines/test_image_color_enhance.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageColorEnhanceTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_csrnet_image-color-enhance-models' @@ -37,10 +36,6 @@ class ImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck): self.pipeline_inference(img_color_enhance, 'data/test/images/image_color_enhance.png') - @unittest.skipUnless(test_level() >= 0, 'skip test in 
current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_colorization.py b/tests/pipelines/test_image_colorization.py index 547fce89..15ea314a 100644 --- a/tests/pipelines/test_image_colorization.py +++ b/tests/pipelines/test_image_colorization.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageColorizationTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageColorizationTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_unet_image-colorization' @@ -37,10 +36,6 @@ class ImageColorizationTest(unittest.TestCase, DemoCompatibilityCheck): image_colorization = pipeline(Tasks.image_colorization) self.pipeline_inference(image_colorization, self.test_image) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_debanding.py b/tests/pipelines/test_image_debanding.py index 105d1f45..da784596 100644 --- a/tests/pipelines/test_image_debanding.py +++ b/tests/pipelines/test_image_debanding.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageColorEnhanceTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_rrdb_image-debanding' @@ -36,10 +35,6 @@ class ImageColorEnhanceTest(unittest.TestCase, DemoCompatibilityCheck): self.pipeline_inference(img_debanding, 'data/test/images/image_debanding.png') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_deblur.py b/tests/pipelines/test_image_deblur.py index fc9d0101..529ae96c 100644 --- a/tests/pipelines/test_image_deblur.py +++ b/tests/pipelines/test_image_deblur.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import ImageDeblurPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageDenoiseTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageDenoiseTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_deblurring @@ -56,10 +55,6 @@ class ImageDenoiseTest(unittest.TestCase, DemoCompatibilityCheck): h, w = deblur_img.shape[:2] print('pipeline: the shape of output_img is {}x{}'.format(h, w)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_defrcn_fewshot.py b/tests/pipelines/test_image_defrcn_fewshot.py index d2ecde13..1771d7b8 100644 --- 
a/tests/pipelines/test_image_defrcn_fewshot.py +++ b/tests/pipelines/test_image_defrcn_fewshot.py @@ -8,14 +8,13 @@ from modelscope.models import Model from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import test_level logger = get_logger() -class ImageDefrcnFewShotTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageDefrcnFewShotTest(unittest.TestCase): def setUp(self) -> None: logger.info('start install detectron2-0.3') @@ -58,10 +57,6 @@ class ImageDefrcnFewShotTest(unittest.TestCase, DemoCompatibilityCheck): self.task, model=cache_path, model_revision=self.revision) print(pipeline_defrcn(input=self.image)[OutputKeys.LABELS]) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_denoise.py b/tests/pipelines/test_image_denoise.py index d95dd343..891e703e 100644 --- a/tests/pipelines/test_image_denoise.py +++ b/tests/pipelines/test_image_denoise.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import ImageDenoisePipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageDenoiseTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageDenoiseTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_denoising @@ -56,10 +55,6 @@ class ImageDenoiseTest(unittest.TestCase, DemoCompatibilityCheck): h, w = denoise_img.shape[:2] print('pipeline: the shape of output_img is {}x{}'.format(h, w)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_depth_estimation.py b/tests/pipelines/test_image_depth_estimation.py index 6ec16a64..7f9b3bb9 100644 --- a/tests/pipelines/test_image_depth_estimation.py +++ b/tests/pipelines/test_image_depth_estimation.py @@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import depth_to_color -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageDepthEstimationTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageDepthEstimationTest(unittest.TestCase): def setUp(self) -> None: self.task = 'image-depth-estimation' diff --git a/tests/pipelines/test_image_depth_estimation_bts.py b/tests/pipelines/test_image_depth_estimation_bts.py index bda7a41f..e952da30 100644 --- a/tests/pipelines/test_image_depth_estimation_bts.py +++ b/tests/pipelines/test_image_depth_estimation_bts.py @@ -8,11 +8,10 @@ from modelscope.models import Model from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageDepthEstimationBtsTest(unittest.TestCase, DemoCompatibilityCheck): +class 
ImageDepthEstimationBtsTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_depth_estimation @@ -45,10 +44,6 @@ class ImageDepthEstimationBtsTest(unittest.TestCase, DemoCompatibilityCheck): cv2.imwrite('result_snapshot.jpg', depth_vis) print('Test run with snapshot ok.') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_driving_perception.py b/tests/pipelines/test_image_driving_perception.py index 2f28b7d3..a6ad902d 100644 --- a/tests/pipelines/test_image_driving_perception.py +++ b/tests/pipelines/test_image_driving_perception.py @@ -17,11 +17,10 @@ from modelscope.preprocessors.image import LoadImage from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import \ show_image_driving_perception_result -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageDrivingPerceptionTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageDrivingPerceptionTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_yolopv2_image-driving-perception_bdd100k' @@ -59,10 +58,6 @@ class ImageDrivingPerceptionTest(unittest.TestCase, DemoCompatibilityCheck): self.pipeline_inference(image_driving_perception_pipeline, self.img_path) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_face_fusion.py b/tests/pipelines/test_image_face_fusion.py index fde15edf..54d2c3a4 100644 --- a/tests/pipelines/test_image_face_fusion.py +++ b/tests/pipelines/test_image_face_fusion.py @@ -7,11 +7,10 @@ from modelscope.hub.snapshot_download import snapshot_download from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageFaceFusionTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageFaceFusionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_face_fusion @@ -50,10 +49,6 @@ class ImageFaceFusionTest(unittest.TestCase, DemoCompatibilityCheck): cv2.imwrite('result_facefusion.png', result[OutputKeys.OUTPUT_IMG]) print('facefusion.test_run_modelhub_default_model done') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_human_parsing.py b/tests/pipelines/test_image_human_parsing.py index 77d75862..f9263ea8 100644 --- a/tests/pipelines/test_image_human_parsing.py +++ b/tests/pipelines/test_image_human_parsing.py @@ -5,11 +5,10 @@ from modelscope.models import Model from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageHumanParsingTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageHumanParsingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_segmentation @@ -39,10 +38,6 @@ class ImageHumanParsingTest(unittest.TestCase, 
DemoCompatibilityCheck): task=Tasks.image_segmentation, model=model, preprocessor=None) print(pipeline_parsing(input=self.image_multiple)[OutputKeys.LABELS]) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_inpainting_sdv2.py b/tests/pipelines/test_image_inpainting_sdv2.py index 81002ce8..b21ac69d 100644 --- a/tests/pipelines/test_image_inpainting_sdv2.py +++ b/tests/pipelines/test_image_inpainting_sdv2.py @@ -10,11 +10,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import ImageInpaintingSDV2Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageInpaintingSDV2Test(unittest.TestCase, DemoCompatibilityCheck): +class ImageInpaintingSDV2Test(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_inpainting @@ -50,10 +49,6 @@ class ImageInpaintingSDV2Test(unittest.TestCase, DemoCompatibilityCheck): print( 'pipeline: the output image path is {}'.format(output_image_path)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_instance_segmentation.py b/tests/pipelines/test_image_instance_segmentation.py index 2ba0724a..c305a7c0 100644 --- a/tests/pipelines/test_image_instance_segmentation.py +++ b/tests/pipelines/test_image_instance_segmentation.py @@ -12,11 +12,10 @@ from modelscope.pipelines.cv import ImageInstanceSegmentationPipeline from modelscope.preprocessors import build_preprocessor from modelscope.utils.config import Config from modelscope.utils.constant import Fields, ModelFile, Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageInstanceSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageInstanceSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_segmentation @@ -61,10 +60,6 @@ class ImageInstanceSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): print(f'pipeline1:{pipeline1(input=self.image)[OutputKeys.LABELS]}') print(f'pipeline2: {pipeline2(input=self.image)[OutputKeys.LABELS]}') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_layout_estimation.py b/tests/pipelines/test_image_layout_estimation.py index b312e8c2..4c93fa30 100644 --- a/tests/pipelines/test_image_layout_estimation.py +++ b/tests/pipelines/test_image_layout_estimation.py @@ -7,11 +7,10 @@ import cv2 from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageLayoutEstimationTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageLayoutEstimationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.indoor_layout_estimation diff --git a/tests/pipelines/test_image_matching.py b/tests/pipelines/test_image_matching.py index 
55fd56df..6007ea31 100644 --- a/tests/pipelines/test_image_matching.py +++ b/tests/pipelines/test_image_matching.py @@ -11,11 +11,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import match_pair_visualization -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageMatchingTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageMatchingTest(unittest.TestCase): def setUp(self) -> None: self.task = 'image-matching' diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py index a3edb705..d6d87a0c 100644 --- a/tests/pipelines/test_image_matting.py +++ b/tests/pipelines/test_image_matting.py @@ -8,11 +8,10 @@ from modelscope.msdatasets import MsDataset from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import ModelFile, Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageMattingTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageMattingTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_unet_image-matting' @@ -61,10 +60,6 @@ class ImageMattingTest(unittest.TestCase, DemoCompatibilityCheck): f'Output written to dir: {osp.dirname(osp.abspath("result_0.png"))}' ) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_mvs_depth_estimation.py b/tests/pipelines/test_image_mvs_depth_estimation.py index a7e327e3..b158623b 100644 --- a/tests/pipelines/test_image_mvs_depth_estimation.py +++ b/tests/pipelines/test_image_mvs_depth_estimation.py @@ -6,11 +6,10 @@ from modelscope.hub.snapshot_download import snapshot_download from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageMVSDepthEstimationTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageMVSDepthEstimationTest(unittest.TestCase): def setUp(self) -> None: self.task = 'image-multi-view-depth-estimation' diff --git a/tests/pipelines/test_image_open_vocabulary_detection.py b/tests/pipelines/test_image_open_vocabulary_detection.py index 52dc1d11..923e1efe 100644 --- a/tests/pipelines/test_image_open_vocabulary_detection.py +++ b/tests/pipelines/test_image_open_vocabulary_detection.py @@ -10,15 +10,13 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import draw_box -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import test_level logger = get_logger() -class ImageOpenVocabularyDetectionTest(unittest.TestCase, - DemoCompatibilityCheck): +class ImageOpenVocabularyDetectionTest(unittest.TestCase): def setUp(self) -> None: os.system( @@ -74,10 +72,6 @@ class ImageOpenVocabularyDetectionTest(unittest.TestCase, cv2.imwrite('result_snapshot.jpg', image) print('Test run with snapshot ok.') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def 
test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_panoptic_segmentation.py b/tests/pipelines/test_image_panoptic_segmentation.py index 4f12e6af..38c66055 100644 --- a/tests/pipelines/test_image_panoptic_segmentation.py +++ b/tests/pipelines/test_image_panoptic_segmentation.py @@ -9,17 +9,16 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import panoptic_seg_masks_to_image -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImagePanopticSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): +class ImagePanopticSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_segmentation self.model_id = 'damo/cv_swinL_panoptic-segmentation_cocopan' - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip('skip test in current test level: no pipeline implemented') def test_image_panoptic_segmentation(self): input_location = 'data/test/images/image_panoptic_segmentation.jpg' pan_segmentor = pipeline(Tasks.image_segmentation, model=self.model_id) @@ -29,7 +28,7 @@ class ImagePanopticSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): cv2.imwrite('result.jpg', draw_img) print('print test_image_panoptic_segmentation return success') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip('skip test in current test level: no pipeline implemented') def test_image_panoptic_segmentation_from_PIL(self): input_location = 'data/test/images/image_panoptic_segmentation.jpg' pan_segmentor = pipeline(Tasks.image_segmentation, model=self.model_id) @@ -40,10 +39,6 @@ class ImagePanopticSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): cv2.imwrite('result.jpg', draw_img) print('print test_image_panoptic_segmentation from PIL return success') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_portrait_enhancement.py b/tests/pipelines/test_image_portrait_enhancement.py index f0814c07..43978fd2 100644 --- a/tests/pipelines/test_image_portrait_enhancement.py +++ b/tests/pipelines/test_image_portrait_enhancement.py @@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImagePortraitEnhancementTest(unittest.TestCase, DemoCompatibilityCheck): +class ImagePortraitEnhancementTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_portrait_enhancement @@ -46,10 +45,6 @@ class ImagePortraitEnhancementTest(unittest.TestCase, DemoCompatibilityCheck): face_enhancement = pipeline(Tasks.image_portrait_enhancement) self.pipeline_inference(face_enhancement, self.test_image) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_quality_assessment_degradation.py 
b/tests/pipelines/test_image_quality_assessment_degradation.py index cb0f24c7..1acf8163 100644 --- a/tests/pipelines/test_image_quality_assessment_degradation.py +++ b/tests/pipelines/test_image_quality_assessment_degradation.py @@ -9,14 +9,12 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import ImageQualityAssessmentDegradationPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level sys.path.insert(0, '.') -class ImageQualityAssessmentDegradationTest(unittest.TestCase, - DemoCompatibilityCheck): +class ImageQualityAssessmentDegradationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_quality_assessment_degradation @@ -54,10 +52,6 @@ class ImageQualityAssessmentDegradationTest(unittest.TestCase, out_path = pipeline_ins(input=self.test_img)[OutputKeys.SCORES] print('pipeline: the out_path is {}'.format(out_path)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_quality_assessment_man.py b/tests/pipelines/test_image_quality_assessment_man.py index 2668d45d..f36f8b3c 100644 --- a/tests/pipelines/test_image_quality_assessment_man.py +++ b/tests/pipelines/test_image_quality_assessment_man.py @@ -7,11 +7,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import ImageQualityAssessmentMANPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageQualityAssessmentMANTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageQualityAssessmentMANTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_quality_assessment_mos @@ -47,10 +46,6 @@ class ImageQualityAssessmentMANTest(unittest.TestCase, DemoCompatibilityCheck): out_path = pipeline_ins(input=self.test_img)[OutputKeys.SCORE] print('pipeline: the out_path is {}'.format(out_path)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_quality_assessment_mos.py b/tests/pipelines/test_image_quality_assessment_mos.py index 608be8f8..3ca26b0a 100644 --- a/tests/pipelines/test_image_quality_assessment_mos.py +++ b/tests/pipelines/test_image_quality_assessment_mos.py @@ -7,11 +7,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import ImageQualityAssessmentMosPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageQualityAssessmentMosTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageQualityAssessmentMosTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_quality_assessment_mos @@ -47,10 +46,6 @@ class ImageQualityAssessmentMosTest(unittest.TestCase, DemoCompatibilityCheck): out_path = pipeline_ins(input=self.test_img)[OutputKeys.SCORE] print('pipeline: the out_path is {}'.format(out_path)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def 
test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_reid_person.py b/tests/pipelines/test_image_reid_person.py index 310cdd66..e107d5ee 100644 --- a/tests/pipelines/test_image_reid_person.py +++ b/tests/pipelines/test_image_reid_person.py @@ -6,11 +6,10 @@ from PIL import Image from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageReidPersonTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageReidPersonTest(unittest.TestCase): def setUp(self) -> None: self.input_location = 'data/test/images/image_reid_person.jpg' @@ -50,10 +49,6 @@ class ImageReidPersonTest(unittest.TestCase, DemoCompatibilityCheck): ) print(f'The img embedding is: {result[OutputKeys.IMG_EMBEDDING]}') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_restoration.py b/tests/pipelines/test_image_restoration.py index baffa7d5..b9c600b2 100644 --- a/tests/pipelines/test_image_restoration.py +++ b/tests/pipelines/test_image_restoration.py @@ -4,11 +4,10 @@ import unittest from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageRestorationTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageRestorationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_demoireing @@ -24,10 +23,6 @@ class ImageRestorationTest(unittest.TestCase, DemoCompatibilityCheck): Image.fromarray(result[OutputKeys.OUTPUT_IMG]).save(input_location + '_demoire.jpg') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_semantic_segmentation.py b/tests/pipelines/test_image_semantic_segmentation.py index 2e8d7522..5bc89bd1 100644 --- a/tests/pipelines/test_image_semantic_segmentation.py +++ b/tests/pipelines/test_image_semantic_segmentation.py @@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import semantic_seg_masks_to_image -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageSemanticSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageSemanticSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = 'image-segmentation' @@ -54,10 +53,6 @@ class ImageSemanticSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): cv2.imwrite('result.jpg', draw_img) print('test_image_semantic_segmentation_vitadapter_from_PIL DONE') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_structured_model_probing.py b/tests/pipelines/test_image_structured_model_probing.py index 1befcf98..f4d46d92 
100644 --- a/tests/pipelines/test_image_structured_model_probing.py +++ b/tests/pipelines/test_image_structured_model_probing.py @@ -4,12 +4,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageStructuredModelProbingTest(unittest.TestCase, - DemoCompatibilityCheck): +class ImageStructuredModelProbingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_classification diff --git a/tests/pipelines/test_image_style_transfer.py b/tests/pipelines/test_image_style_transfer.py index 5f37f204..223ec757 100644 --- a/tests/pipelines/test_image_style_transfer.py +++ b/tests/pipelines/test_image_style_transfer.py @@ -7,11 +7,10 @@ from modelscope.hub.snapshot_download import snapshot_download from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageStyleTransferTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageStyleTransferTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_style_transfer @@ -53,10 +52,6 @@ class ImageStyleTransferTest(unittest.TestCase, DemoCompatibilityCheck): cv2.imwrite('result_styletransfer3.png', result[OutputKeys.OUTPUT_IMG]) print('style_transfer.test_run_modelhub_default_model done') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_image_super_resolution.py b/tests/pipelines/test_image_super_resolution.py index d5cbebe8..45066300 100644 --- a/tests/pipelines/test_image_super_resolution.py +++ b/tests/pipelines/test_image_super_resolution.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageSuperResolutionTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageSuperResolutionTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_rrdb_image-super-resolution' @@ -37,10 +36,6 @@ class ImageSuperResolutionTest(unittest.TestCase, DemoCompatibilityCheck): super_resolution = pipeline(Tasks.image_super_resolution) self.pipeline_inference(super_resolution, self.img) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_interactive_translation_pipeline.py b/tests/pipelines/test_interactive_translation_pipeline.py index b973250a..c240ba70 100644 --- a/tests/pipelines/test_interactive_translation_pipeline.py +++ b/tests/pipelines/test_interactive_translation_pipeline.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class InteractiveTranslationTest(unittest.TestCase, DemoCompatibilityCheck): +class InteractiveTranslationTest(unittest.TestCase): def 
setUp(self) -> None: self.task = Tasks.translation @@ -28,10 +27,6 @@ class InteractiveTranslationTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(self.task, model=model_id) print(pipeline_ins(inputs + '' + prefix)) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_inverse_text_processing.py b/tests/pipelines/test_inverse_text_processing.py index dc7fb1e0..a1d5a712 100644 --- a/tests/pipelines/test_inverse_text_processing.py +++ b/tests/pipelines/test_inverse_text_processing.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class InverseTextProcessingTest(unittest.TestCase, DemoCompatibilityCheck): +class InverseTextProcessingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.inverse_text_processing, @@ -61,10 +60,6 @@ class InverseTextProcessingTest(unittest.TestCase, DemoCompatibilityCheck): itn_result = itn_inference_pipline(text_in=lang_text_in) print(itn_result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_key_word_spotting.py b/tests/pipelines/test_key_word_spotting.py index 13f7a308..f22bc845 100644 --- a/tests/pipelines/test_key_word_spotting.py +++ b/tests/pipelines/test_key_word_spotting.py @@ -10,7 +10,6 @@ import soundfile from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import ColorCodes, Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import download_and_untar, test_level @@ -27,7 +26,7 @@ NEG_TESTSETS_FILE = 'neg_testsets.tar.gz' NEG_TESTSETS_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/KWS/neg_testsets.tar.gz' -class KeyWordSpottingTest(unittest.TestCase, DemoCompatibilityCheck): +class KeyWordSpottingTest(unittest.TestCase): action_info = { 'test_run_with_wav': { 'checking_item': [OutputKeys.KWS_LIST, 0, 'keyword'], @@ -344,10 +343,6 @@ class KeyWordSpottingTest(unittest.TestCase, DemoCompatibilityCheck): model_id=model_id, audio_in=wav_path, keywords=keywords) logger.info(ColorCodes.YELLOW + str(kws_result) + ColorCodes.END) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_key_word_spotting_farfield.py b/tests/pipelines/test_key_word_spotting_farfield.py index e736f48b..3193149c 100644 --- a/tests/pipelines/test_key_word_spotting_farfield.py +++ b/tests/pipelines/test_key_word_spotting_farfield.py @@ -19,6 +19,7 @@ class KWSFarfieldTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/speech_dfsmn_kws_char_farfield_16k_nihaomiya' + self.model_id_iot = 'damo/speech_dfsmn_kws_char_farfield_iot_16k_nihaomiya' if os.path.isfile(OUTPUT_WAV): os.remove(OUTPUT_WAV) @@ -29,6 +30,13 @@ class KWSFarfieldTest(unittest.TestCase): self.assertEqual(len(result['kws_list']), 5) print(result['kws_list'][-1]) + @unittest.skipUnless(test_level() >= 1, 
'skip test in current test level') + def test_normal_iot(self): + kws = pipeline(Tasks.keyword_spotting, model=self.model_id_iot) + result = kws(os.path.join(os.getcwd(), TEST_SPEECH_FILE)) + self.assertEqual(len(result['kws_list']), 5) + print(result['kws_list'][-1]) + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_output(self): kws = pipeline(Tasks.keyword_spotting, model=self.model_id) diff --git a/tests/pipelines/test_language_guided_video_summarization.py b/tests/pipelines/test_language_guided_video_summarization.py index 0f06d4f2..01d88b55 100755 --- a/tests/pipelines/test_language_guided_video_summarization.py +++ b/tests/pipelines/test_language_guided_video_summarization.py @@ -9,12 +9,10 @@ import torch from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class LanguageGuidedVideoSummarizationTest(unittest.TestCase, - DemoCompatibilityCheck): +class LanguageGuidedVideoSummarizationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.language_guided_video_summarization @@ -40,10 +38,6 @@ class LanguageGuidedVideoSummarizationTest(unittest.TestCase, print(f'video summarization output:\n {result}.') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_language_identification.py b/tests/pipelines/test_language_identification.py index ddd91e69..ccfa1a7d 100644 --- a/tests/pipelines/test_language_identification.py +++ b/tests/pipelines/test_language_identification.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class LanguageIdentificationTest(unittest.TestCase, DemoCompatibilityCheck): +class LanguageIdentificationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_classification @@ -22,11 +21,6 @@ class LanguageIdentificationTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(self.task, model=self.model_id) print(pipeline_ins(input=inputs)) - @unittest.skipUnless(test_level() >= 0, - 'skip test case in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_license_plate_detection.py b/tests/pipelines/test_license_plate_detection.py index 70cdb820..3c30618d 100644 --- a/tests/pipelines/test_license_plate_detection.py +++ b/tests/pipelines/test_license_plate_detection.py @@ -5,11 +5,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class LicensePlateDectionTest(unittest.TestCase, DemoCompatibilityCheck): +class LicensePlateDectionTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_resnet18_license-plate-detection_damo' @@ -32,10 +31,6 @@ class LicensePlateDectionTest(unittest.TestCase, DemoCompatibilityCheck): license_plate_detection = pipeline(Tasks.license_plate_detection) self.pipeline_inference(license_plate_detection, self.test_image) - 
@unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_lineless_table_recognition.py b/tests/pipelines/test_lineless_table_recognition.py index 53fde8a1..59e173f4 100644 --- a/tests/pipelines/test_lineless_table_recognition.py +++ b/tests/pipelines/test_lineless_table_recognition.py @@ -8,11 +8,10 @@ import numpy as np from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TableRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class TableRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_resnet-transformer_table-structure-recognition_lore' @@ -35,10 +34,6 @@ class TableRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): lineless_table_recognition = pipeline(Tasks.lineless_table_recognition) self.pipeline_inference(lineless_table_recognition, self.test_image) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_live_category.py b/tests/pipelines/test_live_category.py index 391ed283..88b94b69 100644 --- a/tests/pipelines/test_live_category.py +++ b/tests/pipelines/test_live_category.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class LiveCategoryTest(unittest.TestCase, DemoCompatibilityCheck): +class LiveCategoryTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.live_category @@ -21,10 +20,6 @@ class LiveCategoryTest(unittest.TestCase, DemoCompatibilityCheck): print(f'live category output: {result}.') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_mask_face_recognition.py b/tests/pipelines/test_mask_face_recognition.py index 550e80e4..2a7e8ede 100644 --- a/tests/pipelines/test_mask_face_recognition.py +++ b/tests/pipelines/test_mask_face_recognition.py @@ -6,11 +6,10 @@ import numpy as np from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class MaskFaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class MaskFaceRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.face_recognition @@ -28,10 +27,6 @@ class MaskFaceRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): sim = np.dot(emb1[0], emb2[0]) print(f'Cos similarity={sim:.3f}, img1:{img1} img2:{img2}') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_maskdino_instance_segmentation.py b/tests/pipelines/test_maskdino_instance_segmentation.py index 14e0887d..88c46de1 100644 --- 
a/tests/pipelines/test_maskdino_instance_segmentation.py +++ b/tests/pipelines/test_maskdino_instance_segmentation.py @@ -8,12 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import MaskDINOInstanceSegmentationPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class MaskDINOInstanceSegmentationTest(unittest.TestCase, - DemoCompatibilityCheck): +class MaskDINOInstanceSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_segmentation @@ -45,10 +43,6 @@ class MaskDINOInstanceSegmentationTest(unittest.TestCase, print(f'pipeline1:{pipeline1(input=self.image)[OutputKeys.LABELS]}') print(f'pipeline2: {pipeline2(input=self.image)[OutputKeys.LABELS]}') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_mglm_text_summarization.py b/tests/pipelines/test_mglm_text_summarization.py index 47abc741..703e9bbe 100644 --- a/tests/pipelines/test_mglm_text_summarization.py +++ b/tests/pipelines/test_mglm_text_summarization.py @@ -6,11 +6,10 @@ from modelscope.models import Model from modelscope.pipelines import pipeline from modelscope.preprocessors import MGLMSummarizationPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class mGLMTest(unittest.TestCase, DemoCompatibilityCheck): +class mGLMTest(unittest.TestCase): def setUp(self) -> None: self.output_dir = 'unittest_output' diff --git a/tests/pipelines/test_mobile_image_super_resolution.py b/tests/pipelines/test_mobile_image_super_resolution.py index 2cc7adf0..a486d244 100644 --- a/tests/pipelines/test_mobile_image_super_resolution.py +++ b/tests/pipelines/test_mobile_image_super_resolution.py @@ -8,12 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class MobileImageSuperResolutionTest(unittest.TestCase, - DemoCompatibilityCheck): +class MobileImageSuperResolutionTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_ecbsr_image-super-resolution_mobile' @@ -38,10 +36,6 @@ class MobileImageSuperResolutionTest(unittest.TestCase, super_resolution = pipeline(Tasks.image_super_resolution) self.pipeline_inference(super_resolution, self.img) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_motion_generation.py b/tests/pipelines/test_motion_generation.py index 7938611c..43903eb8 100644 --- a/tests/pipelines/test_motion_generation.py +++ b/tests/pipelines/test_motion_generation.py @@ -4,11 +4,10 @@ import unittest from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class MDMMotionGenerationTest(unittest.TestCase, 
DemoCompatibilityCheck): +class MDMMotionGenerationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.motion_generation @@ -23,10 +22,6 @@ class MDMMotionGenerationTest(unittest.TestCase, DemoCompatibilityCheck): result[OutputKeys.KEYPOINTS].shape) print('motion generation video file:', result[OutputKeys.OUTPUT_VIDEO]) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_movie_scene_segmentation.py b/tests/pipelines/test_movie_scene_segmentation.py index 0ac8b716..c6498a6c 100644 --- a/tests/pipelines/test_movie_scene_segmentation.py +++ b/tests/pipelines/test_movie_scene_segmentation.py @@ -10,11 +10,10 @@ from modelscope.pipelines import pipeline from modelscope.trainers import build_trainer from modelscope.utils.config import Config, ConfigDict from modelscope.utils.constant import ModelFile, Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class MovieSceneSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): +class MovieSceneSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.movie_scene_segmentation @@ -123,10 +122,6 @@ class MovieSceneSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): else: raise ValueError('process error') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_mplug_owl_multimodal_dialogue.py b/tests/pipelines/test_mplug_owl_multimodal_dialogue.py new file mode 100644 index 00000000..57bce67e --- /dev/null +++ b/tests/pipelines/test_mplug_owl_multimodal_dialogue.py @@ -0,0 +1,100 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +from PIL import Image + +from modelscope.models import Model +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.test_utils import test_level + + +class MplugOwlMultimodalDialogueTest(unittest.TestCase): + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_run_with_multimodal_dialogue_with_model(self): + model = Model.from_pretrained( + 'damo/multi-modal_mplug_owl_multimodal-dialogue_7b') + pipeline_multimodal_dialogue = pipeline( + task=Tasks.multimodal_dialogue, + model=model, + ) + image = 'data/resource/portrait_input.png' + system_prompt_1 = 'The following is a conversation between a curious human and AI assistant.' + system_prompt_2 = "The assistant gives helpful, detailed, and polite answers to the user's questions." + messages = { + 'messages': [ + { + 'role': 'system', + 'content': system_prompt_1 + ' ' + system_prompt_2 + }, + { + 'role': 'user', + 'content': [{ + 'image': image + }] + }, + { + 'role': 'user', + 'content': 'Describe the facial expression of the man.' 
+ }, + ] + } + result = pipeline_multimodal_dialogue(messages) + print(result[OutputKeys.TEXT]) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_multimodal_dialogue_with_name(self): + pipeline_multimodal_dialogue = pipeline( + Tasks.multimodal_dialogue, + model='damo/multi-modal_mplug_owl_multimodal-dialogue_7b') + image = 'data/resource/portrait_input.png' + system_prompt_1 = 'The following is a conversation between a curious human and AI assistant.' + system_prompt_2 = "The assistant gives helpful, detailed, and polite answers to the user's questions." + messages = { + 'messages': [ + { + 'role': 'system', + 'content': system_prompt_1 + ' ' + system_prompt_2 + }, + { + 'role': 'user', + 'content': [{ + 'image': image + }] + }, + { + 'role': 'user', + 'content': 'Describe the facial expression of the man.' + }, + ] + } + result = pipeline_multimodal_dialogue(messages) + print(result[OutputKeys.TEXT]) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_multimodal_dialogue_with_text(self): + pipeline_multimodal_dialogue = pipeline( + Tasks.multimodal_dialogue, + model='damo/multi-modal_mplug_owl_multimodal-dialogue_7b') + system_prompt_1 = 'The following is a conversation between a curious human and AI assistant.' + system_prompt_2 = "The assistant gives helpful, detailed, and polite answers to the user's questions." + messages = { + 'messages': [ + { + 'role': 'system', + 'content': system_prompt_1 + ' ' + system_prompt_2 + }, + { + 'role': 'user', + 'content': 'Where is the capital of China?' + }, + ] + } + result = pipeline_multimodal_dialogue(messages) + print(result[OutputKeys.TEXT]) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_mplug_tasks.py b/tests/pipelines/test_mplug_tasks.py index 21439ce2..cff998b4 100644 --- a/tests/pipelines/test_mplug_tasks.py +++ b/tests/pipelines/test_mplug_tasks.py @@ -7,11 +7,10 @@ from modelscope.models import Model from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck): +class MplugTasksTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_image_captioning_with_model(self): @@ -95,10 +94,6 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck): result = pipeline_vqa(input) print(result) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_msrresnetlite_video_super_resolution.py b/tests/pipelines/test_msrresnetlite_video_super_resolution.py index d79e9702..d44cbd34 100644 --- a/tests/pipelines/test_msrresnetlite_video_super_resolution.py +++ b/tests/pipelines/test_msrresnetlite_video_super_resolution.py @@ -7,11 +7,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import VideoSuperResolutionPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class MSRResNetLiteVSRTest(unittest.TestCase, DemoCompatibilityCheck): +class 
MSRResNetLiteVSRTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_super_resolution @@ -50,10 +49,6 @@ class MSRResNetLiteVSRTest(unittest.TestCase, DemoCompatibilityCheck): input=self.test_video)[OutputKeys.OUTPUT_VIDEO] print('pipeline: the output video path is {}'.format(out_video_path)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_multi_modal_embedding.py b/tests/pipelines/test_multi_modal_embedding.py index 7eddc690..486adc94 100644 --- a/tests/pipelines/test_multi_modal_embedding.py +++ b/tests/pipelines/test_multi_modal_embedding.py @@ -8,11 +8,10 @@ from modelscope.models import Model from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class MultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): +class MultiModalEmbeddingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.multi_modal_embedding @@ -54,10 +53,6 @@ class MultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): print('l2-norm: {}'.format(torch.norm(text_embedding, dim=-1).item())) # should be 1.0 - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_multilingual_named_entity_recognition.py b/tests/pipelines/test_multilingual_named_entity_recognition.py index ec134023..a31adf1f 100644 --- a/tests/pipelines/test_multilingual_named_entity_recognition.py +++ b/tests/pipelines/test_multilingual_named_entity_recognition.py @@ -8,12 +8,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import NamedEntityRecognitionPipeline from modelscope.preprocessors import NERPreprocessorThai, NERPreprocessorViet from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class MultilingualNamedEntityRecognitionTest(unittest.TestCase, - DemoCompatibilityCheck): +class MultilingualNamedEntityRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.named_entity_recognition @@ -123,10 +121,6 @@ class MultilingualNamedEntityRecognitionTest(unittest.TestCase, self.viet_sentence[5:] ])) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_multilingual_word_segmentation.py b/tests/pipelines/test_multilingual_word_segmentation.py index f10e6d98..878af0d3 100644 --- a/tests/pipelines/test_multilingual_word_segmentation.py +++ b/tests/pipelines/test_multilingual_word_segmentation.py @@ -8,12 +8,11 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import WordSegmentationThaiPipeline from modelscope.preprocessors import WordSegmentationPreprocessorThai from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.regress_test_utils import MsRegressTool from modelscope.utils.test_utils import test_level -class WordSegmentationTest(unittest.TestCase, 
DemoCompatibilityCheck): +class WordSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.word_segmentation @@ -65,10 +64,6 @@ class WordSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins( input=[self.sentence, self.sentence[:10], self.sentence[6:]])) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_named_entity_recognition.py b/tests/pipelines/test_named_entity_recognition.py index 175e9261..8b7424f4 100644 --- a/tests/pipelines/test_named_entity_recognition.py +++ b/tests/pipelines/test_named_entity_recognition.py @@ -10,11 +10,10 @@ from modelscope.pipelines.nlp import NamedEntityRecognitionPipeline from modelscope.preprocessors import \ TokenClassificationTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class NamedEntityRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class NamedEntityRecognitionTest(unittest.TestCase): language_examples = { 'zh': '新华社北京二月十一日电(记者唐虹)', @@ -470,10 +469,6 @@ class NamedEntityRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): model_id) print(pipeline_ins(input=sentence)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_nerf_recon_acc.py b/tests/pipelines/test_nerf_recon_acc.py index 95d879fb..7ca0fa44 100644 --- a/tests/pipelines/test_nerf_recon_acc.py +++ b/tests/pipelines/test_nerf_recon_acc.py @@ -9,11 +9,10 @@ from modelscope.msdatasets import MsDataset from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import DownloadMode, Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class NeRFReconAccTest(unittest.TestCase, DemoCompatibilityCheck): +class NeRFReconAccTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_nerf-3d-reconstruction-accelerate_damo' @@ -63,11 +62,6 @@ class NeRFReconAccTest(unittest.TestCase, DemoCompatibilityCheck): dict(data_dir=self.data_dir, render_dir=self.render_dir)) print('facefusion.test_run_modelhub_default_model done') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - @unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest only') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_nli.py b/tests/pipelines/test_nli.py index a7d2a236..5bbe353b 100644 --- a/tests/pipelines/test_nli.py +++ b/tests/pipelines/test_nli.py @@ -7,12 +7,11 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import TextClassificationPipeline from modelscope.preprocessors import TextClassificationTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool from modelscope.utils.test_utils import test_level -class NLITest(unittest.TestCase, DemoCompatibilityCheck): +class NLITest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.nli @@ -78,10 +77,6 
@@ class NLITest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(task=Tasks.nli) print(pipeline_ins(input=(self.sentence1, self.sentence2))) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_object_detecion_3d.py b/tests/pipelines/test_object_detecion_3d.py index bb0eebda..69b75b39 100644 --- a/tests/pipelines/test_object_detecion_3d.py +++ b/tests/pipelines/test_object_detecion_3d.py @@ -10,11 +10,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ObjectDetection3DTest(unittest.TestCase, DemoCompatibilityCheck): +class ObjectDetection3DTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.object_detection_3d @@ -48,10 +47,6 @@ class ObjectDetection3DTest(unittest.TestCase, DemoCompatibilityCheck): detect = pipeline(self.task) self.pipeline_inference(detect, idx) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_object_detection.py b/tests/pipelines/test_object_detection.py index e4bf6b54..f06d954b 100644 --- a/tests/pipelines/test_object_detection.py +++ b/tests/pipelines/test_object_detection.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class ObjectDetectionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.human_detection @@ -43,12 +42,9 @@ class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): result = human_detect(input_location) print(result) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_image_object_detection_auto_pipeline(self): + # TODO: to be fixed in the future model_id = 'damo/cv_yolox_image-object-detection-auto' test_image = 'data/test/images/auto_demo.jpg' @@ -59,7 +55,7 @@ class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): image_object_detection_auto.show_result(test_image, result, 'auto_demo_ret.jpg') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip('skip test in current test level: no pipeline implemented') def test_image_object_detection_dino_pipeline(self): model_id = 'damo/cv_swinl_image-object-detection_dino' test_image = 'data/test/images/image_detection.jpg' diff --git a/tests/pipelines/test_ocr_detection.py b/tests/pipelines/test_ocr_detection.py index 243e274b..0ed2e59c 100644 --- a/tests/pipelines/test_ocr_detection.py +++ b/tests/pipelines/test_ocr_detection.py @@ -4,11 +4,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from 
modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class OCRDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class OCRDetectionTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_resnet18_ocr-detection-line-level_damo' @@ -43,10 +42,6 @@ class OCRDetectionTest(unittest.TestCase, DemoCompatibilityCheck): ocr_detection = pipeline(Tasks.ocr_detection) self.pipeline_inference(ocr_detection, self.test_image) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_ocr_recognition.py b/tests/pipelines/test_ocr_recognition.py index 145ae22a..94ee521f 100644 --- a/tests/pipelines/test_ocr_recognition.py +++ b/tests/pipelines/test_ocr_recognition.py @@ -6,14 +6,13 @@ import PIL from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class OCRRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class OCRRecognitionTest(unittest.TestCase): def setUp(self) -> None: - self.model_id = 'damo/cv_crnn_ocr-recognition-general_damo' + self.model_id = 'damo/cv_convnextTiny_ocr-recognition-general_damo' self.test_image = 'data/test/images/ocr_recognition.jpg' self.task = Tasks.ocr_recognition @@ -26,7 +25,47 @@ class OCRRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): ocr_recognition = pipeline( Tasks.ocr_recognition, model=self.model_id, - model_revision='v2.2.1') + model_revision='v2.3.0') + self.pipeline_inference(ocr_recognition, self.test_image) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub_handwritten(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model='damo/cv_convnextTiny_ocr-recognition-handwritten_damo', + model_revision='v2.3.0') + self.pipeline_inference(ocr_recognition, self.test_image) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub_scene(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model='damo/cv_convnextTiny_ocr-recognition-scene_damo', + model_revision='v2.3.0') + self.pipeline_inference(ocr_recognition, self.test_image) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub_document(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model='damo/cv_convnextTiny_ocr-recognition-document_damo', + model_revision='v2.3.0') + self.pipeline_inference(ocr_recognition, self.test_image) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub_licenseplate(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model='damo/cv_convnextTiny_ocr-recognition-licenseplate_damo', + model_revision='v2.3.0') + self.pipeline_inference(ocr_recognition, self.test_image) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub_crnn(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model='damo/cv_crnn_ocr-recognition-general_damo', + model_revision='v2.2.2') self.pipeline_inference(ocr_recognition, self.test_image) @unittest.skipUnless(test_level() >= 1, 
'skip test in current test level') @@ -34,7 +73,7 @@ class OCRRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): ocr_recognition = pipeline( Tasks.ocr_recognition, model=self.model_id, - model_revision='v2.2.1') + model_revision='v2.3.0') imagePIL = PIL.Image.open(self.test_image) self.pipeline_inference(ocr_recognition, imagePIL) @@ -44,9 +83,75 @@ class OCRRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): Tasks.ocr_recognition, model_revision='v2.3.0') self.pipeline_inference(ocr_recognition, self.test_image) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub_cpu(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model=self.model_id, + model_revision='v2.3.0', + device='cpu') + self.pipeline_inference(ocr_recognition, self.test_image) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub_handwritten_cpu(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model='damo/cv_convnextTiny_ocr-recognition-handwritten_damo', + model_revision='v2.3.0', + device='cpu') + self.pipeline_inference(ocr_recognition, self.test_image) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub_scene_cpu(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model='damo/cv_convnextTiny_ocr-recognition-scene_damo', + model_revision='v2.3.0', + device='cpu') + self.pipeline_inference(ocr_recognition, self.test_image) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub_document_cpu(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model='damo/cv_convnextTiny_ocr-recognition-document_damo', + model_revision='v2.3.0', + device='cpu') + self.pipeline_inference(ocr_recognition, self.test_image) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub_licenseplate_cpu(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model='damo/cv_convnextTiny_ocr-recognition-licenseplate_damo', + model_revision='v2.3.0', + device='cpu') + self.pipeline_inference(ocr_recognition, self.test_image) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub_crnn_cpu(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model='damo/cv_crnn_ocr-recognition-general_damo', + model_revision='v2.2.2', + device='cpu') + self.pipeline_inference(ocr_recognition, self.test_image) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_run_with_model_from_modelhub_PILinput_cpu(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, + model=self.model_id, + model_revision='v2.3.0', + device='cpu') + imagePIL = PIL.Image.open(self.test_image) + self.pipeline_inference(ocr_recognition, imagePIL) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_run_modelhub_default_model_cpu(self): + ocr_recognition = pipeline( + Tasks.ocr_recognition, model_revision='v2.3.0', device='cpu') + self.pipeline_inference(ocr_recognition, self.test_image) if __name__ == '__main__': diff --git a/tests/pipelines/test_ofa_tasks.py b/tests/pipelines/test_ofa_tasks.py index df1b5647..55c3ae65 
100644 --- a/tests/pipelines/test_ofa_tasks.py +++ b/tests/pipelines/test_ofa_tasks.py @@ -11,11 +11,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import created_boxed_image -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck): +class OfaTasksTest(unittest.TestCase): def setUp(self) -> None: self.output_dir = 'unittest_output' @@ -366,10 +365,6 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck): for r in result: print(r[OutputKeys.TEXT]) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_panorama_depth_estimation.py b/tests/pipelines/test_panorama_depth_estimation.py index 99e575e3..23552274 100644 --- a/tests/pipelines/test_panorama_depth_estimation.py +++ b/tests/pipelines/test_panorama_depth_estimation.py @@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import depth_to_color -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class PanoramaDepthEstimationTest(unittest.TestCase, DemoCompatibilityCheck): +class PanoramaDepthEstimationTest(unittest.TestCase): def setUp(self) -> None: self.task = 'panorama-depth-estimation' diff --git a/tests/pipelines/test_pedestrian_attribute_recognition.py b/tests/pipelines/test_pedestrian_attribute_recognition.py index c0ace43c..7d58ce12 100644 --- a/tests/pipelines/test_pedestrian_attribute_recognition.py +++ b/tests/pipelines/test_pedestrian_attribute_recognition.py @@ -8,12 +8,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import draw_pedestrian_attribute -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class PedestrianAttributeRecognitionTest(unittest.TestCase, - DemoCompatibilityCheck): +class PedestrianAttributeRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.pedestrian_attribute_recognition @@ -39,10 +37,6 @@ class PedestrianAttributeRecognitionTest(unittest.TestCase, self.pipeline_inference(pedestrian_attribute_recognition, Image.open(self.test_image)) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_person_image_cartoon.py b/tests/pipelines/test_person_image_cartoon.py index 1dfaf519..40d26c86 100644 --- a/tests/pipelines/test_person_image_cartoon.py +++ b/tests/pipelines/test_person_image_cartoon.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ImageCartoonTest(unittest.TestCase, DemoCompatibilityCheck): +class ImageCartoonTest(unittest.TestCase): def setUp(self) -> 
None: self.model_id = 'damo/cv_unet_person-image-cartoon_compound-models' @@ -83,10 +82,6 @@ class ImageCartoonTest(unittest.TestCase, DemoCompatibilityCheck): img_cartoon = pipeline(Tasks.image_portrait_stylization) self.pipeline_inference(img_cartoon, self.test_image) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_pointcloud_sceneflow_estimation.py b/tests/pipelines/test_pointcloud_sceneflow_estimation.py index 34d87f09..4d4bf7f2 100644 --- a/tests/pipelines/test_pointcloud_sceneflow_estimation.py +++ b/tests/pipelines/test_pointcloud_sceneflow_estimation.py @@ -7,12 +7,10 @@ import numpy as np from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class PointCloudSceneFlowEstimationTest(unittest.TestCase, - DemoCompatibilityCheck): +class PointCloudSceneFlowEstimationTest(unittest.TestCase): def setUp(self) -> None: self.task = 'pointcloud-sceneflow-estimation' diff --git a/tests/pipelines/test_product_retrieval_embedding.py b/tests/pipelines/test_product_retrieval_embedding.py index 2483d53a..f194bb7b 100644 --- a/tests/pipelines/test_product_retrieval_embedding.py +++ b/tests/pipelines/test_product_retrieval_embedding.py @@ -8,11 +8,10 @@ from modelscope.models import Model from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ProductRetrievalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): +class ProductRetrievalEmbeddingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.product_retrieval_embedding @@ -41,10 +40,6 @@ class ProductRetrievalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): result = product_embed(self.img_input)[OutputKeys.IMG_EMBEDDING] print('abs sum value is: {}'.format(np.sum(np.abs(result)))) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_realtime_object_detection.py b/tests/pipelines/test_realtime_object_detection.py index 498c09d8..21450619 100644 --- a/tests/pipelines/test_realtime_object_detection.py +++ b/tests/pipelines/test_realtime_object_detection.py @@ -7,14 +7,13 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import realtime_object_detection_bbox_vis -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import test_level logger = get_logger() -class RealtimeObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class RealtimeObjectDetectionTest(unittest.TestCase): def setUp(self) -> None: self.easycv_small_model_id = 'damo/cv_cspnet_image-object-detection_yolox' @@ -22,7 +21,7 @@ class RealtimeObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): self.test_image = 'data/test/images/keypoints_detect/000000438862.jpg' self.task = Tasks.image_object_detection - 
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip('skip test in current test level: no pipeline implemented') def test_run_easycv_yolox(self): realtime_object_detection = pipeline( Tasks.image_object_detection, model=self.easycv_small_model_id) @@ -34,7 +33,7 @@ class RealtimeObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): else: raise ValueError('process error') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip('skip test in current test level: no pipeline implemented') def test_run_easycv_yolox_nano(self): realtime_object_detection = pipeline( Tasks.image_object_detection, model=self.easycv_nano_model_id) @@ -46,10 +45,6 @@ class RealtimeObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): else: raise ValueError('process error') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_realtime_video_object_detection.py b/tests/pipelines/test_realtime_video_object_detection.py index 716c9260..d42bda67 100644 --- a/tests/pipelines/test_realtime_video_object_detection.py +++ b/tests/pipelines/test_realtime_video_object_detection.py @@ -9,15 +9,13 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import show_video_object_detection_result -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import test_level logger = get_logger() -class RealtimeVideoObjectDetectionTest(unittest.TestCase, - DemoCompatibilityCheck): +class RealtimeVideoObjectDetectionTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_cspnet_video-object-detection_streamyolo' @@ -53,10 +51,6 @@ class RealtimeVideoObjectDetectionTest(unittest.TestCase, else: raise ValueError('process error') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_referring_video_object_segmentation.py b/tests/pipelines/test_referring_video_object_segmentation.py index 509e9317..2b7de41c 100644 --- a/tests/pipelines/test_referring_video_object_segmentation.py +++ b/tests/pipelines/test_referring_video_object_segmentation.py @@ -3,12 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ReferringVideoObjectSegmentationTest(unittest.TestCase, - DemoCompatibilityCheck): +class ReferringVideoObjectSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.referring_video_object_segmentation @@ -45,10 +43,6 @@ class ReferringVideoObjectSegmentationTest(unittest.TestCase, else: raise ValueError('process error') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_relation_extraction.py b/tests/pipelines/test_relation_extraction.py index 17ab61fc..44c0b9ad 100644 --- a/tests/pipelines/test_relation_extraction.py +++ 
b/tests/pipelines/test_relation_extraction.py @@ -8,11 +8,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import InformationExtractionPipeline from modelscope.preprocessors import RelationExtractionTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class RelationExtractionTest(unittest.TestCase, DemoCompatibilityCheck): +class RelationExtractionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.relation_extraction @@ -55,10 +54,6 @@ class RelationExtractionTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(task=Tasks.relation_extraction) print(pipeline_ins(input=self.sentence)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_salient_detection.py b/tests/pipelines/test_salient_detection.py index 3101213c..78ae94db 100644 --- a/tests/pipelines/test_salient_detection.py +++ b/tests/pipelines/test_salient_detection.py @@ -4,11 +4,10 @@ import unittest from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class SalientDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class SalientDetectionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.semantic_segmentation @@ -44,10 +43,6 @@ class SalientDetectionTest(unittest.TestCase, DemoCompatibilityCheck): cv2.imwrite(input_location + '_camouflag.jpg', result[OutputKeys.MASKS]) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_sentence_similarity.py b/tests/pipelines/test_sentence_similarity.py index 233bd3a1..e411158f 100644 --- a/tests/pipelines/test_sentence_similarity.py +++ b/tests/pipelines/test_sentence_similarity.py @@ -11,12 +11,11 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import TextClassificationPipeline from modelscope.preprocessors import TextClassificationTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool from modelscope.utils.test_utils import test_level -class SentenceSimilarityTest(unittest.TestCase, DemoCompatibilityCheck): +class SentenceSimilarityTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.sentence_similarity @@ -110,10 +109,6 @@ class SentenceSimilarityTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(task=Tasks.sentence_similarity) print(pipeline_ins(input=(self.sentence1, self.sentence2))) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_sentiment_classification.py b/tests/pipelines/test_sentiment_classification.py index 278f34a8..bb0311ff 100644 --- a/tests/pipelines/test_sentiment_classification.py +++ b/tests/pipelines/test_sentiment_classification.py @@ -9,12 +9,10 @@ 
from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import TextClassificationPipeline from modelscope.preprocessors import TextClassificationTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class SentimentClassificationTaskModelTest(unittest.TestCase, - DemoCompatibilityCheck): +class SentimentClassificationTaskModelTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_classification @@ -63,10 +61,6 @@ class SentimentClassificationTaskModelTest(unittest.TestCase, self.assertTrue( isinstance(pipeline_ins.model, ModelForTextClassification)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_siamese_uie.py b/tests/pipelines/test_siamese_uie.py index 30b38d2e..c5008573 100644 --- a/tests/pipelines/test_siamese_uie.py +++ b/tests/pipelines/test_siamese_uie.py @@ -10,12 +10,11 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import SiameseUiePipeline from modelscope.preprocessors import SiameseUiePreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool from modelscope.utils.test_utils import test_level -class ZeroShotClassificationTest(unittest.TestCase, DemoCompatibilityCheck): +class ZeroShotClassificationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.siamese_uie @@ -67,10 +66,6 @@ class ZeroShotClassificationTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(task=Tasks.siamese_uie, model_revision='v1.1') print(pipeline_ins(input=self.sentence, schema=self.schema)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_skin_retouching.py b/tests/pipelines/test_skin_retouching.py index db8d89ed..aa1e0c59 100644 --- a/tests/pipelines/test_skin_retouching.py +++ b/tests/pipelines/test_skin_retouching.py @@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class SkinRetouchingTest(unittest.TestCase, DemoCompatibilityCheck): +class SkinRetouchingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.skin_retouching @@ -41,10 +40,6 @@ class SkinRetouchingTest(unittest.TestCase, DemoCompatibilityCheck): skin_retouching = pipeline(Tasks.skin_retouching) self.pipeline_inference(skin_retouching, self.test_image) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_soonet_video_temporal_grounding.py b/tests/pipelines/test_soonet_video_temporal_grounding.py index 21f8027c..4fafeb31 100644 --- a/tests/pipelines/test_soonet_video_temporal_grounding.py +++ b/tests/pipelines/test_soonet_video_temporal_grounding.py @@ -5,12 +5,10 @@ from modelscope.models import Model 
from modelscope.models.multi_modal.soonet import SOONet from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class SOONetVideoTemporalGroundingTest(unittest.TestCase, - DemoCompatibilityCheck): +class SOONetVideoTemporalGroundingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_temporal_grounding diff --git a/tests/pipelines/test_speaker_verification.py b/tests/pipelines/test_speaker_verification.py index 83d8aff3..2b90c66e 100644 --- a/tests/pipelines/test_speaker_verification.py +++ b/tests/pipelines/test_speaker_verification.py @@ -1,13 +1,11 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -import os.path import unittest -from typing import Any, Dict, List +from typing import Any, Dict, List, Union from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import test_level @@ -16,17 +14,28 @@ logger = get_logger() SPEAKER1_A_EN_16K_WAV = 'data/test/audios/speaker1_a_en_16k.wav' SPEAKER1_B_EN_16K_WAV = 'data/test/audios/speaker1_b_en_16k.wav' SPEAKER2_A_EN_16K_WAV = 'data/test/audios/speaker2_a_en_16k.wav' +SCL_EXAMPLE_WAV = 'data/test/audios/scl_example1.wav' -class SpeakerVerificationTest(unittest.TestCase, DemoCompatibilityCheck): +class SpeakerVerificationTest(unittest.TestCase): ecapatdnn_voxceleb_16k_model_id = 'damo/speech_ecapa-tdnn_sv_en_voxceleb_16k' campplus_voxceleb_16k_model_id = 'damo/speech_campplus_sv_en_voxceleb_16k' + rdino_voxceleb_16k_model_id = 'damo/speech_rdino_ecapa_tdnn_sv_en_voxceleb_16k' + speaker_change_locating_cn_model_id = 'damo/speech_campplus-transformer_scl_zh-cn_16k-common' + eres2net_voxceleb_16k_model_id = 'damo/speech_eres2net_sv_en_voxceleb_16k' def setUp(self) -> None: self.task = Tasks.speaker_verification - def run_pipeline(self, model_id: str, audios: List[str]) -> Dict[str, Any]: - p = pipeline(task=self.task, model=model_id) + def run_pipeline(self, + model_id: str, + audios: Union[List[str], str], + task: str = None, + model_revision=None) -> Dict[str, Any]: + if task is not None: + self.task = task + p = pipeline( + task=self.task, model=model_id, model_revision=model_revision) result = p(audios) return result @@ -51,9 +60,36 @@ class SpeakerVerificationTest(unittest.TestCase, DemoCompatibilityCheck): print(result) self.assertTrue(OutputKeys.SCORE in result) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_speaker_verification_rdino_voxceleb_16k(self): + logger.info('Run speaker verification for rdino_voxceleb_16k model') + result = self.run_pipeline( + model_id=self.rdino_voxceleb_16k_model_id, + audios=[SPEAKER1_A_EN_16K_WAV, SPEAKER1_B_EN_16K_WAV], + model_revision='v1.0.1') + print(result) + self.assertTrue(OutputKeys.SCORE in result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_speaker_change_locating_cn_16k(self): + logger.info( + 'Run speaker change locating for campplus-transformer model') + result = self.run_pipeline( + model_id=self.speaker_change_locating_cn_model_id, + task=Tasks.speaker_diarization, + 
audios=SCL_EXAMPLE_WAV) + print(result) + self.assertTrue(OutputKeys.TEXT in result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_speaker_verification_eres2net_voxceleb_16k(self): + logger.info('Run speaker verification for eres2net_voxceleb_16k model') + result = self.run_pipeline( + model_id=self.eres2net_voxceleb_16k_model_id, + audios=[SPEAKER1_A_EN_16K_WAV, SPEAKER1_B_EN_16K_WAV], + model_revision='v1.0.2') + print(result) + self.assertTrue(OutputKeys.SCORE in result) if __name__ == '__main__': diff --git a/tests/pipelines/test_speech_separation.py b/tests/pipelines/test_speech_separation.py index 194f84a8..4edb3b43 100644 --- a/tests/pipelines/test_speech_separation.py +++ b/tests/pipelines/test_speech_separation.py @@ -8,13 +8,12 @@ import numpy from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level MIX_SPEECH_FILE = 'data/test/audios/mix_speech.wav' -class SpeechSeparationTest(unittest.TestCase, DemoCompatibilityCheck): +class SpeechSeparationTest(unittest.TestCase): def setUp(self) -> None: pass @@ -32,10 +31,6 @@ class SpeechSeparationTest(unittest.TestCase, DemoCompatibilityCheck): sf.write(save_file, numpy.frombuffer(signal, dtype=numpy.int16), 8000) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_speech_signal_process.py b/tests/pipelines/test_speech_signal_process.py index 2c26cee6..104bf88a 100644 --- a/tests/pipelines/test_speech_signal_process.py +++ b/tests/pipelines/test_speech_signal_process.py @@ -7,7 +7,6 @@ from modelscope.metainfo import Pipelines from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level NEAREND_MIC_FILE = 'data/test/audios/nearend_mic.wav' @@ -24,7 +23,7 @@ NOISE_SPEECH_URL = 'https://modelscope.oss-cn-beijing.aliyuncs.com/' \ 'test/audios/speech_with_noise.wav' -class SpeechSignalProcessTest(unittest.TestCase, DemoCompatibilityCheck): +class SpeechSignalProcessTest(unittest.TestCase): def setUp(self) -> None: pass @@ -150,10 +149,6 @@ class SpeechSignalProcessTest(unittest.TestCase, DemoCompatibilityCheck): w.write(pcm) audio = f.read(block_size) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_table_question_answering.py b/tests/pipelines/test_table_question_answering.py index 4d6eff24..d688ef23 100644 --- a/tests/pipelines/test_table_question_answering.py +++ b/tests/pipelines/test_table_question_answering.py @@ -40,7 +40,7 @@ def tableqa_tracking_and_print_results_with_history( print('question', question) print('sql text:', output_dict[OutputKeys.SQL_STRING]) print('sql query:', output_dict[OutputKeys.SQL_QUERY]) - print('query result:', output_dict[OutputKeys.QUERT_RESULT]) + print('query result:', output_dict[OutputKeys.QUERY_RESULT]) print('json dumps', json.dumps(output_dict, ensure_ascii=False)) print() historical_queries = output_dict[OutputKeys.HISTORY] @@ -66,7 +66,7 @@ def 
tableqa_tracking_and_print_results_without_history( print('question', question) print('sql text:', output_dict[OutputKeys.SQL_STRING]) print('sql query:', output_dict[OutputKeys.SQL_QUERY]) - print('query result:', output_dict[OutputKeys.QUERT_RESULT]) + print('query result:', output_dict[OutputKeys.QUERY_RESULT]) print('json dumps', json.dumps(output_dict, ensure_ascii=False)) print() @@ -99,7 +99,7 @@ def tableqa_tracking_and_print_results_with_tableid( print('question', question) print('sql text:', output_dict[OutputKeys.SQL_STRING]) print('sql query:', output_dict[OutputKeys.SQL_QUERY]) - print('query result:', output_dict[OutputKeys.QUERT_RESULT]) + print('query result:', output_dict[OutputKeys.QUERY_RESULT]) print('json dumps', json.dumps(output_dict, ensure_ascii=False)) print() historical_queries = output_dict[OutputKeys.HISTORY] @@ -135,7 +135,7 @@ class TableQuestionAnswering(unittest.TestCase): 'history_sql': None }) print(i, result[OutputKeys.OUTPUT][OutputKeys.SQL_QUERY], - result[OutputKeys.OUTPUT][OutputKeys.QUERT_RESULT], + result[OutputKeys.OUTPUT][OutputKeys.QUERY_RESULT], json.dumps(result)) procs = [] diff --git a/tests/pipelines/test_table_recognition.py b/tests/pipelines/test_table_recognition.py index 3c6ee74a..6b81fc62 100644 --- a/tests/pipelines/test_table_recognition.py +++ b/tests/pipelines/test_table_recognition.py @@ -5,11 +5,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TableRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): +class TableRecognitionTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_dla34_table-structure-recognition_cycle-centernet' @@ -32,10 +31,6 @@ class TableRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): table_recognition = pipeline(Tasks.table_recognition) self.pipeline_inference(table_recognition, self.test_image) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_tbs_detection.py b/tests/pipelines/test_tbs_detection.py index ac0dd550..0d5a1283 100644 --- a/tests/pipelines/test_tbs_detection.py +++ b/tests/pipelines/test_tbs_detection.py @@ -2,11 +2,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class ObjectDetectionTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_with_model_name(self): diff --git a/tests/pipelines/test_text2text_generation.py b/tests/pipelines/test_text2text_generation.py index 40576a29..d439e033 100644 --- a/tests/pipelines/test_text2text_generation.py +++ b/tests/pipelines/test_text2text_generation.py @@ -8,11 +8,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import TextGenerationT5Pipeline from modelscope.preprocessors import TextGenerationT5Preprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class 
Text2TextGenerationTest(unittest.TestCase, DemoCompatibilityCheck): +class Text2TextGenerationTest(unittest.TestCase): def setUp(self) -> None: self.model_id_generate = 'damo/t5-cn-base-test' @@ -86,10 +85,6 @@ class Text2TextGenerationTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(task=Tasks.text2text_generation) print(pipeline_ins(self.input_generate)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_text_classification.py b/tests/pipelines/test_text_classification.py index d07ddbb8..128f86af 100644 --- a/tests/pipelines/test_text_classification.py +++ b/tests/pipelines/test_text_classification.py @@ -7,11 +7,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import TextClassificationPipeline from modelscope.preprocessors import TextClassificationTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class SequenceClassificationTest(unittest.TestCase, DemoCompatibilityCheck): +class SequenceClassificationTest(unittest.TestCase): sentence1 = 'i like this wonderful place' def setUp(self) -> None: @@ -91,10 +90,6 @@ class SequenceClassificationTest(unittest.TestCase, DemoCompatibilityCheck): result = text_classification(dataset) self.printDataset(result) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_text_driven_segmentation.py b/tests/pipelines/test_text_driven_segmentation.py index a67729ff..741787d9 100644 --- a/tests/pipelines/test_text_driven_segmentation.py +++ b/tests/pipelines/test_text_driven_segmentation.py @@ -23,10 +23,6 @@ class TextDrivenSegmentationTest(unittest.TestCase): # result[OutputKeys.MASKS] is segment map result,other keys are not used cv2.imwrite(input_location + '_lseg.jpg', result[OutputKeys.MASKS]) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.test_demo() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_text_error_correction.py b/tests/pipelines/test_text_error_correction.py index 171f3ab2..b4bf5be9 100644 --- a/tests/pipelines/test_text_error_correction.py +++ b/tests/pipelines/test_text_error_correction.py @@ -9,11 +9,10 @@ from modelscope.pipelines.nlp import TextErrorCorrectionPipeline from modelscope.preprocessors import (Preprocessor, TextErrorCorrectionPreprocessor) from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TextErrorCorrectionTest(unittest.TestCase, DemoCompatibilityCheck): +class TextErrorCorrectionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_error_correction @@ -81,10 +80,6 @@ class TextErrorCorrectionTest(unittest.TestCase, DemoCompatibilityCheck): task=Tasks.text_error_correction, model=self.law_model_id) print(pipeline_ins(self.input_law)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git 
a/tests/pipelines/test_text_generation.py b/tests/pipelines/test_text_generation.py index 998cbd18..378b1bbc 100644 --- a/tests/pipelines/test_text_generation.py +++ b/tests/pipelines/test_text_generation.py @@ -8,11 +8,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import TextGenerationPipeline from modelscope.preprocessors import TextGenerationTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TextGenerationTest(unittest.TestCase, DemoCompatibilityCheck): +class TextGenerationTest(unittest.TestCase): def setUp(self) -> None: self.palm_model_id_zh_base = 'damo/nlp_palm2.0_text-generation_chinese-base' @@ -261,10 +260,6 @@ class TextGenerationTest(unittest.TestCase, DemoCompatibilityCheck): model='damo/nlp_gpt2_text-generation_english-base') print(pipe('My name is Teven and I am')) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_text_to_image_synthesis.py b/tests/pipelines/test_text_to_image_synthesis.py index 5e28282b..63c38571 100644 --- a/tests/pipelines/test_text_to_image_synthesis.py +++ b/tests/pipelines/test_text_to_image_synthesis.py @@ -8,11 +8,10 @@ from modelscope.models import Model from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TextToImageSynthesisTest(unittest.TestCase, DemoCompatibilityCheck): +class TextToImageSynthesisTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_to_image_synthesis @@ -61,10 +60,6 @@ class TextToImageSynthesisTest(unittest.TestCase, DemoCompatibilityCheck): self.test_text)[OutputKeys.OUTPUT_IMGS][0] print(np.sum(np.abs(img))) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_text_to_speech.py b/tests/pipelines/test_text_to_speech.py index f746dfbe..528977ce 100644 --- a/tests/pipelines/test_text_to_speech.py +++ b/tests/pipelines/test_text_to_speech.py @@ -11,7 +11,6 @@ import torch from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import test_level @@ -20,8 +19,7 @@ import tensorflow as tf # isort:skip logger = get_logger() -class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase, - DemoCompatibilityCheck): +class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_to_speech @@ -109,10 +107,6 @@ class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase, with open(f'output_{self.test_model_name[i]}', 'wb') as f: f.write(wav) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_text_to_video_synthesis.py b/tests/pipelines/test_text_to_video_synthesis.py index 
6463c155..97ef6089 100644 --- a/tests/pipelines/test_text_to_video_synthesis.py +++ b/tests/pipelines/test_text_to_video_synthesis.py @@ -5,11 +5,10 @@ import unittest from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TextToVideoSynthesisTest(unittest.TestCase, DemoCompatibilityCheck): +class TextToVideoSynthesisTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_to_video_synthesis @@ -27,10 +26,6 @@ class TextToVideoSynthesisTest(unittest.TestCase, DemoCompatibilityCheck): self.test_text)[OutputKeys.OUTPUT_VIDEO] print(output_video_path) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_tinymog_face_detection.py b/tests/pipelines/test_tinymog_face_detection.py index e80fa482..48e74f44 100644 --- a/tests/pipelines/test_tinymog_face_detection.py +++ b/tests/pipelines/test_tinymog_face_detection.py @@ -8,11 +8,10 @@ from modelscope.msdatasets import MsDataset from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import draw_face_detection_result -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TinyMogFaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class TinyMogFaceDetectionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.face_detection @@ -48,10 +47,6 @@ class TinyMogFaceDetectionTest(unittest.TestCase, DemoCompatibilityCheck): result = face_detection(self.img_path) self.show_result(self.img_path, result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_tinynas_classification.py b/tests/pipelines/test_tinynas_classification.py index ebc6b722..300bd2b1 100644 --- a/tests/pipelines/test_tinynas_classification.py +++ b/tests/pipelines/test_tinynas_classification.py @@ -4,11 +4,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TinyNASClassificationTest(unittest.TestCase, DemoCompatibilityCheck): +class TinyNASClassificationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_classification @@ -21,10 +20,6 @@ class TinyNASClassificationTest(unittest.TestCase, DemoCompatibilityCheck): result = tinynas_classification('data/test/images/image_wolf.jpeg') print(result) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_tinynas_detection.py b/tests/pipelines/test_tinynas_detection.py index f7c513ff..08c746ea 100644 --- a/tests/pipelines/test_tinynas_detection.py +++ b/tests/pipelines/test_tinynas_detection.py @@ -7,11 +7,10 @@ from PIL import Image from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils 
import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TinynasObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class TinynasObjectDetectionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_object_detection @@ -52,10 +51,6 @@ class TinynasObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): 'data/test/images/image_detection.jpg') print('damoyolo-t', result) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_image_object_detection_auto_pipeline(self): test_image = 'data/test/images/image_detection.jpg' diff --git a/tests/pipelines/test_traffic_sign_detection.py b/tests/pipelines/test_traffic_sign_detection.py index 5404649d..efedec14 100644 --- a/tests/pipelines/test_traffic_sign_detection.py +++ b/tests/pipelines/test_traffic_sign_detection.py @@ -7,20 +7,15 @@ from PIL import Image from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TrafficSignDetectionTest(unittest.TestCase, DemoCompatibilityCheck): +class TrafficSignDetectionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.domain_specific_object_detection self.model_id = 'damo/cv_tinynas_object-detection_damoyolo_traffic_sign' - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_traffic_sign_detection_damoyolo(self): tinynas_object_detection = pipeline( diff --git a/tests/pipelines/test_translation_evaluation.py b/tests/pipelines/test_translation_evaluation.py index 53524fdc..e936f41a 100644 --- a/tests/pipelines/test_translation_evaluation.py +++ b/tests/pipelines/test_translation_evaluation.py @@ -2,14 +2,13 @@ import unittest -from modelscope.models.nlp.unite.configuration_unite import EvaluationMode +from modelscope.models.nlp.unite.configuration import InputFormat from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TranslationEvaluationTest(unittest.TestCase, DemoCompatibilityCheck): +class TranslationEvaluationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.translation_evaluation @@ -18,7 +17,7 @@ class TranslationEvaluationTest(unittest.TestCase, DemoCompatibilityCheck): @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_with_model_name_for_unite_large(self): - input = { + input_dict = { 'hyp': [ 'This is a sentence.', 'This is another sentence.', @@ -34,27 +33,27 @@ class TranslationEvaluationTest(unittest.TestCase, DemoCompatibilityCheck): } pipeline_ins = pipeline(self.task, model=self.model_id_large) - print(pipeline_ins(input=input)) + print(pipeline_ins(input_dict)['score']) - pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.SRC) - print(pipeline_ins(input=input)) + pipeline_ins.change_input_format(input_format=InputFormat.SRC) + print(pipeline_ins(input_dict)['score']) - pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.REF) - print(pipeline_ins(input=input)) 
+ pipeline_ins.change_input_format(input_format=InputFormat.REF) + print(pipeline_ins(input_dict)['score']) pipeline_ins = pipeline( self.task, model=self.model_id_large, device='cpu') - print(pipeline_ins(input=input)) + print(pipeline_ins(input_dict)['score']) - pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.SRC) - print(pipeline_ins(input=input)) + pipeline_ins.change_input_format(input_format=InputFormat.SRC) + print(pipeline_ins(input_dict)['score']) - pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.REF) - print(pipeline_ins(input=input)) + pipeline_ins.change_input_format(input_format=InputFormat.REF) + print(pipeline_ins(input_dict)['score']) @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_with_model_name_for_unite_base(self): - input = { + input_dict = { 'hyp': [ 'This is a sentence.', 'This is another sentence.', @@ -70,23 +69,23 @@ class TranslationEvaluationTest(unittest.TestCase, DemoCompatibilityCheck): } pipeline_ins = pipeline(self.task, model=self.model_id_base) - print(pipeline_ins(input=input)) + print(pipeline_ins(input_dict)['score']) - pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.SRC) - print(pipeline_ins(input=input)) + pipeline_ins.change_input_format(input_format=InputFormat.SRC) + print(pipeline_ins(input_dict)['score']) - pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.REF) - print(pipeline_ins(input=input)) + pipeline_ins.change_input_format(input_format=InputFormat.REF) + print(pipeline_ins(input_dict)['score']) pipeline_ins = pipeline( self.task, model=self.model_id_base, device='cpu') - print(pipeline_ins(input=input)) + print(pipeline_ins(input_dict)['score']) - pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.SRC) - print(pipeline_ins(input=input)) + pipeline_ins.change_input_format(input_format=InputFormat.SRC) + print(pipeline_ins(input_dict)['score']) - pipeline_ins.change_eval_mode(eval_mode=EvaluationMode.REF) - print(pipeline_ins(input=input)) + pipeline_ins.change_input_format(input_format=InputFormat.REF) + print(pipeline_ins(input_dict)['score']) if __name__ == '__main__': diff --git a/tests/pipelines/test_translation_quality_estimation.py b/tests/pipelines/test_translation_quality_estimation.py index 315fa72b..0890f31b 100644 --- a/tests/pipelines/test_translation_quality_estimation.py +++ b/tests/pipelines/test_translation_quality_estimation.py @@ -3,12 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TranslationQualityEstimationTest(unittest.TestCase, - DemoCompatibilityCheck): +class TranslationQualityEstimationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.sentence_similarity @@ -23,10 +21,6 @@ class TranslationQualityEstimationTest(unittest.TestCase, pipeline_ins = pipeline(self.task, model=self.model_id) print(pipeline_ins(input=inputs)) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_unifold.py b/tests/pipelines/test_unifold.py index cf67929d..98d2c1ce 100644 --- a/tests/pipelines/test_unifold.py +++ b/tests/pipelines/test_unifold.py @@ -4,11 +4,10 @@ import unittest from modelscope.hub.snapshot_download import snapshot_download from modelscope.pipelines import pipeline from 
modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class UnifoldProteinStructureTest(unittest.TestCase, DemoCompatibilityCheck): +class UnifoldProteinStructureTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.protein_structure diff --git a/tests/pipelines/test_universal_matting.py b/tests/pipelines/test_universal_matting.py index 5868cf36..1450d938 100644 --- a/tests/pipelines/test_universal_matting.py +++ b/tests/pipelines/test_universal_matting.py @@ -8,11 +8,10 @@ from modelscope.msdatasets import MsDataset from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import ModelFile, Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class UniversalMattingTest(unittest.TestCase, DemoCompatibilityCheck): +class UniversalMattingTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_unet_universal-matting' @@ -35,10 +34,6 @@ class UniversalMattingTest(unittest.TestCase, DemoCompatibilityCheck): cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG]) print(f'Output written to {osp.abspath("result.png")}') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_user_satisfaction_estimation.py b/tests/pipelines/test_user_satisfaction_estimation.py index 2bbfd5d7..2904ea30 100644 --- a/tests/pipelines/test_user_satisfaction_estimation.py +++ b/tests/pipelines/test_user_satisfaction_estimation.py @@ -6,12 +6,10 @@ from modelscope.models import Model from modelscope.pipelines import pipeline from modelscope.preprocessors import DialogueClassificationUsePreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class UserSatisfactionEstimationTest(unittest.TestCase, - DemoCompatibilityCheck): +class UserSatisfactionEstimationTest(unittest.TestCase): model_id = 'damo/nlp_user-satisfaction-estimation_chinese' input_dialogue = [('返修退换货咨询|||', '手机有质量问题怎么办|||稍等,我看下', '开不开机了|||', @@ -33,10 +31,6 @@ class UserSatisfactionEstimationTest(unittest.TestCase, task=Tasks.text_classification, model=self.model_id) print(pipeline_ins(input=self.input_dialogue)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - print(self.compatibility_check()) - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_video_category.py b/tests/pipelines/test_video_category.py index 660196b8..61ee72b0 100644 --- a/tests/pipelines/test_video_category.py +++ b/tests/pipelines/test_video_category.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VideoCategoryTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoCategoryTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_category @@ -21,10 +20,6 @@ class VideoCategoryTest(unittest.TestCase, DemoCompatibilityCheck): print(f'video category output: {result}.') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def 
test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_video_colorization.py b/tests/pipelines/test_video_colorization.py index c35577a4..fe6c0f87 100644 --- a/tests/pipelines/test_video_colorization.py +++ b/tests/pipelines/test_video_colorization.py @@ -11,11 +11,10 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.base import Pipeline from modelscope.pipelines.cv import VideoColorizationPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VideoColorizationTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoColorizationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_colorization @@ -44,10 +43,6 @@ class VideoColorizationTest(unittest.TestCase, DemoCompatibilityCheck): video_colorization = pipeline(Tasks.video_colorization) self.pipeline_inference(video_colorization, self.test_video) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_video_deinterlace.py b/tests/pipelines/test_video_deinterlace.py index bcb36cc3..267d4664 100644 --- a/tests/pipelines/test_video_deinterlace.py +++ b/tests/pipelines/test_video_deinterlace.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import VideoDeinterlacePipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VideoDeinterlaceTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoDeinterlaceTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_deinterlace @@ -52,10 +51,6 @@ class VideoDeinterlaceTest(unittest.TestCase, DemoCompatibilityCheck): input=self.test_video)[OutputKeys.OUTPUT_VIDEO] print('pipeline: the output video path is {}'.format(out_video_path)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_video_depth_estimation.py b/tests/pipelines/test_video_depth_estimation.py index 30ca3b33..6a054b2b 100644 --- a/tests/pipelines/test_video_depth_estimation.py +++ b/tests/pipelines/test_video_depth_estimation.py @@ -5,11 +5,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import show_video_depth_estimation_result -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VideoDepthEstimationTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoDepthEstimationTest(unittest.TestCase): def setUp(self) -> None: self.task = 'video-depth-estimation' diff --git a/tests/pipelines/test_video_frame_interpolation.py b/tests/pipelines/test_video_frame_interpolation.py index c23aa46a..11a4f568 100644 --- a/tests/pipelines/test_video_frame_interpolation.py +++ b/tests/pipelines/test_video_frame_interpolation.py @@ -8,11 +8,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import 
VideoFrameInterpolationPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VideoFrameInterpolationTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoFrameInterpolationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_frame_interpolation @@ -58,10 +57,6 @@ class VideoFrameInterpolationTest(unittest.TestCase, DemoCompatibilityCheck): input=self.test_video)[OutputKeys.OUTPUT_VIDEO] print('pipeline: the output video path is {}'.format(out_video_path)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_video_instance_segmentation.py b/tests/pipelines/test_video_instance_segmentation.py index 0a76d260..465cf26f 100644 --- a/tests/pipelines/test_video_instance_segmentation.py +++ b/tests/pipelines/test_video_instance_segmentation.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VideoInstanceSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoInstanceSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_panoptic_segmentation @@ -33,10 +32,6 @@ class VideoInstanceSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): print(f'video instance segmentation output:\n {result}.') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_video_multi_modal_embedding.py b/tests/pipelines/test_video_multi_modal_embedding.py index afe5940d..fe87b089 100644 --- a/tests/pipelines/test_video_multi_modal_embedding.py +++ b/tests/pipelines/test_video_multi_modal_embedding.py @@ -4,14 +4,13 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import test_level logger = get_logger() -class VideoMultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoMultiModalEmbeddingTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_multi_modal_embedding @@ -41,10 +40,6 @@ class VideoMultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): logger.info('video feature: {}'.format( output['video_embedding'][0][0][0])) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_video_multi_object_tracking.py b/tests/pipelines/test_video_multi_object_tracking.py index 97f1e705..f63fd8b1 100644 --- a/tests/pipelines/test_video_multi_object_tracking.py +++ b/tests/pipelines/test_video_multi_object_tracking.py @@ -4,11 +4,10 @@ import unittest from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class 
MultiObjectTracking(unittest.TestCase, DemoCompatibilityCheck): +class MultiObjectTracking(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_multi_object_tracking @@ -34,10 +33,6 @@ class MultiObjectTracking(unittest.TestCase, DemoCompatibilityCheck): in result) assert len(result[OutputKeys.LABELS]) == len(result[OutputKeys.BOXES]) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_video_object_segmentation.py b/tests/pipelines/test_video_object_segmentation.py index e4adeb26..6f0e7c2a 100644 --- a/tests/pipelines/test_video_object_segmentation.py +++ b/tests/pipelines/test_video_object_segmentation.py @@ -9,11 +9,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import masks_visualization -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VideoObjectSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoObjectSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = 'video-object-segmentation' diff --git a/tests/pipelines/test_video_panoptic_segmentation.py b/tests/pipelines/test_video_panoptic_segmentation.py index ad038135..cc805812 100644 --- a/tests/pipelines/test_video_panoptic_segmentation.py +++ b/tests/pipelines/test_video_panoptic_segmentation.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VideoPanopticSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoPanopticSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_panoptic_segmentation @@ -32,10 +31,6 @@ class VideoPanopticSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): print(f'video summarization output:\n {result}.') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_video_single_object_tracking.py b/tests/pipelines/test_video_single_object_tracking.py index e75ccbb0..c8331649 100644 --- a/tests/pipelines/test_video_single_object_tracking.py +++ b/tests/pipelines/test_video_single_object_tracking.py @@ -5,11 +5,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.cv.image_utils import show_video_tracking_result -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class SingleObjectTracking(unittest.TestCase, DemoCompatibilityCheck): +class SingleObjectTracking(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_single_object_tracking @@ -46,10 +45,6 @@ class SingleObjectTracking(unittest.TestCase, DemoCompatibilityCheck): result = video_single_object_tracking((video_path, init_bbox)) print('result is : ', result[OutputKeys.BOXES]) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': 
unittest.main() diff --git a/tests/pipelines/test_video_stabilization.py b/tests/pipelines/test_video_stabilization.py index d102f3e1..26501c2d 100644 --- a/tests/pipelines/test_video_stabilization.py +++ b/tests/pipelines/test_video_stabilization.py @@ -7,11 +7,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import VideoStabilizationPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VideoStabilizationTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoStabilizationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_stabilization @@ -42,10 +41,6 @@ class VideoStabilizationTest(unittest.TestCase, DemoCompatibilityCheck): input=self.test_video)[OutputKeys.OUTPUT_VIDEO] print('pipeline: the output video path is {}'.format(out_video_path)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_video_summarization.py b/tests/pipelines/test_video_summarization.py index 1f965c53..dc6a3a80 100644 --- a/tests/pipelines/test_video_summarization.py +++ b/tests/pipelines/test_video_summarization.py @@ -3,11 +3,10 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VideoSummarizationTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoSummarizationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_summarization @@ -30,10 +29,6 @@ class VideoSummarizationTest(unittest.TestCase, DemoCompatibilityCheck): print(f'video summarization output:\n {result}.') - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_video_super_resolution.py b/tests/pipelines/test_video_super_resolution.py index 0da18dd7..2e207887 100644 --- a/tests/pipelines/test_video_super_resolution.py +++ b/tests/pipelines/test_video_super_resolution.py @@ -7,11 +7,10 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.cv import VideoSuperResolutionPipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VideoSuperResolutionTest(unittest.TestCase, DemoCompatibilityCheck): +class VideoSuperResolutionTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.video_super_resolution @@ -50,10 +49,6 @@ class VideoSuperResolutionTest(unittest.TestCase, DemoCompatibilityCheck): input=self.test_video)[OutputKeys.OUTPUT_VIDEO] print('pipeline: the output video path is {}'.format(out_video_path)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_vidt_face.py b/tests/pipelines/test_vidt_face.py index 8640d128..e49d9de9 100644 --- a/tests/pipelines/test_vidt_face.py +++ b/tests/pipelines/test_vidt_face.py @@ -5,11 +5,10 @@ from modelscope.models 
import Model from modelscope.models.cv.vidt import VidtModel from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VidtTest(unittest.TestCase, DemoCompatibilityCheck): +class VidtTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_object_detection diff --git a/tests/pipelines/test_vidt_logo.py b/tests/pipelines/test_vidt_logo.py index 143eb205..fce6fe48 100644 --- a/tests/pipelines/test_vidt_logo.py +++ b/tests/pipelines/test_vidt_logo.py @@ -5,11 +5,10 @@ from modelscope.models import Model from modelscope.models.cv.vidt import VidtModel from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VidtTest(unittest.TestCase, DemoCompatibilityCheck): +class VidtTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_object_detection diff --git a/tests/pipelines/test_virtual_try_on.py b/tests/pipelines/test_virtual_try_on.py index 5c18dcc4..c8a55f79 100644 --- a/tests/pipelines/test_virtual_try_on.py +++ b/tests/pipelines/test_virtual_try_on.py @@ -8,11 +8,10 @@ from PIL import Image from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VirtualTryonTest(unittest.TestCase, DemoCompatibilityCheck): +class VirtualTryonTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.virtual_try_on @@ -36,10 +35,6 @@ class VirtualTryonTest(unittest.TestCase, DemoCompatibilityCheck): img = pipeline_virtual_tryon(self.input_imgs)[OutputKeys.OUTPUT_IMG] cv2.imwrite('demo.jpg', img[:, :, ::-1]) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_vision_efficient_tuning.py b/tests/pipelines/test_vision_efficient_tuning.py index c88ed478..acfbb235 100644 --- a/tests/pipelines/test_vision_efficient_tuning.py +++ b/tests/pipelines/test_vision_efficient_tuning.py @@ -6,11 +6,10 @@ from modelscope.models.cv.vision_efficient_tuning.model import \ VisionEfficientTuningModel from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck): +class VisionEfficientTuningTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.vision_efficient_tuning @@ -29,11 +28,6 @@ class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck): model = Model.from_pretrained(model_id) self.assertTrue(model.__class__ == VisionEfficientTuningModel) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_vision_efficient_tuning_adapter_demo_compatibility(self): - self.model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-adapter' - self.compatibility_check() - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_vision_efficient_tuning_lora_run_pipeline(self): model_id = 
'damo/cv_vitb16_classification_vision-efficient-tuning-lora' @@ -48,11 +42,6 @@ class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck): model = Model.from_pretrained(model_id) self.assertTrue(model.__class__ == VisionEfficientTuningModel) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_vision_efficient_tuning_lora_demo_compatibility(self): - self.model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-lora' - self.compatibility_check() - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_vision_efficient_tuning_prefix_run_pipeline(self): model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-prefix' @@ -67,11 +56,6 @@ class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck): model = Model.from_pretrained(model_id) self.assertTrue(model.__class__ == VisionEfficientTuningModel) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_vision_efficient_tuning_prefix_demo_compatibility(self): - self.model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-prefix' - self.compatibility_check() - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_vision_efficient_tuning_prompt_run_pipeline(self): model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-prompt' @@ -86,11 +70,6 @@ class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck): model = Model.from_pretrained(model_id) self.assertTrue(model.__class__ == VisionEfficientTuningModel) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_vision_efficient_tuning_prompt_demo_compatibility(self): - self.model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-prompt' - self.compatibility_check() - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_vision_efficient_tuning_bitfit_run_pipeline(self): model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-bitfit' @@ -105,11 +84,6 @@ class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck): model = Model.from_pretrained(model_id) self.assertTrue(model.__class__ == VisionEfficientTuningModel) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_vision_efficient_tuning_bitfit_demo_compatibility(self): - self.model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-bitfit' - self.compatibility_check() - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_vision_efficient_tuning_sidetuning_run_pipeline(self): model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-sidetuning' @@ -125,11 +99,6 @@ class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck): model = Model.from_pretrained(model_id) self.assertTrue(model.__class__ == VisionEfficientTuningModel) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_vision_efficient_tuning_sidetuning_demo_compatibility(self): - self.model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-sidetuning' - self.compatibility_check() - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_vision_efficient_tuning_utuning_run_pipeline(self): model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-utuning' @@ -144,11 +113,6 @@ class VisionEfficientTuningTest(unittest.TestCase, DemoCompatibilityCheck): model = Model.from_pretrained(model_id) 
self.assertTrue(model.__class__ == VisionEfficientTuningModel) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_vision_efficient_tuning_utuning_demo_compatibility(self): - self.model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-utuning' - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_vision_middleware.py b/tests/pipelines/test_vision_middleware.py index b3531154..e8c1218b 100644 --- a/tests/pipelines/test_vision_middleware.py +++ b/tests/pipelines/test_vision_middleware.py @@ -5,11 +5,10 @@ from modelscope.models import Model from modelscope.models.cv.vision_middleware import VisionMiddlewareModel from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VisionMiddlewareTest(unittest.TestCase, DemoCompatibilityCheck): +class VisionMiddlewareTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.image_segmentation diff --git a/tests/pipelines/test_vop_retrieval.py b/tests/pipelines/test_vop_retrieval.py index c9c356c5..78e7eecc 100644 --- a/tests/pipelines/test_vop_retrieval.py +++ b/tests/pipelines/test_vop_retrieval.py @@ -5,11 +5,10 @@ from modelscope.models import Model from modelscope.models.cv.vop_retrieval import VoP from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VopRetrievalTest(unittest.TestCase, DemoCompatibilityCheck): +class VopRetrievalTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.vop_retrieval diff --git a/tests/pipelines/test_vop_retrieval_sebias.py b/tests/pipelines/test_vop_retrieval_sebias.py index bea1bc45..a129f7f0 100644 --- a/tests/pipelines/test_vop_retrieval_sebias.py +++ b/tests/pipelines/test_vop_retrieval_sebias.py @@ -5,11 +5,10 @@ from modelscope.models import Model from modelscope.models.cv.vop_retrieval import VideoTextRetrievalModelSeries from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VopRetrievalTest(unittest.TestCase, DemoCompatibilityCheck): +class VopRetrievalTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.vop_retrieval diff --git a/tests/pipelines/test_vop_retrieval_separtial.py b/tests/pipelines/test_vop_retrieval_separtial.py index 942fbd3b..c5832aaa 100644 --- a/tests/pipelines/test_vop_retrieval_separtial.py +++ b/tests/pipelines/test_vop_retrieval_separtial.py @@ -5,11 +5,10 @@ from modelscope.models import Model from modelscope.models.cv.vop_retrieval import VideoTextRetrievalModelSeries from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VopRetrievalTest(unittest.TestCase, DemoCompatibilityCheck): +class VopRetrievalTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.vop_retrieval diff --git a/tests/pipelines/test_vop_retrieval_seproj.py b/tests/pipelines/test_vop_retrieval_seproj.py index a371ac36..2fceb2e7 100644 --- a/tests/pipelines/test_vop_retrieval_seproj.py +++ b/tests/pipelines/test_vop_retrieval_seproj.py @@ -5,11 +5,10 @@ from 
modelscope.models import Model from modelscope.models.cv.vop_retrieval import VideoTextRetrievalModelSeries from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class VopRetrievalTest(unittest.TestCase, DemoCompatibilityCheck): +class VopRetrievalTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.vop_retrieval diff --git a/tests/pipelines/test_wenet_automatic_speech_recognition.py b/tests/pipelines/test_wenet_automatic_speech_recognition.py index 4adf8119..ac47cea7 100644 --- a/tests/pipelines/test_wenet_automatic_speech_recognition.py +++ b/tests/pipelines/test_wenet_automatic_speech_recognition.py @@ -10,7 +10,6 @@ import soundfile from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.utils.constant import ColorCodes, Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import download_and_untar, test_level @@ -20,8 +19,7 @@ WAV_FILE = 'data/test/audios/asr_example.wav' URL_FILE = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example.wav' -class WeNetAutomaticSpeechRecognitionTest(unittest.TestCase, - DemoCompatibilityCheck): +class WeNetAutomaticSpeechRecognitionTest(unittest.TestCase): action_info = { 'test_run_with_pcm': { 'checking_item': OutputKeys.TEXT, diff --git a/tests/pipelines/test_word_segmentation.py b/tests/pipelines/test_word_segmentation.py index f8bdaef7..f8c9e078 100644 --- a/tests/pipelines/test_word_segmentation.py +++ b/tests/pipelines/test_word_segmentation.py @@ -10,12 +10,11 @@ from modelscope.pipelines.nlp import WordSegmentationPipeline from modelscope.preprocessors import \ TokenClassificationTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool from modelscope.utils.test_utils import test_level -class WordSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): +class WordSegmentationTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.word_segmentation @@ -164,10 +163,6 @@ class WordSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(task=Tasks.word_segmentation) print(pipeline_ins(input=self.sentence)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_zero_shot_classification.py b/tests/pipelines/test_zero_shot_classification.py index f9a52b42..89832d18 100644 --- a/tests/pipelines/test_zero_shot_classification.py +++ b/tests/pipelines/test_zero_shot_classification.py @@ -9,12 +9,11 @@ from modelscope.pipelines.nlp import ZeroShotClassificationPipeline from modelscope.preprocessors import \ ZeroShotClassificationTransformersPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.regress_test_utils import IgnoreKeyFn, MsRegressTool from modelscope.utils.test_utils import test_level -class ZeroShotClassificationTest(unittest.TestCase, DemoCompatibilityCheck): +class ZeroShotClassificationTest(unittest.TestCase): def setUp(self) -> None: self.task = 
Tasks.zero_shot_classification @@ -79,10 +78,6 @@ class ZeroShotClassificationTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(task=Tasks.zero_shot_classification) print(pipeline_ins(input=self.sentence, candidate_labels=self.labels)) - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - if __name__ == '__main__': unittest.main() diff --git a/tests/run_config.yaml b/tests/run_config.yaml index 773c6397..ba678468 100644 --- a/tests/run_config.yaml +++ b/tests/run_config.yaml @@ -21,6 +21,7 @@ isolated: # test cases that may require excessive anmount of GPU memory or run - test_image_instance_segmentation_trainer.py - test_image_portrait_enhancement_trainer.py - test_translation_trainer.py + - test_translation_evaluation_trainer.py - test_unifold.py - test_automatic_post_editing.py - test_mplug_tasks.py @@ -66,7 +67,7 @@ isolated: # test cases that may require excessive anmount of GPU memory or run envs: default: # default env, case not in other env will in default, pytorch. dependencies: # requirement packages,pip install before test case run. - - numpy>=1.20 + - numpy>=1.20,<=1.21.0 - protobuf<4,>=3.20.2 tensorflow1x: # cases excuted tensorflow1.x framework. requirements: # requirements files run before test case run. @@ -77,6 +78,7 @@ envs: - test_text_to_speech.py - test_csanmt_translation.py - test_translation_trainer.py + - test_translation_evaluation_trainer.py - test_ocr_detection.py - test_automatic_speech_recognition.py - test_image_matting.py @@ -85,3 +87,21 @@ envs: - test_image_style_transfer.py - test_image_portrait_stylization_trainer.py - test_language_identification.py + - test_language_guided_video_summarization_trainer.py + - test_motion_generation.py + - test_universal_matting.py + - test_dialog_modeling.py + - test_trainer.py + - test_abnormal_object_detection.py + - test_image_face_fusion.py + - test_ocr_detection_db_trainer.py + - test_language_guided_video_summarization.py + - test_interactive_translation_pipeline.py + - test_image_defrcn_fewshot_trainer.py + - test_automatic_post_editing.py + - test_human_reconstruction.py + - test_nerf_recon_acc_trainer.py + - test_nerf_recon_acc.py + - test_speech_signal_process.py + - test_tensorboard_hook.py + - test_efficient_diffusion_tuning_trainer.py diff --git a/tests/trainers/audio/test_kws_farfield_trainer.py b/tests/trainers/audio/test_kws_farfield_trainer.py index cc2b38f6..9bf65e04 100644 --- a/tests/trainers/audio/test_kws_farfield_trainer.py +++ b/tests/trainers/audio/test_kws_farfield_trainer.py @@ -23,6 +23,7 @@ class TestKwsFarfieldTrainer(unittest.TestCase): if not os.path.exists(self.tmp_dir): os.makedirs(self.tmp_dir) self.model_id = 'damo/speech_dfsmn_kws_char_farfield_16k_nihaomiya' + self.model_id_iot = 'damo/speech_dfsmn_kws_char_farfield_iot_16k_nihaomiya' train_pos_list = self.create_list('pos.list', POS_FILE) train_neg_list = self.create_list('neg.list', NEG_FILE) @@ -83,3 +84,23 @@ class TestKwsFarfieldTrainer(unittest.TestCase): f'work_dir:{self.tmp_dir}') self.assertIn('val_dataset.bin', results_files, f'work_dir:{self.tmp_dir}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_normal_iot(self): + kwargs = dict( + model=self.model_id_iot, + work_dir=self.tmp_dir, + workers=2, + max_epochs=2, + train_iters_per_epoch=2, + val_iters_per_epoch=1, + custom_conf=self.custom_conf) + + trainer = build_trainer( + Trainers.speech_dfsmn_kws_char_farfield, 
default_args=kwargs) + trainer.train() + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files, + f'work_dir:{self.tmp_dir}') + self.assertIn('val_dataset.bin', results_files, + f'work_dir:{self.tmp_dir}') diff --git a/tests/trainers/easycv/__init__.py b/tests/trainers/cli/__init__.py similarity index 100% rename from tests/trainers/easycv/__init__.py rename to tests/trainers/cli/__init__.py diff --git a/tests/trainers/cli/test_cli.py b/tests/trainers/cli/test_cli.py new file mode 100644 index 00000000..b9fb7539 --- /dev/null +++ b/tests/trainers/cli/test_cli.py @@ -0,0 +1,52 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +import json + +from modelscope import MsDataset, TrainingArgs, build_dataset_from_file +from modelscope.utils.test_utils import test_level + + +class TestCli(unittest.TestCase): + + def setUp(self) -> None: + content = [{ + 'dataset': { + 'dataset_name': 'clue', + 'subset_name': 'cmnli', + 'split': 'train', + }, + 'column_mapping': { + 'sentence1': 'sentence1', + 'sentence2': 'sentence2', + 'label': 'label', + }, + 'split': 0.8, + }, { + 'dataset': { + 'dataset_name': 'glue', + 'subset_name': 'mnli', + 'split': 'validation_matched', + }, + 'column_mapping': { + 'premise': 'sentence1', + 'hypothesis': 'sentence2', + 'label': 'label', + }, + 'split': 'val', + }] + with open('./dataset.json', 'w') as f: + json.dump(content, f) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_merge_dataset_from_file(self): + dataset = MsDataset.load('clue', subset_name='cmnli', split='train') + dataset2 = MsDataset.load( + 'glue', subset_name='mnli', split='validation_matched') + training_args = TrainingArgs(dataset_json_file='./dataset.json') + train, test = build_dataset_from_file(training_args.dataset_json_file) + self.assertEqual(len(train) + len(test), len(dataset) + len(dataset2)) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/easycv/test_easycv_trainer.py b/tests/trainers/easycv/test_easycv_trainer.py deleted file mode 100644 index 11f9a739..00000000 --- a/tests/trainers/easycv/test_easycv_trainer.py +++ /dev/null @@ -1,238 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import glob -import os -import shutil -import tempfile -import unittest - -import json -import torch - -from modelscope.metainfo import Models, Pipelines, Trainers -from modelscope.msdatasets import MsDataset -from modelscope.trainers import build_trainer -from modelscope.utils.config import Config -from modelscope.utils.constant import LogKeys, ModeKeys, Tasks -from modelscope.utils.logger import get_logger -from modelscope.utils.test_utils import DistributedTestCase, test_level -from modelscope.utils.torch_utils import is_master - - -def train_func(work_dir, dist=False, log_interval=3, imgs_per_gpu=4): - import easycv - config_path = os.path.join( - os.path.dirname(easycv.__file__), - 'configs/detection/yolox/yolox_s_8xb16_300e_coco.py') - - cfg = Config.from_file(config_path) - - cfg.log_config.update( - dict(hooks=[ - dict(type='TextLoggerHook'), - dict(type='TensorboardLoggerHook') - ])) # not support TensorboardLoggerHookV2 - - ms_cfg_file = os.path.join(work_dir, 'ms_yolox_s_8xb16_300e_coco.json') - from easycv.utils.ms_utils import to_ms_config - - if is_master(): - to_ms_config( - cfg, - dump=True, - task=Tasks.image_object_detection, - ms_model_name=Models.yolox, - pipeline_name=Pipelines.easycv_detection, - save_path=ms_cfg_file) - - trainer_name = Trainers.easycv - train_dataset = MsDataset.load( - dataset_name='small_coco_for_test', namespace='EasyCV', split='train') - eval_dataset = MsDataset.load( - dataset_name='small_coco_for_test', - namespace='EasyCV', - split='validation') - - cfg_options = { - 'train.max_epochs': - 2, - 'train.dataloader.batch_size_per_gpu': - imgs_per_gpu, - 'evaluation.dataloader.batch_size_per_gpu': - 2, - 'train.hooks': [ - { - 'type': 'CheckpointHook', - 'interval': 1 - }, - { - 'type': 'EvaluationHook', - 'interval': 1 - }, - { - 'type': 'TextLoggerHook', - 'ignore_rounding_keys': None, - 'interval': log_interval - }, - ] - } - kwargs = dict( - cfg_file=ms_cfg_file, - train_dataset=train_dataset, - eval_dataset=eval_dataset, - work_dir=work_dir, - cfg_options=cfg_options, - launcher='pytorch' if dist else None) - - trainer = build_trainer(trainer_name, kwargs) - trainer.train() - - -@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') -class EasyCVTrainerTestSingleGpu(unittest.TestCase): - - def setUp(self): - self.logger = get_logger() - self.logger.info(('Testing %s.%s' % - (type(self).__name__, self._testMethodName))) - self.tmp_dir = tempfile.TemporaryDirectory().name - if not os.path.exists(self.tmp_dir): - os.makedirs(self.tmp_dir) - - def tearDown(self): - super().tearDown() - shutil.rmtree(self.tmp_dir, ignore_errors=True) - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_single_gpu(self): - train_func(self.tmp_dir) - - results_files = os.listdir(self.tmp_dir) - json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) - self.assertEqual(len(json_files), 1) - - with open(json_files[0], 'r', encoding='utf-8') as f: - lines = [i.strip() for i in f.readlines()] - - self.assertDictContainsSubset( - { - LogKeys.MODE: ModeKeys.TRAIN, - LogKeys.EPOCH: 1, - LogKeys.ITER: 3, - LogKeys.LR: 0.00029 - }, json.loads(lines[0])) - self.assertDictContainsSubset( - { - LogKeys.MODE: ModeKeys.EVAL, - LogKeys.EPOCH: 1, - LogKeys.ITER: 10 - }, json.loads(lines[1])) - self.assertDictContainsSubset( - { - LogKeys.MODE: ModeKeys.TRAIN, - LogKeys.EPOCH: 2, - LogKeys.ITER: 3, - LogKeys.LR: 0.00205 - }, json.loads(lines[2])) - self.assertDictContainsSubset( - { - LogKeys.MODE: ModeKeys.EVAL, - 
LogKeys.EPOCH: 2, - LogKeys.ITER: 10 - }, json.loads(lines[3])) - self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) - self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) - for i in [0, 2]: - self.assertIn(LogKeys.DATA_LOAD_TIME, lines[i]) - self.assertIn(LogKeys.ITER_TIME, lines[i]) - self.assertIn(LogKeys.MEMORY, lines[i]) - self.assertIn('total_loss', lines[i]) - for i in [1, 3]: - self.assertIn( - 'CocoDetectionEvaluator_DetectionBoxes_Precision/mAP', - lines[i]) - self.assertIn('DetectionBoxes_Precision/mAP', lines[i]) - self.assertIn('DetectionBoxes_Precision/mAP@.50IOU', lines[i]) - self.assertIn('DetectionBoxes_Precision/mAP@.75IOU', lines[i]) - self.assertIn('DetectionBoxes_Precision/mAP (small)', lines[i]) - - -@unittest.skipIf(not torch.cuda.is_available() - or torch.cuda.device_count() <= 1, 'distributed unittest') -class EasyCVTrainerTestMultiGpus(DistributedTestCase): - - def setUp(self): - self.logger = get_logger() - self.logger.info(('Testing %s.%s' % - (type(self).__name__, self._testMethodName))) - self.tmp_dir = tempfile.TemporaryDirectory().name - if not os.path.exists(self.tmp_dir): - os.makedirs(self.tmp_dir) - - def tearDown(self): - super().tearDown() - shutil.rmtree(self.tmp_dir, ignore_errors=True) - - @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') - def test_multi_gpus(self): - self.start( - train_func, - num_gpus=2, - work_dir=self.tmp_dir, - dist=True, - log_interval=2, - imgs_per_gpu=5) - - results_files = os.listdir(self.tmp_dir) - json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) - self.assertEqual(len(json_files), 1) - - with open(json_files[0], 'r', encoding='utf-8') as f: - lines = [i.strip() for i in f.readlines()] - - self.assertDictContainsSubset( - { - LogKeys.MODE: ModeKeys.TRAIN, - LogKeys.EPOCH: 1, - LogKeys.ITER: 2, - LogKeys.LR: 0.0002 - }, json.loads(lines[0])) - self.assertDictContainsSubset( - { - LogKeys.MODE: ModeKeys.EVAL, - LogKeys.EPOCH: 1, - LogKeys.ITER: 5 - }, json.loads(lines[1])) - self.assertDictContainsSubset( - { - LogKeys.MODE: ModeKeys.TRAIN, - LogKeys.EPOCH: 2, - LogKeys.ITER: 2, - LogKeys.LR: 0.0018 - }, json.loads(lines[2])) - self.assertDictContainsSubset( - { - LogKeys.MODE: ModeKeys.EVAL, - LogKeys.EPOCH: 2, - LogKeys.ITER: 5 - }, json.loads(lines[3])) - - self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) - self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) - - for i in [0, 2]: - self.assertIn(LogKeys.DATA_LOAD_TIME, lines[i]) - self.assertIn(LogKeys.ITER_TIME, lines[i]) - self.assertIn(LogKeys.MEMORY, lines[i]) - self.assertIn('total_loss', lines[i]) - for i in [1, 3]: - self.assertIn( - 'CocoDetectionEvaluator_DetectionBoxes_Precision/mAP', - lines[i]) - self.assertIn('DetectionBoxes_Precision/mAP', lines[i]) - self.assertIn('DetectionBoxes_Precision/mAP@.50IOU', lines[i]) - self.assertIn('DetectionBoxes_Precision/mAP@.75IOU', lines[i]) - self.assertIn('DetectionBoxes_Precision/mAP (small)', lines[i]) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/trainers/easycv/test_easycv_trainer_detection_dino.py b/tests/trainers/easycv/test_easycv_trainer_detection_dino.py deleted file mode 100644 index 90d1f691..00000000 --- a/tests/trainers/easycv/test_easycv_trainer_detection_dino.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import glob -import os -import shutil -import tempfile -import unittest - -import torch - -from modelscope.metainfo import Trainers -from modelscope.msdatasets import MsDataset -from modelscope.trainers import build_trainer -from modelscope.utils.constant import LogKeys -from modelscope.utils.logger import get_logger -from modelscope.utils.test_utils import test_level - - -@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') -class EasyCVTrainerTestDetectionDino(unittest.TestCase): - model_id = 'damo/cv_swinl_image-object-detection_dino' - - def setUp(self): - self.logger = get_logger() - self.logger.info(('Testing %s.%s' % - (type(self).__name__, self._testMethodName))) - - def _train(self, tmp_dir): - cfg_options = {'train.max_epochs': 1} - - trainer_name = Trainers.easycv - - train_dataset = MsDataset.load( - dataset_name='small_coco_for_test', - namespace='EasyCV', - split='train') - eval_dataset = MsDataset.load( - dataset_name='small_coco_for_test', - namespace='EasyCV', - split='validation') - - kwargs = dict( - model=self.model_id, - train_dataset=train_dataset, - eval_dataset=eval_dataset, - work_dir=tmp_dir, - cfg_options=cfg_options) - - trainer = build_trainer(trainer_name, kwargs) - trainer.train() - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_trainer_single_gpu(self): - temp_file_dir = tempfile.TemporaryDirectory() - tmp_dir = temp_file_dir.name - if not os.path.exists(tmp_dir): - os.makedirs(tmp_dir) - - self._train(tmp_dir) - - results_files = os.listdir(tmp_dir) - json_files = glob.glob(os.path.join(tmp_dir, '*.log.json')) - self.assertEqual(len(json_files), 1) - self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) - - temp_file_dir.cleanup() - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py b/tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py deleted file mode 100644 index e4f0c57e..00000000 --- a/tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import glob -import os -import shutil -import tempfile -import unittest - -import torch - -from modelscope.metainfo import Trainers -from modelscope.msdatasets import MsDataset -from modelscope.trainers import build_trainer -from modelscope.utils.constant import DownloadMode, LogKeys, Tasks -from modelscope.utils.logger import get_logger -from modelscope.utils.test_utils import test_level - - -@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') -class EasyCVTrainerTestFace2DKeypoints(unittest.TestCase): - model_id = 'damo/cv_mobilenet_face-2d-keypoints_alignment' - - def setUp(self): - self.logger = get_logger() - self.logger.info(('Testing %s.%s' % - (type(self).__name__, self._testMethodName))) - - def _train(self, tmp_dir): - cfg_options = {'train.max_epochs': 2} - - trainer_name = Trainers.easycv - - train_dataset = MsDataset.load( - dataset_name='face_2d_keypoints_dataset', - namespace='modelscope', - split='train', - download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS) - eval_dataset = MsDataset.load( - dataset_name='face_2d_keypoints_dataset', - namespace='modelscope', - split='train', - download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS) - - kwargs = dict( - model=self.model_id, - train_dataset=train_dataset, - eval_dataset=eval_dataset, - work_dir=tmp_dir, - cfg_options=cfg_options) - - trainer = build_trainer(trainer_name, kwargs) - trainer.train() - - @unittest.skip( - 'skip since face_2d_keypoints_dataset is set to private for now') - def test_trainer_single_gpu(self): - temp_file_dir = tempfile.TemporaryDirectory() - tmp_dir = temp_file_dir.name - if not os.path.exists(tmp_dir): - os.makedirs(tmp_dir) - - self._train(tmp_dir) - - results_files = os.listdir(tmp_dir) - json_files = glob.glob(os.path.join(tmp_dir, '*.log.json')) - self.assertEqual(len(json_files), 1) - self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) - - temp_file_dir.cleanup() - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/trainers/easycv/test_easycv_trainer_hand_2d_keypoints.py b/tests/trainers/easycv/test_easycv_trainer_hand_2d_keypoints.py deleted file mode 100644 index 270ecbc4..00000000 --- a/tests/trainers/easycv/test_easycv_trainer_hand_2d_keypoints.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import glob -import os -import shutil -import tempfile -import unittest - -import torch - -from modelscope.metainfo import Trainers -from modelscope.msdatasets import MsDataset -from modelscope.trainers import build_trainer -from modelscope.utils.constant import DownloadMode, LogKeys, Tasks -from modelscope.utils.logger import get_logger -from modelscope.utils.test_utils import test_level - - -@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') -class EasyCVTrainerTestHand2dKeypoints(unittest.TestCase): - model_id = 'damo/cv_hrnetw18_hand-pose-keypoints_coco-wholebody' - - def setUp(self): - self.logger = get_logger() - self.logger.info(('Testing %s.%s' % - (type(self).__name__, self._testMethodName))) - self.tmp_dir = tempfile.TemporaryDirectory().name - if not os.path.exists(self.tmp_dir): - os.makedirs(self.tmp_dir) - - def tearDown(self): - super().tearDown() - shutil.rmtree(self.tmp_dir, ignore_errors=True) - - def _train(self): - cfg_options = {'train.max_epochs': 20} - - trainer_name = Trainers.easycv - - train_dataset = MsDataset.load( - dataset_name='cv_hand_2d_keypoints_coco_wholebody', - namespace='chenhyer', - split='subtrain', - download_mode=DownloadMode.FORCE_REDOWNLOAD) - eval_dataset = MsDataset.load( - dataset_name='cv_hand_2d_keypoints_coco_wholebody', - namespace='chenhyer', - split='subtrain', - download_mode=DownloadMode.FORCE_REDOWNLOAD) - - kwargs = dict( - model=self.model_id, - train_dataset=train_dataset, - eval_dataset=eval_dataset, - work_dir=self.tmp_dir, - cfg_options=cfg_options) - - trainer = build_trainer(trainer_name, kwargs) - trainer.train() - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_trainer_single_gpu(self): - self._train() - - results_files = os.listdir(self.tmp_dir) - json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) - self.assertEqual(len(json_files), 1) - self.assertIn(f'{LogKeys.EPOCH}_10.pth', results_files) - self.assertIn(f'{LogKeys.EPOCH}_20.pth', results_files) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/trainers/easycv/test_easycv_trainer_hand_detection.py b/tests/trainers/easycv/test_easycv_trainer_hand_detection.py deleted file mode 100644 index 60ea1319..00000000 --- a/tests/trainers/easycv/test_easycv_trainer_hand_detection.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import glob -import os -import shutil -import tempfile -import unittest - -import torch - -from modelscope.metainfo import Trainers -from modelscope.msdatasets import MsDataset -from modelscope.trainers import build_trainer -from modelscope.utils.constant import DownloadMode, LogKeys, Tasks -from modelscope.utils.logger import get_logger -from modelscope.utils.test_utils import test_level - - -class EasyCVTrainerTestHandDetection(unittest.TestCase): - model_id = 'damo/cv_yolox-pai_hand-detection' - - def setUp(self): - self.logger = get_logger() - self.logger.info(('Testing %s.%s' % - (type(self).__name__, self._testMethodName))) - - def _train(self, tmp_dir): - cfg_options = {'train.max_epochs': 2} - - trainer_name = Trainers.easycv - - train_dataset = MsDataset.load( - dataset_name='hand_detection_dataset', split='subtrain') - eval_dataset = MsDataset.load( - dataset_name='hand_detection_dataset', split='subtrain') - - kwargs = dict( - model=self.model_id, - train_dataset=train_dataset, - eval_dataset=eval_dataset, - work_dir=tmp_dir, - cfg_options=cfg_options) - - trainer = build_trainer(trainer_name, kwargs) - trainer.train() - - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') - def test_trainer_single_gpu(self): - temp_file_dir = tempfile.TemporaryDirectory() - tmp_dir = temp_file_dir.name - if not os.path.exists(tmp_dir): - os.makedirs(tmp_dir) - - self._train(tmp_dir) - - results_files = os.listdir(tmp_dir) - # json_files = glob.glob(os.path.join(tmp_dir, '*.log.json')) - self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) - - temp_file_dir.cleanup() - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/trainers/easycv/test_easycv_trainer_panoptic_mask2former.py b/tests/trainers/easycv/test_easycv_trainer_panoptic_mask2former.py deleted file mode 100644 index f6a6c41a..00000000 --- a/tests/trainers/easycv/test_easycv_trainer_panoptic_mask2former.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import glob -import os -import shutil -import tempfile -import unittest - -import torch -from mmcv.runner.hooks import HOOKS as MMCV_HOOKS - -from modelscope.metainfo import Trainers -from modelscope.msdatasets import MsDataset -from modelscope.trainers import build_trainer -from modelscope.utils.constant import LogKeys, Tasks -from modelscope.utils.logger import get_logger -from modelscope.utils.test_utils import test_level - - -@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') -class EasyCVTrainerTestPanopticMask2Former(unittest.TestCase): - - def setUp(self): - self.logger = get_logger() - self.logger.info(('Testing %s.%s' % - (type(self).__name__, self._testMethodName))) - self.tmp_dir = tempfile.TemporaryDirectory().name - if not os.path.exists(self.tmp_dir): - os.makedirs(self.tmp_dir) - - def tearDown(self): - super().tearDown() - shutil.rmtree(self.tmp_dir, ignore_errors=True) - - def _train(self): - cfg_options = {'train.max_epochs': 1} - - trainer_name = Trainers.easycv - - train_dataset = MsDataset.load( - dataset_name='COCO2017_panopic_subset', split='train') - eval_dataset = MsDataset.load( - dataset_name='COCO2017_panopic_subset', split='validation') - kwargs = dict( - model='damo/cv_r50_panoptic-segmentation_cocopan', - train_dataset=train_dataset, - eval_dataset=eval_dataset, - work_dir=self.tmp_dir, - cfg_options=cfg_options) - - trainer = build_trainer(trainer_name, kwargs) - - hook_name = 'YOLOXLrUpdaterHook' - mmcv_hook = MMCV_HOOKS._module_dict.pop(hook_name, None) - - trainer.train() - - MMCV_HOOKS._module_dict[hook_name] = mmcv_hook - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_single_gpu_mask2former_r50(self): - self._train() - - results_files = os.listdir(self.tmp_dir) - json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) - self.assertEqual(len(json_files), 1) - self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/trainers/easycv/test_easycv_trainer_realtime_object_detection.py b/tests/trainers/easycv/test_easycv_trainer_realtime_object_detection.py deleted file mode 100644 index 1171eed4..00000000 --- a/tests/trainers/easycv/test_easycv_trainer_realtime_object_detection.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import glob -import os -import shutil -import tempfile -import unittest - -import torch - -from modelscope.hub.snapshot_download import snapshot_download -from modelscope.metainfo import Trainers -from modelscope.msdatasets import MsDataset -from modelscope.trainers import build_trainer -from modelscope.utils.constant import DownloadMode, LogKeys, Tasks -from modelscope.utils.logger import get_logger -from modelscope.utils.test_utils import test_level - - -@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') -class EasyCVTrainerTestRealtimeObjectDetection(unittest.TestCase): - model_id = 'damo/cv_cspnet_image-object-detection_yolox' - - def setUp(self): - self.logger = get_logger() - self.logger.info(('Testing %s.%s' % - (type(self).__name__, self._testMethodName))) - - def _train(self, tmp_dir): - # cfg_options = {'train.max_epochs': 2} - self.cache_path = snapshot_download(self.model_id) - cfg_options = { - 'train.max_epochs': - 2, - 'train.dataloader.batch_size_per_gpu': - 4, - 'evaluation.dataloader.batch_size_per_gpu': - 2, - 'train.hooks': [ - { - 'type': 'CheckpointHook', - 'interval': 1 - }, - { - 'type': 'EvaluationHook', - 'interval': 1 - }, - { - 'type': 'TextLoggerHook', - 'ignore_rounding_keys': None, - 'interval': 2 - }, - ], - 'load_from': - os.path.join(self.cache_path, 'pytorch_model.bin') - } - - trainer_name = Trainers.easycv - - train_dataset = MsDataset.load( - dataset_name='small_coco_for_test', - namespace='EasyCV', - split='train') - eval_dataset = MsDataset.load( - dataset_name='small_coco_for_test', - namespace='EasyCV', - split='validation') - - kwargs = dict( - model=self.model_id, - # model_revision='v1.0.2', - train_dataset=train_dataset, - eval_dataset=eval_dataset, - work_dir=tmp_dir, - cfg_options=cfg_options) - - trainer = build_trainer(trainer_name, kwargs) - trainer.train() - - @unittest.skipUnless( - test_level() >= 0, - 'skip since face_2d_keypoints_dataset is set to private for now') - def test_trainer_single_gpu(self): - temp_file_dir = tempfile.TemporaryDirectory() - tmp_dir = temp_file_dir.name - if not os.path.exists(tmp_dir): - os.makedirs(tmp_dir) - - self._train(tmp_dir) - - results_files = os.listdir(tmp_dir) - json_files = glob.glob(os.path.join(tmp_dir, '*.log.json')) - self.assertEqual(len(json_files), 1) - self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) - - temp_file_dir.cleanup() - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/trainers/easycv/test_segformer.py b/tests/trainers/easycv/test_segformer.py deleted file mode 100644 index 90a66635..00000000 --- a/tests/trainers/easycv/test_segformer.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import glob -import os -import shutil -import tempfile -import unittest - -import torch - -from modelscope.metainfo import Trainers -from modelscope.msdatasets import MsDataset -from modelscope.trainers import build_trainer -from modelscope.utils.constant import LogKeys, Tasks -from modelscope.utils.logger import get_logger -from modelscope.utils.test_utils import test_level - - -@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') -class EasyCVTrainerTestSegformer(unittest.TestCase): - - def setUp(self): - self.logger = get_logger() - self.logger.info(('Testing %s.%s' % - (type(self).__name__, self._testMethodName))) - self.tmp_dir = tempfile.TemporaryDirectory().name - if not os.path.exists(self.tmp_dir): - os.makedirs(self.tmp_dir) - - def tearDown(self): - super().tearDown() - shutil.rmtree(self.tmp_dir, ignore_errors=True) - - def _train(self): - - cfg_options = { - 'train.max_epochs': 2, - 'model.decode_head.norm_cfg.type': 'BN' - } - - trainer_name = Trainers.easycv - train_dataset = MsDataset.load( - dataset_name='small_coco_stuff164k', - namespace='EasyCV', - split='train') - eval_dataset = MsDataset.load( - dataset_name='small_coco_stuff164k', - namespace='EasyCV', - split='validation') - kwargs = dict( - model= - 'damo/cv_segformer-b0_image_semantic-segmentation_coco-stuff164k', - train_dataset=train_dataset, - eval_dataset=eval_dataset, - work_dir=self.tmp_dir, - cfg_options=cfg_options) - - trainer = build_trainer(trainer_name, kwargs) - trainer.train() - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_single_gpu_segformer(self): - self._train() - - results_files = os.listdir(self.tmp_dir) - json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) - self.assertEqual(len(json_files), 1) - self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) - self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/trainers/hooks/test_lr_scheduler_hook.py b/tests/trainers/hooks/test_lr_scheduler_hook.py index cd28b055..432fb39a 100644 --- a/tests/trainers/hooks/test_lr_scheduler_hook.py +++ b/tests/trainers/hooks/test_lr_scheduler_hook.py @@ -105,6 +105,7 @@ class LrSchedulerHookTest(unittest.TestCase): train_dataloader = trainer._build_dataloader_with_dataset( trainer.train_dataset, **trainer.cfg.train.get('dataloader', {})) trainer.register_optimizers_hook() + trainer.register_processors() trainer._hooks = [ hook for hook in trainer._hooks if hook.__class__.__name__ not in ['CheckpointHook', 'TextLoggerHook', 'IterTimerHook'] @@ -177,6 +178,7 @@ class LrSchedulerHookTest(unittest.TestCase): train_dataloader = trainer._build_dataloader_with_dataset( trainer.train_dataset, **trainer.cfg.train.get('dataloader', {})) trainer.register_optimizers_hook() + trainer.register_processors() trainer._hooks = [ hook for hook in trainer._hooks if hook.__class__.__name__ not in ['CheckpointHook', 'TextLoggerHook', 'IterTimerHook'] @@ -365,6 +367,7 @@ class PlateauLrSchedulerHookTest(unittest.TestCase): trainer.train_dataloader = train_dataloader trainer.data_loader = train_dataloader trainer.register_optimizers_hook() + trainer.register_processors() trainer._hooks = [ hook for hook in trainer._hooks if hook.__class__.__name__ not in ['CheckpointHook', 'TextLoggerHook', 'IterTimerHook'] diff --git a/tests/trainers/hooks/test_optimizer_hook.py b/tests/trainers/hooks/test_optimizer_hook.py index b9899c36..ed0e202a 100644 --- a/tests/trainers/hooks/test_optimizer_hook.py +++ 
b/tests/trainers/hooks/test_optimizer_hook.py @@ -150,6 +150,7 @@ class TorchAMPOptimizerHookTest(unittest.TestCase): train_dataloader = trainer._build_dataloader_with_dataset( trainer.train_dataset, **trainer.cfg.train.get('dataloader', {})) trainer.register_optimizers_hook() + trainer.register_processors() trainer._hooks = [ hook for hook in trainer._hooks if hook.__class__.__name__ not in ['CheckpointHook', 'TextLoggerHook', 'IterTimerHook'] diff --git a/tests/trainers/model_trainer_map.py b/tests/trainers/model_trainer_map.py index 4057c331..4e9005f7 100644 --- a/tests/trainers/model_trainer_map.py +++ b/tests/trainers/model_trainer_map.py @@ -11,33 +11,18 @@ model_trainer_map = { ['tests/trainers/audio/test_separation_trainer.py'], 'speech_tts/speech_sambert-hifigan_tts_zh-cn_multisp_pretrain_16k': ['tests/trainers/audio/test_tts_trainer.py'], - 'damo/cv_mobilenet_face-2d-keypoints_alignment': - ['tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py'], - 'damo/cv_hrnetw18_hand-pose-keypoints_coco-wholebody': - ['tests/trainers/easycv/test_easycv_trainer_hand_2d_keypoints.py'], - 'damo/cv_yolox-pai_hand-detection': - ['tests/trainers/easycv/test_easycv_trainer_hand_detection.py'], - 'damo/cv_r50_panoptic-segmentation_cocopan': - ['tests/trainers/easycv/test_easycv_trainer_panoptic_mask2former.py'], - 'damo/cv_segformer-b0_image_semantic-segmentation_coco-stuff164k': - ['tests/trainers/easycv/test_segformer.py'], 'damo/cv_resnet_carddetection_scrfd34gkps': ['tests/trainers/test_card_detection_scrfd_trainer.py'], - 'damo/multi-modal_clip-vit-base-patch16_zh': [ - 'tests/trainers/test_clip_trainer.py' - ], - 'damo/nlp_space_pretrained-dialog-model': [ - 'tests/trainers/test_dialog_intent_trainer.py' - ], - 'damo/cv_resnet_facedetection_scrfd10gkps': [ - 'tests/trainers/test_face_detection_scrfd_trainer.py' - ], - 'damo/nlp_structbert_faq-question-answering_chinese-base': [ - 'tests/trainers/test_finetune_faq_question_answering.py' - ], - 'PAI/nlp_gpt3_text-generation_0.35B_MoE-64': [ - 'tests/trainers/test_finetune_gpt_moe.py' - ], + 'damo/multi-modal_clip-vit-base-patch16_zh': + ['tests/trainers/test_clip_trainer.py'], + 'damo/nlp_space_pretrained-dialog-model': + ['tests/trainers/test_dialog_intent_trainer.py'], + 'damo/cv_resnet_facedetection_scrfd10gkps': + ['tests/trainers/test_face_detection_scrfd_trainer.py'], + 'damo/nlp_structbert_faq-question-answering_chinese-base': + ['tests/trainers/test_finetune_faq_question_answering.py'], + 'PAI/nlp_gpt3_text-generation_0.35B_MoE-64': + ['tests/trainers/test_finetune_gpt_moe.py'], 'damo/nlp_gpt3_text-generation_1.3B': [ 'tests/trainers/test_finetune_gpt3.py' ], @@ -139,6 +124,12 @@ model_trainer_map = { 'damo/nlp_csanmt_translation_en2es': [ 'tests/trainers/test_translation_trainer.py' ], + 'damo/nlp_unite_mup_translation_evaluation_multilingual_base': [ + 'tests/trainers/test_translation_evaluation_trainer.py' + ], + 'damo/nlp_unite_mup_translation_evaluation_multilingual_large': [ + 'tests/trainers/test_translation_evaluation_trainer.py' + ], 'damo/cv_googlenet_pgl-video-summarization': [ 'tests/trainers/test_video_summarization_trainer.py' ], diff --git a/tests/trainers/test_trainer_with_nlp.py b/tests/trainers/test_trainer_with_nlp.py index ceb04e15..a736d4fa 100644 --- a/tests/trainers/test_trainer_with_nlp.py +++ b/tests/trainers/test_trainer_with_nlp.py @@ -9,6 +9,7 @@ import unittest import numpy as np import torch from packaging import version +from torch.utils.data import RandomSampler from 
modelscope.hub.snapshot_download import snapshot_download from modelscope.metainfo import Metrics @@ -204,12 +205,20 @@ class TestTrainerWithNlp(unittest.TestCase): cfg.preprocessor.val['label2id'] = {'0': 0, '1': 1} cfg.train.dataloader.batch_size_per_gpu = 2 cfg.train.hooks = [{ - 'type': 'BestCkptSaverHook', - 'interval': 1, - 'by_epoch': False, - 'metric_key': 'accuracy', - 'max_checkpoint_num': 4, - 'restore_best': True, + 'type': + 'BestCkptSaverHook', + 'interval': + 1, + 'by_epoch': + False, + 'output_dir': + os.path.join(self.tmp_dir, 'output_test_best'), + 'metric_key': + 'accuracy', + 'max_checkpoint_num': + 4, + 'restore_best': + True, }, { 'type': 'TextLoggerHook', 'interval': 1 @@ -270,7 +279,7 @@ class TestTrainerWithNlp(unittest.TestCase): os.path.join(self.tmp_dir, 'output', 'pytorch_model.bin'))) self.assertTrue( os.path.isfile( - os.path.join(self.tmp_dir, 'output_best', + os.path.join(self.tmp_dir, 'output_test_best', 'pytorch_model.bin'))) md51 = hashlib.md5( pathlib.Path( @@ -282,7 +291,7 @@ class TestTrainerWithNlp(unittest.TestCase): self.assertEqual(md51, md52) md51 = hashlib.md5( pathlib.Path( - os.path.join(self.tmp_dir, 'output_best', + os.path.join(self.tmp_dir, 'output_test_best', 'pytorch_model.bin')).read_bytes()).hexdigest() md52 = hashlib.md5( pathlib.Path( @@ -472,6 +481,34 @@ class TestTrainerWithNlp(unittest.TestCase): cache_path + '/pytorch_model.bin', saving_fn=saving_fn)) self.assertTrue(os.path.isfile(f'{tmp_dir}/predicts.txt')) + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_with_custom_sampler(self): + tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + + model_id = 'damo/nlp_structbert_sentence-similarity_chinese-tiny' + cache_path = snapshot_download(model_id) + model = SbertForSequenceClassification.from_pretrained(cache_path) + + class CustomSampler(RandomSampler): + + pass + + kwargs = dict( + cfg_file=os.path.join(cache_path, ModelFile.CONFIGURATION), + model=model, + train_dataset=self.dataset, + eval_dataset=self.dataset, + samplers=CustomSampler(self.dataset), + work_dir=self.tmp_dir) + + trainer = build_trainer(default_args=kwargs) + trainer.train() + self.assertTrue( + type(trainer.train_dataloader.sampler) == CustomSampler) + self.assertTrue(type(trainer.eval_dataloader.sampler) == CustomSampler) + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_trainer_with_prediction(self): tmp_dir = tempfile.TemporaryDirectory().name diff --git a/tests/trainers/test_training_args.py b/tests/trainers/test_training_args.py index 6e4d306e..e8f6d8a2 100644 --- a/tests/trainers/test_training_args.py +++ b/tests/trainers/test_training_args.py @@ -1,8 +1,8 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import unittest -from modelscope.trainers.default_config import DEFAULT_CONFIG -from modelscope.trainers.training_args import CliArgumentParser, TrainingArgs +from modelscope import TrainingArgs +from modelscope.trainers.cli_argument_parser import CliArgumentParser from modelscope.utils.test_utils import test_level @@ -29,14 +29,14 @@ class TrainingArgsTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_flatten_args(self): - cfg = DEFAULT_CONFIG + training_args = TrainingArgs() input_args = [ '--optimizer_params', 'weight_decay=0.8,eps=1e-6,correct_bias=False', '--lr_scheduler_params', 'initial_lr=3e-5,niter_decay=1' ] - training_args = TrainingArgs.from_cli(input_args) - cfg = training_args(cfg) + training_args = training_args.parse_cli(input_args) + cfg, _ = training_args.to_config() self.assertAlmostEqual(cfg.train.optimizer.weight_decay, 0.8) self.assertAlmostEqual(cfg.train.optimizer.eps, 1e-6) self.assertFalse(cfg.train.optimizer.correct_bias) diff --git a/tests/trainers/test_translation_evaluation_trainer.py b/tests/trainers/test_translation_evaluation_trainer.py new file mode 100644 index 00000000..139427da --- /dev/null +++ b/tests/trainers/test_translation_evaluation_trainer.py @@ -0,0 +1,30 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +from modelscope.metainfo import Trainers +from modelscope.trainers import build_trainer +from modelscope.utils.test_utils import test_level + + +class TranslationEvaluationTest(unittest.TestCase): + + def setUp(self) -> None: + self.name = Trainers.translation_evaluation_trainer + self.model_id_large = 'damo/nlp_unite_mup_translation_evaluation_multilingual_large' + self.model_id_base = 'damo/nlp_unite_mup_translation_evaluation_multilingual_base' + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_unite_mup_large(self) -> None: + default_args = {'model': self.model_id_large} + trainer = build_trainer(name=self.name, default_args=default_args) + trainer.train() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_unite_mup_base(self) -> None: + default_args = {'model': self.model_id_base} + trainer = build_trainer(name=self.name, default_args=default_args) + trainer.train() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/utils/test_input_output.py b/tests/utils/test_input_output.py new file mode 100644 index 00000000..53b75a39 --- /dev/null +++ b/tests/utils/test_input_output.py @@ -0,0 +1,142 @@ +import base64 +import unittest + +import json + +from modelscope.utils.constant import Tasks +from modelscope.utils.input_output import ( + PipelineInfomation, service_base64_input_to_pipeline_input) + + +def encode_image_to_base64(image): + base64_str = str(base64.b64encode(image), 'utf-8') + return base64_str + + +class PipelineInputOutputTest(unittest.TestCase): + + def test_template_pipeline_dict_input(self): + pipeline_info = PipelineInfomation( + Tasks.task_template, 'PipelineTemplate', + 'modelscope/pipelines/pipeline_template.py') + schema = pipeline_info.schema + expect_schema = { + 'input': { + 'type': 'object', + 'properties': { + 'image': { + 'type': 'string', + 'description': + 'Base64 encoded image file or url string.' + }, + 'text': { + 'type': 'string', + 'description': 'The input text.' 
+ } + } + }, + 'parameters': { + 'type': 'object', + 'properties': { + 'max_length': { + 'type': 'integer', + 'default': 1024 + }, + 'top_p': { + 'type': 'number', + 'default': 0.8 + }, + 'postprocess_param1': { + 'type': 'string', + 'default': None + } + } + }, + 'output': { + 'type': 'object', + 'properties': { + 'boxes': { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, + 'output_img': { + 'type': 'string', + 'description': 'The base64 encoded image.' + }, + 'text_embedding': { + 'type': 'array', + 'items': { + 'type': 'number' + } + } + } + } + } + assert expect_schema == schema + + def test_template_pipeline_list_input(self): + pipeline_info = PipelineInfomation( + Tasks.text_classification, 'LanguageIdentificationPipeline', + 'modelscope/pipelines/nlp/language_identification_pipline.py') + schema = pipeline_info.schema + expect_schema = { + 'input': { + 'type': 'object', + 'properties': { + 'text': { + 'type': 'string', + 'description': 'The input text.' + }, + 'text2': { + 'type': 'string', + 'description': 'The input text.' + } + } + }, + 'parameters': {}, + 'output': { + 'type': 'object', + 'properties': { + 'scores': { + 'type': 'array', + 'items': { + 'type': 'number' + } + }, + 'labels': { + 'type': 'array', + 'items': { + 'type': 'string' + } + } + } + } + } + assert expect_schema == schema + + def test_input_output_encode_decode(self): + with open('data/test/images/image_captioning.png', 'rb') as f: + image = f.read() + text = 'hello schema.' + request_json = { + 'input': { + 'image': encode_image_to_base64(image), + 'text': text + }, + 'parameters': { + 'max_length': 10000, + 'top_p': 0.8 + } + } + pipeline_inputs, parameters = service_base64_input_to_pipeline_input( + Tasks.task_template, request_json) + assert 'image' in pipeline_inputs + assert pipeline_inputs['text'] == text + assert parameters['max_length'] == 10000 + assert parameters['top_p'] == 0.8 + + +if __name__ == '__main__': + unittest.main() diff --git a/tools/convert_megatron_ckpt.py b/tools/convert_megatron_ckpt.py new file mode 100644 index 00000000..f9b8f8f3 --- /dev/null +++ b/tools/convert_megatron_ckpt.py @@ -0,0 +1,31 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+ +import argparse +import os + +from modelscope.models import Model +from modelscope.utils.megatron_utils import convert_megatron_checkpoint + + +def unwrap_model(model): + for name in ('model', 'module', 'dist_model'): + while hasattr(model, name): + model = getattr(model, name) + return model + + +parser = argparse.ArgumentParser( + description='Split or merge your megatron_based checkpoint.') +parser.add_argument( + '--model_dir', type=str, required=True, help='Checkpoint to be converted.') +parser.add_argument( + '--target_dir', type=str, required=True, help='Target save path.') +args = parser.parse_args() + +model = Model.from_pretrained( + args.model_dir, + rank=int(os.getenv('RANK')), + megatron_cfg={'tensor_model_parallel_size': int(os.getenv('WORLD_SIZE'))}) +unwrapped_model = unwrap_model(model) + +convert_megatron_checkpoint(unwrapped_model, model.model_dir, args.target_dir) diff --git a/tools/convert_megatron_ckpt.sh b/tools/convert_megatron_ckpt.sh new file mode 100644 index 00000000..86e94877 --- /dev/null +++ b/tools/convert_megatron_ckpt.sh @@ -0,0 +1,7 @@ +TARGET_TENSOR_MODEL_PARALLEL_SIZE=1 +ORIGIN_MODEL='damo/nlp_gpt3_text-generation_1.3B' +TARGET_DIR='./target' + +torchrun --nproc_per_node $TARGET_TENSOR_MODEL_PARALLEL_SIZE tools/convert_megatron_ckpt.py \ + --model_dir $ORIGIN_MODEL \ + --target_dir $TARGET_DIR \