diff --git a/README.md b/README.md
index 944c1f07..1da48ef2 100644
--- a/README.md
+++ b/README.md
@@ -1,16 +1,26 @@
 # Introduction
 
-ModelScope library is targeted to support training, evaluation and inference for the state of the art models provided by Mind and further support third-party models provided by users outside alibaba.
+[ModelScope](https://www.modelscope.cn) is a “Model-as-a-Service” (MaaS) platform that seeks to bring together the most advanced machine learning models from the AI community, and to streamline the process of leveraging and applying AI models. The core ModelScope library enables developers to perform model inference, training and evaluation, through rich layers of API designs that facilitate a unified experience across state-of-the-art models from different AI domains.
 
-# Design doc
+The Python library offers the layered APIs necessary for model contributors to integrate models from CV, NLP, Speech, Multi-Modality, as well as scientific computation, into the ModelScope ecosystem. Implementations for all these different models are encapsulated within the library in a way that allows easy and unified access. With such integration, model inference, finetuning, and evaluation can be done with only a few lines of code. At the same time, flexibility is provided so that different components of a model application can be customized where necessary.
 
-Please refer to alidoc [link](https://alidocs.dingtalk.com/i/nodes/OBldywvrKxo89xmAO05yJQk2ngpNbLz4?nav=spaces&navQuery=spaceId%3Dnb9XJNlZxbgrOXyA&iframeQuery=utm_source%3Dportal%26utm_medium%3Dportal_space_file_tree)
+Apart from harboring implementations of various models, the ModelScope library also enables the necessary interactions with the backend services of ModelScope, particularly with the Model-Hub and Dataset-Hub. Such interactions allow the management of entities (models and datasets) to be performed seamlessly under the hood, including entity lookup, version control, and cache management.
 
-# Development doc
+# Installation
 
-Please refer to [develop.md](docs/source/develop.md)
+Please refer to [installation](https://modelscope.cn/docs/%E7%8E%AF%E5%A2%83%E5%AE%89%E8%A3%85).
 
-# ChangeLog
-* 20/05/2022 First release version
+# Get Started
 
-Refer to [change_log.md](docs/source/change_log.md) for more details
+You can refer to [quick_start](https://modelscope.cn/docs/%E5%BF%AB%E9%80%9F%E5%BC%80%E5%A7%8B) to get started quickly.
+
+We also provide other documentation, including:
+* [Introduction to tasks](https://modelscope.cn/docs/%E4%BB%BB%E5%8A%A1%E7%9A%84%E4%BB%8B%E7%BB%8D)
+* [Use pipeline for model inference](https://modelscope.cn/docs/%E6%A8%A1%E5%9E%8B%E7%9A%84%E6%8E%A8%E7%90%86Pipeline)
+* [Finetune example](https://modelscope.cn/docs/%E6%A8%A1%E5%9E%8B%E7%9A%84%E8%AE%AD%E7%BB%83Train)
+* [Preprocessing of data](https://modelscope.cn/docs/%E6%95%B0%E6%8D%AE%E7%9A%84%E9%A2%84%E5%A4%84%E7%90%86)
+* [Evaluation metrics](https://modelscope.cn/docs/%E6%A8%A1%E5%9E%8B%E7%9A%84%E8%AF%84%E4%BC%B0)
+
+# License
+
+This project is licensed under the [Apache License (Version 2.0)](https://github.com/modelscope/modelscope/blob/master/LICENSE).
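As context for the "few lines of code" claim in the README above, here is a minimal inference sketch using the library's `pipeline` interface. The task name and model ID are illustrative and not part of this diff:

```python
from modelscope.pipelines import pipeline

# Build an inference pipeline for a task; ModelScope resolves the model
# from the Model-Hub and caches it locally (model ID is illustrative).
img_captioning = pipeline(
    'image-captioning', model='damo/ofa_image-caption_coco_large_en')

# Run inference on a single image (URL or local path).
result = img_captioning('https://example.com/sample.jpg')
print(result)  # e.g. {'caption': ...}
```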
diff --git a/modelscope/models/multi_modal/ofa_for_all_tasks.py b/modelscope/models/multi_modal/ofa_for_all_tasks.py
index 2c6034e8..fc578b25 100644
--- a/modelscope/models/multi_modal/ofa_for_all_tasks.py
+++ b/modelscope/models/multi_modal/ofa_for_all_tasks.py
@@ -1,6 +1,7 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 import math
 import os
+import re
 import string
 from functools import partial
 from os import path as osp
@@ -110,6 +111,8 @@ class OfaForAllTasks(TorchModel):
             Tasks.text_classification: inference_d[self.gen_type],
             Tasks.image_classification: inference_d[self.gen_type],
         }
+        pattern_str = '((?<=[^ a-zA-Z0-9.,:!?]) +| +(?=[^ a-zA-Z0-9.,:!?]))'
+        self.pattern = re.compile(pattern_str)
 
     def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
         input = move_to_device(input, self.model.device)
@@ -135,8 +138,18 @@ class OfaForAllTasks(TorchModel):
         caption = input[OutputKeys.CAPTION]
         result_l = list()
         for cap in caption:
-            result_l.append(cap.translate(self.transtab).strip())
+            if self.language == 'en':
+                result_l.append(cap.translate(self.transtab).strip())
+            else:
+                result_l.append(cap)
         input[OutputKeys.CAPTION] = result_l
+        if self.gen_type == 'generation' and self.language in [
+                'zh', 'cn'
+        ] and self.cfg.task != Tasks.visual_grounding:
+            ret_l = list()
+            for text in input[OFA_TASK_KEY_MAPPING[self.cfg.task]]:
+                ret_l.append(self.detokenizer(text))
+            input[OFA_TASK_KEY_MAPPING[self.cfg.task]] = ret_l
         return input
 
     def _text_gen_inference(self, input):
@@ -314,3 +327,6 @@ class OfaForAllTasks(TorchModel):
             save_function=partial(save_function, with_meta=False),
             config=config,
             **kwargs)
+
+    def detokenizer(self, text):
+        return self.pattern.sub('', text)
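The `detokenizer` added above removes the spaces that token-by-token decoding leaves around characters outside the ASCII set `[ a-zA-Z0-9.,:!?]` (e.g. CJK characters), while preserving spaces between ASCII words. A standalone check of the same pattern (sample strings are illustrative):

```python
import re

# Same pattern as in OfaForAllTasks: a run of spaces is deleted when it is
# preceded or followed by a character outside [ a-zA-Z0-9.,:!?].
pattern = re.compile('((?<=[^ a-zA-Z0-9.,:!?]) +| +(?=[^ a-zA-Z0-9.,:!?]))')

print(pattern.sub('', '这 是 一 个 测 试'))   # -> '这是一个测试'
print(pattern.sub('', '标 题 hello world'))  # -> '标题hello world'
```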
diff --git a/modelscope/preprocessors/multi_modal.py b/modelscope/preprocessors/multi_modal.py
index 13876058..3a3ae820 100644
--- a/modelscope/preprocessors/multi_modal.py
+++ b/modelscope/preprocessors/multi_modal.py
@@ -77,7 +77,7 @@ class OfaPreprocessor(Preprocessor):
             data[key] = item
         return data
 
-    def _ofa_input_compatibility_conversion(self, data):
+    def _ofa_input_compatibility_conversion(self, data):  # fake
         if 'image' in data and self.cfg.model.get('type', None) == 'ofa':
             if isinstance(data['image'], str):
                 image = load_image(data['image'])
diff --git a/modelscope/preprocessors/ofa/ocr_recognition.py b/modelscope/preprocessors/ofa/ocr_recognition.py
index a0342c14..58e3ea6e 100644
--- a/modelscope/preprocessors/ofa/ocr_recognition.py
+++ b/modelscope/preprocessors/ofa/ocr_recognition.py
@@ -73,21 +73,14 @@ class OfaOcrRecognitionPreprocessor(OfaBasePreprocessor):
         """
         super(OfaOcrRecognitionPreprocessor,
               self).__init__(cfg, model_dir, mode, *args, **kwargs)
-        # Initialize transform
-        if self.cfg.model.imagenet_default_mean_and_std:
-            mean = IMAGENET_DEFAULT_MEAN
-            std = IMAGENET_DEFAULT_STD
-        else:
-            mean = [0.5, 0.5, 0.5]
-            std = [0.5, 0.5, 0.5]
 
         self.patch_resize_transform = transforms.Compose([
             lambda image: ocr_resize(
                 image,
-                self.cfg.model.patch_image_size,
-                is_document=self.cfg.model.is_document),
+                self.patch_image_size,
+                is_document=self.cfg.model.get('is_document', False)),
             transforms.ToTensor(),
-            transforms.Normalize(mean=mean, std=std),
+            transforms.Normalize(mean=self.mean, std=self.std),
         ])
 
     def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
diff --git a/modelscope/trainers/multi_modal/ofa/ofa_trainer_utils.py b/modelscope/trainers/multi_modal/ofa/ofa_trainer_utils.py
index 3c38884c..3930febb 100644
--- a/modelscope/trainers/multi_modal/ofa/ofa_trainer_utils.py
+++ b/modelscope/trainers/multi_modal/ofa/ofa_trainer_utils.py
@@ -103,20 +103,20 @@ class AdjustLabelSmoothedCrossEntropyCriterion(_Loss):
 
     def __init__(self, args):
         super().__init__()
-        self.sentence_avg = args.sentence_avg
-        self.eps = args.label_smoothing
-        self.ignore_prefix_size = args.ignore_prefix_size
-        self.ignore_eos = args.ignore_eos
-        self.report_accuracy = args.report_accuracy
-        self.drop_worst_ratio = args.drop_worst_ratio
-        self.drop_worst_after = args.drop_worst_after
-        self.use_rdrop = args.use_rdrop
-        self.reg_alpha = args.reg_alpha
-        self.sample_patch_num = args.sample_patch_num
+        self.sentence_avg = args.get('sentence_avg', False)
+        self.eps = args.get('label_smoothing', 0.1)
+        self.ignore_prefix_size = args.get('ignore_prefix_size', 0)
+        self.ignore_eos = args.get('ignore_eos', False)
+        self.report_accuracy = args.get('report_accuracy', False)
+        self.drop_worst_ratio = args.get('drop_worst_ratio', 0.0)
+        self.drop_worst_after = args.get('drop_worst_after', 0)
+        self.use_rdrop = args.get('use_rdrop', False)
+        self.reg_alpha = args.get('reg_alpha', 1.0)
+        self.sample_patch_num = args.get('sample_patch_num', 196)
         self.constraint_start = None
         self.constraint_end = None
-        if args.constraint_range:
+        if args.get('constraint_range', None):
             constraint_start, constraint_end = args.constraint_range.split(',')
             self.constraint_start = int(constraint_start)
             self.constraint_end = int(constraint_end)
diff --git a/requirements/multi-modal.txt b/requirements/multi-modal.txt
index 578f0b54..31e9601d 100644
--- a/requirements/multi-modal.txt
+++ b/requirements/multi-modal.txt
@@ -2,6 +2,8 @@ ftfy>=6.0.3
 ofa>=0.0.2
 pycocoevalcap>=1.2
 pycocotools>=2.0.4
+# compatible with taming-transformers-rom1504
+pytorch_lightning<=1.7.7
 # rouge-score was just recently updated from 0.0.4 to 0.0.7,
 # which introduced compatibility issues that are being investigated
 rouge_score<=0.0.4
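The criterion hunk above switches from attribute access to `args.get(key, default)`, so a training configuration may omit any of these fields; in particular `label_smoothing` (stored as `self.eps`) now defaults to 0.1. As a reminder of what that parameter controls, here is a minimal, generic label-smoothed NLL sketch; this is one common formulation, not the full OFA criterion, which additionally handles R-Drop, prefix ignoring, and worst-sample dropping:

```python
import torch

def label_smoothed_nll(lprobs: torch.Tensor, target: torch.Tensor,
                       eps: float = 0.1) -> torch.Tensor:
    """Generic label-smoothed negative log-likelihood.

    lprobs: (N, V) log-probabilities; target: (N,) gold token indices.
    """
    # Standard NLL of the gold tokens.
    nll = -lprobs.gather(1, target.unsqueeze(1)).squeeze(1)
    # Spread eps of the probability mass uniformly over the vocabulary.
    smooth = -lprobs.mean(dim=1)
    return ((1.0 - eps) * nll + eps * smooth).mean()
```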