diff --git a/.github/workflows/citest.yaml b/.github/workflows/citest.yaml index 00c6bbbf..1399e49d 100644 --- a/.github/workflows/citest.yaml +++ b/.github/workflows/citest.yaml @@ -27,7 +27,7 @@ on: - "tools/**" - ".dev_scripts/**" - "README.md" - - "README_zh-CN.md" + - "README_*.md" - "NOTICE" - ".github/workflows/lint.yaml" - ".github/workflows/publish.yaml" diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index 6165ab7a..7c2e180a 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -20,11 +20,10 @@ jobs: with: python-version: '3.7' - name: Install wheel - run: pip install wheel + run: pip install wheel && pip install -r requirements/framework.txt - name: Build ModelScope run: python setup.py sdist bdist_wheel - name: Publish package to PyPI run: | - echo "I got run" - #pip install twine - #twine upload package/dist/* --skip-existing -u __token__ -p ${{ secrets.PYPI_API_TOKEN }} + pip install twine + twine upload package/dist/* --skip-existing -u __token__ -p ${{ secrets.PYPI_API_TOKEN }} diff --git a/README.md b/README.md index f0c65769..5ca0e26e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,10 @@ +
+ English | 中文
+
+ +Some representative examples include: + +NLP: + +* [nlp_gpt3_text-generation_2.7B](https://modelscope.cn/models/damo/nlp_gpt3_text-generation_2.7B) + +* [ChatYuan-large](https://modelscope.cn/models/ClueAI/ChatYuan-large) + +* [mengzi-t5-base](https://modelscope.cn/models/langboat/mengzi-t5-base) + +* [nlp_csanmt_translation_en2zh](https://modelscope.cn/models/damo/nlp_csanmt_translation_en2zh) + +* [nlp_raner_named-entity-recognition_chinese-base-news](https://modelscope.cn/models/damo/nlp_raner_named-entity-recognition_chinese-base-news) + +* [nlp_structbert_word-segmentation_chinese-base](https://modelscope.cn/models/damo/nlp_structbert_word-segmentation_chinese-base) + +* [Erlangshen-RoBERTa-330M-Sentiment](https://modelscope.cn/models/fengshenbang/Erlangshen-RoBERTa-330M-Sentiment) + +* [nlp_convai_text2sql_pretrain_cn](https://modelscope.cn/models/damo/nlp_convai_text2sql_pretrain_cn) + +Audio: + +* [speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch) + +* [speech_sambert-hifigan_tts_zh-cn_16k](https://modelscope.cn/models/damo/speech_sambert-hifigan_tts_zh-cn_16k) + +* [speech_charctc_kws_phone-xiaoyun](https://modelscope.cn/models/damo/speech_charctc_kws_phone-xiaoyun) + +* [u2pp_conformer-asr-cn-16k-online](https://modelscope.cn/models/wenet/u2pp_conformer-asr-cn-16k-online) + +* [speech_frcrn_ans_cirm_16k](https://modelscope.cn/models/damo/speech_frcrn_ans_cirm_16k) + +* [speech_dfsmn_aec_psm_16k](https://modelscope.cn/models/damo/speech_dfsmn_aec_psm_16k) + + +CV: + +* [cv_tinynas_object-detection_damoyolo](https://modelscope.cn/models/damo/cv_tinynas_object-detection_damoyolo) + +* [cv_unet_person-image-cartoon_compound-models](https://modelscope.cn/models/damo/cv_unet_person-image-cartoon_compound-models) + +* [cv_convnextTiny_ocr-recognition-general_damo](https://modelscope.cn/models/damo/cv_convnextTiny_ocr-recognition-general_damo) + +* [cv_resnet18_human-detection](https://modelscope.cn/models/damo/cv_resnet18_human-detection) + +* [cv_resnet50_face-detection_retinaface](https://modelscope.cn/models/damo/cv_resnet50_face-detection_retinaface) + +* [cv_unet_image-matting](https://modelscope.cn/models/damo/cv_unet_image-matting) + +* [cv_F3Net_product-segmentation](https://modelscope.cn/models/damo/cv_F3Net_product-segmentation) + +* [cv_resnest101_general_recognition](https://modelscope.cn/models/damo/cv_resnest101_general_recognition) + + +Multi-Modal: + +* [multi-modal_clip-vit-base-patch16_zh](https://modelscope.cn/models/damo/multi-modal_clip-vit-base-patch16_zh) + +* [ofa_pretrain_base_zh](https://modelscope.cn/models/damo/ofa_pretrain_base_zh) + +* [Taiyi-Stable-Diffusion-1B-Chinese-v0.1](https://modelscope.cn/models/fengshenbang/Taiyi-Stable-Diffusion-1B-Chinese-v0.1) + +* [mplug_visual-question-answering_coco_large_en](https://modelscope.cn/models/damo/mplug_visual-question-answering_coco_large_en) + +AI for Science: + +* [uni-fold-monomer](https://modelscope.cn/models/DPTech/uni-fold-monomer/summary) + +* [uni-fold-multimer](https://modelscope.cn/models/DPTech/uni-fold-multimer/summary) + +# QuickTour + +We provide unified interface for inference using `pipeline`, fine-tuning and evaluation using `Trainer` for different tasks. 
+ +For any given task with any type of input (image, text, audio, video...), an inference pipeline can be implemented with only a few lines of code, which will automatically load the underlying model to get the inference result, as exemplified below: + +```python +>>> from modelscope.pipelines import pipeline +>>> word_segmentation = pipeline('word-segmentation', model='damo/nlp_structbert_word-segmentation_chinese-base') +>>> word_segmentation('今天天气不错,适合出去游玩') +{'output': '今天 天气 不错 , 适合 出去 游玩'} +``` + +Given an image, portrait matting (a.k.a. background removal) can be accomplished with the following code snippet: + + + +```python +>>> import cv2 +>>> from modelscope.pipelines import pipeline + +>>> portrait_matting = pipeline('portrait-matting') +>>> result = portrait_matting('https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/image_matting.png') +>>> cv2.imwrite('result.png', result['output_img']) +``` + +The output image with the background removed is: + + + +Fine-tuning and evaluation can also be done with a few more lines of code to set up the training dataset and trainer, with the heavy lifting of training and evaluating a model encapsulated in the implementation of the `trainer.train()` and +`trainer.evaluate()` interfaces. + +For example, the GPT-3 base model (1.3B) can be fine-tuned on the Chinese poetry dataset, resulting in a model that can be used for Chinese poetry generation. + +```python +>>> from modelscope.metainfo import Trainers +>>> from modelscope.msdatasets import MsDataset +>>> from modelscope.trainers import build_trainer + +>>> train_dataset = MsDataset.load('chinese-poetry-collection', split='train').remap_columns({'text1': 'src_txt'}) +>>> eval_dataset = MsDataset.load('chinese-poetry-collection', split='test').remap_columns({'text1': 'src_txt'}) +>>> max_epochs = 10 +>>> tmp_dir = './gpt3_poetry' + +>>> kwargs = dict( + model='damo/nlp_gpt3_text-generation_1.3B', + train_dataset=train_dataset, + eval_dataset=eval_dataset, + max_epochs=max_epochs, + work_dir=tmp_dir) + +>>> trainer = build_trainer(name=Trainers.gpt3_trainer, default_args=kwargs) +>>> trainer.train() +``` + +# Why should I use the ModelScope library + +1. A unified and concise user interface is abstracted for different tasks and different models. Model inference and training can be implemented with as few as 3 and 10 lines of code, respectively, making it convenient for users to explore models across different fields in the ModelScope community. All models integrated into ModelScope are ready to use, which makes it easy to get started with AI in both educational and industrial settings. + +2. ModelScope offers a model-centric development and application experience. It streamlines support for model training, inference, export and deployment, and helps users build their own MLOps on top of the ModelScope ecosystem. + +3. For the model inference and training process, a modular design is put in place and a rich set of functional module implementations is provided, making it convenient for users to customize their own inference, training and other processes. + +4. For distributed model training, especially for large models, it provides rich training strategy support, including data parallelism, model parallelism, hybrid parallelism and so on. # Installation -Please refer to [installation](https://modelscope.cn/docs/%E7%8E%AF%E5%A2%83%E5%AE%89%E8%A3%85). 
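Whichever installation route below you choose (the official Docker images or a local Python environment), a quick way to sanity-check the setup is to import the library and confirm the pipeline factory loads. This is a minimal illustrative sketch, assuming the package exposes `__version__` as recent releases do, not an official verification step from the repository:

```python
# Minimal environment sanity check (illustrative; assumes modelscope exposes
# __version__, which current releases define in modelscope/version.py).
import modelscope
print(modelscope.__version__)

# Importing the pipeline factory should succeed without downloading any model weights.
from modelscope.pipelines import pipeline
print(callable(pipeline))  # True
```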
+## Docker -# Get Started +ModelScope Library currently supports popular deep learning frameworks for model training and inference, including PyTorch, TensorFlow and ONNX. All releases are tested and run on Python 3.7+, PyTorch 1.8+, and TensorFlow 1.15 / TensorFlow 2.0+. -You can refer to [quick_start](https://modelscope.cn/docs/%E5%BF%AB%E9%80%9F%E5%BC%80%E5%A7%8B) for quick start. +To allow out-of-the-box usage for all the models on ModelScope, official docker images are provided for all releases. Based on the docker image, developers can skip all environment installation and configuration and use it directly. Currently, the latest versions of the CPU image and GPU image can be obtained from: -We also provide other documentations including: +CPU docker image +```shell +registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-py37-torch1.11.0-tf1.15.5-1.3.0 +``` + +GPU docker image +```shell +registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.3.0-py37-torch1.11.0-tf1.15.5-1.3.0 +``` + +## Setup Local Python Environment + +One can also set up a local ModelScope environment using pip and conda. We suggest [anaconda](https://docs.anaconda.com/anaconda/install/) for creating a local Python environment: + +```shell +conda create -n modelscope python=3.7 +conda activate modelscope +``` + +PyTorch or TensorFlow can be installed separately according to each model's requirements. +* Install PyTorch [doc](https://pytorch.org/get-started/locally/) +* Install TensorFlow [doc](https://www.tensorflow.org/install/pip) + +After installing the necessary machine-learning framework, you can install the modelscope library as follows: + +If you only want to play around with the modelscope framework, or try out model/dataset download, you can install the core modelscope components: +```shell +pip install modelscope +``` + +If you want to use multi-modal models: +```shell +pip install modelscope[multi-modal] +``` + +If you want to use nlp models: +```shell +pip install modelscope[nlp] -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html +``` + +If you want to use cv models: +```shell +pip install modelscope[cv] -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html +``` + +If you want to use audio models: +```shell +pip install modelscope[audio] -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html +``` + +If you want to use science models: +```shell +pip install modelscope[science] -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html +``` + +`Notes`: +1. Currently, some audio-task models only support Python 3.7 and TensorFlow 1.15.4 on Linux. Most other models can be installed and used on Windows and Mac (x86). + +2. Some models in the audio field use the third-party library SoundFile for wav file processing. On Linux systems, users need to manually install libsndfile, the underlying dependency of SoundFile ([doc link](https://github.com/bastibe/python-soundfile#installation)). On Windows and MacOS, it will be installed automatically without user operation. For example, on Ubuntu, you can use the following commands: + ```shell + sudo apt-get update + sudo apt-get install libsndfile1 + ``` + +3. 
Some models in computer vision need mmcv-full, you can refer to mmcv [installation guide](https://github.com/open-mmlab/mmcv#installation), a minimal installation is as follows: + + ```shell + pip uninstall mmcv # if you have installed mmcv, uninstall it + pip install -U openmim + mim install mmcv-full + ``` + + + +# Learn More + +We provide additional documentations including: +* [More detailed Installation Guide](https://modelscope.cn/docs/%E7%8E%AF%E5%A2%83%E5%AE%89%E8%A3%85) * [Introduction to tasks](https://modelscope.cn/docs/%E4%BB%BB%E5%8A%A1%E7%9A%84%E4%BB%8B%E7%BB%8D) * [Use pipeline for model inference](https://modelscope.cn/docs/%E6%A8%A1%E5%9E%8B%E7%9A%84%E6%8E%A8%E7%90%86Pipeline) -* [Finetune example](https://modelscope.cn/docs/%E6%A8%A1%E5%9E%8B%E7%9A%84%E8%AE%AD%E7%BB%83Train) +* [Finetuning example](https://modelscope.cn/docs/%E6%A8%A1%E5%9E%8B%E7%9A%84%E8%AE%AD%E7%BB%83Train) * [Preprocessing of data](https://modelscope.cn/docs/%E6%95%B0%E6%8D%AE%E7%9A%84%E9%A2%84%E5%A4%84%E7%90%86) -* [Evaluation metrics](https://modelscope.cn/docs/%E6%A8%A1%E5%9E%8B%E7%9A%84%E8%AF%84%E4%BC%B0) +* [Evaluation](https://modelscope.cn/docs/%E6%A8%A1%E5%9E%8B%E7%9A%84%E8%AF%84%E4%BC%B0) +* [Contribute your own model to ModelScope](https://modelscope.cn/docs/ModelScope%E6%A8%A1%E5%9E%8B%E6%8E%A5%E5%85%A5%E6%B5%81%E7%A8%8B%E6%A6%82%E8%A7%88) # License diff --git a/README_zh.md b/README_zh.md new file mode 100644 index 00000000..03b7616a --- /dev/null +++ b/README_zh.md @@ -0,0 +1,273 @@ + +
+ English | 中文
+
+ +示例如下: + +自然语言处理: + +* [GPT-3预训练生成模型-中文-2.7B](https://modelscope.cn/models/damo/nlp_gpt3_text-generation_2.7B) + +* [元语功能型对话大模型](https://modelscope.cn/models/ClueAI/ChatYuan-large) + +* [孟子T5预训练生成模型-中文-base](https://modelscope.cn/models/langboat/mengzi-t5-base) + +* [CSANMT连续语义增强机器翻译-英中-通用领域-large](https://modelscope.cn/models/damo/nlp_csanmt_translation_en2zh) + +* [RaNER命名实体识别-中文-新闻领域-base](https://modelscope.cn/models/damo/nlp_raner_named-entity-recognition_chinese-base-news) + +* [BAStructBERT分词-中文-新闻领域-base](https://modelscope.cn/models/damo/nlp_structbert_word-segmentation_chinese-base) + +* [二郎神-RoBERTa-330M-情感分类](https://modelscope.cn/models/fengshenbang/Erlangshen-RoBERTa-330M-Sentiment) + +* [SPACE-T表格问答预训练模型-中文-通用领域-base](https://modelscope.cn/models/damo/nlp_convai_text2sql_pretrain_cn) + +语音: + +* [Paraformer语音识别-中文-通用-16k-离线-large-pytorch](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch) + +* [语音合成-中文-多情感领域-16k-多发音人](https://modelscope.cn/models/damo/speech_sambert-hifigan_tts_zh-cn_16k) + +* [CTC语音唤醒-移动端-单麦-16k-小云小云](https://modelscope.cn/models/damo/speech_charctc_kws_phone-xiaoyun) + +* [WeNet-U2pp_Conformer-语音识别-中文-16k-实时](https://modelscope.cn/models/wenet/u2pp_conformer-asr-cn-16k-online) + +* [FRCRN语音降噪-单麦-16k](https://modelscope.cn/models/damo/speech_frcrn_ans_cirm_16k) + +* [DFSMN回声消除-单麦单参考-16k](https://modelscope.cn/models/damo/speech_dfsmn_aec_psm_16k) + + +计算机视觉: + +* [DAMOYOLO-高性能通用检测模型-S](https://modelscope.cn/models/damo/cv_tinynas_object-detection_damoyolo) + +* [DCT-Net人像卡通化](https://modelscope.cn/models/damo/cv_unet_person-image-cartoon_compound-models) + +* [读光-文字识别-行识别模型-中英-通用领域](https://modelscope.cn/models/damo/cv_convnextTiny_ocr-recognition-general_damo) + +* [人体检测-通用-Base](https://modelscope.cn/models/damo/cv_resnet18_human-detection) + +* [RetinaFace人脸检测关键点模型](https://modelscope.cn/models/damo/cv_resnet50_face-detection_retinaface) + +* [BSHM人像抠图](https://modelscope.cn/models/damo/cv_unet_image-matting) + +* [图像分割-商品展示图场景的商品分割-电商领域](https://modelscope.cn/models/damo/cv_F3Net_product-segmentation) + +* [万物识别-中文-通用领域](https://modelscope.cn/models/damo/cv_resnest101_general_recognition) + + +多模态: + +* [CLIP模型-中文-通用领域-base](https://modelscope.cn/models/damo/multi-modal_clip-vit-base-patch16_zh) + +* [OFA预训练模型-中文-通用领域-base](https://modelscope.cn/models/damo/ofa_pretrain_base_zh) + +* [太乙-Stable-Diffusion-1B-中文-v0.1](https://modelscope.cn/models/fengshenbang/Taiyi-Stable-Diffusion-1B-Chinese-v0.1) + +* [mPLUG视觉问答模型-英文-large](https://modelscope.cn/models/damo/mplug_visual-question-answering_coco_large_en) + +科学计算: + +* [Uni-Fold-Monomer 开源的蛋白质单体结构预测模型](https://modelscope.cn/models/DPTech/uni-fold-monomer/summary) + +* [Uni-Fold-Multimer 开源的蛋白质复合物结构预测模型](https://modelscope.cn/models/DPTech/uni-fold-multimer/summary) + +# 快速上手 + +我们针对不同任务提供了统一的使用接口, 使用`pipeline`进行模型推理、使用`Trainer`进行微调和评估。 + +对于任意类型输入(图像、文本、音频、视频...)的任何任务,只需3行代码即可加载模型并获得推理结果,如下所示: +```python +>>> from modelscope.pipelines import pipeline +>>> word_segmentation = pipeline('word-segmentation',model='damo/nlp_structbert_word-segmentation_chinese-base') +>>> word_segmentation('今天天气不错,适合出去游玩') +{'output': '今天 天气 不错 , 适合 出去 游玩'} +``` + +给定一张图片,你可以使用如下代码进行人像抠图. 
+ + + +```python +>>> import cv2 +>>> from modelscope.pipelines import pipeline + +>>> portrait_matting = pipeline('portrait-matting') +>>> result = portrait_matting('https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/image_matting.png') +>>> cv2.imwrite('result.png', result['output_img']) +``` +输出图像如下 + + +对于微调和评估模型, 你需要通过十多行代码构建dataset和trainer,调用`trainer.train()`和`trainer.evaluate()`即可。 + +例如我们利用gpt3 1.3B的模型,加载是诗歌数据集进行finetune,可以完成古诗生成模型的训练。 +```python +>>> from modelscope.metainfo import Trainers +>>> from modelscope.msdatasets import MsDataset +>>> from modelscope.trainers import build_trainer + +>>> train_dataset = MsDataset.load('chinese-poetry-collection', split='train'). remap_columns({'text1': 'src_txt'}) +>>> eval_dataset = MsDataset.load('chinese-poetry-collection', split='test').remap_columns({'text1': 'src_txt'}) +>>> max_epochs = 10 +>>> tmp_dir = './gpt3_poetry' + +>>> kwargs = dict( + model='damo/nlp_gpt3_text-generation_1.3B', + train_dataset=train_dataset, + eval_dataset=eval_dataset, + max_epochs=max_epochs, + work_dir=tmp_dir) + +>>> trainer = build_trainer(name=Trainers.gpt3_trainer, default_args=kwargs) +>>> trainer.train() +``` + +# 为什么要用ModelScope library + +1. 针对不同任务、不同模型抽象了统一简洁的用户接口,3行代码完成推理,10行代码完成模型训练,方便用户使用ModelScope社区中多个领域的不同模型,开箱即用,便于AI入门和教学。 + +2. 构造以模型为中心的开发应用体验,支持模型训练、推理、导出部署,方便用户基于ModelScope Library构建自己的MLOps. + +3. 针对模型推理、训练流程,进行了模块化的设计,并提供了丰富的功能模块实现,方便用户定制化开发来自定义自己的推理、训练等过程。 + +4. 针对分布式模型训练,尤其是大模型,提供了丰富的训练策略支持,包括数据并行、模型并行、混合并行等。 + +# 安装 + +## 镜像 +ModelScope Library目前支持tensorflow,pytorch深度学习框架进行模型训练、推理, 在Python 3.7+, Pytorch 1.8+, Tensorflow1.15/Tensorflow2.0+测试可运行。 + +为了让大家能直接用上ModelScope平台上的所有模型,无需配置环境,ModelScope提供了官方镜像,方便有需要的开发者获取。地址如下: + +CPU镜像 +```shell +registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-py37-torch1.11.0-tf1.15.5-1.3.0 +``` + +GPU镜像 +```shell +registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.3.0-py37-torch1.11.0-tf1.15.5-1.3.0 +``` + +## 搭建本地Python环境 + +你也可以使用pip和conda搭建本地python环境,我们推荐使用[Anaconda](https://docs.anaconda.com/anaconda/install/),安装完成后,执行如下命令为modelscope library创建对应的python环境: +```shell +conda create -n modelscope python=3.7 +conda activate modelscope +``` + +接下来根据所需使用的模型依赖安装底层计算框架 +* 安装Pytorch [文档链接](https://pytorch.org/get-started/locally/) +* 安装tensorflow [文档链接](https://www.tensorflow.org/install/pip) + + +安装完前置依赖,你可以按照如下方式安装ModelScope Library。 + +ModelScope Libarary由核心框架,以及不同领域模型的对接组件组成。如果只需要ModelScope模型和数据集访问等基础能力,可以只安装ModelScope的核心框架: +```shell +pip install modelscope +``` + +如仅需体验多模态领域的模型,可执行如下命令安装领域依赖: +```shell +pip install modelscope[multi-modal] +``` + +如仅需体验NLP领域模型,可执行如下命令安装领域依赖(因部分依赖由ModelScope独立host,所以需要使用"-f"参数): +```shell +pip install modelscope[nlp] -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html +``` + +If you want to use cv models: +```shell +pip install modelscope[cv] -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html +``` + +如仅需体验语音领域模型,可执行如下命令安装领域依赖(因部分依赖由ModelScope独立host,所以需要使用"-f"参数): +```shell +pip install modelscope[audio] -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html +``` + +`注意`:当前大部分语音模型需要在Linux环境上使用,并且推荐使用python3.7 + tensorflow 1.x的组合。 + +如仅需体验科学计算领域模型,可执行如下命令安装领域依赖(因部分依赖由ModelScope独立host,所以需要使用"-f"参数): +```shell +pip install modelscope[science] -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html +``` + +`注`: +1. 目前部分语音相关的模型仅支持 python3.7,tensorflow1.15.4的Linux环境使用。 其他绝大部分模型可以在windows、mac(x86)上安装使用。. + +2. 
语音领域中一部分模型使用了三方库SoundFile进行wav文件处理,在Linux系统上用户需要手动安装SoundFile的底层依赖库libsndfile,在Windows和MacOS上会自动安装不需要用户操作。详细信息可参考[SoundFile 官网](https://github.com/bastibe/python-soundfile#installation)。以Ubuntu系统为例,用户需要执行如下命令: + ```shell + sudo apt-get update + sudo apt-get install libsndfile1 + ``` + +3. CV领域的少数模型,需要安装mmcv-full, 如果运行过程中提示缺少mmcv,请参考mmcv[安装手册](https://github.com/open-mmlab/mmcv#installation)进行安装。 这里提供一个最简版的mmcv-full安装步骤,但是要达到最优的mmcv-full的安装效果(包括对于cuda版本的兼容),请根据自己的实际机器环境,以mmcv官方安装手册为准。 + ```shell + pip uninstall mmcv # if you have installed mmcv, uninstall it + pip install -U openmim + mim install mmcv-full + ``` + + +# 更多教程 + +除了上述内容,我们还提供如下信息: +* [更加详细的安装文档](https://modelscope.cn/docs/%E7%8E%AF%E5%A2%83%E5%AE%89%E8%A3%85) +* [任务的介绍](https://modelscope.cn/docs/%E4%BB%BB%E5%8A%A1%E7%9A%84%E4%BB%8B%E7%BB%8D) +* [模型推理](https://modelscope.cn/docs/%E6%A8%A1%E5%9E%8B%E7%9A%84%E6%8E%A8%E7%90%86Pipeline) +* [模型微调](https://modelscope.cn/docs/%E6%A8%A1%E5%9E%8B%E7%9A%84%E8%AE%AD%E7%BB%83Train) +* [数据预处理](https://modelscope.cn/docs/%E6%95%B0%E6%8D%AE%E7%9A%84%E9%A2%84%E5%A4%84%E7%90%86) +* [模型评估](https://modelscope.cn/docs/%E6%A8%A1%E5%9E%8B%E7%9A%84%E8%AF%84%E4%BC%B0) +* [贡献模型到ModelScope](https://modelscope.cn/docs/ModelScope%E6%A8%A1%E5%9E%8B%E6%8E%A5%E5%85%A5%E6%B5%81%E7%A8%8B%E6%A6%82%E8%A7%88) + +# License + +本项目使用[Apache License (Version 2.0)](https://github.com/modelscope/modelscope/blob/master/LICENSE). diff --git a/data/resource/inference.gif b/data/resource/inference.gif new file mode 100644 index 00000000..aed20a39 Binary files /dev/null and b/data/resource/inference.gif differ diff --git a/data/resource/portrait_input.png b/data/resource/portrait_input.png new file mode 100644 index 00000000..de3f1918 --- /dev/null +++ b/data/resource/portrait_input.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af83a94899a6d23339c3ecc5c4c58c57c835af57b531a2f4c50461184f820141 +size 603621 diff --git a/data/resource/portrait_output.png b/data/resource/portrait_output.png new file mode 100644 index 00000000..d2ecc916 --- /dev/null +++ b/data/resource/portrait_output.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28f6d784547c295711f1a8b7c83cf7e8d19b6361de56e9a69667fc9c9b8a429a +size 661491 diff --git a/data/test/images/tbs_detection.jpg b/data/test/images/tbs_detection.jpg new file mode 100644 index 00000000..da025d4b --- /dev/null +++ b/data/test/images/tbs_detection.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:301b684c4f44e999654ce279ca82f2571fe902f1e1ada70c0b852c04c2dc667b +size 102532 diff --git a/examples/pytorch/finetune_image_classification.py b/examples/pytorch/image_classification/finetune_image_classification.py similarity index 100% rename from examples/pytorch/finetune_image_classification.py rename to examples/pytorch/image_classification/finetune_image_classification.py diff --git a/examples/pytorch/run_train.sh b/examples/pytorch/image_classification/run_train.sh similarity index 100% rename from examples/pytorch/run_train.sh rename to examples/pytorch/image_classification/run_train.sh diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index c56d16e0..f783222c 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -395,8 +395,8 @@ class HubApi: Args: model_id (str): The model id cutoff_timestamp (int): Tags created before the cutoff will be included. - The timestamp is represented by the seconds elasped from the epoch time. 
- use_cookies (Union[bool, CookieJar], optional): If is cookieJar, we will use this cookie, if True, will + The timestamp is represented by the seconds elapsed from the epoch time. + use_cookies (Union[bool, CookieJar], optional): If is cookieJar, we will use this cookie, if True, will load cookie from local. Defaults to False. Returns: @@ -472,7 +472,7 @@ class HubApi: Args: model_id (str): The model id - use_cookies (Union[bool, CookieJar], optional): If is cookieJar, we will use this cookie, if True, will + use_cookies (Union[bool, CookieJar], optional): If is cookieJar, we will use this cookie, if True, will load cookie from local. Defaults to False. Returns: diff --git a/modelscope/hub/check_model.py b/modelscope/hub/check_model.py index 7fd9f324..ad9593d2 100644 --- a/modelscope/hub/check_model.py +++ b/modelscope/hub/check_model.py @@ -75,7 +75,7 @@ def check_local_model_is_latest( continue else: logger.info( - 'Model is updated from modelscope hub, you can verify from http://www.modelscope.cn.' + 'Model is updated from modelscope hub, you can verify from https://www.modelscope.cn.' ) break else: @@ -86,7 +86,7 @@ def check_local_model_is_latest( continue else: logger.info( - 'Model is updated from modelscope hub, you can verify from http://www.modelscope.cn.' + 'Model is updated from modelscope hub, you can verify from https://www.modelscope.cn.' ) break except: # noqa: E722 diff --git a/modelscope/hub/deploy.py b/modelscope/hub/deploy.py index 565929ff..9eefe61b 100644 --- a/modelscope/hub/deploy.py +++ b/modelscope/hub/deploy.py @@ -185,7 +185,7 @@ class DeleteServiceParameters(AttrsToQueryString): class ServiceDeployer(object): - """Faciliate model deployment on to supported service provider(s). + """Facilitate model deployment on to supported service provider(s). """ def __init__(self, endpoint=None): diff --git a/modelscope/hub/file_download.py b/modelscope/hub/file_download.py index 77f38fe9..23391073 100644 --- a/modelscope/hub/file_download.py +++ b/modelscope/hub/file_download.py @@ -49,7 +49,7 @@ def model_file_download( Can be any of a branch, tag or commit hash. cache_dir (str, Path, optional): Path to the folder where cached files are stored. user_agent (dict, str, optional): The user-agent info in the form of a dictionary or a string. - local_files_only (bool, optional): If `True`, avoid downloading the file and return the path to the + local_files_only (bool, optional): If `True`, avoid downloading the file and return the path to the local cached file if it exists. if `False`, download the file anyway even it exists. cookies (CookieJar, optional): The cookie of download request. @@ -201,7 +201,7 @@ def http_get_file( http headers to carry necessary info when requesting the remote file Raises: - FileDownloadError: Failed download failed. + FileDownloadError: File download failed. 
""" total = -1 diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py index 97be486d..7bddc72a 100644 --- a/modelscope/metainfo.py +++ b/modelscope/metainfo.py @@ -252,6 +252,7 @@ class Pipelines(object): body_3d_keypoints = 'canonical_body-3d-keypoints_video' hand_2d_keypoints = 'hrnetv2w18_hand-2d-keypoints_image' human_detection = 'resnet18-human-detection' + tbs_detection = 'tbs-detection' object_detection = 'vit-object-detection' abnormal_object_detection = 'abnormal-object-detection' easycv_detection = 'easycv-detection' @@ -406,6 +407,7 @@ class Pipelines(object): dialog_state_tracking = 'dialog-state-tracking' zero_shot_classification = 'zero-shot-classification' text_error_correction = 'text-error-correction' + word_alignment = 'word-alignment' plug_generation = 'plug-generation' gpt3_generation = 'gpt3-generation' gpt_moe_generation = 'gpt-moe-generation' @@ -928,6 +930,7 @@ class Preprocessors(object): sbert_token_cls_tokenizer = 'sbert-token-cls-tokenizer' zero_shot_cls_tokenizer = 'zero-shot-cls-tokenizer' text_error_correction = 'text-error-correction' + word_alignment = 'word-alignment' sentence_embedding = 'sentence-embedding' text_ranking = 'text-ranking' sequence_labeling_tokenizer = 'sequence-labeling-tokenizer' diff --git a/modelscope/metrics/ciderD/ciderD_scorer.py b/modelscope/metrics/ciderD/ciderD_scorer.py index 4157ec11..c1308a61 100755 --- a/modelscope/metrics/ciderD/ciderD_scorer.py +++ b/modelscope/metrics/ciderD/ciderD_scorer.py @@ -175,7 +175,7 @@ class CiderScorer(object): :return: array of score for each n-grams cosine similarity ''' delta = float(length_hyp - length_ref) - # measure consine similarity + # measure cosine similarity val = np.array([0.0 for _ in range(self.n)]) for n in range(self.n): # ngram diff --git a/modelscope/metrics/map_metric.py b/modelscope/metrics/map_metric.py index aa7a835a..8f2eebec 100644 --- a/modelscope/metrics/map_metric.py +++ b/modelscope/metrics/map_metric.py @@ -14,9 +14,9 @@ from .builder import METRICS, MetricKeys @METRICS.register_module( group_key=default_group, module_name=Metrics.multi_average_precision) class AveragePrecisionMetric(Metric): - """The metric computation class for multi avarage precision classes. + """The metric computation class for multi average precision classes. - This metric class calculates multi avarage precision for the whole input batches. + This metric class calculates multi average precision for the whole input batches. """ def __init__(self, *args, **kwargs): diff --git a/modelscope/metrics/video_super_resolution_metric/matlab_functions.py b/modelscope/metrics/video_super_resolution_metric/matlab_functions.py index 96dc49ed..43d7802b 100644 --- a/modelscope/metrics/video_super_resolution_metric/matlab_functions.py +++ b/modelscope/metrics/video_super_resolution_metric/matlab_functions.py @@ -26,7 +26,7 @@ def calculate_weights_indices(in_length, out_length, scale, kernel, out_length (int): Output length. scale (float): Scale factor. kernel_width (int): Kernel width. - antialisaing (bool): Whether to apply anti-aliasing when downsampling. + antialiasing (bool): Whether to apply anti-aliasing when downsampling. """ if (scale < 1) and antialiasing: @@ -98,7 +98,7 @@ def imresize(img, scale, antialiasing=True): Numpy: Input image with shape (h, w, c), [0, 1] range. scale (float): Scale factor. The same scale applies for both height and width. - antialisaing (bool): Whether to apply anti-aliasing when downsampling. + antialiasing (bool): Whether to apply anti-aliasing when downsampling. 
Default: True. Returns: Tensor: Output image with shape (c, h, w), [0, 1] range, w/o round. diff --git a/modelscope/metrics/video_super_resolution_metric/niqe.py b/modelscope/metrics/video_super_resolution_metric/niqe.py index ae38ef37..74c2ba61 100644 --- a/modelscope/metrics/video_super_resolution_metric/niqe.py +++ b/modelscope/metrics/video_super_resolution_metric/niqe.py @@ -26,7 +26,7 @@ def estimate_aggd_param(block): block (ndarray): 2D Image block. Returns: tuple: alpha (float), beta_l (float) and beta_r (float) for the AGGD - distribution (Estimating the parames in Equation 7 in the paper). + distribution (Estimating the parameters in Equation 7 in the paper). """ block = block.flatten() gam = np.arange(0.2, 10.001, 0.001) # len = 9801 @@ -124,7 +124,7 @@ def niqe(img, feat = [] for idx_w in range(num_block_w): for idx_h in range(num_block_h): - # process ecah block + # process each block block = img_nomalized[idx_h * block_size_h // scale:(idx_h + 1) * block_size_h // scale, idx_w * block_size_w // scale:(idx_w + 1) diff --git a/modelscope/models/audio/ans/frcrn.py b/modelscope/models/audio/ans/frcrn.py index b74fc273..220a14aa 100644 --- a/modelscope/models/audio/ans/frcrn.py +++ b/modelscope/models/audio/ans/frcrn.py @@ -273,8 +273,8 @@ def si_snr(s1, s2, eps=1e-8): s1_s2_norm = l2_norm(s1, s2) s2_s2_norm = l2_norm(s2, s2) s_target = s1_s2_norm / (s2_s2_norm + eps) * s2 - e_nosie = s1 - s_target + e_noise = s1 - s_target target_norm = l2_norm(s_target, s_target) - noise_norm = l2_norm(e_nosie, e_nosie) + noise_norm = l2_norm(e_noise, e_noise) snr = 10 * torch.log10((target_norm) / (noise_norm + eps) + eps) return torch.mean(snr) diff --git a/modelscope/models/audio/tts/kantts/models/pqmf.py b/modelscope/models/audio/tts/kantts/models/pqmf.py index 35838bf3..d4679af2 100644 --- a/modelscope/models/audio/tts/kantts/models/pqmf.py +++ b/modelscope/models/audio/tts/kantts/models/pqmf.py @@ -126,7 +126,7 @@ class PQMF(torch.nn.Module): Tensor: Output tensor (B, 1, T). """ - # NOTE(kan-bayashi): Power will be dreased so here multipy by # subbands. + # NOTE(kan-bayashi): Power will be dreased so here multiply by # subbands. # Not sure this is the correct way, it is better to check again. x = F.conv_transpose1d( x, self.updown_filter * self.subbands, stride=self.subbands) diff --git a/modelscope/models/audio/tts/kantts/models/sambert/kantts_sambert.py b/modelscope/models/audio/tts/kantts/models/sambert/kantts_sambert.py index bf17d12f..46939cad 100644 --- a/modelscope/models/audio/tts/kantts/models/sambert/kantts_sambert.py +++ b/modelscope/models/audio/tts/kantts/models/sambert/kantts_sambert.py @@ -628,7 +628,7 @@ class PostNet(nn.Module): def forward(self, x, mask=None): postnet_fsmn_output = self.fsmn(x, mask) # The input can also be a packed variable length sequence, - # here we just omit it for simpliciy due to the mask and uni-directional lstm. + # here we just omit it for simplicity due to the mask and uni-directional lstm. postnet_lstm_output, _ = self.lstm(postnet_fsmn_output) mel_residual_output = self.fc(postnet_lstm_output) @@ -736,7 +736,7 @@ class KanTtsSAMBERT(nn.Module): def binarize_attention_parallel(self, attn, in_lens, out_lens): """For training purposes only. Binarizes attention with MAS. - These will no longer recieve a gradient. + These will no longer receive a gradient. 
Args: attn: B x 1 x max_mel_len x max_text_len diff --git a/modelscope/models/audio/tts/kantts/preprocess/audio_processor/audio_processor.py b/modelscope/models/audio/tts/kantts/preprocess/audio_processor/audio_processor.py index fa16f8cb..343cfd9c 100644 --- a/modelscope/models/audio/tts/kantts/preprocess/audio_processor/audio_processor.py +++ b/modelscope/models/audio/tts/kantts/preprocess/audio_processor/audio_processor.py @@ -411,7 +411,7 @@ class AudioProcessor: self.badcase_list.append(wav_basename) else: durs, phone_list = result - # Algin length with melspec + # Align length with melspec if len(self.mel_dict) > 0: pair_mel = self.mel_dict.get(wav_basename, None) if pair_mel is None: diff --git a/modelscope/models/audio/tts/kantts/preprocess/audio_processor/core/dsp.py b/modelscope/models/audio/tts/kantts/preprocess/audio_processor/core/dsp.py index d9fb8b94..04bacb28 100644 --- a/modelscope/models/audio/tts/kantts/preprocess/audio_processor/core/dsp.py +++ b/modelscope/models/audio/tts/kantts/preprocess/audio_processor/core/dsp.py @@ -33,7 +33,7 @@ def save_wav(wav, path, sr): quant_wav = 32767 * wav else: quant_wav = wav - # maxmize the volume to avoid clipping + # maximize the volume to avoid clipping # wav *= 32767 / max(0.01, np.max(np.abs(wav))) wavfile.write(path, sr, quant_wav.astype(np.int16)) diff --git a/modelscope/models/audio/tts/kantts/preprocess/audio_processor/core/utils.py b/modelscope/models/audio/tts/kantts/preprocess/audio_processor/core/utils.py index f122eaed..0004458c 100644 --- a/modelscope/models/audio/tts/kantts/preprocess/audio_processor/core/utils.py +++ b/modelscope/models/audio/tts/kantts/preprocess/audio_processor/core/utils.py @@ -514,7 +514,7 @@ def average_by_duration(x, durs): return None durs_cum = np.cumsum(np.pad(durs, (1, 0), 'constant')) - # average over each symbol's duraion + # average over each symbol's duration x_symbol = np.zeros((durs.shape[0], ), dtype=np.float32) for idx, start, end in zip( range(durs.shape[0]), durs_cum[:-1], durs_cum[1:]): diff --git a/modelscope/models/audio/tts/kantts/preprocess/script_convertor/core/utils.py b/modelscope/models/audio/tts/kantts/preprocess/script_convertor/core/utils.py index d493e3dc..0b8bee0b 100644 --- a/modelscope/models/audio/tts/kantts/preprocess/script_convertor/core/utils.py +++ b/modelscope/models/audio/tts/kantts/preprocess/script_convertor/core/utils.py @@ -61,7 +61,7 @@ def do_prosody_text_normalization(line): text = text.replace('"', ' ') text = text.replace( '-', - '') # don't replace by space because compond word like two-year-old + '') # don't replace by space because compound word like two-year-old text = text.replace( "'", '') # don't replace by space because English word like that's diff --git a/modelscope/models/base/base_head.py b/modelscope/models/base/base_head.py index 6dc04971..a3cf1b07 100644 --- a/modelscope/models/base/base_head.py +++ b/modelscope/models/base/base_head.py @@ -24,7 +24,7 @@ class Head(ABC): def forward(self, *args, **kwargs) -> Dict[str, Any]: """ This method will use the output from backbone model to do any - downstream tasks. Recieve The output from backbone model. + downstream tasks. Receive The output from backbone model. Returns (Dict[str, Any]): The output from downstream task. 
""" diff --git a/modelscope/models/cv/abnormal_object_detection/mmdet_ms/roi_head/mask_scoring_roi_head.py b/modelscope/models/cv/abnormal_object_detection/mmdet_ms/roi_head/mask_scoring_roi_head.py index 60726d80..7f89dca1 100644 --- a/modelscope/models/cv/abnormal_object_detection/mmdet_ms/roi_head/mask_scoring_roi_head.py +++ b/modelscope/models/cv/abnormal_object_detection/mmdet_ms/roi_head/mask_scoring_roi_head.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. # Implementation in this file is modified based on mmdetection -# Originally Apache 2.0 License and publicly avaialbe at https://github.com/open-mmlab/mmdetection +# Originally Apache 2.0 License and publicly available at https://github.com/open-mmlab/mmdetection import torch from mmdet.core import bbox2roi from mmdet.models.builder import HEADS, build_head diff --git a/modelscope/models/cv/abnormal_object_detection/mmdet_ms/roi_head/roi_extractors/single_level_roi_extractor.py b/modelscope/models/cv/abnormal_object_detection/mmdet_ms/roi_head/roi_extractors/single_level_roi_extractor.py index e9b2c0f2..e044c603 100644 --- a/modelscope/models/cv/abnormal_object_detection/mmdet_ms/roi_head/roi_extractors/single_level_roi_extractor.py +++ b/modelscope/models/cv/abnormal_object_detection/mmdet_ms/roi_head/roi_extractors/single_level_roi_extractor.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. # Implementation in this file is modified based on mmdetection -# Originally Apache 2.0 License and publicly avaialbe at https://github.com/open-mmlab/mmdetection +# Originally Apache 2.0 License and publicly available at https://github.com/open-mmlab/mmdetection import torch from mmcv.runner import force_fp32 from mmdet.models.builder import ROI_EXTRACTORS diff --git a/modelscope/models/cv/action_recognition/temporal_patch_shift_transformer.py b/modelscope/models/cv/action_recognition/temporal_patch_shift_transformer.py index 46596afd..35c57d37 100644 --- a/modelscope/models/cv/action_recognition/temporal_patch_shift_transformer.py +++ b/modelscope/models/cv/action_recognition/temporal_patch_shift_transformer.py @@ -1063,7 +1063,7 @@ class BaseHead(nn.Module, metaclass=ABCMeta): elif labels.dim() == 1 and labels.size()[0] == self.num_classes \ and cls_score.size()[0] == 1: # Fix a bug when training with soft labels and batch size is 1. - # When using soft labels, `labels` and `cls_socre` share the same + # When using soft labels, `labels` and `cls_score` share the same # shape. 
labels = labels.unsqueeze(0) diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/__init__.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/__init__.py index 5a895582..14fc2359 100755 --- a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/__init__.py +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/__init__.py @@ -1,4 +1,4 @@ """ -The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at +The implementation here is modified based on insightface, originally MIT license and publicly available at https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet """ diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/bbox/__init__.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/bbox/__init__.py index cf1b7313..e369df32 100644 --- a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/bbox/__init__.py +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/bbox/__init__.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at +The implementation here is modified based on insightface, originally MIT license and publicly available at https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/bbox """ from .transforms import bbox2result, distance2kps, kps2distance diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/bbox/transforms.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/bbox/transforms.py index 75e32d85..7d0f3070 100755 --- a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/bbox/transforms.py +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/bbox/transforms.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at +The implementation here is modified based on insightface, originally MIT license and publicly available at https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/bbox/transforms.py """ import numpy as np diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/post_processing/__init__.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/post_processing/__init__.py index 61602fd3..7e0e85ac 100755 --- a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/post_processing/__init__.py +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/post_processing/__init__.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at +The implementation here is modified based on insightface, originally MIT license and publicly available at https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/post_processing/bbox_nms.py """ from .bbox_nms import multiclass_nms diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/post_processing/bbox_nms.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/post_processing/bbox_nms.py index 697b7338..70d2e1da 100644 --- a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/post_processing/bbox_nms.py +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/core/post_processing/bbox_nms.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at +The implementation here is modified based on insightface, originally MIT 
license and publicly available at https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/post_processing/bbox_nms.py """ import torch diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/__init__.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/__init__.py index cea179b0..b31cb541 100644 --- a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/__init__.py +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/__init__.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at +The implementation here is modified based on insightface, originally MIT license and publicly available at https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets """ from .retinaface import RetinaFaceDataset diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/__init__.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/__init__.py index a2cafd1a..639e29df 100755 --- a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/__init__.py +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/__init__.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at +The implementation here is modified based on insightface, originally MIT license and publicly available at https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/pipelines """ from .auto_augment import RotateV2 diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/auto_augment.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/auto_augment.py index ee60c2e0..93f826c7 100644 --- a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/auto_augment.py +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/auto_augment.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at +The implementation here is modified based on insightface, originally MIT license and publicly available at https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/pipelines/auto_augment.py """ import copy diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/formating.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/formating.py index bd2394a8..ccb8545a 100644 --- a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/formating.py +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/formating.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at +The implementation here is modified based on insightface, originally MIT license and publicly available at https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/pipelines/formating.py """ import numpy as np diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/loading.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/loading.py index b4c2a385..a186ff6a 100644 --- a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/loading.py +++ 
b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/loading.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at +The implementation here is modified based on insightface, originally MIT license and publicly available at https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/pipelines/loading.py """ import os.path as osp diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/transforms.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/transforms.py index 270c34da..45b06082 100755 --- a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/transforms.py +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/pipelines/transforms.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at +The implementation here is modified based on insightface, originally MIT license and publicly available at https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/pipelines/transforms.py """ import mmcv diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/retinaface.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/retinaface.py index 40c440b9..4cfb61b5 100755 --- a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/retinaface.py +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/datasets/retinaface.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at +The implementation here is modified based on insightface, originally MIT license and publicly available at https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/retinaface.py """ import numpy as np diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/__init__.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/__init__.py index bd5d5f5f..d1c86abf 100755 --- a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/__init__.py +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/__init__.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at +The implementation here is modified based on insightface, originally MIT license and publicly available at https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models """ from .dense_heads import * # noqa: F401,F403 diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/__init__.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/__init__.py index 653bd3ef..4e246f96 100755 --- a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/__init__.py +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/__init__.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at +The implementation here is modified based on insightface, originally MIT license and publicly available at https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/backbones """ from .mobilenet import MobileNetV1 diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/mobilenet.py 
b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/mobilenet.py index 600f0434..7d0303e1 100644 --- a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/mobilenet.py +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/mobilenet.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at +The implementation here is modified based on insightface, originally MIT license and publicly available at https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/backbones/mobilenet.py """ diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/resnet.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/resnet.py index 190d1570..36ba93cd 100644 --- a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/resnet.py +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/backbones/resnet.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at +The implementation here is modified based on insightface, originally MIT license and publicly available at https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/backbones/resnet.py """ import torch.nn as nn diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/dense_heads/__init__.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/dense_heads/__init__.py index 9ba63b68..55ddf654 100755 --- a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/dense_heads/__init__.py +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/dense_heads/__init__.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at +The implementation here is modified based on insightface, originally MIT license and publicly available at https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/dense_heads """ from .scrfd_head import SCRFDHead diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/dense_heads/scrfd_head.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/dense_heads/scrfd_head.py index e43ed6e5..222d4297 100755 --- a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/dense_heads/scrfd_head.py +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/dense_heads/scrfd_head.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at +The implementation here is modified based on insightface, originally MIT license and publicly available at https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/dense_heads/scrfd_head.py """ import numpy as np diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/__init__.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/__init__.py index c1ed8f16..a9ee67dd 100755 --- a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/__init__.py +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/__init__.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at +The implementation here is modified based on insightface, originally MIT license and publicly 
available at https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/detectors """ from .scrfd import SCRFD diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/scrfd.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/scrfd.py index 18b46be1..550b22ae 100755 --- a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/scrfd.py +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/scrfd.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at +The implementation here is modified based on insightface, originally MIT license and publicly available at https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/detectors/scrfd.py """ import torch diff --git a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/tinymog.py b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/tinymog.py index a0b51753..7aa9d5b2 100755 --- a/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/tinymog.py +++ b/modelscope/models/cv/face_detection/scrfd/mmdet_patch/models/detectors/tinymog.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at +The implementation here is modified based on insightface, originally MIT license and publicly available at https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/detectors/scrfd.py """ import torch diff --git a/modelscope/models/cv/face_emotion/efficient/__init__.py b/modelscope/models/cv/face_emotion/efficient/__init__.py index e8fc91a4..3edaf9e2 100644 --- a/modelscope/models/cv/face_emotion/efficient/__init__.py +++ b/modelscope/models/cv/face_emotion/efficient/__init__.py @@ -1,5 +1,5 @@ # The implementation here is modified based on EfficientNet, -# originally Apache 2.0 License and publicly avaialbe at https://github.com/lukemelas/EfficientNet-PyTorch +# originally Apache 2.0 License and publicly available at https://github.com/lukemelas/EfficientNet-PyTorch from .model import VALID_MODELS, EfficientNet from .utils import (BlockArgs, BlockDecoder, GlobalParams, efficientnet, diff --git a/modelscope/models/cv/face_emotion/efficient/model.py b/modelscope/models/cv/face_emotion/efficient/model.py index 19ab4c3c..aa0be250 100644 --- a/modelscope/models/cv/face_emotion/efficient/model.py +++ b/modelscope/models/cv/face_emotion/efficient/model.py @@ -1,5 +1,5 @@ # The implementation here is modified based on EfficientNet, -# originally Apache 2.0 License and publicly avaialbe at https://github.com/lukemelas/EfficientNet-PyTorch +# originally Apache 2.0 License and publicly available at https://github.com/lukemelas/EfficientNet-PyTorch import torch from torch import nn diff --git a/modelscope/models/cv/face_emotion/efficient/utils.py b/modelscope/models/cv/face_emotion/efficient/utils.py index 6cae70fc..c1fcd9b3 100644 --- a/modelscope/models/cv/face_emotion/efficient/utils.py +++ b/modelscope/models/cv/face_emotion/efficient/utils.py @@ -1,5 +1,5 @@ # The implementation here is modified based on EfficientNet, -# originally Apache 2.0 License and publicly avaialbe at https://github.com/lukemelas/EfficientNet-PyTorch +# originally Apache 2.0 License and publicly available at https://github.com/lukemelas/EfficientNet-PyTorch import collections import math diff --git 
a/modelscope/models/cv/face_human_hand_detection/ghost_pan.py b/modelscope/models/cv/face_human_hand_detection/ghost_pan.py index e00de407..cad6cfe0 100644 --- a/modelscope/models/cv/face_human_hand_detection/ghost_pan.py +++ b/modelscope/models/cv/face_human_hand_detection/ghost_pan.py @@ -1,5 +1,5 @@ # The implementation here is modified based on nanodet, -# originally Apache 2.0 License and publicly avaialbe at https://github.com/RangiLyu/nanodet +# originally Apache 2.0 License and publicly available at https://github.com/RangiLyu/nanodet import math diff --git a/modelscope/models/cv/face_human_hand_detection/nanodet_plus_head.py b/modelscope/models/cv/face_human_hand_detection/nanodet_plus_head.py index 7f5b50ec..cf0cdf3c 100644 --- a/modelscope/models/cv/face_human_hand_detection/nanodet_plus_head.py +++ b/modelscope/models/cv/face_human_hand_detection/nanodet_plus_head.py @@ -1,5 +1,5 @@ # The implementation here is modified based on nanodet, -# originally Apache 2.0 License and publicly avaialbe at https://github.com/RangiLyu/nanodet +# originally Apache 2.0 License and publicly available at https://github.com/RangiLyu/nanodet import math diff --git a/modelscope/models/cv/face_human_hand_detection/one_stage_detector.py b/modelscope/models/cv/face_human_hand_detection/one_stage_detector.py index 0d1cd15d..a8d376fd 100644 --- a/modelscope/models/cv/face_human_hand_detection/one_stage_detector.py +++ b/modelscope/models/cv/face_human_hand_detection/one_stage_detector.py @@ -1,5 +1,5 @@ # The implementation here is modified based on nanodet, -# originally Apache 2.0 License and publicly avaialbe at https://github.com/RangiLyu/nanodet +# originally Apache 2.0 License and publicly available at https://github.com/RangiLyu/nanodet import torch import torch.nn as nn diff --git a/modelscope/models/cv/face_human_hand_detection/shufflenetv2.py b/modelscope/models/cv/face_human_hand_detection/shufflenetv2.py index 7f4dfc2a..5038ef5a 100644 --- a/modelscope/models/cv/face_human_hand_detection/shufflenetv2.py +++ b/modelscope/models/cv/face_human_hand_detection/shufflenetv2.py @@ -1,5 +1,5 @@ # The implementation here is modified based on nanodet, -# originally Apache 2.0 License and publicly avaialbe at https://github.com/RangiLyu/nanodet +# originally Apache 2.0 License and publicly available at https://github.com/RangiLyu/nanodet import torch import torch.nn as nn diff --git a/modelscope/models/cv/face_human_hand_detection/utils.py b/modelscope/models/cv/face_human_hand_detection/utils.py index f989c164..478dd7c2 100644 --- a/modelscope/models/cv/face_human_hand_detection/utils.py +++ b/modelscope/models/cv/face_human_hand_detection/utils.py @@ -1,5 +1,5 @@ # The implementation here is modified based on nanodet, -# originally Apache 2.0 License and publicly avaialbe at https://github.com/RangiLyu/nanodet +# originally Apache 2.0 License and publicly available at https://github.com/RangiLyu/nanodet import torch import torch.nn as nn diff --git a/modelscope/models/cv/face_recognition/align_face.py b/modelscope/models/cv/face_recognition/align_face.py index 0477375a..b5fa95b6 100644 --- a/modelscope/models/cv/face_recognition/align_face.py +++ b/modelscope/models/cv/face_recognition/align_face.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at +The implementation here is modified based on insightface, originally MIT license and publicly available at 
https://github.com/deepinsight/insightface/blob/master/python-package/insightface/utils/face_align.py """ import cv2 diff --git a/modelscope/models/cv/hand_static/networks.py b/modelscope/models/cv/hand_static/networks.py index 6cf46f5d..b9db7946 100644 --- a/modelscope/models/cv/hand_static/networks.py +++ b/modelscope/models/cv/hand_static/networks.py @@ -1,6 +1,6 @@ """ HandStatic The implementation here is modified based on MobileFaceNet, -originally Apache 2.0 License and publicly avaialbe at https://github.com/xuexingyu24/MobileFaceNet_Tutorial_Pytorch +originally Apache 2.0 License and publicly available at https://github.com/xuexingyu24/MobileFaceNet_Tutorial_Pytorch """ import os diff --git a/modelscope/models/cv/image_semantic_segmentation/ddpm_seg/data_util.py b/modelscope/models/cv/image_semantic_segmentation/ddpm_seg/data_util.py index b84d223d..7fba552d 100644 --- a/modelscope/models/cv/image_semantic_segmentation/ddpm_seg/data_util.py +++ b/modelscope/models/cv/image_semantic_segmentation/ddpm_seg/data_util.py @@ -1,5 +1,5 @@ # The implementation here is adopted from ddpm-segmentation, -# originally Apache 2.0 License and publicly avaialbe at https://github.com/yandex-research/ddpm-segmentation +# originally Apache 2.0 License and publicly available at https://github.com/yandex-research/ddpm-segmentation def get_palette(category): diff --git a/modelscope/models/cv/image_semantic_segmentation/ddpm_seg/feature_extractors.py b/modelscope/models/cv/image_semantic_segmentation/ddpm_seg/feature_extractors.py index 4973b6b7..2cffe43c 100644 --- a/modelscope/models/cv/image_semantic_segmentation/ddpm_seg/feature_extractors.py +++ b/modelscope/models/cv/image_semantic_segmentation/ddpm_seg/feature_extractors.py @@ -1,5 +1,5 @@ # The implementation here is modified based on ddpm-segmentation, -# originally Apache 2.0 License and publicly avaialbe at https://github.com/yandex-research/ddpm-segmentation +# originally Apache 2.0 License and publicly available at https://github.com/yandex-research/ddpm-segmentation # Copyright (c) Alibaba, Inc. and its affiliates. from typing import List diff --git a/modelscope/models/cv/image_semantic_segmentation/ddpm_seg/pixel_classifier.py b/modelscope/models/cv/image_semantic_segmentation/ddpm_seg/pixel_classifier.py index 5af3b3c4..c25fb364 100644 --- a/modelscope/models/cv/image_semantic_segmentation/ddpm_seg/pixel_classifier.py +++ b/modelscope/models/cv/image_semantic_segmentation/ddpm_seg/pixel_classifier.py @@ -1,5 +1,5 @@ # The implementation here is modified based on ddpm-segmentation, -# originally Apache 2.0 License and publicly avaialbe at https://github.com/yandex-research/ddpm-segmentation +# originally Apache 2.0 License and publicly available at https://github.com/yandex-research/ddpm-segmentation # Copyright (c) Alibaba, Inc. and its affiliates. 
import os diff --git a/modelscope/models/cv/image_semantic_segmentation/ddpm_segmentation_model.py b/modelscope/models/cv/image_semantic_segmentation/ddpm_segmentation_model.py index ea023e1b..1a76e4dc 100644 --- a/modelscope/models/cv/image_semantic_segmentation/ddpm_segmentation_model.py +++ b/modelscope/models/cv/image_semantic_segmentation/ddpm_segmentation_model.py @@ -1,5 +1,5 @@ # The implementation here is modified based on ddpm-segmentation, -# originally Apache 2.0 License and publicly avaialbe at https://github.com/yandex-research/ddpm-segmentation +# originally Apache 2.0 License and publicly available at https://github.com/yandex-research/ddpm-segmentation # Copyright (c) Alibaba, Inc. and its affiliates. import os.path as osp diff --git a/modelscope/models/cv/image_to_image_generation/models/clip.py b/modelscope/models/cv/image_to_image_generation/models/clip.py index d3dd22b4..8cd5e592 100644 --- a/modelscope/models/cv/image_to_image_generation/models/clip.py +++ b/modelscope/models/cv/image_to_image_generation/models/clip.py @@ -1,4 +1,4 @@ -# Part of the implementation is borrowed and modified from CLIP, publicly avaialbe at https://github.com/openai/CLIP. +# Part of the implementation is borrowed and modified from CLIP, publicly available at https://github.com/openai/CLIP. # Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. import math diff --git a/modelscope/models/cv/image_to_image_translation/models/clip.py b/modelscope/models/cv/image_to_image_translation/models/clip.py index d3dd22b4..8cd5e592 100644 --- a/modelscope/models/cv/image_to_image_translation/models/clip.py +++ b/modelscope/models/cv/image_to_image_translation/models/clip.py @@ -1,4 +1,4 @@ -# Part of the implementation is borrowed and modified from CLIP, publicly avaialbe at https://github.com/openai/CLIP. +# Part of the implementation is borrowed and modified from CLIP, publicly available at https://github.com/openai/CLIP. # Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. import math diff --git a/modelscope/models/cv/image_to_image_translation/ops/diffusion.py b/modelscope/models/cv/image_to_image_translation/ops/diffusion.py index 5ff37dc3..673f1eea 100644 --- a/modelscope/models/cv/image_to_image_translation/ops/diffusion.py +++ b/modelscope/models/cv/image_to_image_translation/ops/diffusion.py @@ -1,5 +1,5 @@ # Part of the implementation is borrowed and modified from latent-diffusion, -# publicly avaialbe at https://github.com/CompVis/latent-diffusion. +# publicly available at https://github.com/CompVis/latent-diffusion. # Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. 
import math diff --git a/modelscope/models/cv/movie_scene_segmentation/model.py b/modelscope/models/cv/movie_scene_segmentation/model.py index f5d3e677..336af3b3 100644 --- a/modelscope/models/cv/movie_scene_segmentation/model.py +++ b/modelscope/models/cv/movie_scene_segmentation/model.py @@ -1,5 +1,5 @@ # The implementation here is modified based on BaSSL, -# originally Apache 2.0 License and publicly avaialbe at https://github.com/kakaobrain/bassl +# originally Apache 2.0 License and publicly available at https://github.com/kakaobrain/bassl import math import os diff --git a/modelscope/models/cv/movie_scene_segmentation/utils/head.py b/modelscope/models/cv/movie_scene_segmentation/utils/head.py index d6468c53..6594af8f 100644 --- a/modelscope/models/cv/movie_scene_segmentation/utils/head.py +++ b/modelscope/models/cv/movie_scene_segmentation/utils/head.py @@ -1,5 +1,5 @@ # The implementation here is modified based on BaSSL, -# originally Apache 2.0 License and publicly avaialbe at https://github.com/kakaobrain/bassl +# originally Apache 2.0 License and publicly available at https://github.com/kakaobrain/bassl import torch.nn as nn import torch.nn.functional as F diff --git a/modelscope/models/cv/movie_scene_segmentation/utils/save_op.py b/modelscope/models/cv/movie_scene_segmentation/utils/save_op.py index 3339e1a3..49155716 100644 --- a/modelscope/models/cv/movie_scene_segmentation/utils/save_op.py +++ b/modelscope/models/cv/movie_scene_segmentation/utils/save_op.py @@ -1,5 +1,5 @@ # The implementation here is modified based on SceneSeg, -# originally Apache 2.0 License and publicly avaialbe at https://github.com/AnyiRao/SceneSeg +# originally Apache 2.0 License and publicly available at https://github.com/AnyiRao/SceneSeg import os import os.path as osp import subprocess diff --git a/modelscope/models/cv/object_detection/mmdet_ms/__init__.py b/modelscope/models/cv/object_detection/mmdet_ms/__init__.py index 3a1fdd0b..4d5f40f7 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/__init__.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/__init__.py @@ -1,5 +1,5 @@ # Implementation in this file is modified based on ViTAE-Transformer -# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet +# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet from .backbones import ViT from .dense_heads import AnchorNHead, RPNNHead from .necks import FPNF diff --git a/modelscope/models/cv/object_detection/mmdet_ms/backbones/__init__.py b/modelscope/models/cv/object_detection/mmdet_ms/backbones/__init__.py index c0697d48..89a53028 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/backbones/__init__.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/backbones/__init__.py @@ -1,5 +1,5 @@ # Implementation in this file is modified based on ViTAE-Transformer -# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet +# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet from .vit import ViT __all__ = ['ViT'] diff --git a/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/__init__.py b/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/__init__.py index 0d34e996..0f65587b 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/__init__.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/__init__.py @@ -1,5 +1,5 @@ # Implementation in this file is modified 
based on ViTAE-Transformer -# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet +# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet from .anchor_head import AnchorNHead from .rpn_head import RPNNHead diff --git a/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/anchor_head.py b/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/anchor_head.py index d4ea5282..dd489cb4 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/anchor_head.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/anchor_head.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. # Implementation in this file is modified based on ViTAE-Transformer -# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet +# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet from mmdet.models.builder import HEADS from mmdet.models.dense_heads import AnchorHead diff --git a/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/rpn_head.py b/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/rpn_head.py index 8e934a5c..8de7c1a4 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/rpn_head.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/rpn_head.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. # Implementation in this file is modified based on ViTAE-Transformer -# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet +# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet import copy import torch diff --git a/modelscope/models/cv/object_detection/mmdet_ms/necks/__init__.py b/modelscope/models/cv/object_detection/mmdet_ms/necks/__init__.py index d164987e..244ecbf0 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/necks/__init__.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/necks/__init__.py @@ -1,5 +1,5 @@ # Implementation in this file is modified based on ViTAE-Transformer -# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet +# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet from .fpn import FPNF __all__ = ['FPNF'] diff --git a/modelscope/models/cv/object_detection/mmdet_ms/necks/fpn.py b/modelscope/models/cv/object_detection/mmdet_ms/necks/fpn.py index 5f8648ce..5a6a8222 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/necks/fpn.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/necks/fpn.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. 
# Implementation in this file is modified based on ViTAE-Transformer -# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet +# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet import torch.nn as nn import torch.nn.functional as F from mmcv.runner import BaseModule, auto_fp16 diff --git a/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/__init__.py b/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/__init__.py index 658280df..60bf212d 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/__init__.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/__init__.py @@ -1,5 +1,5 @@ # Implementation in this file is modified based on ViTAE-Transformer -# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet +# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet from .bbox_heads import (ConvFCBBoxNHead, Shared2FCBBoxNHead, Shared4Conv1FCBBoxNHead) from .mask_heads import FCNMaskNHead diff --git a/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/__init__.py b/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/__init__.py index 61d93503..3e0b83ae 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/__init__.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/__init__.py @@ -1,5 +1,5 @@ # Implementation in this file is modified based on ViTAE-Transformer -# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet +# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet from .convfc_bbox_head import (ConvFCBBoxNHead, Shared2FCBBoxNHead, Shared4Conv1FCBBoxNHead) diff --git a/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py b/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py index 726329a1..f0c8c398 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. 
# Implementation in this file is modified based on ViTAE-Transformer -# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet +# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet import torch.nn as nn from mmdet.models.builder import HEADS from mmdet.models.roi_heads.bbox_heads.bbox_head import BBoxHead diff --git a/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/__init__.py b/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/__init__.py index 043e62a0..b3f46ba8 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/__init__.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/__init__.py @@ -1,5 +1,5 @@ # Implementation in this file is modified based on ViTAE-Transformer -# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet +# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet from .fcn_mask_head import FCNMaskNHead __all__ = ['FCNMaskNHead'] diff --git a/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/fcn_mask_head.py b/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/fcn_mask_head.py index 335f6b8f..ed90249f 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/fcn_mask_head.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/fcn_mask_head.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. # Implementation in this file is modified based on ViTAE-Transformer -# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet +# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet from warnings import warn import numpy as np diff --git a/modelscope/models/cv/object_detection/mmdet_ms/utils/__init__.py b/modelscope/models/cv/object_detection/mmdet_ms/utils/__init__.py index 34f240c6..b24d5a3d 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/utils/__init__.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/utils/__init__.py @@ -1,5 +1,5 @@ # Implementation in this file is modified based on ViTAE-Transformer -# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet +# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet from .checkpoint import load_checkpoint from .convModule_norm import ConvModule_Norm diff --git a/modelscope/models/cv/object_detection/mmdet_ms/utils/checkpoint.py b/modelscope/models/cv/object_detection/mmdet_ms/utils/checkpoint.py index 7833f592..be153caa 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/utils/checkpoint.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/utils/checkpoint.py @@ -1,6 +1,6 @@ # Copyright (c) Open-MMLab. All rights reserved. 
# Implementation in this file is modified based on ViTAE-Transformer -# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet +# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet import io import os import os.path as osp diff --git a/modelscope/models/cv/object_detection/mmdet_ms/utils/convModule_norm.py b/modelscope/models/cv/object_detection/mmdet_ms/utils/convModule_norm.py index a15780f7..f0e9a768 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/utils/convModule_norm.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/utils/convModule_norm.py @@ -1,5 +1,5 @@ # Implementation in this file is modified based on ViTAE-Transformer -# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet +# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet from mmcv.cnn import ConvModule diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/__init__.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/__init__.py index 9406c975..af0a990a 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/__init__.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/__init__.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin """ from .core.bbox.assigners import HungarianAssigner3D diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/assigners/__init__.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/assigners/__init__.py index 50554be6..166ad20d 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/assigners/__init__.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/assigners/__init__.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/core/bbox/assigners """ from .hungarian_assigner_3d import HungarianAssigner3D diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py index c47a4d6b..3bae8ddd 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/core/bbox/assigners """ import torch diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/coders/__init__.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/coders/__init__.py index 
3fe7191f..036b17f0 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/coders/__init__.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/coders/__init__.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/core/bbox/coders """ from .nms_free_coder import NMSFreeCoder diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py index c51e3945..5388e7cd 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/core/bbox/coders """ import torch diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/match_costs/__init__.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/match_costs/__init__.py index d8630b21..daf3de04 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/match_costs/__init__.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/match_costs/__init__.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/core/bbox/match_costs """ from .match_cost import BBox3DL1Cost diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/match_costs/match_cost.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/match_costs/match_cost.py index c8faa270..000354fa 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/match_costs/match_cost.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/match_costs/match_cost.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/core/bbox/match_costs """ import torch diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/util.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/util.py index d36a4517..52c1bf04 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/util.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/core/bbox/util.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally 
Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/core/bbox """ import mmdet3d diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/__init__.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/__init__.py index 222115dd..a5d67907 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/__init__.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/__init__.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/datasets """ from .nuscenes_dataset import CustomNuScenesDataset diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/nuscenes_dataset.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/nuscenes_dataset.py index 60e72914..5069c8e2 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/nuscenes_dataset.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/nuscenes_dataset.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/datasets """ import numpy as np diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/__init__.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/__init__.py index 33c3736b..e86a9ed4 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/__init__.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/__init__.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/datasets/pipelines """ from .loading import LoadMultiViewImageFromMultiSweepsFiles diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/loading.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/loading.py index 6a40eb5c..9443828c 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/loading.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/loading.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/datasets/pipelines """ import mmcv diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/transform_3d.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/transform_3d.py index 23db18ce..7b912318 100644 --- 
a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/transform_3d.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/datasets/pipelines/transform_3d.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/datasets/pipelines """ import copy diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/backbones/__init__.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/backbones/__init__.py index 5d10ef76..089ceb54 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/backbones/__init__.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/backbones/__init__.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/models/backbones """ from .vovnet import VoVNet diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/backbones/vovnet.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/backbones/vovnet.py index 44cdd2a1..9107f9bc 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/backbones/vovnet.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/backbones/vovnet.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/models/backbones """ from collections import OrderedDict diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/dense_heads/__init__.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/dense_heads/__init__.py index 3ab9b185..7f8fb9cb 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/dense_heads/__init__.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/dense_heads/__init__.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/models/dense_heads """ from .petrv2_dednhead import PETRv2DEDNHead diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/dense_heads/depth_net.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/dense_heads/depth_net.py index 1b39d33b..ff56e6a3 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/dense_heads/depth_net.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/dense_heads/depth_net.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on BEVDepth, originally MIT license and publicly avaialbe at +The implementation here is modified based on BEVDepth, originally MIT license and publicly 
available at https://github.com/Megvii-BaseDetection/BEVDepth/blob/main/bevdepth/layers/backbones/base_lss_fpn.py """ import torch diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/dense_heads/petrv2_dednhead.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/dense_heads/petrv2_dednhead.py index 52aaba2c..657aadab 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/dense_heads/petrv2_dednhead.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/dense_heads/petrv2_dednhead.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/models/dense_heads """ import copy diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/detectors/__init__.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/detectors/__init__.py index 7c38c1eb..b72289ea 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/detectors/__init__.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/detectors/__init__.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/models/detectors """ from .petr3d import Petr3D diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/detectors/petr3d.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/detectors/petr3d.py index c8e0a4e1..a997ebac 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/detectors/petr3d.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/detectors/petr3d.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/models/detectors """ import numpy as np diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/necks/__init__.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/necks/__init__.py index 34f7571e..871b3eae 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/necks/__init__.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/necks/__init__.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/models/necks """ from .cp_fpn import CPFPN diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/necks/cp_fpn.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/necks/cp_fpn.py index 0d866149..badcbd5e 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/necks/cp_fpn.py +++ 
b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/necks/cp_fpn.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/models/necks """ import torch.nn as nn diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/__init__.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/__init__.py index 182c218e..ae88d6d8 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/__init__.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/__init__.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/models/utils """ from .petr_transformer import (PETRDNTransformer, PETRMultiheadAttention, diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/petr_transformer.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/petr_transformer.py index d16d0d68..7cc0242b 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/petr_transformer.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/petr_transformer.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/models/utils """ import copy diff --git a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/positional_encoding.py b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/positional_encoding.py index 0d117b65..096cd2c6 100644 --- a/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/positional_encoding.py +++ b/modelscope/models/cv/object_detection_3d/depe/mmdet3d_plugin/models/utils/positional_encoding.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on PETR, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on PETR, originally Apache-2.0 license and publicly available at https://github.com/megvii-research/PETR/blob/main/projects/mmdet3d_plugin/models/utils """ import math diff --git a/modelscope/models/cv/object_detection_3d/depe/result_vis.py b/modelscope/models/cv/object_detection_3d/depe/result_vis.py index 4dfd1ed2..d577ab68 100644 --- a/modelscope/models/cv/object_detection_3d/depe/result_vis.py +++ b/modelscope/models/cv/object_detection_3d/depe/result_vis.py @@ -1,5 +1,5 @@ """ -The implementation here is modified based on BEVDet, originally Apache-2.0 license and publicly avaialbe at +The implementation here is modified based on BEVDet, originally Apache-2.0 license and publicly available at https://github.com/HuangJunJie2017/BEVDet/blob/dev2.0/tools/analysis_tools/vis.py """ import argparse diff --git a/modelscope/models/cv/product_segmentation/net.py b/modelscope/models/cv/product_segmentation/net.py index 
454c99d8..12bc3057 100644 --- a/modelscope/models/cv/product_segmentation/net.py +++ b/modelscope/models/cv/product_segmentation/net.py @@ -1,5 +1,5 @@ # The implementation here is modified based on F3Net, -# originally Apache 2.0 License and publicly avaialbe at https://github.com/weijun88/F3Net +# originally Apache 2.0 License and publicly available at https://github.com/weijun88/F3Net import torch import torch.nn as nn diff --git a/modelscope/models/cv/salient_detection/models/backbone/Res2Net_v1b.py b/modelscope/models/cv/salient_detection/models/backbone/Res2Net_v1b.py index 40c55773..46c950bf 100644 --- a/modelscope/models/cv/salient_detection/models/backbone/Res2Net_v1b.py +++ b/modelscope/models/cv/salient_detection/models/backbone/Res2Net_v1b.py @@ -1,6 +1,6 @@ # Implementation in this file is modified based on Res2Net-PretrainedModels # Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International Public License -# publicly avaialbe at https://github.com/Res2Net/Res2Net-PretrainedModels/blob/master/res2net_v1b.py +# publicly available at https://github.com/Res2Net/Res2Net-PretrainedModels/blob/master/res2net_v1b.py import math import torch diff --git a/modelscope/models/cv/salient_detection/models/backbone/__init__.py b/modelscope/models/cv/salient_detection/models/backbone/__init__.py index 52d5ded1..ab4029e8 100644 --- a/modelscope/models/cv/salient_detection/models/backbone/__init__.py +++ b/modelscope/models/cv/salient_detection/models/backbone/__init__.py @@ -1,6 +1,6 @@ # Implementation in this file is modified based on Res2Net-PretrainedModels # Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International Public License -# publicly avaialbe at https://github.com/Res2Net/Res2Net-PretrainedModels/blob/master/res2net_v1b.py +# publicly available at https://github.com/Res2Net/Res2Net-PretrainedModels/blob/master/res2net_v1b.py from .Res2Net_v1b import res2net50_v1b_26w_4s __all__ = ['res2net50_v1b_26w_4s'] diff --git a/modelscope/models/cv/salient_detection/models/utils.py b/modelscope/models/cv/salient_detection/models/utils.py index 292ee914..dac5a7db 100644 --- a/modelscope/models/cv/salient_detection/models/utils.py +++ b/modelscope/models/cv/salient_detection/models/utils.py @@ -1,7 +1,7 @@ # Implementation in this file is modified based on deeplabv3 -# Originally MIT license,publicly avaialbe at https://github.com/fregu856/deeplabv3/blob/master/model/aspp.py +# Originally MIT license,publicly available at https://github.com/fregu856/deeplabv3/blob/master/model/aspp.py # Implementation in this file is modified based on attention-module -# Originally MIT license,publicly avaialbe at https://github.com/Jongchan/attention-module/blob/master/MODELS/cbam.py +# Originally MIT license,publicly available at https://github.com/Jongchan/attention-module/blob/master/MODELS/cbam.py import torch import torch.nn as nn diff --git a/modelscope/models/cv/video_inpainting/inpainting.py b/modelscope/models/cv/video_inpainting/inpainting.py index e2af2ad0..7502bf38 100644 --- a/modelscope/models/cv/video_inpainting/inpainting.py +++ b/modelscope/models/cv/video_inpainting/inpainting.py @@ -1,6 +1,6 @@ """ VideoInpaintingProcess The implementation here is modified based on STTN, -originally Apache 2.0 License and publicly avaialbe at https://github.com/researchmm/STTN +originally Apache 2.0 License and publicly available at https://github.com/researchmm/STTN """ import os diff --git a/modelscope/models/cv/video_inpainting/inpainting_model.py 
b/modelscope/models/cv/video_inpainting/inpainting_model.py index ffecde67..168bc83e 100644 --- a/modelscope/models/cv/video_inpainting/inpainting_model.py +++ b/modelscope/models/cv/video_inpainting/inpainting_model.py @@ -1,6 +1,6 @@ """ VideoInpaintingProcess The implementation here is modified based on STTN, - originally Apache 2.0 License and publicly avaialbe at https://github.com/researchmm/STTN + originally Apache 2.0 License and publicly available at https://github.com/researchmm/STTN """ import math diff --git a/modelscope/models/cv/vop_retrieval/backbone.py b/modelscope/models/cv/vop_retrieval/backbone.py index a2b26e07..4eb82e3f 100644 --- a/modelscope/models/cv/vop_retrieval/backbone.py +++ b/modelscope/models/cv/vop_retrieval/backbone.py @@ -1,5 +1,5 @@ # The implementation here is modified based on HuggingFace, originally Apache 2.0 License -# and publicly avaialbe at https://github.com/huggingface/transformers +# and publicly available at https://github.com/huggingface/transformers # Copyright 2018 The HuggingFace Inc. team. # Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. diff --git a/modelscope/models/cv/vop_retrieval/tokenization_clip.py b/modelscope/models/cv/vop_retrieval/tokenization_clip.py index 07bad10c..7f801093 100644 --- a/modelscope/models/cv/vop_retrieval/tokenization_clip.py +++ b/modelscope/models/cv/vop_retrieval/tokenization_clip.py @@ -1,5 +1,5 @@ # The implementation here is modified based on HuggingFace, originally Apache 2.0 License -# and publicly avaialbe at https://github.com/huggingface/transformers +# and publicly available at https://github.com/huggingface/transformers # Copyright 2018 The HuggingFace Inc. team. # Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. diff --git a/modelscope/models/multi_modal/diffusion/diffusion.py b/modelscope/models/multi_modal/diffusion/diffusion.py index 286871c6..de944498 100644 --- a/modelscope/models/multi_modal/diffusion/diffusion.py +++ b/modelscope/models/multi_modal/diffusion/diffusion.py @@ -1,5 +1,5 @@ # Part of the implementation is borrowed and modified from latent-diffusion, -# publicly avaialbe at https://github.com/CompVis/latent-diffusion. +# publicly available at https://github.com/CompVis/latent-diffusion. # Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. import math diff --git a/modelscope/models/multi_modal/diffusion/unet_generator.py b/modelscope/models/multi_modal/diffusion/unet_generator.py index 539d3996..d6ab1068 100644 --- a/modelscope/models/multi_modal/diffusion/unet_generator.py +++ b/modelscope/models/multi_modal/diffusion/unet_generator.py @@ -1,5 +1,5 @@ # Part of the implementation is borrowed and modified from latent-diffusion, -# publicly avaialbe at https://github.com/CompVis/latent-diffusion. +# publicly available at https://github.com/CompVis/latent-diffusion. # Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. import math diff --git a/modelscope/models/multi_modal/diffusion/unet_upsampler_1024.py b/modelscope/models/multi_modal/diffusion/unet_upsampler_1024.py index 38cff6a2..b566a92b 100644 --- a/modelscope/models/multi_modal/diffusion/unet_upsampler_1024.py +++ b/modelscope/models/multi_modal/diffusion/unet_upsampler_1024.py @@ -1,5 +1,5 @@ # Part of the implementation is borrowed and modified from latent-diffusion, -# publicly avaialbe at https://github.com/CompVis/latent-diffusion. 
+# publicly available at https://github.com/CompVis/latent-diffusion. # Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. import math diff --git a/modelscope/models/multi_modal/diffusion/unet_upsampler_256.py b/modelscope/models/multi_modal/diffusion/unet_upsampler_256.py index ca5cd7d6..e8501122 100644 --- a/modelscope/models/multi_modal/diffusion/unet_upsampler_256.py +++ b/modelscope/models/multi_modal/diffusion/unet_upsampler_256.py @@ -1,5 +1,5 @@ # Part of the implementation is borrowed and modified from latent-diffusion, -# publicly avaialbe at https://github.com/CompVis/latent-diffusion. +# publicly available at https://github.com/CompVis/latent-diffusion. # Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. import math from functools import partial diff --git a/modelscope/models/multi_modal/dpm_solver_pytorch.py b/modelscope/models/multi_modal/dpm_solver_pytorch.py index f5879955..1beb7a60 100644 --- a/modelscope/models/multi_modal/dpm_solver_pytorch.py +++ b/modelscope/models/multi_modal/dpm_solver_pytorch.py @@ -1,5 +1,5 @@ # The implementation is borrowed and modified from dpm-solver, -# publicly avaialbe at https://github.com/LuChengTHU/dpm-solver. +# publicly available at https://github.com/LuChengTHU/dpm-solver. # Copyright LuChengTHU Authors. # Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. # All rights reserved. diff --git a/modelscope/models/multi_modal/multi_stage_diffusion/clip.py b/modelscope/models/multi_modal/multi_stage_diffusion/clip.py index 98727066..594e3c73 100644 --- a/modelscope/models/multi_modal/multi_stage_diffusion/clip.py +++ b/modelscope/models/multi_modal/multi_stage_diffusion/clip.py @@ -1,4 +1,4 @@ -# Part of the implementation is borrowed and modified from CLIP, publicly avaialbe at https://github.com/openai/CLIP. +# Part of the implementation is borrowed and modified from CLIP, publicly available at https://github.com/openai/CLIP. # Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. import math diff --git a/modelscope/models/multi_modal/multi_stage_diffusion/gaussian_diffusion.py b/modelscope/models/multi_modal/multi_stage_diffusion/gaussian_diffusion.py index 4e6cbe85..12a14d76 100644 --- a/modelscope/models/multi_modal/multi_stage_diffusion/gaussian_diffusion.py +++ b/modelscope/models/multi_modal/multi_stage_diffusion/gaussian_diffusion.py @@ -1,5 +1,5 @@ # Part of the implementation is borrowed and modified from latent-diffusion, -# publicly avaialbe at https://github.com/CompVis/latent-diffusion. +# publicly available at https://github.com/CompVis/latent-diffusion. # Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. import math diff --git a/modelscope/models/multi_modal/multi_stage_diffusion/tokenizer.py b/modelscope/models/multi_modal/multi_stage_diffusion/tokenizer.py index 59d6b304..f8c9775b 100644 --- a/modelscope/models/multi_modal/multi_stage_diffusion/tokenizer.py +++ b/modelscope/models/multi_modal/multi_stage_diffusion/tokenizer.py @@ -1,4 +1,4 @@ -# Part of the implementation is borrowed and modified from CLIP, publicly avaialbe at https://github.com/openai/CLIP. +# Part of the implementation is borrowed and modified from CLIP, publicly available at https://github.com/openai/CLIP. # Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. 
import gzip diff --git a/modelscope/models/multi_modal/multi_stage_diffusion/xglm.py b/modelscope/models/multi_modal/multi_stage_diffusion/xglm.py index 133da50b..8ff39d99 100644 --- a/modelscope/models/multi_modal/multi_stage_diffusion/xglm.py +++ b/modelscope/models/multi_modal/multi_stage_diffusion/xglm.py @@ -1,5 +1,5 @@ # Part of the implementation is borrowed and modified from HuggingFace XGLM, -# publicly avaialbe at https://github.com/huggingface/transformers. +# publicly available at https://github.com/huggingface/transformers. # Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. import math diff --git a/modelscope/models/nlp/bert/__init__.py b/modelscope/models/nlp/bert/__init__.py index 6578a0d7..00373f1c 100644 --- a/modelscope/models/nlp/bert/__init__.py +++ b/modelscope/models/nlp/bert/__init__.py @@ -17,6 +17,7 @@ if TYPE_CHECKING: from .token_classification import BertForTokenClassification from .document_segmentation import BertForDocumentSegmentation from .siamese_uie import SiameseUieModel + from .word_alignment import MBertForWordAlignment else: _import_structure = { 'backbone': [ diff --git a/modelscope/models/nlp/bert/word_alignment.py b/modelscope/models/nlp/bert/word_alignment.py new file mode 100644 index 00000000..526ec07d --- /dev/null +++ b/modelscope/models/nlp/bert/word_alignment.py @@ -0,0 +1,173 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn as nn +import torch.utils.checkpoint + +from modelscope.metainfo import Models +from modelscope.models.builder import MODELS +from modelscope.outputs import WordAlignmentOutput +from modelscope.utils import logger as logging +from modelscope.utils.constant import Tasks +from .backbone import BertModel, BertPreTrainedModel + +logger = logging.get_logger() + + +@MODELS.register_module(Tasks.word_alignment, module_name=Models.bert) +class MBertForWordAlignment(BertPreTrainedModel): + r"""MBert Model for the Word Alignment task. + + Code for EMNLP Findings 2022 paper, "Third-Party Aligner for Neural Word Alignments". + https://arxiv.org/abs/2211.04198 + + Parameters: + config (:class:`~modelscope.models.nlp.structbert.SbertConfig`): Model configuration class with + all the parameters of the model. + Initializing with a config file does not load the weights associated with the model, only the + configuration. Check out the :meth:`~transformers.PreTrainedModel.from_pretrained` method to load the model + weights. 
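+ + The forward pass encodes the source and target sentences separately, computes token-level similarity scores between the two encodings, and extracts word alignments by intersecting the source-to-target and target-to-source softmax probabilities that exceed ``threshold`` (see ``forward``).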
+ """ + _keys_to_ignore_on_load_unexpected = [r'pooler'] + _keys_to_ignore_on_load_missing = [ + r'position_ids', r'predictions.decoder.bias' + ] + + def __init__(self, config, **kwargs): + super().__init__(config) + + if config.is_decoder: + logger.warning( + 'If you want to use `BertForMaskedLM` make sure `config.is_decoder=False` for ' + 'bi-directional self-attention.') + config.num_hidden_layers = kwargs.get('encoder_layers', 8) + + self.bert = BertModel(config, add_pooling_layer=False) + + # Initialize weights and apply final processing + self.post_init() + + def forward( + self, + src_input_ids=None, + src_attention_mask=None, + src_b2w_map=None, + tgt_input_ids=None, + tgt_attention_mask=None, + tgt_b2w_map=None, + threshold=0.001, + bpe_level=False, + ): + """ + Args: src_input_ids: + Indices of source input sequence tokens in the vocabulary. + src_attention_mask: + Source mask to avoid performing attention on padding token indices. + src_b2w_map: + Word order numner of subword in source sequence. + tgt_input_ids: + Indices of target input sequence tokens in the vocabulary. + tgt_attention_mask: + Target mask to avoid performing attention on padding token indices. + tgt_b2w_map: + Word order numner of subword in target sequence. + threshold: + The threshold used to extract alignment. + bpe_level: + Return subword-level alignment or not. + Example: + { + 'src_input_ids': LongTensor([[2478,242,24,4]]), + 'src_attention_mask': BoolTensor([[1,1,1,1]]), + 'src_b2w_map': LongTensor([[0,1,2,3]]), + 'tgt_input_ids': LongTensor([[1056,356,934,263,7]]), + 'tgt_attention_mask': BoolTensor([[1,1,1,1,1]]), + 'tgt_b2w_map': longtensor([[0,1,1,2,3]]), + 'threshold': 0.001, + 'bpe_level': False, + } + Returns `modelscope.outputs.WordAlignmentOutput` + """ + with torch.no_grad(): + src_encoder_out = self.bert( + input_ids=src_input_ids, + attention_mask=src_attention_mask.float(), + head_mask=None, + inputs_embeds=None, + output_hidden_states=True, + ) + tgt_encoder_out = self.bert( + input_ids=tgt_input_ids, + attention_mask=tgt_attention_mask.float(), + head_mask=None, + inputs_embeds=None, + output_hidden_states=True, + ) + + atten_mask_src = (1 - ( + (src_input_ids != 101) & (src_input_ids != 102) + & src_attention_mask)[:, None, None, :].float()) * -10000 + atten_mask_tgt = (1 - ( + (tgt_input_ids != 101) & (tgt_input_ids != 102) + & tgt_attention_mask)[:, None, None, :].float()) * -10000 + + src_align_out = src_encoder_out[0] + tgt_align_out = tgt_encoder_out[0] + + bpe_sim = torch.bmm(src_align_out, tgt_align_out.transpose(1, 2)) + + attention_scores_src = bpe_sim.unsqueeze(1) + atten_mask_tgt + attention_scores_tgt = bpe_sim.unsqueeze(1) + atten_mask_src.transpose( + -1, -2) + + attention_probs_src = nn.Softmax(dim=-1)(attention_scores_src) + attention_probs_tgt = nn.Softmax(dim=-2)(attention_scores_tgt) + + align_matrix = (attention_probs_src > threshold) * ( + attention_probs_tgt > threshold) + align_matrix = align_matrix.squeeze(1) + + len_src = (atten_mask_src == 0).sum(dim=-1).unsqueeze(-1) + len_tgt = (atten_mask_tgt == 0).sum(dim=-1).unsqueeze(-1) + + attention_probs_src = nn.Softmax(dim=-1)( + attention_scores_src / torch.sqrt(len_src.float())) + attention_probs_tgt = nn.Softmax(dim=-2)( + attention_scores_tgt / torch.sqrt(len_tgt.float())) + + word_aligns = [] + + for idx, (line_align, b2w_src, b2w_tgt) in enumerate( + zip(align_matrix, src_b2w_map, tgt_b2w_map)): + aligns = dict() + non_specials = torch.where(line_align) + for i, j in zip(*non_specials): + if not bpe_level: + 
word_pair = (src_b2w_map[idx][i - 1].item(), + tgt_b2w_map[idx][j - 1].item()) + if word_pair not in aligns: + aligns[word_pair] = bpe_sim[idx][i, j].item() + else: + aligns[word_pair] = max(aligns[word_pair], + bpe_sim[idx][i, j].item()) + else: + aligns[(i.item() - 1, + j.item() - 1)] = bpe_sim[idx][i, j].item() + word_aligns.append(aligns) + + return WordAlignmentOutput(predictions=word_aligns) diff --git a/modelscope/models/nlp/structbert/adv_utils.py b/modelscope/models/nlp/structbert/adv_utils.py index eee44199..4b4c8a7d 100644 --- a/modelscope/models/nlp/structbert/adv_utils.py +++ b/modelscope/models/nlp/structbert/adv_utils.py @@ -96,7 +96,7 @@ def compute_adv_loss(embedding, 1, keepdim=True)[0] is_nan = torch.any(torch.isnan(emb_grad_norm)) if is_nan: - logger.warning('Nan occured when calculating adv loss.') + logger.warning('Nan occurred when calculating adv loss.') return ori_loss emb_grad = emb_grad / (emb_grad_norm + 1e-6) embedding_2 = embedding_1 + adv_grad_factor * emb_grad @@ -154,7 +154,7 @@ def compute_adv_loss_pair(embedding, 1, keepdim=True)[0] is_nan = torch.any(torch.isnan(emb_grad_norm)) if is_nan: - logger.warning('Nan occured when calculating pair adv loss.') + logger.warning('Nan occurred when calculating pair adv loss.') return ori_loss emb_grad = emb_grad / emb_grad_norm embedding_2 = embedding_1 + adv_grad_factor * emb_grad diff --git a/modelscope/outputs/nlp_outputs.py b/modelscope/outputs/nlp_outputs.py index 8ed8c94a..a48e3b0e 100644 --- a/modelscope/outputs/nlp_outputs.py +++ b/modelscope/outputs/nlp_outputs.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Optional, Tuple, Union +from typing import List, Optional, Tuple, Union import numpy as np @@ -328,6 +328,14 @@ class TextErrorCorrectionOutput(ModelOutputBase): predictions: np.ndarray = None +@dataclass +class WordAlignmentOutput(ModelOutputBase): + """The output class for word alignment models. + """ + + predictions: List = None + + @dataclass class TextGenerationModelOutput(ModelOutputBase): """The output class for text generation models. diff --git a/modelscope/outputs/outputs.py b/modelscope/outputs/outputs.py index eaef02c1..f781875c 100644 --- a/modelscope/outputs/outputs.py +++ b/modelscope/outputs/outputs.py @@ -434,7 +434,7 @@ TASK_OUTPUTS = { # "00:00:0.690", # ], # "output_video": "path_to_rendered_video" , this is optional - # and is only avaialbe when the "render" option is enabled. + # and is only available when the "render" option is enabled. # } Tasks.body_3d_keypoints: [ OutputKeys.KEYPOINTS, OutputKeys.TIMESTAMPS, OutputKeys.OUTPUT_VIDEO @@ -620,7 +620,7 @@ TASK_OUTPUTS = { # "masks": [np.array # 3D array with shape [frame_num, height, width]] # "timestamps": ["hh:mm:ss", "hh:mm:ss", "hh:mm:ss"] # "output_video": "path_to_rendered_video" , this is optional - # and is only avaialbe when the "render" option is enabled. + # and is only available when the "render" option is enabled.
# } Tasks.referring_video_object_segmentation: [ OutputKeys.MASKS, OutputKeys.TIMESTAMPS, OutputKeys.OUTPUT_VIDEO @@ -709,6 +709,11 @@ TASK_OUTPUTS = { # "output": "我想吃苹果" # } Tasks.text_error_correction: [OutputKeys.OUTPUT], + # word_alignment result for a single sample + # { + # "output": "0-0 1-3 2-4 3-1 4-2 5-5" + # } + Tasks.word_alignment: [OutputKeys.OUTPUT], Tasks.sentence_embedding: [OutputKeys.TEXT_EMBEDDING, OutputKeys.SCORES], Tasks.text_ranking: [OutputKeys.SCORES], diff --git a/modelscope/pipelines/audio/ans_pipeline.py b/modelscope/pipelines/audio/ans_pipeline.py index e55f613e..c12c9817 100644 --- a/modelscope/pipelines/audio/ans_pipeline.py +++ b/modelscope/pipelines/audio/ans_pipeline.py @@ -48,7 +48,8 @@ class ANSPipeline(Pipeline): if len(data1.shape) > 1: data1 = data1[:, 0] if fs != self.SAMPLE_RATE: - data1 = librosa.resample(data1, fs, self.SAMPLE_RATE) + data1 = librosa.resample( + data1, orig_sr=fs, target_sr=self.SAMPLE_RATE) data1 = audio_norm(data1) data = data1.astype(np.float32) inputs = np.reshape(data, [1, data.shape[0]]) diff --git a/modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py b/modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py index 49897281..39195bcd 100644 --- a/modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py +++ b/modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py @@ -1,5 +1,5 @@ # The implementation here is modified based on MTTR, -# originally Apache 2.0 License and publicly avaialbe at https://github.com/mttr2021/MTTR +# originally Apache 2.0 License and publicly available at https://github.com/mttr2021/MTTR # Copyright (c) Alibaba, Inc. and its affiliates. import tempfile diff --git a/modelscope/pipelines/cv/tbs_detection_pipeline.py b/modelscope/pipelines/cv/tbs_detection_pipeline.py new file mode 100644 index 00000000..58831846 --- /dev/null +++ b/modelscope/pipelines/cv/tbs_detection_pipeline.py @@ -0,0 +1,149 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import colorsys +import os +from typing import Any, Dict + +import cv2 +import numpy as np +import torch +from PIL import Image, ImageDraw, ImageFile, ImageFont + +from modelscope.metainfo import Pipelines +from modelscope.outputs import OutputKeys +from modelscope.pipelines.base import Input, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.pipelines.cv.tbs_detection_utils.utils import (_get_anchors, + generate, + post_process) +from modelscope.preprocessors import LoadImage +from modelscope.utils.constant import Tasks +from modelscope.utils.logger import get_logger + +ImageFile.LOAD_TRUNCATED_IMAGES = True + +logger = get_logger() + +__all__ = ['TBSDetectionPipeline'] + + +@PIPELINES.register_module( + Tasks.image_object_detection, module_name=Pipelines.tbs_detection) +class TBSDetectionPipeline(Pipeline): + """ TBS Detection Pipeline. 
+
+ Example:
+
+ ```python
+ >>> from modelscope.pipelines import pipeline
+
+ >>> tbs_detect = pipeline(Tasks.image_object_detection, model='landingAI/LD_CytoBrainCerv')
+ >>> tbs_detect(input='data/test/images/tbs_detection.jpg')
+ {
+ "boxes": [
+ [
+ 446.9007568359375,
+ 36.374977111816406,
+ 907.0919189453125,
+ 337.439208984375
+ ],
+ [
+ 454.3310241699219,
+ 336.08477783203125,
+ 921.26904296875,
+ 641.7871704101562
+ ]
+ ],
+ "labels": [
+ ["Positive"]
+ ],
+ "scores": [
+ 0.9296008944511414,
+ 0.9260380268096924
+ ]
+ }
+ >>> #
+ ```
+ """
+ _defaults = {
+ 'class_names': ['positive'],
+ 'model_image_size': (416, 416, 3),
+ 'confidence': 0.5,
+ 'iou': 0.3,
+ }
+
+ @classmethod
+ def get_defaults(cls, n):
+ if n in cls._defaults:
+ return cls._defaults[n]
+ else:
+ return "Unrecognized attribute name '" + n + "'"
+
+ def __init__(self, model: str, **kwargs):
+ """
+ model: model id on modelscope hub.
+ """
+ super().__init__(model=model, auto_collate=False, **kwargs)
+ self.__dict__.update(self._defaults)
+ self.anchors = _get_anchors(self)
+ generate(self)
+
+ def preprocess(self, input: Input) -> Dict[str, Any]:
+ """
+ Detect objects (bounding boxes) in the image(s) passed as inputs.
+
+ Args:
+ input (`Image` or `List[Image]`):
+ The pipeline handles three types of images:
+
+ - A string containing an HTTP(S) link pointing to an image
+ - A string containing a local path to an image
+ - An image loaded in PIL or opencv directly
+
+ The pipeline accepts either a single image or a batch of images. Images in a batch must all be in the
+ same format.
+
+
+ Returns:
+ A dictionary of results or a list of result dictionaries. If the input is a single image, a dictionary
+ is returned. If the input is a list of images, a list of dictionaries is returned.
+
+ The dictionary contains the following keys:
+
+ - **scores** (`List[float]`) -- The detection score for each detected object in the image.
+ - **boxes** (`List[float]`) -- The bounding box [x1, y1, x2, y2] of each detected object, in the image's
+ original size.
+ - **labels** (`List[str]`, optional) -- The class names of the detected objects in the image.
+ """ + img = LoadImage.convert_to_ndarray(input) + img = img.astype(np.float) + result = {'img': img, 'img_path': input} + return result + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + img = input['img'].astype(np.uint8) + img = cv2.resize(img, (416, 416)) + img = img.astype(np.float32) + tmp_inp = np.transpose(img / 255.0, (2, 0, 1)) + tmp_inp = torch.from_numpy(tmp_inp).type(torch.FloatTensor) + img = torch.unsqueeze(tmp_inp, dim=0) + model_path = os.path.join(self.model, 'pytorch_yolov4.pt') + model = torch.load(model_path) + outputs = model(img.cuda()) + result = {'data': outputs, 'img_path': input['img_path']} + return result + + def postprocess(self, input: Dict[str, Any], *args, + **kwargs) -> Dict[str, Any]: + + bboxes, scores = post_process(self, input['data'], input['img_path']) + + if bboxes is None: + outputs = {OutputKeys.SCORES: [], OutputKeys.BOXES: []} + return outputs + outputs = { + OutputKeys.SCORES: scores.tolist(), + OutputKeys.LABELS: ['Positive'], + OutputKeys.BOXES: bboxes + } + return outputs diff --git a/modelscope/pipelines/cv/tbs_detection_utils/__init__.py b/modelscope/pipelines/cv/tbs_detection_utils/__init__.py new file mode 100644 index 00000000..21b405d8 --- /dev/null +++ b/modelscope/pipelines/cv/tbs_detection_utils/__init__.py @@ -0,0 +1 @@ +import os diff --git a/modelscope/pipelines/cv/tbs_detection_utils/utils.py b/modelscope/pipelines/cv/tbs_detection_utils/utils.py new file mode 100644 index 00000000..ab2eb8cc --- /dev/null +++ b/modelscope/pipelines/cv/tbs_detection_utils/utils.py @@ -0,0 +1,397 @@ +from __future__ import division +import colorsys +import os + +import numpy as np +import pandas as pd +import torch +import torch.nn as nn +from matplotlib import pyplot as plt +from PIL import Image +from torchvision.ops.boxes import batched_nms, nms + +plt.switch_backend('Agg') + + +class DecodeBox(nn.Module): + + def __init__(self, anchors, num_classes, img_size): + super(DecodeBox, self).__init__() + self.anchors = anchors + self.num_classes = num_classes + self.img_size = img_size + + self.num_anchors = len(anchors) + self.bbox_attrs = 5 + num_classes + + def forward(self, input): + # input为bs,3*(1+4+num_classes),13,13 + # 一共多少张图片 + batch_size = input.size(0) + # 13,13 + input_height = input.size(2) + input_width = input.size(3) + + # 计算步长 + # 每一个特征点对应原来的图片上多少个像素点 + # 如果特征层为13x13的话,一个特征点就对应原来的图片上的32个像素点 + # 416/13 = 32 + stride_h = self.img_size[1] / input_height + stride_w = self.img_size[0] / input_width + + # 把先验框的尺寸调整成特征层大小的形式 + # 计算出先验框在特征层上对应的宽高 + scaled_anchors = [(anchor_width / stride_w, anchor_height / stride_h) + for anchor_width, anchor_height in self.anchors] + + # bs,3*(5+num_classes),13,13 -> bs,3,13,13,(5+num_classes) + prediction = input.view(batch_size, self.num_anchors, self.bbox_attrs, + input_height, + input_width).permute(0, 1, 3, 4, + 2).contiguous() + + # 先验框的中心位置的调整参数 + x = torch.sigmoid(prediction[..., 0]) + y = torch.sigmoid(prediction[..., 1]) + # 先验框的宽高调整参数 + w = prediction[..., 2] # Width + h = prediction[..., 3] # Height + + # 获得置信度,是否有物体 + conf = torch.sigmoid(prediction[..., 4]) + # 种类置信度 + pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred. 
+ FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
+ LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
+
+ # build the grid of anchor centers (top-left corner of each cell), shape batch_size,3,13,13
+ grid_x = torch.linspace(0, input_width - 1, input_width).repeat(
+ input_width, 1).repeat(batch_size * self.num_anchors, 1,
+ 1).view(x.shape).type(FloatTensor)
+ grid_y = torch.linspace(0, input_height - 1, input_height).repeat(
+ input_height, 1).t().repeat(batch_size * self.num_anchors, 1,
+ 1).view(y.shape).type(FloatTensor)
+
+ # generate the anchor widths and heights
+ anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0]))
+ anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1]))
+ anchor_w = anchor_w.repeat(batch_size, 1).repeat(
+ 1, 1, input_height * input_width).view(w.shape)
+ anchor_h = anchor_h.repeat(batch_size, 1).repeat(
+ 1, 1, input_height * input_width).view(h.shape)
+ # compute the adjusted box centers and widths/heights
+ pred_boxes = FloatTensor(prediction[..., :4].shape)
+ pred_boxes[..., 0] = x.data + grid_x
+ pred_boxes[..., 1] = y.data + grid_y
+ pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
+ pred_boxes[..., 3] = torch.exp(h.data) * anchor_h
+
+ # scale the outputs back to the 416x416 input size
+ _scale = torch.Tensor([stride_w, stride_h] * 2).type(FloatTensor)
+ output = torch.cat((pred_boxes.view(batch_size, -1, 4) * _scale,
+ conf.view(batch_size, -1, 1),
+ pred_cls.view(batch_size, -1, self.num_classes)),
+ -1)
+
+ return output.data
+
+
+# ------------------------------------------------- #
+# The model input is square while dataset images are usually rectangular; a naive resize distorts the image.
+# Letterboxing keeps the aspect ratio and pads the remaining area with gray.
+# ------------------------------------------------- #
+def letterbox_image(image, size):
+ iw, ih = image.size
+ w, h = size
+ scale = min(w / iw, h / ih)
+ nw = int(iw * scale)
+ nh = int(ih * scale)
+
+ image = image.resize((nw, nh), Image.BICUBIC)
+ new_image = Image.new('RGB', size, (128, 128, 128))
+ new_image.paste(image, ((w - nw) // 2, (h - nh) // 2))
+
+ return new_image
+
+
+# ------------------------------------------------- #
+# Correct the model's box outputs (x, y, w, h) and return boxes (x_min, y_min, x_max, y_max)
+# in the coordinate system of the original image.
+# ------------------------------------------------- #
+def yolo_correct_boxes(top, left, bottom, right, input_shape, image_shape):
+ """
+ :param top: box center coordinate from the model output, in range 0~1
+ :param left: box center coordinate from the model output, in range 0~1
+ :param bottom: box size from the model output, in range 0~1
+ :param right: box size from the model output, in range 0~1
+ :param input_shape: model input size, width and height are both multiples of 32
+ :param image_shape: original image size
+ :return: boxes in the original image coordinate system (absolute coordinates, not ratios)
+ """
+ new_shape = image_shape * np.min(input_shape / image_shape)
+ offset = (input_shape - new_shape) / 2. / input_shape
+ scale = input_shape / new_shape
+ box_yx = np.concatenate(
+ ((top + bottom) / 2, (left + right) / 2), axis=-1) / input_shape
+ box_hw = np.concatenate(
+ (bottom - top, right - left), axis=-1) / input_shape
+ box_yx = (box_yx - offset) * scale
+ box_hw *= scale
+ box_mins = box_yx - (box_hw / 2.)
+ box_maxes = box_yx + (box_hw / 2.)
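+ # Stack as (y_min, x_min, y_max, x_max) and scale back to pixel
+ # coordinates of the original image.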
+ boxes = [
+ box_mins[:, 0:1], box_mins[:, 1:2], box_maxes[:, 0:1], box_maxes[:,
+ 1:2]
+ ]
+ boxes = np.concatenate(boxes, axis=-1)
+ boxes *= np.concatenate([image_shape, image_shape], axis=-1)
+
+ return boxes
+
+
+# ------------------------------------------------- #
+# compute IoU
+# ------------------------------------------------- #
+def bbox_iou(box1, box2, x1y1x2y2=True):
+ if not x1y1x2y2:
+ b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
+ b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
+ b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
+ b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
+ else:
+ b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:,
+ 2], box1[:,
+ 3]
+ b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:,
+ 2], box2[:,
+ 3]
+
+ inter_rect_x1 = torch.max(b1_x1, b2_x1)
+ inter_rect_y1 = torch.max(b1_y1, b2_y1)
+ inter_rect_x2 = torch.min(b1_x2, b2_x2)
+ inter_rect_y2 = torch.min(b1_y2, b2_y2)
+
+ inter_area = torch.clamp(
+ inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(
+ inter_rect_y2 - inter_rect_y1 + 1, min=0)
+ b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
+ b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
+
+ iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
+
+ return iou
+
+
+# ------------------------------------------------- #
+# non-maximum suppression
+# ------------------------------------------------- #
+def non_max_suppression(prediction,
+ num_classes,
+ conf_thres=0.5,
+ nms_thres=0.4):
+ # convert boxes to top-left and bottom-right corners
+ box_corner = prediction.new(prediction.shape)
+ box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
+ box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
+ box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
+ box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
+ prediction[:, :, :4] = box_corner[:, :, :4]
+
+ output = [None for _ in range(len(prediction))]
+ for image_i, image_pred in enumerate(prediction):
+ # get the class and its confidence
+ class_conf, class_pred = torch.max(
+ image_pred[:, 5:5 + num_classes], 1, keepdim=True)
+ # first-round filtering by confidence
+ score = image_pred[:, 4] * class_conf[:, 0]
+ conf_mask = (score >= conf_thres).squeeze()
+
+ image_pred = image_pred[conf_mask]
+ class_conf = class_conf[conf_mask]
+ class_pred = class_pred[conf_mask]
+ if not image_pred.size(0):
+ continue
+ # detections columns: (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
+ detections = torch.cat(
+ (image_pred[:, :5], class_conf.float(), class_pred.float()), 1)
+
+ # get the classes present in the image
+ unique_labels = detections[:, -1].cpu().unique()
+
+ if prediction.is_cuda:
+ unique_labels = unique_labels.cuda()
+ detections = detections.cuda()
+
+ for c in unique_labels:
+ # get all pre-filtered predictions for this class
+ detections_class = detections[detections[:, -1] == c]
+
+ # ------------------------------------------ #
+ # using torchvision's built-in NMS is faster!
+ # ------------------------------------------ #
+ keep = nms(detections_class[:, :4],
+ detections_class[:, 4] * detections_class[:, 5],
+ nms_thres)
+ max_detections = detections_class[keep]
+
+ output[image_i] = max_detections if output[
+ image_i] is None else torch.cat(
+ [output[image_i], max_detections])
+
+ return output
+
+
+# ------------------------------------------------- #
+# merge boxes
+# ------------------------------------------------- #
+def merge_bboxes(bboxes, cutx, cuty):
+ merge_bbox = []
+ for i in range(len(bboxes)):
+ for box in bboxes[i]:
+ tmp_box = []
+ x1, y1, x2, y2 = box[0], box[1], box[2], box[3]
+
+ if i == 0:
+ if y1 > cuty or x1 > cutx:
+ continue
+ if y2 >= cuty and y1 <= cuty:
+ y2 = cuty
+ if y2 - y1 < 5:
+ continue
+ if x2 >= cutx and x1 <= cutx:
+ x2 = cutx
+ if x2 - x1 < 5:
+ continue
+
+ if i == 1:
+ if y2 < cuty or x1 > cutx:
+ continue
+
+ if y2 >= cuty and y1 <= cuty:
+ y1 = cuty
+ if y2 - y1 < 5:
+ continue
+
+ if x2 >= cutx and x1 <= cutx:
+ x2 = cutx
+ if x2 - x1 < 5:
+ continue
+
+ if i == 2:
+ if y2 < cuty or x2 < cutx:
+ continue
+
+ if y2 >= cuty and y1 <= cuty:
+ y1 = cuty
+ if y2 - y1 < 5:
+ continue
+
+ if x2 >= cutx and x1 <= cutx:
+ x1 = cutx
+ if x2 - x1 < 5:
+ continue
+
+ if i == 3:
+ if y1 > cuty or x2 < cutx:
+ continue
+
+ if y2 >= cuty and y1 <= cuty:
+ y2 = cuty
+ if y2 - y1 < 5:
+ continue
+
+ if x2 >= cutx and x1 <= cutx:
+ x1 = cutx
+ if x2 - x1 < 5:
+ continue
+
+ tmp_box.append(x1)
+ tmp_box.append(y1)
+ tmp_box.append(x2)
+ tmp_box.append(y2)
+ tmp_box.append(box[-1])
+ merge_bbox.append(tmp_box)
+ return merge_bbox
+
+
+# ---------------------------------------------------#
+# load all anchors
+# ---------------------------------------------------#
+def _get_anchors(self):
+ anchors_path = os.path.join(self.model, 'model_data/yolo_anchors.txt')
+ anchors_path = os.path.expanduser(anchors_path)
+ with open(anchors_path) as f:
+ lines = f.readlines()
+ anchors = [line.strip().split(',') for line in lines]
+ return np.array(anchors, dtype='float').reshape([-1, 3, 2])[::-1, :, :]
+
+
+def generate(self):
+ self.yolo_decodes = []
+ for i in range(len(self.anchors)):
+ self.yolo_decodes.append(
+ DecodeBox(self.anchors[i], len(self.class_names),
+ self.model_image_size[:2][::-1]))
+
+ # assign a distinct color to each class for drawing boxes
+ hsv_tuples = [(x / len(self.class_names), 1., 1.)
+ for x in range(len(self.class_names))]
+ self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
+ self.colors = list(
+ map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
+ self.colors))
+
+
+# --------------------------------------------------- #
+# post-processing
+# --------------------------------------------------- #
+def post_process(self, outputs, img_path):
+ new_boxes = []
+ output_list = []
+ top_confs = torch.empty(0)
+ for i in range(3):
+ output_list.append(self.yolo_decodes[i](outputs[i]))
+ output = torch.cat(output_list, 1)
+ batch_detections = non_max_suppression(
+ output,
+ len(self.class_names),
+ conf_thres=self.confidence,
+ nms_thres=self.iou)
+
+ for j, batch_detection in enumerate(batch_detections):
+ if batch_detection is None:
+ continue
+ try:
+ batch_detection = batch_detection.cpu().numpy()
+ except Exception:
+ return
+
+ image = Image.open(img_path)
+ image_shape = np.array(np.shape(image)[0:2])
+ top_index = batch_detection[:,
+ 4] * batch_detection[:,
+ 5] > self.confidence
+ top_conf = batch_detection[top_index, 4]
+ top_class = batch_detection[top_index, 5]
+ top_confs = top_conf * top_class
+ top_label = np.array(batch_detection[top_index, -1], np.int32)
+ top_bboxes = np.array(batch_detection[top_index, :4])
+ top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
+ top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
+ top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
+ top_ymax = np.expand_dims(top_bboxes[:, 3], -1)
+
+ # remove the gray letterbox padding
+ boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
+ np.array(self.model_image_size[:2]),
+ image_shape)
+
+ for i, c in enumerate(top_label):
+ top, left, bottom, right = boxes[i]
+ top = max(0, round(top, 2))
+
+ left = max(0, round(left, 2))
+ bottom = min(image.size[1], round(bottom, 2))
+ right = min(image.size[0], round(right, 2))
+ new_boxes.append([top, left, bottom, right])
+
+ return new_boxes, top_confs
diff --git a/modelscope/pipelines/cv/video_deinterlace_pipeline.py b/modelscope/pipelines/cv/video_deinterlace_pipeline.py
index e30a6678..7df69f8c 100644
--- a/modelscope/pipelines/cv/video_deinterlace_pipeline.py
+++ b/modelscope/pipelines/cv/video_deinterlace_pipeline.py
@@ -1,5 +1,5 @@
# The implementation here is modified based on RealBasicVSR,
-# originally Apache 2.0 License and publicly avaialbe at
+# originally Apache 2.0 License and publicly available at
# https://github.com/ckkelvinchan/RealBasicVSR/blob/master/inference_realbasicvsr.py
import math
import os
diff --git a/modelscope/pipelines/cv/video_super_resolution_pipeline.py b/modelscope/pipelines/cv/video_super_resolution_pipeline.py
index 717ece43..b4d9d8c8 100644
--- a/modelscope/pipelines/cv/video_super_resolution_pipeline.py
+++ b/modelscope/pipelines/cv/video_super_resolution_pipeline.py
@@ -1,5 +1,5 @@
# The implementation here is modified based on RealBasicVSR,
-# originally Apache 2.0 License and publicly avaialbe at
+# originally Apache 2.0 License and publicly available at
# https://github.com/ckkelvinchan/RealBasicVSR/blob/master/inference_realbasicvsr.py
import math
import os
diff --git a/modelscope/pipelines/nlp/__init__.py b/modelscope/pipelines/nlp/__init__.py
index 4d34b59e..cc8487fe 100644
--- a/modelscope/pipelines/nlp/__init__.py
+++ b/modelscope/pipelines/nlp/__init__.py
@@ -25,6 +25,7 @@ if TYPE_CHECKING:
from .summarization_pipeline import SummarizationPipeline
from .translation_quality_estimation_pipeline import TranslationQualityEstimationPipeline
from .text_error_correction_pipeline import TextErrorCorrectionPipeline
+ from .word_alignment_pipeline import WordAlignmentPipeline
from .text_generation_pipeline import TextGenerationPipeline, TextGenerationT5Pipeline
from .fid_dialogue_pipeline import FidDialoguePipeline
from .token_classification_pipeline import TokenClassificationPipeline
@@ -71,6 +72,7 @@ else:
['TableQuestionAnsweringPipeline'],
'text_classification_pipeline': ['TextClassificationPipeline'],
'text_error_correction_pipeline': ['TextErrorCorrectionPipeline'],
+ 'word_alignment_pipeline': ['WordAlignmentPipeline'],
'text_generation_pipeline':
['TextGenerationPipeline', 'TextGenerationT5Pipeline'],
'fid_dialogue_pipeline': ['FidDialoguePipeline'],
diff --git a/modelscope/pipelines/nlp/word_alignment_pipeline.py b/modelscope/pipelines/nlp/word_alignment_pipeline.py
new file mode 100644
index 00000000..8513f28e
--- /dev/null
+++ b/modelscope/pipelines/nlp/word_alignment_pipeline.py
@@ -0,0 +1,68 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+from typing import Any, Dict, Optional, Union
+
+import numpy as np
+
+from modelscope.metainfo import Pipelines
+from modelscope.models import Model
+from modelscope.outputs import OutputKeys
+from modelscope.pipelines.base import Pipeline
+from modelscope.pipelines.builder import PIPELINES
+from modelscope.preprocessors import WordAlignmentPreprocessor
+from modelscope.utils.constant import Tasks
+
+__all__ = ['WordAlignmentPipeline']
+
+
+@PIPELINES.register_module(
+ Tasks.word_alignment, module_name=Pipelines.word_alignment)
+class WordAlignmentPipeline(Pipeline):
+
+ def __init__(self,
+ model: Union[Model, str],
+ preprocessor: WordAlignmentPreprocessor = None,
+ config_file: str = None,
+ device: str = 'gpu',
+ auto_collate=True,
+ sequence_length=128,
+ **kwargs):
+ """Use `model` and `preprocessor` to create a word alignment pipeline that predicts aligned word pairs.
+ Args:
+ model (str or Model): Supply either a local model dir which supports the word alignment task,
+ or a model id from the model hub, or a torch model instance.
+ preprocessor (Preprocessor): A WordAlignmentPreprocessor.
+ kwargs (dict, `optional`):
+ Extra kwargs passed into the preprocessor's constructor.
+ Example: + >>> from modelscope.pipelines import pipeline + >>> from modelscope.utils.constant import Tasks + >>> model_id = 'damo/Third-Party-Supervised-Word-Aligner-mBERT-base-zhen' + >>> input = {"sentence_pair": '贝利 在 墨西哥 推出 自传 。||| pele promotes autobiography in mexico .'} + >>> pipeline_ins = pipeline(Tasks.word_alignment, model=model_id) + >>> print(pipeline_ins(input)['output']) + """ + super().__init__( + model=model, + preprocessor=preprocessor, + config_file=config_file, + device=device, + auto_collate=auto_collate) + if preprocessor is None: + self.preprocessor = WordAlignmentPreprocessor.from_pretrained( + self.model.model_dir, + sequence_length=sequence_length, + **kwargs) + + def forward(self, inputs: Dict[str, Any], + **forward_params) -> Dict[str, Any]: + return self.model(**inputs, **forward_params) + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + + align = [] + for k in inputs[0][0].keys(): + align.append(f'{k[0]}-{k[1]}') + align = ' '.join(align) + + return {OutputKeys.OUTPUT: align} diff --git a/modelscope/preprocessors/__init__.py b/modelscope/preprocessors/__init__.py index 3a13828b..1cd25e50 100644 --- a/modelscope/preprocessors/__init__.py +++ b/modelscope/preprocessors/__init__.py @@ -30,9 +30,9 @@ if TYPE_CHECKING: TextGenerationSentencePiecePreprocessor, TokenClassificationTransformersPreprocessor, TextErrorCorrectionPreprocessor, TextGenerationT5Preprocessor, - TextGenerationTransformersPreprocessor, Tokenize, - WordSegmentationBlankSetToLabelPreprocessor, CodeGeeXPreprocessor, - MGLMSummarizationPreprocessor, + WordAlignmentPreprocessor, TextGenerationTransformersPreprocessor, + Tokenize, WordSegmentationBlankSetToLabelPreprocessor, + CodeGeeXPreprocessor, MGLMSummarizationPreprocessor, ZeroShotClassificationTransformersPreprocessor, TextGenerationJiebaPreprocessor, SentencePiecePreprocessor, DialogIntentPredictionPreprocessor, DialogModelingPreprocessor, diff --git a/modelscope/preprocessors/movie_scene_segmentation/transforms.py b/modelscope/preprocessors/movie_scene_segmentation/transforms.py index 5b84003c..32add4d6 100644 --- a/modelscope/preprocessors/movie_scene_segmentation/transforms.py +++ b/modelscope/preprocessors/movie_scene_segmentation/transforms.py @@ -1,5 +1,5 @@ # The implementation here is modified based on BaSSL, -# originally Apache 2.0 License and publicly avaialbe at https://github.com/kakaobrain/bassl +# originally Apache 2.0 License and publicly available at https://github.com/kakaobrain/bassl import numbers import os.path as osp import random diff --git a/modelscope/preprocessors/nlp/__init__.py b/modelscope/preprocessors/nlp/__init__.py index 23fa9f94..f0660374 100644 --- a/modelscope/preprocessors/nlp/__init__.py +++ b/modelscope/preprocessors/nlp/__init__.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING from modelscope.utils.import_utils import LazyImportModule if TYPE_CHECKING: + from .word_alignment_preprocessor import WordAlignmentPreprocessor from .text_error_correction import TextErrorCorrectionPreprocessor from .text_generation_preprocessor import TextGenerationJiebaPreprocessor from .bert_seq_cls_tokenizer import Tokenize @@ -66,6 +67,9 @@ else: 'text_error_correction': [ 'TextErrorCorrectionPreprocessor', ], + 'word_alignment_preprocessor': [ + 'WordAlignmentPreprocessor', + ], 'mglm_summarization_preprocessor': ['MGLMSummarizationPreprocessor'], 'token_classification_thai_preprocessor': [ 'NERPreprocessorThai', diff --git a/modelscope/preprocessors/nlp/word_alignment_preprocessor.py 
b/modelscope/preprocessors/nlp/word_alignment_preprocessor.py new file mode 100644 index 00000000..390deb40 --- /dev/null +++ b/modelscope/preprocessors/nlp/word_alignment_preprocessor.py @@ -0,0 +1,131 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import itertools +import os +import os.path as osp +from typing import Any, Dict, Optional, Union + +import numpy as np +import torch + +from modelscope.metainfo import Preprocessors +from modelscope.preprocessors.base import Preprocessor +from modelscope.preprocessors.builder import PREPROCESSORS +from modelscope.utils.constant import Fields, ModeKeys +from modelscope.utils.hub import get_model_type +from modelscope.utils.logger import get_logger +from .transformers_tokenizer import NLPTokenizer + + +@PREPROCESSORS.register_module( + Fields.nlp, module_name=Preprocessors.word_alignment) +class WordAlignmentPreprocessor(Preprocessor): + """The tokenizer preprocessor used in word alignment . + """ + + def __init__(self, + model_dir: str, + sequence_pair='sentence_pair', + mode=ModeKeys.INFERENCE, + use_fast: bool = False, + sequence_length: int = None, + **kwargs): + """The preprocessor for word alignment task. + + Args: + model_dir: The model dir used to initialize the tokenizer. + sequence_pair: The key of the sequence pair. + mode: The mode for the preprocessor. + use_fast: Use the fast tokenizer or not. + sequence_length: The max sequence length which the model supported, + will be passed into tokenizer as the 'max_length' param. + **kwargs: Extra args input. + {sequence_length: The sequence length which the model supported.} + """ + self.sequence_pair = sequence_pair + + kwargs[ + 'sequence_length'] = sequence_length if sequence_length is not None else kwargs.get( + 'max_length', 128) + self.max_length = kwargs['sequence_length'] + kwargs.pop('max_length', None) + model_type = None + + if model_dir is not None: + model_type = get_model_type(model_dir) + self.nlp_tokenizer = NLPTokenizer( + model_dir, model_type, use_fast=use_fast, tokenize_kwargs=kwargs) + super().__init__(mode=mode) + + def __call__(self, data: Dict, **kwargs) -> Dict[str, Any]: + """process the raw input data + + Args: + data Dict: + Example: + {"sentence_pair": "贝利 在 墨西哥 推出 自传 。||| pele promotes autobiography in mexico ."} + Returns: + Dict[str, Any]: the preprocessed data + """ + sentence_pair = data[self.sequence_pair] + source_sentences, target_sentences = sentence_pair.split('|||') + # src_lang = data.get("src_lang", 'en_XX') + # tgt_lang = data.get("tgt_lang", 'en_XX') + if 'return_tensors' not in kwargs: + kwargs[ + 'return_tensors'] = 'pt' if self.mode == ModeKeys.INFERENCE else None + + sent_src, sent_tgt = source_sentences.strip().split( + ), target_sentences.strip().split() + + token_src = [ + self.nlp_tokenizer.tokenizer.tokenize(word) for word in sent_src + ] + token_tgt = [ + self.nlp_tokenizer.tokenizer.tokenize(word) for word in sent_tgt + ] + wid_src = [ + self.nlp_tokenizer.tokenizer.convert_tokens_to_ids(x) + for x in token_src + ] + wid_tgt = [ + self.nlp_tokenizer.tokenizer.convert_tokens_to_ids(x) + for x in token_tgt + ] + + ids_tgt = self.nlp_tokenizer.tokenizer.prepare_for_model( + list(itertools.chain(*wid_tgt)), + return_tensors='pt', + max_length=self.max_length, + prepend_batch_axis=True)['input_ids'] + ids_src = self.nlp_tokenizer.tokenizer.prepare_for_model( + list(itertools.chain(*wid_src)), + return_tensors='pt', + max_length=self.max_length, + prepend_batch_axis=True)['input_ids'] + + bpe2word_map_src = [] + for i, word_list in 
enumerate(token_src): + bpe2word_map_src += [i for x in word_list] + bpe2word_map_src = torch.Tensor(bpe2word_map_src).type_as( + ids_src).view(1, -1) + bpe2word_map_tgt = [] + for i, word_list in enumerate(token_tgt): + bpe2word_map_tgt += [i for x in word_list] + bpe2word_map_tgt = torch.Tensor(bpe2word_map_tgt).type_as( + ids_tgt).view(1, -1) + attention_mask_src = ( + ids_src != self.nlp_tokenizer.tokenizer.pad_token_id) + attention_mask_tgt = ( + ids_tgt != self.nlp_tokenizer.tokenizer.pad_token_id) + + return { + 'src_input_ids': ids_src, + 'src_attention_mask': attention_mask_src, + 'src_b2w_map': bpe2word_map_src, + 'tgt_input_ids': ids_tgt, + 'tgt_attention_mask': attention_mask_tgt, + 'tgt_b2w_map': bpe2word_map_tgt, + 'threshold': 0.001, + 'bpe_level': False + } diff --git a/modelscope/trainers/trainer.py b/modelscope/trainers/trainer.py index 9a447f28..46de5aa9 100644 --- a/modelscope/trainers/trainer.py +++ b/modelscope/trainers/trainer.py @@ -979,6 +979,8 @@ class EpochBasedTrainer(BaseTrainer): dataset, num_replicas=world_size, rank=rank, shuffle=shuffle) else: sampler = None + if not isinstance(dataset, torch.utils.data.IterableDataset): + kwargs['shuffle'] = shuffle batch_sampler = None diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index 72a981ca..1298b845 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -186,6 +186,7 @@ class NLPTasks(object): zero_shot_classification = 'zero-shot-classification' backbone = 'backbone' text_error_correction = 'text-error-correction' + word_alignment = 'word-alignment' faq_question_answering = 'faq-question-answering' information_extraction = 'information-extraction' document_segmentation = 'document-segmentation' diff --git a/tests/pipelines/test_tbs_detection.py b/tests/pipelines/test_tbs_detection.py new file mode 100644 index 00000000..ac0dd550 --- /dev/null +++ b/tests/pipelines/test_tbs_detection.py @@ -0,0 +1,20 @@ +import unittest + +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.demo_utils import DemoCompatibilityCheck +from modelscope.utils.test_utils import test_level + + +class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_name(self): + tbs_detect = pipeline( + Tasks.image_object_detection, model='landingAI/LD_CytoBrainCerv') + outputs = tbs_detect(input='data/test/images/tbs_detection.jpg') + print(outputs) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/run.py b/tests/run.py index ea78fdd0..5ca06599 100644 --- a/tests/run.py +++ b/tests/run.py @@ -600,7 +600,7 @@ if __name__ == '__main__': parser.add_argument( '--level', default=0, type=int, help='2 -- all, 1 -- p1, 0 -- p0') parser.add_argument( - '--disable_profile', action='store_true', help='disable profiling') + '--profile', action='store_true', help='enable profiling') parser.add_argument( '--run_config', default=None, @@ -634,7 +634,7 @@ if __name__ == '__main__': set_test_level(args.level) os.environ['REGRESSION_BASELINE'] = '1' logger.info(f'TEST LEVEL: {test_level()}') - if not args.disable_profile: + if args.profile: from utils import profiler logger.info('enable profile ...') profiler.enable()
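Not part of the patch above: a minimal test sketch for the new word-alignment pipeline, mirroring the style of tests/pipelines/test_tbs_detection.py. The file name and the test-level gate are assumptions; the task constant, model id, input format and output format are taken from the docstrings and the TASK_OUTPUTS entry added in this patch.

```python
# tests/pipelines/test_word_alignment.py (hypothetical file name)
import unittest

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.test_utils import test_level


class WordAlignmentTest(unittest.TestCase):

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_model_name(self):
        aligner = pipeline(
            Tasks.word_alignment,
            model='damo/Third-Party-Supervised-Word-Aligner-mBERT-base-zhen')
        inputs = {
            'sentence_pair':
            '贝利 在 墨西哥 推出 自传 。||| pele promotes autobiography in mexico .'
        }
        # Expected output format per TASK_OUTPUTS, e.g. {'output': '0-0 1-3 2-4 3-1 4-2 5-5'}
        print(aligner(inputs)['output'])


if __name__ == '__main__':
    unittest.main()
```

Printing the result rather than asserting on exact alignment pairs keeps the sketch robust to model updates, matching how the other new test in this patch is written.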