diff --git a/.dev_scripts/build_image.sh b/.dev_scripts/build_image.sh index c1e61890..dceaaa22 100644 --- a/.dev_scripts/build_image.sh +++ b/.dev_scripts/build_image.sh @@ -150,7 +150,7 @@ echo -e "Building image with:\npython$python_version\npytorch$torch_version\nten docker_file_content=`cat docker/Dockerfile.ubuntu` if [ "$is_ci_test" != "True" ]; then echo "Building ModelScope lib, will install ModelScope lib to image" - docker_file_content="${docker_file_content} \nRUN pip install --no-cache-dir numpy https://modelscope.oss-cn-beijing.aliyuncs.com/releases/build/modelscope-$modelscope_version-py3-none-any.whl && pip install --no-cache-dir -U transformers" + docker_file_content="${docker_file_content} \nRUN pip install --no-cache-dir -U funasr transformers && pip install --no-cache-dir https://modelscope.oss-cn-beijing.aliyuncs.com/releases/build/modelscope-$modelscope_version-py3-none-any.whl " fi echo "$is_dsw" if [ "$is_dsw" == "False" ]; then diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index c6a9162a..f83defd0 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -475,35 +475,37 @@ class HubApi: raise NotExistError('The model: %s has no revision : %s .' % (model_id, revision)) logger.info('Development mode use revision: %s' % revision) else: - if revision is None: # user not specified revision, use latest revision before release time - revisions = self.list_model_revisions( - model_id, - cutoff_timestamp=release_timestamp, - use_cookies=False if cookies is None else cookies) - if len(revisions) == 0: - logger.warning(('There is no version specified and there is no version in the model repository,' - 'use the master branch, which is fragile, please use it with caution!')) + all_revisions = self.list_model_revisions( + model_id, + cutoff_timestamp=current_timestamp, + use_cookies=False if cookies is None else cookies) + if len(all_revisions) == 0: + if revision is None or revision == MASTER_MODEL_BRANCH: revision = MASTER_MODEL_BRANCH else: - # tags (revisions) returned from backend are guaranteed to be ordered by create-time - # we shall obtain the latest revision created earlier than release version of this branch - revision = revisions[0] - logger.info( - 'Model revision not specified, use revision: %s' - % revision) + raise NotExistError('The model: %s has no revision: %s !' % (model_id, revision)) else: - # use user-specified revision - revisions = self.list_model_revisions( - model_id, - cutoff_timestamp=current_timestamp, - use_cookies=False if cookies is None else cookies) - if revision not in revisions: - if revision == MASTER_MODEL_BRANCH: - logger.warning('Using the master branch is fragile, please use it with caution!') + if revision is None: # user not specified revision, use latest revision before release time + revisions = self.list_model_revisions( + model_id, + cutoff_timestamp=release_timestamp, + use_cookies=False if cookies is None else cookies) + if len(revisions) > 0: + revision = revisions[0] # use latest revision before release time. else: - raise NotExistError('The model: %s has no revision: %s !' 
% - (model_id, revision)) - logger.info('Use user-specified model revision: %s' % revision) + vl = '[%s]' % ','.join(all_revisions) + raise NoValidRevisionError('Model revision should be specified from revisions: %s' % (vl)) + logger.warning('Model revision not specified, use revision: %s' % revision) + else: + # use user-specified revision + if revision not in all_revisions: + if revision == MASTER_MODEL_BRANCH: + logger.warning('Using the master branch is fragile, please use it with caution!') + else: + vl = '[%s]' % ','.join(all_revisions) + raise NotExistError('The model: %s has no revision: %s valid are: %s!' % + (model_id, revision, vl)) + logger.info('Use user-specified model revision: %s' % revision) return revision def get_model_branches_and_tags( diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py index ea56efb5..377ade9b 100644 --- a/modelscope/metainfo.py +++ b/modelscope/metainfo.py @@ -291,6 +291,7 @@ class Pipelines(object): image_denoise = 'nafnet-image-denoise' image_deblur = 'nafnet-image-deblur' image_editing = 'masactrl-image-editing' + freeu_stable_diffusion_text2image = 'freeu-stable-diffusion-text2image' person_image_cartoon = 'unet-person-image-cartoon' ocr_detection = 'resnet18-ocr-detection' table_recognition = 'dla34-table-recognition' diff --git a/modelscope/models/base/base_model.py b/modelscope/models/base/base_model.py index 8e6d4ae6..9beb156b 100644 --- a/modelscope/models/base/base_model.py +++ b/modelscope/models/base/base_model.py @@ -134,12 +134,13 @@ class Model(ABC): ignore_file_pattern=ignore_file_pattern) logger.info(f'initialize model from {local_model_dir}') + configuration_path = osp.join(local_model_dir, ModelFile.CONFIGURATION) + cfg = None if cfg_dict is not None: cfg = cfg_dict - else: - cfg = Config.from_file( - osp.join(local_model_dir, ModelFile.CONFIGURATION)) - task_name = cfg.task + elif os.path.exists(configuration_path): + cfg = Config.from_file(configuration_path) + task_name = getattr(cfg, 'task', None) if 'task' in kwargs: task_name = kwargs.pop('task') model_cfg = getattr(cfg, 'model', ConfigDict()) @@ -162,6 +163,9 @@ class Model(ABC): model = model.to(device) return model # use ms + if cfg is None: + raise FileNotFoundError( + f'`{ModelFile.CONFIGURATION}` file not found.') model_cfg.model_dir = local_model_dir # install and import remote repos before build diff --git a/modelscope/models/cv/controllable_image_generation/controlnet.py b/modelscope/models/cv/controllable_image_generation/controlnet.py index 9df152b3..5418a39e 100644 --- a/modelscope/models/cv/controllable_image_generation/controlnet.py +++ b/modelscope/models/cv/controllable_image_generation/controlnet.py @@ -22,6 +22,8 @@ from modelscope.metainfo import Models from modelscope.models.base import Tensor from modelscope.models.base.base_torch_model import TorchModel from modelscope.models.builder import MODELS +from modelscope.utils.compatible_with_transformers import \ + compatible_position_ids from modelscope.utils.config import Config from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.logger import get_logger @@ -88,7 +90,11 @@ class ControlNet(TorchModel): if device == 'gpu': device = 'cuda' model = create_model(yaml_path).cpu() - model.load_state_dict(load_state_dict(ckpt_path, location=device)) + state_dict = load_state_dict(ckpt_path, location=device) + compatible_position_ids( + state_dict, + 'cond_stage_model.transformer.text_model.embeddings.position_ids') + model.load_state_dict(state_dict) self.model = model.to(device) 
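An aside on the helper used above: compatible_position_ids (from modelscope.utils.compatible_with_transformers) is imported and called in several files of this patch — controlnet.py here, the DGDS generate/retrieval models, and the machine reading comprehension task model further down — but its implementation is not part of the diff. A minimal sketch of what such a helper presumably does, assuming its only job is to drop the stale position_ids buffer that checkpoints saved with older transformers versions still carry:

# Hypothetical sketch -- the real modelscope.utils.compatible_with_transformers module
# is not shown in this diff; newer transformers releases stopped registering
# `position_ids` as a persistent buffer, so old checkpoints fail strict loading.
from typing import Dict

import torch


def compatible_position_ids(state_dict: Dict[str, torch.Tensor], key: str) -> None:
    # Remove the obsolete buffer in place, mirroring how the callers in this
    # patch pass the state_dict before load_state_dict().
    if key in state_dict:
        state_dict.pop(key)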
self.ddim_sampler = DDIMSampler(self.model) diff --git a/modelscope/models/cv/shop_segmentation/head_fpn.py b/modelscope/models/cv/shop_segmentation/head_fpn.py index 0d4027cb..dfa284d4 100644 --- a/modelscope/models/cv/shop_segmentation/head_fpn.py +++ b/modelscope/models/cv/shop_segmentation/head_fpn.py @@ -9,8 +9,8 @@ import numpy as np import torch import torch.nn as nn from mmcv.cnn import ConvModule -from timm.models.layers.drop import drop_path -from timm.models.layers.weight_init import trunc_normal_ +from timm.layers.drop import drop_path +from timm.layers.weight_init import trunc_normal_ from .common import Upsample, resize diff --git a/modelscope/models/cv/shop_segmentation/models.py b/modelscope/models/cv/shop_segmentation/models.py index a206e9f1..1b07a08c 100644 --- a/modelscope/models/cv/shop_segmentation/models.py +++ b/modelscope/models/cv/shop_segmentation/models.py @@ -11,8 +11,8 @@ from collections import OrderedDict import torch import torch.nn.functional as F import torch.utils.checkpoint as checkpoint -from timm.models.layers.drop import drop_path -from timm.models.layers.weight_init import trunc_normal_ +from timm.layers.drop import drop_path +from timm.layers.weight_init import trunc_normal_ from torch import nn diff --git a/modelscope/models/cv/shop_segmentation/neck_fpn.py b/modelscope/models/cv/shop_segmentation/neck_fpn.py index d344de71..12c11d76 100644 --- a/modelscope/models/cv/shop_segmentation/neck_fpn.py +++ b/modelscope/models/cv/shop_segmentation/neck_fpn.py @@ -8,8 +8,8 @@ import torch.nn as nn import torch.nn.functional as F from mmcv.cnn import ConvModule -from timm.models.layers.drop import drop_path -from timm.models.layers.weight_init import trunc_normal_ +from timm.layers.drop import drop_path +from timm.layers.weight_init import trunc_normal_ from .common import resize diff --git a/modelscope/models/multi_modal/freeu/__init__.py b/modelscope/models/multi_modal/freeu/__init__.py new file mode 100644 index 00000000..3cd55cf3 --- /dev/null +++ b/modelscope/models/multi_modal/freeu/__init__.py @@ -0,0 +1,22 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .free_lunch_utils import register_free_upblock2d, register_free_crossattn_upblock2d +else: + _import_structure = { + 'free_lunch_utils': + ['register_free_upblock2d', 'register_free_crossattn_upblock2d'] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/models/multi_modal/freeu/free_lunch_utils.py b/modelscope/models/multi_modal/freeu/free_lunch_utils.py new file mode 100644 index 00000000..eb5d191f --- /dev/null +++ b/modelscope/models/multi_modal/freeu/free_lunch_utils.py @@ -0,0 +1,331 @@ +# ------------------------------------------------------------------------ +# Modified from https://github.com/ChenyangSi/FreeU/blob/main/demo/free_lunch_utils.py +# Copyright (c) 2023 TencentARC. All Rights Reserved. +# ------------------------------------------------------------------------ + +from typing import Any, Dict, List, Optional, Tuple, Union + +import torch +import torch.fft as fft +from diffusers.utils import is_torch_version + + +def isinstance_str(x: object, cls_name: str): + """ + Checks whether x has any class *named* cls_name in its ancestry. + Doesn't require access to the class's implementation. + + Useful for patching! 
+ """ + + for _cls in x.__class__.__mro__: + if _cls.__name__ == cls_name: + return True + + return False + + +def Fourier_filter(x, threshold, scale): + dtype = x.dtype + x = x.type(torch.float32) + # FFT + x_freq = fft.fftn(x, dim=(-2, -1)) + x_freq = fft.fftshift(x_freq, dim=(-2, -1)) + + B, C, H, W = x_freq.shape + mask = torch.ones((B, C, H, W)).cuda() + + crow, ccol = H // 2, W // 2 + mask[..., crow - threshold:crow + threshold, + ccol - threshold:ccol + threshold] = scale + x_freq = x_freq * mask + + # IFFT + x_freq = fft.ifftshift(x_freq, dim=(-2, -1)) + x_filtered = fft.ifftn(x_freq, dim=(-2, -1)).real + + x_filtered = x_filtered.type(dtype) + return x_filtered + + +def register_upblock2d(model): + + def up_forward(self): + + def forward(hidden_states, + res_hidden_states_tuple, + temb=None, + upsample_size=None): + for resnet in self.resnets: + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = torch.cat([hidden_states, res_hidden_states], + dim=1) + + if self.training and self.gradient_checkpointing: + + def create_custom_forward(module): + + def custom_forward(*inputs): + return module(*inputs) + + return custom_forward + + if is_torch_version('>=', '1.11.0'): + hidden_states = torch.utils.checkpoint.checkpoint( + create_custom_forward(resnet), + hidden_states, + temb, + use_reentrant=False) + else: + hidden_states = torch.utils.checkpoint.checkpoint( + create_custom_forward(resnet), hidden_states, temb) + else: + hidden_states = resnet(hidden_states, temb) + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states, upsample_size) + + return hidden_states + + return forward + + for i, upsample_block in enumerate(model.unet.up_blocks): + if isinstance_str(upsample_block, 'UpBlock2D'): + upsample_block.forward = up_forward(upsample_block) + + +def register_free_upblock2d(model, b1=1.2, b2=1.4, s1=0.9, s2=0.2): + + def up_forward(self): + + def forward(hidden_states, + res_hidden_states_tuple, + temb=None, + upsample_size=None): + for resnet in self.resnets: + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + + # --------------- FreeU code ----------------------- + # Only operate on the first two stages + if hidden_states.shape[1] == 1280: + hidden_states[:, :640] = hidden_states[:, :640] * self.b1 + res_hidden_states = Fourier_filter( + res_hidden_states, threshold=1, scale=self.s1) + if hidden_states.shape[1] == 640: + hidden_states[:, :320] = hidden_states[:, :320] * self.b2 + res_hidden_states = Fourier_filter( + res_hidden_states, threshold=1, scale=self.s2) + # --------------------------------------------------------- + + hidden_states = torch.cat([hidden_states, res_hidden_states], + dim=1) + + if self.training and self.gradient_checkpointing: + + def create_custom_forward(module): + + def custom_forward(*inputs): + return module(*inputs) + + return custom_forward + + if is_torch_version('>=', '1.11.0'): + hidden_states = torch.utils.checkpoint.checkpoint( + create_custom_forward(resnet), + hidden_states, + temb, + use_reentrant=False) + else: + hidden_states = torch.utils.checkpoint.checkpoint( + create_custom_forward(resnet), hidden_states, temb) + else: + hidden_states = resnet(hidden_states, temb) + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states, upsample_size) + + return 
hidden_states + + return forward + + for i, upsample_block in enumerate(model.unet.up_blocks): + if isinstance_str(upsample_block, 'UpBlock2D'): + upsample_block.forward = up_forward(upsample_block) + setattr(upsample_block, 'b1', b1) + setattr(upsample_block, 'b2', b2) + setattr(upsample_block, 's1', s1) + setattr(upsample_block, 's2', s2) + + +def register_crossattn_upblock2d(model): + + def up_forward(self): + + def forward( + hidden_states: torch.FloatTensor, + res_hidden_states_tuple: Tuple[torch.FloatTensor, ...], + temb: Optional[torch.FloatTensor] = None, + encoder_hidden_states: Optional[torch.FloatTensor] = None, + cross_attention_kwargs: Optional[Dict[str, Any]] = None, + upsample_size: Optional[int] = None, + attention_mask: Optional[torch.FloatTensor] = None, + encoder_attention_mask: Optional[torch.FloatTensor] = None, + ): + for resnet, attn in zip(self.resnets, self.attentions): + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = torch.cat([hidden_states, res_hidden_states], + dim=1) + + if self.training and self.gradient_checkpointing: + + def create_custom_forward(module, return_dict=None): + + def custom_forward(*inputs): + if return_dict is not None: + return module(*inputs, return_dict=return_dict) + else: + return module(*inputs) + + return custom_forward + + ckpt_kwargs: Dict[str, Any] = { + 'use_reentrant': False + } if is_torch_version('>=', '1.11.0') else {} + hidden_states = torch.utils.checkpoint.checkpoint( + create_custom_forward(resnet), + hidden_states, + temb, + **ckpt_kwargs, + ) + hidden_states = torch.utils.checkpoint.checkpoint( + create_custom_forward(attn, return_dict=False), + hidden_states, + encoder_hidden_states, + None, # timestep + None, # class_labels + cross_attention_kwargs, + attention_mask, + encoder_attention_mask, + **ckpt_kwargs, + )[0] + else: + hidden_states = resnet(hidden_states, temb) + hidden_states = attn( + hidden_states, + encoder_hidden_states=encoder_hidden_states, + cross_attention_kwargs=cross_attention_kwargs, + attention_mask=attention_mask, + encoder_attention_mask=encoder_attention_mask, + return_dict=False, + )[0] + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states, upsample_size) + + return hidden_states + + return forward + + for i, upsample_block in enumerate(model.unet.up_blocks): + if isinstance_str(upsample_block, 'CrossAttnUpBlock2D'): + upsample_block.forward = up_forward(upsample_block) + + +def register_free_crossattn_upblock2d(model, b1=1.2, b2=1.4, s1=0.9, s2=0.2): + + def up_forward(self): + + def forward( + hidden_states: torch.FloatTensor, + res_hidden_states_tuple: Tuple[torch.FloatTensor, ...], + temb: Optional[torch.FloatTensor] = None, + encoder_hidden_states: Optional[torch.FloatTensor] = None, + cross_attention_kwargs: Optional[Dict[str, Any]] = None, + upsample_size: Optional[int] = None, + attention_mask: Optional[torch.FloatTensor] = None, + encoder_attention_mask: Optional[torch.FloatTensor] = None, + ): + for resnet, attn in zip(self.resnets, self.attentions): + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + + # --------------- FreeU code ----------------------- + # Only operate on the first two stages + if hidden_states.shape[1] == 1280: + hidden_states[:, :640] = hidden_states[:, :640] * self.b1 + res_hidden_states = Fourier_filter( + 
res_hidden_states, threshold=1, scale=self.s1) + if hidden_states.shape[1] == 640: + hidden_states[:, :320] = hidden_states[:, :320] * self.b2 + res_hidden_states = Fourier_filter( + res_hidden_states, threshold=1, scale=self.s2) + # --------------------------------------------------------- + + hidden_states = torch.cat([hidden_states, res_hidden_states], + dim=1) + + if self.training and self.gradient_checkpointing: + + def create_custom_forward(module, return_dict=None): + + def custom_forward(*inputs): + if return_dict is not None: + return module(*inputs, return_dict=return_dict) + else: + return module(*inputs) + + return custom_forward + + ckpt_kwargs: Dict[str, Any] = { + 'use_reentrant': False + } if is_torch_version('>=', '1.11.0') else {} + hidden_states = torch.utils.checkpoint.checkpoint( + create_custom_forward(resnet), + hidden_states, + temb, + **ckpt_kwargs, + ) + hidden_states = torch.utils.checkpoint.checkpoint( + create_custom_forward(attn, return_dict=False), + hidden_states, + encoder_hidden_states, + None, # timestep + None, # class_labels + cross_attention_kwargs, + attention_mask, + encoder_attention_mask, + **ckpt_kwargs, + )[0] + else: + hidden_states = resnet(hidden_states, temb) + hidden_states = attn( + hidden_states, + encoder_hidden_states=encoder_hidden_states, + cross_attention_kwargs=cross_attention_kwargs, + )[0] + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states, upsample_size) + + return hidden_states + + return forward + + for i, upsample_block in enumerate(model.unet.up_blocks): + if isinstance_str(upsample_block, 'CrossAttnUpBlock2D'): + upsample_block.forward = up_forward(upsample_block) + setattr(upsample_block, 'b1', b1) + setattr(upsample_block, 'b2', b2) + setattr(upsample_block, 's1', s1) + setattr(upsample_block, 's2', s2) diff --git a/modelscope/models/nlp/bert/sentence_embedding.py b/modelscope/models/nlp/bert/sentence_embedding.py index 92a9da50..b7df5ef9 100644 --- a/modelscope/models/nlp/bert/sentence_embedding.py +++ b/modelscope/models/nlp/bert/sentence_embedding.py @@ -1,6 +1,7 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
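A brief illustration of the Fourier_filter used by the FreeU up-blocks above: it scales only the low-frequency band of the skip-connection features before they are concatenated back into the up-block. A self-contained toy example of the same operation (the patched helper builds its mask with .cuda() and therefore assumes a GPU; this sketch stays on CPU):

# Toy illustration of the FreeU low-frequency scaling (not part of the patch).
import torch
import torch.fft as fft

x = torch.randn(1, 4, 64, 64)  # (B, C, H, W) skip-connection features
x_freq = fft.fftshift(fft.fftn(x, dim=(-2, -1)), dim=(-2, -1))
mask = torch.ones(x_freq.shape)
crow, ccol = x.shape[-2] // 2, x.shape[-1] // 2
threshold, scale = 1, 0.9  # matches Fourier_filter(res_hidden_states, threshold=1, scale=s1)
mask[..., crow - threshold:crow + threshold, ccol - threshold:ccol + threshold] = scale
x_filtered = fft.ifftn(fft.ifftshift(x_freq * mask, dim=(-2, -1)), dim=(-2, -1)).real
print(x_filtered.shape)  # torch.Size([1, 4, 64, 64])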
import torch +import torch.nn.functional as F from torch import nn from modelscope.metainfo import Models @@ -61,8 +62,9 @@ class BertForSentenceEmbedding(BertPreTrainedModel): def __init__(self, config, **kwargs): super().__init__(config) self.config = config - self.pooler_type = kwargs.get('pooler_type', 'cls') + self.pooler_type = kwargs.get('emb_pooler_type', 'cls') self.pooler = Pooler(self.pooler_type) + self.normalize = kwargs.get('normalize', False) setattr(self, self.base_model_prefix, BertModel(config, add_pooling_layer=False)) @@ -128,6 +130,8 @@ class BertForSentenceEmbedding(BertPreTrainedModel): output_hidden_states=output_hidden_states, return_dict=return_dict) outputs = self.pooler(outputs, attention_mask) + if self.normalize: + outputs = F.normalize(outputs, p=2, dim=-1) return outputs @classmethod @@ -142,8 +146,11 @@ class BertForSentenceEmbedding(BertPreTrainedModel): The loaded model, which is initialized by transformers.PreTrainedModel.from_pretrained """ model_dir = kwargs.get('model_dir') - model = super( - Model, - cls).from_pretrained(pretrained_model_name_or_path=model_dir) + model_kwargs = { + 'emb_pooler_type': kwargs.get('emb_pooler_type', 'cls'), + 'normalize': kwargs.get('normalize', False) + } + model = super(Model, cls).from_pretrained( + pretrained_model_name_or_path=model_dir, **model_kwargs) model.model_dir = model_dir return model diff --git a/modelscope/models/nlp/bloom/__init__.py b/modelscope/models/nlp/bloom/__init__.py index b0f04af7..24d7202d 100644 --- a/modelscope/models/nlp/bloom/__init__.py +++ b/modelscope/models/nlp/bloom/__init__.py @@ -6,10 +6,12 @@ from modelscope.utils.import_utils import LazyImportModule if TYPE_CHECKING: from .backbone import BloomModel from .text_generation import BloomForTextGeneration + from .sentence_embedding import BloomForSentenceEmbedding else: _import_structure = { 'backbone': ['BloomModel'], 'text_generation': ['BloomForTextGeneration'], + 'sentence_embedding': ['BloomForSentenceEmbedding'] } import sys sys.modules[__name__] = LazyImportModule( diff --git a/modelscope/models/nlp/bloom/sentence_embedding.py b/modelscope/models/nlp/bloom/sentence_embedding.py new file mode 100644 index 00000000..ec35db38 --- /dev/null +++ b/modelscope/models/nlp/bloom/sentence_embedding.py @@ -0,0 +1,165 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import torch +from transformers import BloomConfig +from transformers import BloomModel as BloomModelTransform + +from modelscope.metainfo import Models +from modelscope.models import MODELS, TorchModel +from modelscope.outputs import SentencEmbeddingModelOutput +from modelscope.utils.constant import Tasks + + +class DecoderPooler(torch.nn.Module): + """ + Parameter-free poolers to get the sentence embedding + 'last': the last token state. + 'weighted_mean': position weighted average of all token states. + """ + + def __init__(self, pooler_type): + super().__init__() + self.pooler_type = pooler_type + assert self.pooler_type in [ + 'last', 'weighted_mean' + ], 'unrecognized pooling type %s' % self.pooler_type + + def forward(self, outputs, attention_mask): + last_hidden = outputs.last_hidden_state + + if self.pooler_type in ['last']: + n, l, h = last_hidden.shape + + # Get shape [n] indices of the last token (i.e. 
the last token for each batch item) + # Any sequence where min == 1, we use the entire sequence lenth since argmin = 0 + values, indices = torch.min(attention_mask, 1, keepdim=False) + gather_indices = torch.where(values == 0, indices, + l) - 1 # Shape [n] + + # There are empty sequences, where the index would become -1 which will crash + gather_indices = torch.clamp(gather_indices, min=0) + + # Turn indices from shape [n] --> [n, 1, h] + gather_indices = gather_indices.unsqueeze(1).unsqueeze(1).expand( + n, 1, h) + + # Gather along the 1st dim (l) (n, l, h -> n, h) + pooled_output = torch.gather(last_hidden, 1, + gather_indices).squeeze(dim=1) + + elif self.pooler_type == 'weighted_mean': + input_mask_expanded = attention_mask.unsqueeze(-1).expand( + last_hidden.size()).float() + # last_hidden shape: bs, seq, hidden_dim + weights = ( + torch.arange(start=1, end=last_hidden.shape[1] + + 1).unsqueeze(0).unsqueeze(-1).expand( + last_hidden.size()).float().to( + last_hidden.device)) + assert weights.shape == last_hidden.shape == input_mask_expanded.shape + input_mask_expanded = input_mask_expanded * weights + + sum_embeddings = torch.sum(last_hidden * input_mask_expanded, 1) + sum_mask = input_mask_expanded.sum(1) + sum_mask = torch.clamp(sum_mask, min=1e-9) + pooled_output = sum_embeddings / sum_mask + + else: + raise NotImplementedError + + return pooled_output + + +@MODELS.register_module( + group_key=Tasks.sentence_embedding, module_name=Models.bloom) +class BloomForSentenceEmbedding(BloomModelTransform, TorchModel): + r""" + This model represent a text to a dense vector by the last token state or weighted mean of all token states. + See `Language Models are Universal Embedders + `_ for details. + """ + + def __init__(self, config, **kwargs): + super().__init__(config) + self.config = config + self.pooler_type = kwargs.get('emb_pooler_type', 'weighted_mean') + self.pooler = DecoderPooler(self.pooler_type) + self.normalize = kwargs.get('normalize', False) + setattr(self, self.base_model_prefix, BloomModelTransform(config)) + + def forward(self, query=None, docs=None, labels=None): + r""" + Args: + query (:obj: `dict`): Dict of pretrained models's input for the query sequence. See + :meth:`transformers.PreTrainedTokenizer.encode` and :meth:`transformers.PreTrainedTokenizer.__call__` + for details. + docs (:obj: `dict`): Dict of pretrained models's input for the query sequence. See + :meth:`transformers.PreTrainedTokenizer.encode` and :meth:`transformers.PreTrainedTokenizer.__call__` + for details. 
+ Returns: + Returns `modelscope.outputs.SentencEmbeddingModelOutput + Examples: + >>> from modelscope.models import Model + >>> from modelscope.preprocessors import Preprocessor + >>> model = Model.from_pretrained('damo/nlp_udever_bloom_560m') + >>> preprocessor = Preprocessor.from_pretrained('damo/nlp_udever_bloom_560m') + >>> inputs = preprocessor({'source_sentence': ['This is a test']}) + >>> outputs = model(**inputs) + >>> print(outputs) + """ + query_embeddings, doc_embeddings = None, None + if query is not None: + query_embeddings = self.encode(**query) + if docs is not None: + doc_embeddings = self.encode(**docs) + outputs = SentencEmbeddingModelOutput( + query_embeddings=query_embeddings, doc_embeddings=doc_embeddings) + if query_embeddings is None or doc_embeddings is None: + return outputs + if self.base_model.training: + loss_fct = torch.nn.CrossEntropyLoss() + scores = torch.matmul(query_embeddings, doc_embeddings.T) + if labels is None: + labels = torch.arange( + scores.size(0), device=scores.device, dtype=torch.long) + labels = labels * ( + doc_embeddings.size(0) // query_embeddings.size(0)) + loss = loss_fct(scores, labels) + outputs.loss = loss + return outputs + + def encode( + self, + input_ids=None, + attention_mask=None, + ): + outputs = self.base_model.forward( + input_ids, attention_mask=attention_mask) + embeddings = self.pooler(outputs, attention_mask) + if self.normalize: + embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=-1) + return embeddings + + @classmethod + def _instantiate(cls, **kwargs): + """Instantiate the model. + + Args: + kwargs: Input args. + model_dir: The model dir used to load the checkpoint and the label information. + + Returns: + The loaded model, which is initialized by transformers.PreTrainedModel.from_pretrained + """ + model_dir = kwargs.get('model_dir') + model_kwargs = { + 'emb_pooler_type': kwargs.get('emb_pooler_type', 'weighted_mean'), + 'normalize': kwargs.get('normalize', False) + } + if model_dir is None: + config = BloomConfig(**kwargs) + model = cls(config) + else: + model = super(BloomModelTransform, cls).from_pretrained( + pretrained_model_name_or_path=model_dir, **model_kwargs) + model.model_dir = model_dir + return model diff --git a/modelscope/models/nlp/dgds/document_grounded_dialog_generate.py b/modelscope/models/nlp/dgds/document_grounded_dialog_generate.py index 7c2f6327..27902b67 100644 --- a/modelscope/models/nlp/dgds/document_grounded_dialog_generate.py +++ b/modelscope/models/nlp/dgds/document_grounded_dialog_generate.py @@ -6,6 +6,8 @@ import torch from modelscope.metainfo import Models from modelscope.models.base import Tensor, TorchModel from modelscope.models.builder import MODELS +from modelscope.utils.compatible_with_transformers import \ + compatible_position_ids from modelscope.utils.config import Config from modelscope.utils.constant import ModelFile, Tasks from .backbone import Re2GModel @@ -24,6 +26,8 @@ class DocumentGroundedDialogGenerateModel(TorchModel): state_dict = torch.load( os.path.join(self.model_dir, ModelFile.TORCH_MODEL_BIN_FILE), map_location='cpu') + compatible_position_ids( + state_dict, 'rerank.encoder.roberta.embeddings.position_ids') self.model.load_state_dict(state_dict) def forward(self, input: Dict[str, Tensor]): diff --git a/modelscope/models/nlp/dgds/document_grounded_dialog_retrieval.py b/modelscope/models/nlp/dgds/document_grounded_dialog_retrieval.py index bd8e05d6..07685673 100644 --- a/modelscope/models/nlp/dgds/document_grounded_dialog_retrieval.py +++ 
b/modelscope/models/nlp/dgds/document_grounded_dialog_retrieval.py @@ -6,6 +6,8 @@ import torch from modelscope.metainfo import Models from modelscope.models.base import Tensor, TorchModel from modelscope.models.builder import MODELS +from modelscope.utils.compatible_with_transformers import \ + compatible_position_ids from modelscope.utils.config import Config from modelscope.utils.constant import ModelFile, Tasks from .backbone import DPRModel @@ -24,6 +26,8 @@ class DocumentGroundedDialogRetrievalModel(TorchModel): state_dict = torch.load( os.path.join(self.model_dir, ModelFile.TORCH_MODEL_BIN_FILE), map_location='cpu') + compatible_position_ids(state_dict, + 'ctx_encoder.encoder.embeddings.position_ids') self.model.load_state_dict(state_dict) def forward(self, input: Dict[str, Tensor], gck_segment=32): diff --git a/modelscope/models/nlp/task_models/machine_reading_comprehension.py b/modelscope/models/nlp/task_models/machine_reading_comprehension.py index 034e53ce..add62ce8 100644 --- a/modelscope/models/nlp/task_models/machine_reading_comprehension.py +++ b/modelscope/models/nlp/task_models/machine_reading_comprehension.py @@ -16,6 +16,8 @@ from modelscope.models.base import TorchModel from modelscope.models.builder import MODELS from modelscope.models.nlp.task_models.task_model import EncoderModel from modelscope.outputs import MachineReadingComprehensionOutput, OutputKeys +from modelscope.utils.compatible_with_transformers import \ + compatible_position_ids from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.hub import parse_label_mapping @@ -45,9 +47,10 @@ class ModelForMachineReadingComprehension(TorchModel): self.config.hidden_dropout_prob, intermediate_hidden_size=self.config. projection_intermediate_hidden_size) - self.load_state_dict( - torch.load( - os.path.join(model_dir, ModelFile.TORCH_MODEL_BIN_FILE))) + state_dict = torch.load( + os.path.join(model_dir, ModelFile.TORCH_MODEL_BIN_FILE)) + compatible_position_ids(state_dict, 'roberta.embeddings.position_ids') + self.load_state_dict(state_dict) def forward( self, diff --git a/modelscope/pipeline_inputs.py b/modelscope/pipeline_inputs.py index d97a95f9..6e644376 100644 --- a/modelscope/pipeline_inputs.py +++ b/modelscope/pipeline_inputs.py @@ -326,6 +326,20 @@ TASK_INPUTS = { # ============ nlp tasks =================== Tasks.chat: { + # An input example for `messages` format (Dict[str, List[Dict[str, str]]]): + # {'messages': [{ + # 'role': 'system', + # 'content': 'You are a helpful assistant.' + # }, { + # 'role': 'user', + # 'content': 'Hello! Where is the capital of Zhejiang?' + # }, { + # 'role': 'assistant', + # 'content': 'Hangzhou is the capital of Zhejiang.' + # }, { + # 'role': 'user', + # 'content': 'Tell me something about HangZhou?' 
+ # }]} 'messages': InputType.LIST }, Tasks.text_classification: [ diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py index 525bc92c..ddc3c422 100644 --- a/modelscope/pipelines/builder.py +++ b/modelscope/pipelines/builder.py @@ -121,6 +121,7 @@ def pipeline(task: str = None, ignore_file_pattern=ignore_file_pattern) if pipeline_name is None and kwargs.get('llm_first'): pipeline_name = llm_first_checker(model, model_revision) + kwargs.pop('llm_first') pipeline_props = {'type': pipeline_name} if pipeline_name is None: # get default pipeline for this task @@ -215,7 +216,7 @@ def llm_first_checker(model: Union[str, List[str], Model, List[Model]], except Exception: return None - def parse_model_type(file: Optional[str], pattern: str) -> Optional[str]: + def parse_and_get(file: Optional[str], pattern: str) -> Optional[str]: if file is None or not osp.exists(file): return None return Config.from_file(file).safe_get(pattern) @@ -223,15 +224,22 @@ def llm_first_checker(model: Union[str, List[str], Model, List[Model]], def get_model_type(model: str, revision: Optional[str]) -> Optional[str]: cfg_file = get_file_name(model, ModelFile.CONFIGURATION, revision) hf_cfg_file = get_file_name(model, ModelFile.CONFIG, revision) - cfg_model_type = parse_model_type(cfg_file, 'model.type') - hf_cfg_model_type = parse_model_type(hf_cfg_file, 'model_type') + cfg_model_type = parse_and_get(cfg_file, 'model.type') + hf_cfg_model_type = parse_and_get(hf_cfg_file, 'model_type') return cfg_model_type or hf_cfg_model_type + def get_adapter_type(model: str, revision: Optional[str]) -> Optional[str]: + cfg_file = get_file_name(model, ModelFile.CONFIGURATION, revision) + model = parse_and_get(cfg_file, 'adapter_cfg.model_id_or_path') + revision = parse_and_get(cfg_file, 'adapter_cfg.model_revision') + return None if model is None else get_model_type(model, revision) + if isinstance(model, list): model = model[0] if not isinstance(model, str): model = model.model_dir - model_type = get_model_type(model, revision) + model_type = get_model_type(model, revision) \ + or get_adapter_type(model, revision) if model_type is not None: model_type = model_type.lower().split('-')[0] if model_type in LLM_FORMAT_MAP: diff --git a/modelscope/pipelines/multi_modal/__init__.py b/modelscope/pipelines/multi_modal/__init__.py index b5316684..1faa261e 100644 --- a/modelscope/pipelines/multi_modal/__init__.py +++ b/modelscope/pipelines/multi_modal/__init__.py @@ -26,6 +26,7 @@ if TYPE_CHECKING: from .visual_question_answering_pipeline import VisualQuestionAnsweringPipeline from .video_question_answering_pipeline import VideoQuestionAnsweringPipeline from .videocomposer_pipeline import VideoComposerPipeline + from .text_to_image_freeu_pipeline import FreeUTextToImagePipeline else: _import_structure = { 'image_captioning_pipeline': ['ImageCaptioningPipeline'], @@ -53,7 +54,8 @@ else: ['SOONetVideoTemporalGroundingPipeline'], 'text_to_video_synthesis_pipeline': ['TextToVideoSynthesisPipeline'], 'multimodal_dialogue_pipeline': ['MultimodalDialoguePipeline'], - 'videocomposer_pipeline': ['VideoComposerPipeline'] + 'videocomposer_pipeline': ['VideoComposerPipeline'], + 'text_to_image_freeu_pipeline': ['FreeUTextToImagePipeline'] } import sys diff --git a/modelscope/pipelines/multi_modal/text_to_image_freeu_pipeline.py b/modelscope/pipelines/multi_modal/text_to_image_freeu_pipeline.py new file mode 100644 index 00000000..9300554c --- /dev/null +++ b/modelscope/pipelines/multi_modal/text_to_image_freeu_pipeline.py @@ -0,0 
+1,138 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path +from typing import Any, Dict, Optional, Union + +import numpy as np +import torch + +from modelscope.metainfo import Pipelines +from modelscope.models.multi_modal.freeu import ( + register_free_crossattn_upblock2d, register_free_upblock2d) +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.pipelines.base import Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.utils.constant import Tasks +from modelscope.utils.logger import get_logger + +logger = get_logger() + +__all__ = ['FreeUTextToImagePipeline'] + + +@PIPELINES.register_module( + Tasks.text_to_image_synthesis, + module_name=Pipelines.freeu_stable_diffusion_text2image) +class FreeUTextToImagePipeline(Pipeline): + + def __init__(self, model=str, preprocessor=None, **kwargs): + """ FreeU Text to Image Pipeline. + + Examples: + + >>> import cv2 + >>> from modelscope.pipelines import pipeline + >>> from modelscope.utils.constant import Tasks + + >>> prompt = "a photo of a running corgi" # prompt + >>> output_image_path = './result.png' + >>> inputs = {'prompt': prompt} + >>> + >>> pipe = pipeline( + >>> Tasks.text_to_image_synthesis, + >>> model='damo/multi-modal_freeu_stable_diffusion', + >>> base_model='AI-ModelScope/stable-diffusion-v1-5', + >>> ) + >>> + >>> output = pipe(inputs)['output_imgs'] + >>> cv2.imwrite(output_image_path, output) + >>> print('pipeline: the output image path is {}'.format(output_image_path)) + """ + super().__init__(model=model, preprocessor=preprocessor, **kwargs) + + torch_dtype = kwargs.get('torch_dtype', torch.float32) + self._device = getattr( + kwargs, 'device', + torch.device('cuda' if torch.cuda.is_available() else 'cpu')) + base_model = kwargs.get( + 'base_model', 'AI-ModelScope/stable-diffusion-v1-5') # default 1.5 + self.freeu_params = kwargs.get('freeu_params', { + 'b1': 1.5, + 'b2': 1.6, + 's1': 0.9, + 's2': 0.2 + }) # default + + logger.info('load freeu stable diffusion text to image pipeline done') + self.pipeline = pipeline( + task=Tasks.text_to_image_synthesis, + model=base_model, + torch_dtype=torch_dtype, + device=self._device).pipeline + + def preprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: + return inputs + + def forward(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: + """ + Inputs Args: + prompt (`str` or `List[str]`, *optional*): + The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`. + instead. + height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): + The height in pixels of the generated image. + width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): + The width in pixels of the generated image. + num_inference_steps (`int`, *optional*, defaults to 50): + The number of denoising steps. More denoising steps usually lead to a higher quality image at the + expense of slower inference. + guidance_scale (`float`, *optional*, defaults to 7.5): + Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). + `guidance_scale` is defined as `w` of equation 2. of [Imagen + Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > + 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, + usually at the expense of lower image quality. 
+ negative_prompt (`str` or `List[str]`, *optional*): + The prompt or prompts not to guide the image generation. If not defined, one has to pass + `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is + less than `1`). + num_images_per_prompt (`int`, *optional*, defaults to 1): + The number of images to generate per prompt. + eta (`float`, *optional*, defaults to 0.0): + Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to + [`schedulers.DDIMScheduler`], will be ignored for others. + generator (`torch.Generator` or `List[torch.Generator]`, *optional*): + One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) + to make generation deterministic. + latents (`torch.FloatTensor`, *optional*): + Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image + generation. Can be used to tweak the same generation with different prompts. If not provided, a latents + tensor will ge generated by sampling using the supplied random `generator`. + """ + if not isinstance(inputs, dict): + raise ValueError( + f'Expected the input to be a dictionary, but got {type(inputs)}' + ) + # -------- freeu block registration + register_free_upblock2d(self.pipeline, **self.freeu_params) + register_free_crossattn_upblock2d(self.pipeline, **self.freeu_params) + # -------- freeu block registration + + output = self.pipeline( + prompt=inputs.get('prompt'), + height=inputs.get('height'), + width=inputs.get('width'), + num_inference_steps=inputs.get('num_inference_steps', 50), + guidance_scale=inputs.get('guidance_scale', 7.5), + negative_prompt=inputs.get('negative_prompt'), + num_images_per_prompt=inputs.get('num_images_per_prompt', 1), + eta=inputs.get('eta', 0.0), + generator=inputs.get('generator'), + latents=inputs.get('latents'), + ).images[0] + + return {'output_tensor': output} + + def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: + output_img = np.array(inputs['output_tensor']) + return {OutputKeys.OUTPUT_IMGS: output_img[:, :, ::-1]} diff --git a/modelscope/pipelines/nlp/llm_pipeline.py b/modelscope/pipelines/nlp/llm_pipeline.py index e2979ccb..e2f669d8 100644 --- a/modelscope/pipelines/nlp/llm_pipeline.py +++ b/modelscope/pipelines/nlp/llm_pipeline.py @@ -9,11 +9,13 @@ from transformers import PreTrainedTokenizer from modelscope import (AutoModelForCausalLM, AutoTokenizer, Pipeline, snapshot_download) +from modelscope.hub.file_download import model_file_download from modelscope.models.base import Model from modelscope.models.nlp import ChatGLM2Tokenizer, Llama2Tokenizer from modelscope.outputs import OutputKeys from modelscope.pipelines.builder import PIPELINES from modelscope.pipelines.util import is_model, is_official_hub_path +from modelscope.utils.config import Config from modelscope.utils.constant import Invoke, ModelFile, Tasks from modelscope.utils.logger import get_logger @@ -27,6 +29,22 @@ class LLMPipeline(Pipeline): def initiate_single_model(self, model): if isinstance(model, str): logger.info(f'initiate model from {model}') + if self._is_swift_model(model): + from swift import Swift + + base_model = self.cfg.safe_get('adapter_cfg.model_id_or_path') + assert base_model is not None, 'Cannot get adapter_cfg.model_id_or_path from configuration.json file.' 
+ revision = self.cfg.safe_get('adapter_cfg.model_revision', + 'master') + base_model = Model.from_pretrained( + base_model, + revision, + invoked_by=Invoke.PIPELINE, + device_map=self.device_map, + torch_dtype=self.torch_dtype, + trust_remote_code=True) + swift_model = Swift.from_pretrained(base_model, model_id=model) + return swift_model if isinstance(model, str) and is_official_hub_path(model): logger.info(f'initiate model from location {model}.') if is_model(model): @@ -50,6 +68,20 @@ class LLMPipeline(Pipeline): else: return model + def _is_swift_model(self, model: Union[str, Any]) -> bool: + if not isinstance(model, str): + return False + if os.path.exists(model): + cfg_file = os.path.join(model, ModelFile.CONFIGURATION) + else: + try: + cfg_file = model_file_download(model, ModelFile.CONFIGURATION) + except Exception: + return False + + self.cfg = Config.from_file(cfg_file) + return self.cfg.safe_get('adapter_cfg.tuner_backend') == 'swift' + def __init__(self, format_messages: Union[Callable, str] = None, format_output: Callable = None, diff --git a/modelscope/preprocessors/nlp/sentence_embedding_preprocessor.py b/modelscope/preprocessors/nlp/sentence_embedding_preprocessor.py index b03268c6..f1ca6685 100644 --- a/modelscope/preprocessors/nlp/sentence_embedding_preprocessor.py +++ b/modelscope/preprocessors/nlp/sentence_embedding_preprocessor.py @@ -1,14 +1,19 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -from typing import Any, Dict +from typing import Any, Dict, Optional + +import torch from modelscope.metainfo import Preprocessors from modelscope.preprocessors import Preprocessor from modelscope.preprocessors.builder import PREPROCESSORS from modelscope.utils.constant import Fields, ModeKeys from modelscope.utils.hub import get_model_type +from modelscope.utils.logger import get_logger from .transformers_tokenizer import NLPTokenizer +logger = get_logger() + @PREPROCESSORS.register_module( Fields.nlp, module_name=Preprocessors.sentence_embedding) @@ -46,9 +51,32 @@ class SentenceEmbeddingTransformersPreprocessor(Preprocessor): self.max_length = max_length if model_dir is not None: model_type = get_model_type(model_dir) + # we could add `boq/bod` token/prompt and `eoq/eod` token if they exist when tokenizing. + for k in ('boq', 'eoq', 'bod', 'eod'): + setattr(self, k, kwargs.pop(k, None)) self.nlp_tokenizer = NLPTokenizer( model_dir, model_type, use_fast=use_fast, tokenize_kwargs=kwargs) super().__init__(mode=mode) + tokenizer = self.nlp_tokenizer.tokenizer + # For tokenizers like bloom + if tokenizer.padding_side != 'right': + # weighted mean pooling need pad right + logger.warning( + f'Change tokenizer.padding_side from {tokenizer.padding_side} to right' + ) + tokenizer.padding_side = 'right' + # For decoder-only tokenizers + if tokenizer.pad_token is None: + logger.warning( + f'Set tokenizer.pad_token as eos_token {tokenizer.eos_token}') + tokenizer.pad_token = tokenizer.eos_token + # Currently eos is single token, we can extend to prompt later. 
+ for k in ('eoq', 'eod'): + v = getattr(self, k, None) + if v is not None: + v = tokenizer.convert_tokens_to_ids(v) + setattr(self, k + '_id', v) + self.pad_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token) def __call__(self, data: Dict, @@ -81,13 +109,80 @@ class SentenceEmbeddingTransformersPreprocessor(Preprocessor): if 'return_tensors' not in kwargs: kwargs[ 'return_tensors'] = 'pt' if self.mode == ModeKeys.INFERENCE else None - query_inputs = self.nlp_tokenizer( - source_sentences, padding=padding, truncation=truncation, **kwargs) + query_inputs = self.tokenize( + source_sentences, + is_query=True, + padding=padding, + truncation=truncation, + **kwargs) tokenized_inputs = {'query': query_inputs, 'docs': None} if compare_sentences is not None and len(compare_sentences) > 0: - tokenized_inputs['docs'] = self.nlp_tokenizer( + tokenized_inputs['docs'] = self.tokenize( compare_sentences, + is_query=kwargs.get('symmetric', False), padding=padding, truncation=truncation, **kwargs) return tokenized_inputs + + def tokenize(self, texts, is_query=True, return_tensors=None, **kwargs): + """Tokenize raw texts, add `boq/bod` token/prompt and `eoq/eod` token if they exist. + + Args: + `texts` List[str]: texts to tokenize, + Example: + ["how long it take to get a master's degree"] + `is_query` bool: whether the input text(s) is query. + `return_tensors` str: the `return_tensors` argument to tokenizer. + Returns: + Dict[str, Any]: the preprocessed data + """ + if is_query: + bos, eos_id = self.boq, self.eoq_id + else: + bos, eos_id = self.bod, self.eod_id + if bos is not None: + # bos can be prompt + texts = [bos + t for t in texts] + encoding = self.nlp_tokenizer( + texts, return_tensors=return_tensors, **kwargs) + if eos_id is not None: + if return_tensors == 'pt': + self.add_eos_pt(encoding, eos_id) + else: + self.add_eos(encoding, eos_id) + return encoding + + def add_eos_pt(self, encoding: Dict[str, torch.Tensor], eos: int): + """Add `eos` token id to the end of each sequence.""" + input_ids, attn_mask = encoding['input_ids'], encoding[ + 'attention_mask'] + batch = torch.arange(input_ids.size(0)) + length = attn_mask.sum(-1) + + if input_ids.size(1) < self.max_length: + ones = input_ids.new_ones(input_ids.size(0), 1) + attn_mask = torch.cat((ones, attn_mask), dim=1) + padding = ones * self.pad_id + input_ids = torch.cat((input_ids, padding), dim=1) + eos_index = length + else: + eos_index = torch.clamp(length, max=self.max_length - 1) + attn_mask[batch, eos_index] = 1 + input_ids[batch, eos_index] = eos + encoding['input_ids'], encoding[ + 'attention_mask'] = input_ids, attn_mask + return + + def add_eos(self, encoding: Dict[str, list], eos: int): + """Add `eos` token id to the end of each sequence.""" + for ids, mask in zip(encoding['input_ids'], + encoding['attention_mask']): + if len(mask) < self.max_length: + ids.append(eos) + mask.append(1) + else: + last = min(sum(mask), self.max_length - 1) + ids[last] = eos + mask[last] = 1 + return diff --git a/modelscope/trainers/trainer.py b/modelscope/trainers/trainer.py index a3707918..25f948bc 100644 --- a/modelscope/trainers/trainer.py +++ b/modelscope/trainers/trainer.py @@ -181,8 +181,20 @@ class EpochBasedTrainer(BaseTrainer): compile_options = {} self.model = compile_model(self.model, **compile_options) - if 'work_dir' in kwargs: + if kwargs.get('work_dir', None) is not None: self.work_dir = kwargs['work_dir'] + if 'train' not in self.cfg: + self.cfg['train'] = ConfigDict() + self.cfg['train']['work_dir'] = self.work_dir + if 
'checkpoint' in self.cfg['train']: + if 'period' in self.cfg['train']['checkpoint']: + self.cfg['train']['checkpoint']['period'][ + 'save_dir'] = self.work_dir + if 'best' in self.cfg['train']['checkpoint']: + self.cfg['train']['checkpoint']['best'][ + 'save_dir'] = self.work_dir + if 'logging' in self.cfg['train']: + self.cfg['train']['logging']['out_dir'] = self.work_dir else: self.work_dir = self.cfg.train.get('work_dir', './work_dir') diff --git a/modelscope/utils/automodel_utils.py b/modelscope/utils/automodel_utils.py index afd83817..1f5de3b6 100644 --- a/modelscope/utils/automodel_utils.py +++ b/modelscope/utils/automodel_utils.py @@ -6,8 +6,11 @@ from modelscope.utils.ast_utils import INDEX_KEY from modelscope.utils.import_utils import LazyImportModule -def can_load_by_ms(model_dir: str, tast_name: str, model_type: str) -> bool: - if ('MODELS', tast_name, +def can_load_by_ms(model_dir: str, task_name: Optional[str], + model_type: Optional[str]) -> bool: + if model_type is None or task_name is None: + return False + if ('MODELS', task_name, model_type) in LazyImportModule.AST_INDEX[INDEX_KEY]: return True ms_wrapper_path = os.path.join(model_dir, 'ms_wrapper.py') @@ -25,11 +28,27 @@ def _can_load_by_hf_automodel(automodel_class: type, config) -> bool: return False -def get_hf_automodel_class(model_dir: str, task_name: str) -> Optional[type]: - from modelscope import (AutoConfig, AutoModel, AutoModelForCausalLM, - AutoModelForSeq2SeqLM, - AutoModelForTokenClassification, - AutoModelForSequenceClassification) +def get_default_automodel(config) -> Optional[type]: + import modelscope.utils.hf_util as hf_util + if not hasattr(config, 'auto_map'): + return None + auto_map = config.auto_map + automodel_list = [k for k in auto_map.keys() if k.startswith('AutoModel')] + if len(automodel_list) == 1: + return getattr(hf_util, automodel_list[0]) + if len(automodel_list) > 1 and len( + set([auto_map[k] for k in automodel_list])) == 1: + return getattr(hf_util, automodel_list[0]) + return None + + +def get_hf_automodel_class(model_dir: str, + task_name: Optional[str]) -> Optional[type]: + from modelscope.utils.hf_util import (AutoConfig, AutoModel, + AutoModelForCausalLM, + AutoModelForSeq2SeqLM, + AutoModelForTokenClassification, + AutoModelForSequenceClassification) automodel_mapping = { Tasks.backbone: AutoModel, Tasks.chat: AutoModelForCausalLM, @@ -37,19 +56,18 @@ def get_hf_automodel_class(model_dir: str, task_name: str) -> Optional[type]: Tasks.text_classification: AutoModelForSequenceClassification, Tasks.token_classification: AutoModelForTokenClassification, } - automodel_class = automodel_mapping.get(task_name, None) - if automodel_class is None: - return None config_path = os.path.join(model_dir, 'config.json') if not os.path.exists(config_path): return None try: - try: - config = AutoConfig.from_pretrained( - model_dir, trust_remote_code=True) - except (FileNotFoundError, ValueError): - return None + config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True) + if task_name is None: + automodel_class = get_default_automodel(config) + else: + automodel_class = automodel_mapping.get(task_name, None) + if automodel_class is None: + return None if _can_load_by_hf_automodel(automodel_class, config): return automodel_class if (automodel_class is AutoModelForCausalLM @@ -71,14 +89,5 @@ def try_to_load_hf_model(model_dir: str, task_name: str, model = None if automodel_class is not None: # use hf - device_map = kwargs.get('device_map', None) - torch_dtype = kwargs.get('torch_dtype', 
None) - config = kwargs.get('config', None) - - model = automodel_class.from_pretrained( - model_dir, - device_map=device_map, - torch_dtype=torch_dtype, - config=config, - trust_remote_code=True) + model = automodel_class.from_pretrained(model_dir, **kwargs) return model diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py index e3e8cac8..463dcea7 100644 --- a/modelscope/utils/hf_util.py +++ b/modelscope/utils/hf_util.py @@ -21,7 +21,7 @@ from transformers.models.auto.tokenization_auto import ( TOKENIZER_MAPPING_NAMES, get_tokenizer_config) from modelscope import snapshot_download -from modelscope.utils.constant import Invoke +from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke try: from transformers import GPTQConfig as GPTQConfigHF @@ -84,69 +84,6 @@ patch_tokenizer_base() patch_model_base() -def check_hf_code(model_dir: str, auto_class: type, - trust_remote_code: bool) -> None: - config_path = os.path.join(model_dir, 'config.json') - if not os.path.exists(config_path): - raise FileNotFoundError(f'{config_path} is not found') - config_dict = PretrainedConfig.get_config_dict(config_path)[0] - auto_class_name = auto_class.__name__ - if auto_class is AutoTokenizerHF: - tokenizer_config = get_tokenizer_config(model_dir) - # load from repo - if trust_remote_code: - has_remote_code = False - if auto_class is AutoTokenizerHF: - auto_map = tokenizer_config.get('auto_map', None) - if auto_map is not None: - module_name = auto_map.get(auto_class_name, None) - if module_name is not None: - module_name = module_name[0] - has_remote_code = True - else: - auto_map = config_dict.get('auto_map', None) - if auto_map is not None: - module_name = auto_map.get(auto_class_name, None) - has_remote_code = module_name is not None - - if has_remote_code: - module_path = os.path.join(model_dir, - module_name.split('.')[0] + '.py') - if not os.path.exists(module_path): - raise FileNotFoundError(f'{module_path} is not found') - return - - # trust_remote_code is False or has_remote_code is False - model_type = config_dict.get('model_type', None) - if model_type is None: - raise ValueError(f'`model_type` key is not found in {config_path}.') - - trust_remote_code_info = '.' - if not trust_remote_code: - trust_remote_code_info = ', You can try passing `trust_remote_code=True`.' 
- if auto_class is AutoConfigHF: - if model_type not in CONFIG_MAPPING: - raise ValueError( - f'{model_type} not found in HF `CONFIG_MAPPING`{trust_remote_code_info}' - ) - elif auto_class is AutoTokenizerHF: - tokenizer_class = tokenizer_config.get('tokenizer_class') - if tokenizer_class is not None: - return - if model_type not in TOKENIZER_MAPPING_NAMES: - raise ValueError( - f'{model_type} not found in HF `TOKENIZER_MAPPING_NAMES`{trust_remote_code_info}' - ) - else: - mapping_names = [ - m.model_type for m in auto_class._model_mapping.keys() - ] - if model_type not in mapping_names: - raise ValueError( - f'{model_type} not found in HF `auto_class._model_mapping`{trust_remote_code_info}' - ) - - def get_wrapped_class(module_class, ignore_file_pattern=[], **kwargs): """Get a custom wrapper class for auto classes to download the models from the ModelScope hub Args: @@ -166,7 +103,7 @@ def get_wrapped_class(module_class, ignore_file_pattern=[], **kwargs): ignore_file_pattern = kwargs.pop('ignore_file_pattern', default_ignore_file_pattern) if not os.path.exists(pretrained_model_name_or_path): - revision = kwargs.pop('revision', None) + revision = kwargs.pop('revision', DEFAULT_MODEL_REVISION) model_dir = snapshot_download( pretrained_model_name_or_path, revision=revision, @@ -175,9 +112,6 @@ def get_wrapped_class(module_class, ignore_file_pattern=[], **kwargs): else: model_dir = pretrained_model_name_or_path - if module_class is not GenerationConfigHF: - trust_remote_code = kwargs.get('trust_remote_code', False) - check_hf_code(model_dir, module_class, trust_remote_code) module_obj = module_class.from_pretrained(model_dir, *model_args, **kwargs) diff --git a/modelscope/utils/pipeline_inputs.json b/modelscope/utils/pipeline_inputs.json index 03a00636..c75c8b9c 100644 --- a/modelscope/utils/pipeline_inputs.json +++ b/modelscope/utils/pipeline_inputs.json @@ -145,6 +145,19 @@ "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/image_salient_detection.jpg" } }, + "sentence-embedding":{ + "input": { + "source_sentence":[ + "吃完海鲜可以喝牛奶吗?" + ], + "sentences_to_compare":[ + "不可以,早晨喝牛奶不科学", + "吃了海鲜后是不能再喝牛奶的,因为牛奶中含得有维生素C,如果海鲜喝牛奶一起服用会对人体造成一定的伤害", + "吃海鲜是不能同时喝牛奶吃水果,这个至少间隔6小时以上才可以。", + "吃海鲜是不可以吃柠檬的因为其中的维生素C会和海鲜中的矿物质形成砷" + ] + } + }, "shop-segmentation":{ "input":{ "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/shop_segmentation.jpg" diff --git a/modelscope/version.py b/modelscope/version.py index f7f006e7..fb0e01f3 100644 --- a/modelscope/version.py +++ b/modelscope/version.py @@ -1,5 +1,5 @@ # Make sure to modify __release_datetime__ to release time when making official release. 
-__version__ = '1.9.3' +__version__ = '1.9.4' # default release datetime for branches under active development is set # to be a time far-far-away-into-the-future -__release_datetime__ = '2023-10-17 00:00:00' +__release_datetime__ = '2099-09-06 00:00:00' diff --git a/tests/export/test_export_speech_signal_process.py b/tests/export/test_export_speech_signal_process.py index faba59a5..d4579936 100644 --- a/tests/export/test_export_speech_signal_process.py +++ b/tests/export/test_export_speech_signal_process.py @@ -66,9 +66,7 @@ class ExportSpeechSignalProcessTest(unittest.TestCase): with torch.no_grad(): model.eval() outputs_origin = model.forward(dummy_inputs) - outputs_origin = numpify_tensor_nested( - outputs_origin, - providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) + outputs_origin = numpify_tensor_nested(outputs_origin) input_feed = {INPUT_NAME: dummy_inputs.numpy()} outputs = ort_session.run( diff --git a/tests/pipelines/test_base.py b/tests/pipelines/test_base.py index d08c9edd..434e2944 100644 --- a/tests/pipelines/test_base.py +++ b/tests/pipelines/test_base.py @@ -166,12 +166,24 @@ class CustomPipelineTest(unittest.TestCase): return inputs def postprocess(self, out, **kwargs): - return {'response': 'xxx', 'history': []} + return {'message': {'role': 'assistant', 'content': 'xxx'}} pipe = pipeline( task=Tasks.chat, pipeline_name=dummy_module, model=self.model_dir) pipe('text') inputs = {'text': 'aaa', 'history': [('dfd', 'fds')]} + inputs = { + 'messages': [{ + 'role': 'user', + 'content': 'dfd' + }, { + 'role': 'assistant', + 'content': 'fds' + }, { + 'role': 'user', + 'content': 'aaa' + }] + } pipe(inputs) def test_custom(self): diff --git a/tests/pipelines/test_controllable_image_generation.py b/tests/pipelines/test_controllable_image_generation.py index c1a29f5b..fa2bb4a1 100644 --- a/tests/pipelines/test_controllable_image_generation.py +++ b/tests/pipelines/test_controllable_image_generation.py @@ -25,7 +25,8 @@ class ControllableImageGenerationTest(unittest.TestCase): 'prompt': 'flower' } - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 2, + 'skip test for huggingface model download issue.') def test_run_with_model_from_modelhub(self): output_image_path = tempfile.NamedTemporaryFile(suffix='.png').name control_types = [ diff --git a/tests/pipelines/test_sentence_embedding.py b/tests/pipelines/test_sentence_embedding.py index 13260132..a6dd89ec 100644 --- a/tests/pipelines/test_sentence_embedding.py +++ b/tests/pipelines/test_sentence_embedding.py @@ -21,6 +21,7 @@ class SentenceEmbeddingTest(unittest.TestCase): medical_tiny_model_id = 'damo/nlp_corom_sentence-embedding_chinese-tiny-medical' general_base_model_id = 'damo/nlp_corom_sentence-embedding_chinese-base' general_tiny_model_id = 'damo/nlp_corom_sentence-embedding_chinese-tiny' + bloom_model_id = 'damo/udever-bloom-7b1' inputs = { 'source_sentence': ["how long it take to get a master's degree"], @@ -154,6 +155,14 @@ class SentenceEmbeddingTest(unittest.TestCase): print() print(f'pipeline2: {pipeline2(input=self.medical_inputs1)}') + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_bloom_model_from_modelhub(self): + model = Model.from_pretrained(self.bloom_model_id) + tokenizer = SentenceEmbeddingTransformersPreprocessor(model.model_dir) + pipeline_ins = pipeline( + task=Tasks.sentence_embedding, model=model, preprocessor=tokenizer) + print(pipeline_ins(input=self.inputs)) + 
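A usage sketch to accompany the new udever-bloom embedding test above, using the smaller 560m checkpoint referenced in the BloomForSentenceEmbedding docstring earlier in this patch. The only assumption is that the sentence-embedding pipeline keeps returning its embeddings under OutputKeys.TEXT_EMBEDDING; cosine similarity is computed explicitly so the snippet does not depend on whether the checkpoint ships with normalize=True:

# Usage sketch (not part of the patch); model id taken from the
# BloomForSentenceEmbedding docstring above.
import numpy as np

from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

pipe = pipeline(Tasks.sentence_embedding, model='damo/nlp_udever_bloom_560m')


def embed(texts):
    # OutputKeys.TEXT_EMBEDDING is assumed to hold one row per source sentence.
    vecs = pipe(input={'source_sentence': texts})[OutputKeys.TEXT_EMBEDDING]
    return vecs / np.linalg.norm(vecs, axis=-1, keepdims=True)


query = embed(["how long it take to get a master's degree"])
docs = embed(['On average, students take about 18 to 24 months.',
              'Most of the coursework is offered online.'])
print(docs @ query.T)  # cosine similarities, shape (2, 1)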
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_with_model_from_modelhub(self): model = Model.from_pretrained(self.model_id) diff --git a/tests/pipelines/test_text_to_image_freeu.py b/tests/pipelines/test_text_to_image_freeu.py new file mode 100644 index 00000000..7aebe318 --- /dev/null +++ b/tests/pipelines/test_text_to_image_freeu.py @@ -0,0 +1,57 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import unittest + +import cv2 + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.pipelines.multi_modal import FreeUTextToImagePipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.test_utils import test_level + + +class ImageEditingTest(unittest.TestCase): + + def setUp(self) -> None: + self.task = Tasks.text_to_image_synthesis + self.model_id = 'damo/multi-modal_freeu_stable_diffusion' + prompt = 'a photo of a running corgi' # prompt + self.inputs = {'prompt': prompt} + self.output_image_path = './result.png' + self.base_model = 'AI-ModelScope/stable-diffusion-v2-1' + self.freeu_params = { + 'b1': 1.4, + 'b2': 1.6, + 's1': 0.9, + 's2': 0.2 + } # for SD2.1 + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_run_by_direct_model_download(self): + cache_path = snapshot_download(self.model_id) + pipeline = FreeUTextToImagePipeline(cache_path) + pipeline.group_key = self.task + synthesized_img = pipeline( + input=self.inputs)[OutputKeys.OUTPUT_IMGS] # BGR + cv2.imwrite(self.output_image_path, synthesized_img) + print('FreeU pipeline: the synthesized image path is {}'.format( + self.output_image_path)) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_run_with_model_name(self): + pipeline_ins = pipeline( + task=Tasks.text_to_image_synthesis, + model=self.model_id, + base_model=self.base_model, + freeu_params=self.freeu_params) + synthesized_img = pipeline_ins( + self.inputs)[OutputKeys.OUTPUT_IMGS] # BGR + cv2.imwrite(self.output_image_path, synthesized_img) + print('FreeU pipeline: the synthesized image path is {}'.format( + self.output_image_path)) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/utils/test_hf_util.py b/tests/utils/test_hf_util.py index fcbaf50c..e16bc6fa 100644 --- a/tests/utils/test_hf_util.py +++ b/tests/utils/test_hf_util.py @@ -18,10 +18,10 @@ class HFUtilTest(unittest.TestCase): def test_auto_tokenizer(self): tokenizer = AutoTokenizer.from_pretrained( - 'baichuan-inc/Baichuan-13B-Chat', + 'baichuan-inc/Baichuan2-7B-Chat', trust_remote_code=True, revision='v1.0.3') - self.assertEqual(tokenizer.vocab_size, 64000) + self.assertEqual(tokenizer.vocab_size, 125696) self.assertEqual(tokenizer.model_max_length, 4096) self.assertFalse(tokenizer.is_fast)
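Finally, a sketch of how the Swift-adapter path added to LLMPipeline and the adapter-aware llm_first checker in builder.py fit together. Everything below is hypothetical: the adapter repo id is a placeholder, and its configuration.json is assumed to contain the adapter_cfg keys that the new code reads (tuner_backend, model_id_or_path, model_revision); the base model id is used only for illustration.

# Hypothetical end-to-end usage of the new Swift adapter support (not part of the patch).
# Assumes 'my-org/qwen-7b-chat-swift-lora' is a placeholder adapter repo whose
# configuration.json contains:
#   "adapter_cfg": {
#       "tuner_backend": "swift",
#       "model_id_or_path": "qwen/Qwen-7B-Chat",
#       "model_revision": "master"
#   }
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# llm_first lets the builder resolve the model type through adapter_cfg and
# dispatch to LLMPipeline, which then loads the base model and wraps it with
# Swift.from_pretrained as in the diff above.
pipe = pipeline(Tasks.chat, model='my-org/qwen-7b-chat-swift-lora', llm_first=True)
inputs = {
    'messages': [{
        'role': 'user',
        'content': 'Hello! Where is the capital of Zhejiang?'
    }]
}
print(pipe(inputs))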