mirror of
https://github.com/modelscope/modelscope.git
synced 2025-12-25 12:39:25 +01:00
Merge branch 'master-github' into master-merge-github925
This commit is contained in:
15
.github/ISSUE_TEMPLATE/bug_report.md
vendored
15
.github/ISSUE_TEMPLATE/bug_report.md
vendored
@@ -32,3 +32,18 @@ A clear and concise description of what the bug is.
|
||||
* You may add additional information that may be helpful for locating the problem, such as
|
||||
* How you installed PyTorch [e.g., pip, conda, source]
|
||||
* Other environment variables that may be related (such as $PATH, $LD_LIBRARY_PATH, $PYTHONPATH, etc.)
|
||||
|
||||
|
||||
Please @ corresponding people according to your problem:
|
||||
|
||||
Model related: @wenmengzhou @tastelikefeet
|
||||
|
||||
Model hub related: @liuyhwangyh
|
||||
|
||||
Dataset related: @wangxingjun778
|
||||
|
||||
Finetune related: @tastelikefeet @Jintao-Huang
|
||||
|
||||
Pipeline related: @Firmament-cyou @wenmengzhou
|
||||
|
||||
Contribute your model: @zzclynn
|
||||
|
||||
16
.github/ISSUE_TEMPLATE/question.md
vendored
16
.github/ISSUE_TEMPLATE/question.md
vendored
@@ -3,7 +3,7 @@ name: Question
|
||||
about: Describe this issue template's purpose here.
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: zzclynn
|
||||
assignees: zzclynn,wenmengzhou
|
||||
|
||||
---
|
||||
|
||||
@@ -15,3 +15,17 @@ Before asking a question, make sure you have:
|
||||
* Googled your question.
|
||||
* Searched related issues but cannot get the expected help.
|
||||
* The bug has not been fixed in the latest version.
|
||||
|
||||
Please @ corresponding people according to your problem:
|
||||
|
||||
Model related: @wenmengzhou @tastelikefeet
|
||||
|
||||
Model hub related: @liuyhwangyh
|
||||
|
||||
Dataset related: @wangxingjun778
|
||||
|
||||
Finetune related: @tastelikefeet @Jintao-Huang
|
||||
|
||||
Pipeline related: @Firmament-cyou @wenmengzhou
|
||||
|
||||
Contribute your model: @zzclynn
|
||||
|
||||
@@ -28,7 +28,8 @@ if TYPE_CHECKING:
|
||||
from .trainers import (EpochBasedTrainer, Hook, Priority, TrainingArgs,
|
||||
build_dataset_from_file)
|
||||
from .utils.constant import Tasks
|
||||
from .utils.hf_util import (AutoConfig, AutoModel, AutoModelForCausalLM,
|
||||
from .utils.hf_util import AutoConfig, GPTQConfig, BitsAndBytesConfig
|
||||
from .utils.hf_util import (AutoModel, AutoModelForCausalLM,
|
||||
AutoModelForSeq2SeqLM,
|
||||
AutoModelForSequenceClassification,
|
||||
AutoModelForTokenClassification, AutoTokenizer,
|
||||
@@ -76,8 +77,9 @@ else:
|
||||
'utils.logger': ['get_logger'],
|
||||
'utils.constant': ['Tasks'],
|
||||
'utils.hf_util': [
|
||||
'AutoConfig', 'GenerationConfig', 'AutoModel',
|
||||
'AutoModelForCausalLM', 'AutoModelForSeq2SeqLM', 'AutoTokenizer',
|
||||
'AutoConfig', 'GenerationConfig', 'AutoModel', 'GPTQConfig',
|
||||
'BitsAndBytesConfig', 'AutoModelForCausalLM',
|
||||
'AutoModelForSeq2SeqLM', 'AutoTokenizer',
|
||||
'AutoModelForSequenceClassification',
|
||||
'AutoModelForTokenClassification'
|
||||
],
|
||||
|
||||
@@ -251,7 +251,8 @@ class HubApi:
|
||||
tag: Optional[str] = None,
|
||||
revision: Optional[str] = DEFAULT_REPOSITORY_REVISION,
|
||||
original_model_id: Optional[str] = None,
|
||||
ignore_file_pattern: Optional[Union[List[str], str]] = None):
|
||||
ignore_file_pattern: Optional[Union[List[str], str]] = None,
|
||||
lfs_suffix: Optional[Union[str, List[str]]] = None):
|
||||
"""Upload model from a given directory to given repository. A valid model directory
|
||||
must contain a configuration.json file.
|
||||
|
||||
@@ -289,6 +290,7 @@ class HubApi:
|
||||
branch and push to it.
|
||||
original_model_id (str, optional): The base model id which this model is trained from
|
||||
ignore_file_pattern (`Union[List[str], str]`, optional): The file pattern to ignore uploading
|
||||
lfs_suffix (`Union[str, List[str]]`, optional): File pattern(s) to manage with Git LFS, e.g. '*.safetensors'.
|
||||
|
||||
Raises:
|
||||
InvalidParameter: Parameter invalid.
|
||||
@@ -357,6 +359,10 @@ class HubApi:
|
||||
date = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
|
||||
commit_message = '[automsg] push model %s to hub at %s' % (
|
||||
model_id, date)
|
||||
if lfs_suffix is not None:
|
||||
lfs_suffix_list = [lfs_suffix] if isinstance(lfs_suffix, str) else lfs_suffix
|
||||
for suffix in lfs_suffix_list:
|
||||
repo.add_lfs_type(suffix)
|
||||
repo.push(
|
||||
commit_message=commit_message,
|
||||
local_branch=revision,
|
||||
|
||||
@@ -105,7 +105,7 @@ class Repository:
|
||||
examples '*.safetensors'
|
||||
"""
|
||||
os.system(
|
||||
"printf '%s filter=lfs diff=lfs merge=lfs -text\n'>>%s" %
|
||||
"printf '\n%s filter=lfs diff=lfs merge=lfs -text\n'>>%s" %
|
||||
(file_name_suffix, os.path.join(self.model_dir, '.gitattributes')))
|
||||
|
||||
def push(self,
|
||||
|
||||
@@ -1252,6 +1252,7 @@ class Hooks(object):
|
||||
DeepspeedHook = 'DeepspeedHook'
|
||||
MegatronHook = 'MegatronHook'
|
||||
DDPHook = 'DDPHook'
|
||||
SwiftHook = 'SwiftHook'
|
||||
|
||||
|
||||
class LR_Schedulers(object):
|
||||
|
||||
@@ -13,7 +13,6 @@ from diffusers import (AutoencoderKL, DDPMScheduler, DiffusionPipeline,
|
||||
utils)
|
||||
from diffusers.models import attention
|
||||
from diffusers.utils import deprecation_utils
|
||||
from swift import AdapterConfig, LoRAConfig, PromptConfig, Swift
|
||||
from transformers import CLIPTextModel, CLIPTokenizer
|
||||
|
||||
from modelscope import snapshot_download
|
||||
@@ -26,6 +25,7 @@ from modelscope.outputs import OutputKeys
|
||||
from modelscope.utils.checkpoint import save_checkpoint, save_configuration
|
||||
from modelscope.utils.config import Config
|
||||
from modelscope.utils.constant import ModelFile, Tasks
|
||||
from modelscope.utils.import_utils import is_swift_available
|
||||
from .control_sd_lora import ControlLoRATuner
|
||||
|
||||
utils.deprecate = lambda *arg, **kwargs: None
|
||||
@@ -34,6 +34,9 @@ attention.deprecate = lambda *arg, **kwargs: None
|
||||
|
||||
__tuner_MAP__ = {'lora': LoRATuner, 'control_lora': ControlLoRATuner}
|
||||
|
||||
if is_swift_available():
|
||||
from swift import AdapterConfig, LoRAConfig, PromptConfig, Swift
|
||||
|
||||
|
||||
@MODELS.register_module(
|
||||
Tasks.efficient_diffusion_tuning,
|
||||
@@ -83,6 +86,8 @@ class EfficientStableDiffusion(TorchModel):
|
||||
self.pipe.scheduler.config)
|
||||
self.pipe = self.pipe.to(self.device)
|
||||
self.unet = self.pipe.unet
|
||||
self.text_encoder = self.pipe.text_encoder
|
||||
self.vae = self.pipe.vae
|
||||
else:
|
||||
# Load scheduler, tokenizer and models.
|
||||
self.noise_scheduler = DDPMScheduler.from_pretrained(
|
||||
@@ -110,6 +115,10 @@ class EfficientStableDiffusion(TorchModel):
|
||||
self.tuner_name = tuner_name
|
||||
|
||||
if tuner_name == 'swift-lora':
|
||||
if not is_swift_available():
|
||||
raise ValueError(
|
||||
'Please install swift by `pip install ms-swift` to use swift tuners.'
|
||||
)
|
||||
rank = tuner_config[
|
||||
'rank'] if tuner_config and 'rank' in tuner_config else 4
|
||||
lora_config = LoRAConfig(
|
||||
@@ -119,15 +128,32 @@ class EfficientStableDiffusion(TorchModel):
|
||||
use_merged_linear=False)
|
||||
self.unet = Swift.prepare_model(self.unet, lora_config)
|
||||
elif tuner_name == 'swift-adapter':
|
||||
if not is_swift_available():
|
||||
raise ValueError(
|
||||
'Please install swift by `pip install ms-swift` to use swift tuners.'
|
||||
)
|
||||
adapter_length = tuner_config[
|
||||
'adapter_length'] if tuner_config and 'adapter_length' in tuner_config else 10
|
||||
adapter_config = AdapterConfig(
|
||||
dim=-1,
|
||||
hidden_pos=0,
|
||||
target_modules=r'.*ff\.net\.2$',
|
||||
adapter_length=adapter_length)
|
||||
self.unet = Swift.prepare_model(self.unet, adapter_config)
|
||||
adapter_config_dict = {}
|
||||
dim_list = [320, 640, 1280]
|
||||
target_modules_list = [
|
||||
r'(down_blocks.0.*ff\.net\.2$)|(up_blocks.3.*ff\.net\.2$)',
|
||||
r'(down_blocks.1.*ff\.net\.2$)|(up_blocks.2.*ff\.net\.2$)',
|
||||
r'(down_blocks.2.*ff\.net\.2$)|(up_blocks.1.*ff\.net\.2$)|(mid_block.*ff\.net\.2$)'
|
||||
]
|
||||
for dim, target_modules in zip(dim_list, target_modules_list):
|
||||
adapter_config = AdapterConfig(
|
||||
dim=dim,
|
||||
hidden_pos=0,
|
||||
target_modules=target_modules,
|
||||
adapter_length=adapter_length)
|
||||
adapter_config_dict[f'adapter_{dim}'] = adapter_config
|
||||
self.unet = Swift.prepare_model(self.unet, adapter_config_dict)
|
||||
elif tuner_name == 'swift-prompt':
|
||||
if not is_swift_available():
|
||||
raise ValueError(
|
||||
'Please install swift by `pip install ms-swift` to use swift tuners.'
|
||||
)
|
||||
prompt_length = tuner_config[
|
||||
'prompt_length'] if tuner_config and 'prompt_length' in tuner_config else 10
|
||||
prompt_config = PromptConfig(
|
||||
@@ -139,7 +165,8 @@ class EfficientStableDiffusion(TorchModel):
|
||||
r'.*[down_blocks|up_blocks|mid_block]\.\d+\.attentions\.\d+\.transformer_blocks\.\d+$',
|
||||
embedding_pos=0,
|
||||
prompt_length=prompt_length,
|
||||
attach_front=False)
|
||||
attach_front=False,
|
||||
extract_embedding=True)
|
||||
self.unet = Swift.prepare_model(self.unet, prompt_config)
|
||||
elif tuner_name in ('lora', 'control_lora'):
|
||||
# if not set the config of control-tuner, we add the lora tuner directly to the original framework,
|
||||
@@ -166,13 +193,13 @@ class EfficientStableDiffusion(TorchModel):
|
||||
else:
|
||||
super().load_state_dict(state_dict=state_dict, strict=strict)
|
||||
|
||||
def state_dict(self):
|
||||
def state_dict(self, *arg, **kwargs):
|
||||
if hasattr(self, 'tuner'):
|
||||
return self.tuner.state_dict()
|
||||
elif self.tuner_name.startswith('swift'):
|
||||
return self.unet.state_dict()
|
||||
return self.tuner.state_dict(*arg, **kwargs)
|
||||
elif self.tuner_name.startswith('swift-'):
|
||||
return self.unet.state_dict(*arg, **kwargs)
|
||||
else:
|
||||
return super().state_dict()
|
||||
return super().state_dict(*arg, **kwargs)
|
||||
|
||||
def tokenize_caption(self, captions):
|
||||
""" Convert caption text to token data.
|
||||
@@ -189,7 +216,7 @@ class EfficientStableDiffusion(TorchModel):
|
||||
return_tensors='pt')
|
||||
return inputs.input_ids
|
||||
|
||||
def forward(self, prompt='', cond=None, target=None, **args):
|
||||
def forward(self, prompt, cond=None, target=None, **args):
|
||||
if self.inference:
|
||||
if 'generator_seed' in args and isinstance(args['generator_seed'],
|
||||
int):
|
||||
@@ -198,11 +225,13 @@ class EfficientStableDiffusion(TorchModel):
|
||||
else:
|
||||
generator = None
|
||||
num_inference_steps = args.get('num_inference_steps', 30)
|
||||
guidance_scale = args.get('guidance_scale', 7.5)
|
||||
if self.is_control:
|
||||
_ = self.tuner(cond.to(self.device)).control_states
|
||||
images = self.pipe(
|
||||
prompt,
|
||||
num_inference_steps=num_inference_steps,
|
||||
guidance_scale=guidance_scale,
|
||||
generator=generator).images
|
||||
return images
|
||||
else:
|
||||
@@ -228,8 +257,8 @@ class EfficientStableDiffusion(TorchModel):
|
||||
input_ids = self.tokenize_caption(prompt).to(self.device)
|
||||
|
||||
# Get the text embedding for conditioning
|
||||
with torch.no_grad():
|
||||
encoder_hidden_states = self.text_encoder(input_ids)[0]
|
||||
# with torch.no_grad():
|
||||
encoder_hidden_states = self.text_encoder(input_ids)[0]
|
||||
|
||||
# Inject control states to unet
|
||||
if self.is_control:
|
||||
|
||||
@@ -158,9 +158,9 @@ class StableDiffusion(TorchModel):
|
||||
config: Optional[dict] = None,
|
||||
save_config_function: Callable = save_configuration,
|
||||
**kwargs):
|
||||
config['pipeline']['type'] = 'diffusers-stable-diffusion'
|
||||
# Skip copying the original weights for lora and dreambooth method
|
||||
if self.lora_tune or self.dreambooth_tune:
|
||||
config['pipeline']['type'] = 'diffusers-stable-diffusion'
|
||||
pass
|
||||
else:
|
||||
super().save_pretrained(target_folder, save_checkpoint_names,
|
||||
|
||||
@@ -244,9 +244,9 @@ class StableDiffusionXL(TorchModel):
|
||||
config: Optional[dict] = None,
|
||||
save_config_function: Callable = save_configuration,
|
||||
**kwargs):
|
||||
config['pipeline']['type'] = 'diffusers-stable-diffusion-xl'
|
||||
# Skip copying the original weights for lora and dreambooth method
|
||||
if self.lora_tune or self.dreambooth_tune:
|
||||
if self.lora_tune:
|
||||
config['pipeline']['type'] = 'diffusers-stable-diffusion-xl'
|
||||
pass
|
||||
else:
|
||||
super().save_pretrained(target_folder, save_checkpoint_names,
|
||||
|
||||
@@ -102,6 +102,7 @@ class ModelForTokenClassificationWithCRF(ModelForTokenClassification):
|
||||
base_model_prefix = 'encoder'
|
||||
|
||||
def postprocess(self, inputs, **kwargs):
|
||||
logits = inputs['logits']
|
||||
predicts = self.head.decode(inputs['logits'], inputs['label_mask'])
|
||||
offset_mapping = inputs['offset_mapping']
|
||||
mask = inputs['label_mask']
|
||||
@@ -119,7 +120,7 @@ class ModelForTokenClassificationWithCRF(ModelForTokenClassification):
|
||||
|
||||
return AttentionTokenClassificationModelOutput(
|
||||
loss=None,
|
||||
logits=None,
|
||||
logits=logits,
|
||||
hidden_states=None,
|
||||
attentions=None,
|
||||
label_mask=mask,
|
||||
|
||||
@@ -160,6 +160,8 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
|
||||
token_num_relax=self.cmd['token_num_relax'],
|
||||
decoding_ind=self.cmd['decoding_ind'],
|
||||
decoding_mode=self.cmd['decoding_mode'],
|
||||
fake_streaming=self.cmd['fake_streaming'],
|
||||
model_lang=self.cmd['model_lang'],
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@@ -304,19 +306,21 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
|
||||
'idx_text': '',
|
||||
'sampled_ids': 'seq2seq/sampled_ids',
|
||||
'sampled_lengths': 'seq2seq/sampled_lengths',
|
||||
'lang': 'zh-cn',
|
||||
'model_lang': outputs['model_lang'],
|
||||
'code_base': outputs['code_base'],
|
||||
'mode': outputs['mode'],
|
||||
'fs': {
|
||||
'model_fs': None,
|
||||
'audio_fs': None
|
||||
}
|
||||
},
|
||||
'fake_streaming': False,
|
||||
}
|
||||
|
||||
frontend_conf = None
|
||||
token_num_relax = None
|
||||
decoding_ind = None
|
||||
decoding_mode = None
|
||||
fake_streaming = False
|
||||
if os.path.exists(outputs['am_model_config']):
|
||||
config_file = open(outputs['am_model_config'], encoding='utf-8')
|
||||
root = yaml.full_load(config_file)
|
||||
@@ -350,19 +354,20 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
|
||||
cmd['token_num_relax'] = token_num_relax
|
||||
cmd['decoding_ind'] = decoding_ind
|
||||
cmd['decoding_mode'] = decoding_mode
|
||||
cmd['fake_streaming'] = fake_streaming
|
||||
if outputs.__contains__('mvn_file'):
|
||||
cmd['cmvn_file'] = outputs['mvn_file']
|
||||
model_config = self.model_cfg['model_config']
|
||||
if model_config.__contains__('vad_model') and self.vad_model != '':
|
||||
if model_config.__contains__('vad_model') and self.vad_model is None:
|
||||
self.vad_model = model_config['vad_model']
|
||||
if model_config.__contains__('vad_model_revision'):
|
||||
self.vad_model_revision = model_config['vad_model_revision']
|
||||
if model_config.__contains__('punc_model') and self.punc_model != '':
|
||||
if model_config.__contains__('punc_model') and self.punc_model is None:
|
||||
self.punc_model = model_config['punc_model']
|
||||
if model_config.__contains__('punc_model_revision'):
|
||||
self.punc_model_revision = model_config['punc_model_revision']
|
||||
if model_config.__contains__(
|
||||
'timestamp_model') and self.timestamp_model != '':
|
||||
'timestamp_model') and self.timestamp_model is None:
|
||||
self.timestamp_model = model_config['timestamp_model']
|
||||
if model_config.__contains__('timestamp_model_revision'):
|
||||
self.timestamp_model_revision = model_config[
|
||||
@@ -389,6 +394,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
|
||||
'punc_model_file',
|
||||
'punc_infer_config',
|
||||
'param_dict',
|
||||
'fake_streaming',
|
||||
]
|
||||
|
||||
for user_args in user_args_dict:
|
||||
|
||||
@@ -12,7 +12,7 @@ import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from diffusers import LMSDiscreteScheduler, StableDiffusionPipeline
|
||||
from diffusers.models.cross_attention import CrossAttention
|
||||
from diffusers.models.attention_processor import Attention
|
||||
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import \
|
||||
StableDiffusionPipelineOutput
|
||||
from PIL import Image
|
||||
@@ -245,7 +245,7 @@ class Cones2AttnProcessor:
|
||||
super().__init__()
|
||||
|
||||
def __call__(self,
|
||||
attn: CrossAttention,
|
||||
attn: Attention,
|
||||
hidden_states,
|
||||
encoder_hidden_states=None,
|
||||
attention_mask=None):
|
||||
|
||||
@@ -17,6 +17,7 @@ from modelscope.pipelines.builder import PIPELINES
|
||||
from modelscope.pipelines.multi_modal.diffusers_wrapped.diffusers_pipeline import \
|
||||
DiffusersPipeline
|
||||
from modelscope.utils.constant import Tasks
|
||||
from modelscope.utils.import_utils import is_swift_available
|
||||
|
||||
|
||||
@PIPELINES.register_module(
|
||||
@@ -38,9 +39,11 @@ class StableDiffusionPipeline(DiffusersPipeline):
|
||||
custom_dir: custom diffusion weight dir for unet.
|
||||
modifier_token: token to use as a modifier for the concept of custom diffusion.
|
||||
use_safetensors: load safetensors weights.
|
||||
use_swift: Whether to use swift lora dir for unet.
|
||||
"""
|
||||
use_safetensors = kwargs.pop('use_safetensors', False)
|
||||
torch_type = kwargs.pop('torch_type', torch.float32)
|
||||
use_swift = kwargs.pop('use_swift', False)
|
||||
# check custom diffusion input value
|
||||
if custom_dir is None and modifier_token is not None:
|
||||
raise ValueError(
|
||||
@@ -58,7 +61,17 @@ class StableDiffusionPipeline(DiffusersPipeline):
|
||||
# load lora module to unet
|
||||
if lora_dir is not None:
|
||||
assert os.path.exists(lora_dir), f"{lora_dir} isn't exist"
|
||||
self.pipeline.unet.load_attn_procs(lora_dir)
|
||||
if use_swift:
|
||||
if not is_swift_available():
|
||||
raise ValueError(
|
||||
'Please install swift by `pip install ms-swift` to use efficient_tuners.'
|
||||
)
|
||||
from swift import Swift
|
||||
self.pipeline.unet = Swift.from_pretrained(
|
||||
self.pipeline.unet, lora_dir)
|
||||
else:
|
||||
self.pipeline.unet.load_attn_procs(lora_dir)
|
||||
|
||||
# load custom diffusion to unet
|
||||
if custom_dir is not None:
|
||||
assert os.path.exists(custom_dir), f"{custom_dir} isn't exist"
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
import math
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
@@ -8,7 +9,7 @@ import torch
|
||||
from modelscope.metainfo import Pipelines
|
||||
from modelscope.models import Model
|
||||
from modelscope.outputs import OutputKeys
|
||||
from modelscope.pipelines.base import Pipeline
|
||||
from modelscope.pipelines.base import Input, Pipeline
|
||||
from modelscope.pipelines.builder import PIPELINES
|
||||
from modelscope.preprocessors import Preprocessor
|
||||
from modelscope.utils.constant import ModelFile, Tasks
|
||||
@@ -64,6 +65,7 @@ class TokenClassificationPipeline(Pipeline):
|
||||
sequence_length=sequence_length,
|
||||
**kwargs)
|
||||
self.model.eval()
|
||||
self.sequence_length = sequence_length
|
||||
|
||||
assert hasattr(self.preprocessor, 'id2label')
|
||||
self.id2label = self.preprocessor.id2label
|
||||
@@ -131,9 +133,20 @@ class TokenClassificationPipeline(Pipeline):
|
||||
predictions = torch_nested_numpify(torch_nested_detach(predictions))
|
||||
labels = [self.id2label[x] for x in predictions]
|
||||
|
||||
return_prob = postprocess_params.pop('return_prob', True)
|
||||
if return_prob:
|
||||
if OutputKeys.LOGITS in inputs:
|
||||
logits = inputs[OutputKeys.LOGITS]
|
||||
if len(logits.shape) == 3:
|
||||
logits = logits[0]
|
||||
probs = torch_nested_numpify(
|
||||
torch_nested_detach(logits.softmax(-1)))
|
||||
else:
|
||||
return_prob = False
|
||||
|
||||
chunks = []
|
||||
chunk = {}
|
||||
for label, offsets in zip(labels, offset_mapping):
|
||||
for i, (label, offsets) in enumerate(zip(labels, offset_mapping)):
|
||||
if label[0] in 'BS':
|
||||
if chunk:
|
||||
chunk['span'] = text[chunk['start']:chunk['end']]
|
||||
@@ -143,6 +156,8 @@ class TokenClassificationPipeline(Pipeline):
|
||||
'start': offsets[0],
|
||||
'end': offsets[1]
|
||||
}
|
||||
if return_prob:
|
||||
chunk['prob'] = probs[i][predictions[i]]
|
||||
if label[0] in 'I':
|
||||
if not chunk:
|
||||
chunk = {
|
||||
@@ -150,6 +165,8 @@ class TokenClassificationPipeline(Pipeline):
|
||||
'start': offsets[0],
|
||||
'end': offsets[1]
|
||||
}
|
||||
if return_prob:
|
||||
chunk['prob'] = probs[i][predictions[i]]
|
||||
if label[0] in 'E':
|
||||
if not chunk:
|
||||
chunk = {
|
||||
@@ -157,6 +174,8 @@ class TokenClassificationPipeline(Pipeline):
|
||||
'start': offsets[0],
|
||||
'end': offsets[1]
|
||||
}
|
||||
if return_prob:
|
||||
chunk['prob'] = probs[i][predictions[i]]
|
||||
if label[0] in 'IES':
|
||||
if chunk:
|
||||
chunk['end'] = offsets[1]
|
||||
@@ -172,3 +191,63 @@ class TokenClassificationPipeline(Pipeline):
|
||||
chunks.append(chunk)
|
||||
|
||||
return chunks
|
||||
|
||||
def _process_single(self, input: Input, *args, **kwargs) -> Dict[str, Any]:
    """Process one input, optionally splitting long text into pieces.

    Args:
        input: The single input to process.
        *args: Forwarded unchanged to the parent ``_process_single``.
        **kwargs: Forwarded to the parent ``_process_single`` after the
            ``split_max_length`` key is removed. ``split_max_length`` is
            the maximum piece length; ``0``, a negative value or ``None``
            disables splitting (the default).

    Returns:
        The parent's result for the input, or, when splitting is enabled,
        the first element of the joined results of all pieces.
    """
    # ``or 0`` makes an explicit ``split_max_length=None`` mean "no split"
    # instead of failing on the ``<=`` comparison below.
    split_max_length = kwargs.pop('split_max_length', 0) or 0
    # Bind the parent method once: zero-argument ``super()`` must not be
    # evaluated inside a comprehension scope.
    process_one = super()._process_single
    if split_max_length <= 0:
        return process_one(input, *args, **kwargs)

    split_texts, index_mapping = self._auto_split([input], split_max_length)
    outputs = [process_one(text, *args, **kwargs) for text in split_texts]
    # Only one raw input was supplied, so the joined list has one entry.
    return self._auto_join(outputs, index_mapping)[0]
|
||||
|
||||
def _process_batch(self, input: List[Input], batch_size: int, *args,
                   **kwargs) -> List[Dict[str, Any]]:
    """Process a batch of inputs, optionally splitting long texts.

    Args:
        input: The inputs to process.
        batch_size: Batch size handed to the parent ``_process_batch``.
        *args: Forwarded unchanged to the parent ``_process_batch``.
        **kwargs: Forwarded to the parent ``_process_batch`` after the
            ``split_max_length`` key is removed. ``split_max_length`` is
            the maximum piece length; ``0``, a negative value or ``None``
            disables splitting (the default).

    Returns:
        The parent's results, joined back per original input when
        splitting is enabled.
    """
    # ``or 0`` makes an explicit ``split_max_length=None`` mean "no split".
    split_max_length = kwargs.pop('split_max_length', 0) or 0
    if split_max_length <= 0:
        # ``batch_size`` is passed positionally: combining
        # ``batch_size=...`` with ``*args`` binds the first extra positional
        # argument to ``batch_size`` as well and raises TypeError.
        return super()._process_batch(input, batch_size, *args, **kwargs)

    split_texts, index_mapping = self._auto_split(input, split_max_length)
    outputs = super()._process_batch(split_texts, batch_size, *args,
                                     **kwargs)
    return self._auto_join(outputs, index_mapping)
|
||||
|
||||
def _auto_split(self, input_texts: List[str], split_max_length: int):
|
||||
split_texts = []
|
||||
index_mapping = {}
|
||||
new_idx = 0
|
||||
for raw_idx, text in enumerate(input_texts):
|
||||
if len(text) < split_max_length:
|
||||
split_texts.append(text)
|
||||
index_mapping[new_idx] = (raw_idx, 0)
|
||||
new_idx += 1
|
||||
else:
|
||||
n_split = math.ceil(len(text) / split_max_length)
|
||||
for i in range(n_split):
|
||||
offset = i * split_max_length
|
||||
split_texts.append(text[offset:offset + split_max_length])
|
||||
index_mapping[new_idx] = (raw_idx, offset)
|
||||
new_idx += 1
|
||||
return split_texts, index_mapping
|
||||
|
||||
def _auto_join(
|
||||
self, outputs: List[Dict[str, Any]],
|
||||
index_mapping: Dict[int, Tuple[int, int]]) -> List[Dict[str, Any]]:
|
||||
joined_outputs = []
|
||||
for idx, output in enumerate(outputs):
|
||||
raw_idx, offset = index_mapping[idx]
|
||||
if raw_idx >= len(joined_outputs):
|
||||
joined_outputs.append(output)
|
||||
else:
|
||||
for chunk in output[OutputKeys.OUTPUT]:
|
||||
chunk['start'] += offset
|
||||
chunk['end'] += offset
|
||||
joined_outputs[raw_idx][OutputKeys.OUTPUT].append(chunk)
|
||||
return joined_outputs
|
||||
|
||||
@@ -96,6 +96,10 @@ class WavToScp(Preprocessor):
|
||||
else:
|
||||
mode = None
|
||||
inputs['mode'] = mode
|
||||
if 'lang' in inputs['model_config']:
|
||||
inputs['model_lang'] = inputs['model_config']['lang']
|
||||
else:
|
||||
inputs['model_lang'] = 'zh-cn'
|
||||
|
||||
if inputs['model_type'] == Frameworks.torch:
|
||||
assert inputs['model_config'].__contains__(
|
||||
|
||||
@@ -53,10 +53,15 @@ class DiffusionImageGenerationPreprocessor(Preprocessor):
|
||||
self.preprocessor_mean = kwargs.pop('mean', [0.5])
|
||||
self.preprocessor_std = kwargs.pop('std', [0.5])
|
||||
self.preprocessor_image_keys = set(kwargs.pop('image_keys', []))
|
||||
self.center_crop = kwargs.pop('center_crop', True)
|
||||
|
||||
self.transform_input = transforms.Compose([
|
||||
transforms.Resize(
|
||||
self.preprocessor_resolution,
|
||||
interpolation=transforms.InterpolationMode.BILINEAR),
|
||||
transforms.CenterCrop(self.preprocessor_resolution)
|
||||
if self.center_crop else transforms.RandomCrop(
|
||||
self.preprocessor_resolution),
|
||||
transforms.ToTensor(),
|
||||
transforms.Normalize(self.preprocessor_mean,
|
||||
self.preprocessor_std),
|
||||
|
||||
@@ -19,6 +19,7 @@ if TYPE_CHECKING:
|
||||
from .distributed.ddp_hook import DDPHook
|
||||
from .distributed.deepspeed_hook import DeepspeedHook
|
||||
from .distributed.megatron_hook import MegatronHook
|
||||
from .swift.swift_hook import SwiftHook
|
||||
|
||||
else:
|
||||
_import_structure = {
|
||||
@@ -40,6 +41,7 @@ else:
|
||||
'distributed.ddp_hook': ['DDPHook'],
|
||||
'distributed.deepspeed_hook': ['DeepspeedHook'],
|
||||
'distributed.megatron_hook': ['MegatronHook'],
|
||||
'swift.swift_hook': ['SwiftHook'],
|
||||
'priority': ['Priority', 'get_priority']
|
||||
}
|
||||
|
||||
|
||||
1
modelscope/trainers/hooks/swift/__init__.py
Normal file
1
modelscope/trainers/hooks/swift/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
from .swift_hook import SwiftHook
|
||||
132
modelscope/trainers/hooks/swift/swift_hook.py
Normal file
132
modelscope/trainers/hooks/swift/swift_hook.py
Normal file
@@ -0,0 +1,132 @@
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from modelscope.metainfo import Hooks
|
||||
from modelscope.trainers import EpochBasedTrainer
|
||||
from modelscope.trainers.hooks.builder import HOOKS
|
||||
from modelscope.trainers.hooks.checkpoint.checkpoint_hook import (
|
||||
BestCkptSaverHook, CheckpointHook, CheckpointProcessor)
|
||||
from modelscope.trainers.hooks.checkpoint.load_checkpoint_hook import \
|
||||
LoadCheckpointHook
|
||||
from modelscope.trainers.hooks.hook import Hook
|
||||
from modelscope.utils.checkpoint import save_configuration
|
||||
from modelscope.utils.import_utils import is_swift_available
|
||||
|
||||
|
||||
class SwiftCheckpointProcessor(CheckpointProcessor):
    """Checkpoint processor that additionally saves swift tuner weights.

    Swift artifacts are written to a sibling directory whose name is the
    regular checkpoint/output path plus ``SWIFT_SAVE_SUFFIX``.
    """

    _BIN_FILE_DIR = 'model'
    # Appended to a checkpoint/output path to get the swift save directory.
    SWIFT_SAVE_SUFFIX = '_swift'

    @staticmethod
    def copy_files_and_dump_config(trainer, output_dir, config, bin_file):
        """Copy useful files to the output folder and dump configuration.json.

        Args:
            trainer: The trainer providing the model and the preprocessors.
            output_dir: The target folder.
            config: The configuration to dump; hub-related keys are removed
                from its ``train.checkpoint.period`` and
                ``train.checkpoint.best`` sections before saving.
            bin_file: Checkpoint name passed to a non-swift model's
                ``save_pretrained``.

        Raises:
            ValueError: If the model has ``save_pretrained`` but swift is
                not installed.
        """
        model = trainer.unwrap_module(trainer.model)

        class SaveConfig:
            """Collects the latest config and writes it once at the end."""

            def __init__(self, output_dir, config):
                self.output_dir = output_dir
                self.config = config

            def __call__(self, _output_dir, _config):
                # Used as ``save_config_function``: remember the config
                # instead of writing it immediately.
                self.config = _config

            def save_config(self):
                save_configuration(self.output_dir, self.config)

        for pop_key in [
                'push_to_hub', 'hub_repo_id', 'hub_token', 'private_hub'
        ]:
            for section in ('train.checkpoint.period',
                            'train.checkpoint.best'):
                if config.safe_get(section + '.' + pop_key) is not None:
                    config.safe_get(section).pop(pop_key)

        save_config_fn = SaveConfig(output_dir, config)

        if hasattr(model, 'save_pretrained'):
            if not is_swift_available():
                raise ValueError(
                    'Please install swift by `pip install ms-swift` to use SwiftHook.'
                )
            # Imported lazily so the module loads without swift installed.
            from swift import SwiftModel
            if isinstance(model, SwiftModel):
                _swift_output_dir = output_dir + SwiftCheckpointProcessor.SWIFT_SAVE_SUFFIX
                model.save_pretrained(
                    save_directory=_swift_output_dir,
                    safe_serialization=config.safe_get(
                        'train.checkpoint.safe_serialization', False),
                    adapter_name=config.safe_get(
                        'train.checkpoint.adapter_name', 'default'))
            else:
                model.save_pretrained(
                    output_dir,
                    bin_file,
                    # No-op save function: this call does not write weights.
                    save_function=lambda *args, **kwargs: None,
                    config=save_config_fn.config,
                    save_config_function=save_config_fn)

        if trainer.train_preprocessor is not None:
            trainer.train_preprocessor.save_pretrained(
                output_dir,
                save_config_fn.config,
                save_config_function=save_config_fn)
        if trainer.eval_preprocessor is not None:
            trainer.eval_preprocessor.save_pretrained(
                output_dir,
                save_config_fn.config,
                save_config_function=save_config_fn)
        save_config_fn.save_config()

    def link_dir(self, source_dir, output_dir):
        """Replace ``output_dir`` with a fresh copy of ``source_dir``."""
        if os.path.exists(output_dir):
            shutil.rmtree(output_dir)
        shutil.copytree(source_dir, output_dir)

    def save_swift_model_state(self, model, filename):
        """Save the model into ``filename`` via ``model.save_pretrained``."""
        model.save_pretrained(filename)

    def save_checkpoints(self,
                         trainer,
                         checkpoint_path_prefix,
                         output_dir,
                         meta=None,
                         save_optimizers=True):
        """Save trainer state, model state and the swift model directory.

        Args:
            trainer: The trainer whose model is saved.
            checkpoint_path_prefix: Prefix used to derive the state file
                names and the swift save directory.
            output_dir: Output folder; the swift directory is copied next
                to it with ``SWIFT_SAVE_SUFFIX`` appended.
            meta: Passed through to ``save_trainer_state``.
            save_optimizers: Passed through to ``save_trainer_state``.
        """
        model = trainer.unwrap_module(trainer.model)
        _model_file, _train_state_file = self._get_state_file_name(
            checkpoint_path_prefix)
        _swift_save_dir = checkpoint_path_prefix + SwiftCheckpointProcessor.SWIFT_SAVE_SUFFIX
        _swift_output_dir = output_dir + SwiftCheckpointProcessor.SWIFT_SAVE_SUFFIX
        self.save_trainer_state(trainer, model, _train_state_file, meta,
                                save_optimizers)
        self.save_model_state(model, _model_file)
        self.link(model, _model_file, output_dir)
        self.save_swift_model_state(model, _swift_save_dir)
        self.link_dir(_swift_save_dir, _swift_output_dir)
|
||||
|
||||
|
||||
@HOOKS.register_module(module_name=Hooks.SwiftHook)
class SwiftHook(Hook):
    """Hook that installs ``SwiftCheckpointProcessor`` on checkpoint hooks."""

    _BIN_FILE_DIR = 'model'

    def __init__(self):
        pass

    def register_processor(self, trainer: EpochBasedTrainer):
        """Set a shared swift processor on the trainer's checkpoint hooks.

        For each of ``CheckpointHook``, ``BestCkptSaverHook`` and
        ``LoadCheckpointHook``, the first hook returned by
        ``trainer.get_hook`` gets the processor unless it already uses a
        ``SwiftCheckpointProcessor``.

        Args:
            trainer: The trainer whose hooks are updated.
        """
        processor = SwiftCheckpointProcessor()
        for hook_type in (CheckpointHook, BestCkptSaverHook,
                          LoadCheckpointHook):
            hooks = trainer.get_hook(hook_type)
            if len(hooks) > 0 and not isinstance(hooks[0].processor,
                                                 SwiftCheckpointProcessor):
                hooks[0].set_processor(processor)
|
||||
@@ -1,4 +1,5 @@
|
||||
# Copyright 2022-2023 The Alibaba Fundamental Vision Team Authors. All rights reserved.
|
||||
import os
|
||||
from typing import Union
|
||||
|
||||
import torch
|
||||
@@ -7,16 +8,46 @@ from torch import nn
|
||||
from modelscope.metainfo import Trainers
|
||||
from modelscope.models.base import Model, TorchModel
|
||||
from modelscope.trainers.builder import TRAINERS
|
||||
from modelscope.trainers.hooks.checkpoint.checkpoint_hook import CheckpointHook
|
||||
from modelscope.trainers.hooks.checkpoint.checkpoint_processor import \
|
||||
CheckpointProcessor
|
||||
from modelscope.trainers.optimizer.builder import build_optimizer
|
||||
from modelscope.trainers.trainer import EpochBasedTrainer
|
||||
from modelscope.utils.config import ConfigDict
|
||||
|
||||
|
||||
class SwiftDiffusionCheckpointProcessor(CheckpointProcessor):
    """Checkpoint processor that saves only the swift-tuned unet."""

    def save_checkpoints(self,
                         trainer,
                         checkpoint_path_prefix,
                         output_dir,
                         meta=None,
                         save_optimizers=True):
        """Save the state dict for swift lora tune model.

        Only ``trainer.model.unet.save_pretrained`` is called here;
        ``checkpoint_path_prefix``, ``meta`` and ``save_optimizers`` are
        accepted for interface compatibility and not used.

        Args:
            trainer: The trainer whose ``model.unet`` is saved.
            checkpoint_path_prefix: Unused.
            output_dir: Directory the unet is saved into.
            meta: Unused.
            save_optimizers: Unused.
        """
        trainer.model.unet.save_pretrained(os.path.join(output_dir))
|
||||
|
||||
|
||||
@TRAINERS.register_module(module_name=Trainers.stable_diffusion)
|
||||
class StableDiffusionTrainer(EpochBasedTrainer):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
"""Stable Diffusion trainers for fine-tuning.
|
||||
|
||||
Args:
|
||||
use_swift: Whether to use swift.
|
||||
|
||||
"""
|
||||
super().__init__(*args, **kwargs)
|
||||
use_swift = kwargs.pop('use_swift', False)
|
||||
|
||||
# set swift lora save checkpoint processor
|
||||
if use_swift:
|
||||
ckpt_hook = list(
|
||||
filter(lambda hook: isinstance(hook, CheckpointHook),
|
||||
self.hooks))[0]
|
||||
ckpt_hook.set_processor(SwiftDiffusionCheckpointProcessor())
|
||||
|
||||
def build_optimizer(self, cfg: ConfigDict, default_args: dict = None):
|
||||
try:
|
||||
|
||||
@@ -142,12 +142,8 @@ class EpochBasedTrainer(BaseTrainer):
|
||||
self._samplers = samplers
|
||||
|
||||
if isinstance(model, str):
|
||||
third_party = kwargs.get(ThirdParty.KEY)
|
||||
if third_party is not None:
|
||||
kwargs.pop(ThirdParty.KEY)
|
||||
|
||||
self.model_dir = self.get_or_download_model_dir(
|
||||
model, model_revision, third_party)
|
||||
model, model_revision, kwargs.pop(ThirdParty.KEY, None))
|
||||
if cfg_file is None:
|
||||
cfg_file = os.path.join(self.model_dir,
|
||||
ModelFile.CONFIGURATION)
|
||||
@@ -159,7 +155,10 @@ class EpochBasedTrainer(BaseTrainer):
|
||||
if hasattr(model, 'model_dir'):
|
||||
check_local_model_is_latest(
|
||||
model.model_dir,
|
||||
user_agent={Invoke.KEY: Invoke.LOCAL_TRAINER})
|
||||
user_agent={
|
||||
Invoke.KEY: Invoke.LOCAL_TRAINER,
|
||||
ThirdParty.KEY: kwargs.pop(ThirdParty.KEY, None)
|
||||
})
|
||||
|
||||
super().__init__(cfg_file, arg_parse_fn)
|
||||
self.cfg_modify_fn = cfg_modify_fn
|
||||
|
||||
@@ -435,24 +435,27 @@ class FilesAstScanning(object):
|
||||
ignored.add(item)
|
||||
return list(set(output) - set(ignored))
|
||||
|
||||
def traversal_files(self, path, check_sub_dir=None):
|
||||
def traversal_files(self, path, check_sub_dir=None, include_init=False):
|
||||
self.file_dirs = []
|
||||
if check_sub_dir is None or len(check_sub_dir) == 0:
|
||||
self._traversal_files(path)
|
||||
self._traversal_files(path, include_init=include_init)
|
||||
else:
|
||||
for item in check_sub_dir:
|
||||
sub_dir = os.path.join(path, item)
|
||||
if os.path.isdir(sub_dir):
|
||||
self._traversal_files(sub_dir)
|
||||
self._traversal_files(sub_dir, include_init=include_init)
|
||||
|
||||
def _traversal_files(self, path):
|
||||
def _traversal_files(self, path, include_init=False):
|
||||
dir_list = os.scandir(path)
|
||||
for item in dir_list:
|
||||
if item.name.startswith('__') or item.name.endswith(
|
||||
'.json') or item.name.endswith('.md'):
|
||||
if item.name == '__init__.py' and not include_init:
|
||||
continue
|
||||
elif (item.name.startswith('__')
|
||||
and item.name != '__init__.py') or item.name.endswith(
|
||||
'.json') or item.name.endswith('.md'):
|
||||
continue
|
||||
if item.is_dir():
|
||||
self._traversal_files(item.path)
|
||||
self._traversal_files(item.path, include_init=include_init)
|
||||
elif item.is_file() and item.name.endswith('.py'):
|
||||
self.file_dirs.append(item.path)
|
||||
elif item.is_file() and 'requirement' in item.name:
|
||||
|
||||
@@ -174,3 +174,9 @@ XFORMERS_IMPORT_ERROR = """
|
||||
{0} requires the timm library but it was not found in your environment. You can install it with pip:
|
||||
`pip install xformers>=0.0.17`
|
||||
"""
|
||||
|
||||
# docstyle-ignore
|
||||
SWIFT_IMPORT_ERROR = """
|
||||
{0} requires the ms-swift library but it was not found in your environment. You can install it with pip:
|
||||
`pip install ms-swift -U`
|
||||
"""
|
||||
|
||||
@@ -13,6 +13,7 @@ from transformers import \
|
||||
from transformers import \
|
||||
AutoModelForTokenClassification as AutoModelForTokenClassificationHF
|
||||
from transformers import AutoTokenizer as AutoTokenizerHF
|
||||
from transformers import BitsAndBytesConfig as BitsAndBytesConfigHF
|
||||
from transformers import GenerationConfig as GenerationConfigHF
|
||||
from transformers import (PretrainedConfig, PreTrainedModel,
|
||||
PreTrainedTokenizerBase)
|
||||
@@ -22,6 +23,11 @@ from transformers.models.auto.tokenization_auto import (
|
||||
from modelscope import snapshot_download
|
||||
from modelscope.utils.constant import Invoke
|
||||
|
||||
try:
|
||||
from transformers import GPTQConfig as GPTQConfigHF
|
||||
except ImportError:
|
||||
GPTQConfigHF = None
|
||||
|
||||
|
||||
def user_agent(invoked_by=None):
|
||||
if invoked_by is None:
|
||||
@@ -199,3 +205,5 @@ AutoConfig = get_wrapped_class(
|
||||
AutoConfigHF, ignore_file_pattern=[r'\w+\.bin', r'\w+\.safetensors'])
|
||||
GenerationConfig = get_wrapped_class(
|
||||
GenerationConfigHF, ignore_file_pattern=[r'\w+\.bin', r'\w+\.safetensors'])
|
||||
GPTQConfig = GPTQConfigHF
|
||||
BitsAndBytesConfig = BitsAndBytesConfigHF
|
||||
|
||||
@@ -310,6 +310,7 @@ REQUIREMENTS_MAAPING = OrderedDict([
|
||||
('open_clip', (is_package_available('open_clip'), OPENCLIP_IMPORT_ERROR)),
|
||||
('taming', (is_package_available('taming'), TAMING_IMPORT_ERROR)),
|
||||
('xformers', (is_package_available('xformers'), XFORMERS_IMPORT_ERROR)),
|
||||
('swift', (is_package_available('swift'), SWIFT_IMPORT_ERROR)),
|
||||
])
|
||||
|
||||
SYSTEM_PACKAGE = set(['os', 'sys', 'typing'])
|
||||
|
||||
@@ -372,7 +372,7 @@ def import_module_from_model_dir(model_dir):
|
||||
"""
|
||||
from pathlib import Path
|
||||
file_scanner = FilesAstScanning()
|
||||
file_scanner.traversal_files(model_dir)
|
||||
file_scanner.traversal_files(model_dir, include_init=True)
|
||||
file_dirs = file_scanner.file_dirs
|
||||
requirements = file_scanner.requirement_dirs
|
||||
|
||||
|
||||
@@ -2,4 +2,4 @@
|
||||
__version__ = '1.9.1'
|
||||
# default release datetime for branches under active development is set
|
||||
# to be a time far-far-away-into-the-future
|
||||
__release_datetime__ = '2023-09-06 00:00:00'
|
||||
__release_datetime__ = '2099-09-06 00:00:00'
|
||||
|
||||
@@ -4,7 +4,6 @@ datasets>=2.8.0,<=2.13.0
|
||||
einops
|
||||
filelock>=3.3.0
|
||||
gast>=0.2.2
|
||||
ms-swift
|
||||
numpy
|
||||
oss2
|
||||
pandas
|
||||
|
||||
@@ -23,20 +23,31 @@ class PluginModelTest(unittest.TestCase):
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
def test_run_span_based_ner_pipeline(self):
|
||||
pipeline_ins = pipeline(
|
||||
Tasks.named_entity_recognition,
|
||||
'damo/nlp_nested-ner_named-entity-recognition_chinese-base-med')
|
||||
print(
|
||||
pipeline_ins(
|
||||
'1、可测量目标: 1周内胸闷缓解。2、下一步诊疗措施:1.心内科护理常规,一级护理,低盐低脂饮食,留陪客。'
|
||||
'2.予“阿司匹林肠溶片”抗血小板聚集,“呋塞米、螺内酯”利尿减轻心前负荷,“瑞舒伐他汀”调脂稳定斑块,“厄贝沙坦片片”降血压抗心机重构'
|
||||
))
|
||||
try:
|
||||
pipeline_ins = pipeline(
|
||||
Tasks.named_entity_recognition,
|
||||
'damo/nlp_nested-ner_named-entity-recognition_chinese-base-med'
|
||||
)
|
||||
print(
|
||||
pipeline_ins(
|
||||
'1、可测量目标: 1周内胸闷缓解。2、下一步诊疗措施:1.心内科护理常规,一级护理,低盐低脂饮食,留陪客。'
|
||||
'2.予“阿司匹林肠溶片”抗血小板聚集,“呋塞米、螺内酯”利尿减轻心前负荷,“瑞舒伐他汀”调脂稳定斑块,“厄贝沙坦片片”降血压抗心机重构'
|
||||
))
|
||||
except RuntimeError:
|
||||
print(
|
||||
'Skip test span_based_ner_pipeline! RuntimeError: Try loading from huggingface and modelscope failed'
|
||||
)
|
||||
|
||||
def test_maoe_pipelines(self):
|
||||
pipeline_ins = pipeline(
|
||||
Tasks.named_entity_recognition,
|
||||
'damo/nlp_maoe_named-entity-recognition_chinese-base-general')
|
||||
print(
|
||||
pipeline_ins(
|
||||
'刘培强,男,生理年龄40岁(因为在太空中进入休眠状态),实际年龄52岁,领航员国际空间站中的中国航天员,机械工程专家,军人,军衔中校。'
|
||||
))
|
||||
try:
|
||||
pipeline_ins = pipeline(
|
||||
Tasks.named_entity_recognition,
|
||||
'damo/nlp_maoe_named-entity-recognition_chinese-base-general')
|
||||
print(
|
||||
pipeline_ins(
|
||||
'刘培强,男,生理年龄40岁(因为在太空中进入休眠状态),实际年龄52岁,领航员国际空间站中的中国航天员,机械工程专家,军人,军衔中校。'
|
||||
))
|
||||
except RuntimeError:
|
||||
print(
|
||||
'Skip test maoe_pipeline! RuntimeError: Try loading from huggingface and modelscope failed'
|
||||
)
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# Copyright 2022-2023 The Alibaba Fundamental Vision Team Authors. All rights reserved.
|
||||
import os
|
||||
import unittest
|
||||
|
||||
from modelscope.models import Model
|
||||
from modelscope.models.multi_modal import EfficientStableDiffusion
|
||||
from modelscope.pipelines import pipeline
|
||||
from modelscope.utils.constant import Tasks
|
||||
from modelscope.utils.test_utils import test_level
|
||||
@@ -11,6 +11,7 @@ from modelscope.utils.test_utils import test_level
|
||||
class EfficientDiffusionTuningTest(unittest.TestCase):
|
||||
|
||||
def setUp(self) -> None:
|
||||
os.system('pip install ms-swift -U')
|
||||
self.task = Tasks.efficient_diffusion_tuning
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
@@ -28,6 +29,7 @@ class EfficientDiffusionTuningTest(unittest.TestCase):
|
||||
model_id = 'damo/multi-modal_efficient-diffusion-tuning-lora'
|
||||
model_revision = 'v1.0.2'
|
||||
model = Model.from_pretrained(model_id, model_revision=model_revision)
|
||||
from modelscope.models.multi_modal import EfficientStableDiffusion
|
||||
self.assertTrue(model.__class__ == EfficientStableDiffusion)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
@@ -52,6 +54,7 @@ class EfficientDiffusionTuningTest(unittest.TestCase):
|
||||
model_id = 'damo/multi-modal_efficient-diffusion-tuning-control-lora'
|
||||
model_revision = 'v1.0.2'
|
||||
model = Model.from_pretrained(model_id, model_revision=model_revision)
|
||||
from modelscope.models.multi_modal import EfficientStableDiffusion
|
||||
self.assertTrue(model.__class__ == EfficientStableDiffusion)
|
||||
|
||||
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
# Copyright 2022-2023 The Alibaba Fundamental Vision Team Authors. All rights reserved.
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
import cv2
|
||||
|
||||
from modelscope.models import Model
|
||||
from modelscope.models.multi_modal import EfficientStableDiffusion
|
||||
from modelscope.pipelines import pipeline
|
||||
from modelscope.utils.constant import Tasks
|
||||
from modelscope.utils.test_utils import test_level
|
||||
@@ -14,6 +14,7 @@ from modelscope.utils.test_utils import test_level
|
||||
class EfficientDiffusionTuningTestSwift(unittest.TestCase):
|
||||
|
||||
def setUp(self) -> None:
|
||||
os.system('pip install ms-swift -U')
|
||||
self.task = Tasks.efficient_diffusion_tuning
|
||||
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
@@ -39,6 +40,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
|
||||
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-lora'
|
||||
model_revision = 'v1.0.2'
|
||||
model = Model.from_pretrained(model_id, model_revision=model_revision)
|
||||
from modelscope.models.multi_modal import EfficientStableDiffusion
|
||||
self.assertTrue(model.__class__ == EfficientStableDiffusion)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
@@ -64,6 +66,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
|
||||
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-adapter'
|
||||
model_revision = 'v1.0.2'
|
||||
model = Model.from_pretrained(model_id, model_revision=model_revision)
|
||||
from modelscope.models.multi_modal import EfficientStableDiffusion
|
||||
self.assertTrue(model.__class__ == EfficientStableDiffusion)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
@@ -89,6 +92,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
|
||||
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-prompt'
|
||||
model_revision = 'v1.0.2'
|
||||
model = Model.from_pretrained(model_id, model_revision=model_revision)
|
||||
from modelscope.models.multi_modal import EfficientStableDiffusion
|
||||
self.assertTrue(model.__class__ == EfficientStableDiffusion)
|
||||
|
||||
|
||||
|
||||
@@ -459,6 +459,25 @@ class NamedEntityRecognitionTest(unittest.TestCase):
|
||||
pipeline_ins = pipeline(task=Tasks.named_entity_recognition)
|
||||
print(pipeline_ins(input=self.sentence))
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
def test_run_long_chinese_with_model_name(self):
|
||||
pipeline_ins = pipeline(
|
||||
task=Tasks.named_entity_recognition, model=self.chinese_model_id)
|
||||
print(
|
||||
pipeline_ins(
|
||||
input=self.sentence + '. ' * 1000,
|
||||
split_max_length=300)) # longer than 512
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
def test_run_long_chinese_with_model_name_batch(self):
|
||||
pipeline_ins = pipeline(
|
||||
task=Tasks.named_entity_recognition, model=self.chinese_model_id)
|
||||
print(
|
||||
pipeline_ins(
|
||||
input=[self.sentence + '. ' * 1000] * 2,
|
||||
batch_size=2,
|
||||
split_max_length=300)) # longer than 512
|
||||
|
||||
@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
|
||||
def test_run_with_all_modelcards(self):
|
||||
for item in self.all_modelcards_info:
|
||||
|
||||
@@ -22,7 +22,7 @@ class TestEfficientDiffusionTuningTrainerSwift(unittest.TestCase):
|
||||
split='train',
|
||||
subset_name='Anime').remap_columns({'Image:FILE': 'target:FILE'})
|
||||
|
||||
self.max_epochs = 30
|
||||
self.max_epochs = 1
|
||||
self.lr = 0.0001
|
||||
|
||||
self.tmp_dir = tempfile.TemporaryDirectory().name
|
||||
|
||||
@@ -24,13 +24,31 @@ class AstScaningTest(unittest.TestCase):
|
||||
def setUp(self):
|
||||
print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
|
||||
self.tmp_dir = tempfile.TemporaryDirectory().name
|
||||
self.tmp_dir2 = tempfile.TemporaryDirectory().name
|
||||
self.test_file = os.path.join(self.tmp_dir, 'test.py')
|
||||
if not os.path.exists(self.tmp_dir):
|
||||
os.makedirs(self.tmp_dir)
|
||||
|
||||
fnames = ['1.py', '2.py', '3.py', '__init__.py']
|
||||
self.folders = ['.', 'a', 'b', 'c']
|
||||
dir_path = self.tmp_dir2
|
||||
folder_dirs = [
|
||||
os.path.join(dir_path, folder) for folder in self.folders
|
||||
]
|
||||
for folder in folder_dirs:
|
||||
os.makedirs(folder, exist_ok=True)
|
||||
for fname in fnames:
|
||||
fpath = os.path.join(folder, fname)
|
||||
with open(fpath, 'w') as f:
|
||||
f.write('hello world')
|
||||
|
||||
for folder in folder_dirs:
|
||||
print(f'folder: {os.listdir(folder)}')
|
||||
|
||||
def tearDown(self):
|
||||
super().tearDown()
|
||||
shutil.rmtree(self.tmp_dir)
|
||||
shutil.rmtree(self.tmp_dir2)
|
||||
|
||||
def test_ast_scaning_class(self):
|
||||
astScaner = AstScanning()
|
||||
@@ -75,6 +93,15 @@ class AstScaningTest(unittest.TestCase):
|
||||
index_0 = list(requirements.keys())[0]
|
||||
self.assertIsInstance(requirements[index_0], list)
|
||||
|
||||
fileScaner.traversal_files(self.tmp_dir2, include_init=False)
|
||||
self.assertTrue(
|
||||
os.path.join(self.tmp_dir2, '__init__.py') not in
|
||||
fileScaner.file_dirs)
|
||||
|
||||
fileScaner.traversal_files(self.tmp_dir2, include_init=True)
|
||||
self.assertTrue(
|
||||
os.path.join(self.tmp_dir2, '__init__.py') in fileScaner.file_dirs)
|
||||
|
||||
def test_file_mtime_md5_method(self):
|
||||
fileScaner = FilesAstScanning()
|
||||
# create first file
|
||||
|
||||
@@ -25,6 +25,10 @@ class HFUtilTest(unittest.TestCase):
|
||||
self.assertEqual(tokenizer.model_max_length, 4096)
|
||||
self.assertFalse(tokenizer.is_fast)
|
||||
|
||||
def test_quantization_import(self):
|
||||
from modelscope import GPTQConfig, BitsAndBytesConfig
|
||||
self.assertTrue(BitsAndBytesConfig is not None)
|
||||
|
||||
def test_auto_model(self):
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
'baichuan-inc/baichuan-7B', trust_remote_code=True)
|
||||
|
||||
Reference in New Issue
Block a user