Add Lora/Adapter/Prompt and support for chatglm6B and chatglm2-6B

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/12770413

* add prompt and lora

* add adapter

* add prefix

* add tests

* adapter smoke test passed

* prompt test passed

* support model id in petl

* migrate chatglm6b

* add train script for chatglm6b

* move gen_kwargs to finetune.py

* add chatglm2

* add model definition
This commit is contained in:
yuze.zyz
2023-06-27 14:38:18 +08:00
committed by wenmeng.zwm
parent 1dbff6cb48
commit a58be34384
45 changed files with 6352 additions and 105 deletions

View File

@@ -0,0 +1,118 @@
from typing import Any, Dict, Union
import numpy as np
import torch
from transformers.deepspeed import is_deepspeed_zero3_enabled
from modelscope import EpochBasedTrainer, get_logger
logger = get_logger(__name__)
class Seq2SeqTrainer(EpochBasedTrainer):
    """Trainer that evaluates ChatGLM-style generation models via `generate()`.

    `evaluation_step` assembles generation kwargs from `self.cfg['gen_kwargs']`,
    generates continuations for the batch, strips the prompt, and returns the
    decoded strings under 'preds'; when labels are present they are decoded
    into `inputs['tgts']` for the metric. `self.tokenizer` is assigned
    externally after construction (see finetune.py).
    """

    def _decode(self, tokens, ignore_pad_token_for_loss=False):
        """Decode a tensor of token ids to strings, dropping '</s>' pieces.

        When `ignore_pad_token_for_loss` is True, -100 label placeholders are
        first mapped back to the tokenizer's pad id so they can be skipped.
        """
        tokens = tokens.cpu().numpy()
        if ignore_pad_token_for_loss:
            tokens = np.where(tokens != -100, tokens,
                              self.tokenizer.pad_token_id)
        return [
            t for t in self.tokenizer.batch_decode(
                tokens, skip_special_tokens=True) if t != '</s>'
        ]

    def evaluation_step(
        self,
        inputs: Dict[str, Union[torch.Tensor, Any]],
    ):
        has_labels = 'labels' in inputs
        # XXX: adapt synced_gpus for fairscale as well
        # Fix: work on a copy. The original mutated the dict stored in
        # self.cfg, so per-batch tensors (input_ids, attention_mask, ...)
        # leaked into the shared config across evaluation steps.
        gen_kwargs = dict(self.cfg['gen_kwargs'])
        if gen_kwargs.get('max_length') is None and gen_kwargs.get(
                'max_new_tokens') is None:
            gen_kwargs['max_length'] = self.model.config.max_length
        gen_kwargs['num_beams'] = (
            gen_kwargs['num_beams'] if gen_kwargs.get('num_beams') is not None
            else self.model.config.num_beams)
        # Under DeepSpeed ZeRO-3 every rank must enter generate() together.
        default_synced_gpus = True if is_deepspeed_zero3_enabled() else False
        gen_kwargs['synced_gpus'] = (
            gen_kwargs['synced_gpus'] if gen_kwargs.get('synced_gpus')
            is not None else default_synced_gpus)

        # Forward batch-level masks/ids through to generate() when present.
        if 'attention_mask' in inputs:
            gen_kwargs['attention_mask'] = inputs.get('attention_mask', None)
        if 'position_ids' in inputs:
            gen_kwargs['position_ids'] = inputs.get('position_ids', None)
        if 'global_attention_mask' in inputs:
            gen_kwargs['global_attention_mask'] = inputs.get(
                'global_attention_mask', None)

        # prepare generation inputs
        # some encoder-decoder models can have varying encoder's and thus
        # varying model input names
        if hasattr(
                self.model, 'encoder'
        ) and self.model.encoder.main_input_name != self.model.main_input_name:
            generation_inputs = inputs[self.model.encoder.main_input_name]
        else:
            generation_inputs = inputs[self.model.main_input_name]

        gen_kwargs['input_ids'] = generation_inputs
        gen_kwargs['pad_token_id'] = self.tokenizer.pad_token_id
        generated_tokens = self.model.generate(**gen_kwargs)
        # Decoder-only model: the output echoes the prompt; strip it off.
        generated_tokens = generated_tokens[:, generation_inputs.size()[-1]:]

        # in case the batch is shorter than max length, the output should be padded
        if gen_kwargs.get('max_length') is not None and generated_tokens.shape[
                -1] < gen_kwargs['max_length']:
            generated_tokens = self._pad_tensors_to_max_len(
                generated_tokens, gen_kwargs['max_length'])
        elif gen_kwargs.get('max_new_tokens'
                            ) is not None and generated_tokens.shape[-1] < (
                                gen_kwargs['max_new_tokens'] + 1):
            generated_tokens = self._pad_tensors_to_max_len(
                generated_tokens, gen_kwargs['max_new_tokens'] + 1)

        generated_tokens = [
            ''.join(self._decode(seq, False)) for seq in generated_tokens
        ]
        if has_labels:
            labels = inputs['labels']
            if gen_kwargs.get('max_length') is not None and labels.shape[
                    -1] < gen_kwargs['max_length']:
                labels = self._pad_tensors_to_max_len(labels,
                                                      gen_kwargs['max_length'])
            elif gen_kwargs.get(
                    'max_new_tokens') is not None and labels.shape[-1] < (
                        gen_kwargs['max_new_tokens'] + 1):
                labels = self._pad_tensors_to_max_len(
                    labels, (gen_kwargs['max_new_tokens'] + 1))
            # Fix: decode targets only when labels exist; the original did
            # this unconditionally and crashed iterating `labels = None`.
            inputs['tgts'] = [
                ''.join(self._decode(seq, True)) for seq in labels
            ]
        return {
            'preds': generated_tokens,
        }

    def _pad_tensors_to_max_len(self, tensor, max_length):
        """Right-pad a (batch, seq) tensor to `max_length` with the pad id.

        Falls back to the EOS id when the tokenizer has no pad id, then to
        `model.config.pad_token_id`; raises ValueError when none is set.
        """
        if self.tokenizer is not None and hasattr(self.tokenizer,
                                                  'pad_token_id'):
            # If PAD token is not defined at least EOS token has to be defined
            pad_token_id = (
                self.tokenizer.pad_token_id if self.tokenizer.pad_token_id
                is not None else self.tokenizer.eos_token_id)
        else:
            if self.model.config.pad_token_id is not None:
                pad_token_id = self.model.config.pad_token_id
            else:
                raise ValueError(
                    'Pad_token_id must be set in the configuration of the model, in order to pad tensors'
                )
        padded_tensor = pad_token_id * torch.ones(
            (tensor.shape[0], max_length),
            dtype=tensor.dtype,
            device=tensor.device)
        padded_tensor[:, :tensor.shape[-1]] = tensor
        return padded_tensor

View File

@@ -0,0 +1,380 @@
import os
from dataclasses import dataclass, field
import numpy as np
import torch
from chatglm_trainer import Seq2SeqTrainer
from text_generation_metric import TextGenerationMetric
from transformers import DataCollatorForSeq2Seq
from modelscope import snapshot_download
from modelscope.metainfo import Models
from modelscope.models import Model
from modelscope.msdatasets import MsDataset
from modelscope.swift import Swift
from modelscope.swift.lora import LoRAConfig
from modelscope.trainers.training_args import TrainingArgs
from modelscope.utils.config import ConfigDict
from modelscope.utils.hub import read_config
@dataclass(init=False)
class Chatglm6bArguments(TrainingArgs):
    """CLI arguments for ChatGLM-6B / ChatGLM2-6B finetuning.

    Extends the generic `TrainingArgs` with dataset column selection,
    sequence-length limits, p-tuning-v2 options and LoRA hyperparameters.
    """

    ptuning_checkpoint: str = field(
        default=None,
        metadata={
            'help': 'The p-tuning checkpoint previously trained.',
        })

    pre_seq_len: int = field(
        default=None, metadata={
            'help': 'The p-tuning sequence length',
        })

    prefix_projection: bool = field(
        default=False, metadata={
            'help': '',
        })

    quantization_bit: int = field(
        default=None, metadata={
            'help': 'Quantized bit',
        })

    prompt_column: str = field(
        default=None,
        metadata={
            'help':
            'The name of the column in the datasets containing the full texts (for summarization).'
        },
    )

    response_column: str = field(
        default=None,
        metadata={
            'help':
            'The name of the column in the datasets containing the summaries (for summarization).'
        },
    )

    history_column: str = field(
        default=None,
        metadata={
            'help':
            'The name of the column in the datasets containing the history of chat.'
        },
    )

    source_prefix: str = field(
        default='',
        metadata={
            'help':
            'A prefix to add before every source text (useful for T5 models).'
        })

    ignore_pad_token_for_loss: bool = field(
        default=True,
        metadata={
            'help':
            'Whether to ignore the tokens corresponding to padded labels in the loss computation or not.'
        },
    )

    max_source_length: int = field(
        default=1024,
        metadata={
            'help':
            ('The maximum total input sequence length after tokenization. Sequences longer '
             'than this will be truncated, sequences shorter will be padded.')
        },
    )

    max_target_length: int = field(
        default=128,
        metadata={
            'help':
            ('The maximum total sequence length for target text after tokenization. Sequences longer '
             'than this will be truncated, sequences shorter will be padded.')
        },
    )

    max_train_samples: int = field(
        default=None,
        metadata={
            'help':
            ('For debugging purposes or quicker training, truncate the number of training examples to this '
             'value if set.')
        },
    )

    max_eval_samples: int = field(
        default=None,
        metadata={
            'help':
            ('For debugging purposes or quicker training, truncate the number of evaluation examples to this '
             'value if set.')
        },
    )

    preprocessing_num_workers: int = field(
        default=None,
        metadata={
            'help': 'The number of processes to use for the preprocessing.'
        },
    )

    use_lora: int = field(
        default=0,
        metadata={'help': 'Whether to use lora to train the model.'},
    )

    lora_rank: int = field(
        default=32,
        metadata={'help': 'The lora rank'},
    )

    lora_alpha: int = field(
        default=32,
        metadata={'help': 'The lora alpha'},
    )

    # Fix: the help text said 'The lora alpha' (copy-paste from the field
    # above); it describes the LoRA dropout probability.
    lora_dropout: float = field(
        default=0.05,
        metadata={'help': 'The lora dropout'},
    )
# Parse CLI flags into typed arguments; the 'chatglm' eval metric is
# registered in text_generation_metric.py (imported above).
args = Chatglm6bArguments(eval_metrics='chatglm').parse_cli()
print(args)

config, _ = args.to_config(ignore_default_config=args.use_model_config)
# Persist the merged config so the trainer below can load it via cfg_file.
config.dump('./configuration.json')

# Select the tokenizer implementation matching the requested model type.
if config['model']['type'] == 'chatglm6b':
    from modelscope.models.nlp import ChatGLMTokenizer
else:
    from modelscope.models.nlp import ChatGLM2Tokenizer as ChatGLMTokenizer
def cfg_modify_fn(cfg):
    """Patch the trainer config before training starts.

    Merges/replaces it with the CLI-derived `config`, derives LinearLR's
    `total_iters` from the dataset size, and injects the generation kwargs
    consumed by Seq2SeqTrainer.evaluation_step.
    """
    if args.use_model_config:
        cfg.merge_from_dict(config)
    else:
        cfg = config
    if cfg.train.lr_scheduler.type == 'LinearLR':
        steps_per_epoch = int(
            len(train_dataset) / cfg.train.dataloader.batch_size_per_gpu)
        cfg.train.lr_scheduler['total_iters'] = (
            steps_per_epoch * cfg.train.max_epochs)
    cfg['gen_kwargs'] = {
        'do_sample': True,
        'top_p': 0.7,
        'max_length': 512,
        'temperature': 0.95
    }
    return cfg
# Load the train/validation splits from the ModelScope hub.
train_dataset = MsDataset.load(
    args.train_dataset_name,
    subset_name=args.train_subset_name,
    split=args.train_split)
validation_dataset = MsDataset.load(
    args.val_dataset_name,
    subset_name=args.val_subset_name,
    split=args.val_split)

# Download the model snapshot and override its model type (and, for
# chatglm6b, the p-tuning-v2 settings) before instantiating it.
model_dir = snapshot_download(args.model)
model_config = read_config(model_dir)
model_config['model'] = ConfigDict({
    'type': config['model']['type'],
})
if config['model']['type'] == 'chatglm6b':
    model_config['model']['pre_seq_len'] = args.pre_seq_len
    model_config['model']['prefix_projection'] = args.prefix_projection

tokenizer = ChatGLMTokenizer.from_pretrained(model_dir, trust_remote_code=True)
model = Model.from_pretrained(model_dir, cfg_dict=model_config)

if args.ptuning_checkpoint is not None:
    # Evaluation
    # Loading extra state dict of prefix encoder: keep only the
    # 'transformer.prefix_encoder.*' entries, with that prefix stripped.
    prefix_state_dict = torch.load(
        os.path.join(args.ptuning_checkpoint, 'pytorch_model.bin'))
    new_prefix_state_dict = {}
    for k, v in prefix_state_dict.items():
        if k.startswith('transformer.prefix_encoder.'):
            new_prefix_state_dict[k[len('transformer.prefix_encoder.'):]] = v
    model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)

if args.quantization_bit is not None:
    print(f'Quantized to {args.quantization_bit} bit')
    model = model.quantize(args.quantization_bit)
if args.pre_seq_len is not None:
    # P-tuning v2: fp16 backbone, trainable prefix encoder kept in fp32.
    model = model.half()
    model.transformer.prefix_encoder.float()
else:
    # Finetune
    model = model.float()

if args.use_lora != 0:
    # Wrap the attention QKV projections with LoRA adapters.
    lora_config = LoRAConfig(
        replace_modules=['attention.query_key_value'],
        rank=args.lora_rank,
        lora_alpha=args.lora_alpha,
        lora_dropout=args.lora_dropout)
    model = model.bfloat16()
    Swift.prepare_model(model, lora_config)

prefix = args.source_prefix if args.source_prefix is not None else ''

# Get the column names for input/target.
prompt_column = args.prompt_column
response_column = args.response_column
history_column = args.history_column

# Temporarily set max_target_length for training.
max_target_length = args.max_target_length

# Report parameter counts. NOTE(review): the percentage is trainable
# relative to *non-trainable* params (not to the total) and divides by
# zero when everything is trainable — confirm this is intended.
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
trainable_params = sum([np.prod(p.size()) for p in model_parameters])
model_parameters = filter(lambda p: not p.requires_grad, model.parameters())
non_trainable_params = sum([np.prod(p.size()) for p in model_parameters])
print('trainable_params:{} ({:.2f}%), non_trainable_params:{}'.format(
    trainable_params, trainable_params / non_trainable_params * 100,
    non_trainable_params))
def preprocess_function_eval(examples):
    """Tokenize a batch of eval examples into model inputs plus labels.

    Builds a multi-turn '[Round i]\n问:...\n答:...' prompt from the history
    column when present, prepends the global `prefix`, and (optionally)
    masks pad tokens in the labels with -100.
    """
    sources, targets = [], []
    for idx in range(len(examples[prompt_column])):
        # Skip rows where either side is empty/missing.
        if not (examples[prompt_column][idx]
                and examples[response_column][idx]):
            continue
        query = examples[prompt_column][idx]
        if history_column is None or len(examples[history_column][idx]) == 0:
            chat_prompt = query
        else:
            history = examples[history_column][idx]
            rounds = [
                '[Round {}]\n问:{}\n答:{}\n'.format(turn, old_query, response)
                for turn, (old_query, response) in enumerate(history)
            ]
            chat_prompt = ''.join(rounds) + '[Round {}]\n问:{}\n答:'.format(
                len(history), query)
        sources.append(prefix + chat_prompt)
        targets.append(examples[response_column][idx])

    model_inputs = tokenizer(
        sources,
        max_length=args.max_source_length,
        truncation=True,
        padding=True)
    labels = tokenizer(
        text_target=targets, max_length=max_target_length, truncation=True)

    if args.ignore_pad_token_for_loss:
        labels['input_ids'] = [
            [-100 if tok == tokenizer.pad_token_id else tok for tok in seq]
            for seq in labels['input_ids']
        ]
    model_inputs['labels'] = labels['input_ids']
    return model_inputs
def preprocess_function_train(examples):
    """Tokenize a batch of train examples into padded input_ids/labels.

    Prompt and answer are encoded separately, truncated to the source/target
    budgets, then joined with the model's special tokens; labels mask the
    prompt span with -100 so the loss only covers the answer.
    """
    max_seq_length = args.max_source_length + args.max_target_length

    model_inputs = {
        'input_ids': [],
        'labels': [],
    }
    for i in range(len(examples[prompt_column])):
        if examples[prompt_column][i] and examples[response_column][i]:
            query, answer = examples[prompt_column][i], examples[
                response_column][i]

            # Build the multi-turn chat prompt (same format as eval).
            if history_column is None:
                prompt = query
            else:
                prompt = ''
                history = examples[history_column][i]
                for turn_idx, (old_query, response) in enumerate(history):
                    prompt += '[Round {}]\n问:{}\n答:{}\n'.format(
                        turn_idx, old_query, response)
                prompt += '[Round {}]\n问:{}\n答:'.format(len(history), query)

            prompt = prefix + prompt

            a_ids = tokenizer.encode(text=prompt, add_special_tokens=False)
            b_ids = tokenizer.encode(text=answer, add_special_tokens=False)

            # Reserve room for the special tokens added below
            # (-1 / -2 budgets — presumably gMASK/sop and eos; TODO confirm
            # against the tokenizer's build_inputs_with_special_tokens).
            if len(a_ids) > args.max_source_length - 1:
                a_ids = a_ids[:args.max_source_length - 1]

            if len(b_ids) > args.max_target_length - 2:
                b_ids = b_ids[:args.max_target_length - 2]

            input_ids = tokenizer.build_inputs_with_special_tokens(
                a_ids, b_ids)

            # Locate where the answer starts: chatglm6b marks it with BOS;
            # chatglm2 prepends two special tokens before the prompt.
            if config['model']['type'] == 'chatglm6b':
                context_length = input_ids.index(tokenizer.bos_token_id)
            else:
                context_length = len(a_ids) + 2
            mask_position = context_length - 1
            # Mask the prompt; supervise from the token after mask_position.
            labels = [-100] * context_length + input_ids[mask_position + 1:]

            # Right-pad both sequences to the fixed max_seq_length.
            pad_len = max_seq_length - len(input_ids)
            input_ids = input_ids + [tokenizer.pad_token_id] * pad_len
            if config['model']['type'] == 'chatglm6b':
                labels = labels + [tokenizer.pad_token_id] * pad_len
                if args.ignore_pad_token_for_loss:
                    labels = [(lb if lb != tokenizer.pad_token_id else -100)
                              for lb in labels]
            else:
                labels = labels + [-100] * pad_len

            model_inputs['input_ids'].append(input_ids)
            model_inputs['labels'].append(labels)
    return model_inputs
# Tokenize both datasets with the batched preprocessing functions above.
train_dataset = train_dataset.to_hf_dataset().map(
    preprocess_function_train,
    batched=True,
    num_proc=args.preprocessing_num_workers,
    desc='Running tokenizer on train dataset',
)
validation_dataset = validation_dataset.to_hf_dataset().map(
    preprocess_function_eval,
    batched=True,
    num_proc=args.preprocessing_num_workers,
    desc='Running tokenizer on eval dataset',
)

# Data collator
label_pad_token_id = -100 if args.ignore_pad_token_for_loss else tokenizer.pad_token_id
data_collator = DataCollatorForSeq2Seq(
    tokenizer,
    model=model,
    label_pad_token_id=label_pad_token_id,
    pad_to_multiple_of=None,
    padding=False)

# Trade compute for memory; chatglm6b additionally needs input grads so
# checkpointing works with frozen input embeddings.
model.gradient_checkpointing_enable()
if config['model']['type'] == 'chatglm6b':
    model.enable_input_require_grads()

trainer = Seq2SeqTrainer(
    model=model,
    cfg_file='./configuration.json',
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    seed=args.seed,
    data_collator=data_collator,
    remove_unused_data=True,
    cfg_modify_fn=cfg_modify_fn)
# evaluation_step decodes predictions/targets with this tokenizer.
trainer.tokenizer = tokenizer
trainer.train()

View File

@@ -0,0 +1,31 @@
# Inference example: load chatglm2-6b, attach LoRA weights produced by
# finetune.py (see run_train_lora.sh), and chat through a pipeline.
from modelscope import Model, pipeline, read_config
from modelscope.metainfo import Models
from modelscope.swift import Swift
from modelscope.swift.lora import LoRAConfig
from modelscope.utils.config import ConfigDict

# Must match the LoRA geometry used in training; pretrained_weights points
# at a checkpoint saved under the training work_dir.
lora_config = LoRAConfig(
    replace_modules=['attention.query_key_value'],
    rank=32,
    lora_alpha=32,
    lora_dropout=0.05,
    pretrained_weights='./lora_dureader_target/iter_600.pth')

model_dir = 'ZhipuAI/chatglm2-6b'
model_config = read_config(model_dir)
# Force the registered model type so the right class is instantiated.
model_config['model'] = ConfigDict({
    'type': Models.chatglm2_6b,
})

model = Model.from_pretrained(model_dir, cfg_dict=model_config)
model = model.bfloat16()
Swift.prepare_model(model, lora_config)

pipe = pipeline('chat', model, pipeline_name='chatglm2_6b-text-generation')
print(
    pipe({
        'text':
        '纵使进入21世纪后我国教育水平有了明显进步高考的难度却依旧不容小觑高考被中国学生和家长定义为改变命运、改写人生脑重要考试为了这场考试学生和家长都付出了很多。',
        'history': []
    }))

View File

@@ -0,0 +1,34 @@
# Inference example: load ChatGLM-6B with a p-tuning-v2 prefix encoder and
# restore prefix weights trained by finetune.py (run_train_ptuning.sh).
import torch

from modelscope import Model, pipeline, read_config
from modelscope.metainfo import Models
from modelscope.utils.config import ConfigDict

model_dir = 'ZhipuAI/ChatGLM-6B'
model_config = read_config(model_dir)
# pre_seq_len must match the value used during training.
model_config['model'] = ConfigDict({
    'type': Models.chatglm_6b,
    'pre_seq_len': 128,
    'prefix_projection': False,
})

model = Model.from_pretrained(model_dir, cfg_dict=model_config)
# fp16 backbone; keep the prefix encoder in fp32.
model = model.half()
model.transformer.prefix_encoder.float()

# Keep only the prefix-encoder entries of the checkpoint, prefix stripped.
prefix_state_dict = torch.load('./ptuning_dureader_target/iter_900.pth')
new_prefix_state_dict = {}
for k, v in prefix_state_dict.items():
    if k.startswith('transformer.prefix_encoder.'):
        new_prefix_state_dict[k[len('transformer.prefix_encoder.'):]] = v
model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)

pipe = pipeline('chat', model)
print(
    pipe({
        'text':
        '维生素C也叫抗坏血酸所以它最重要的一个作用是预防坏血病。另外维生素C在控制感染和愈合伤口方面发挥作用是一种强大的抗氧化剂'
        '可以中和有害的自由基。维生素C还是合成胶原蛋白的重要营养成分胶原蛋白是结缔组织中的一种纤维蛋白它存在于身体的各个系统中'
        '神经系统、免疫系统、骨骼系统、软骨系统、血液系统和其他系统。维生素C有助于产生作用于大脑和神经的多种激素和化学信使。',
        'history': []
    }))

View File

@@ -0,0 +1,28 @@
# LoRA finetuning of ZhipuAI/chatglm2-6b on the DuReader_robust-QG dataset.
# Evaluates and checkpoints every 300 steps into ./lora_dureader_target.
# Run from the repository root (PYTHONPATH=.).
LR=5e-5

PYTHONPATH=. python examples/pytorch/chatglm6b/finetune.py \
    --train_dataset_name modelscope/DuReader_robust-QG \
    --val_dataset_name modelscope/DuReader_robust-QG \
    --train_subset_name default \
    --val_subset_name default \
    --train_split train \
    --val_split validation \
    --prompt_column text1 \
    --response_column text2 \
    --model "ZhipuAI/chatglm2-6b" \
    --max_source_length 64 \
    --max_target_length 64 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 1 \
    --train.optimizer.options.cumulative_iters 1 \
    --max_epochs 2 \
    --save_strategy 'by_step' \
    --save_interval 300 \
    --lr $LR \
    --eval_strategy "by_step" \
    --eval_interval 300 \
    --lr_strategy 'by_step' \
    --task 'chat' \
    --model.type 'chatglm2-6b' \
    --use_lora 1 \
    --work_dir lora_dureader_target \

View File

@@ -0,0 +1,24 @@
# LoRA finetuning of ZhipuAI/ChatGLM-6B on local AdvertiseGen JSON files.
# Evaluates and checkpoints every 1000 steps into ./lora_adv_target.
# Run from the repository root (PYTHONPATH=.).
LR=5e-5

PYTHONPATH=. python examples/pytorch/chatglm6b/finetune.py \
    --train_dataset_name AdvertiseGen/train.json \
    --val_dataset_name AdvertiseGen/dev.json \
    --prompt_column content \
    --response_column summary \
    --model "ZhipuAI/ChatGLM-6B" \
    --max_source_length 64 \
    --max_target_length 64 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 1 \
    --train.optimizer.options.cumulative_iters 1 \
    --max_epochs 1 \
    --save_strategy 'by_step' \
    --save_interval 1000 \
    --lr $LR \
    --eval_strategy "by_step" \
    --eval_interval 1000 \
    --lr_strategy 'by_step' \
    --task 'chat' \
    --model.type 'chatglm6b' \
    --use_lora 1 \
    --work_dir lora_adv_target \

View File

@@ -0,0 +1,28 @@
# LoRA finetuning of ZhipuAI/ChatGLM-6B on the DuReader_robust-QG dataset.
# Evaluates and checkpoints every 300 steps into ./lora_dureader_target.
# Run from the repository root (PYTHONPATH=.).
LR=5e-5

PYTHONPATH=. python examples/pytorch/chatglm6b/finetune.py \
    --train_dataset_name modelscope/DuReader_robust-QG \
    --val_dataset_name modelscope/DuReader_robust-QG \
    --train_subset_name default \
    --val_subset_name default \
    --train_split train \
    --val_split validation \
    --prompt_column text1 \
    --response_column text2 \
    --model "ZhipuAI/ChatGLM-6B" \
    --max_source_length 64 \
    --max_target_length 64 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 1 \
    --train.optimizer.options.cumulative_iters 1 \
    --max_epochs 2 \
    --save_strategy 'by_step' \
    --save_interval 300 \
    --lr $LR \
    --eval_strategy "by_step" \
    --eval_interval 300 \
    --lr_strategy 'by_step' \
    --task 'chat' \
    --model.type 'chatglm6b' \
    --use_lora 1 \
    --work_dir lora_dureader_target \

View File

@@ -0,0 +1,26 @@
# P-tuning-v2 finetuning of ZhipuAI/ChatGLM-6B on AdvertiseGen, with the
# backbone quantized to 4 bits; only the 128-token prefix encoder trains.
# Checkpoints go to ./ptuning_adv_target. Run from the repo root.
PRE_SEQ_LEN=128
LR=2e-2

PYTHONPATH=. python examples/pytorch/chatglm6b/finetune.py \
    --train_dataset_name AdvertiseGen/train.json \
    --val_dataset_name AdvertiseGen/dev.json \
    --prompt_column content \
    --response_column summary \
    --model "ZhipuAI/ChatGLM-6B" \
    --max_source_length 64 \
    --max_target_length 64 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 1 \
    --train.optimizer.options.cumulative_iters 1 \
    --max_epochs 1 \
    --save_strategy 'by_step' \
    --save_interval 1000 \
    --lr $LR \
    --eval_strategy "by_step" \
    --eval_interval 1000 \
    --lr_strategy 'by_step' \
    --task 'chat' \
    --model.type 'chatglm6b' \
    --pre_seq_len $PRE_SEQ_LEN \
    --quantization_bit 4 \
    --work_dir ptuning_adv_target \

View File

@@ -0,0 +1,30 @@
# P-tuning-v2 finetuning of ZhipuAI/ChatGLM-6B on DuReader_robust-QG, with
# the backbone quantized to 4 bits; only the 128-token prefix encoder trains.
# Checkpoints go to ./ptuning_dureader_target. Run from the repo root.
PRE_SEQ_LEN=128
LR=2e-2

PYTHONPATH=. python examples/pytorch/chatglm6b/finetune.py \
    --train_dataset_name modelscope/DuReader_robust-QG \
    --val_dataset_name modelscope/DuReader_robust-QG \
    --train_subset_name default \
    --val_subset_name default \
    --train_split train \
    --val_split validation \
    --prompt_column text1 \
    --response_column text2 \
    --model "ZhipuAI/ChatGLM-6B" \
    --max_source_length 64 \
    --max_target_length 64 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 1 \
    --train.optimizer.options.cumulative_iters 1 \
    --max_epochs 3 \
    --save_strategy 'by_step' \
    --save_interval 300 \
    --lr $LR \
    --eval_strategy "by_step" \
    --eval_interval 300 \
    --lr_strategy 'by_step' \
    --task 'chat' \
    --model.type 'chatglm6b' \
    --pre_seq_len $PRE_SEQ_LEN \
    --quantization_bit 4 \
    --work_dir ptuning_dureader_target \

View File

@@ -0,0 +1,85 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import Dict, Iterable, List
import jieba
import numpy as np
from nltk.translate.bleu_score import (SmoothingFunction, corpus_bleu,
sentence_bleu)
from rouge import Rouge
from modelscope.metainfo import Metrics
from modelscope.metrics.base import Metric
from modelscope.metrics.builder import METRICS, MetricKeys
from modelscope.utils.chinese_utils import rebuild_chinese_str
from modelscope.utils.registry import default_group
@METRICS.register_module(group_key=default_group, module_name='chatglm')
class TextGenerationMetric(Metric):
    """ROUGE-1/2/L (f-measure, x100) and BLEU-4 (x100) for Chinese text.

    Predictions and references are accumulated via `add` under configurable
    output/input keys, then scored in `evaluate`: both sides are segmented
    with jieba for ROUGE, while BLEU-4 is computed on raw character lists.
    """

    def __init__(self, target_text='tgts', pred_text='preds'):
        self.preds: List[str] = []
        self.tgts: List[str] = []
        # Single shared scorer; evaluate() reuses it for every sample.
        self.rouge = Rouge()
        self.target_text = target_text
        self.pred_text = pred_text

    def add(self, outputs: Dict[str, List[str]], inputs: Dict[str, List[str]]):
        """Accumulate one batch of decoded predictions and references."""
        self.tgts.extend(inputs[self.target_text])
        self.preds.extend(outputs[self.pred_text])

    def _check(self, pred: str, tgt: str) -> bool:
        """Return True when both strings are non-empty after stripping
        spaces and dots. NOTE(review): currently unused by evaluate()."""

        def remove_useless(string: str) -> str:
            return string.replace(' ', '').replace('.', '')

        return len(remove_useless(pred)) != 0 and len(remove_useless(tgt)) != 0

    def evaluate(self):
        """Return the mean rouge-1/rouge-2/rouge-l/bleu-4 over all pairs."""
        preds, labels = self.preds, self.tgts
        if isinstance(preds, tuple):
            preds = preds[0]
        score_dict = {
            'rouge-1': [],
            'rouge-2': [],
            'rouge-l': [],
            'bleu-4': []
        }
        for pred, label in zip(preds, labels):
            hypothesis = list(jieba.cut(pred))
            if len(hypothesis) == 0:
                # Rouge cannot score an empty hypothesis; use a dummy token.
                hypothesis = ['</s>']
            reference = list(jieba.cut(label))
            # Fix: reuse self.rouge instead of constructing a new Rouge()
            # on every loop iteration as the original did.
            scores = self.rouge.get_scores(' '.join(hypothesis),
                                           ' '.join(reference))
            result = scores[0]
            for k, v in result.items():
                score_dict[k].append(round(v['f'] * 100, 4))
            bleu_score = sentence_bleu(
                [list(label)],
                list(pred),
                smoothing_function=SmoothingFunction().method3)
            score_dict['bleu-4'].append(round(bleu_score * 100, 4))
        for k, v in score_dict.items():
            score_dict[k] = float(np.mean(v))
        return score_dict

    def merge(self, other: 'TextGenerationMetric'):
        """Merge state accumulated on another data-parallel worker."""
        self.preds.extend(other.preds)
        self.tgts.extend(other.tgts)

    def __getstate__(self):
        # Only the accumulated texts are pickled; scorer is rebuilt on load.
        return self.preds, self.tgts

    def __setstate__(self, state):
        self.__init__()
        self.preds, self.tgts = state

View File

@@ -165,6 +165,8 @@ class Models(object):
doc2bot = 'doc2bot'
peer = 'peer'
llama = 'llama'
chatglm_6b = 'chatglm6b'
chatglm2_6b = 'chatglm2-6b'
# audio models
sambert_hifigan = 'sambert-hifigan'

View File

@@ -191,7 +191,7 @@ class BlockPETL(nn.Module):
self.prompt = None
def forward(self, x):
if self.prompt is not None:
if self.prompt is not None and self.prompt_length and self.prompt_length > 0:
x = self.prompt(x)
x = x + self.drop_path1(self.ls1(self.attn(self.norm1(x))))

View File

@@ -19,8 +19,8 @@ from modelscope.metainfo import Models
from modelscope.models import TorchModel
from modelscope.models.builder import MODELS
from modelscope.outputs import OutputKeys
from modelscope.tuners.control_sd_lora import ControlLoRATuner
from modelscope.tuners.sd_lora import LoRATuner
from modelscope.swift.control_sd_lora import ControlLoRATuner
from modelscope.swift.sd_lora import LoRATuner
from modelscope.utils.checkpoint import save_checkpoint, save_configuration
from modelscope.utils.config import Config
from modelscope.utils.constant import ModelFile, Tasks

View File

@@ -22,6 +22,8 @@ if TYPE_CHECKING:
from .csanmt import CsanmtForTranslation
from .canmt import CanmtForTranslation
from .deberta_v2 import DebertaV2ForMaskedLM, DebertaV2Model
from .chatglm import ChatGLMForConditionalGeneration, ChatGLMTokenizer, ChatGLMConfig
from .chatglm2 import ChatGLM2ForConditionalGeneration, ChatGLM2Tokenizer, ChatGLM2Config
from .gpt_neo import GPTNeoModel
from .gpt2 import GPT2Model
from .gpt3 import GPT3ForTextGeneration, DistributedGPT3
@@ -95,6 +97,14 @@ else:
['CodeGeeXForCodeTranslation', 'CodeGeeXForCodeGeneration'],
'glm_130b': ['GLM130bForTextGeneration'],
'deberta_v2': ['DebertaV2ForMaskedLM', 'DebertaV2Model'],
'chatglm': [
'ChatGLMForConditionalGeneration', 'ChatGLMTokenizer',
'ChatGLMConfig'
],
'chatglm2': [
'ChatGLM2ForConditionalGeneration', 'ChatGLM2Tokenizer',
'ChatGLM2Config'
],
'heads': ['TextClassificationHead'],
'hf_transformers': ['TransformersModel'],
'gpt2': ['GPT2Model'],

View File

@@ -0,0 +1,46 @@
# flake8: noqa
# There's no way to ignore "F401 '...' imported but unused" warnings in this
# module, but to preserve other warnings. So, don't check this module at all.
# Copyright 2021-2022 The Alibaba DAMO NLP Team Authors.
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

# Static imports for type checkers only; at runtime this module object is
# replaced by a LazyImportModule so heavy submodules load lazily on first
# attribute access.
if TYPE_CHECKING:
    from .configuration import ChatGLMConfig
    from .tokenization import ChatGLMTokenizer
    from .text_generation import ChatGLMForConditionalGeneration
    from .quantization import (
        quantize, )

else:
    # Maps submodule name -> public names it provides for lazy resolution.
    _import_structure = {
        'configuration': ['ChatGLMConfig'],
        'text_generation': ['ChatGLMForConditionalGeneration'],
        'quantization': ['quantize'],
        'tokenization': [
            'ChatGLMTokenizer',
        ],
    }

    import sys

    sys.modules[__name__] = LazyImportModule(
        __name__,
        globals()['__file__'],
        _import_structure,
        module_spec=__spec__)

View File

@@ -0,0 +1,101 @@
""" ChatGLM model configuration """
from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging
logger = logging.get_logger(__name__)
class ChatGLMConfig(PretrainedConfig):
    r"""Configuration class for [`~ChatGLMModel`].

    Instantiating this configuration with the defaults yields a configuration
    similar to the ChatGLM-6B
    [THUDM/ChatGLM-6B](https://huggingface.co/THUDM/chatglm-6b) architecture.
    Configuration objects inherit from [`PretrainedConfig`], which documents
    the generic options; read its documentation for more information.

    Args:
        vocab_size (`int`, *optional*, defaults to 150528):
            Vocabulary size, i.e. the number of distinct tokens accepted in
            `inputs_ids` by [`~ChatGLMModel`] or [`~TFChatGLMModel`].
        hidden_size (`int`, *optional*, defaults to 4096):
            Dimension of the encoder layers and the pooler layer.
        num_layers (`int`, *optional*, defaults to 28):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 32):
            Number of attention heads per attention layer.
        inner_hidden_size (`int`, *optional*, defaults to 16384):
            Dimension of the feed-forward ("intermediate") layer.
        max_sequence_length (`int`, *optional*, defaults to 2048):
            Maximum sequence length the model may be used with.
        layernorm_epsilon (`float`, *optional*, defaults to 1e-5):
            Epsilon used by the layer-normalization layers.
        use_cache (`bool`, *optional*, defaults to `False`):
            Whether the model returns the last key/value attentions.

    Example:

    ```python
    >>> from modelscope.models.nlp.chatglm.configuration import ChatGLMConfig
    >>> from modelscope.models.nlp.chatglm.text_generation import ChatGLMModel

    >>> # Initializing a ChatGLM-6B THUDM/ChatGLM-6B style configuration
    >>> configuration = ChatGLMConfig()

    >>> # Initializing a model from the THUDM/ChatGLM-6B style configuration
    >>> model = ChatGLMModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
    """

    model_type = 'chatglm'

    def __init__(self,
                 vocab_size=150528,
                 hidden_size=4096,
                 num_layers=28,
                 num_attention_heads=32,
                 layernorm_epsilon=1e-5,
                 use_cache=False,
                 bos_token_id=150004,
                 eos_token_id=150005,
                 mask_token_id=150000,
                 gmask_token_id=150001,
                 pad_token_id=0,
                 max_sequence_length=2048,
                 inner_hidden_size=16384,
                 position_encoding_2d=True,
                 quantization_bit=0,
                 pre_seq_len=None,
                 prefix_projection=False,
                 **kwargs):
        # Architecture dimensions.
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.inner_hidden_size = inner_hidden_size
        self.num_layers = num_layers
        self.num_attention_heads = num_attention_heads
        self.max_sequence_length = max_sequence_length
        self.layernorm_epsilon = layernorm_epsilon
        self.position_encoding_2d = position_encoding_2d
        self.use_cache = use_cache
        # Special token ids.
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.pad_token_id = pad_token_id
        self.mask_token_id = mask_token_id
        self.gmask_token_id = gmask_token_id
        # Quantization / p-tuning-v2 options.
        self.quantization_bit = quantization_bit
        self.pre_seq_len = pre_seq_len
        self.prefix_projection = prefix_projection
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            **kwargs)

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,463 @@
"""Tokenization classes for ChatGLM."""
import os
from typing import Dict, List, Optional, Union
import numpy as np
import sentencepiece as spm
from transformers.tokenization_utils import PreTrainedTokenizer
from transformers.tokenization_utils_base import BatchEncoding, EncodedInput
from transformers.utils import PaddingStrategy, logging
logger = logging.get_logger(__name__)
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
'THUDM/chatglm-6b': 2048,
}
class TextTokenizer:
    """Minimal wrapper around a SentencePiece processor."""

    def __init__(self, model_path):
        self.sp = spm.SentencePieceProcessor()
        self.sp.Load(model_path)
        # Cached vocabulary size; also exposed through __len__.
        self.num_tokens = self.sp.vocab_size()

    def encode(self, text):
        """Text -> list of token ids."""
        return self.sp.EncodeAsIds(text)

    def decode(self, ids: List[int]):
        """List of token ids -> text."""
        return self.sp.DecodeIds(ids)

    def tokenize(self, text):
        """Text -> list of piece strings."""
        return self.sp.EncodeAsPieces(text)

    def convert_tokens_to_ids(self, tokens):
        """Piece strings -> their ids, element-wise."""
        return [self.sp.PieceToId(piece) for piece in tokens]

    def convert_token_to_id(self, token):
        return self.sp.PieceToId(token)

    def convert_id_to_token(self, idx):
        return self.sp.IdToPiece(idx)

    def __len__(self):
        return self.num_tokens
class SPTokenizer:
def __init__(
self,
vocab_file,
num_image_tokens=20000,
max_blank_length=80,
byte_fallback=True,
):
assert vocab_file is not None
self.vocab_file = vocab_file
self.num_image_tokens = num_image_tokens
self.special_tokens = [
'[MASK]', '[gMASK]', '[sMASK]', '<unused_0>', '<sop>', '<eop>',
'<ENC>', '<dBLOCK>'
]
self.max_blank_length = max_blank_length
self.byte_fallback = byte_fallback
self.text_tokenizer = TextTokenizer(vocab_file)
    def _get_text_tokenizer(self):
        # Accessor for the underlying SentencePiece wrapper.
        return self.text_tokenizer
    @staticmethod
    def get_blank_token(length: int):
        """Return the special token encoding a run of `length` spaces (>= 2)."""
        assert length >= 2
        return f'<|blank_{length}|>'
    @staticmethod
    def get_tab_token():
        """Return the special token encoding a tab character."""
        return '<|tab|>'
    @property
    def num_text_tokens(self):
        # Size of the underlying SentencePiece vocabulary.
        return self.text_tokenizer.num_tokens
    @property
    def num_tokens(self):
        # Total vocabulary: reserved image-token range plus text tokens.
        return self.num_image_tokens + self.num_text_tokens
@staticmethod
def _encode_whitespaces(text: str, max_len: int = 80):
text = text.replace('\t', SPTokenizer.get_tab_token())
for i in range(max_len, 1, -1):
text = text.replace(' ' * i, SPTokenizer.get_blank_token(i))
return text
def _preprocess(self, text: str, linebreak=True, whitespaces=True):
if linebreak:
text = text.replace('\n', '<n>')
if whitespaces:
text = self._encode_whitespaces(
text, max_len=self.max_blank_length)
return text
def encode(self,
text: str,
linebreak=True,
whitespaces=True,
add_dummy_prefix=True) -> List[int]:
"""
@param text: Text to encode.
@param linebreak: Whether to encode newline (\n) in text.
@param whitespaces: Whether to encode multiple whitespaces or tab in text, useful for source code encoding.
@param special_tokens: Whether to encode special token ([MASK], [gMASK], etc.) in text.
@param add_dummy_prefix: Whether to add dummy blank space in the beginning.
"""
text = self._preprocess(text, linebreak, whitespaces)
if not add_dummy_prefix:
text = '<n>' + text
tmp = self._get_text_tokenizer().encode(text)
tokens = [x + self.num_image_tokens for x in tmp]
return tokens if add_dummy_prefix else tokens[2:]
def decode(self, text_ids: List[int]) -> str:
ids = [int(_id) - self.num_image_tokens for _id in text_ids]
ids = [_id for _id in ids if _id >= 0]
text = self._get_text_tokenizer().decode(ids)
text = text.replace('<n>', '\n')
text = text.replace(SPTokenizer.get_tab_token(), '\t')
for i in range(2, self.max_blank_length + 1):
text = text.replace(self.get_blank_token(i), ' ' * i)
return text
def tokenize(self,
text: str,
linebreak=True,
whitespaces=True,
add_dummy_prefix=True) -> List[str]:
"""
@param text: Text to encode.
@param linebreak: Whether to encode newline (\n) in text.
@param whitespaces: Whether to encode multiple whitespaces or tab in text, useful for source code encoding.
@param special_tokens: Whether to encode special token ([MASK], [gMASK], etc.) in text.
@param add_dummy_prefix: Whether to add dummy blank space in the beginning.
"""
text = self._preprocess(text, linebreak, whitespaces)
if not add_dummy_prefix:
text = '<n>' + text
tokens = self._get_text_tokenizer().tokenize(text)
return tokens if add_dummy_prefix else tokens[2:]
def __getitem__(self, x: Union[int, str]):
if isinstance(x, int):
if x < self.num_image_tokens:
return '<image_{}>'.format(x)
else:
return self.text_tokenizer.convert_id_to_token(
x - self.num_image_tokens)
elif isinstance(x, str):
if x.startswith('<image_') and x.endswith(
'>') and x[7:-1].isdigit():
return int(x[7:-1])
else:
return self.text_tokenizer.convert_token_to_id(
x) + self.num_image_tokens
else:
raise ValueError('The key should be str or int.')
class ChatGLMTokenizer(PreTrainedTokenizer):
    """
    Construct a ChatGLM tokenizer. Based on byte-level Byte-Pair-Encoding.
    Args:
        vocab_file: Path to the vocabulary file.
        do_lower_case: Use lower case letters.
        remove_space: Remove spaces.
        bos_token: The bos token
        eos_token: The Eos Token
        end_token: The end token
        mask_token: The mask token
        gmask_token: The gmask token
        padding_side: The padding side
        num_image_tokens: The `num_image_tokens` in `SPTokenizer`
    """
    vocab_files_names = {'vocab_file': 'ice_text.model'}
    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
    model_input_names = ['input_ids', 'attention_mask', 'position_ids']
    def __init__(self,
                 vocab_file,
                 do_lower_case=False,
                 remove_space=False,
                 bos_token='<sop>',
                 eos_token='<eop>',
                 end_token='</s>',
                 mask_token='[MASK]',
                 gmask_token='[gMASK]',
                 padding_side='left',
                 num_image_tokens=20000,
                 **kwargs) -> None:
        super().__init__(
            do_lower_case=do_lower_case,
            remove_space=remove_space,
            padding_side=padding_side,
            bos_token=bos_token,
            eos_token=eos_token,
            end_token=end_token,
            mask_token=mask_token,
            gmask_token=gmask_token,
            num_image_tokens=num_image_tokens,
            **kwargs)
        self.do_lower_case = do_lower_case
        self.remove_space = remove_space
        self.vocab_file = vocab_file
        self.bos_token = bos_token
        self.eos_token = eos_token
        self.end_token = end_token
        self.mask_token = mask_token
        self.gmask_token = gmask_token
        self.sp_tokenizer = SPTokenizer(
            vocab_file, num_image_tokens=num_image_tokens)
        """ Initialisation """
    @property
    def gmask_token_id(self) -> Optional[int]:
        # Id of the [gMASK] token, or None if it is not configured.
        if self.gmask_token is None:
            return None
        return self.convert_tokens_to_ids(self.gmask_token)
    @property
    def end_token_id(self) -> Optional[int]:
        """
        `Optional[int]`: Id of the end of context token in the vocabulary. Returns `None` if the token has not been
        set.
        """
        if self.end_token is None:
            return None
        return self.convert_tokens_to_ids(self.end_token)
    @property
    def vocab_size(self):
        """ Returns vocab size """
        return self.sp_tokenizer.num_tokens
    def get_vocab(self):
        """ Returns vocab as a dict """
        vocab = {
            self._convert_id_to_token(i): i
            for i in range(self.vocab_size)
        }
        vocab.update(self.added_tokens_encoder)
        return vocab
    def preprocess_text(self, inputs):
        # Optional space collapsing and lower-casing before tokenization.
        if self.remove_space:
            outputs = ' '.join(inputs.strip().split())
        else:
            outputs = inputs
        if self.do_lower_case:
            outputs = outputs.lower()
        return outputs
    def _tokenize(self, text, **kwargs):
        """ Returns a tokenized string. """
        text = self.preprocess_text(text)
        seq = self.sp_tokenizer.tokenize(text)
        return seq
    def _decode(self,
                token_ids: Union[int, List[int]],
                skip_special_tokens: bool = False,
                clean_up_tokenization_spaces: bool = True,
                **kwargs) -> str:
        if isinstance(token_ids, int):
            token_ids = [token_ids]
        if len(token_ids) == 0:
            return ''
        if self.pad_token_id in token_ids:  # remove pad
            token_ids = list(filter((self.pad_token_id).__ne__, token_ids))
        return self.sp_tokenizer.decode(token_ids)
    def _convert_token_to_id(self, token):
        """ Converts a token (str) in an id using the vocab. """
        return self.sp_tokenizer[token]
    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        return self.sp_tokenizer[index]
    def save_vocabulary(self, save_directory, filename_prefix=None):
        """
        Save the vocabulary and special tokens file to a directory.
        Args:
            save_directory (`str`):
                The directory in which to save the vocabulary.
            filename_prefix (`str`, *optional*):
                An optional prefix to add to the names of the saved files.
        Returns:
            `Tuple(str)`: Paths to the files saved.
        """
        if os.path.isdir(save_directory):
            vocab_file = os.path.join(save_directory,
                                      self.vocab_files_names['vocab_file'])
        else:
            vocab_file = save_directory
        # Copy the original SentencePiece model file verbatim.
        with open(self.vocab_file, 'rb') as fin:
            proto_str = fin.read()
        with open(vocab_file, 'wb') as writer:
            writer.write(proto_str)
        return (vocab_file, )
    def build_inputs_with_special_tokens(
            self,
            token_ids_0: List[int],
            token_ids_1: Optional[List[int]] = None) -> List[int]:
        """
        Build model inputs from a sequence or a pair of sequence by adding
        the ChatGLM special tokens. The resulting format is:
        - single sequence: `X [gMASK] <sop>`
        - pair of sequences: `A [gMASK] <sop> B <eop>`
        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
        Returns:
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        mask_ids = self.sp_tokenizer[self.mask_token]
        gmask_ids = self.sp_tokenizer[self.gmask_token]
        eos_id = self.sp_tokenizer[self.eos_token]
        # Ensure exactly one mask token, then the end/bos markers.
        if mask_ids not in token_ids_0 and gmask_ids not in token_ids_0:
            token_ids_0 += [gmask_ids]
        if token_ids_0[-1] != mask_ids and token_ids_0[-1] != gmask_ids:
            token_ids_0 += [self.sp_tokenizer[self.end_token]]
        token_ids_0 += [self.sp_tokenizer[self.bos_token]]
        if token_ids_1 is not None:
            if not token_ids_1 or token_ids_1[-1] != eos_id:
                token_ids_1 += [eos_id]
            token_ids_0 += token_ids_1
        return token_ids_0
    def _pad(
        self,
        encoded_inputs: Union[Dict[str, EncodedInput], BatchEncoding],
        max_length: Optional[int] = None,
        padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
        pad_to_multiple_of: Optional[int] = None,
        return_attention_mask: Optional[bool] = None,
    ) -> dict:
        """
        Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
        Args:
            encoded_inputs:
                Dictionary of tokenized inputs (`List[int]`) or batch of tokenized inputs (`List[List[int]]`).
            max_length: maximum length of the returned list and optionally padding length (see below).
                Will truncate by taking into account the special tokens.
            padding_strategy: PaddingStrategy to use for padding.
                - PaddingStrategy.LONGEST Pad to the longest sequence in the batch
                - PaddingStrategy.MAX_LENGTH: Pad to the max length (default)
                - PaddingStrategy.DO_NOT_PAD: Do not pad
                The tokenizer padding sides are defined in self.padding_side:
                    - 'left': pads on the left of the sequences
                    - 'right': pads on the right of the sequences
            pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
                This is especially useful to enable the use of Tensor Core on NVIDIA hardware with compute capability
                `>= 7.5` (Volta).
            return_attention_mask:
                (optional) Set to False to avoid returning attention mask (default: set to model specifics)
        """
        # Load from model defaults
        bos_token_id = self.sp_tokenizer[self.bos_token]
        mask_token_id = self.sp_tokenizer[self.mask_token]
        gmask_token_id = self.sp_tokenizer[self.gmask_token]
        assert self.padding_side == 'left'
        required_input = encoded_inputs[self.model_input_names[0]]
        seq_length = len(required_input)
        if padding_strategy == PaddingStrategy.LONGEST:
            max_length = len(required_input)
        if max_length is not None and pad_to_multiple_of is not None and (
                max_length % pad_to_multiple_of != 0):
            max_length = (
                (max_length // pad_to_multiple_of) + 1) * pad_to_multiple_of
        needs_to_be_padded = padding_strategy != PaddingStrategy.DO_NOT_PAD and len(
            required_input) != max_length
        # Initialize attention mask if not present.
        if max_length is not None:
            if 'attention_mask' not in encoded_inputs:
                if bos_token_id in required_input:
                    # Everything before <sop> is the bidirectional context.
                    context_length = required_input.index(bos_token_id)
                else:
                    context_length = seq_length
                # Lower-triangular causal mask with the context part fully
                # visible; after the comparison, True marks masked positions.
                attention_mask = np.ones((1, seq_length, seq_length))
                attention_mask = np.tril(attention_mask)
                attention_mask[:, :, :context_length] = 1
                attention_mask = np.bool_(attention_mask < 0.5)
                encoded_inputs['attention_mask'] = attention_mask
            if 'position_ids' not in encoded_inputs:
                # NOTE(review): `context_length` is only assigned inside the
                # attention-mask branch above; if 'attention_mask' is already
                # present while 'position_ids' is not, the references below
                # raise NameError -- TODO confirm callers never hit that case.
                position_ids = np.arange(seq_length, dtype=np.int64)
                mask_token = mask_token_id if mask_token_id in required_input else gmask_token_id
                if mask_token in required_input:
                    # 2D positions: generated tokens all share the mask slot.
                    mask_position = required_input.index(mask_token)
                    position_ids[context_length:] = mask_position
                block_position_ids = np.concatenate([
                    np.zeros(context_length, dtype=np.int64),
                    np.arange(
                        1, seq_length - context_length + 1, dtype=np.int64)
                ])
                encoded_inputs['position_ids'] = np.stack(
                    [position_ids, block_position_ids], axis=0)
        if needs_to_be_padded:
            difference = max_length - len(required_input)
            if 'attention_mask' in encoded_inputs:
                # Pad the 3D mask with True (= masked out) on the left.
                encoded_inputs['attention_mask'] = np.pad(
                    encoded_inputs['attention_mask'],
                    pad_width=[(0, 0), (difference, 0), (difference, 0)],
                    mode='constant',
                    constant_values=True)
            if 'token_type_ids' in encoded_inputs:
                encoded_inputs['token_type_ids'] = [
                    self.pad_token_type_id
                ] * difference + encoded_inputs['token_type_ids']
            if 'special_tokens_mask' in encoded_inputs:
                encoded_inputs['special_tokens_mask'] = [
                    1
                ] * difference + encoded_inputs['special_tokens_mask']
            if 'position_ids' in encoded_inputs:
                encoded_inputs['position_ids'] = np.pad(
                    encoded_inputs['position_ids'],
                    pad_width=[(0, 0), (difference, 0)])
            encoded_inputs[self.model_input_names[
                0]] = [self.pad_token_id] * difference + required_input
        return encoded_inputs

View File

@@ -0,0 +1,46 @@
# flake8: noqa
# There's no way to ignore "F401 '...' imported but unused" warnings in this
# module, but to preserve other warnings. So, don't check this module at all.
# Copyright 2021-2022 The Alibaba DAMO NLP Team Authors.
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING
from modelscope.utils.import_utils import LazyImportModule
if TYPE_CHECKING:
from .configuration import ChatGLM2Config
from .tokenization import ChatGLM2Tokenizer
from .text_generation import ChatGLM2ForConditionalGeneration
from .quantization import (
quantize, )
else:
_import_structure = {
'configuration': ['ChatGLM2Config'],
'text_generation': ['ChatGLM2ForConditionalGeneration'],
'quantization': ['quantize'],
'tokenization': [
'ChatGLM2Tokenizer',
],
}
import sys
sys.modules[__name__] = LazyImportModule(
__name__,
globals()['__file__'],
_import_structure,
module_spec=__spec__)

View File

@@ -0,0 +1,58 @@
""" ChatGLM model configuration """
from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging
logger = logging.get_logger(__name__)
class ChatGLM2Config(PretrainedConfig):
    """Configuration class for the ChatGLM2-6B model.

    Every constructor argument is stored as an attribute so the value
    survives `to_dict()` / `from_pretrained()` serialization round trips.
    """

    def __init__(self,
                 num_layers=28,
                 padded_vocab_size=65024,
                 hidden_size=4096,
                 ffn_hidden_size=13696,
                 kv_channels=128,
                 num_attention_heads=32,
                 seq_length=2048,
                 hidden_dropout=0.0,
                 attention_dropout=0.0,
                 layernorm_epsilon=1e-5,
                 rmsnorm=True,
                 apply_residual_connection_post_layernorm=False,
                 post_layer_norm=True,
                 add_bias_linear=False,
                 add_qkv_bias=False,
                 interleaved_qkv=False,
                 bias_dropout_fusion=True,
                 multi_query_attention=False,
                 multi_query_group_num=1,
                 apply_query_key_layer_scaling=True,
                 attention_softmax_in_fp32=True,
                 fp32_residual_connection=False,
                 quantization_bit=0,
                 **kwargs):
        self.num_layers = num_layers
        self.padded_vocab_size = padded_vocab_size
        self.hidden_size = hidden_size
        self.ffn_hidden_size = ffn_hidden_size
        self.kv_channels = kv_channels
        self.num_attention_heads = num_attention_heads
        self.seq_length = seq_length
        self.hidden_dropout = hidden_dropout
        self.attention_dropout = attention_dropout
        self.layernorm_epsilon = layernorm_epsilon
        self.rmsnorm = rmsnorm
        self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm
        self.post_layer_norm = post_layer_norm
        self.add_bias_linear = add_bias_linear
        self.add_qkv_bias = add_qkv_bias
        # Fix: this argument was previously accepted but never stored, so it
        # was silently dropped and lost on serialization round trips.
        self.interleaved_qkv = interleaved_qkv
        self.bias_dropout_fusion = bias_dropout_fusion
        self.multi_query_attention = multi_query_attention
        self.multi_query_group_num = multi_query_group_num
        self.apply_query_key_layer_scaling = apply_query_key_layer_scaling
        self.attention_softmax_in_fp32 = attention_softmax_in_fp32
        self.fp32_residual_connection = fp32_residual_connection
        self.quantization_bit = quantization_bit
        super().__init__(**kwargs)

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,251 @@
"""Tokenization classes for ChatGLM."""
import os
from typing import Dict, List, Optional, Union
from sentencepiece import SentencePieceProcessor
from transformers.tokenization_utils import PreTrainedTokenizer
from transformers.tokenization_utils_base import BatchEncoding, EncodedInput
from transformers.utils import PaddingStrategy, logging
logger = logging.get_logger(__name__)
class SPTokenizer:
    """SentencePiece wrapper for ChatGLM2 that appends a handful of special
    tokens immediately after the base vocabulary."""

    def __init__(self, model_path: str):
        # reload tokenizer
        assert os.path.isfile(model_path), model_path
        self.sp_model = SentencePieceProcessor(model_file=model_path)
        # BOS / EOS token IDs
        self.n_words: int = self.sp_model.vocab_size()
        self.bos_id: int = self.sp_model.bos_id()
        self.eos_id: int = self.sp_model.eos_id()
        self.pad_id: int = self.sp_model.eos_id()
        assert self.sp_model.vocab_size() == self.sp_model.get_piece_size()
        # Special tokens receive ids right after the SentencePiece vocab.
        names = ['[MASK]', '[gMASK]', '[sMASK]', 'sop', 'eop']
        base = self.n_words
        self.special_tokens = {
            name: base + offset
            for offset, name in enumerate(names)
        }
        self.index_special_tokens = {
            idx: name
            for name, idx in self.special_tokens.items()
        }
        self.n_words += len(names)

    def tokenize(self, s: str):
        return self.sp_model.EncodeAsPieces(s)

    def encode(self,
               s: str,
               bos: bool = False,
               eos: bool = False) -> List[int]:
        assert type(s) is str
        ids = self.sp_model.encode(s)
        prefix = [self.bos_id] if bos else []
        suffix = [self.eos_id] if eos else []
        return prefix + ids + suffix

    def decode(self, t: List[int]) -> str:
        return self.sp_model.decode(t)

    def decode_tokens(self, tokens: List[str]) -> str:
        return self.sp_model.DecodePieces(tokens)

    def convert_token_to_id(self, token):
        """ Converts a token (str) in an id using the vocab. """
        if token in self.special_tokens:
            return self.special_tokens[token]
        return self.sp_model.PieceToId(token)

    def convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        # Appended special tokens have no surface form here.
        if index in self.index_special_tokens:
            return ''
        return self.sp_model.IdToPiece(index)
class ChatGLM2Tokenizer(PreTrainedTokenizer):
    """Tokenizer for ChatGLM2-6B, backed by :class:`SPTokenizer`.

    Args:
        vocab_file: Path to the SentencePiece model file.
        padding_side: Side to pad on; only 'left' is supported by `_pad`.
    """
    vocab_files_names = {'vocab_file': 'tokenizer.model'}
    model_input_names = ['input_ids', 'attention_mask', 'position_ids']

    def __init__(self, vocab_file, padding_side='left', **kwargs):
        super().__init__(padding_side=padding_side, **kwargs)
        self.name = 'GLMTokenizer'
        # Fix: save_vocabulary() reads self.vocab_file, which was never set
        # before, causing an AttributeError when saving the tokenizer.
        self.vocab_file = vocab_file
        self.tokenizer = SPTokenizer(vocab_file)
        self.special_tokens = {
            '<bos>': self.tokenizer.bos_id,
            '<eos>': self.tokenizer.eos_id,
            '<pad>': self.tokenizer.pad_id
        }

    def get_command(self, token):
        """Resolve a named special token ('<bos>', '[gMASK]', ...) to its id."""
        if token in self.special_tokens:
            return self.special_tokens[token]
        assert token in self.tokenizer.special_tokens, f'{token} is not a special token for {self.name}'
        return self.tokenizer.special_tokens[token]

    @property
    def pad_token(self) -> str:
        return '</s>'

    @property
    def pad_token_id(self):
        return self.get_command('<pad>')

    @property
    def eos_token_id(self):
        return self.get_command('<eos>')

    @property
    def vocab_size(self):
        return self.tokenizer.n_words

    def get_vocab(self):
        """ Returns vocab as a dict """
        vocab = {
            self._convert_id_to_token(i): i
            for i in range(self.vocab_size)
        }
        vocab.update(self.added_tokens_encoder)
        return vocab

    def _tokenize(self, text, **kwargs):
        return self.tokenizer.tokenize(text)

    def _convert_token_to_id(self, token):
        """ Converts a token (str) in an id using the vocab. """
        return self.tokenizer.convert_token_to_id(token)

    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        return self.tokenizer.convert_id_to_token(index)

    def convert_tokens_to_string(self, tokens: List[str]) -> str:
        return self.tokenizer.decode_tokens(tokens)

    def save_vocabulary(self, save_directory, filename_prefix=None):
        """
        Save the vocabulary and special tokens file to a directory.
        Args:
            save_directory (`str`):
                The directory in which to save the vocabulary.
            filename_prefix (`str`, *optional*):
                An optional prefix to add to the names of the saved files.
        Returns:
            `Tuple(str)`: Paths to the files saved.
        """
        if os.path.isdir(save_directory):
            vocab_file = os.path.join(save_directory,
                                      self.vocab_files_names['vocab_file'])
        else:
            vocab_file = save_directory
        # Copy the original SentencePiece model file verbatim.
        with open(self.vocab_file, 'rb') as fin:
            proto_str = fin.read()
        with open(vocab_file, 'wb') as writer:
            writer.write(proto_str)
        return (vocab_file, )

    def get_prefix_tokens(self):
        # Every ChatGLM2 input sequence starts with [gMASK] + sop.
        prefix_tokens = [self.get_command('[gMASK]'), self.get_command('sop')]
        return prefix_tokens

    def build_inputs_with_special_tokens(
            self,
            token_ids_0: List[int],
            token_ids_1: Optional[List[int]] = None) -> List[int]:
        """
        Build model inputs from a sequence or a pair of sequence by adding
        special tokens. A ChatGLM2 sequence has the following format:
        - single sequence: `[gMASK] sop X`
        - pair of sequences: `[gMASK] sop A B <eos>`
        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
        Returns:
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        prefix_tokens = self.get_prefix_tokens()
        token_ids_0 = prefix_tokens + token_ids_0
        if token_ids_1 is not None:
            token_ids_0 = token_ids_0 + token_ids_1 + [
                self.get_command('<eos>')
            ]
        return token_ids_0

    def _pad(
        self,
        encoded_inputs: Union[Dict[str, EncodedInput], BatchEncoding],
        max_length: Optional[int] = None,
        padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
        pad_to_multiple_of: Optional[int] = None,
        return_attention_mask: Optional[bool] = None,
    ) -> dict:
        """
        Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
        Args:
            encoded_inputs:
                Dictionary of tokenized inputs (`List[int]`) or batch of tokenized inputs (`List[List[int]]`).
            max_length: maximum length of the returned list and optionally padding length (see below).
                Will truncate by taking into account the special tokens.
            padding_strategy: PaddingStrategy to use for padding.
                - PaddingStrategy.LONGEST Pad to the longest sequence in the batch
                - PaddingStrategy.MAX_LENGTH: Pad to the max length (default)
                - PaddingStrategy.DO_NOT_PAD: Do not pad
                The tokenizer padding sides are defined in self.padding_side:
                    - 'left': pads on the left of the sequences
                    - 'right': pads on the right of the sequences
            pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
                This is especially useful to enable the use of Tensor Core on NVIDIA hardware with compute capability
                `>= 7.5` (Volta).
            return_attention_mask:
                (optional) Set to False to avoid returning attention mask (default: set to model specifics)
        """
        # Load from model defaults
        assert self.padding_side == 'left'
        required_input = encoded_inputs[self.model_input_names[0]]
        seq_length = len(required_input)
        if padding_strategy == PaddingStrategy.LONGEST:
            max_length = len(required_input)
        if max_length is not None and pad_to_multiple_of is not None and (
                max_length % pad_to_multiple_of != 0):
            max_length = (
                (max_length // pad_to_multiple_of) + 1) * pad_to_multiple_of
        needs_to_be_padded = padding_strategy != PaddingStrategy.DO_NOT_PAD and len(
            required_input) != max_length
        # Initialize attention mask if not present.
        if 'attention_mask' not in encoded_inputs:
            encoded_inputs['attention_mask'] = [1] * seq_length
        if 'position_ids' not in encoded_inputs:
            encoded_inputs['position_ids'] = list(range(seq_length))
        if needs_to_be_padded:
            difference = max_length - len(required_input)
            # Left-pad mask with 0 (ignored) and position ids with 0.
            if 'attention_mask' in encoded_inputs:
                encoded_inputs['attention_mask'] = [
                    0
                ] * difference + encoded_inputs['attention_mask']
            if 'position_ids' in encoded_inputs:
                encoded_inputs['position_ids'] = [
                    0
                ] * difference + encoded_inputs['position_ids']
            encoded_inputs[self.model_input_names[
                0]] = [self.pad_token_id] * difference + required_input
        return encoded_inputs

View File

@@ -1,5 +1,6 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
# Copyright (c) 2022 Zhipu.AI
import os
from typing import Any, Dict, Optional, Union
@@ -17,7 +18,10 @@ from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.hub import Config, read_config
from modelscope.utils.streaming_output import PipelineStreamingOutputMixin
__all__ = ['TextGenerationPipeline', 'TextGenerationT5Pipeline']
__all__ = [
'TextGenerationPipeline', 'TextGenerationT5Pipeline',
'ChatGLM6bTextGenerationPipeline', 'ChatGLM6bV2TextGenerationPipeline'
]
@PIPELINES.register_module(
@@ -177,3 +181,71 @@ class TextGenerationT5Pipeline(TextGenerationPipeline):
with torch.no_grad():
return self.model.generate(**inputs, **forward_params)
@PIPELINES.register_module(
    group_key=Tasks.chat, module_name='chatglm6b-text-generation')
class ChatGLM6bTextGenerationPipeline(Pipeline):
    """Chat pipeline wrapping ChatGLM-6B.

    The model is loaded (optionally quantized and/or cast to bfloat16),
    switched to eval mode, and queried through its own ``chat`` interface.
    """

    def __init__(self,
                 model: Union[Model, str],
                 quantization_bit=None,
                 use_bf16=False,
                 **kwargs):
        from modelscope.models.nlp.chatglm.text_generation import ChatGLMForConditionalGeneration
        if isinstance(model, str):
            # A model id / local path was supplied: instantiate the model.
            model = ChatGLMForConditionalGeneration(model)
        if quantization_bit is not None:
            model = model.quantize(quantization_bit)
        if use_bf16:
            model = model.bfloat16()
        self.model = model
        self.model.eval()
        super().__init__(model=model, **kwargs)

    def preprocess(self, inputs, **preprocess_params) -> Dict[str, Any]:
        # The chat interface consumes the raw inputs unchanged.
        return inputs

    # define the forward pass
    def forward(self, inputs: Dict, **forward_params) -> Dict[str, Any]:
        # Delegate generation to the model's own chat method.
        return self.model.chat(inputs)

    # format the outputs from pipeline
    def postprocess(self, input, **kwargs) -> Dict[str, Any]:
        # Model output is already in its final form.
        return input
@PIPELINES.register_module(
    group_key=Tasks.chat, module_name='chatglm2_6b-text-generation')
class ChatGLM6bV2TextGenerationPipeline(Pipeline):
    """Chat pipeline wrapping ChatGLM2-6B.

    Unlike the v1 pipeline, ChatGLM2's ``chat`` interface requires an
    explicit tokenizer, which is loaded from the model directory.
    """

    def __init__(self,
                 model: Union[Model, str],
                 quantization_bit=None,
                 use_bf16=False,
                 **kwargs):
        from modelscope.models.nlp import ChatGLM2ForConditionalGeneration, ChatGLM2Tokenizer
        if isinstance(model, str):
            # A model id / local path was supplied: instantiate the model.
            model = ChatGLM2ForConditionalGeneration(model)
        if quantization_bit is not None:
            model = model.quantize(quantization_bit)
        if use_bf16:
            model = model.bfloat16()
        self.model = model
        self.model.eval()
        self.tokenizer = ChatGLM2Tokenizer.from_pretrained(
            self.model.model_dir)
        super().__init__(model=model, **kwargs)

    def preprocess(self, inputs, **preprocess_params) -> Dict[str, Any]:
        # The chat interface consumes the raw inputs unchanged.
        return inputs

    # define the forward pass
    def forward(self, inputs: Dict, **forward_params) -> Dict[str, Any]:
        return self.model.chat(self.tokenizer, inputs['text'])

    # format the outputs from pipeline
    def postprocess(self, input, **kwargs) -> Dict[str, Any]:
        return input

View File

@@ -0,0 +1,38 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING
from modelscope.utils.import_utils import LazyImportModule
if TYPE_CHECKING:
from .optimizers.child_tuning_adamw_optimizer import calculate_fisher, ChildTuningAdamW
from .adapter import Adapter, AdapterConfig, AdapterModule
from .lora import LoRA, LoRAConfig, Linear, MergedLinear, Embedding, Conv2d
from .prompt import Prompt, PromptConfig, PromptModule
from .control_sd_lora import ControlLoRACrossAttnProcessor, ControlLoRACrossAttnProcessorV2, ControlLoRATuner
from .base import SwiftConfig, Swift
else:
_import_structure = {
'optimizers.child_tuning_adamw_optimizer':
['calculate_fisher', 'ChildTuningAdamW'],
'adapter': ['Adapter', 'AdapterConfig', 'AdapterModule'],
'lora': [
'LoRA', 'LoRAConfig', 'Linear', 'MergedLinear', 'Embedding',
'Conv2d'
],
'prompt': ['Prompt', 'PromptConfig', 'PromptModule'],
'control_sd_lora': [
'ControlLoRACrossAttnProcessor', 'ControlLoRACrossAttnProcessorV2',
'ControlLoRATuner'
],
'base': ['SwiftConfig', 'Swift']
}
import sys
sys.modules[__name__] = LazyImportModule(
__name__,
globals()['__file__'],
_import_structure,
module_spec=__spec__,
extra_objects={},
)

195
modelscope/swift/adapter.py Normal file
View File

@@ -0,0 +1,195 @@
import inspect
import os
import re
import types
from dataclasses import dataclass, field
from typing import Union
import torch
from torch import nn
from modelscope import snapshot_download
from modelscope.utils.constant import ModelFile
from .base import SwiftConfig
@dataclass
class AdapterConfig(SwiftConfig):
    """
    The configuration class for the adapter module.
    Adapters project input tokens by an MLP layer.
    'Parameter-Efficient Transfer Learning for NLP' by Houlsby et al.(2019)
    See http://arxiv.org/abs/1902.00751
    Args:
        dim: The dimension of the hidden states
        module_name: The feedforward module to be replaced, in regex format
        hidden_pos: The position of the hidden state to passed into the adapter, can be int (args) or str (kwargs)
        method_name: The method to be replaced, default to replace the forward method
        adapter_length: The length of the adapter length (intermediate length)
        act_layer: The activation layer of the adapter
        only_adapter_trainable: Whether to train only adapters
        pretrained_weights: The pretrained adapter weights.
            Can be a local dir, local file, or a model id from modelscope
    """
    # Required fields (no defaults).
    dim: int = field(metadata={'help': 'The dimension of the hidden states'})
    module_name: str = field(
        metadata={
            'help': 'The feedforward module to be replaced, in regex format'
        })
    hidden_pos: Union[str, int] = field(
        metadata={
            'help':
            'The position of the hidden state to passed into the adapter, can be int (args) or str (kwargs)'
        })
    # Optional fields.
    method_name: str = field(
        default='forward',
        metadata={
            'help':
            'The method to be replaced, default to replace the forward method'
        })
    adapter_length: int = field(
        default=128,
        metadata={
            'help': 'The length of the adapter length (intermediate length)'
        })
    # The default is the nn.GELU class itself (instantiated later), hence
    # the annotation is a type, not an nn.Module instance.
    act_layer: type = field(
        default=nn.GELU,
        metadata={'help': 'The activation layer of the adapter'})
    only_adapter_trainable: bool = field(
        default=True, metadata={'help': 'Whether to train only adapters'})
    pretrained_weights: Union[str, None] = field(
        default=None,
        metadata={
            'help':
            'The pretrained adapter weights. Can be a local dir, local file, or a model id from modelscope'
        })
class Adapter:

    @staticmethod
    def prepare_model(model: nn.Module, config: AdapterConfig):
        """Inject adapter modules into ``model`` according to ``config``.

        Every submodule whose qualified name fully matches
        ``config.module_name`` gets an :class:`AdapterModule` attached, and
        its ``config.method_name`` method is wrapped so the adapter is
        applied to the hidden state of the original output.

        Args:
            model: The model to tune (patched in place).
            config: The adapter configuration.

        Returns:
            The same ``model`` instance, patched.
        """
        module_keys = [key for key, _ in model.named_modules()]
        for module_key in module_keys:
            if re.fullmatch(config.module_name, module_key):  # noqa
                module = model.get_submodule(module_key)

                def _forward(self, *args, **kwargs):
                    args = self.forward_origin(*args, **kwargs)
                    # AdapterModule already adds the residual connection
                    # (identity + mlp(x)), so the hidden state is simply
                    # replaced by the adapter output in every branch.
                    if isinstance(args, (tuple, list, dict)):
                        if isinstance(config.hidden_pos, int):
                            # Fix: the previous code concatenated the raw
                            # tensor into the tuple (a TypeError) and added
                            # the residual a second time.
                            hidden = getattr(self, 'adapter')(
                                args[config.hidden_pos])
                            args = args[:config.hidden_pos] + type(args)(
                                [hidden]) + args[config.hidden_pos + 1:]
                        else:
                            # Fix: the previous code wrote into `kwargs` and
                            # returned `args` unmodified (a silent no-op).
                            args[config.hidden_pos] = getattr(
                                self, 'adapter')(args[config.hidden_pos])
                    elif isinstance(args, torch.Tensor):
                        args = getattr(self, 'adapter')(args)
                    return args

                def _feed_forward_chunk(self, attention_output):
                    return _forward(self, attention_output)

                module.forward_origin = getattr(module, config.method_name)
                num_args_in_forward_chunk_fn = len(
                    inspect.signature(module.forward_origin).parameters)
                # transformers' BertLayer.feed_forward_chunk takes a single
                # positional tensor, so it needs a dedicated wrapper.
                if config.method_name == 'feed_forward_chunk' and num_args_in_forward_chunk_fn == 1:
                    setattr(module, config.method_name,
                            types.MethodType(_feed_forward_chunk, module))
                else:
                    setattr(module, config.method_name,
                            types.MethodType(_forward, module))
                adapter_module = AdapterModule(config.dim,
                                               config.adapter_length,
                                               config.act_layer)
                setattr(module, 'adapter', adapter_module)
        if config.only_adapter_trainable:
            for n, p in model.named_parameters():
                if 'adapter' not in n:
                    p.requires_grad = False

        def state_dict_hook(module, destination, prefix, local_metadata):
            # Persist only the adapter weights when saving.
            return {
                key: value
                for key, value in destination.items() if 'adapter' in key
            }

        model.state_dict_hook_handle = model._register_state_dict_hook(
            state_dict_hook)

        def load_state_dict(self, state_dict, strict=True):
            # Saved checkpoints contain only adapter weights, so loading
            # must always be non-strict.
            return self.load_state_dict_origin(state_dict, False)

        model.load_state_dict_origin = model.load_state_dict
        model.load_state_dict = types.MethodType(load_state_dict, model)
        if config.pretrained_weights is not None:
            if not os.path.exists(config.pretrained_weights):
                # Not a local path: treat the value as a modelscope model id.
                model_dir = snapshot_download(config.pretrained_weights)
                pretrained_weights = os.path.join(
                    model_dir, ModelFile.TORCH_MODEL_BIN_FILE)
            elif os.path.isfile(config.pretrained_weights):
                pretrained_weights = config.pretrained_weights
            else:
                pretrained_weights = os.path.join(
                    config.pretrained_weights, ModelFile.TORCH_MODEL_BIN_FILE)
            model.load_state_dict(torch.load(pretrained_weights))
        return model
class AdapterModule(nn.Module):
    """Bottleneck adapter block: down-project, activate, up-project, then add
    the residual.

    'Parameter-Efficient Transfer Learning for NLP' by Houlsby et al.(2019)
    See http://arxiv.org/abs/1902.00751

    Attributes:
        dim: An integer indicating the embedding dimension.
        adapter_length: An integer indicating the bottleneck (intermediate)
            length of the adapter.
    """

    def __init__(self, dim, adapter_length=None, act_layer=nn.GELU):
        super().__init__()
        self.dim = dim
        self.adapter_length = adapter_length
        self.ln1 = nn.Linear(dim, adapter_length)
        self.activate = act_layer()
        self.ln2 = nn.Linear(adapter_length, dim)
        self.init_weights()

    def init_weights(self):
        """Xavier-initialize the projection weights; near-zero biases."""
        for submodule in self.modules():
            if isinstance(submodule, nn.Linear):
                nn.init.xavier_uniform_(submodule.weight)
                nn.init.normal_(submodule.bias, std=1e-6)

    def forward(self, x, identity=None):
        """Apply the bottleneck MLP to `x` and add `identity` (defaults to `x`)."""
        residual = x if identity is None else identity
        return residual + self.ln2(self.activate(self.ln1(x)))

31
modelscope/swift/base.py Normal file
View File

@@ -0,0 +1,31 @@
from dataclasses import dataclass
@dataclass
class SwiftConfig:
    """Base class for all SWIFT tuner configurations.

    Concrete tuners (LoRA, adapter, prompt) subclass this so that
    ``Swift.prepare_model`` can dispatch on the config's runtime type.
    """
    pass
class Swift:
    """Facade that applies a parameter-efficient tuner to a model."""

    @staticmethod
    def prepare_model(model, config: SwiftConfig):
        """Prepare the module and returns the new module.

        Args:
            model: The model to tune.
            config: The config of the tuner.

        Returns:
            The tuned model, or ``None`` when ``config`` is not a known
            tuner config type.
        """
        # Imported lazily to avoid import cycles between tuner modules.
        from .lora import LoRA, LoRAConfig
        from .adapter import Adapter, AdapterConfig
        from .prompt import Prompt, PromptConfig

        dispatch = (
            (LoRAConfig, LoRA),
            (AdapterConfig, Adapter),
            (PromptConfig, Prompt),
        )
        for config_cls, tuner_cls in dispatch:
            if isinstance(config, config_cls):
                return tuner_cls.prepare_model(model, config)
        return None

View File

@@ -4,93 +4,148 @@
import logging
import math
import os.path
import re
import types
from dataclasses import dataclass, field
from typing import Dict, List
import torch
import torch.nn as nn
import torch.nn.functional as F
from modelscope import snapshot_download
from modelscope.utils.constant import ModelFile
from .base import SwiftConfig
logger = logging.getLogger(__name__)
class LoRATuner:
@dataclass
class LoRAConfig(SwiftConfig):
    """
    The configuration class for the LoRA module.

    Args:
        rank: The rank of the LoRA module
        replace_modules: The modules to be replaced by LoRA, can be the end of the module name or a regex string
        lora_alpha: The factor to add the lora weights
        lora_dropout: The dropout rate of the lora module
        merge_weights: Whether to merge weights when validating
        use_merged_linear: Whether to replace with merged linear layer
        enable_lora: The modules need to be turned on when using the merged linear layer
        fan_in_fan_out: Set this to True if the layer to replace stores weight like (fan_in, fan_out)
        bias: Bias type. Values can be "none", "all" or "lora_only"
        only_lora_trainable: Whether to train only lora
        pretrained_weights: The pretrained lora weights.
            Can be a local dir, local file, or a model id from modelscope
    """

    rank: int = field(
        default=6, metadata={'help': 'The rank of the LoRA module'})
    replace_modules: List = field(
        default=None,
        metadata={
            'help':
            'The modules to be replaced by LoRA, can be the end of the module name or a regex string'
        })
    lora_alpha: float = field(
        default=1., metadata={'help': 'The factor to add the lora weights'})
    lora_dropout: float = field(
        default=0., metadata={'help': 'The dropout rate of the lora module'})
    merge_weights: bool = field(
        default=True,
        metadata={'help': 'Whether to merge weights when validating'})
    use_merged_linear: bool = field(
        default=False,
        metadata={'help': 'Whether to replace with merged linear layer'})
    enable_lora: List = field(
        default=None,
        metadata={
            'help':
            'The modules need to be turned on when using the merged linear layer'
        })
    fan_in_fan_out: bool = field(
        default=False,
        metadata={
            'help':
            'Set this to True if the layer to replace stores weight like (fan_in, fan_out)'
        })
    bias: str = field(
        default='none',
        metadata={
            # Typo fix: "ca be" -> "can be" in user-facing help text.
            'help': 'Bias type. Values can be "none", "all" or "lora_only"'
        })
    only_lora_trainable: bool = field(
        default=True, metadata={'help': 'Whether to train only lora'})
    pretrained_weights: str = field(
        default=None,
        metadata={
            'help':
            'The pretrained lora weights. Can be a local dir, local file, or a model id from modelscope'
        })
class LoRA:
@staticmethod
def tune(model: nn.Module,
rank=6,
replace_modules=None,
lora_alpha=1.,
lora_dropout=0.,
merge_weights=True,
fan_in_fan_out=False,
bias='none',
pretrained_tuner=None):
"""Tune a model with lora.
def prepare_model(model: nn.Module, config: LoRAConfig):
"""Tune a model with LoRA.
Args:
model: The torch.nn.Module containing the target module to be patched.
rank: The lora rank.
replace_modules: The module names to be replaced, the replacing strategy is `end with`.
lora_alpha: The alpha value for lora module.
lora_dropout: The dropout value for lora module.
merge_weights: If merge_weights set to True, when the module turns to `eval`, the lora weights
will be added into the origin weight to reduce calculation.
fan_in_fan_out: Set this to True if the layer to replace stores weight like (fan_in, fan_out).
bias: The grad strategy for bias, can be `none`, 'all' or 'lora_only'.
pretrained_tuner: The pretrained file of lora.
config: The LoRAConfig instance.
Returns:
The lora modules
"""
modules = LoRATuner._dynamic_patch_lora(
LoRA._dynamic_patch_lora(
model,
replace_modules=replace_modules,
r=rank,
lora_alpha=lora_alpha,
lora_dropout=lora_dropout,
merge_weights=merge_weights,
fan_in_fan_out=fan_in_fan_out)
replace_modules=config.replace_modules,
r=config.rank,
lora_alpha=config.lora_alpha,
lora_dropout=config.lora_dropout,
merge_weights=config.merge_weights,
use_merged_linear=config.use_merged_linear,
enable_lora=config.enable_lora,
fan_in_fan_out=config.fan_in_fan_out)
mark_only_lora_as_trainable(model, bias)
if config.only_lora_trainable:
mark_only_lora_as_trainable(model, config.bias)
def state_dict_hook(module, destination, prefix, local_metadata):
return lora_state_dict(destination, bias)
return lora_state_dict(destination, config.bias)
model.state_dict_hook_handle = model._register_state_dict_hook(
state_dict_hook)
def warning_hook(module, incompatible_keys):
logger.info(
f'The {module.__class__.__name__} module has unmatched keys: {incompatible_keys},'
f'this is converted to a notice with respect to LoRA')
for ik in incompatible_keys:
ik.clear()
def load_state_dict(self, state_dict, strict=True):
return self.load_state_dict_origin(state_dict, False)
if hasattr(model, 'register_load_state_dict_post_hook'):
model.load_state_dict_hook_handle = model.register_load_state_dict_post_hook(
warning_hook)
else:
model.load_state_dict_origin = model.load_state_dict
model.load_state_dict = types.MethodType(load_state_dict, model)
def load_state_dict(self, state_dict, strict=True):
return self.load_state_dict_origin(state_dict, False)
if config.pretrained_weights is not None:
if not os.path.exists(config.pretrained_weights):
model_dir = snapshot_download(config.pretrained_weights)
pretrained_weights = os.path.join(
model_dir, ModelFile.TORCH_MODEL_BIN_FILE)
elif os.path.isfile(config.pretrained_weights):
pretrained_weights = config.pretrained_weights
else:
pretrained_weights = os.path.join(
config.pretrained_weights, ModelFile.TORCH_MODEL_BIN_FILE)
model.load_state_dict(torch.load(pretrained_weights))
model.load_state_dict_origin = model.load_state_dict
model.load_state_dict = types.MethodType(load_state_dict, model)
if pretrained_tuner is not None and os.path.isfile(pretrained_tuner):
logger.info(f'Loading LoRA weights from file: {pretrained_tuner}')
model.load_state_dict(torch.load(pretrained_tuner))
return modules
return model
@staticmethod
def _dynamic_patch_lora(model, replace_modules, **kwargs):
def _dynamic_patch_lora(model, replace_modules, use_merged_linear,
**kwargs):
"""Dynamic patch lora to model
Args:
model: The torch.nn.Module containing the target module to be patched.
replace_modules: The module names to be replaced, the replacing strategy is `end with`.
use_merged_linear: Whether to replace with merged linear layer
**kwargs: The arguments passed from `tune` which are needed by lora.
Returns:
@@ -103,8 +158,13 @@ class LoRATuner:
replace_modules = [replace_modules]
for module_key in module_keys:
if any([module_key.endswith(name)
for name in replace_modules]): # noqa
if isinstance(replace_modules, str):
target_module_found = re.fullmatch(replace_modules, module_key)
else:
target_module_found = any(
module_key.endswith(target_key)
for target_key in replace_modules)
if target_module_found: # noqa
parts = module_key.split('.')
module = model.get_submodule('.'.join(parts[:-1]))
sub_module = model.get_submodule(module_key)
@@ -112,11 +172,19 @@ class LoRATuner:
lora_module = None
if isinstance(sub_module, torch.nn.Linear):
lora_module = Linear(
sub_module.in_features,
sub_module.out_features,
bias=sub_module.bias is not None,
**kwargs)
if use_merged_linear:
lora_module = MergedLinear(
sub_module.in_features,
sub_module.out_features,
bias=sub_module.bias is not None,
**kwargs)
else:
kwargs.pop('enable_lora', None)
lora_module = Linear(
sub_module.in_features,
sub_module.out_features,
bias=sub_module.bias is not None,
**kwargs)
elif isinstance(sub_module, torch.nn.Conv2d):
kwargs.pop('fan_in_fan_out', None)
lora_module = Conv2d(
@@ -140,9 +208,13 @@ class LoRATuner:
return modules
@staticmethod
def unpatch_lora(model, replace_modules):
def unpatch_lora(model, config: LoRAConfig):
"""Unpatch lora modules and merge the weights to original modules.
LoRA constructs an additional layer with low-rank decomposition matrices of the weights in the network.
'LoRA: Low-Rank Adaptation of Large Language Models' by Hu et al.(2021)
See https://arxiv.org/abs/2106.09685
Args:
model: The model called with `tune` function.
replace_modules: The module names to be replaced, the replacing strategy is `end with`.
@@ -152,13 +224,17 @@ class LoRATuner:
"""
modules = []
module_keys = [key for key, _ in model.named_modules()]
assert isinstance(replace_modules, (str, list))
if isinstance(replace_modules, str):
replace_modules = [replace_modules]
assert isinstance(config.replace_modules, (str, list))
replace_modules = config.replace_modules
for module_key in module_keys:
if any([module_key.endswith(name)
for name in replace_modules]): # noqa
if isinstance(replace_modules, str):
target_module_found = re.fullmatch(replace_modules, module_key)
else:
target_module_found = any(
module_key.endswith(target_key)
for target_key in replace_modules)
if target_module_found: # noqa
parts = module_key.split('.')
module = model.get_submodule('.'.join(parts[:-1]))
sub_module = model.get_submodule(module_key)

View File

View File

@@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import types
from typing import Callable, Iterable, Tuple
import numpy as np
@@ -22,7 +21,6 @@ from torch.distributions.bernoulli import Bernoulli
from torch.optim import Optimizer
from modelscope.utils.logger import get_logger
from .builder import OPTIMIZERS, default_group
logger = get_logger()
@@ -72,8 +70,6 @@ def calculate_fisher(model: torch.nn.Module,
return gradient_mask
@OPTIMIZERS.register_module(
group_key=default_group, module_name='ChildTuningAdamW')
class ChildTuningAdamW(Optimizer):
def __init__(self,

214
modelscope/swift/prompt.py Normal file
View File

@@ -0,0 +1,214 @@
import os
import re
import types
from dataclasses import dataclass, field
from typing import Union
import torch
from torch import nn
from modelscope import snapshot_download
from modelscope.utils.constant import ModelFile
from .base import SwiftConfig
@dataclass
class PromptConfig(SwiftConfig):
    """
    The configuration class for the prompt module.

    Visual prompt tuning (VPT) is proposed to initialize tunable prompt tokens
    and prepend to the original tokens in the first layer or multiple layers.
    'Visual Prompt Tuning' by Jia et al.(2022)
    See https://arxiv.org/abs/2203.12119

    Here we apply the VPT to other fields.

    Args:
        dim: The dimension of the hidden states
        module_layer_name: The layer module to be replaced, in regex format
        embedding_pos: The position of the embedding tensor
        attention_mask_pos: The position of the attention mask
        attention_mask_value: The value to pad to the attention mask
        prompt_length: The length of the prompt tokens
        only_prompt_trainable: Whether to train only prompt
        attach_front: When set to True, prompt is attached in front of the embedding
        pretrained_weights: The pretrained prompt weights. Can be a local dir, local file,
            or a model id from modelscope
    """

    # Hidden-state dimension of the patched layers; must match the model.
    dim: int = field(metadata={'help': 'The dimension of the hidden states'})
    # Regex matched with re.fullmatch against module keys from named_modules().
    module_layer_name: str = field(
        metadata={'help': 'The layer module to be replaced, in regex format'})
    # int -> positional argument index; str -> keyword argument name.
    embedding_pos: Union[str, int] = field(
        metadata={'help': 'The position of the embedding tensor'})
    attention_mask_pos: Union[str, int] = field(
        default=None, metadata={'help': 'The position of the attention mask'})
    # NOTE(review): value used to pad the mask over prompt tokens; presumably
    # 0. means "not masked" for additive masks — confirm against the model.
    attention_mask_value: Union[float, int, bool] = field(
        default=0.,
        metadata={'help': 'The value to pad to the attention mask'})
    prompt_length: int = field(
        default=16, metadata={'help': 'The length of the prompt tokens'})
    only_prompt_trainable: bool = field(
        default=True, metadata={'help': 'Whether to train only prompt'})
    attach_front: bool = field(
        default=True,
        metadata={
            'help':
            'When set to True, prompt is attached in front of the embedding'
        })
    pretrained_weights: str = field(
        default=None,
        metadata={
            'help':
            'The pretrained prompt weights. Can be a local dir, local file, or a model id from modelscope'
        })
class Prompt:
    """Applies visual prompt tuning by monkey-patching the ``forward`` of
    every layer whose module key matches ``config.module_layer_name``."""

    @staticmethod
    def prepare_model(model: nn.Module, config: PromptConfig):
        """Attach a ``PromptModule`` to each matched layer and patch the model
        so that only prompt weights are trained, saved and loaded.

        Args:
            model: The model to tune.
            config: The prompt tuning configuration.

        Returns:
            The same ``model`` instance, patched in place.
        """
        module_keys = [key for key, _ in model.named_modules()]
        for module_key in module_keys:
            if re.fullmatch(config.module_layer_name, module_key):  # noqa
                module = model.get_submodule(module_key)

                def _forward(self, *args, **kwargs):
                    # Fetch the embedding argument (positional or keyword),
                    # run it through the prompt module, and write it back.
                    if isinstance(config.embedding_pos, int):
                        input_embedding = args[config.embedding_pos]
                    else:
                        input_embedding = kwargs[config.embedding_pos]
                    input_embedding = getattr(
                        self, 'prompt').forward(input_embedding)
                    if isinstance(config.embedding_pos, int):
                        args = type(args)(
                            args[0:config.embedding_pos] + (input_embedding, )
                            + args[config.embedding_pos + 1:])
                    else:
                        kwargs[config.embedding_pos] = input_embedding
                    if config.attention_mask_pos:
                        attention_mask = None
                        if isinstance(config.attention_mask_pos, int):
                            attention_mask = args[config.attention_mask_pos]
                        elif isinstance(config.attention_mask_pos, str):
                            attention_mask = kwargs[config.attention_mask_pos]
                        if attention_mask is not None:
                            # Pad the mask so it also covers prompt tokens.
                            attention_mask = getattr(
                                self,
                                'prompt').patch_attention_mask(attention_mask)
                            if isinstance(config.attention_mask_pos, int):
                                args = type(args)(
                                    args[0:config.attention_mask_pos]
                                    + (attention_mask, )
                                    + args[config.attention_mask_pos + 1:])
                            else:
                                kwargs[
                                    config.attention_mask_pos] = attention_mask
                    return self.forward_origin(*args, **kwargs)

                module.forward_origin = module.forward
                module.forward = types.MethodType(_forward, module)
                # The trailing component of the module key is taken as the
                # layer index (e.g. 'blocks.3' -> 3); assumes matched keys
                # end with digits — TODO confirm for every patched model.
                prompt_module = PromptModule(config.dim,
                                             int(module_key.rsplit('.')[-1]),
                                             config.prompt_length,
                                             config.attention_mask_value,
                                             config.attach_front)
                setattr(module, 'prompt', prompt_module)
        if config.only_prompt_trainable:
            # Freeze every parameter outside the prompt modules.
            for n, p in model.named_parameters():
                if 'prompt' not in n:
                    p.requires_grad = False

        def state_dict_hook(module, destination, prefix, local_metadata):
            # Checkpoint only prompt weights.
            return {
                key: value
                for key, value in destination.items() if 'prompt' in key
            }

        model.state_dict_hook_handle = model._register_state_dict_hook(
            state_dict_hook)

        def load_state_dict(self, state_dict, strict=True):
            # Force non-strict loading: checkpoints contain prompt keys only.
            return self.load_state_dict_origin(state_dict, False)

        model.load_state_dict_origin = model.load_state_dict
        model.load_state_dict = types.MethodType(load_state_dict, model)
        if config.pretrained_weights is not None:
            # Resolve weights: model id -> snapshot dir; local file as-is;
            # local dir -> default torch weights file inside it.
            if not os.path.exists(config.pretrained_weights):
                model_dir = snapshot_download(config.pretrained_weights)
                pretrained_weights = os.path.join(
                    model_dir, ModelFile.TORCH_MODEL_BIN_FILE)
            elif os.path.isfile(config.pretrained_weights):
                pretrained_weights = config.pretrained_weights
            else:
                pretrained_weights = os.path.join(
                    config.pretrained_weights, ModelFile.TORCH_MODEL_BIN_FILE)
            model.load_state_dict(torch.load(pretrained_weights))
        return model
class PromptModule(nn.Module):
    """The implementation of vision prompt tuning method.

    Visual prompt tuning (VPT) is proposed to initialize tunable prompt tokens
    and prepend to the original tokens in the first layer or multiple layers.
    'Visual Prompt Tuning' by Jia et al.(2022)
    See https://arxiv.org/abs/2203.12119

    Attributes:
        dim: An integer indicating the embedding dimension.
        layer_num: An integer indicating number of layers.
        prompt_length: An integer indicating the length of vision prompt tuning.
    """

    def __init__(self,
                 dim,
                 layer_num,
                 prompt_length=None,
                 mask_values=0.,
                 attach_front=True):
        super().__init__()
        self.dim = dim
        self.layer_num = layer_num
        self.prompt_length = prompt_length
        self.mask_values = mask_values
        self.attach_front = attach_front
        # Learnable prompt tokens, broadcast over the batch dimension.
        # Attribute name is part of the state_dict keys.
        self.prompt_token = nn.Parameter(torch.zeros(1, prompt_length, dim))
        nn.init.xavier_uniform_(self.prompt_token)

    def forward(self, x):
        """Insert prompt tokens into ``x`` (batch, seq, dim).

        Layer 0 lengthens the sequence; deeper layers replace the slots the
        previous layer's prompt occupied, keeping the length constant.
        """
        tokens = self.prompt_token.expand(x.shape[0], -1, -1)
        if self.layer_num == 0:
            base = x
        elif self.attach_front:
            base = x[:, self.prompt_length:, :]
        else:
            base = x[:, :-self.prompt_length, :]
        pieces = (tokens, base) if self.attach_front else (base, tokens)
        return torch.cat(pieces, dim=1)

    def patch_attention_mask(self, m):
        """Prepend ``mask_values`` entries to cover the prompt tokens."""
        pad = torch.full((*m.shape[:-1], self.prompt_length),
                         self.mask_values).to(m.device)
        return torch.cat((pad, m), dim=-1)

218
modelscope/swift/sd_lora.py Normal file
View File

@@ -0,0 +1,218 @@
# Copyright 2023-2024 The Alibaba Fundamental Vision Team Authors. All rights reserved.
# The implementation is adopted from HighCWu,
# made publicly available under the Apache License 2.0 License at https://github.com/HighCWu/ControlLoRA
import os
from dataclasses import dataclass
from typing import List, Tuple, Union
import torch
import torch.nn as nn
from diffusers.configuration_utils import ConfigMixin, register_to_config
from diffusers.models.cross_attention import CrossAttention, LoRALinearLayer
from diffusers.models.modeling_utils import ModelMixin
from diffusers.utils.outputs import BaseOutput
@dataclass
class TunerOutput(BaseOutput):
    """Output container for ``LoRATuner.forward``."""

    # Flat tuple of the q-branch LoRA down-projection weights, one per layer.
    lora_states: Tuple[torch.FloatTensor]
class LoRACrossAttnProcessor(nn.Module):
    """ The implementation of lora attention module.

    Wraps a diffusers ``CrossAttention`` call, adding low-rank (LoRA)
    corrections to the query, key, value and output projections.  Each of
    the key/value/output corrections can be disabled individually.
    """

    def __init__(self,
                 hidden_size,
                 cross_attention_dim=None,
                 rank=4,
                 post_add=False,
                 key_states_skipped=False,
                 value_states_skipped=False,
                 output_states_skipped=False):
        """ Initialize a lora attn instance.

        Args:
            hidden_size (`int`): The number of channels in embedding.
            cross_attention_dim (`int`, *optional*):
                The number of channels in the hidden_states. If not given, defaults to `hidden_size`.
            rank (`int`, *optional*, defaults to 4): The number of rank of lora.
            post_add (`bool`, *optional*, defaults to False): Set to `True`, conduct weighted
                adding operation after lora.
            key_states_skipped (`bool`, *optional*, defaults to False):
                Set to `True` for skip to perform lora on key value.
            value_states_skipped (`bool`, *optional*, defaults to False):
                Set to `True` for skip to perform lora on value.
            output_states_skipped (`bool`, *optional*, defaults to False):
                Set to `True` for skip to perform lora on output value.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.cross_attention_dim = cross_attention_dim
        self.rank = rank
        self.post_add = post_add
        # The query branch always gets a LoRA layer; k/v/out are optional
        # and only constructed when their branch is not skipped.
        self.to_q_lora = LoRALinearLayer(hidden_size, hidden_size, rank)
        if not key_states_skipped:
            # With post_add the LoRA input is the already-projected tensor
            # (hidden_size wide); otherwise it is the raw encoder states.
            self.to_k_lora = LoRALinearLayer(
                hidden_size if post_add else
                (cross_attention_dim or hidden_size), hidden_size, rank)
        if not value_states_skipped:
            self.to_v_lora = LoRALinearLayer(
                hidden_size if post_add else
                (cross_attention_dim or hidden_size), hidden_size, rank)
        if not output_states_skipped:
            self.to_out_lora = LoRALinearLayer(hidden_size, hidden_size, rank)
        self.key_states_skipped: bool = key_states_skipped
        self.value_states_skipped: bool = value_states_skipped
        self.output_states_skipped: bool = output_states_skipped

    def skip_key_states(self, is_skipped: bool = True):
        """Toggle the key LoRA branch; re-enabling requires the layer to
        have been created in ``__init__``."""
        if not is_skipped:
            assert hasattr(self, 'to_k_lora')
        self.key_states_skipped = is_skipped

    def skip_value_states(self, is_skipped: bool = True):
        """Toggle the value LoRA branch."""
        if not is_skipped:
            # Bugfix: previously asserted on 'to_q_lora' (which always
            # exists); re-enabling the value branch requires its own layer.
            assert hasattr(self, 'to_v_lora')
        self.value_states_skipped = is_skipped

    def skip_output_states(self, is_skipped: bool = True):
        """Toggle the output-projection LoRA branch."""
        if not is_skipped:
            assert hasattr(self, 'to_out_lora')
        self.output_states_skipped = is_skipped

    def __call__(self,
                 attn: CrossAttention,
                 hidden_states,
                 encoder_hidden_states=None,
                 attention_mask=None,
                 scale=1.0):
        """Run cross attention with LoRA corrections scaled by ``scale``."""
        batch_size, sequence_length, _ = hidden_states.shape
        attention_mask = attn.prepare_attention_mask(
            attention_mask=attention_mask,
            target_length=sequence_length,
            batch_size=batch_size)
        query = attn.to_q(hidden_states)
        # post_add corrects the projected tensor; otherwise the raw input.
        query = query + scale * self.to_q_lora(
            query if self.post_add else hidden_states)
        query = attn.head_to_batch_dim(query)
        # Self-attention when no encoder states are given.
        encoder_hidden_states = encoder_hidden_states if encoder_hidden_states is not None else hidden_states
        key = attn.to_k(encoder_hidden_states)
        if not self.key_states_skipped:
            key = key + scale * self.to_k_lora(
                key if self.post_add else encoder_hidden_states)
        value = attn.to_v(encoder_hidden_states)
        if not self.value_states_skipped:
            value = value + scale * self.to_v_lora(
                value if self.post_add else encoder_hidden_states)
        key = attn.head_to_batch_dim(key)
        value = attn.head_to_batch_dim(value)
        attention_probs = attn.get_attention_scores(query, key, attention_mask)
        hidden_states = torch.bmm(attention_probs, value)
        hidden_states = attn.batch_to_head_dim(hidden_states)
        # linear proj
        out = attn.to_out[0](hidden_states)
        if not self.output_states_skipped:
            out = out + scale * self.to_out_lora(
                out if self.post_add else hidden_states)
        hidden_states = out
        # dropout
        hidden_states = attn.to_out[1](hidden_states)
        return hidden_states
class LoRATuner(ModelMixin, ConfigMixin):
    """Stable-diffusion LoRA tuner: builds per-resolution-block LoRA
    attention processors and installs them into a UNet's attention layers."""

    @staticmethod
    def tune(
        model: nn.Module,
        tuner_config=None,
        pretrained_tuner=None,
    ):
        """Create a tuner from ``tuner_config``, optionally load pretrained
        weights, and install its processors into ``model.unet``.

        Args:
            model: A wrapper object exposing a ``unet`` attribute.
            tuner_config: Config dict consumed by ``from_config``.
            pretrained_tuner: Optional path to a saved tuner state dict.

        Returns:
            The ``LoRATuner`` instance (holds the trainable parameters).
        """
        tuner = LoRATuner.from_config(tuner_config)
        if pretrained_tuner is not None and os.path.exists(pretrained_tuner):
            tuner.load_state_dict(
                torch.load(pretrained_tuner, map_location='cpu'), strict=True)
        # Copy the nested ModuleList into plain lists so set_tune_layers can
        # pop from them without mutating the registered modules.
        tune_layers_list = list(
            [list(layer_list) for layer_list in tuner.lora_layers])
        assert hasattr(model, 'unet')
        unet = model.unet
        tuner.to(unet.device)
        tune_attn_procs = tuner.set_tune_layers(unet, tune_layers_list)
        unet.set_attn_processor(tune_attn_procs)
        return tuner

    def set_tune_layers(self, unet, tune_layers_list):
        """Map each UNet attention-processor name to the next unused LoRA
        layer of the matching resolution block and return the mapping."""
        n_ch = len(unet.config.block_out_channels)
        control_ids = [i for i in range(n_ch)]
        tune_attn_procs = {}
        for name in unet.attn_processors.keys():
            # NOTE(review): if a processor name matches none of the three
            # prefixes, `control_id` keeps its previous value (NameError on
            # the first iteration) — confirm all names match one prefix.
            if name.startswith('mid_block'):
                control_id = control_ids[-1]
            elif name.startswith('up_blocks'):
                block_id = int(name[len('up_blocks.')])
                # Up blocks mirror the down blocks, hence the reversed ids.
                control_id = list(reversed(control_ids))[block_id]
            elif name.startswith('down_blocks'):
                block_id = int(name[len('down_blocks.')])
                control_id = control_ids[block_id]
            tune_layers = tune_layers_list[control_id]
            if len(tune_layers) != 0:
                tune_layer = tune_layers.pop(0)
                tune_attn_procs[name] = tune_layer
        return tune_attn_procs

    @register_to_config
    def __init__(
        self,
        lora_block_out_channels: Tuple[int] = (320, 640, 1280, 1280),
        lora_cross_attention_dims: Tuple[List[int]] = ([
            None, 768, None, 768, None, 768, None, 768, None, 768
        ], [None, 768, None, 768, None, 768, None, 768, None,
            768], [None, 768, None, 768, None, 768, None, 768, None,
                   768], [None, 768]),
        lora_rank: int = 4,
        lora_post_add: bool = False,
        lora_key_states_skipped: bool = False,
        lora_value_states_skipped: bool = False,
        lora_output_states_skipped: bool = False,
    ):
        """Build one ``LoRACrossAttnProcessor`` per cross-attention dim of
        every resolution block; arguments are recorded by
        ``register_to_config`` for later ``from_config`` reconstruction."""
        super().__init__()
        lora_cls = LoRACrossAttnProcessor
        self.lora_layers = nn.ModuleList([])
        for i, lora_cross_attention_dim in enumerate(
                lora_cross_attention_dims):
            self.lora_layers.append(
                nn.ModuleList([
                    lora_cls(
                        lora_block_out_channels[i],
                        cross_attention_dim=cross_attention_dim,
                        rank=lora_rank,
                        post_add=lora_post_add,
                        key_states_skipped=lora_key_states_skipped,
                        value_states_skipped=lora_value_states_skipped,
                        output_states_skipped=lora_output_states_skipped)
                    for cross_attention_dim in lora_cross_attention_dim
                ]))

    def forward(self) -> Union[TunerOutput, Tuple]:
        """Return the q-branch LoRA down-projection weights as a flat tuple
        wrapped in a ``TunerOutput``."""
        lora_states_list = []
        tune_layers_list = list(
            [list(layer_list) for layer_list in self.lora_layers])
        for tune_list in tune_layers_list:
            for tune_layer in tune_list:
                lora_states_list.append(tune_layer.to_q_lora.down.weight)
        return TunerOutput(lora_states=tuple(lora_states_list))

View File

@@ -1,5 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from modelscope.swift import ChildTuningAdamW
from .builder import OPTIMIZERS, build_optimizer
from .child_tuning_adamw_optimizer import ChildTuningAdamW
__all__ = ['OPTIMIZERS', 'build_optimizer', 'ChildTuningAdamW']

View File

@@ -44,6 +44,7 @@ from modelscope.utils.registry import build_from_cfg
from modelscope.utils.torch_utils import (compile_model, get_dist_info,
get_local_rank, init_dist, is_dist,
is_master, set_random_seed)
from ..swift import Swift
from .base import BaseTrainer
from .builder import TRAINERS
from .default_config import merge_cfg, merge_hooks, update_cfg
@@ -264,10 +265,7 @@ class EpochBasedTrainer(BaseTrainer):
def tune_module(self, efficient_tuners):
if efficient_tuners is not None:
for tuner in efficient_tuners:
type = tuner.pop('type')
if type == 'lora':
from modelscope.tuners.lora import LoRATuner
LoRATuner.tune(self.model, **tuner)
self.model = Swift.prepare_model(self.model, tuner)
def place_model(self):
"""Place model to device, or to DDP

View File

@@ -0,0 +1 @@
from .hub import create_model_if_not_exist, read_config

View File

@@ -8,18 +8,17 @@ from modelscope.metainfo import Preprocessors, Trainers
from modelscope.models import Model
from modelscope.msdatasets import MsDataset
from modelscope.pipelines import pipeline
from modelscope.swift.optimizers.child_tuning_adamw_optimizer import \
calculate_fisher
from modelscope.trainers import build_trainer
from modelscope.trainers.hooks import Hook
from modelscope.trainers.nlp_trainer import (EpochBasedTrainer,
NlpEpochBasedTrainer)
from modelscope.trainers.optimizer.child_tuning_adamw_optimizer import \
calculate_fisher
from modelscope.trainers.training_args import TrainingArgs
from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.data_utils import to_device
from modelscope.utils.regress_test_utils import (MsRegressTool,
compare_arguments_nested)
from modelscope.utils.test_utils import test_level
class TestFinetuneSequenceClassification(unittest.TestCase):

View File

@@ -0,0 +1,164 @@
# Copyright 2022-2023 The Alibaba Fundamental Vision Team Authors. All rights reserved.
import os
import shutil
import tempfile
import unittest
from modelscope.metainfo import Trainers
from modelscope.msdatasets import MsDataset
from modelscope.swift import Swift
from modelscope.swift.adapter import AdapterConfig
from modelscope.swift.lora import LoRAConfig
from modelscope.swift.prompt import PromptConfig
from modelscope.trainers import build_trainer
from modelscope.utils.test_utils import test_level
class TestVisionEfficientTuningSwiftTrainer(unittest.TestCase):
    """End-to-end trainer tests for the SWIFT tuners (LoRA/adapter/prompt)
    on the vision-efficient-tuning task.

    Each test downloads a ViT checkpoint from ModelScope, attaches one tuner
    via the ``efficient_tuners`` trainer argument, trains a single epoch on
    OxfordFlowers and checks that the log file and per-epoch checkpoints are
    written to the work dir.  Requires network access to ModelScope.
    """

    def setUp(self):
        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
        # Small classification benchmark shared by all three tuner tests.
        self.train_dataset = MsDataset.load(
            'foundation_model_evaluation_benchmark',
            namespace='damo',
            subset_name='OxfordFlowers',
            split='train')
        self.eval_dataset = MsDataset.load(
            'foundation_model_evaluation_benchmark',
            namespace='damo',
            subset_name='OxfordFlowers',
            split='eval')
        self.max_epochs = 1  # single epoch keeps the smoke test fast
        self.num_classes = 102  # OxfordFlowers class count
        self.tune_length = 10  # reused as rank / adapter length / prompt length
        self.tmp_dir = tempfile.TemporaryDirectory().name
        if not os.path.exists(self.tmp_dir):
            os.makedirs(self.tmp_dir)

    def tearDown(self):
        shutil.rmtree(self.tmp_dir)
        super().tearDown()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_vision_efficient_tuning_swift_lora_train(self):
        """Train with a SWIFT LoRA tuner and verify logs/checkpoints."""
        model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-lora'

        def cfg_modify_fn(cfg):
            cfg.model.head.num_classes = self.num_classes
            cfg.model.finetune = True
            cfg.train.max_epochs = self.max_epochs
            cfg.train.lr_scheduler.T_max = self.max_epochs
            # NOTE(review): presumably disables the checkpoint's built-in
            # lora branch so only the SWIFT tuner is active — confirm.
            cfg.model.backbone.lora_length = 0
            return cfg

        lora_config = LoRAConfig(
            rank=self.tune_length,
            replace_modules=['qkv'],
            merge_weights=False,
            only_lora_trainable=False,
            use_merged_linear=True,
            enable_lora=[True])
        kwargs = dict(
            model=model_id,
            work_dir=self.tmp_dir,
            train_dataset=self.train_dataset,
            eval_dataset=self.eval_dataset,
            cfg_modify_fn=cfg_modify_fn,
            efficient_tuners=[lora_config])
        trainer = build_trainer(
            name=Trainers.vision_efficient_tuning, default_args=kwargs)
        trainer.train()
        result = trainer.evaluate()
        print(f'Vision-efficient-tuning-lora train output: {result}.')
        results_files = os.listdir(self.tmp_dir)
        self.assertIn(f'{trainer.timestamp}.log.json', results_files)
        for i in range(self.max_epochs):
            self.assertIn(f'epoch_{i+1}.pth', results_files)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_vision_efficient_tuning_swift_adapter_train(self):
        """Train with a SWIFT adapter tuner and verify logs/checkpoints."""
        model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-adapter'

        def cfg_modify_fn(cfg):
            cfg.model.head.num_classes = self.num_classes
            cfg.model.finetune = True
            cfg.train.max_epochs = self.max_epochs
            cfg.train.lr_scheduler.T_max = self.max_epochs
            # NOTE(review): presumably disables the checkpoint's built-in
            # adapter branch — confirm.
            cfg.model.backbone.adapter_length = 0
            return cfg

        # Patch every transformer block's MLP with an adapter.
        adapter_config = AdapterConfig(
            dim=768,
            hidden_pos=0,
            module_name=r'.*blocks\.\d+\.mlp$',
            adapter_length=self.tune_length,
            only_adapter_trainable=False)
        kwargs = dict(
            model=model_id,
            work_dir=self.tmp_dir,
            train_dataset=self.train_dataset,
            eval_dataset=self.eval_dataset,
            cfg_modify_fn=cfg_modify_fn,
            efficient_tuners=[adapter_config])
        trainer = build_trainer(
            name=Trainers.vision_efficient_tuning, default_args=kwargs)
        trainer.train()
        result = trainer.evaluate()
        print(f'Vision-efficient-tuning-adapter train output: {result}.')
        results_files = os.listdir(self.tmp_dir)
        self.assertIn(f'{trainer.timestamp}.log.json', results_files)
        for i in range(self.max_epochs):
            self.assertIn(f'epoch_{i+1}.pth', results_files)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_vision_efficient_tuning_swift_prompt_train(self):
        """Train with a SWIFT prompt tuner and verify logs/checkpoints."""
        model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-prompt'

        def cfg_modify_fn(cfg):
            cfg.model.head.num_classes = self.num_classes
            cfg.model.finetune = True
            cfg.train.max_epochs = self.max_epochs
            cfg.train.lr_scheduler.T_max = self.max_epochs
            # NOTE(review): presumably disables the checkpoint's built-in
            # prompt branch — confirm.
            cfg.model.backbone.prompt_length = 0
            return cfg

        # Attach prompt tokens to every transformer block.
        prompt_config = PromptConfig(
            dim=768,
            module_layer_name=r'.*blocks\.\d+$',
            embedding_pos=0,
            prompt_length=self.tune_length,
            only_prompt_trainable=False,
            attach_front=False)
        kwargs = dict(
            model=model_id,
            work_dir=self.tmp_dir,
            train_dataset=self.train_dataset,
            eval_dataset=self.eval_dataset,
            cfg_modify_fn=cfg_modify_fn,
            efficient_tuners=[prompt_config])
        trainer = build_trainer(
            name=Trainers.vision_efficient_tuning, default_args=kwargs)
        trainer.train()
        result = trainer.evaluate()
        print(f'Vision-efficient-tuning-prompt train output: {result}.')
        results_files = os.listdir(self.tmp_dir)
        self.assertIn(f'{trainer.timestamp}.log.json', results_files)
        for i in range(self.max_epochs):
            self.assertIn(f'epoch_{i+1}.pth', results_files)
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,81 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import shutil
import tempfile
import unittest
import numpy as np
import torch
from modelscope import read_config
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models.base import Model
from modelscope.msdatasets import MsDataset
from modelscope.pipelines import pipeline
from modelscope.swift import Swift
from modelscope.swift.adapter import AdapterConfig
from modelscope.trainers import build_trainer
from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.test_utils import test_level
class TestAdapter(unittest.TestCase):
    """Smoke test for the adapter tuner: finetune a tiny structbert model on
    two AFQMC samples, then reload the saved adapter weights for inference.
    Requires network access to ModelScope."""

    def setUp(self):
        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
        self.tmp_dir = tempfile.TemporaryDirectory().name
        if not os.path.exists(self.tmp_dir):
            os.makedirs(self.tmp_dir)

    def tearDown(self):
        shutil.rmtree(self.tmp_dir)
        super().tearDown()

    @unittest.skipUnless(test_level() >= 0, 'skip in this level')
    def test_adapter_smoke_test(self):
        """Run one tiny train/inference cycle through the adapter tuner."""
        # Two samples are enough to exercise the training loop.
        dataset = MsDataset.load(
            'clue', subset_name='afqmc',
            split='train').to_hf_dataset().select(range(2))
        model_dir = snapshot_download(
            'damo/nlp_structbert_sentence-similarity_chinese-tiny')
        model = Model.from_pretrained(model_dir, adv_grad_factor=None)
        cfg_file = os.path.join(model_dir, 'configuration.json')
        model_cfg = os.path.join(model_dir, 'config.json')
        model_cfg = read_config(model_cfg)
        # Patch the feed_forward_chunk of every transformer layer.
        adapter_config = AdapterConfig(
            dim=model_cfg.hidden_size,
            module_name=r'.*layer\.\d+$',
            method_name='feed_forward_chunk',
            hidden_pos=0)
        model = Swift.prepare_model(model, adapter_config)
        kwargs = dict(
            model=model,
            cfg_file=cfg_file,
            train_dataset=dataset,
            eval_dataset=dataset,
            work_dir=self.tmp_dir)
        trainer = build_trainer(default_args=kwargs)
        trainer.train()
        output_dir = os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR)

        def pipeline_sentence_similarity(model_dir):
            # Reload the base model and attach the trained adapter weights.
            model = Model.from_pretrained(model_dir)
            adapter_config.pretrained_weights = output_dir
            Swift.prepare_model(model, adapter_config)
            model.eval()
            pipeline_ins = pipeline(
                task=Tasks.sentence_similarity, model=model)
            return pipeline_ins(input=('test', 'this is a test'))

        output1 = pipeline_sentence_similarity(
            'damo/nlp_structbert_sentence-similarity_chinese-tiny')
        print(output1)
# Allow running this test module directly from the command line.
if __name__ == '__main__':
    unittest.main()

View File

@@ -11,9 +11,10 @@ from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models.base import Model
from modelscope.msdatasets import MsDataset
from modelscope.pipelines import pipeline
from modelscope.swift import Swift
from modelscope.swift.lora import (Linear, LoRA, LoRAConfig,
mark_only_lora_as_trainable)
from modelscope.trainers import build_trainer
from modelscope.tuners.lora import (Linear, LoRATuner,
mark_only_lora_as_trainable)
from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.test_utils import test_level
@@ -66,22 +67,18 @@ class TestLora(unittest.TestCase):
model_dir = snapshot_download(
'damo/nlp_structbert_sentence-similarity_chinese-tiny')
model = Model.from_pretrained(
'damo/nlp_structbert_sentence-similarity_chinese-tiny',
adv_grad_factor=None)
model = Model.from_pretrained(model_dir, adv_grad_factor=None)
cfg_file = os.path.join(model_dir, 'configuration.json')
lora_config = LoRAConfig(replace_modules=['query', 'key', 'value'])
model = Swift.prepare_model(model, lora_config)
kwargs = dict(
model=model,
cfg_file=cfg_file,
train_dataset=dataset,
eval_dataset=dataset,
work_dir=self.tmp_dir,
efficient_tuners=[{
'type': 'lora',
'replace_modules': ['query', 'key', 'value']
}])
work_dir=self.tmp_dir)
trainer = build_trainer(default_args=kwargs)
trainer.train()
@@ -89,7 +86,8 @@ class TestLora(unittest.TestCase):
def pipeline_sentence_similarity(model_dir):
model = Model.from_pretrained(model_dir)
LoRATuner.tune(model, replace_modules=['query', 'key', 'value'])
lora_config.pretrained_weights = output_dir
Swift.prepare_model(model, lora_config)
model.load_state_dict(
torch.load(os.path.join(output_dir, 'pytorch_model.bin')))
model.eval()
@@ -100,7 +98,7 @@ class TestLora(unittest.TestCase):
output1 = pipeline_sentence_similarity(
'damo/nlp_structbert_sentence-similarity_chinese-tiny')
LoRATuner.unpatch_lora(model, ['query', 'key', 'value'])
LoRA.unpatch_lora(model, lora_config)
model.save_pretrained(
output_dir, save_checkpoint_names='pytorch_model.bin')

View File

@@ -0,0 +1,83 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import shutil
import tempfile
import unittest
import numpy as np
import torch
from modelscope import read_config
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models.base import Model
from modelscope.msdatasets import MsDataset
from modelscope.pipelines import pipeline
from modelscope.swift import Swift
from modelscope.swift.adapter import AdapterConfig
from modelscope.swift.prompt import PromptConfig
from modelscope.trainers import build_trainer
from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.test_utils import test_level
class TestPrompt(unittest.TestCase):
    """Smoke tests for the Swift Prompt tuner on a tiny structbert model."""

    def setUp(self):
        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
        # Use mkdtemp() so the directory exists and stays alive until
        # tearDown. The previous `tempfile.TemporaryDirectory().name` idiom
        # dropped the manager object, whose GC finalizer could delete the
        # directory out from under the running test.
        self.tmp_dir = tempfile.mkdtemp()

    def tearDown(self):
        # ignore_errors: the test itself may have moved/removed the dir.
        shutil.rmtree(self.tmp_dir, ignore_errors=True)
        super().tearDown()

    @unittest.skipUnless(test_level() >= 0, 'skip in this level')
    def test_prompt_smoke_test(self):
        """Train with a prompt tuner attached, then reload the trained
        prompt weights through an inference pipeline."""
        dataset = MsDataset.load(
            'clue', subset_name='afqmc',
            split='train').to_hf_dataset().select(range(2))
        model_dir = snapshot_download(
            'damo/nlp_structbert_sentence-similarity_chinese-tiny')
        model = Model.from_pretrained(model_dir, adv_grad_factor=None)
        cfg_file = os.path.join(model_dir, 'configuration.json')
        model_cfg = os.path.join(model_dir, 'config.json')
        model_cfg = read_config(model_cfg)
        # Insert prompt embeddings in front of every transformer layer.
        prompt_config = PromptConfig(
            dim=model_cfg.hidden_size,
            module_layer_name=r'.*layer\.\d+$',
            embedding_pos=0,
            attention_mask_pos=1)
        model = Swift.prepare_model(model, prompt_config)
        kwargs = dict(
            model=model,
            cfg_file=cfg_file,
            train_dataset=dataset,
            eval_dataset=dataset,
            work_dir=self.tmp_dir)
        trainer = build_trainer(default_args=kwargs)
        trainer.train()
        output_dir = os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR)

        def pipeline_sentence_similarity(model_dir):
            # Rebuild the base model and load the freshly trained prompt
            # weights before running inference.
            model = Model.from_pretrained(model_dir)
            prompt_config.pretrained_weights = output_dir
            Swift.prepare_model(model, prompt_config)
            model.eval()
            pipeline_ins = pipeline(
                task=Tasks.sentence_similarity, model=model)
            return pipeline_ins(input=('test', 'this is a test'))

        output1 = pipeline_sentence_similarity(
            'damo/nlp_structbert_sentence-similarity_chinese-tiny')
        print(output1)
# Allow running this test module directly from the command line.
if __name__ == '__main__':
    unittest.main()

View File

@@ -35,7 +35,7 @@ class AstScaningTest(unittest.TestCase):
def test_ast_scaning_class(self):
astScaner = AstScanning()
pipeline_file = os.path.join(MODELSCOPE_PATH, 'pipelines', 'nlp',
'text_generation_pipeline.py')
'fill_mask_pipeline.py')
output = astScaner.generate_ast(pipeline_file)
self.assertTrue(output['imports'] is not None)
self.assertTrue(output['from_imports'] is not None)
@@ -45,24 +45,19 @@ class AstScaningTest(unittest.TestCase):
self.assertIsInstance(imports, dict)
self.assertIsInstance(from_imports, dict)
self.assertIsInstance(decorators, list)
self.assertListEqual(
list(set(imports.keys()) - set(['torch', 'os'])), [])
self.assertEqual(len(from_imports.keys()), 11)
self.assertListEqual(list(set(imports.keys()) - set(['numpy'])), [])
self.assertEqual(len(from_imports.keys()), 8)
self.assertTrue(from_imports['modelscope.metainfo'] is not None)
self.assertEqual(from_imports['modelscope.metainfo'], ['Pipelines'])
self.assertEqual(
decorators,
[('PIPELINES', 'text-generation', 'text-generation'),
('PIPELINES', 'text2text-generation', 'translation_en_to_de'),
('PIPELINES', 'text2text-generation', 'translation_en_to_ro'),
('PIPELINES', 'text2text-generation', 'translation_en_to_fr'),
('PIPELINES', 'text2text-generation', 'text2text-generation')])
self.assertEqual(decorators,
[('PIPELINES', 'fill-mask', 'fill-mask'),
('PIPELINES', 'fill-mask', 'fill-mask-ponet')])
def test_files_scaning_method(self):
fileScaner = FilesAstScanning()
# case of pass in files directly
pipeline_file = os.path.join(MODELSCOPE_PATH, 'pipelines', 'nlp',
'text_generation_pipeline.py')
'fill_mask_pipeline.py')
file_list = [pipeline_file]
output = fileScaner.get_files_scan_results(file_list)
self.assertTrue(output[INDEX_KEY] is not None)