Mirror of https://github.com/modelscope/modelscope.git (synced 2026-02-24 04:01:10 +01:00)
Add LoRA/Adapter/Prompt tuners and support for ChatGLM-6B and ChatGLM2-6B

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/12770413

* add prompt and lora
* add adapter
* add prefix
* add tests
* adapter smoke test passed
* prompt test passed
* support model id in PETL
* migrate chatglm6b
* add train script for chatglm6b
* move gen_kwargs to finetune.py
* add chatglm2
* add model definition
Submodule data/test updated: acc59489d3...c117008caa
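The tuner workflow these changes introduce is consistent across the examples: build a tuner config, then let Swift.prepare_model inject the tuner into the model in place before training or inference. A minimal sketch distilled from finetune.py and lora_inference.py below (the values simply mirror this diff; the examples additionally override the model config via cfg_dict, which is omitted here):

from modelscope import Model
from modelscope.swift import Swift
from modelscope.swift.lora import LoRAConfig

model = Model.from_pretrained('ZhipuAI/chatglm2-6b')
model = model.bfloat16()
lora_config = LoRAConfig(
    replace_modules=['attention.query_key_value'],  # the fused QKV projection
    rank=32,
    lora_alpha=32,
    lora_dropout=0.05)
Swift.prepare_model(model, lora_config)  # injects LoRA layers in place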
examples/pytorch/chatglm6b/chatglm_trainer.py (new file, 118 lines)
@@ -0,0 +1,118 @@
from typing import Any, Dict, Union

import numpy as np
import torch
from transformers.deepspeed import is_deepspeed_zero3_enabled

from modelscope import EpochBasedTrainer, get_logger

logger = get_logger(__name__)


class Seq2SeqTrainer(EpochBasedTrainer):

    def _decode(self, tokens, ignore_pad_token_for_loss=False):
        tokens = tokens.cpu().numpy()
        if ignore_pad_token_for_loss:
            tokens = np.where(tokens != -100, tokens,
                              self.tokenizer.pad_token_id)
        return [
            t for t in self.tokenizer.batch_decode(
                tokens, skip_special_tokens=True) if t != '</s>'
        ]

    def evaluation_step(
        self,
        inputs: Dict[str, Union[torch.Tensor, Any]],
    ):
        has_labels = 'labels' in inputs
        # XXX: adapt synced_gpus for fairscale as well
        gen_kwargs = self.cfg['gen_kwargs']
        if gen_kwargs.get('max_length') is None and gen_kwargs.get(
                'max_new_tokens') is None:
            gen_kwargs['max_length'] = self.model.config.max_length
        gen_kwargs['num_beams'] = (
            gen_kwargs['num_beams'] if gen_kwargs.get('num_beams') is not None
            else self.model.config.num_beams)
        default_synced_gpus = True if is_deepspeed_zero3_enabled() else False
        gen_kwargs['synced_gpus'] = (
            gen_kwargs['synced_gpus'] if gen_kwargs.get('synced_gpus')
            is not None else default_synced_gpus)

        if 'attention_mask' in inputs:
            gen_kwargs['attention_mask'] = inputs.get('attention_mask', None)
        if 'position_ids' in inputs:
            gen_kwargs['position_ids'] = inputs.get('position_ids', None)
        if 'global_attention_mask' in inputs:
            gen_kwargs['global_attention_mask'] = inputs.get(
                'global_attention_mask', None)

        # prepare generation inputs
        # some encoder-decoder models can have varying encoder's and thus
        # varying model input names
        if hasattr(
                self.model, 'encoder'
        ) and self.model.encoder.main_input_name != self.model.main_input_name:
            generation_inputs = inputs[self.model.encoder.main_input_name]
        else:
            generation_inputs = inputs[self.model.main_input_name]

        gen_kwargs['input_ids'] = generation_inputs
        gen_kwargs['pad_token_id'] = self.tokenizer.pad_token_id
        generated_tokens = self.model.generate(**gen_kwargs)
        generated_tokens = generated_tokens[:, generation_inputs.size()[-1]:]

        # in case the batch is shorter than max length, the output should be padded
        if gen_kwargs.get('max_length') is not None and generated_tokens.shape[
                -1] < gen_kwargs['max_length']:
            generated_tokens = self._pad_tensors_to_max_len(
                generated_tokens, gen_kwargs['max_length'])
        elif gen_kwargs.get('max_new_tokens'
                            ) is not None and generated_tokens.shape[-1] < (
                                gen_kwargs['max_new_tokens'] + 1):
            generated_tokens = self._pad_tensors_to_max_len(
                generated_tokens, gen_kwargs['max_new_tokens'] + 1)

        if has_labels:
            labels = inputs['labels']
            if gen_kwargs.get('max_length') is not None and labels.shape[
                    -1] < gen_kwargs['max_length']:
                labels = self._pad_tensors_to_max_len(labels,
                                                      gen_kwargs['max_length'])
            elif gen_kwargs.get(
                    'max_new_tokens') is not None and labels.shape[-1] < (
                        gen_kwargs['max_new_tokens'] + 1):
                labels = self._pad_tensors_to_max_len(
                    labels, (gen_kwargs['max_new_tokens'] + 1))
        else:
            labels = None

        generated_tokens = [
            ''.join(self._decode(seq, False)) for seq in generated_tokens
        ]
        inputs['tgts'] = [''.join(self._decode(seq, True)) for seq in labels]
        return {
            'preds': generated_tokens,
        }

    def _pad_tensors_to_max_len(self, tensor, max_length):
        if self.tokenizer is not None and hasattr(self.tokenizer,
                                                  'pad_token_id'):
            # If PAD token is not defined at least EOS token has to be defined
            pad_token_id = (
                self.tokenizer.pad_token_id if self.tokenizer.pad_token_id
                is not None else self.tokenizer.eos_token_id)
        else:
            if self.model.config.pad_token_id is not None:
                pad_token_id = self.model.config.pad_token_id
            else:
                raise ValueError(
                    'Pad_token_id must be set in the configuration of the model, in order to pad tensors'
                )

        padded_tensor = pad_token_id * torch.ones(
            (tensor.shape[0], max_length),
            dtype=tensor.dtype,
            device=tensor.device)
        padded_tensor[:, :tensor.shape[-1]] = tensor
        return padded_tensor
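Note that evaluation_step pulls its generation settings from self.cfg['gen_kwargs'] (populated by cfg_modify_fn in finetune.py below) and returns predictions under 'preds' while stashing the decoded labels under inputs['tgts']; that is exactly the pair consumed by the TextGenerationMetric defined later in this commit.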
examples/pytorch/chatglm6b/finetune.py (new file, 380 lines)
@@ -0,0 +1,380 @@
import os
from dataclasses import dataclass, field

import numpy as np
import torch
from chatglm_trainer import Seq2SeqTrainer
from text_generation_metric import TextGenerationMetric  # noqa: imported for its 'chatglm' metric registration
from transformers import DataCollatorForSeq2Seq

from modelscope import snapshot_download
from modelscope.metainfo import Models
from modelscope.models import Model
from modelscope.msdatasets import MsDataset
from modelscope.swift import Swift
from modelscope.swift.lora import LoRAConfig
from modelscope.trainers.training_args import TrainingArgs
from modelscope.utils.config import ConfigDict
from modelscope.utils.hub import read_config


@dataclass(init=False)
class Chatglm6bArguments(TrainingArgs):
    ptuning_checkpoint: str = field(
        default=None,
        metadata={
            'help': 'The p-tuning checkpoint previously trained.',
        })

    pre_seq_len: int = field(
        default=None, metadata={
            'help': 'The p-tuning sequence length',
        })

    prefix_projection: bool = field(
        default=False, metadata={
            'help': '',
        })

    quantization_bit: int = field(
        default=None, metadata={
            'help': 'Quantized bit',
        })

    prompt_column: str = field(
        default=None,
        metadata={
            'help':
            'The name of the column in the datasets containing the full texts (for summarization).'
        },
    )

    response_column: str = field(
        default=None,
        metadata={
            'help':
            'The name of the column in the datasets containing the summaries (for summarization).'
        },
    )

    history_column: str = field(
        default=None,
        metadata={
            'help':
            'The name of the column in the datasets containing the history of chat.'
        },
    )

    source_prefix: str = field(
        default='',
        metadata={
            'help':
            'A prefix to add before every source text (useful for T5 models).'
        })

    ignore_pad_token_for_loss: bool = field(
        default=True,
        metadata={
            'help':
            'Whether to ignore the tokens corresponding to padded labels in the loss computation or not.'
        },
    )

    max_source_length: int = field(
        default=1024,
        metadata={
            'help':
            ('The maximum total input sequence length after tokenization. Sequences longer '
             'than this will be truncated, sequences shorter will be padded.')
        },
    )

    max_target_length: int = field(
        default=128,
        metadata={
            'help':
            ('The maximum total sequence length for target text after tokenization. Sequences longer '
             'than this will be truncated, sequences shorter will be padded.')
        },
    )

    max_train_samples: int = field(
        default=None,
        metadata={
            'help':
            ('For debugging purposes or quicker training, truncate the number of training examples to this '
             'value if set.')
        },
    )

    max_eval_samples: int = field(
        default=None,
        metadata={
            'help':
            ('For debugging purposes or quicker training, truncate the number of evaluation examples to this '
             'value if set.')
        },
    )

    preprocessing_num_workers: int = field(
        default=None,
        metadata={
            'help': 'The number of processes to use for the preprocessing.'
        },
    )

    use_lora: int = field(
        default=0,
        metadata={'help': 'Whether to use lora to train the model.'},
    )

    lora_rank: int = field(
        default=32,
        metadata={'help': 'The lora rank'},
    )

    lora_alpha: int = field(
        default=32,
        metadata={'help': 'The lora alpha'},
    )

    lora_dropout: float = field(
        default=0.05,
        metadata={'help': 'The lora dropout'},
    )


args = Chatglm6bArguments(eval_metrics='chatglm').parse_cli()
print(args)
config, _ = args.to_config(ignore_default_config=args.use_model_config)
config.dump('./configuration.json')

if config['model']['type'] == 'chatglm6b':
    from modelscope.models.nlp import ChatGLMTokenizer
else:
    from modelscope.models.nlp import ChatGLM2Tokenizer as ChatGLMTokenizer


def cfg_modify_fn(cfg):
    if args.use_model_config:
        cfg.merge_from_dict(config)
    else:
        cfg = config
    if cfg.train.lr_scheduler.type == 'LinearLR':
        cfg.train.lr_scheduler['total_iters'] = \
            int(len(train_dataset) / cfg.train.dataloader.batch_size_per_gpu) * cfg.train.max_epochs
    cfg['gen_kwargs'] = {
        'do_sample': True,
        'top_p': 0.7,
        'max_length': 512,
        'temperature': 0.95
    }
    return cfg


train_dataset = MsDataset.load(
    args.train_dataset_name,
    subset_name=args.train_subset_name,
    split=args.train_split)
validation_dataset = MsDataset.load(
    args.val_dataset_name,
    subset_name=args.val_subset_name,
    split=args.val_split)

model_dir = snapshot_download(args.model)
model_config = read_config(model_dir)
model_config['model'] = ConfigDict({
    'type': config['model']['type'],
})

if config['model']['type'] == 'chatglm6b':
    model_config['model']['pre_seq_len'] = args.pre_seq_len
    model_config['model']['prefix_projection'] = args.prefix_projection

tokenizer = ChatGLMTokenizer.from_pretrained(model_dir, trust_remote_code=True)
model = Model.from_pretrained(model_dir, cfg_dict=model_config)

if args.ptuning_checkpoint is not None:
    # Evaluation
    # Loading extra state dict of prefix encoder

    prefix_state_dict = torch.load(
        os.path.join(args.ptuning_checkpoint, 'pytorch_model.bin'))
    new_prefix_state_dict = {}
    for k, v in prefix_state_dict.items():
        if k.startswith('transformer.prefix_encoder.'):
            new_prefix_state_dict[k[len('transformer.prefix_encoder.'):]] = v
    model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)

if args.quantization_bit is not None:
    print(f'Quantized to {args.quantization_bit} bit')
    model = model.quantize(args.quantization_bit)
if args.pre_seq_len is not None:
    # P-tuning v2
    model = model.half()
    model.transformer.prefix_encoder.float()
else:
    # Finetune
    model = model.float()

if args.use_lora != 0:
    lora_config = LoRAConfig(
        replace_modules=['attention.query_key_value'],
        rank=args.lora_rank,
        lora_alpha=args.lora_alpha,
        lora_dropout=args.lora_dropout)
    model = model.bfloat16()
    Swift.prepare_model(model, lora_config)

prefix = args.source_prefix if args.source_prefix is not None else ''

# Get the column names for input/target.
prompt_column = args.prompt_column
response_column = args.response_column
history_column = args.history_column

# Temporarily set max_target_length for training.
max_target_length = args.max_target_length

model_parameters = filter(lambda p: p.requires_grad, model.parameters())
trainable_params = sum([np.prod(p.size()) for p in model_parameters])

model_parameters = filter(lambda p: not p.requires_grad, model.parameters())
non_trainable_params = sum([np.prod(p.size()) for p in model_parameters])

print('trainable_params:{} ({:.2f}%), non_trainable_params:{}'.format(
    trainable_params, trainable_params / non_trainable_params * 100,
    non_trainable_params))


def preprocess_function_eval(examples):
    inputs, targets = [], []
    for i in range(len(examples[prompt_column])):
        if examples[prompt_column][i] and examples[response_column][i]:
            query = examples[prompt_column][i]
            if history_column is None or len(examples[history_column][i]) == 0:
                prompt = query
            else:
                prompt = ''
                history = examples[history_column][i]
                # ChatGLM multi-turn chat format: '问' = question, '答' = answer
                for turn_idx, (old_query, response) in enumerate(history):
                    prompt += '[Round {}]\n问:{}\n答:{}\n'.format(
                        turn_idx, old_query, response)
                prompt += '[Round {}]\n问:{}\n答:'.format(len(history), query)
            inputs.append(prompt)
            targets.append(examples[response_column][i])

    inputs = [prefix + inp for inp in inputs]
    model_inputs = tokenizer(
        inputs,
        max_length=args.max_source_length,
        truncation=True,
        padding=True)
    labels = tokenizer(
        text_target=targets, max_length=max_target_length, truncation=True)

    if args.ignore_pad_token_for_loss:
        labels['input_ids'] = [[(lb if lb != tokenizer.pad_token_id else -100)
                                for lb in label]
                               for label in labels['input_ids']]
    model_inputs['labels'] = labels['input_ids']

    return model_inputs


def preprocess_function_train(examples):
    max_seq_length = args.max_source_length + args.max_target_length

    model_inputs = {
        'input_ids': [],
        'labels': [],
    }
    for i in range(len(examples[prompt_column])):
        if examples[prompt_column][i] and examples[response_column][i]:
            query, answer = examples[prompt_column][i], examples[
                response_column][i]

            if history_column is None:
                prompt = query
            else:
                prompt = ''
                history = examples[history_column][i]
                for turn_idx, (old_query, response) in enumerate(history):
                    prompt += '[Round {}]\n问:{}\n答:{}\n'.format(
                        turn_idx, old_query, response)
                prompt += '[Round {}]\n问:{}\n答:'.format(len(history), query)

            prompt = prefix + prompt
            a_ids = tokenizer.encode(text=prompt, add_special_tokens=False)
            b_ids = tokenizer.encode(text=answer, add_special_tokens=False)

            if len(a_ids) > args.max_source_length - 1:
                a_ids = a_ids[:args.max_source_length - 1]

            if len(b_ids) > args.max_target_length - 2:
                b_ids = b_ids[:args.max_target_length - 2]

            input_ids = tokenizer.build_inputs_with_special_tokens(
                a_ids, b_ids)

            if config['model']['type'] == 'chatglm6b':
                context_length = input_ids.index(tokenizer.bos_token_id)
            else:
                context_length = len(a_ids) + 2
            mask_position = context_length - 1
            labels = [-100] * context_length + input_ids[mask_position + 1:]

            pad_len = max_seq_length - len(input_ids)
            input_ids = input_ids + [tokenizer.pad_token_id] * pad_len
            if config['model']['type'] == 'chatglm6b':
                labels = labels + [tokenizer.pad_token_id] * pad_len
                if args.ignore_pad_token_for_loss:
                    labels = [(lb if lb != tokenizer.pad_token_id else -100)
                              for lb in labels]
            else:
                labels = labels + [-100] * pad_len

            model_inputs['input_ids'].append(input_ids)
            model_inputs['labels'].append(labels)

    return model_inputs


train_dataset = train_dataset.to_hf_dataset().map(
    preprocess_function_train,
    batched=True,
    num_proc=args.preprocessing_num_workers,
    desc='Running tokenizer on train dataset',
)

validation_dataset = validation_dataset.to_hf_dataset().map(
    preprocess_function_eval,
    batched=True,
    num_proc=args.preprocessing_num_workers,
    desc='Running tokenizer on eval dataset',
)

# Data collator
label_pad_token_id = -100 if args.ignore_pad_token_for_loss else tokenizer.pad_token_id
data_collator = DataCollatorForSeq2Seq(
    tokenizer,
    model=model,
    label_pad_token_id=label_pad_token_id,
    pad_to_multiple_of=None,
    padding=False)

model.gradient_checkpointing_enable()
if config['model']['type'] == 'chatglm6b':
    model.enable_input_require_grads()

trainer = Seq2SeqTrainer(
    model=model,
    cfg_file='./configuration.json',
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    seed=args.seed,
    data_collator=data_collator,
    remove_unused_data=True,
    cfg_modify_fn=cfg_modify_fn)
trainer.tokenizer = tokenizer
trainer.train()
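To make the label masking in preprocess_function_train concrete, here is a toy walk-through of the chatglm6b branch. The prompt and answer ids are invented for readability; the special-token ids are the defaults from ChatGLMConfig further down (gMASK 150001, bos 150004, eos 150005):

a_ids = [11, 12, 13]   # encoded prompt (invented ids)
b_ids = [21, 22]       # encoded answer (invented ids)
# build_inputs_with_special_tokens appends [gMASK] + <sop> to the prompt
# and <eop> to the answer:
input_ids = [11, 12, 13, 150001, 150004, 21, 22, 150005]
context_length = input_ids.index(150004)   # 4, position of <sop>
mask_position = context_length - 1
labels = [-100] * context_length + input_ids[mask_position + 1:]
# labels == [-100, -100, -100, -100, 150004, 21, 22, 150005]
# so the loss covers only <sop>, the answer tokens and <eop>.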
examples/pytorch/chatglm6b/lora_inference.py (new file, 31 lines)
@@ -0,0 +1,31 @@
from modelscope import Model, pipeline, read_config
from modelscope.metainfo import Models
from modelscope.swift import Swift
from modelscope.swift.lora import LoRAConfig
from modelscope.utils.config import ConfigDict

lora_config = LoRAConfig(
    replace_modules=['attention.query_key_value'],
    rank=32,
    lora_alpha=32,
    lora_dropout=0.05,
    pretrained_weights='./lora_dureader_target/iter_600.pth')

model_dir = 'ZhipuAI/chatglm2-6b'
model_config = read_config(model_dir)
model_config['model'] = ConfigDict({
    'type': Models.chatglm2_6b,
})

model = Model.from_pretrained(model_dir, cfg_dict=model_config)
model = model.bfloat16()
Swift.prepare_model(model, lora_config)

pipe = pipeline('chat', model, pipeline_name='chatglm2_6b-text-generation')

# Sample prompt (Chinese): roughly, 'Even though China's education level has
# clearly improved since entering the 21st century, the gaokao remains
# formidable; Chinese students and parents see it as a fate-changing,
# life-rewriting exam and invest heavily in it.'
print(
    pipe({
        'text':
        '纵使进入21世纪后,我国教育水平有了明显进步,高考的难度却依旧不容小觑,高考被中国学生和家长定义为改变命运、改写人生脑重要考试,为了这场考试,学生和家长都付出了很多。',
        'history': []
    }))
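The pretrained_weights path points into lora_dureader_target, the work_dir written by the DuReader LoRA training script further down; the exact checkpoint name (iter_600.pth here) depends on how long that run lasted.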
examples/pytorch/chatglm6b/ptuning_inference.py (new file, 34 lines)
@@ -0,0 +1,34 @@
import torch

from modelscope import Model, pipeline, read_config
from modelscope.metainfo import Models
from modelscope.utils.config import ConfigDict

model_dir = 'ZhipuAI/ChatGLM-6B'
model_config = read_config(model_dir)
model_config['model'] = ConfigDict({
    'type': Models.chatglm_6b,
    'pre_seq_len': 128,
    'prefix_projection': False,
})

model = Model.from_pretrained(model_dir, cfg_dict=model_config)
model = model.half()
model.transformer.prefix_encoder.float()
prefix_state_dict = torch.load('./ptuning_dureader_target/iter_900.pth')
new_prefix_state_dict = {}
for k, v in prefix_state_dict.items():
    if k.startswith('transformer.prefix_encoder.'):
        new_prefix_state_dict[k[len('transformer.prefix_encoder.'):]] = v
model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)

pipe = pipeline('chat', model)

# Sample prompt (Chinese): a passage about vitamin C (ascorbic acid): it
# prevents scurvy, aids infection control and wound healing, is a strong
# antioxidant that neutralizes free radicals, is needed to synthesize
# collagen, and helps produce hormones and chemical messengers for the brain
# and nerves.
print(
    pipe({
        'text':
        '维生素C也叫抗坏血酸,所以它最重要的一个作用是预防坏血病。另外,维生素C在控制感染和愈合伤口方面发挥作用,是一种强大的抗氧化剂,'
        '可以中和有害的自由基。维生素C还是合成胶原蛋白的重要营养成分,胶原蛋白是结缔组织中的一种纤维蛋白,它存在于身体的各个系统中:'
        '神经系统、免疫系统、骨骼系统、软骨系统、血液系统和其他系统。维生素C有助于产生作用于大脑和神经的多种激素和化学信使。',
        'history': []
    }))
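The filtering loop keeps only keys under transformer.prefix_encoder. and strips that prefix, so the resulting state dict matches the standalone prefix-encoder module; finetune.py applies the same logic when --ptuning_checkpoint is given.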
@@ -0,0 +1,28 @@
LR=5e-5

PYTHONPATH=. python examples/pytorch/chatglm6b/finetune.py \
    --train_dataset_name modelscope/DuReader_robust-QG \
    --val_dataset_name modelscope/DuReader_robust-QG \
    --train_subset_name default \
    --val_subset_name default \
    --train_split train \
    --val_split validation \
    --prompt_column text1 \
    --response_column text2 \
    --model "ZhipuAI/chatglm2-6b" \
    --max_source_length 64 \
    --max_target_length 64 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 1 \
    --train.optimizer.options.cumulative_iters 1 \
    --max_epochs 2 \
    --save_strategy 'by_step' \
    --save_interval 300 \
    --lr $LR \
    --eval_strategy "by_step" \
    --eval_interval 300 \
    --lr_strategy 'by_step' \
    --task 'chat' \
    --model.type 'chatglm2-6b' \
    --use_lora 1 \
    --work_dir lora_dureader_target \
examples/pytorch/chatglm6b/run_train_lora_adv.sh (new file, 24 lines)
@@ -0,0 +1,24 @@
LR=5e-5

PYTHONPATH=. python examples/pytorch/chatglm6b/finetune.py \
    --train_dataset_name AdvertiseGen/train.json \
    --val_dataset_name AdvertiseGen/dev.json \
    --prompt_column content \
    --response_column summary \
    --model "ZhipuAI/ChatGLM-6B" \
    --max_source_length 64 \
    --max_target_length 64 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 1 \
    --train.optimizer.options.cumulative_iters 1 \
    --max_epochs 1 \
    --save_strategy 'by_step' \
    --save_interval 1000 \
    --lr $LR \
    --eval_strategy "by_step" \
    --eval_interval 1000 \
    --lr_strategy 'by_step' \
    --task 'chat' \
    --model.type 'chatglm6b' \
    --use_lora 1 \
    --work_dir lora_adv_target \
examples/pytorch/chatglm6b/run_train_lora_dureader.sh (new file, 28 lines)
@@ -0,0 +1,28 @@
LR=5e-5

PYTHONPATH=. python examples/pytorch/chatglm6b/finetune.py \
    --train_dataset_name modelscope/DuReader_robust-QG \
    --val_dataset_name modelscope/DuReader_robust-QG \
    --train_subset_name default \
    --val_subset_name default \
    --train_split train \
    --val_split validation \
    --prompt_column text1 \
    --response_column text2 \
    --model "ZhipuAI/ChatGLM-6B" \
    --max_source_length 64 \
    --max_target_length 64 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 1 \
    --train.optimizer.options.cumulative_iters 1 \
    --max_epochs 2 \
    --save_strategy 'by_step' \
    --save_interval 300 \
    --lr $LR \
    --eval_strategy "by_step" \
    --eval_interval 300 \
    --lr_strategy 'by_step' \
    --task 'chat' \
    --model.type 'chatglm6b' \
    --use_lora 1 \
    --work_dir lora_dureader_target \
examples/pytorch/chatglm6b/run_train_ptuning_adv.sh (new file, 26 lines)
@@ -0,0 +1,26 @@
PRE_SEQ_LEN=128
LR=2e-2

PYTHONPATH=. python examples/pytorch/chatglm6b/finetune.py \
    --train_dataset_name AdvertiseGen/train.json \
    --val_dataset_name AdvertiseGen/dev.json \
    --prompt_column content \
    --response_column summary \
    --model "ZhipuAI/ChatGLM-6B" \
    --max_source_length 64 \
    --max_target_length 64 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 1 \
    --train.optimizer.options.cumulative_iters 1 \
    --max_epochs 1 \
    --save_strategy 'by_step' \
    --save_interval 1000 \
    --lr $LR \
    --eval_strategy "by_step" \
    --eval_interval 1000 \
    --lr_strategy 'by_step' \
    --task 'chat' \
    --model.type 'chatglm6b' \
    --pre_seq_len $PRE_SEQ_LEN \
    --quantization_bit 4 \
    --work_dir ptuning_adv_target \
examples/pytorch/chatglm6b/run_train_ptuning_dureader.sh (new file, 30 lines)
@@ -0,0 +1,30 @@
PRE_SEQ_LEN=128
LR=2e-2

PYTHONPATH=. python examples/pytorch/chatglm6b/finetune.py \
    --train_dataset_name modelscope/DuReader_robust-QG \
    --val_dataset_name modelscope/DuReader_robust-QG \
    --train_subset_name default \
    --val_subset_name default \
    --train_split train \
    --val_split validation \
    --prompt_column text1 \
    --response_column text2 \
    --model "ZhipuAI/ChatGLM-6B" \
    --max_source_length 64 \
    --max_target_length 64 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 1 \
    --train.optimizer.options.cumulative_iters 1 \
    --max_epochs 3 \
    --save_strategy 'by_step' \
    --save_interval 300 \
    --lr $LR \
    --eval_strategy "by_step" \
    --eval_interval 300 \
    --lr_strategy 'by_step' \
    --task 'chat' \
    --model.type 'chatglm6b' \
    --pre_seq_len $PRE_SEQ_LEN \
    --quantization_bit 4 \
    --work_dir ptuning_dureader_target \
examples/pytorch/chatglm6b/text_generation_metric.py (new file, 85 lines)
@@ -0,0 +1,85 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

from typing import Dict, Iterable, List

import jieba
import numpy as np
from nltk.translate.bleu_score import (SmoothingFunction, corpus_bleu,
                                       sentence_bleu)
from rouge import Rouge

from modelscope.metainfo import Metrics
from modelscope.metrics.base import Metric
from modelscope.metrics.builder import METRICS, MetricKeys
from modelscope.utils.chinese_utils import rebuild_chinese_str
from modelscope.utils.registry import default_group


@METRICS.register_module(group_key=default_group, module_name='chatglm')
class TextGenerationMetric(Metric):

    def __init__(self, target_text='tgts', pred_text='preds'):
        self.preds: List[str] = []
        self.tgts: List[str] = []
        self.rouge = Rouge()
        self.target_text = target_text
        self.pred_text = pred_text

    def add(self, outputs: Dict[str, List[str]], inputs: Dict[str, List[str]]):
        ground_truths = inputs[self.target_text]
        eval_results = outputs[self.pred_text]
        for truth in ground_truths:
            self.tgts.append(truth)
        for result in eval_results:
            self.preds.append(result)

    def _check(self, pred: str, tgt: str) -> bool:

        def remove_useless(string: str) -> str:
            return string.replace(' ', '').replace('.', '')

        return len(remove_useless(pred)) != 0 and len(remove_useless(tgt)) != 0

    def evaluate(self):
        preds, labels = self.preds, self.tgts
        if isinstance(preds, tuple):
            preds = preds[0]

        score_dict = {
            'rouge-1': [],
            'rouge-2': [],
            'rouge-l': [],
            'bleu-4': []
        }
        for pred, label in zip(preds, labels):
            hypothesis = list(jieba.cut(pred))
            if len(hypothesis) == 0:
                hypothesis = ['</s>']
            reference = list(jieba.cut(label))
            rouge = Rouge()
            scores = rouge.get_scores(' '.join(hypothesis),
                                      ' '.join(reference))
            result = scores[0]

            for k, v in result.items():
                score_dict[k].append(round(v['f'] * 100, 4))
            bleu_score = sentence_bleu(
                [list(label)],
                list(pred),
                smoothing_function=SmoothingFunction().method3)
            score_dict['bleu-4'].append(round(bleu_score * 100, 4))

        for k, v in score_dict.items():
            score_dict[k] = float(np.mean(v))
        return score_dict

    def merge(self, other: 'TextGenerationMetric'):
        self.preds.extend(other.preds)
        self.tgts.extend(other.tgts)

    def __getstate__(self):
        return self.preds, self.tgts

    def __setstate__(self, state):
        self.__init__()
        self.preds, self.tgts = state
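A quick smoke test of the metric above (toy strings; assumes jieba, nltk and rouge are installed):

metric = TextGenerationMetric()
metric.add({'preds': ['今天天气很好']}, {'tgts': ['今天天气不错']})
print(metric.evaluate())
# -> a dict with 'rouge-1', 'rouge-2', 'rouge-l' and 'bleu-4' F-scores scaled to [0, 100]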
modelscope/metainfo.py
@@ -165,6 +165,8 @@ class Models(object):
     doc2bot = 'doc2bot'
     peer = 'peer'
     llama = 'llama'
+    chatglm_6b = 'chatglm6b'
+    chatglm2_6b = 'chatglm2-6b'
 
     # audio models
     sambert_hifigan = 'sambert-hifigan'

@@ -191,7 +191,7 @@ class BlockPETL(nn.Module):
         self.prompt = None
 
     def forward(self, x):
-        if self.prompt is not None:
+        if self.prompt is not None and self.prompt_length and self.prompt_length > 0:
             x = self.prompt(x)
 
         x = x + self.drop_path1(self.ls1(self.attn(self.norm1(x))))
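This hunk tightens the guard in BlockPETL.forward: the prompt module now only runs when a positive prompt_length is configured, not merely when self.prompt is set.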

@@ -19,8 +19,8 @@ from modelscope.metainfo import Models
 from modelscope.models import TorchModel
 from modelscope.models.builder import MODELS
 from modelscope.outputs import OutputKeys
-from modelscope.tuners.control_sd_lora import ControlLoRATuner
-from modelscope.tuners.sd_lora import LoRATuner
+from modelscope.swift.control_sd_lora import ControlLoRATuner
+from modelscope.swift.sd_lora import LoRATuner
 from modelscope.utils.checkpoint import save_checkpoint, save_configuration
 from modelscope.utils.config import Config
 from modelscope.utils.constant import ModelFile, Tasks
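This hunk tracks the package rename: the stable-diffusion LoRA tuners (ControlLoRATuner, LoRATuner) now live under modelscope.swift instead of modelscope.tuners.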

modelscope/models/nlp/__init__.py
@@ -22,6 +22,8 @@ if TYPE_CHECKING:
     from .csanmt import CsanmtForTranslation
     from .canmt import CanmtForTranslation
     from .deberta_v2 import DebertaV2ForMaskedLM, DebertaV2Model
+    from .chatglm import ChatGLMForConditionalGeneration, ChatGLMTokenizer, ChatGLMConfig
+    from .chatglm2 import ChatGLM2ForConditionalGeneration, ChatGLM2Tokenizer, ChatGLM2Config
     from .gpt_neo import GPTNeoModel
     from .gpt2 import GPT2Model
     from .gpt3 import GPT3ForTextGeneration, DistributedGPT3

@@ -95,6 +97,14 @@ else:
         ['CodeGeeXForCodeTranslation', 'CodeGeeXForCodeGeneration'],
         'glm_130b': ['GLM130bForTextGeneration'],
         'deberta_v2': ['DebertaV2ForMaskedLM', 'DebertaV2Model'],
+        'chatglm': [
+            'ChatGLMForConditionalGeneration', 'ChatGLMTokenizer',
+            'ChatGLMConfig'
+        ],
+        'chatglm2': [
+            'ChatGLM2ForConditionalGeneration', 'ChatGLM2Tokenizer',
+            'ChatGLM2Config'
+        ],
         'heads': ['TextClassificationHead'],
         'hf_transformers': ['TransformersModel'],
         'gpt2': ['GPT2Model'],
modelscope/models/nlp/chatglm/__init__.py (new file, 46 lines)
@@ -0,0 +1,46 @@
# flake8: noqa
# There's no way to ignore "F401 '...' imported but unused" warnings in this
# module, but to preserve other warnings. So, don't check this module at all.

# Copyright 2021-2022 The Alibaba DAMO NLP Team Authors.
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
    from .configuration import ChatGLMConfig
    from .tokenization import ChatGLMTokenizer
    from .text_generation import ChatGLMForConditionalGeneration
    from .quantization import (
        quantize, )

else:
    _import_structure = {
        'configuration': ['ChatGLMConfig'],
        'text_generation': ['ChatGLMForConditionalGeneration'],
        'quantization': ['quantize'],
        'tokenization': [
            'ChatGLMTokenizer',
        ],
    }
    import sys

    sys.modules[__name__] = LazyImportModule(
        __name__,
        globals()['__file__'],
        _import_structure,
        module_spec=__spec__)
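With the LazyImportModule wiring above, the heavy submodules are only imported on first attribute access; a consumer can simply write (a sketch mirroring the Example in configuration.py below):

from modelscope.models.nlp.chatglm import ChatGLMConfig  # resolved lazily

config = ChatGLMConfig()  # defaults correspond to ChatGLM-6B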
modelscope/models/nlp/chatglm/configuration.py (new file, 101 lines)
@@ -0,0 +1,101 @@
""" ChatGLM model configuration """

from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging

logger = logging.get_logger(__name__)


class ChatGLMConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`~ChatGLMModel`].
    It is used to instantiate a ChatGLM model according to the specified arguments, defining the model
    architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
    the ChatGLM-6B [THUDM/ChatGLM-6B](https://huggingface.co/THUDM/chatglm-6b) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used
    to control the model outputs. Read the documentation from [`PretrainedConfig`]
    for more information.


    Args:
        vocab_size (`int`, *optional*, defaults to 150528):
            Vocabulary size of the ChatGLM-6B model.
            Defines the number of different tokens that can be represented by the
            `inputs_ids` passed when calling [`~ChatGLMModel`] or
            [`~TFChatGLMModel`].
        hidden_size (`int`, *optional*, defaults to 4096):
            Dimension of the encoder layers and the pooler layer.
        num_hidden_layers (`int`, *optional*, defaults to 28):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 32):
            Number of attention heads for each attention layer in the Transformer encoder.
        inner_hidden_size (`int`, *optional*, defaults to 16384):
            Dimension of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
        max_sequence_length (`int`, *optional*, defaults to 2048):
            The maximum sequence length that this model might ever be used with.
            Typically set this to something large just in case (e.g., 512 or 1024 or 2048).
        layernorm_epsilon (`float`, *optional*, defaults to 1e-5):
            The epsilon used by the layer normalization layers.
        use_cache (`bool`, *optional*, defaults to `False`):
            Whether the model should return the last key/values attentions (not used by all models).
    Example:

    ```python
    >>> from modelscope.models.nlp.chatglm.configuration import ChatGLMConfig
    >>> from modelscope.models.nlp.chatglm.text_generation import ChatGLMModel

    >>> # Initializing a ChatGLM-6B THUDM/ChatGLM-6B style configuration
    >>> configuration = ChatGLMConfig()

    >>> # Initializing a model from the THUDM/ChatGLM-6B style configuration
    >>> model = ChatGLMModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
    """
    model_type = 'chatglm'

    def __init__(self,
                 vocab_size=150528,
                 hidden_size=4096,
                 num_layers=28,
                 num_attention_heads=32,
                 layernorm_epsilon=1e-5,
                 use_cache=False,
                 bos_token_id=150004,
                 eos_token_id=150005,
                 mask_token_id=150000,
                 gmask_token_id=150001,
                 pad_token_id=0,
                 max_sequence_length=2048,
                 inner_hidden_size=16384,
                 position_encoding_2d=True,
                 quantization_bit=0,
                 pre_seq_len=None,
                 prefix_projection=False,
                 **kwargs):
        self.num_layers = num_layers
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads
        self.max_sequence_length = max_sequence_length
        self.layernorm_epsilon = layernorm_epsilon
        self.inner_hidden_size = inner_hidden_size
        self.use_cache = use_cache
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.pad_token_id = pad_token_id
        self.mask_token_id = mask_token_id
        self.gmask_token_id = gmask_token_id
        self.position_encoding_2d = position_encoding_2d
        self.quantization_bit = quantization_bit
        self.pre_seq_len = pre_seq_len
        self.prefix_projection = prefix_projection

        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            **kwargs)
modelscope/models/nlp/chatglm/quantization.py (new file, 234 lines): diff suppressed because one or more lines are too long
modelscope/models/nlp/chatglm/text_generation.py (new file, 1571 lines): diff suppressed because it is too large
modelscope/models/nlp/chatglm/tokenization.py (new file, 463 lines)
@@ -0,0 +1,463 @@
"""Tokenization classes for ChatGLM."""
import os
from typing import Dict, List, Optional, Union

import numpy as np
import sentencepiece as spm
from transformers.tokenization_utils import PreTrainedTokenizer
from transformers.tokenization_utils_base import BatchEncoding, EncodedInput
from transformers.utils import PaddingStrategy, logging

logger = logging.get_logger(__name__)

PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
    'THUDM/chatglm-6b': 2048,
}


class TextTokenizer:

    def __init__(self, model_path):
        self.sp = spm.SentencePieceProcessor()
        self.sp.Load(model_path)
        self.num_tokens = self.sp.vocab_size()

    def encode(self, text):
        return self.sp.EncodeAsIds(text)

    def decode(self, ids: List[int]):
        return self.sp.DecodeIds(ids)

    def tokenize(self, text):
        return self.sp.EncodeAsPieces(text)

    def convert_tokens_to_ids(self, tokens):
        return [self.sp.PieceToId(token) for token in tokens]

    def convert_token_to_id(self, token):
        return self.sp.PieceToId(token)

    def convert_id_to_token(self, idx):
        return self.sp.IdToPiece(idx)

    def __len__(self):
        return self.num_tokens


class SPTokenizer:

    def __init__(
        self,
        vocab_file,
        num_image_tokens=20000,
        max_blank_length=80,
        byte_fallback=True,
    ):
        assert vocab_file is not None
        self.vocab_file = vocab_file
        self.num_image_tokens = num_image_tokens
        self.special_tokens = [
            '[MASK]', '[gMASK]', '[sMASK]', '<unused_0>', '<sop>', '<eop>',
            '<ENC>', '<dBLOCK>'
        ]
        self.max_blank_length = max_blank_length
        self.byte_fallback = byte_fallback
        self.text_tokenizer = TextTokenizer(vocab_file)

    def _get_text_tokenizer(self):
        return self.text_tokenizer

    @staticmethod
    def get_blank_token(length: int):
        assert length >= 2
        return f'<|blank_{length}|>'

    @staticmethod
    def get_tab_token():
        return '<|tab|>'

    @property
    def num_text_tokens(self):
        return self.text_tokenizer.num_tokens

    @property
    def num_tokens(self):
        return self.num_image_tokens + self.num_text_tokens

    @staticmethod
    def _encode_whitespaces(text: str, max_len: int = 80):
        text = text.replace('\t', SPTokenizer.get_tab_token())
        for i in range(max_len, 1, -1):
            text = text.replace(' ' * i, SPTokenizer.get_blank_token(i))
        return text

    def _preprocess(self, text: str, linebreak=True, whitespaces=True):
        if linebreak:
            text = text.replace('\n', '<n>')
        if whitespaces:
            text = self._encode_whitespaces(
                text, max_len=self.max_blank_length)
        return text

    def encode(self,
               text: str,
               linebreak=True,
               whitespaces=True,
               add_dummy_prefix=True) -> List[int]:
        """
        @param text: Text to encode.
        @param linebreak: Whether to encode newline (\n) in text.
        @param whitespaces: Whether to encode multiple whitespaces or tab in text, useful for source code encoding.
        @param add_dummy_prefix: Whether to add dummy blank space in the beginning.
        """
        text = self._preprocess(text, linebreak, whitespaces)
        if not add_dummy_prefix:
            text = '<n>' + text
        tmp = self._get_text_tokenizer().encode(text)
        tokens = [x + self.num_image_tokens for x in tmp]
        return tokens if add_dummy_prefix else tokens[2:]

    def decode(self, text_ids: List[int]) -> str:
        ids = [int(_id) - self.num_image_tokens for _id in text_ids]
        ids = [_id for _id in ids if _id >= 0]
        text = self._get_text_tokenizer().decode(ids)
        text = text.replace('<n>', '\n')
        text = text.replace(SPTokenizer.get_tab_token(), '\t')
        for i in range(2, self.max_blank_length + 1):
            text = text.replace(self.get_blank_token(i), ' ' * i)
        return text

    def tokenize(self,
                 text: str,
                 linebreak=True,
                 whitespaces=True,
                 add_dummy_prefix=True) -> List[str]:
        """
        @param text: Text to encode.
        @param linebreak: Whether to encode newline (\n) in text.
        @param whitespaces: Whether to encode multiple whitespaces or tab in text, useful for source code encoding.
        @param add_dummy_prefix: Whether to add dummy blank space in the beginning.
        """
        text = self._preprocess(text, linebreak, whitespaces)
        if not add_dummy_prefix:
            text = '<n>' + text
        tokens = self._get_text_tokenizer().tokenize(text)
        return tokens if add_dummy_prefix else tokens[2:]

    def __getitem__(self, x: Union[int, str]):
        if isinstance(x, int):
            if x < self.num_image_tokens:
                return '<image_{}>'.format(x)
            else:
                return self.text_tokenizer.convert_id_to_token(
                    x - self.num_image_tokens)
        elif isinstance(x, str):
            if x.startswith('<image_') and x.endswith(
                    '>') and x[7:-1].isdigit():
                return int(x[7:-1])
            else:
                return self.text_tokenizer.convert_token_to_id(
                    x) + self.num_image_tokens
        else:
            raise ValueError('The key should be str or int.')


class ChatGLMTokenizer(PreTrainedTokenizer):
    """
    Construct a ChatGLM tokenizer. Based on byte-level Byte-Pair-Encoding.

    Args:
        vocab_file: Path to the vocabulary file.
        do_lower_case: Use lower case letters.
        remove_space: Remove spaces.
        bos_token: The bos token
        eos_token: The eos token
        end_token: The end token
        mask_token: The mask token
        gmask_token: The gmask token
        padding_side: The padding side
        num_image_tokens: The `num_image_tokens` in `SPTokenizer`
    """

    vocab_files_names = {'vocab_file': 'ice_text.model'}
    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
    model_input_names = ['input_ids', 'attention_mask', 'position_ids']

    def __init__(self,
                 vocab_file,
                 do_lower_case=False,
                 remove_space=False,
                 bos_token='<sop>',
                 eos_token='<eop>',
                 end_token='</s>',
                 mask_token='[MASK]',
                 gmask_token='[gMASK]',
                 padding_side='left',
                 num_image_tokens=20000,
                 **kwargs) -> None:
        super().__init__(
            do_lower_case=do_lower_case,
            remove_space=remove_space,
            padding_side=padding_side,
            bos_token=bos_token,
            eos_token=eos_token,
            end_token=end_token,
            mask_token=mask_token,
            gmask_token=gmask_token,
            num_image_tokens=num_image_tokens,
            **kwargs)

        self.do_lower_case = do_lower_case
        self.remove_space = remove_space
        self.vocab_file = vocab_file

        self.bos_token = bos_token
        self.eos_token = eos_token
        self.end_token = end_token
        self.mask_token = mask_token
        self.gmask_token = gmask_token

        self.sp_tokenizer = SPTokenizer(
            vocab_file, num_image_tokens=num_image_tokens)
        """ Initialisation """

    @property
    def gmask_token_id(self) -> Optional[int]:
        if self.gmask_token is None:
            return None
        return self.convert_tokens_to_ids(self.gmask_token)

    @property
    def end_token_id(self) -> Optional[int]:
        """
        `Optional[int]`: Id of the end of context token in the vocabulary. Returns `None` if the token has not been
        set.
        """
        if self.end_token is None:
            return None
        return self.convert_tokens_to_ids(self.end_token)

    @property
    def vocab_size(self):
        """ Returns vocab size """
        return self.sp_tokenizer.num_tokens

    def get_vocab(self):
        """ Returns vocab as a dict """
        vocab = {
            self._convert_id_to_token(i): i
            for i in range(self.vocab_size)
        }
        vocab.update(self.added_tokens_encoder)
        return vocab

    def preprocess_text(self, inputs):
        if self.remove_space:
            outputs = ' '.join(inputs.strip().split())
        else:
            outputs = inputs

        if self.do_lower_case:
            outputs = outputs.lower()

        return outputs

    def _tokenize(self, text, **kwargs):
        """ Returns a tokenized string. """
        text = self.preprocess_text(text)

        seq = self.sp_tokenizer.tokenize(text)

        return seq

    def _decode(self,
                token_ids: Union[int, List[int]],
                skip_special_tokens: bool = False,
                clean_up_tokenization_spaces: bool = True,
                **kwargs) -> str:
        if isinstance(token_ids, int):
            token_ids = [token_ids]
        if len(token_ids) == 0:
            return ''
        if self.pad_token_id in token_ids:  # remove pad
            token_ids = list(filter((self.pad_token_id).__ne__, token_ids))
        return self.sp_tokenizer.decode(token_ids)

    def _convert_token_to_id(self, token):
        """ Converts a token (str) in an id using the vocab. """
        return self.sp_tokenizer[token]

    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        return self.sp_tokenizer[index]

    def save_vocabulary(self, save_directory, filename_prefix=None):
        """
        Save the vocabulary and special tokens file to a directory.

        Args:
            save_directory (`str`):
                The directory in which to save the vocabulary.
            filename_prefix (`str`, *optional*):
                An optional prefix to add to the names of the saved files.

        Returns:
            `Tuple(str)`: Paths to the files saved.
        """
        if os.path.isdir(save_directory):
            vocab_file = os.path.join(save_directory,
                                      self.vocab_files_names['vocab_file'])
        else:
            vocab_file = save_directory

        with open(self.vocab_file, 'rb') as fin:
            proto_str = fin.read()

        with open(vocab_file, 'wb') as writer:
            writer.write(proto_str)

        return (vocab_file, )

    def build_inputs_with_special_tokens(
            self,
            token_ids_0: List[int],
            token_ids_1: Optional[List[int]] = None) -> List[int]:
        """
        Build model inputs from a sequence or a pair of sequences by appending
        special tokens. A ChatGLM sequence has the following format:

        - single sequence: `A [gMASK] <sop>`
        - pair of sequences: `A [gMASK] <sop> B <eop>`

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        mask_ids = self.sp_tokenizer[self.mask_token]
        gmask_ids = self.sp_tokenizer[self.gmask_token]
        eos_id = self.sp_tokenizer[self.eos_token]
        if mask_ids not in token_ids_0 and gmask_ids not in token_ids_0:
            token_ids_0 += [gmask_ids]

        if token_ids_0[-1] != mask_ids and token_ids_0[-1] != gmask_ids:
            token_ids_0 += [self.sp_tokenizer[self.end_token]]

        token_ids_0 += [self.sp_tokenizer[self.bos_token]]

        if token_ids_1 is not None:
            if not token_ids_1 or token_ids_1[-1] != eos_id:
                token_ids_1 += [eos_id]
            token_ids_0 += token_ids_1

        return token_ids_0

    def _pad(
        self,
        encoded_inputs: Union[Dict[str, EncodedInput], BatchEncoding],
        max_length: Optional[int] = None,
        padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
        pad_to_multiple_of: Optional[int] = None,
        return_attention_mask: Optional[bool] = None,
    ) -> dict:
        """
        Pad encoded inputs (on left/right and up to predefined length or max length in the batch)

        Args:
            encoded_inputs:
                Dictionary of tokenized inputs (`List[int]`) or batch of tokenized inputs (`List[List[int]]`).
            max_length: maximum length of the returned list and optionally padding length (see below).
                Will truncate by taking into account the special tokens.
            padding_strategy: PaddingStrategy to use for padding.

                - PaddingStrategy.LONGEST: Pad to the longest sequence in the batch
                - PaddingStrategy.MAX_LENGTH: Pad to the max length (default)
                - PaddingStrategy.DO_NOT_PAD: Do not pad
                The tokenizer padding sides are defined in self.padding_side:

                    - 'left': pads on the left of the sequences
                    - 'right': pads on the right of the sequences
            pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
                This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability
                `>= 7.5` (Volta).
            return_attention_mask:
                (optional) Set to False to avoid returning attention mask (defaults to model specifics)
        """
        # Load from model defaults
        bos_token_id = self.sp_tokenizer[self.bos_token]
        mask_token_id = self.sp_tokenizer[self.mask_token]
        gmask_token_id = self.sp_tokenizer[self.gmask_token]
        assert self.padding_side == 'left'

        required_input = encoded_inputs[self.model_input_names[0]]
        seq_length = len(required_input)

        if padding_strategy == PaddingStrategy.LONGEST:
            max_length = len(required_input)

        if max_length is not None and pad_to_multiple_of is not None and (
                max_length % pad_to_multiple_of != 0):
            max_length = (
                (max_length // pad_to_multiple_of) + 1) * pad_to_multiple_of

        needs_to_be_padded = padding_strategy != PaddingStrategy.DO_NOT_PAD and len(
            required_input) != max_length

        # Initialize attention mask if not present.
        if max_length is not None:
            if 'attention_mask' not in encoded_inputs:
                if bos_token_id in required_input:
                    context_length = required_input.index(bos_token_id)
                else:
                    context_length = seq_length
                attention_mask = np.ones((1, seq_length, seq_length))
                attention_mask = np.tril(attention_mask)
                attention_mask[:, :, :context_length] = 1
                attention_mask = np.bool_(attention_mask < 0.5)
                encoded_inputs['attention_mask'] = attention_mask

            if 'position_ids' not in encoded_inputs:
                position_ids = np.arange(seq_length, dtype=np.int64)
                mask_token = mask_token_id if mask_token_id in required_input else gmask_token_id
                if mask_token in required_input:
                    mask_position = required_input.index(mask_token)
                    position_ids[context_length:] = mask_position
                block_position_ids = np.concatenate([
                    np.zeros(context_length, dtype=np.int64),
                    np.arange(
                        1, seq_length - context_length + 1, dtype=np.int64)
                ])
                encoded_inputs['position_ids'] = np.stack(
                    [position_ids, block_position_ids], axis=0)

        if needs_to_be_padded:
            difference = max_length - len(required_input)

            if 'attention_mask' in encoded_inputs:
                encoded_inputs['attention_mask'] = np.pad(
                    encoded_inputs['attention_mask'],
                    pad_width=[(0, 0), (difference, 0), (difference, 0)],
                    mode='constant',
                    constant_values=True)
            if 'token_type_ids' in encoded_inputs:
                encoded_inputs['token_type_ids'] = [
                    self.pad_token_type_id
                ] * difference + encoded_inputs['token_type_ids']
            if 'special_tokens_mask' in encoded_inputs:
                encoded_inputs['special_tokens_mask'] = [
                    1
                ] * difference + encoded_inputs['special_tokens_mask']
            if 'position_ids' in encoded_inputs:
                encoded_inputs['position_ids'] = np.pad(
                    encoded_inputs['position_ids'],
                    pad_width=[(0, 0), (difference, 0)])
            encoded_inputs[self.model_input_names[
                0]] = [self.pad_token_id] * difference + required_input

        return encoded_inputs
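The whitespace pre-encoding in SPTokenizer is a pure string transform, so it can be sanity-checked without the ice_text.model vocabulary file (a small check against the staticmethods defined above):

from modelscope.models.nlp.chatglm.tokenization import SPTokenizer

# Tabs become '<|tab|>'; runs of 2..80 spaces become '<|blank_N|>'.
assert SPTokenizer._encode_whitespaces('a\tb  c') == 'a<|tab|>b<|blank_2|>c'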
modelscope/models/nlp/chatglm2/__init__.py (new file, 46 lines)
@@ -0,0 +1,46 @@
# flake8: noqa
# There's no way to ignore "F401 '...' imported but unused" warnings in this
# module, but to preserve other warnings. So, don't check this module at all.

# Copyright 2021-2022 The Alibaba DAMO NLP Team Authors.
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
    from .configuration import ChatGLM2Config
    from .tokenization import ChatGLM2Tokenizer
    from .text_generation import ChatGLM2ForConditionalGeneration
    from .quantization import (
        quantize, )

else:
    _import_structure = {
        'configuration': ['ChatGLM2Config'],
        'text_generation': ['ChatGLM2ForConditionalGeneration'],
        'quantization': ['quantize'],
        'tokenization': [
            'ChatGLM2Tokenizer',
        ],
    }
    import sys

    sys.modules[__name__] = LazyImportModule(
        __name__,
        globals()['__file__'],
        _import_structure,
        module_spec=__spec__)
|
||||
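A small sketch of what the lazy-import indirection buys (assuming modelscope is installed): the heavy submodule is only imported when a name is first accessed.

# LazyImportModule resolves this on access; .configuration is imported here.
from modelscope.models.nlp.chatglm2 import ChatGLM2Config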
58
modelscope/models/nlp/chatglm2/configuration.py
Normal file
@@ -0,0 +1,58 @@
""" ChatGLM model configuration """

from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging

logger = logging.get_logger(__name__)


class ChatGLM2Config(PretrainedConfig):

    def __init__(self,
                 num_layers=28,
                 padded_vocab_size=65024,
                 hidden_size=4096,
                 ffn_hidden_size=13696,
                 kv_channels=128,
                 num_attention_heads=32,
                 seq_length=2048,
                 hidden_dropout=0.0,
                 attention_dropout=0.0,
                 layernorm_epsilon=1e-5,
                 rmsnorm=True,
                 apply_residual_connection_post_layernorm=False,
                 post_layer_norm=True,
                 add_bias_linear=False,
                 add_qkv_bias=False,
                 interleaved_qkv=False,
                 bias_dropout_fusion=True,
                 multi_query_attention=False,
                 multi_query_group_num=1,
                 apply_query_key_layer_scaling=True,
                 attention_softmax_in_fp32=True,
                 fp32_residual_connection=False,
                 quantization_bit=0,
                 **kwargs):
        self.num_layers = num_layers
        self.padded_vocab_size = padded_vocab_size
        self.hidden_size = hidden_size
        self.ffn_hidden_size = ffn_hidden_size
        self.kv_channels = kv_channels
        self.num_attention_heads = num_attention_heads
        self.seq_length = seq_length
        self.hidden_dropout = hidden_dropout
        self.attention_dropout = attention_dropout
        self.layernorm_epsilon = layernorm_epsilon
        self.rmsnorm = rmsnorm
        self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm
        self.post_layer_norm = post_layer_norm
        self.add_bias_linear = add_bias_linear
        self.add_qkv_bias = add_qkv_bias
        self.bias_dropout_fusion = bias_dropout_fusion
        self.multi_query_attention = multi_query_attention
        self.multi_query_group_num = multi_query_group_num
        self.apply_query_key_layer_scaling = apply_query_key_layer_scaling
        self.attention_softmax_in_fp32 = attention_softmax_in_fp32
        self.fp32_residual_connection = fp32_residual_connection
        self.quantization_bit = quantization_bit
        super().__init__(**kwargs)
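A short instantiation sketch; the two overrides shown are illustrative values for a hypothetical variant, not defaults of any released checkpoint:

from modelscope.models.nlp.chatglm2 import ChatGLM2Config

config = ChatGLM2Config(
    seq_length=8192,             # illustrative override
    multi_query_attention=True,  # illustrative override
    multi_query_group_num=2)
print(config.hidden_size, config.ffn_hidden_size)  # 4096 13696 (defaults)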
223
modelscope/models/nlp/chatglm2/quantization.py
Normal file
File diff suppressed because one or more lines are too long
1299
modelscope/models/nlp/chatglm2/text_generation.py
Normal file
File diff suppressed because it is too large
251
modelscope/models/nlp/chatglm2/tokenization.py
Normal file
@@ -0,0 +1,251 @@
"""Tokenization classes for ChatGLM."""
import os
from typing import Dict, List, Optional, Union

from sentencepiece import SentencePieceProcessor
from transformers.tokenization_utils import PreTrainedTokenizer
from transformers.tokenization_utils_base import BatchEncoding, EncodedInput
from transformers.utils import PaddingStrategy, logging

logger = logging.get_logger(__name__)


class SPTokenizer:

    def __init__(self, model_path: str):
        # reload tokenizer
        assert os.path.isfile(model_path), model_path
        self.sp_model = SentencePieceProcessor(model_file=model_path)

        # BOS / EOS token IDs
        self.n_words: int = self.sp_model.vocab_size()
        self.bos_id: int = self.sp_model.bos_id()
        self.eos_id: int = self.sp_model.eos_id()
        self.pad_id: int = self.sp_model.eos_id()
        assert self.sp_model.vocab_size() == self.sp_model.get_piece_size()

        special_tokens = ['[MASK]', '[gMASK]', '[sMASK]', 'sop', 'eop']
        self.special_tokens = {}
        self.index_special_tokens = {}
        for token in special_tokens:
            self.special_tokens[token] = self.n_words
            self.index_special_tokens[self.n_words] = token
            self.n_words += 1

    def tokenize(self, s: str):
        return self.sp_model.EncodeAsPieces(s)

    def encode(self,
               s: str,
               bos: bool = False,
               eos: bool = False) -> List[int]:
        assert type(s) is str
        t = self.sp_model.encode(s)
        if bos:
            t = [self.bos_id] + t
        if eos:
            t = t + [self.eos_id]
        return t

    def decode(self, t: List[int]) -> str:
        return self.sp_model.decode(t)

    def decode_tokens(self, tokens: List[str]) -> str:
        text = self.sp_model.DecodePieces(tokens)
        return text

    def convert_token_to_id(self, token):
        """ Converts a token (str) to an id using the vocab. """
        if token in self.special_tokens:
            return self.special_tokens[token]
        return self.sp_model.PieceToId(token)

    def convert_id_to_token(self, index):
        """Converts an index (integer) to a token (str) using the vocab."""
        if index in self.index_special_tokens:
            return ''
        return self.sp_model.IdToPiece(index)


class ChatGLM2Tokenizer(PreTrainedTokenizer):
    vocab_files_names = {'vocab_file': 'tokenizer.model'}

    model_input_names = ['input_ids', 'attention_mask', 'position_ids']

    def __init__(self, vocab_file, padding_side='left', **kwargs):
        super().__init__(padding_side=padding_side, **kwargs)
        self.name = 'GLMTokenizer'
        # Keep the vocab path around; save_vocabulary below reads it back.
        self.vocab_file = vocab_file

        self.tokenizer = SPTokenizer(vocab_file)
        self.special_tokens = {
            '<bos>': self.tokenizer.bos_id,
            '<eos>': self.tokenizer.eos_id,
            '<pad>': self.tokenizer.pad_id
        }

    def get_command(self, token):
        if token in self.special_tokens:
            return self.special_tokens[token]
        assert token in self.tokenizer.special_tokens, f'{token} is not a special token for {self.name}'
        return self.tokenizer.special_tokens[token]

    @property
    def pad_token(self) -> str:
        return '</s>'

    @property
    def pad_token_id(self):
        return self.get_command('<pad>')

    @property
    def eos_token_id(self):
        return self.get_command('<eos>')

    @property
    def vocab_size(self):
        return self.tokenizer.n_words

    def get_vocab(self):
        """ Returns vocab as a dict """
        vocab = {
            self._convert_id_to_token(i): i
            for i in range(self.vocab_size)
        }
        vocab.update(self.added_tokens_encoder)
        return vocab

    def _tokenize(self, text, **kwargs):
        return self.tokenizer.tokenize(text)

    def _convert_token_to_id(self, token):
        """ Converts a token (str) to an id using the vocab. """
        return self.tokenizer.convert_token_to_id(token)

    def _convert_id_to_token(self, index):
        """Converts an index (integer) to a token (str) using the vocab."""
        return self.tokenizer.convert_id_to_token(index)

    def convert_tokens_to_string(self, tokens: List[str]) -> str:
        return self.tokenizer.decode_tokens(tokens)

    def save_vocabulary(self, save_directory, filename_prefix=None):
        """
        Save the vocabulary and special tokens file to a directory.

        Args:
            save_directory (`str`):
                The directory in which to save the vocabulary.
            filename_prefix (`str`, *optional*):
                An optional prefix to add to the names of the saved files.

        Returns:
            `Tuple(str)`: Paths to the files saved.
        """
        if os.path.isdir(save_directory):
            vocab_file = os.path.join(save_directory,
                                      self.vocab_files_names['vocab_file'])
        else:
            vocab_file = save_directory

        with open(self.vocab_file, 'rb') as fin:
            proto_str = fin.read()

        with open(vocab_file, 'wb') as writer:
            writer.write(proto_str)

        return (vocab_file, )

    def get_prefix_tokens(self):
        prefix_tokens = [self.get_command('[gMASK]'), self.get_command('sop')]
        return prefix_tokens

    def build_inputs_with_special_tokens(
            self,
            token_ids_0: List[int],
            token_ids_1: Optional[List[int]] = None) -> List[int]:
        """
        Build model inputs from a sequence or a pair of sequences by concatenating and
        adding special tokens. A ChatGLM2 sequence has the following format:

        - single sequence: `[gMASK] sop X`
        - pair of sequences: `[gMASK] sop A B <eos>`

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        prefix_tokens = self.get_prefix_tokens()
        token_ids_0 = prefix_tokens + token_ids_0
        if token_ids_1 is not None:
            token_ids_0 = token_ids_0 + token_ids_1 + [
                self.get_command('<eos>')
            ]
        return token_ids_0

    def _pad(
        self,
        encoded_inputs: Union[Dict[str, EncodedInput], BatchEncoding],
        max_length: Optional[int] = None,
        padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
        pad_to_multiple_of: Optional[int] = None,
        return_attention_mask: Optional[bool] = None,
    ) -> dict:
        """
        Pad encoded inputs (on left/right and up to predefined length or max length in the batch)

        Args:
            encoded_inputs:
                Dictionary of tokenized inputs (`List[int]`) or batch of tokenized inputs (`List[List[int]]`).
            max_length: maximum length of the returned list and optionally padding length (see below).
                Will truncate by taking into account the special tokens.
            padding_strategy: PaddingStrategy to use for padding.

                - PaddingStrategy.LONGEST: Pad to the longest sequence in the batch
                - PaddingStrategy.MAX_LENGTH: Pad to the max length (default)
                - PaddingStrategy.DO_NOT_PAD: Do not pad
                The tokenizer padding sides are defined in self.padding_side:

                    - 'left': pads on the left of the sequences
                    - 'right': pads on the right of the sequences
            pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
                This is especially useful to enable the use of Tensor Core on NVIDIA hardware with compute capability
                `>= 7.5` (Volta).
            return_attention_mask:
                (optional) Set to False to avoid returning attention mask (default: set to model specifics)
        """
        # Load from model defaults
        assert self.padding_side == 'left'

        required_input = encoded_inputs[self.model_input_names[0]]
        seq_length = len(required_input)

        if padding_strategy == PaddingStrategy.LONGEST:
            max_length = len(required_input)

        if max_length is not None and pad_to_multiple_of is not None and (
                max_length % pad_to_multiple_of != 0):
            max_length = (
                (max_length // pad_to_multiple_of) + 1) * pad_to_multiple_of

        needs_to_be_padded = padding_strategy != PaddingStrategy.DO_NOT_PAD and len(
            required_input) != max_length

        # Initialize attention mask if not present.
        if 'attention_mask' not in encoded_inputs:
            encoded_inputs['attention_mask'] = [1] * seq_length

        if 'position_ids' not in encoded_inputs:
            encoded_inputs['position_ids'] = list(range(seq_length))

        if needs_to_be_padded:
            difference = max_length - len(required_input)

            if 'attention_mask' in encoded_inputs:
                encoded_inputs['attention_mask'] = [
                    0
                ] * difference + encoded_inputs['attention_mask']
            if 'position_ids' in encoded_inputs:
                encoded_inputs['position_ids'] = [
                    0
                ] * difference + encoded_inputs['position_ids']
            encoded_inputs[self.model_input_names[
                0]] = [self.pad_token_id] * difference + required_input

        return encoded_inputs
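A usage sketch for the tokenizer; 'path/to/tokenizer.model' is a placeholder for the SentencePiece file shipped with a chatglm2-6b checkpoint:

from modelscope.models.nlp.chatglm2 import ChatGLM2Tokenizer

tokenizer = ChatGLM2Tokenizer('path/to/tokenizer.model')  # placeholder path
ids = tokenizer.build_inputs_with_special_tokens(
    tokenizer.tokenizer.encode('hello'))  # [gMASK id, sop id, *text ids]
batch = tokenizer(['hello'], padding='max_length', max_length=16)
# Left padding: pad ids, zero attention-mask entries and zero position ids
# are all prepended, matching the `_pad` implementation above.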
@@ -1,5 +1,6 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
# Copyright (c) 2022 Zhipu.AI
import os
from typing import Any, Dict, Optional, Union

@@ -17,7 +18,10 @@ from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.hub import Config, read_config
from modelscope.utils.streaming_output import PipelineStreamingOutputMixin

__all__ = ['TextGenerationPipeline', 'TextGenerationT5Pipeline']
__all__ = [
    'TextGenerationPipeline', 'TextGenerationT5Pipeline',
    'ChatGLM6bTextGenerationPipeline', 'ChatGLM6bV2TextGenerationPipeline'
]


@PIPELINES.register_module(
@@ -177,3 +181,71 @@ class TextGenerationT5Pipeline(TextGenerationPipeline):

        with torch.no_grad():
            return self.model.generate(**inputs, **forward_params)


@PIPELINES.register_module(
    group_key=Tasks.chat, module_name='chatglm6b-text-generation')
class ChatGLM6bTextGenerationPipeline(Pipeline):

    def __init__(self,
                 model: Union[Model, str],
                 quantization_bit=None,
                 use_bf16=False,
                 **kwargs):
        from modelscope.models.nlp.chatglm.text_generation import ChatGLMForConditionalGeneration
        model = ChatGLMForConditionalGeneration(model) if isinstance(
            model, str) else model
        if quantization_bit is not None:
            model = model.quantize(quantization_bit)
        if use_bf16:
            model = model.bfloat16()
        self.model = model
        self.model.eval()

        super().__init__(model=model, **kwargs)

    def preprocess(self, inputs, **preprocess_params) -> Dict[str, Any]:
        return inputs

    # define the forward pass
    def forward(self, inputs: Dict, **forward_params) -> Dict[str, Any]:
        return self.model.chat(inputs)

    # format the outputs from pipeline
    def postprocess(self, input, **kwargs) -> Dict[str, Any]:
        return input


@PIPELINES.register_module(
    group_key=Tasks.chat, module_name='chatglm2_6b-text-generation')
class ChatGLM6bV2TextGenerationPipeline(Pipeline):

    def __init__(self,
                 model: Union[Model, str],
                 quantization_bit=None,
                 use_bf16=False,
                 **kwargs):
        from modelscope.models.nlp import ChatGLM2ForConditionalGeneration, ChatGLM2Tokenizer
        model = ChatGLM2ForConditionalGeneration(model) if isinstance(
            model, str) else model
        if quantization_bit is not None:
            model = model.quantize(quantization_bit)
        if use_bf16:
            model = model.bfloat16()
        self.model = model
        self.model.eval()
        self.tokenizer = ChatGLM2Tokenizer.from_pretrained(
            self.model.model_dir)

        super().__init__(model=model, **kwargs)

    def preprocess(self, inputs, **preprocess_params) -> Dict[str, Any]:
        return inputs

    # define the forward pass
    def forward(self, inputs: Dict, **forward_params) -> Dict[str, Any]:
        return self.model.chat(self.tokenizer, inputs['text'])

    # format the outputs from pipeline
    def postprocess(self, input, **kwargs) -> Dict[str, Any]:
        return input
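A usage sketch for the new chat pipelines; the hub id is an assumption, and `pipeline_name` selects the registered module name from the decorators above:

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

chat = pipeline(
    Tasks.chat,
    model='ZhipuAI/chatglm2-6b',  # assumed model id
    pipeline_name='chatglm2_6b-text-generation')
print(chat({'text': 'Hello'}))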
38
modelscope/swift/__init__.py
Normal file
@@ -0,0 +1,38 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
    from .optimizers.child_tuning_adamw_optimizer import calculate_fisher, ChildTuningAdamW
    from .adapter import Adapter, AdapterConfig, AdapterModule
    from .lora import LoRA, LoRAConfig, Linear, MergedLinear, Embedding, Conv2d
    from .prompt import Prompt, PromptConfig, PromptModule
    from .control_sd_lora import ControlLoRACrossAttnProcessor, ControlLoRACrossAttnProcessorV2, ControlLoRATuner
    from .base import SwiftConfig, Swift
else:
    _import_structure = {
        'optimizers.child_tuning_adamw_optimizer':
        ['calculate_fisher', 'ChildTuningAdamW'],
        'adapter': ['Adapter', 'AdapterConfig', 'AdapterModule'],
        'lora': [
            'LoRA', 'LoRAConfig', 'Linear', 'MergedLinear', 'Embedding',
            'Conv2d'
        ],
        'prompt': ['Prompt', 'PromptConfig', 'PromptModule'],
        'control_sd_lora': [
            'ControlLoRACrossAttnProcessor', 'ControlLoRACrossAttnProcessorV2',
            'ControlLoRATuner'
        ],
        'base': ['SwiftConfig', 'Swift']
    }

    import sys

    sys.modules[__name__] = LazyImportModule(
        __name__,
        globals()['__file__'],
        _import_structure,
        module_spec=__spec__,
        extra_objects={},
    )
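As with the chatglm2 package above, these names are resolved lazily on first import:

from modelscope.swift import Swift, LoRAConfig  # materializes .base and .lora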
195
modelscope/swift/adapter.py
Normal file
@@ -0,0 +1,195 @@
import inspect
import os
import re
import types
from dataclasses import dataclass, field
from typing import Union

import torch
from torch import nn

from modelscope import snapshot_download
from modelscope.utils.constant import ModelFile
from .base import SwiftConfig


@dataclass
class AdapterConfig(SwiftConfig):
    """
    The configuration class for the adapter module.

    Adapters project input tokens by an MLP layer.
    'Parameter-Efficient Transfer Learning for NLP' by Houlsby et al.(2019)
    See http://arxiv.org/abs/1902.00751

    Args:
        dim: The dimension of the hidden states
        module_name: The feedforward module to be replaced, in regex format
        hidden_pos: The position of the hidden state to be passed into the adapter, can be int (args) or str (kwargs)
        method_name: The method to be replaced, defaults to replacing the forward method
        adapter_length: The length (intermediate dimension) of the adapter
        act_layer: The activation layer of the adapter
        only_adapter_trainable: Whether to train only adapters
        pretrained_weights: The pretrained adapter weights.
            Can be a local dir, local file, or a model id from modelscope
    """

    dim: int = field(metadata={'help': 'The dimension of the hidden states'})

    module_name: str = field(
        metadata={
            'help': 'The feedforward module to be replaced, in regex format'
        })

    hidden_pos: Union[str, int] = field(
        metadata={
            'help':
            'The position of the hidden state to be passed into the adapter, can be int (args) or str (kwargs)'
        })

    method_name: str = field(
        default='forward',
        metadata={
            'help':
            'The method to be replaced, defaults to replacing the forward method'
        })

    adapter_length: int = field(
        default=128,
        metadata={
            'help': 'The length (intermediate dimension) of the adapter'
        })

    act_layer: nn.Module = field(
        default=nn.GELU,
        metadata={'help': 'The activation layer of the adapter'})

    only_adapter_trainable: bool = field(
        default=True, metadata={'help': 'Whether to train only adapters'})

    pretrained_weights: str = field(
        default=None,
        metadata={
            'help':
            'The pretrained adapter weights. Can be a local dir, local file, or a model id from modelscope'
        })


class Adapter:

    @staticmethod
    def prepare_model(model: nn.Module, config: AdapterConfig):
        module_keys = [key for key, _ in model.named_modules()]

        for module_key in module_keys:
            if re.fullmatch(config.module_name, module_key):  # noqa
                module = model.get_submodule(module_key)

                def _forward(self, *args, **kwargs):
                    args = self.forward_origin(*args, **kwargs)
                    if isinstance(args, (tuple, list, dict)):
                        if isinstance(config.hidden_pos, int):
                            # Rebuild the tuple with a residual adapter
                            # output at hidden_pos.
                            return args[0:config.hidden_pos] + (
                                args[config.hidden_pos] + getattr(
                                    self, 'adapter')(args[config.hidden_pos]),
                            ) + args[config.hidden_pos + 1:]  # noqa
                        else:
                            args[config.hidden_pos] = args[
                                config.hidden_pos] + getattr(self, 'adapter')(
                                    args[config.hidden_pos])
                    elif isinstance(args, torch.Tensor):
                        args = getattr(self, 'adapter')(args)
                    return args

                def _feed_forward_chunk(self, attention_output):
                    return _forward(self, attention_output)

                module.forward_origin = getattr(module, config.method_name)
                num_args_in_forward_chunk_fn = len(
                    inspect.signature(module.forward_origin).parameters)
                if config.method_name == 'feed_forward_chunk' and num_args_in_forward_chunk_fn == 1:
                    setattr(module, config.method_name,
                            types.MethodType(_feed_forward_chunk, module))
                else:
                    setattr(module, config.method_name,
                            types.MethodType(_forward, module))
                adapter_module = AdapterModule(config.dim,
                                               config.adapter_length,
                                               config.act_layer)
                setattr(module, 'adapter', adapter_module)

        if config.only_adapter_trainable:
            for n, p in model.named_parameters():
                if 'adapter' not in n:
                    p.requires_grad = False

        def state_dict_hook(module, destination, prefix, local_metadata):
            return {
                key: value
                for key, value in destination.items() if 'adapter' in key
            }

        model.state_dict_hook_handle = model._register_state_dict_hook(
            state_dict_hook)

        def load_state_dict(self, state_dict, strict=True):
            return self.load_state_dict_origin(state_dict, False)

        model.load_state_dict_origin = model.load_state_dict
        model.load_state_dict = types.MethodType(load_state_dict, model)

        if config.pretrained_weights is not None:
            if not os.path.exists(config.pretrained_weights):
                model_dir = snapshot_download(config.pretrained_weights)
                pretrained_weights = os.path.join(
                    model_dir, ModelFile.TORCH_MODEL_BIN_FILE)
            elif os.path.isfile(config.pretrained_weights):
                pretrained_weights = config.pretrained_weights
            else:
                pretrained_weights = os.path.join(
                    config.pretrained_weights, ModelFile.TORCH_MODEL_BIN_FILE)
            model.load_state_dict(torch.load(pretrained_weights))
        return model


class AdapterModule(nn.Module):
    """The implementation of adapter tuning method.

    Adapters project input tokens by an MLP layer.
    'Parameter-Efficient Transfer Learning for NLP' by Houlsby et al.(2019)
    See http://arxiv.org/abs/1902.00751

    Attributes:
        dim: An integer indicating the embedding dimension.
        adapter_length: An integer indicating the length of adapter tuning.
    """

    def __init__(
        self,
        dim,
        adapter_length=None,
        act_layer=nn.GELU,
    ):
        super(AdapterModule, self).__init__()
        self.dim = dim
        self.adapter_length = adapter_length
        # self.adapter_type = adapter_type
        self.ln1 = nn.Linear(dim, adapter_length)
        self.activate = act_layer()
        self.ln2 = nn.Linear(adapter_length, dim)
        self.init_weights()

    def init_weights(self):

        def _init_weights(m):
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                nn.init.normal_(m.bias, std=1e-6)

        self.apply(_init_weights)

    def forward(self, x, identity=None):
        out = self.ln2(self.activate(self.ln1(x)))
        if identity is None:
            identity = x
        out = identity + out
        return out
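To see the whole flow end to end, a minimal sketch on a toy module; the toy `Block` class and the regex are illustrative (the regex style mirrors the ViT test at the end of this diff):

import torch
from torch import nn

from modelscope.swift import Swift
from modelscope.swift.adapter import AdapterConfig

class Block(nn.Module):
    def __init__(self):
        super().__init__()
        self.mlp = nn.Linear(768, 768)

    def forward(self, x):
        return self.mlp(x)

model = nn.Sequential(Block(), Block())
config = AdapterConfig(
    dim=768,
    module_name=r'.*\.mlp$',  # patch every Block's mlp output
    hidden_pos=0,             # unused for plain tensor outputs, but required
    adapter_length=32)
model = Swift.prepare_model(model, config)
out = model(torch.randn(2, 16, 768))  # adapters applied residually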
31
modelscope/swift/base.py
Normal file
@@ -0,0 +1,31 @@
from dataclasses import dataclass


@dataclass
class SwiftConfig:
    pass


class Swift:

    @staticmethod
    def prepare_model(model, config: SwiftConfig):
        """Prepare the model and return the new model.

        Args:
            model: The model to tune.
            config: The config of the tuner.

        Returns:
            The tuned model.
        """
        from .lora import LoRA, LoRAConfig
        from .adapter import Adapter, AdapterConfig
        from .prompt import Prompt, PromptConfig
        if isinstance(config, LoRAConfig):
            return LoRA.prepare_model(model, config)
        elif isinstance(config, AdapterConfig):
            return Adapter.prepare_model(model, config)
        elif isinstance(config, PromptConfig):
            return Prompt.prepare_model(model, config)
        return None
@@ -4,93 +4,148 @@
import logging
import math
import os.path
import re
import types
from dataclasses import dataclass, field
from typing import Dict, List

import torch
import torch.nn as nn
import torch.nn.functional as F

from modelscope import snapshot_download
from modelscope.utils.constant import ModelFile
from .base import SwiftConfig

logger = logging.getLogger(__name__)


class LoRATuner:
@dataclass
class LoRAConfig(SwiftConfig):
    """
    The configuration class for the LoRA module.

    Args:
        rank: The rank of the LoRA module
        replace_modules: The modules to be replaced by LoRA, can be the end of the module name or a regex string
        lora_alpha: The factor to add the lora weights
        lora_dropout: The dropout rate of the lora module
        merge_weights: Whether to merge weights when validating
        use_merged_linear: Whether to replace with merged linear layer
        enable_lora: The modules need to be turned on when using the merged linear layer
        fan_in_fan_out: Set this to True if the layer to replace stores weight like (fan_in, fan_out)
        bias: Bias type. Values can be "none", "all" or "lora_only"
        only_lora_trainable: Whether to train only lora
        pretrained_weights: The pretrained lora weights.
            Can be a local dir, local file, or a model id from modelscope
    """

    rank: int = field(
        default=6, metadata={'help': 'The rank of the LoRA module'})
    replace_modules: List = field(
        default=None,
        metadata={
            'help':
            'The modules to be replaced by LoRA, can be the end of the module name or a regex string'
        })
    lora_alpha: float = field(
        default=1., metadata={'help': 'The factor to add the lora weights'})
    lora_dropout: float = field(
        default=0., metadata={'help': 'The dropout rate of the lora module'})
    merge_weights: bool = field(
        default=True,
        metadata={'help': 'Whether to merge weights when validating'})
    use_merged_linear: bool = field(
        default=False,
        metadata={'help': 'Whether to replace with merged linear layer'})
    enable_lora: List = field(
        default=None,
        metadata={
            'help':
            'The modules need to be turned on when using the merged linear layer'
        })
    fan_in_fan_out: bool = field(
        default=False,
        metadata={
            'help':
            'Set this to True if the layer to replace stores weight like (fan_in, fan_out)'
        })
    bias: str = field(
        default='none',
        metadata={
            'help': 'Bias type. Values can be "none", "all" or "lora_only"'
        })
    only_lora_trainable: bool = field(
        default=True, metadata={'help': 'Whether to train only lora'})
    pretrained_weights: str = field(
        default=None,
        metadata={
            'help':
            'The pretrained lora weights. Can be a local dir, local file, or a model id from modelscope'
        })


class LoRA:

    @staticmethod
    def tune(model: nn.Module,
             rank=6,
             replace_modules=None,
             lora_alpha=1.,
             lora_dropout=0.,
             merge_weights=True,
             fan_in_fan_out=False,
             bias='none',
             pretrained_tuner=None):
        """Tune a model with lora.
    def prepare_model(model: nn.Module, config: LoRAConfig):
        """Tune a model with LoRA.

        Args:
            model: The torch.nn.Module containing the target module to be patched.
            rank: The lora rank.
            replace_modules: The module names to be replaced, the replacing strategy is `end with`.
            lora_alpha: The alpha value for lora module.
            lora_dropout: The dropout value for lora module.
            merge_weights: If merge_weights set to True, when the module turns to `eval`, the lora weights
                will be added into the origin weight to reduce calculation.
            fan_in_fan_out: Set this to True if the layer to replace stores weight like (fan_in, fan_out).
            bias: The grad strategy for bias, can be `none`, 'all' or 'lora_only'.
            pretrained_tuner: The pretrained file of lora.
            config: The LoRAConfig instance.

        Returns:
            The lora modules
        """
        modules = LoRATuner._dynamic_patch_lora(
        LoRA._dynamic_patch_lora(
            model,
            replace_modules=replace_modules,
            r=rank,
            lora_alpha=lora_alpha,
            lora_dropout=lora_dropout,
            merge_weights=merge_weights,
            fan_in_fan_out=fan_in_fan_out)
            replace_modules=config.replace_modules,
            r=config.rank,
            lora_alpha=config.lora_alpha,
            lora_dropout=config.lora_dropout,
            merge_weights=config.merge_weights,
            use_merged_linear=config.use_merged_linear,
            enable_lora=config.enable_lora,
            fan_in_fan_out=config.fan_in_fan_out)

        mark_only_lora_as_trainable(model, bias)
        if config.only_lora_trainable:
            mark_only_lora_as_trainable(model, config.bias)

        def state_dict_hook(module, destination, prefix, local_metadata):
            return lora_state_dict(destination, bias)
            return lora_state_dict(destination, config.bias)

        model.state_dict_hook_handle = model._register_state_dict_hook(
            state_dict_hook)

        def warning_hook(module, incompatible_keys):
            logger.info(
                f'The {module.__class__.__name__} module has unmatched keys: {incompatible_keys},'
                f'this is converted to a notice with respect to LoRA')
            for ik in incompatible_keys:
                ik.clear()
        def load_state_dict(self, state_dict, strict=True):
            return self.load_state_dict_origin(state_dict, False)

        if hasattr(model, 'register_load_state_dict_post_hook'):
            model.load_state_dict_hook_handle = model.register_load_state_dict_post_hook(
                warning_hook)
        else:
            model.load_state_dict_origin = model.load_state_dict
            model.load_state_dict = types.MethodType(load_state_dict, model)

        def load_state_dict(self, state_dict, strict=True):
            return self.load_state_dict_origin(state_dict, False)
        if config.pretrained_weights is not None:
            if not os.path.exists(config.pretrained_weights):
                model_dir = snapshot_download(config.pretrained_weights)
                pretrained_weights = os.path.join(
                    model_dir, ModelFile.TORCH_MODEL_BIN_FILE)
            elif os.path.isfile(config.pretrained_weights):
                pretrained_weights = config.pretrained_weights
            else:
                pretrained_weights = os.path.join(
                    config.pretrained_weights, ModelFile.TORCH_MODEL_BIN_FILE)
            model.load_state_dict(torch.load(pretrained_weights))

        model.load_state_dict_origin = model.load_state_dict
        model.load_state_dict = types.MethodType(load_state_dict, model)

        if pretrained_tuner is not None and os.path.isfile(pretrained_tuner):
            logger.info(f'Loading LoRA weights from file: {pretrained_tuner}')
            model.load_state_dict(torch.load(pretrained_tuner))

        return modules
        return model

    @staticmethod
    def _dynamic_patch_lora(model, replace_modules, **kwargs):
    def _dynamic_patch_lora(model, replace_modules, use_merged_linear,
                            **kwargs):
        """Dynamic patch lora to model

        Args:
            model: The torch.nn.Module containing the target module to be patched.
            replace_modules: The module names to be replaced, the replacing strategy is `end with`.
            use_merged_linear: Whether to replace with merged linear layer
            **kwargs: The arguments passed from `tune` which are needed by lora.

        Returns:
@@ -103,8 +158,13 @@ class LoRATuner:
            replace_modules = [replace_modules]

        for module_key in module_keys:
            if any([module_key.endswith(name)
                    for name in replace_modules]):  # noqa
            if isinstance(replace_modules, str):
                target_module_found = re.fullmatch(replace_modules, module_key)
            else:
                target_module_found = any(
                    module_key.endswith(target_key)
                    for target_key in replace_modules)
            if target_module_found:  # noqa
                parts = module_key.split('.')
                module = model.get_submodule('.'.join(parts[:-1]))
                sub_module = model.get_submodule(module_key)
@@ -112,11 +172,19 @@ class LoRATuner:

                lora_module = None
                if isinstance(sub_module, torch.nn.Linear):
                    lora_module = Linear(
                        sub_module.in_features,
                        sub_module.out_features,
                        bias=sub_module.bias is not None,
                        **kwargs)
                    if use_merged_linear:
                        lora_module = MergedLinear(
                            sub_module.in_features,
                            sub_module.out_features,
                            bias=sub_module.bias is not None,
                            **kwargs)
                    else:
                        kwargs.pop('enable_lora', None)
                        lora_module = Linear(
                            sub_module.in_features,
                            sub_module.out_features,
                            bias=sub_module.bias is not None,
                            **kwargs)
                elif isinstance(sub_module, torch.nn.Conv2d):
                    kwargs.pop('fan_in_fan_out', None)
                    lora_module = Conv2d(
@@ -140,9 +208,13 @@ class LoRATuner:
        return modules

    @staticmethod
    def unpatch_lora(model, replace_modules):
    def unpatch_lora(model, config: LoRAConfig):
        """Unpatch lora modules and merge the weights to original modules.

        LoRA constructs an additional layer with low-rank decomposition matrices of the weights in the network.
        'LoRA: Low-Rank Adaptation of Large Language Models' by Hu et al.(2021)
        See https://arxiv.org/abs/2106.09685

        Args:
            model: The model called with `tune` function.
            replace_modules: The module names to be replaced, the replacing strategy is `end with`.
@@ -152,13 +224,17 @@ class LoRATuner:
        """
        modules = []
        module_keys = [key for key, _ in model.named_modules()]
        assert isinstance(replace_modules, (str, list))
        if isinstance(replace_modules, str):
            replace_modules = [replace_modules]
        assert isinstance(config.replace_modules, (str, list))
        replace_modules = config.replace_modules

        for module_key in module_keys:
            if any([module_key.endswith(name)
                    for name in replace_modules]):  # noqa
            if isinstance(replace_modules, str):
                target_module_found = re.fullmatch(replace_modules, module_key)
            else:
                target_module_found = any(
                    module_key.endswith(target_key)
                    for target_key in replace_modules)
            if target_module_found:  # noqa
                parts = module_key.split('.')
                module = model.get_submodule('.'.join(parts[:-1]))
                sub_module = model.get_submodule(module_key)
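A configuration sketch mirroring the vision test at the end of this diff; `model` stands for any backbone whose attention projections are modules named `qkv`:

from modelscope.swift import Swift
from modelscope.swift.lora import LoRAConfig

lora_config = LoRAConfig(
    rank=8,
    replace_modules=['qkv'],  # match module names ending with 'qkv'
    merge_weights=False,
    use_merged_linear=True,
    enable_lora=[True])
model = Swift.prepare_model(model, lora_config)  # `model` defined elsewhere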
0
modelscope/swift/optimizers/__init__.py
Normal file
@@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import types
from typing import Callable, Iterable, Tuple

import numpy as np
@@ -22,7 +21,6 @@ from torch.distributions.bernoulli import Bernoulli
from torch.optim import Optimizer

from modelscope.utils.logger import get_logger
from .builder import OPTIMIZERS, default_group

logger = get_logger()

@@ -72,8 +70,6 @@ def calculate_fisher(model: torch.nn.Module,
    return gradient_mask


@OPTIMIZERS.register_module(
    group_key=default_group, module_name='ChildTuningAdamW')
class ChildTuningAdamW(Optimizer):

    def __init__(self,
214
modelscope/swift/prompt.py
Normal file
@@ -0,0 +1,214 @@
import os
import re
import types
from dataclasses import dataclass, field
from typing import Union

import torch
from torch import nn

from modelscope import snapshot_download
from modelscope.utils.constant import ModelFile
from .base import SwiftConfig


@dataclass
class PromptConfig(SwiftConfig):
    """
    The configuration class for the prompt module.

    Visual prompt tuning (VPT) is proposed to initialize tunable prompt tokens
    and prepend them to the original tokens in the first layer or multiple layers.
    'Visual Prompt Tuning' by Jia et al.(2022)
    See https://arxiv.org/abs/2203.12119

    Here we apply VPT to other fields as well.

    Args:
        dim: The dimension of the hidden states
        module_layer_name: The layer module to be replaced, in regex format
        embedding_pos: The position of the embedding tensor
        attention_mask_pos: The position of the attention mask
        attention_mask_value: The value to pad to the attention mask
        prompt_length: The length of the prompt tokens
        only_prompt_trainable: Whether to train only prompt
        attach_front: When set to True, prompt is attached in front of the embedding
        pretrained_weights: The pretrained prompt weights. Can be a local dir, local file,
            or a model id from modelscope
    """

    dim: int = field(metadata={'help': 'The dimension of the hidden states'})

    module_layer_name: str = field(
        metadata={'help': 'The layer module to be replaced, in regex format'})

    embedding_pos: Union[str, int] = field(
        metadata={'help': 'The position of the embedding tensor'})

    attention_mask_pos: Union[str, int] = field(
        default=None, metadata={'help': 'The position of the attention mask'})

    attention_mask_value: Union[float, int, bool] = field(
        default=0.,
        metadata={'help': 'The value to pad to the attention mask'})

    prompt_length: int = field(
        default=16, metadata={'help': 'The length of the prompt tokens'})

    only_prompt_trainable: bool = field(
        default=True, metadata={'help': 'Whether to train only prompt'})

    attach_front: bool = field(
        default=True,
        metadata={
            'help':
            'When set to True, prompt is attached in front of the embedding'
        })

    pretrained_weights: str = field(
        default=None,
        metadata={
            'help':
            'The pretrained prompt weights. Can be a local dir, local file, or a model id from modelscope'
        })


class Prompt:

    @staticmethod
    def prepare_model(model: nn.Module, config: PromptConfig):
        module_keys = [key for key, _ in model.named_modules()]
        for module_key in module_keys:
            if re.fullmatch(config.module_layer_name, module_key):  # noqa
                module = model.get_submodule(module_key)

                def _forward(self, *args, **kwargs):
                    if isinstance(config.embedding_pos, int):
                        input_embedding = args[config.embedding_pos]
                    else:
                        input_embedding = kwargs[config.embedding_pos]

                    input_embedding = getattr(
                        self, 'prompt').forward(input_embedding)
                    if isinstance(config.embedding_pos, int):
                        args = type(args)(
                            args[0:config.embedding_pos] + (input_embedding, )
                            + args[config.embedding_pos + 1:])
                    else:
                        kwargs[config.embedding_pos] = input_embedding

                    if config.attention_mask_pos:
                        attention_mask = None
                        if isinstance(config.attention_mask_pos, int):
                            attention_mask = args[config.attention_mask_pos]
                        elif isinstance(config.attention_mask_pos, str):
                            attention_mask = kwargs[config.attention_mask_pos]

                        if attention_mask is not None:
                            attention_mask = getattr(
                                self,
                                'prompt').patch_attention_mask(attention_mask)
                        if isinstance(config.attention_mask_pos, int):
                            args = type(args)(
                                args[0:config.attention_mask_pos]
                                + (attention_mask, )
                                + args[config.attention_mask_pos + 1:])
                        else:
                            kwargs[config.attention_mask_pos] = attention_mask

                    return self.forward_origin(*args, **kwargs)

                module.forward_origin = module.forward
                module.forward = types.MethodType(_forward, module)
                prompt_module = PromptModule(config.dim,
                                             int(module_key.rsplit('.')[-1]),
                                             config.prompt_length,
                                             config.attention_mask_value,
                                             config.attach_front)
                setattr(module, 'prompt', prompt_module)

        if config.only_prompt_trainable:
            for n, p in model.named_parameters():
                if 'prompt' not in n:
                    p.requires_grad = False

        def state_dict_hook(module, destination, prefix, local_metadata):
            return {
                key: value
                for key, value in destination.items() if 'prompt' in key
            }

        model.state_dict_hook_handle = model._register_state_dict_hook(
            state_dict_hook)

        def load_state_dict(self, state_dict, strict=True):
            return self.load_state_dict_origin(state_dict, False)

        model.load_state_dict_origin = model.load_state_dict
        model.load_state_dict = types.MethodType(load_state_dict, model)

        if config.pretrained_weights is not None:
            if not os.path.exists(config.pretrained_weights):
                model_dir = snapshot_download(config.pretrained_weights)
                pretrained_weights = os.path.join(
                    model_dir, ModelFile.TORCH_MODEL_BIN_FILE)
            elif os.path.isfile(config.pretrained_weights):
                pretrained_weights = config.pretrained_weights
            else:
                pretrained_weights = os.path.join(
                    config.pretrained_weights, ModelFile.TORCH_MODEL_BIN_FILE)
            model.load_state_dict(torch.load(pretrained_weights))
        return model


class PromptModule(nn.Module):
    """The implementation of the visual prompt tuning method.

    Visual prompt tuning (VPT) is proposed to initialize tunable prompt tokens
    and prepend them to the original tokens in the first layer or multiple layers.
    'Visual Prompt Tuning' by Jia et al.(2022)
    See https://arxiv.org/abs/2203.12119

    Attributes:
        dim: An integer indicating the embedding dimension.
        layer_num: An integer indicating the number of layers.
        prompt_length: An integer indicating the length of vision prompt tuning.
    """

    def __init__(self,
                 dim,
                 layer_num,
                 prompt_length=None,
                 mask_values=0.,
                 attach_front=True):
        super(PromptModule, self).__init__()
        self.dim = dim
        self.layer_num = layer_num
        self.prompt_length = prompt_length
        self.mask_values = mask_values
        self.attach_front = attach_front

        self.prompt_token = nn.Parameter(torch.zeros(1, prompt_length, dim))
        nn.init.xavier_uniform_(self.prompt_token)

    def forward(self, x):
        prompt_token = self.prompt_token.expand(x.shape[0], -1, -1)

        if self.layer_num == 0:
            if self.attach_front:
                x = torch.cat((prompt_token, x), dim=1)
            else:
                x = torch.cat((x, prompt_token), dim=1)
        else:
            if self.attach_front:
                x = torch.cat((prompt_token, x[:, self.prompt_length:, :]),
                              dim=1)
            else:
                x = torch.cat((x[:, :-self.prompt_length, :], prompt_token),
                              dim=1)
        return x

    def patch_attention_mask(self, m):
        prefix_attention_mask = torch.full((*m.shape[:-1], self.prompt_length),
                                           self.mask_values).to(m.device)
        return torch.cat((prefix_attention_mask, m), dim=-1)
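A configuration sketch; the regex is an assumption about the backbone layout, and it must match per-layer modules whose names end in the layer index, since `PromptModule` derives `layer_num` from the last path component:

from modelscope.swift import Swift
from modelscope.swift.prompt import PromptConfig

prompt_config = PromptConfig(
    dim=768,
    module_layer_name=r'.*blocks\.\d+$',  # assumed layer naming
    embedding_pos=0,
    prompt_length=16)
model = Swift.prepare_model(model, prompt_config)  # `model` defined elsewhere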
218
modelscope/swift/sd_lora.py
Normal file
@@ -0,0 +1,218 @@
# Copyright 2023-2024 The Alibaba Fundamental Vision Team Authors. All rights reserved.
# The implementation is adopted from HighCWu,
# made publicly available under the Apache License 2.0 at https://github.com/HighCWu/ControlLoRA
import os
from dataclasses import dataclass
from typing import List, Tuple, Union

import torch
import torch.nn as nn
from diffusers.configuration_utils import ConfigMixin, register_to_config
from diffusers.models.cross_attention import CrossAttention, LoRALinearLayer
from diffusers.models.modeling_utils import ModelMixin
from diffusers.utils.outputs import BaseOutput


@dataclass
class TunerOutput(BaseOutput):
    lora_states: Tuple[torch.FloatTensor]


class LoRACrossAttnProcessor(nn.Module):
    """ The implementation of the LoRA attention processor.
    """

    def __init__(self,
                 hidden_size,
                 cross_attention_dim=None,
                 rank=4,
                 post_add=False,
                 key_states_skipped=False,
                 value_states_skipped=False,
                 output_states_skipped=False):
        """ Initialize a LoRA attention processor instance.
        Args:
            hidden_size (`int`): The number of channels in the embedding.
            cross_attention_dim (`int`, *optional*):
                The number of channels in the hidden_states. If not given, defaults to `hidden_size`.
            rank (`int`, *optional*, defaults to 4): The rank of the LoRA layers.
            post_add (`bool`, *optional*, defaults to False): Set to `True` to conduct the weighted
                adding operation after LoRA.
            key_states_skipped (`bool`, *optional*, defaults to False):
                Set to `True` to skip LoRA on the key states.
            value_states_skipped (`bool`, *optional*, defaults to False):
                Set to `True` to skip LoRA on the value states.
            output_states_skipped (`bool`, *optional*, defaults to False):
                Set to `True` to skip LoRA on the output states.
        """
        super().__init__()

        self.hidden_size = hidden_size
        self.cross_attention_dim = cross_attention_dim
        self.rank = rank
        self.post_add = post_add

        self.to_q_lora = LoRALinearLayer(hidden_size, hidden_size, rank)
        if not key_states_skipped:
            self.to_k_lora = LoRALinearLayer(
                hidden_size if post_add else
                (cross_attention_dim or hidden_size), hidden_size, rank)
        if not value_states_skipped:
            self.to_v_lora = LoRALinearLayer(
                hidden_size if post_add else
                (cross_attention_dim or hidden_size), hidden_size, rank)
        if not output_states_skipped:
            self.to_out_lora = LoRALinearLayer(hidden_size, hidden_size, rank)

        self.key_states_skipped: bool = key_states_skipped
        self.value_states_skipped: bool = value_states_skipped
        self.output_states_skipped: bool = output_states_skipped

    def skip_key_states(self, is_skipped: bool = True):
        if not is_skipped:
            assert hasattr(self, 'to_k_lora')
        self.key_states_skipped = is_skipped

    def skip_value_states(self, is_skipped: bool = True):
        if not is_skipped:
            # re-enabling value LoRA requires the value branch to exist
            assert hasattr(self, 'to_v_lora')
        self.value_states_skipped = is_skipped

    def skip_output_states(self, is_skipped: bool = True):
        if not is_skipped:
            assert hasattr(self, 'to_out_lora')
        self.output_states_skipped = is_skipped

    def __call__(self,
                 attn: CrossAttention,
                 hidden_states,
                 encoder_hidden_states=None,
                 attention_mask=None,
                 scale=1.0):
        batch_size, sequence_length, _ = hidden_states.shape
        attention_mask = attn.prepare_attention_mask(
            attention_mask=attention_mask,
            target_length=sequence_length,
            batch_size=batch_size)

        query = attn.to_q(hidden_states)
        query = query + scale * self.to_q_lora(
            query if self.post_add else hidden_states)
        query = attn.head_to_batch_dim(query)

        encoder_hidden_states = encoder_hidden_states if encoder_hidden_states is not None else hidden_states

        key = attn.to_k(encoder_hidden_states)
        if not self.key_states_skipped:
            key = key + scale * self.to_k_lora(
                key if self.post_add else encoder_hidden_states)
        value = attn.to_v(encoder_hidden_states)
        if not self.value_states_skipped:
            value = value + scale * self.to_v_lora(
                value if self.post_add else encoder_hidden_states)

        key = attn.head_to_batch_dim(key)
        value = attn.head_to_batch_dim(value)

        attention_probs = attn.get_attention_scores(query, key, attention_mask)
        hidden_states = torch.bmm(attention_probs, value)
        hidden_states = attn.batch_to_head_dim(hidden_states)

        # linear proj
        out = attn.to_out[0](hidden_states)
        if not self.output_states_skipped:
            out = out + scale * self.to_out_lora(
                out if self.post_add else hidden_states)
        hidden_states = out
        # dropout
        hidden_states = attn.to_out[1](hidden_states)

        return hidden_states


class LoRATuner(ModelMixin, ConfigMixin):

    @staticmethod
    def tune(
        model: nn.Module,
        tuner_config=None,
        pretrained_tuner=None,
    ):
        tuner = LoRATuner.from_config(tuner_config)
        if pretrained_tuner is not None and os.path.exists(pretrained_tuner):
            tuner.load_state_dict(
                torch.load(pretrained_tuner, map_location='cpu'), strict=True)
        tune_layers_list = list(
            [list(layer_list) for layer_list in tuner.lora_layers])
        assert hasattr(model, 'unet')
        unet = model.unet
        tuner.to(unet.device)
        tune_attn_procs = tuner.set_tune_layers(unet, tune_layers_list)
        unet.set_attn_processor(tune_attn_procs)
        return tuner

    def set_tune_layers(self, unet, tune_layers_list):
        n_ch = len(unet.config.block_out_channels)
        control_ids = [i for i in range(n_ch)]
        tune_attn_procs = {}

        for name in unet.attn_processors.keys():
            if name.startswith('mid_block'):
                control_id = control_ids[-1]
            elif name.startswith('up_blocks'):
                block_id = int(name[len('up_blocks.')])
                control_id = list(reversed(control_ids))[block_id]
            elif name.startswith('down_blocks'):
                block_id = int(name[len('down_blocks.')])
                control_id = control_ids[block_id]

            tune_layers = tune_layers_list[control_id]
            if len(tune_layers) != 0:
                tune_layer = tune_layers.pop(0)
                tune_attn_procs[name] = tune_layer
        return tune_attn_procs

    @register_to_config
    def __init__(
        self,
        lora_block_out_channels: Tuple[int] = (320, 640, 1280, 1280),
        lora_cross_attention_dims: Tuple[List[int]] = ([
            None, 768, None, 768, None, 768, None, 768, None, 768
        ], [None, 768, None, 768, None, 768, None, 768, None,
            768], [None, 768, None, 768, None, 768, None, 768, None,
                   768], [None, 768]),
        lora_rank: int = 4,
        lora_post_add: bool = False,
        lora_key_states_skipped: bool = False,
        lora_value_states_skipped: bool = False,
        lora_output_states_skipped: bool = False,
    ):
        super().__init__()

        lora_cls = LoRACrossAttnProcessor

        self.lora_layers = nn.ModuleList([])

        for i, lora_cross_attention_dim in enumerate(
                lora_cross_attention_dims):
            self.lora_layers.append(
                nn.ModuleList([
                    lora_cls(
                        lora_block_out_channels[i],
                        cross_attention_dim=cross_attention_dim,
                        rank=lora_rank,
                        post_add=lora_post_add,
                        key_states_skipped=lora_key_states_skipped,
                        value_states_skipped=lora_value_states_skipped,
                        output_states_skipped=lora_output_states_skipped)
                    for cross_attention_dim in lora_cross_attention_dim
                ]))

    def forward(self) -> Union[TunerOutput, Tuple]:
        lora_states_list = []
        tune_layers_list = list(
            [list(layer_list) for layer_list in self.lora_layers])
        for tune_list in tune_layers_list:
            for tune_layer in tune_list:
                lora_states_list.append(tune_layer.to_q_lora.down.weight)
        return TunerOutput(lora_states=tuple(lora_states_list))
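A sketch of attaching the tuner; `sd_model` is assumed to be a wrapper exposing a diffusers UNet as `.unet`, which the assert in `tune` requires:

# tuner_config is forwarded to ConfigMixin.from_config; an empty dict keeps
# the defaults declared in __init__ above.
tuner = LoRATuner.tune(sd_model, tuner_config={}, pretrained_tuner=None)
# The UNet's attention processors are now the LoRA processors above;
# only the `tuner` parameters need to be optimized.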
@@ -1,5 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from modelscope.swift import ChildTuningAdamW
from .builder import OPTIMIZERS, build_optimizer
from .child_tuning_adamw_optimizer import ChildTuningAdamW

__all__ = ['OPTIMIZERS', 'build_optimizer', 'ChildTuningAdamW']
@@ -44,6 +44,7 @@ from modelscope.utils.registry import build_from_cfg
from modelscope.utils.torch_utils import (compile_model, get_dist_info,
                                          get_local_rank, init_dist, is_dist,
                                          is_master, set_random_seed)
from ..swift import Swift
from .base import BaseTrainer
from .builder import TRAINERS
from .default_config import merge_cfg, merge_hooks, update_cfg
@@ -264,10 +265,7 @@ class EpochBasedTrainer(BaseTrainer):
    def tune_module(self, efficient_tuners):
        if efficient_tuners is not None:
            for tuner in efficient_tuners:
                type = tuner.pop('type')
                if type == 'lora':
                    from modelscope.tuners.lora import LoRATuner
                    LoRATuner.tune(self.model, **tuner)
                self.model = Swift.prepare_model(self.model, tuner)

    def place_model(self):
        """Place model to device, or to DDP
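With this change each entry of `efficient_tuners` is a `SwiftConfig` instance instead of a dict carrying a 'type' key; a wiring sketch (mirroring the swift test below):

from modelscope.metainfo import Trainers
from modelscope.swift.lora import LoRAConfig
from modelscope.trainers import build_trainer

kwargs = dict(
    model='damo/some-model-id',  # placeholder model id
    efficient_tuners=[LoRAConfig(rank=8, replace_modules=['qkv'])])
trainer = build_trainer(
    name=Trainers.vision_efficient_tuning, default_args=kwargs)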
@@ -0,0 +1 @@
from .hub import create_model_if_not_exist, read_config
@@ -8,18 +8,17 @@ from modelscope.metainfo import Preprocessors, Trainers
from modelscope.models import Model
from modelscope.msdatasets import MsDataset
from modelscope.pipelines import pipeline
from modelscope.swift.optimizers.child_tuning_adamw_optimizer import \
    calculate_fisher
from modelscope.trainers import build_trainer
from modelscope.trainers.hooks import Hook
from modelscope.trainers.nlp_trainer import (EpochBasedTrainer,
                                             NlpEpochBasedTrainer)
from modelscope.trainers.optimizer.child_tuning_adamw_optimizer import \
    calculate_fisher
from modelscope.trainers.training_args import TrainingArgs
from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.data_utils import to_device
from modelscope.utils.regress_test_utils import (MsRegressTool,
                                                 compare_arguments_nested)
from modelscope.utils.test_utils import test_level


class TestFinetuneSequenceClassification(unittest.TestCase):
164
tests/trainers/test_finetune_vision_efficient_tuning_swift.py
Normal file
@@ -0,0 +1,164 @@
# Copyright 2022-2023 The Alibaba Fundamental Vision Team Authors. All rights reserved.
import os
import shutil
import tempfile
import unittest

from modelscope.metainfo import Trainers
from modelscope.msdatasets import MsDataset
from modelscope.swift import Swift
from modelscope.swift.adapter import AdapterConfig
from modelscope.swift.lora import LoRAConfig
from modelscope.swift.prompt import PromptConfig
from modelscope.trainers import build_trainer
from modelscope.utils.test_utils import test_level


class TestVisionEfficientTuningSwiftTrainer(unittest.TestCase):

    def setUp(self):
        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))

        self.train_dataset = MsDataset.load(
            'foundation_model_evaluation_benchmark',
            namespace='damo',
            subset_name='OxfordFlowers',
            split='train')

        self.eval_dataset = MsDataset.load(
            'foundation_model_evaluation_benchmark',
            namespace='damo',
            subset_name='OxfordFlowers',
            split='eval')

        self.max_epochs = 1
        self.num_classes = 102
        self.tune_length = 10

        self.tmp_dir = tempfile.TemporaryDirectory().name
        if not os.path.exists(self.tmp_dir):
            os.makedirs(self.tmp_dir)

    def tearDown(self):
        shutil.rmtree(self.tmp_dir)
        super().tearDown()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_vision_efficient_tuning_swift_lora_train(self):
        model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-lora'

        def cfg_modify_fn(cfg):
            cfg.model.head.num_classes = self.num_classes
            cfg.model.finetune = True
            cfg.train.max_epochs = self.max_epochs
            cfg.train.lr_scheduler.T_max = self.max_epochs
            cfg.model.backbone.lora_length = 0
            return cfg

        lora_config = LoRAConfig(
            rank=self.tune_length,
            replace_modules=['qkv'],
            merge_weights=False,
            only_lora_trainable=False,
            use_merged_linear=True,
            enable_lora=[True])

        kwargs = dict(
            model=model_id,
            work_dir=self.tmp_dir,
            train_dataset=self.train_dataset,
            eval_dataset=self.eval_dataset,
            cfg_modify_fn=cfg_modify_fn,
            efficient_tuners=[lora_config])

        trainer = build_trainer(
            name=Trainers.vision_efficient_tuning, default_args=kwargs)
        trainer.train()
        result = trainer.evaluate()
        print(f'Vision-efficient-tuning-lora train output: {result}.')

        results_files = os.listdir(self.tmp_dir)
        self.assertIn(f'{trainer.timestamp}.log.json', results_files)
        for i in range(self.max_epochs):
            self.assertIn(f'epoch_{i+1}.pth', results_files)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_vision_efficient_tuning_swift_adapter_train(self):
        model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-adapter'

        def cfg_modify_fn(cfg):
            cfg.model.head.num_classes = self.num_classes
            cfg.model.finetune = True
            cfg.train.max_epochs = self.max_epochs
            cfg.train.lr_scheduler.T_max = self.max_epochs
            cfg.model.backbone.adapter_length = 0
            return cfg

        adapter_config = AdapterConfig(
            dim=768,
            hidden_pos=0,
            module_name=r'.*blocks\.\d+\.mlp$',
            adapter_length=self.tune_length,
            only_adapter_trainable=False)

        kwargs = dict(
            model=model_id,
            work_dir=self.tmp_dir,
            train_dataset=self.train_dataset,
            eval_dataset=self.eval_dataset,
            cfg_modify_fn=cfg_modify_fn,
            efficient_tuners=[adapter_config])

        trainer = build_trainer(
            name=Trainers.vision_efficient_tuning, default_args=kwargs)
        trainer.train()
        result = trainer.evaluate()
|
||||
print(f'Vision-efficient-tuning-adapter train output: {result}.')
|
||||
|
||||
results_files = os.listdir(self.tmp_dir)
|
||||
self.assertIn(f'{trainer.timestamp}.log.json', results_files)
|
||||
for i in range(self.max_epochs):
|
||||
self.assertIn(f'epoch_{i+1}.pth', results_files)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
def test_vision_efficient_tuning_swift_prompt_train(self):
|
||||
model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-prompt'
|
||||
|
||||
def cfg_modify_fn(cfg):
|
||||
cfg.model.head.num_classes = self.num_classes
|
||||
cfg.model.finetune = True
|
||||
cfg.train.max_epochs = self.max_epochs
|
||||
cfg.train.lr_scheduler.T_max = self.max_epochs
|
||||
cfg.model.backbone.prompt_length = 0
|
||||
return cfg
|
||||
|
||||
prompt_config = PromptConfig(
|
||||
dim=768,
|
||||
module_layer_name=r'.*blocks\.\d+$',
|
||||
embedding_pos=0,
|
||||
prompt_length=self.tune_length,
|
||||
only_prompt_trainable=False,
|
||||
attach_front=False)
|
||||
|
||||
kwargs = dict(
|
||||
model=model_id,
|
||||
work_dir=self.tmp_dir,
|
||||
train_dataset=self.train_dataset,
|
||||
eval_dataset=self.eval_dataset,
|
||||
cfg_modify_fn=cfg_modify_fn,
|
||||
efficient_tuners=[prompt_config])
|
||||
|
||||
trainer = build_trainer(
|
||||
name=Trainers.vision_efficient_tuning, default_args=kwargs)
|
||||
trainer.train()
|
||||
result = trainer.evaluate()
|
||||
print(f'Vision-efficient-tuning-prompt train output: {result}.')
|
||||
|
||||
results_files = os.listdir(self.tmp_dir)
|
||||
self.assertIn(f'{trainer.timestamp}.log.json', results_files)
|
||||
for i in range(self.max_epochs):
|
||||
self.assertIn(f'epoch_{i+1}.pth', results_files)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
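All three tests above share one recipe: zero out the backbone's built-in tuner (lora_length / adapter_length / prompt_length = 0) so that only the externally attached swift tuner trains, then pass the config to the trainer via efficient_tuners. The common cfg edit could be factored as below; make_cfg_modify_fn is a hypothetical helper, not part of the test file:

def make_cfg_modify_fn(num_classes, max_epochs, backbone_field):
    """Build the cfg_modify_fn shared by the three tests above.

    backbone_field is 'lora_length', 'adapter_length' or 'prompt_length';
    setting it to 0 disables the backbone's built-in tuner.
    """
    def cfg_modify_fn(cfg):
        cfg.model.head.num_classes = num_classes
        cfg.model.finetune = True
        cfg.train.max_epochs = max_epochs
        cfg.train.lr_scheduler.T_max = max_epochs
        setattr(cfg.model.backbone, backbone_field, 0)
        return cfg
    return cfg_modify_fn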
81
tests/tuners/test_adapter.py
Normal file
@@ -0,0 +1,81 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import shutil
import tempfile
import unittest

import numpy as np
import torch

from modelscope import read_config
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models.base import Model
from modelscope.msdatasets import MsDataset
from modelscope.pipelines import pipeline
from modelscope.swift import Swift
from modelscope.swift.adapter import AdapterConfig
from modelscope.trainers import build_trainer
from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.test_utils import test_level


class TestAdapter(unittest.TestCase):

    def setUp(self):
        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
        self.tmp_dir = tempfile.TemporaryDirectory().name
        if not os.path.exists(self.tmp_dir):
            os.makedirs(self.tmp_dir)

    def tearDown(self):
        shutil.rmtree(self.tmp_dir)
        super().tearDown()

    @unittest.skipUnless(test_level() >= 0, 'skip in this level')
    def test_adapter_smoke_test(self):
        dataset = MsDataset.load(
            'clue', subset_name='afqmc',
            split='train').to_hf_dataset().select(range(2))

        model_dir = snapshot_download(
            'damo/nlp_structbert_sentence-similarity_chinese-tiny')
        model = Model.from_pretrained(model_dir, adv_grad_factor=None)

        cfg_file = os.path.join(model_dir, 'configuration.json')

        model_cfg = os.path.join(model_dir, 'config.json')
        model_cfg = read_config(model_cfg)

        adapter_config = AdapterConfig(
            dim=model_cfg.hidden_size,
            module_name=r'.*layer\.\d+$',
            method_name='feed_forward_chunk',
            hidden_pos=0)
        model = Swift.prepare_model(model, adapter_config)
        kwargs = dict(
            model=model,
            cfg_file=cfg_file,
            train_dataset=dataset,
            eval_dataset=dataset,
            work_dir=self.tmp_dir)

        trainer = build_trainer(default_args=kwargs)
        trainer.train()
        output_dir = os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR)

        def pipeline_sentence_similarity(model_dir):
            model = Model.from_pretrained(model_dir)
            adapter_config.pretrained_weights = output_dir
            Swift.prepare_model(model, adapter_config)
            model.eval()
            pipeline_ins = pipeline(
                task=Tasks.sentence_similarity, model=model)
            return pipeline_ins(input=('test', 'this is a test'))

        output1 = pipeline_sentence_similarity(
            'damo/nlp_structbert_sentence-similarity_chinese-tiny')
        print(output1)


if __name__ == '__main__':
    unittest.main()
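Note how the adapter is wired in the smoke test: module_name is a regex over named submodules (every encoder layer here), method_name picks the layer method whose output the adapter wraps, and hidden_pos selects which positional output carries the hidden state. Reloading tuned weights for inference then reduces to the sketch below; the hidden size and weights path are assumptions:

from modelscope.models.base import Model
from modelscope.swift import Swift
from modelscope.swift.adapter import AdapterConfig

adapter_config = AdapterConfig(
    dim=312,  # hidden_size of the tiny structbert; value assumed here
    module_name=r'.*layer\.\d+$',
    method_name='feed_forward_chunk',
    hidden_pos=0)
adapter_config.pretrained_weights = '/path/to/train_output'  # hypothetical

model = Model.from_pretrained(
    'damo/nlp_structbert_sentence-similarity_chinese-tiny')
model = Swift.prepare_model(model, adapter_config)
model.eval()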
@@ -11,9 +11,10 @@ from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.models.base import Model
 from modelscope.msdatasets import MsDataset
 from modelscope.pipelines import pipeline
+from modelscope.swift import Swift
+from modelscope.swift.lora import (Linear, LoRA, LoRAConfig,
+                                   mark_only_lora_as_trainable)
 from modelscope.trainers import build_trainer
-from modelscope.tuners.lora import (Linear, LoRATuner,
-                                    mark_only_lora_as_trainable)
 from modelscope.utils.constant import ModelFile, Tasks
 from modelscope.utils.test_utils import test_level
@@ -66,22 +67,18 @@ class TestLora(unittest.TestCase):

         model_dir = snapshot_download(
             'damo/nlp_structbert_sentence-similarity_chinese-tiny')
-        model = Model.from_pretrained(
-            'damo/nlp_structbert_sentence-similarity_chinese-tiny',
-            adv_grad_factor=None)
+        model = Model.from_pretrained(model_dir, adv_grad_factor=None)

         cfg_file = os.path.join(model_dir, 'configuration.json')
+        lora_config = LoRAConfig(replace_modules=['query', 'key', 'value'])
+        model = Swift.prepare_model(model, lora_config)

         kwargs = dict(
             model=model,
             cfg_file=cfg_file,
             train_dataset=dataset,
             eval_dataset=dataset,
-            work_dir=self.tmp_dir,
-            efficient_tuners=[{
-                'type': 'lora',
-                'replace_modules': ['query', 'key', 'value']
-            }])
+            work_dir=self.tmp_dir)

         trainer = build_trainer(default_args=kwargs)
         trainer.train()
@@ -89,7 +86,8 @@ class TestLora(unittest.TestCase):

         def pipeline_sentence_similarity(model_dir):
             model = Model.from_pretrained(model_dir)
-            LoRATuner.tune(model, replace_modules=['query', 'key', 'value'])
+            lora_config.pretrained_weights = output_dir
+            Swift.prepare_model(model, lora_config)
             model.load_state_dict(
                 torch.load(os.path.join(output_dir, 'pytorch_model.bin')))
             model.eval()
@@ -100,7 +98,7 @@ class TestLora(unittest.TestCase):
         output1 = pipeline_sentence_similarity(
             'damo/nlp_structbert_sentence-similarity_chinese-tiny')

-        LoRATuner.unpatch_lora(model, ['query', 'key', 'value'])
+        LoRA.unpatch_lora(model, lora_config)
         model.save_pretrained(
             output_dir, save_checkpoint_names='pytorch_model.bin')
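The LoRA test diff is essentially a mechanical API migration from the removed modelscope.tuners.lora.LoRATuner to the swift entry points. Condensed below, with the old calls kept as comments (every new call appears verbatim in the diff above):

from modelscope.models.base import Model
from modelscope.swift import Swift
from modelscope.swift.lora import LoRA, LoRAConfig

model = Model.from_pretrained(
    'damo/nlp_structbert_sentence-similarity_chinese-tiny')

# old: LoRATuner.tune(model, replace_modules=['query', 'key', 'value'])
lora_config = LoRAConfig(replace_modules=['query', 'key', 'value'])
model = Swift.prepare_model(model, lora_config)

# old: LoRATuner.unpatch_lora(model, ['query', 'key', 'value'])
LoRA.unpatch_lora(model, lora_config)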
83
tests/tuners/test_prompt.py
Normal file
@@ -0,0 +1,83 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import shutil
import tempfile
import unittest

import numpy as np
import torch

from modelscope import read_config
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models.base import Model
from modelscope.msdatasets import MsDataset
from modelscope.pipelines import pipeline
from modelscope.swift import Swift
from modelscope.swift.adapter import AdapterConfig
from modelscope.swift.prompt import PromptConfig
from modelscope.trainers import build_trainer
from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.test_utils import test_level


class TestPrompt(unittest.TestCase):

    def setUp(self):
        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
        self.tmp_dir = tempfile.TemporaryDirectory().name
        if not os.path.exists(self.tmp_dir):
            os.makedirs(self.tmp_dir)

    def tearDown(self):
        shutil.rmtree(self.tmp_dir)
        super().tearDown()

    @unittest.skipUnless(test_level() >= 0, 'skip in this level')
    def test_prompt_smoke_test(self):
        dataset = MsDataset.load(
            'clue', subset_name='afqmc',
            split='train').to_hf_dataset().select(range(2))

        model_dir = snapshot_download(
            'damo/nlp_structbert_sentence-similarity_chinese-tiny')
        model = Model.from_pretrained(model_dir, adv_grad_factor=None)

        cfg_file = os.path.join(model_dir, 'configuration.json')
        model_cfg = os.path.join(model_dir, 'config.json')
        model_cfg = read_config(model_cfg)

        prompt_config = PromptConfig(
            dim=model_cfg.hidden_size,
            module_layer_name=r'.*layer\.\d+$',
            embedding_pos=0,
            attention_mask_pos=1)

        model = Swift.prepare_model(model, prompt_config)

        kwargs = dict(
            model=model,
            cfg_file=cfg_file,
            train_dataset=dataset,
            eval_dataset=dataset,
            work_dir=self.tmp_dir)

        trainer = build_trainer(default_args=kwargs)
        trainer.train()
        output_dir = os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR)

        def pipeline_sentence_similarity(model_dir):
            model = Model.from_pretrained(model_dir)
            prompt_config.pretrained_weights = output_dir
            Swift.prepare_model(model, prompt_config)
            model.eval()
            pipeline_ins = pipeline(
                task=Tasks.sentence_similarity, model=model)
            return pipeline_ins(input=('test', 'this is a test'))

        output1 = pipeline_sentence_similarity(
            'damo/nlp_structbert_sentence-similarity_chinese-tiny')
        print(output1)


if __name__ == '__main__':
    unittest.main()
@@ -35,7 +35,7 @@ class AstScaningTest(unittest.TestCase):
     def test_ast_scaning_class(self):
         astScaner = AstScanning()
         pipeline_file = os.path.join(MODELSCOPE_PATH, 'pipelines', 'nlp',
-                                     'text_generation_pipeline.py')
+                                     'fill_mask_pipeline.py')
         output = astScaner.generate_ast(pipeline_file)
         self.assertTrue(output['imports'] is not None)
         self.assertTrue(output['from_imports'] is not None)
@@ -45,24 +45,19 @@ class AstScaningTest(unittest.TestCase):
         self.assertIsInstance(imports, dict)
         self.assertIsInstance(from_imports, dict)
         self.assertIsInstance(decorators, list)
-        self.assertListEqual(
-            list(set(imports.keys()) - set(['torch', 'os'])), [])
-        self.assertEqual(len(from_imports.keys()), 11)
+        self.assertListEqual(list(set(imports.keys()) - set(['numpy'])), [])
+        self.assertEqual(len(from_imports.keys()), 8)
         self.assertTrue(from_imports['modelscope.metainfo'] is not None)
         self.assertEqual(from_imports['modelscope.metainfo'], ['Pipelines'])
-        self.assertEqual(
-            decorators,
-            [('PIPELINES', 'text-generation', 'text-generation'),
-             ('PIPELINES', 'text2text-generation', 'translation_en_to_de'),
-             ('PIPELINES', 'text2text-generation', 'translation_en_to_ro'),
-             ('PIPELINES', 'text2text-generation', 'translation_en_to_fr'),
-             ('PIPELINES', 'text2text-generation', 'text2text-generation')])
+        self.assertEqual(decorators,
+                         [('PIPELINES', 'fill-mask', 'fill-mask'),
+                          ('PIPELINES', 'fill-mask', 'fill-mask-ponet')])

     def test_files_scaning_method(self):
         fileScaner = FilesAstScanning()
         # case of pass in files directly
         pipeline_file = os.path.join(MODELSCOPE_PATH, 'pipelines', 'nlp',
-                                     'text_generation_pipeline.py')
+                                     'fill_mask_pipeline.py')
         file_list = [pipeline_file]
         output = fileScaner.get_files_scan_results(file_list)
         self.assertTrue(output[INDEX_KEY] is not None)