Merge branch 'master-github' into release/1.7

wenmeng.zwm committed 2023-07-19 17:50:59 +08:00
44 changed files with 3142 additions and 368 deletions

View File

@@ -1,15 +1,19 @@
import os
from modelscope.metainfo import Trainers
-from modelscope.msdatasets.audio.asr_dataset import ASRDataset
+from modelscope.msdatasets.dataset_cls.custom_datasets import ASRDataset
from modelscope.trainers import build_trainer
+from modelscope.utils.constant import DownloadMode
def modelscope_finetune(params):
    if not os.path.exists(params.output_dir):
        os.makedirs(params.output_dir, exist_ok=True)
    # dataset split ["train", "validation"]
-    ds_dict = ASRDataset.load(params.data_path, namespace='speech_asr')
+    ds_dict = ASRDataset.load(
+        params.data_path,
+        namespace='speech_asr',
+        download_mode=params.download_mode)
    kwargs = dict(
        model=params.model,
        data_dir=ds_dict,
@@ -36,5 +40,6 @@ if __name__ == '__main__':
    # if dataset_type == "large", batch_bins is measured in milliseconds
    params.max_epoch = 50  # maximum number of training epochs
    params.lr = 0.00005  # learning rate
+    params.download_mode = DownloadMode.FORCE_REDOWNLOAD  # force a re-download; otherwise keep the default DownloadMode.REUSE_DATASET_IF_EXISTS
    modelscope_finetune(params)
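For context, the new download_mode argument controls whether a cached copy of the dataset is reused or fetched again. A minimal sketch, assuming only the ASRDataset and DownloadMode APIs visible in this diff (the dataset name below is a placeholder, not a real dataset id):

# Sketch only: reuse a cached copy if present; pass FORCE_REDOWNLOAD to refresh it.
from modelscope.msdatasets.dataset_cls.custom_datasets import ASRDataset
from modelscope.utils.constant import DownloadMode

ds_dict = ASRDataset.load(
    'your_asr_dataset_name',  # placeholder for illustration
    namespace='speech_asr',
    download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS)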

View File

@@ -8,7 +8,6 @@ from text_generation_metric import TextGenerationMetric
from transformers import DataCollatorForSeq2Seq
from modelscope import snapshot_download
-from modelscope.metainfo import Models
from modelscope.models import Model
from modelscope.msdatasets import MsDataset
from modelscope.swift import Swift
@@ -143,6 +142,14 @@ class Chatglm6bArguments(TrainingArgs):
        metadata={'help': 'The lora alpha'},
    )
+    use_amp: int = field(
+        default=0,
+        metadata={
+            'help':
+            'Whether to use amp(automatic mixed precision) to train the model.'
+        },
+    )
args = Chatglm6bArguments(eval_metrics='chatglm').parse_cli()
print(args)
@@ -160,6 +167,13 @@ def cfg_modify_fn(cfg):
        cfg.merge_from_dict(config)
    else:
        cfg = config
+    if args.use_amp:
+        if not getattr(cfg.train, 'hooks', None):
+            cfg.train.hooks = []
+        cfg.train.hooks.append({
+            'type': 'TorchAMPOptimizerHook',
+            # Optional loss_scale parameter here.
+        })
    if cfg.train.lr_scheduler.type == 'LinearLR':
        cfg.train.lr_scheduler['total_iters'] = \
            int(len(train_dataset) / cfg.train.dataloader.batch_size_per_gpu) * cfg.train.max_epochs
@@ -187,15 +201,13 @@ model_config['model'] = ConfigDict({
    'type': config['model']['type'],
})
-if config['model']['type'] == 'chatglm6b':
-    model_config['model']['pre_seq_len'] = args.pre_seq_len
-    model_config['model']['prefix_projection'] = args.prefix_projection
+model_config['model']['pre_seq_len'] = args.pre_seq_len
+model_config['model']['prefix_projection'] = args.prefix_projection
tokenizer = ChatGLMTokenizer.from_pretrained(model_dir, trust_remote_code=True)
device_map_kwargs = {}
device_kwargs = {}
-if args.use_lora != 0:
+if args.use_lora != 0 and torch.cuda.device_count() > 1:
    device_map_kwargs['device_map'] = 'auto'
    # No placement for model, leave the model to `device_map`
    device_kwargs['device'] = 'cpu'
@@ -231,7 +243,10 @@ if args.use_lora != 0:
        rank=args.lora_rank,
        lora_alpha=args.lora_alpha,
        lora_dropout=args.lora_dropout)
-    model = model.bfloat16()
+    if args.use_amp:
+        model = model.float()
+    else:
+        model = model.bfloat16()
    Swift.prepare_model(model, lora_config)
prefix = args.source_prefix if args.source_prefix is not None else ''
@@ -334,13 +349,10 @@ def preprocess_function_train(examples):
        pad_len = max_seq_length - len(input_ids)
        input_ids = input_ids + [tokenizer.pad_token_id] * pad_len
-        if config['model']['type'] == 'chatglm6b':
-            labels = labels + [tokenizer.pad_token_id] * pad_len
-            if args.ignore_pad_token_for_loss:
-                labels = [(lb if lb != tokenizer.pad_token_id else -100)
-                          for lb in labels]
-        else:
-            labels = labels + [-100] * pad_len
+        labels = labels + [tokenizer.pad_token_id] * pad_len
+        if args.ignore_pad_token_for_loss:
+            labels = [(lb if lb != tokenizer.pad_token_id else -100)
+                      for lb in labels]
        model_inputs['input_ids'].append(input_ids)
        model_inputs['labels'].append(labels)
@@ -372,8 +384,7 @@ data_collator = DataCollatorForSeq2Seq(
    padding=False)
model.gradient_checkpointing_enable()
-if config['model']['type'] == 'chatglm6b':
-    model.enable_input_require_grads()
+model.enable_input_require_grads()
# import torch
# model = torch.nn.DataParallel(model).cuda()
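Read together, the use_amp changes in this file follow one pattern: register the AMP optimizer hook and keep the model weights in fp32 so autocast can downcast where safe. A minimal sketch, assuming the TorchAMPOptimizerHook name shown in this diff and a ModelScope-style cfg object (the helper function itself is hypothetical, not part of the commit):

# Sketch of the use_amp wiring introduced above.
def apply_amp_setting(cfg, model, use_amp: bool):
    if use_amp:
        if not getattr(cfg.train, 'hooks', None):
            cfg.train.hooks = []
        # The hook drives mixed-precision optimizer updates; an optional loss_scale could be added here.
        cfg.train.hooks.append({'type': 'TorchAMPOptimizerHook'})
        model = model.float()      # fp32 master weights under AMP
    else:
        model = model.bfloat16()   # original bf16 path
    return cfg, model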

View File

@@ -0,0 +1,26 @@
PRE_SEQ_LEN=128
LR=2e-2
PYTHONPATH=. python examples/pytorch/chatglm6b/finetune.py \
--train_dataset_name AdvertiseGen/train.json \
--val_dataset_name AdvertiseGen/dev.json \
--prompt_column content \
--response_column summary \
--model "ZhipuAI/chatglm2-6b" \
--max_source_length 64 \
--max_target_length 128 \
--per_device_train_batch_size 16 \
--per_device_eval_batch_size 1 \
--train.optimizer.options.cumulative_iters 1 \
--max_epochs 1 \
--save_strategy 'by_step' \
--save_interval 1000 \
--lr $LR \
--eval_strategy "by_step" \
--eval_interval 1000 \
--lr_strategy 'by_step' \
--task 'chat' \
--model.type 'chatglm2-6b' \
--pre_seq_len $PRE_SEQ_LEN \
--quantization_bit 4 \
--work_dir ptuning_adv_target \
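The dotted flags above (for example --train.optimizer.options.cumulative_iters) address nested keys of the trainer configuration. A sketch of the correspondence, based on the config layout used elsewhere in this commit:

# Sketch only: --train.optimizer.options.cumulative_iters 1 targets this nested entry.
config = {
    'train': {
        'optimizer': {
            'options': {
                'cumulative_iters': 1  # gradient accumulation steps
            }
        }
    }
}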

View File

@@ -0,0 +1,466 @@
import ast
import datetime as dt
import math
import os
import random
import re
import sys
from dataclasses import dataclass, field
from functools import partial
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import json
import matplotlib.pyplot as plt
import numpy as np
#
import torch
import torch.nn as nn
import torch.optim as optim
from datasets import Dataset as HfDataset
from datasets import concatenate_datasets
from matplotlib.axes import Axes
from matplotlib.figure import Figure
from numpy import ndarray
from tensorboard.backend.event_processing.event_accumulator import \
EventAccumulator
from torch import Tensor
from torch import device as Device
from torch import dtype as Dtype
from torch.nn import Module
from torch.nn.parameter import Parameter
from torch.nn.utils.rnn import pad_sequence
from torch.optim import Optimizer
from torch.optim import lr_scheduler as lrs
from torch.optim.lr_scheduler import _LRScheduler as LRScheduler
from torch.utils.data import Dataset
#
from torchmetrics import Accuracy, MeanMetric
#
from tqdm import tqdm
from transformers import (AutoConfig, AutoModelForCausalLM, AutoTokenizer,
GenerationConfig, HfArgumentParser, TextStreamer)
#
from modelscope import (Model, MsDataset, get_logger, read_config,
snapshot_download)
from modelscope.metrics.base import Metric
from modelscope.metrics.builder import METRICS
from modelscope.models.nlp.chatglm2 import ChatGLM2Tokenizer
from modelscope.msdatasets.dataset_cls.custom_datasets import \
TorchCustomDataset
from modelscope.swift import LoRAConfig, Swift
from modelscope.trainers import EpochBasedTrainer
from modelscope.utils.config import Config, ConfigDict
from modelscope.utils.registry import default_group
#
COLOR, COLOR_S = '#FFE2D9', '#FF7043'
PROMPT = """Human: {instruction}
AI: """
logger = get_logger()
os.environ['TOKENIZERS_PARALLELISM'] = 'true'
#
def _get_version(work_dir: str) -> int:
if os.path.isdir(work_dir):
fnames = os.listdir(work_dir)
else:
fnames = []
v_list = [-1]
for fname in fnames:
m = re.match(r'v(\d+)', fname)
if m is None:
continue
v = m.group(1)
v_list.append(int(v))
return max(v_list) + 1
def get_work_dir(work_dir: str) -> str:
"""add version"""
work_dir = os.path.abspath(work_dir)
version = _get_version(work_dir)
time = dt.datetime.now().strftime('%Y%m%d-%H%M%S')
#
work_dir = os.path.join(work_dir, f'v{version}-{time}')
logger.info(f'work_dir: {work_dir}')
return work_dir
def _format_device(device: Union[List[int], str]) -> Tuple[List[int], str]:
if isinstance(device, list):
device_ids = device
device_str = ','.join([str(d) for d in device])
else:
device_ids = [int(d) for d in device.split(',') if d != '-1']
device_str = device
device_str = device_str.replace(' ', '')
return device_ids, device_str
def select_device(device: Union[List[int], str]) -> Device:
"""Call this function before cuda is initialized.
device: e.g. []: 'cpu', [0], [0, 1, 2]
e.g. '-1': 'cpu', '0', '0,1,2'
"""
if torch.cuda.is_initialized():
logger.warning('CUDA has been initialized! Device selection fails!')
return torch.device('cuda:0')
#
device_ids, device_str = _format_device(device)
#
os.environ['CUDA_VISIBLE_DEVICES'] = device_str
log_s = 'Using device: '
if len(device_ids) == 0:
master_device: str = 'cpu'
log_s += 'cpu'
else:
assert torch.cuda.is_available(
) and torch.cuda.device_count() >= len(device_ids)
master_device = 'cuda:0'
log_s += f'cuda:{device_str}'
logger.info(log_s)
return torch.device(master_device)
def seed_everything(seed: Optional[int] = None, gpu_dtm: bool = False) -> int:
if seed is None:
seed_max = np.iinfo(np.int32).max
seed = random.randint(0, seed_max)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
logger.info(f'Global seed set to {seed}')
if gpu_dtm:
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
logger.info(f'Setting deterministic: {True}, benchmark: {False}')
return seed
def get_T_max(dataset_len: int, batch_size: int, max_epochs: int,
drop_last: bool) -> int:
"""Calculate T_max in CosineAnnealingLR"""
if drop_last:
T_max = dataset_len // batch_size
else:
T_max = math.ceil(dataset_len / batch_size)
T_max *= max_epochs
return T_max
def tokenize_function(example: Dict[str, Optional[str]],
tokenizer,
max_length: Optional[int] = 2048) -> Dict[str, Any]:
"""Only applicable to baichuan and chatglm2. Other models need to be tested"""
instruction: str = example['instruction']
input_ = example['input']
if input_ is not None and input_ != '':
# instruction = instruction + '\n'
if input_.startswith('输入:'):
instruction = instruction + input_[3:]
else:
instruction = instruction + input_
output = example['output']
src_text = PROMPT.format(instruction=instruction)
src_input_ids: List[int] = tokenizer(
src_text, return_attention_mask=False,
add_special_tokens=True)['input_ids']
#
tgt_input_ids = []
if output is not None:
tgt_input_ids += tokenizer(
output, return_attention_mask=False,
add_special_tokens=False)['input_ids']
tgt_input_ids += [tokenizer.eos_token_id]
labels = [-100] * len(src_input_ids) + tgt_input_ids
else:
labels = None
input_ids = src_input_ids + tgt_input_ids
#
if max_length is not None:
input_ids = input_ids[-max_length:]
if labels is not None:
labels = labels[-max_length:]
#
return {'input_ids': input_ids, 'labels': labels}
def stat_dataset(dataset: HfDataset) -> None:
"""Statistical analysis was performed on the data set"""
_token_len = []
for d in dataset:
_token_len.append(len(d['input_ids']))
_token_len = np.array(_token_len)
mean = _token_len.mean().item()
std = _token_len.std().item()
min_ = _token_len.min().item()
max_ = _token_len.max().item()
logger.info(
f'Dataset Token Length: {mean:.6f}±{std:.6f}, min={min_:.6f}, max={max_:.6f}, size={_token_len.shape[0]}'
)
def print_example(example: Dict[str, Any], tokenizer) -> None:
input_ids, labels = example['input_ids'], example['labels']
print(f'[INPUT_IDS] {input_ids}')
print(f'[INPUT] {tokenizer.decode(input_ids)}')
print()
print(f'[LABELS_IDS] {labels}')
print(
f'[LABELS] {tokenizer.decode([lb if lb != -100 else 0 for lb in labels])}'
)
def data_collate_fn(batch: List[Dict[str, Any]], tokenizer) -> Dict[str, Any]:
input_ids = [torch.tensor(b['input_ids']) for b in batch]
labels = [torch.tensor(b['labels']) for b in batch]
attention_mask = [
torch.ones(len(input_ids[i]), dtype=torch.int64)
for i in range(len(input_ids))
]
#
input_ids = pad_sequence(
input_ids, batch_first=True, padding_value=tokenizer.pad_token_id)
attention_mask = pad_sequence(
attention_mask, batch_first=True, padding_value=0)
labels = pad_sequence(labels, batch_first=True, padding_value=-100)
return {
'input_ids': input_ids,
'attention_mask': attention_mask,
'labels': labels
}
def print_model_info(model: Module, name: Optional[str] = None) -> None:
if name is None:
name = model.__class__.__name__
#
n_params = sum(p.numel() for p in model.parameters())
n_grads = sum(p.numel() for p in model.parameters() if p.requires_grad)
n_buffers = sum(p.numel() for p in model.buffers())
#
n_params /= 1e6
n_grads /= 1e6
n_buffers /= 1e6
s = [
f'{name}: ',
f'{n_params:.4f}M Params ({n_grads:.4f}M Trainable), ',
f'{n_buffers:.4f}M Buffers',
]
s += '.'
logger.info(''.join(s))
def show_freeze_layers(model: Module, max_lines: int = 20) -> None:
named_p = list(model.named_parameters())
for i, (n, p) in enumerate(named_p):
if i >= max_lines:
logger.info('...')
break
logger.info(f'{n}: requires_grad={p.requires_grad}')
@METRICS.register_module(group_key=default_group, module_name='my_metric')
class MyMetric(Metric):
def __init__(self, vocab_size: int):
self.acc = Accuracy('multiclass', num_classes=vocab_size)
self.loss = MeanMetric()
def add(self, outputs: Dict[str, Any], inputs: Dict[str, Any]) -> None:
loss: Tensor = outputs.loss
self.loss.update(loss)
#
labels: Tensor = inputs['labels']
labels = labels[:, 1:]
labels_mask = labels != -100
logits: Tensor = outputs.logits[:, :-1]
logits = logits[labels_mask].contiguous().view(-1, logits.shape[-1])
pred = logits.argmax(dim=-1)
labels = labels[labels_mask].to(logits.device)
self.acc.update(pred, labels)
def evaluate(self):
return {
'acc': self.acc.compute().item(),
'loss': self.loss.compute().item()
}
def merge(self, other: 'MyMetric') -> None:
"""This script does not support ddp. TODO"""
raise NotImplementedError
def _add_special_token(tokenizer):
if tokenizer.eos_token_id is None:
tokenizer.eos_token_id = 2
if tokenizer.bos_token_id is None:
tokenizer.bos_token_id = 1
if tokenizer.pad_token_id is None:
tokenizer.pad_token_id = 0
logger.info(f'bos_token_id: {tokenizer.bos_token_id}, '
f'eos_token_id: {tokenizer.eos_token_id}, '
f'pad_token_id: {tokenizer.pad_token_id}')
def get_baichuan_model_tokenizer(model_dir: str,
load_model: bool = True,
add_special_token: bool = True):
sys.path.insert(0, model_dir)
model_config = AutoConfig.from_pretrained(
model_dir, trust_remote_code=True)
model_config.torch_dtype = torch.float16
logger.info(f'model_config: {model_config}')
tokenizer = AutoTokenizer.from_pretrained(
model_dir, trust_remote_code=True)
model = None
if load_model:
model = AutoModelForCausalLM.from_pretrained(
model_dir,
config=model_config,
device_map='auto',
torch_dtype=torch.float16,
trust_remote_code=True)
#
if add_special_token:
_add_special_token(tokenizer)
return model, tokenizer
def get_chatglm2_model_tokenizer(model_dir: str,
load_model: bool = True,
add_special_token: bool = True):
config = read_config(model_dir)
config['model'] = ConfigDict({'type': 'chatglm2-6b'})
tokenizer = ChatGLM2Tokenizer.from_pretrained(model_dir)
model = None
if load_model:
model = Model.from_pretrained(
model_dir,
cfg_dict=config,
device_map='auto',
torch_dtype=torch.float16)
if add_special_token:
_add_special_token(tokenizer)
return model, tokenizer
def get_llama2_model_tokenizer(model_dir: str,
load_model: bool = True,
add_special_token: bool = True):
config = AutoConfig.from_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = None
if load_model:
model = AutoModelForCausalLM.from_pretrained(
model_dir,
config=config,
device_map='auto',
torch_dtype=torch.float16,
)
if add_special_token:
_add_special_token(tokenizer)
return model, tokenizer
def get_alpaca_en_zh_dataset(
tokenize_function,
only_val: bool = False,
test_split_p: float = 0.01,
split_seed: int = 42,
data_sample: Optional[int] = None) -> Tuple[HfDataset, HfDataset]:
"""
split: Literal['train', 'validation', None]
"""
dataset_en: HfDataset = MsDataset.load(
'AI-ModelScope/alpaca-gpt4-data-en', split='train').to_hf_dataset()
dataset_zh: HfDataset = MsDataset.load(
'AI-ModelScope/alpaca-gpt4-data-zh', split='train').to_hf_dataset()
dataset_en = dataset_en.remove_columns(['text'])
dataset: HfDataset = concatenate_datasets([dataset_zh, dataset_en])
#
if data_sample is not None:
dataset = dataset.select(range(data_sample))
dataset = dataset.train_test_split(test_split_p, seed=split_seed)
if only_val:
dataset = dataset['test']
if tokenize_function is not None:
dataset = dataset.map(tokenize_function)
dataset = dataset.remove_columns(['instruction', 'input', 'output'])
#
if only_val:
return None, dataset
else:
return dataset['train'], dataset['test']
Item = Dict[str, float]
def read_tensorboard_file(fpath: str) -> Dict[str, List[Item]]:
if not os.path.isfile(fpath):
raise FileNotFoundError(f'fpath: {fpath}')
ea = EventAccumulator(fpath)
ea.Reload()
res = {}
tags = ea.Tags()['scalars']
for tag in tags:
values = ea.Scalars(tag)
r = []
for v in values:
r.append({'step': v.step, 'value': v.value})
res[tag] = r
return res
def tensorboard_smoothing(values: List[float],
smooth: float = 0.9) -> List[float]:
norm_factor = 1
x = 0
res = []
for i in range(len(values)):
x = x * smooth + values[i] # Exponential decay
res.append(x / norm_factor)
#
norm_factor *= smooth
norm_factor += 1
return res
def plot_image(tb_dir: str,
smooth_key: List[str],
smooth_val: float = 0.9,
figsize: Tuple[int, int] = (8, 5),
dpi: int = 100) -> None:
image_dir = os.path.join(os.path.dirname(tb_dir), 'images')
os.makedirs(image_dir, exist_ok=True)
#
fname = os.listdir(tb_dir)[0]
tb_path = os.path.join(tb_dir, fname)
data = read_tensorboard_file(tb_path)
#
for k in data.keys():
_data = data[k]
steps = [d['step'] for d in _data]
values = [d['value'] for d in _data]
if len(values) == 0:
continue
_, ax = plt.subplots(1, 1, squeeze=True, figsize=figsize, dpi=dpi)
ax.set_title(k)
if len(values) == 1:
ax.scatter(steps, values, color=COLOR_S)
elif k in smooth_key:
ax.plot(steps, values, color=COLOR)
values_s = tensorboard_smoothing(values, smooth_val)
ax.plot(steps, values_s, color=COLOR_S)
else:
ax.plot(steps, values, color=COLOR_S)
fpath = os.path.join(image_dir, k.replace('/', '_'))
plt.savefig(fpath, dpi=dpi, bbox_inches='tight')
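A few quick sanity checks of the helpers defined above (values chosen purely for illustration; the snippet assumes the functions above are in scope):

# _format_device accepts either a list of ids or a comma-separated string.
assert _format_device('0, 1') == ([0, 1], '0,1')
assert _format_device('-1') == ([], '-1')

# get_T_max counts dataloader steps: 1000 samples, batch 16, 3 epochs, drop_last=True
# -> 1000 // 16 = 62 steps per epoch, so T_max = 62 * 3 = 186.
assert get_T_max(1000, 16, 3, drop_last=True) == 186

# tensorboard_smoothing is a bias-corrected exponential moving average:
# with smooth=0.5 the second value is (0.5 * 1 + 2) / (1 + 0.5) = 1.666...
print(tensorboard_smoothing([1.0, 2.0], smooth=0.5))  # [1.0, 1.666...]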

View File

@@ -0,0 +1,122 @@
# ### Setting up experimental environment.
from _common import *
@dataclass
class Arguments:
device: str = '0' # e.g. '-1'; '0'; '0,1'
model_type: str = field(
default='baichuan-7b',
metadata={
'choices':
['baichuan-7b', 'baichuan-13b', 'chatglm2', 'llama2-7b']
})
ckpt_fpath: str = '' # e.g. '/path/to/your/iter_xxx.pth'
eval_human: bool = False # False: eval test_dataset
data_sample: Optional[int] = None
#
lora_target_modules: Optional[List[str]] = None
lora_rank: int = 8
lora_alpha: int = 32
lora_dropout_p: float = 0.1
#
max_new_tokens: int = 512
temperature: float = 0.9
top_k: int = 50
top_p: float = 0.9
def __post_init__(self):
if self.lora_target_modules is None:
if self.model_type in {'baichuan-7b', 'baichuan-13b'}:
self.lora_target_modules = ['W_pack']
elif self.model_type == 'chatglm2':
self.lora_target_modules = ['query_key_value']
elif self.model_type == 'llama2-7b':
self.lora_target_modules = ['q_proj', 'k_proj', 'v_proj']
else:
raise ValueError(f'model_type: {self.model_type}')
#
if not os.path.isfile(self.ckpt_fpath):
raise ValueError('Please enter a valid fpath')
def parse_args() -> Arguments:
args, = HfArgumentParser([Arguments]).parse_args_into_dataclasses()
return args
args = parse_args()
logger.info(args)
select_device(args.device)
# ### Loading Model and Tokenizer
if args.model_type == 'baichuan-7b':
model_dir = snapshot_download('baichuan-inc/baichuan-7B', 'v1.0.5')
model, tokenizer = get_baichuan_model_tokenizer(model_dir)
elif args.model_type == 'baichuan-13b':
model_dir = snapshot_download('baichuan-inc/Baichuan-13B-Base', 'v1.0.2')
model, tokenizer = get_baichuan_model_tokenizer(model_dir)
elif args.model_type == 'chatglm2':
model_dir = snapshot_download('ZhipuAI/chatglm2-6b', 'v1.0.6')
model, tokenizer = get_chatglm2_model_tokenizer(model_dir)
elif args.model_type == 'llama2-7b':
model_dir = snapshot_download('modelscope/Llama-2-7b-ms', 'v1.0.0')
model, tokenizer = get_llama2_model_tokenizer(model_dir)
else:
raise ValueError(f'model_type: {args.model_type}')
# ### Preparing lora
lora_config = LoRAConfig(
replace_modules=args.lora_target_modules,
rank=args.lora_rank,
lora_alpha=args.lora_alpha,
lora_dropout=args.lora_dropout_p,
pretrained_weights=args.ckpt_fpath)
logger.info(f'lora_config: {lora_config}')
Swift.prepare_model(model, lora_config)
model.bfloat16() # Consistent with training
# ### Inference
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generation_config = GenerationConfig(
max_new_tokens=args.max_new_tokens,
temperature=args.temperature,
top_k=args.top_k,
top_p=args.top_p,
do_sample=True,
pad_token_id=tokenizer.eos_token_id)
logger.info(generation_config)
def inference(data: Dict[str, Optional[str]]) -> str:
input_ids = tokenize_function(data, tokenizer)['input_ids']
print(f'[TEST]{tokenizer.decode(input_ids)}', end='')
input_ids = torch.tensor(input_ids)[None].cuda()
attention_mask = torch.ones_like(input_ids)
generate_ids = model.generate(
input_ids=input_ids,
attention_mask=attention_mask,
streamer=streamer,
generation_config=generation_config)
output_text = tokenizer.decode(generate_ids[0])
return output_text
if args.eval_human:
while True:
instruction = input('<<< ')
data = {'instruction': instruction, 'input': None, 'output': None}
inference(data)
print('-' * 80)
else:
_, test_dataset = get_alpaca_en_zh_dataset(
None, True, split_seed=42, data_sample=None)
mini_test_dataset = test_dataset.select(range(10))
for data in mini_test_dataset:
output = data['output']
data['output'] = None
inference(data)
print()
print(f'[LABELS]{output}')
print('-' * 80)
# input('next[ENTER]')

View File

@@ -0,0 +1,237 @@
# ### Setting up experimental environment.
"""
pip install modelscope
pip install numpy pandas matplotlib scikit-learn
pip install transformers datasets
conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
pip install tqdm tensorboard torchmetrics sentencepiece charset_normalizer
pip install accelerate transformers_stream_generator
pip install numpy -U # Resolve torchmetrics dependencies and update numpy
"""
from _common import *
@dataclass
class Arguments:
device: str = '0,1' # e.g. '-1'; '0'; '0,1'
seed: int = 42
model_type: str = field(
default='baichuan-7b',
metadata={
'choices':
['baichuan-7b', 'baichuan-13b', 'chatglm2', 'llama2-7b']
})
data_sample: Optional[int] = None
#
lora_target_modules: Optional[List[str]] = None
lora_rank: int = 8
lora_alpha: int = 32
lora_dropout_p: float = 0.1
#
gradient_checkpoint: bool = True
batch_size: int = 1
max_epochs: int = 1
eval_interval: int = 500
learning_rate: float = 1e-4
weight_decay: float = 0.01
n_accumulate_grad: int = 16
grad_clip_norm: float = 1.
warmup_iters: int = 200
last_max_checkpoint_num: int = 1
best_max_checkpoint_num: int = 1
#
logging_interval: int = 5
tb_interval: int = 5
def __post_init__(self):
if self.lora_target_modules is None:
if self.model_type in {'baichuan-7b', 'baichuan-13b'}:
self.lora_target_modules = ['W_pack']
elif self.model_type == 'chatglm2':
self.lora_target_modules = ['query_key_value']
elif self.model_type == 'llama2-7b':
self.lora_target_modules = ['q_proj', 'k_proj', 'v_proj']
else:
raise ValueError(f'model_type: {self.model_type}')
def parse_args() -> Arguments:
args, = HfArgumentParser([Arguments]).parse_args_into_dataclasses()
return args
args = parse_args()
logger.info(args)
select_device(args.device)
seed_everything(args.seed)
# ### Loading Model and Tokenizer
if args.model_type == 'baichuan-7b':
model_dir = snapshot_download('baichuan-inc/baichuan-7B', 'v1.0.5')
model, tokenizer = get_baichuan_model_tokenizer(model_dir)
elif args.model_type == 'baichuan-13b':
model_dir = snapshot_download('baichuan-inc/Baichuan-13B-Base', 'v1.0.2')
model, tokenizer = get_baichuan_model_tokenizer(model_dir)
elif args.model_type == 'chatglm2':
model_dir = snapshot_download('ZhipuAI/chatglm2-6b', 'v1.0.6')
model, tokenizer = get_chatglm2_model_tokenizer(model_dir)
elif args.model_type == 'llama2-7b':
model_dir = snapshot_download('modelscope/Llama-2-7b-ms', 'v1.0.0')
model, tokenizer = get_llama2_model_tokenizer(model_dir)
else:
raise ValueError(f'model_type: {args.model_type}')
#
if args.gradient_checkpoint:
# baichuan13B does not implement the `get_input_embeddings` function
if args.model_type == 'baichuan-13b':
def get_input_embeddings(self):
return self.model.embed_tokens
model.__class__.get_input_embeddings = get_input_embeddings.__get__(
model)
model.gradient_checkpointing_enable()
model.enable_input_require_grads()
# ### Preparing lora
lora_config = LoRAConfig(
replace_modules=args.lora_target_modules,
rank=args.lora_rank,
lora_alpha=args.lora_alpha,
lora_dropout=args.lora_dropout_p)
logger.info(f'lora_config: {lora_config}')
Swift.prepare_model(model, lora_config)
#
show_freeze_layers(model)
print_model_info(model)
_p: Parameter = list(model.parameters())[100]
logger.info(f'device: {_p.device}, dtype: {_p.dtype}')
model.bfloat16()
# ### Loading Dataset
tokenize_function = partial(tokenize_function, tokenizer=tokenizer)
train_dataset, val_dataset = get_alpaca_en_zh_dataset(
tokenize_function, split_seed=42, data_sample=args.data_sample)
# Data analysis
stat_dataset(train_dataset)
stat_dataset(val_dataset)
data_collate_fn = partial(data_collate_fn, tokenizer=tokenizer)
print_example(train_dataset[0], tokenizer)
# ### Setting Config
cfg_file = os.path.join(model_dir, 'configuration.json')
#
T_max = get_T_max(len(train_dataset), args.batch_size, args.max_epochs, True)
work_dir = get_work_dir(f'runs/{args.model_type}')
config = Config({
'train': {
'dataloader': {
'batch_size_per_gpu': args.batch_size,
'workers_per_gpu': 1,
'shuffle': True,
'drop_last': True,
'pin_memory': True
},
'max_epochs':
args.max_epochs,
'work_dir':
work_dir,
'optimizer': {
'type': 'AdamW',
'lr': args.learning_rate,
'weight_decay': args.weight_decay,
'options': {
'cumulative_iters': args.n_accumulate_grad,
'grad_clip': {
'norm_type': 2,
'max_norm': args.grad_clip_norm
}
}
},
'lr_scheduler': {
'type': 'CosineAnnealingLR',
'T_max': T_max,
'eta_min': 0,
'options': {
'by_epoch': False,
'warmup': {
'type': 'LinearWarmup',
'warmup_ratio': 0.1,
'warmup_iters': args.warmup_iters
}
}
},
'hooks': [
{
'type': 'CheckpointHook',
'by_epoch': False,
'interval': args.eval_interval,
'max_checkpoint_num': args.last_max_checkpoint_num
},
{
'type': 'EvaluationHook',
'by_epoch': False,
'interval': args.eval_interval
},
{
'type': 'BestCkptSaverHook',
'metric_key': 'loss',
'save_best': True,
'rule': 'min',
'max_checkpoint_num': args.best_max_checkpoint_num
},
{
'type': 'TextLoggerHook',
'by_epoch': True, # Whether EpochBasedTrainer is used
'interval': args.logging_interval
},
{
'type': 'TensorboardHook',
'by_epoch': False,
'interval': args.tb_interval
}
]
},
'evaluation': {
'dataloader': {
'batch_size_per_gpu': args.batch_size,
'workers_per_gpu': 1,
'shuffle': False,
'drop_last': False,
'pin_memory': True
},
'metrics': [{
'type': 'my_metric',
'vocab_size': tokenizer.vocab_size
}]
}
})
# ### Finetuning
def cfg_modify_fn(cfg: Config) -> Config:
cfg.update(config)
return cfg
trainer = EpochBasedTrainer(
model=model,
cfg_file=cfg_file,
data_collator=data_collate_fn,
train_dataset=train_dataset,
eval_dataset=val_dataset,
remove_unused_data=True,
seed=42,
device='cpu', # No placement for model, leave the model to `device_map`
cfg_modify_fn=cfg_modify_fn,
)
trainer.train()
# ### Visualization
tb_dir = os.path.join(work_dir, 'tensorboard_output')
plot_image(tb_dir, ['loss'], 0.9)
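To give a rough sense of how the scheduler settings above interact (the dataset size here is hypothetical, not measured):

# Illustrative sizing only:
# len(train_dataset) = 40_000, batch_size = 1, max_epochs = 1, drop_last = True
# -> T_max = get_T_max(40_000, 1, 1, True) = 40_000 cosine-annealing steps,
#    with gradient accumulation every n_accumulate_grad = 16 batches and
#    a LinearWarmup over the first warmup_iters = 200 iterations.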

View File

@@ -0,0 +1,5 @@
python llm_infer.py \
--device 0 \
--model_type llama2-7b \
--ckpt_fpath "runs/llama2-7b/vx_xxx/output_best/pytorch_model.bin" \
--eval_human true

View File

@@ -0,0 +1,8 @@
#!/bin/bash
DATE=$(date +"%Y%m%d-%H%M%S")
nohup python llm_sft.py \
--device 0 \
--model_type llama2-7b \
--data_sample 25000 \
&> train_$DATE.out &

View File

@@ -49,11 +49,9 @@ from modelscope.utils.config import Config, ConfigDict
from modelscope.utils.registry import default_group
#
-SYSTEM_TEXT = """{system}"""
-USER_TEXT = """\n\n### 用户
-{user}"""
-ASSISTANT_PROMPT = """\n\n### 助手
-"""
+PROMPT = """System: {system}
+Human: {user}
+AI: """
MAX_LENGTH = 2048
TEST_MAX_LENGTH = MAX_LENGTH
@@ -62,11 +60,6 @@ logger = get_logger()
#
-def get_model_dir(model_id: str, model_revision: Optional[str] = None) -> str:
-    model_dir = snapshot_download(model_id, model_revision)
-    return model_dir
def _get_version(work_dir: str) -> int:
    if os.path.isdir(work_dir):
        fnames = os.listdir(work_dir)
@@ -93,28 +86,40 @@ def get_work_dir(work_dir: str) -> str:
    return work_dir
-def select_device(device_ids: List[int]) -> Device:
+def _format_device(device: Union[List[int], str]) -> Tuple[List[int], str]:
+    if isinstance(device, list):
+        device_ids = device
+        device_str = ','.join([str(d) for d in device])
+    else:
+        device_ids = [int(d) for d in device.split(',') if d != '-1']
+        device_str = device
+    device_str = device_str.replace(' ', '')
+    return device_ids, device_str
+def select_device(device: Union[List[int], str]) -> Device:
    """Call this function before cuda is initialized.
-    Return: master device
+    device: e.g. []: 'cpu', [0], [0, 1, 2]
+        e.g. '-1': 'cpu', '0', '0,1,2'
    """
    if torch.cuda.is_initialized():
        logger.warning('CUDA has been initialized! Device selection fails!')
        return torch.device('cuda:0')
    #
+    device_ids, device_str = _format_device(device)
+    #
+    os.environ['CUDA_VISIBLE_DEVICES'] = device_str
    log_s = 'Using device: '
-    if len(device_ids) == 0:  # cpu
-        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
-        device: str = 'cpu'
-        log_s += device
+    if len(device_ids) == 0:
+        master_device: str = 'cpu'
+        log_s += 'cpu'
    else:
-        os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(
-            [str(d) for d in device_ids])
        assert torch.cuda.is_available(
        ) and torch.cuda.device_count() >= len(device_ids)
-        log_s += f"cuda:{','.join([str(d) for d in device_ids])}"  # e.g. "cuda:1,7,8"
-        device = 'cuda:0'
+        master_device = 'cuda:0'
+        log_s += f'cuda:{device_str}'
    logger.info(log_s)
-    return torch.device(device)
+    return torch.device(master_device)
def seed_everything(seed: Optional[int] = None, gpu_dtm: bool = False) -> int:
@@ -148,37 +153,27 @@ def get_T_max(dataset_len: int, batch_size: int, max_epochs: int,
def tokenize_function(system: str, user: str, assistant: Optional[str],
                      tokenizer) -> Dict[str, Any]:
    """Only applicable to baichuan and chatglm2. Other models need to be tested"""
-    system_text = SYSTEM_TEXT.format(system=system)
-    user_text = USER_TEXT.format(user=user)
-    system_text_ids: List[int] = tokenizer(
-        system_text, return_attention_mask=False,
-        add_special_tokens=True)['input_ids']
-    user_text_ids: List[int] = tokenizer(
-        user_text, return_attention_mask=False,
-        add_special_tokens=False)['input_ids']
-    assistant_p_input_ids: List[int] = tokenizer(
-        ASSISTANT_PROMPT,
-        return_attention_mask=False,
-        add_special_tokens=False)['input_ids']
-    # tokenizer.bos_token_id: Avoid `assistant` being empty
-    assistant_input_ids: List[int] = [tokenizer.bos_token_id]
+    src_text = PROMPT.format(system=system, user=user)
+    src_input_ids: List[int] = tokenizer(
+        src_text, return_attention_mask=False,
+        add_special_tokens=True)['input_ids']
+    #
+    tgt_input_ids: List[int] = []
    if assistant is not None:
-        assistant_input_ids += tokenizer(
+        tgt_input_ids += tokenizer(
            assistant, return_attention_mask=False,
            add_special_tokens=False)['input_ids']
-        assistant_input_ids += [tokenizer.eos_token_id]
+        tgt_input_ids += [tokenizer.eos_token_id]
+        labels = [-100] * len(src_input_ids) + tgt_input_ids
+    else:
+        labels = None
+    input_ids = src_input_ids + tgt_input_ids
    #
-    input_ids = system_text_ids + user_text_ids + assistant_p_input_ids + assistant_input_ids
-    if assistant is not None:  # train, val
+    if assistant is not None:
        if len(input_ids) > MAX_LENGTH:
            return {}
-        len_mask = len(input_ids) - len(assistant_input_ids)
-        labels = [-100] * len_mask + assistant_input_ids
-    else:  # test
+    else:
        input_ids = input_ids[-TEST_MAX_LENGTH:]
-        labels = None
    #
    return {'input_ids': input_ids, 'labels': labels}
@@ -221,7 +216,7 @@ def print_examples(examples: Dict[str, Any], tokenizer) -> None:
    print(f'[INPUT_IDS] {tokenizer.decode(input_ids)}')
    print()
    print(
-        f'[LABLES] {tokenizer.decode([l if l != -100 else 0 for l in labels])}'
+        f'[LABLES] {tokenizer.decode([lb if lb != -100 else 0 for lb in labels])}'
    )
@@ -305,12 +300,21 @@ class MyMetric(Metric):
        raise NotImplementedError
-def get_baichuan_model_tokenizer(model_dir: Optional[str] = None,
-                                 load_model: bool = True):
-    if model_dir is None:
-        model_id = 'baichuan-inc/baichuan-7B'
-        model_dir = get_model_dir(model_id, None)
-    #
+def _add_special_token(tokenizer):
+    if tokenizer.eos_token_id is None:
+        tokenizer.eos_token_id = 2
+    if tokenizer.bos_token_id is None:
+        tokenizer.bos_token_id = 1
+    if tokenizer.pad_token_id is None:
+        tokenizer.pad_token_id = 0
+    logger.info(f'bos_token_id: {tokenizer.bos_token_id}, '
+                f'eos_token_id: {tokenizer.eos_token_id}, '
+                f'pad_token_id: {tokenizer.pad_token_id}')
+def get_baichuan7B_model_tokenizer(model_dir: str,
+                                   load_model: bool = True,
+                                   add_special_token: bool = True):
    sys.path.insert(0, model_dir)
    from configuration_baichuan import BaiChuanConfig
    from tokenization_baichuan import BaiChuanTokenizer
@@ -327,16 +331,14 @@ def get_baichuan_model_tokenizer(model_dir: Optional[str] = None,
        device_map='auto',
        torch_dtype=torch.float16)
    #
+    if add_special_token:
+        _add_special_token(tokenizer)
    return model, tokenizer
-def get_chatglm2_model_tokenizer(model_dir: Optional[str] = None,
-                                 load_model: bool = True):
-    if model_dir is None:
-        model_id = 'ZhipuAI/chatglm2-6b'
-        model_revision = 'v1.0.3'
-        model_dir = snapshot_download(model_id, model_revision)
-    #
+def get_chatglm2_model_tokenizer(model_dir: str,
+                                 load_model: bool = True,
+                                 add_special_token: bool = True):
    config = read_config(model_dir)
    config['model'] = ConfigDict({'type': 'chatglm2-6b'})
    tokenizer = ChatGLM2Tokenizer.from_pretrained(model_dir)
@@ -347,6 +349,8 @@ def get_chatglm2_model_tokenizer(model_dir: Optional[str] = None,
        cfg_dict=config,
        device_map='auto',
        torch_dtype=torch.float16)
+    if add_special_token:
+        _add_special_token(tokenizer)
    return model, tokenizer
@@ -355,7 +359,7 @@ def make_dataset(
                   Dict[str, Any]]
) -> MyDataset:
    """
-    split: Literal["train", "validation"]
+    split: Literal['train', 'validation']
    """
    dataset = MsDataset.load(
        'modelscope/ms_hackathon_23_agent_train_dev', split=split)
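The net effect of the reworked tokenize_function above is that only the assistant reply (plus the eos token) contributes to the loss; every prompt position is masked with -100. A minimal self-contained sketch with made-up token ids:

# Sketch only: fake ids standing in for real tokenizer output.
src_input_ids = [101, 102, 103]   # "System/Human/AI" prompt tokens
tgt_input_ids = [201, 202, 2]     # assistant reply tokens + eos (2)
input_ids = src_input_ids + tgt_input_ids
labels = [-100] * len(src_input_ids) + tgt_input_ids
assert labels == [-100, -100, -100, 201, 202, 2]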

View File

@@ -16,15 +16,6 @@
"### 配置实验环境" "### 配置实验环境"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# !pip install transformers"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 1,
@@ -62,8 +53,7 @@
"source": [ "source": [
"from _common import *\n", "from _common import *\n",
"from transformers import TextStreamer\n", "from transformers import TextStreamer\n",
"device_ids = list(range(min(4, torch.cuda.device_count())))\n", "device_ids = [0, 1]\n",
"logger.info(device_ids)\n",
"select_device(device_ids)" "select_device(device_ids)"
] ]
}, },
@@ -152,12 +142,11 @@
    }
   ],
   "source": [
-    "CKPT_FAPTH = \"/home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/output_best/pytorch_model.bin\"\n",
-    "LORA_TARGET_MODULES = [\"W_pack\"]\n",
+    "CKPT_FAPTH = '/home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/output_best/pytorch_model.bin'\n",
+    "LORA_TARGET_MODULES = ['W_pack']\n",
    "\n",
-    "model, tokenizer = get_baichuan_model_tokenizer()\n",
-    "if tokenizer.pad_token_id is None:\n",
-    "    tokenizer.pad_token_id = tokenizer.eos_token_id\n",
+    "model_dir = snapshot_download('baichuan-inc/baichuan-7B', 'v1.0.5')\n",
+    "model, tokenizer = get_baichuan7B_model_tokenizer(model_dir)\n",
    "model.bfloat16()  # Consistent with training"
   ]
  },
@@ -225,7 +214,7 @@
" lora_alpha=LORA_ALPHA,\n", " lora_alpha=LORA_ALPHA,\n",
" lora_dropout=LORA_DROPOUT_P,\n", " lora_dropout=LORA_DROPOUT_P,\n",
" pretrained_weights=CKPT_FAPTH)\n", " pretrained_weights=CKPT_FAPTH)\n",
"logger.info(f\"lora_config: {lora_config}\")\n", "logger.info(f'lora_config: {lora_config}')\n",
"Swift.prepare_model(model, lora_config)" "Swift.prepare_model(model, lora_config)"
] ]
}, },
@@ -289,8 +278,8 @@
    }
   ],
   "source": [
-    "test_dataset = make_dataset(\"validation\", lambda system, user, assistant:\n",
-    "                            {\"system\": system, \"user\": user, \"assistant\": assistant})"
+    "test_dataset = make_dataset('validation', lambda system, user, assistant:\n",
+    "                            {'system': system, 'user': user, 'assistant': assistant})"
   ]
  },
  {
@@ -451,20 +440,21 @@
"source": [ "source": [
"streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n", "streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n",
"for d in test_dataset[:5]:\n", "for d in test_dataset[:5]:\n",
" system = d[\"system\"]\n", " system = d['system']\n",
" user = d[\"user\"]\n", " user = d['user']\n",
" assistant = d[\"assistant\"]\n", " assistant = d['assistant']\n",
" input_ids = tokenize_function(system, user, None, tokenizer)[\"input_ids\"]\n", " input_ids = tokenize_function(system, user, None, tokenizer)['input_ids']\n",
" print(f\"[TEST]{tokenizer.decode(input_ids)}\", end=\"\")\n", " print(f'[TEST]{tokenizer.decode(input_ids)}', end='')\n",
" input_ids = torch.tensor(input_ids)[None].cuda()\n", " input_ids = torch.tensor(input_ids)[None].cuda()\n",
" attention_mask = torch.ones_like(input_ids)\n", " attention_mask = torch.ones_like(input_ids)\n",
" generate_ids = model.generate(input_ids=input_ids, max_new_tokens=512,\n", " generate_ids = model.generate(input_ids=input_ids, max_new_tokens=512,\n",
" attention_mask=attention_mask,\n", " attention_mask=attention_mask,\n",
" streamer=streamer, pad_token_id=tokenizer.pad_token_id)\n", " streamer=streamer, pad_token_id=tokenizer.eos_token_id, \n",
" temperature=0.7, top_k=50, top_p=0.7, do_sample=True)\n",
" print()\n", " print()\n",
" print(f\"[LABELS]{assistant}\")\n", " print(f'[LABELS]{assistant}')\n",
" print(\"-----------------------------------------------------------------------------------\")\n", " print('-----------------------------------------------------------------------------------')\n",
" # input(\"next[ENTER]\")" " # input('next[ENTER]')"
] ]
} }
], ],
@@ -484,7 +474,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.10.11" "version": "3.10.12"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@@ -33,14 +33,12 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# !pip install modelscope -U\n", "# !pip install modelscope\n",
"# !pip install numpy pandas matplotlib scikit-learn\n", "# !pip install numpy pandas matplotlib scikit-learn\n",
"# !pip install transformers datasets\n", "# !pip install transformers datasets\n",
"# !conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia\n", "# !conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia\n",
"# !pip install tqdm\n", "# !pip install tqdm tensorboard torchmetrics sentencepiece charset_normalizer accelerate\n",
"# !pip install tensorboard\n", "\n",
"# !pip install torchmetrics\n",
"#\n",
"# !pip install numpy -U # Resolve torchmetrics dependencies and update numpy" "# !pip install numpy -U # Resolve torchmetrics dependencies and update numpy"
] ]
}, },
@@ -73,8 +71,7 @@
   ],
   "source": [
    "from _common import *\n",
-    "device_ids = list(range(min(4, torch.cuda.device_count())))\n",
-    "logger.info(device_ids)\n",
+    "device_ids = [0, 1]\n",
    "select_device(device_ids)\n",
    "_ = seed_everything(42)"
   ]
@@ -130,22 +127,16 @@
    }
   ],
   "source": [
-    "model_id = \"baichuan-inc/baichuan-7B\"\n",
-    "WORK_DIR = \"runs/baichuan\"\n",
-    "LORA_TARGET_MODULES = [\"W_pack\"]\n",
+    "WORK_DIR = 'runs/baichuan'\n",
+    "LORA_TARGET_MODULES = ['W_pack']\n",
    "#\n",
-    "model_dir = get_model_dir(model_id, None)\n",
-    "model, tokenizer = get_baichuan_model_tokenizer(model_dir)\n",
+    "model_dir = snapshot_download('baichuan-inc/baichuan-7B', 'v1.0.5')\n",
+    "model, tokenizer = get_baichuan7B_model_tokenizer(model_dir)\n",
    "#\n",
    "GRADIENT_CHECKPOINTING = True\n",
    "if GRADIENT_CHECKPOINTING:\n",
    "    model.gradient_checkpointing_enable()\n",
-    "    model.enable_input_require_grads()\n",
-    "if tokenizer.pad_token_id is None:\n",
-    "    tokenizer.pad_token_id = tokenizer.eos_token_id\n",
-    "#\n",
-    "logger.info(f\"bos_token_id: {tokenizer.bos_token_id}, eos_token_id: {tokenizer.eos_token_id}, \"\n",
-    "            f\"pad_token_id: {tokenizer.pad_token_id}\")"
+    "    model.enable_input_require_grads()"
   ]
  },
  {
@@ -237,13 +228,13 @@
" rank=LORA_RANK,\n", " rank=LORA_RANK,\n",
" lora_alpha=LORA_ALPHA,\n", " lora_alpha=LORA_ALPHA,\n",
" lora_dropout=LORA_DROPOUT_P)\n", " lora_dropout=LORA_DROPOUT_P)\n",
"logger.info(f\"lora_config: {lora_config}\")\n", "logger.info(f'lora_config: {lora_config}')\n",
"Swift.prepare_model(model, lora_config)\n", "Swift.prepare_model(model, lora_config)\n",
"#\n", "#\n",
"show_freeze_layers(model)\n", "show_freeze_layers(model)\n",
"print_model_info(model)\n", "print_model_info(model)\n",
"_p = list(model.parameters())[100]\n", "_p = list(model.parameters())[100]\n",
"logger.info(f\"device: {_p.device}, dtype: {_p.dtype}\")\n", "logger.info(f'device: {_p.device}, dtype: {_p.dtype}')\n",
"model.bfloat16()" "model.bfloat16()"
] ]
}, },
@@ -308,8 +299,8 @@
   ],
   "source": [
    "tokenize_function = partial(tokenize_function, tokenizer=tokenizer)\n",
-    "train_dataset = make_dataset(\"train\", tokenize_function)\n",
-    "val_dataset = make_dataset(\"validation\", tokenize_function)\n",
+    "train_dataset = make_dataset('train', tokenize_function)\n",
+    "val_dataset = make_dataset('validation', tokenize_function)\n",
    "# Data analysis\n",
    "stat_dataset(train_dataset)\n",
    "stat_dataset(val_dataset)\n",
@@ -339,7 +330,7 @@
    }
   ],
   "source": [
-    "cfg_file = os.path.join(model_dir, \"configuration.json\")\n",
+    "cfg_file = os.path.join(model_dir, 'configuration.json')\n",
    "#\n",
    "BATCH_SIZE = 1\n",
    "MAX_EPOCHS = 1\n",
@@ -347,62 +338,62 @@
"WORK_DIR = get_work_dir(WORK_DIR)\n", "WORK_DIR = get_work_dir(WORK_DIR)\n",
"EVAL_INTERVAL = 200\n", "EVAL_INTERVAL = 200\n",
"CONFIG = Config({\n", "CONFIG = Config({\n",
" \"train\": {\n", " 'train': {\n",
" \"dataloader\": {\n", " 'dataloader': {\n",
" \"batch_size_per_gpu\": BATCH_SIZE,\n", " 'batch_size_per_gpu': BATCH_SIZE,\n",
" \"workers_per_gpu\": 1,\n", " 'workers_per_gpu': 1,\n",
" \"shuffle\": True,\n", " 'shuffle': True,\n",
" \"drop_last\": True,\n", " 'drop_last': True,\n",
" \"pin_memory\": True\n", " 'pin_memory': True\n",
" },\n", " },\n",
" \"max_epochs\": MAX_EPOCHS,\n", " 'max_epochs': MAX_EPOCHS,\n",
" \"work_dir\": WORK_DIR,\n", " 'work_dir': WORK_DIR,\n",
" \"optimizer\": {\n", " 'optimizer': {\n",
" \"type\": \"AdamW\",\n", " 'type': 'AdamW',\n",
" \"lr\": 1e-4,\n", " 'lr': 1e-4,\n",
" \"weight_decay\": 0.01,\n", " 'weight_decay': 0.01,\n",
" \"options\": {\n", " 'options': {\n",
" \"cumulative_iters\": 16, \"grad_clip\": {\n", " 'cumulative_iters': 16, 'grad_clip': {\n",
" \"norm_type\": 2,\n", " 'norm_type': 2,\n",
" \"max_norm\": 2.0\n", " 'max_norm': 2.0\n",
" }\n", " }\n",
" }\n", " }\n",
" },\n", " },\n",
" \"lr_scheduler\": {\n", " 'lr_scheduler': {\n",
" \"type\": \"CosineAnnealingLR\",\n", " 'type': 'CosineAnnealingLR',\n",
" \"T_max\": T_max,\n", " 'T_max': T_max,\n",
" \"eta_min\": 1e-5,\n", " 'eta_min': 1e-5,\n",
" \"options\": {\n", " 'options': {\n",
" \"by_epoch\": False,\n", " 'by_epoch': False,\n",
" \"warmup\": {\n", " 'warmup': {\n",
" 'type': 'LinearWarmup',\n", " 'type': 'LinearWarmup',\n",
" 'warmup_ratio': 0.1,\n", " 'warmup_ratio': 0.1,\n",
" \"warmup_iters\": 200\n", " 'warmup_iters': 200\n",
" }\n", " }\n",
" }\n", " }\n",
" },\n", " },\n",
" \"hooks\": [\n", " 'hooks': [\n",
" {\"type\": \"CheckpointHook\", \"by_epoch\": False, \"interval\": EVAL_INTERVAL, \"max_checkpoint_num\": 1},\n", " {'type': 'CheckpointHook', 'by_epoch': False, 'interval': EVAL_INTERVAL, 'max_checkpoint_num': 1},\n",
" {\"type\": \"EvaluationHook\", \"by_epoch\": False, \"interval\": EVAL_INTERVAL},\n", " {'type': 'EvaluationHook', 'by_epoch': False, 'interval': EVAL_INTERVAL},\n",
" {\"type\": \"BestCkptSaverHook\",\n", " {'type': 'BestCkptSaverHook',\n",
" \"metric_key\": \"acc\",\n", " 'metric_key': 'acc',\n",
" \"save_best\": True, \"rule\": \"max\", \"max_checkpoint_num\": 1},\n", " 'save_best': True, 'rule': 'max', 'max_checkpoint_num': 1},\n",
" {\"type\": \"TextLoggerHook\",\n", " {'type': 'TextLoggerHook',\n",
" \"by_epoch\": True, # Whether EpochBasedTrainer is used\n", " 'by_epoch': True, # Whether EpochBasedTrainer is used\n",
" \"interval\": 5},\n", " 'interval': 5},\n",
" {\"type\": \"TensorboardHook\", \"by_epoch\": False, \"interval\": 5}\n", " {'type': 'TensorboardHook', 'by_epoch': False, 'interval': 5}\n",
" ]\n", " ]\n",
" },\n", " },\n",
" \"evaluation\": {\n", " 'evaluation': {\n",
" \"dataloader\": {\n", " 'dataloader': {\n",
" \"batch_size_per_gpu\": BATCH_SIZE,\n", " 'batch_size_per_gpu': BATCH_SIZE,\n",
" \"workers_per_gpu\": 1,\n", " 'workers_per_gpu': 1,\n",
" \"shuffle\": False,\n", " 'shuffle': False,\n",
" \"drop_last\": False,\n", " 'drop_last': False,\n",
" \"pin_memory\": True\n", " 'pin_memory': True\n",
" },\n", " },\n",
" \"metrics\": [\n", " 'metrics': [\n",
" {\"type\": \"my_metric\", \"vocab_size\": tokenizer.vocab_size}\n", " {'type': 'my_metric', 'vocab_size': tokenizer.vocab_size}\n",
" ]\n", " ]\n",
" }\n", " }\n",
"})" "})"
@@ -1778,16 +1769,16 @@
    }
   ],
   "source": [
-    "tb_dir = os.path.join(WORK_DIR, \"tensorboard_output\")\n",
+    "tb_dir = os.path.join(WORK_DIR, 'tensorboard_output')\n",
    "fname = os.listdir(tb_dir)[0]\n",
    "tb_path = os.path.join(tb_dir, fname)\n",
    "#\n",
    "data = read_tensorboard_file(tb_path)\n",
    "print(data.keys())\n",
-    "_ = plot_image(data, \"loss\", 0.9)\n",
-    "_ = plot_image(data, \"lr\", 0)\n",
-    "_ = plot_image(data, \"evaluation/acc\", 0)\n",
-    "_ = plot_image(data, \"evaluation/loss\", 0)"
+    "_ = plot_image(data, 'loss', 0.9)\n",
+    "_ = plot_image(data, 'lr', 0)\n",
+    "_ = plot_image(data, 'evaluation/acc', 0)\n",
+    "_ = plot_image(data, 'evaluation/loss', 0)"
   ]
  },
  {

View File

@@ -17,15 +17,6 @@
"The following code is copied from baichuan_infer.ipynb" "The following code is copied from baichuan_infer.ipynb"
] ]
}, },
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# !pip install transformers"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 2,
@@ -63,8 +54,7 @@
"source": [ "source": [
"from _common import *\n", "from _common import *\n",
"from transformers import TextStreamer\n", "from transformers import TextStreamer\n",
"device_ids = list(range(min(4, torch.cuda.device_count())))\n", "device_ids = [0, 1]\n",
"logger.info(device_ids)\n",
"select_device(device_ids)" "select_device(device_ids)"
] ]
}, },
@@ -149,14 +139,11 @@
    }
   ],
   "source": [
-    "CKPT_FAPTH = \"/home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/output_best/pytorch_model.bin\"\n",
-    "LORA_TARGET_MODULES = [\"query_key_value\"]\n",
+    "CKPT_FAPTH = '/home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/output_best/pytorch_model.bin'\n",
+    "LORA_TARGET_MODULES = ['query_key_value']\n",
    "\n",
-    "model, tokenizer = get_chatglm2_model_tokenizer()\n",
-    "if tokenizer.eos_token_id is None:\n",
-    "    tokenizer.eos_token_id = tokenizer.pad_token_id\n",
-    "if tokenizer.bos_token_id is None:\n",
-    "    tokenizer.bos_token_id = 1\n",
+    "model_dir = snapshot_download('ZhipuAI/chatglm2-6b', 'v1.0.6')\n",
+    "model, tokenizer = get_chatglm2_model_tokenizer(model_dir)\n",
    "model.bfloat16()  # Consistent with training"
   ]
  },
@@ -230,7 +217,7 @@
" lora_alpha=LORA_ALPHA,\n", " lora_alpha=LORA_ALPHA,\n",
" lora_dropout=LORA_DROPOUT_P,\n", " lora_dropout=LORA_DROPOUT_P,\n",
" pretrained_weights=CKPT_FAPTH)\n", " pretrained_weights=CKPT_FAPTH)\n",
"logger.info(f\"lora_config: {lora_config}\")\n", "logger.info(f'lora_config: {lora_config}')\n",
"Swift.prepare_model(model, lora_config)" "Swift.prepare_model(model, lora_config)"
] ]
}, },
@@ -295,8 +282,8 @@
    }
   ],
   "source": [
-    "test_dataset = make_dataset(\"validation\", lambda system, user, assistant:\n",
-    "                            {\"system\": system, \"user\": user, \"assistant\": assistant})"
+    "test_dataset = make_dataset('validation', lambda system, user, assistant:\n",
+    "                            {'system': system, 'user': user, 'assistant': assistant})"
   ]
  },
  {
@@ -484,20 +471,21 @@
"source": [ "source": [
"streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n", "streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n",
"for d in test_dataset[:5]:\n", "for d in test_dataset[:5]:\n",
" system = d[\"system\"]\n", " system = d['system']\n",
" user = d[\"user\"]\n", " user = d['user']\n",
" assistant = d[\"assistant\"]\n", " assistant = d['assistant']\n",
" input_ids = tokenize_function(system, user, None, tokenizer)[\"input_ids\"]\n", " input_ids = tokenize_function(system, user, None, tokenizer)['input_ids']\n",
" print(f\"[TEST]{tokenizer.decode(input_ids)}\", end=\"\")\n", " print(f'[TEST]{tokenizer.decode(input_ids)}', end='')\n",
" input_ids = torch.tensor(input_ids)[None].cuda()\n", " input_ids = torch.tensor(input_ids)[None].cuda()\n",
" attention_mask = torch.ones_like(input_ids)\n", " attention_mask = torch.ones_like(input_ids)\n",
" generate_ids = model.generate(input_ids=input_ids, max_new_tokens=512,\n", " generate_ids = model.generate(input_ids=input_ids, max_new_tokens=512,\n",
" attention_mask=attention_mask,\n", " attention_mask=attention_mask,\n",
" streamer=streamer, pad_token_id=tokenizer.pad_token_id)\n", " streamer=streamer, pad_token_id=tokenizer.eos_token_id, \n",
" temperature=0.7, top_k=50, top_p=0.7, do_sample=True)\n",
" print()\n", " print()\n",
" print(f\"[LABELS]{assistant}\")\n", " print(f'[LABELS]{assistant}')\n",
" print(\"-----------------------------------------------------------------------------------\")\n", " print('-----------------------------------------------------------------------------------')\n",
" # input(\"next[ENTER]\")" " # input('next[ENTER]')"
] ]
} }
], ],
@@ -517,7 +505,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.10.11" "version": "3.10.12"
}, },
"orig_nbformat": 4 "orig_nbformat": 4
}, },

View File

@@ -40,20 +40,18 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# !pip install modelscope -U\n", "# !pip install modelscope\n",
"# !pip install numpy pandas matplotlib scikit-learn\n", "# !pip install numpy pandas matplotlib scikit-learn\n",
"# !pip install transformers datasets\n", "# !pip install transformers datasets\n",
"# !conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia\n", "# !conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia\n",
"# !pip install tqdm\n", "# !pip install tqdm tensorboard torchmetrics sentencepiece charset_normalizer accelerate\n",
"# !pip install tensorboard\n", "\n",
"# !pip install torchmetrics\n",
"#\n",
"# !pip install numpy -U # Resolve torchmetrics dependencies and update numpy" "# !pip install numpy -U # Resolve torchmetrics dependencies and update numpy"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 1,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -78,8 +76,7 @@
   ],
   "source": [
    "from _common import *\n",
-    "device_ids = list(range(min(4, torch.cuda.device_count())))\n",
-    "logger.info(device_ids)\n",
+    "device_ids = [0, 1]\n",
    "select_device(device_ids)\n",
    "_ = seed_everything(42)"
   ]
@@ -134,26 +131,16 @@
    }
   ],
   "source": [
-    "model_id = \"ZhipuAI/chatglm2-6b\"\n",
-    "model_revision = \"v1.0.3\"\n",
-    "WORK_DIR = \"runs/chatglm2\"\n",
-    "LORA_TARGET_MODULES = [\"query_key_value\"]\n",
+    "WORK_DIR = 'runs/chatglm2'\n",
+    "LORA_TARGET_MODULES = ['query_key_value']\n",
    "#\n",
-    "model_dir = get_model_dir(model_id, model_revision)\n",
+    "model_dir = snapshot_download('ZhipuAI/chatglm2-6b', 'v1.0.6')\n",
    "model, tokenizer = get_chatglm2_model_tokenizer(model_dir)\n",
-    "# chatglm2 does not support gradient_checkpointing\n",
-    "GRADIENT_CHECKPOINTING = False\n",
+    "#\n",
+    "GRADIENT_CHECKPOINTING = True\n",
    "if GRADIENT_CHECKPOINTING:\n",
    "    model.gradient_checkpointing_enable()\n",
-    "    model.enable_input_require_grads()\n",
-    "logger.info(tokenizer.special_tokens)\n",
-    "if tokenizer.eos_token_id is None:\n",
-    "    tokenizer.eos_token_id = tokenizer.pad_token_id\n",
-    "if tokenizer.bos_token_id is None:\n",
-    "    tokenizer.bos_token_id = 1\n",
-    "#\n",
-    "logger.info(f\"bos_token_id: {tokenizer.bos_token_id}, eos_token_id: {tokenizer.eos_token_id}, \"\n",
-    "            f\"pad_token_id: {tokenizer.pad_token_id}\")"
+    "    model.enable_input_require_grads()"
   ]
  },
  {
@@ -251,13 +238,13 @@
" rank=LORA_RANK,\n", " rank=LORA_RANK,\n",
" lora_alpha=LORA_ALPHA,\n", " lora_alpha=LORA_ALPHA,\n",
" lora_dropout=LORA_DROPOUT_P)\n", " lora_dropout=LORA_DROPOUT_P)\n",
"logger.info(f\"lora_config: {lora_config}\")\n", "logger.info(f'lora_config: {lora_config}')\n",
"Swift.prepare_model(model, lora_config)\n", "Swift.prepare_model(model, lora_config)\n",
"#\n", "#\n",
"show_freeze_layers(model)\n", "show_freeze_layers(model)\n",
"print_model_info(model)\n", "print_model_info(model)\n",
"_p = list(model.parameters())[100]\n", "_p = list(model.parameters())[100]\n",
"logger.info(f\"device: {_p.device}, dtype: {_p.dtype}\")\n", "logger.info(f'device: {_p.device}, dtype: {_p.dtype}')\n",
"model.bfloat16()" "model.bfloat16()"
] ]
}, },
@@ -399,8 +386,8 @@
], ],
"source": [ "source": [
"tokenize_function = partial(tokenize_function, tokenizer=tokenizer)\n", "tokenize_function = partial(tokenize_function, tokenizer=tokenizer)\n",
"train_dataset = make_dataset(\"train\", tokenize_function)\n", "train_dataset = make_dataset('train', tokenize_function)\n",
"val_dataset = make_dataset(\"validation\", tokenize_function)\n", "val_dataset = make_dataset('validation', tokenize_function)\n",
"# Data analysis\n", "# Data analysis\n",
"stat_dataset(train_dataset)\n", "stat_dataset(train_dataset)\n",
"stat_dataset(val_dataset)\n", "stat_dataset(val_dataset)\n",
@@ -431,7 +418,7 @@
} }
], ],
"source": [ "source": [
"cfg_file = os.path.join(model_dir, \"configuration.json\")\n", "cfg_file = os.path.join(model_dir, 'configuration.json')\n",
"#\n", "#\n",
"BATCH_SIZE = 1\n", "BATCH_SIZE = 1\n",
"MAX_EPOCHS = 1\n", "MAX_EPOCHS = 1\n",
@@ -439,62 +426,62 @@
"WORK_DIR = get_work_dir(WORK_DIR)\n", "WORK_DIR = get_work_dir(WORK_DIR)\n",
"EVAL_INTERVAL = 200\n", "EVAL_INTERVAL = 200\n",
"CONFIG = Config({\n", "CONFIG = Config({\n",
" \"train\": {\n", " 'train': {\n",
" \"dataloader\": {\n", " 'dataloader': {\n",
" \"batch_size_per_gpu\": BATCH_SIZE,\n", " 'batch_size_per_gpu': BATCH_SIZE,\n",
" \"workers_per_gpu\": 1,\n", " 'workers_per_gpu': 1,\n",
" \"shuffle\": True,\n", " 'shuffle': True,\n",
" \"drop_last\": True,\n", " 'drop_last': True,\n",
" \"pin_memory\": True\n", " 'pin_memory': True\n",
" },\n", " },\n",
" \"max_epochs\": MAX_EPOCHS,\n", " 'max_epochs': MAX_EPOCHS,\n",
" \"work_dir\": WORK_DIR,\n", " 'work_dir': WORK_DIR,\n",
" \"optimizer\": {\n", " 'optimizer': {\n",
" \"type\": \"AdamW\",\n", " 'type': 'AdamW',\n",
" \"lr\": 1e-4,\n", " 'lr': 1e-4,\n",
" \"weight_decay\": 0.01,\n", " 'weight_decay': 0.01,\n",
" \"options\": {\n", " 'options': {\n",
" \"cumulative_iters\": 16, \"grad_clip\": {\n", " 'cumulative_iters': 16, 'grad_clip': {\n",
" \"norm_type\": 2,\n", " 'norm_type': 2,\n",
" \"max_norm\": 2.0\n", " 'max_norm': 2.0\n",
" }\n", " }\n",
" }\n", " }\n",
" },\n", " },\n",
" \"lr_scheduler\": {\n", " 'lr_scheduler': {\n",
" \"type\": \"CosineAnnealingLR\",\n", " 'type': 'CosineAnnealingLR',\n",
" \"T_max\": T_max,\n", " 'T_max': T_max,\n",
" \"eta_min\": 1e-5,\n", " 'eta_min': 1e-5,\n",
" \"options\": {\n", " 'options': {\n",
" \"by_epoch\": False,\n", " 'by_epoch': False,\n",
" \"warmup\": {\n", " 'warmup': {\n",
" 'type': 'LinearWarmup',\n", " 'type': 'LinearWarmup',\n",
" 'warmup_ratio': 0.1,\n", " 'warmup_ratio': 0.1,\n",
" \"warmup_iters\": 200\n", " 'warmup_iters': 200\n",
" }\n", " }\n",
" }\n", " }\n",
" },\n", " },\n",
" \"hooks\": [\n", " 'hooks': [\n",
" {\"type\": \"CheckpointHook\", \"by_epoch\": False, \"interval\": EVAL_INTERVAL, \"max_checkpoint_num\": 1},\n", " {'type': 'CheckpointHook', 'by_epoch': False, 'interval': EVAL_INTERVAL, 'max_checkpoint_num': 1},\n",
" {\"type\": \"EvaluationHook\", \"by_epoch\": False, \"interval\": EVAL_INTERVAL},\n", " {'type': 'EvaluationHook', 'by_epoch': False, 'interval': EVAL_INTERVAL},\n",
" {\"type\": \"BestCkptSaverHook\",\n", " {'type': 'BestCkptSaverHook',\n",
" \"metric_key\": \"acc\",\n", " 'metric_key': 'acc',\n",
" \"save_best\": True, \"rule\": \"max\", \"max_checkpoint_num\": 1},\n", " 'save_best': True, 'rule': 'max', 'max_checkpoint_num': 1},\n",
" {\"type\": \"TextLoggerHook\",\n", " {'type': 'TextLoggerHook',\n",
" \"by_epoch\": True, # Whether EpochBasedTrainer is used\n", " 'by_epoch': True, # Whether EpochBasedTrainer is used\n",
" \"interval\": 5},\n", " 'interval': 5},\n",
" {\"type\": \"TensorboardHook\", \"by_epoch\": False, \"interval\": 5}\n", " {'type': 'TensorboardHook', 'by_epoch': False, 'interval': 5}\n",
" ]\n", " ]\n",
" },\n", " },\n",
" \"evaluation\": {\n", " 'evaluation': {\n",
" \"dataloader\": {\n", " 'dataloader': {\n",
" \"batch_size_per_gpu\": BATCH_SIZE,\n", " 'batch_size_per_gpu': BATCH_SIZE,\n",
" \"workers_per_gpu\": 1,\n", " 'workers_per_gpu': 1,\n",
" \"shuffle\": False,\n", " 'shuffle': False,\n",
" \"drop_last\": False,\n", " 'drop_last': False,\n",
" \"pin_memory\": True\n", " 'pin_memory': True\n",
" },\n", " },\n",
" \"metrics\": [\n", " 'metrics': [\n",
" {\"type\": \"my_metric\", \"vocab_size\": tokenizer.vocab_size}\n", " {'type': 'my_metric', 'vocab_size': tokenizer.vocab_size}\n",
" ]\n", " ]\n",
" }\n", " }\n",
"})" "})"
@@ -1884,16 +1871,16 @@
} }
], ],
"source": [ "source": [
"tb_dir = os.path.join(WORK_DIR, \"tensorboard_output\")\n", "tb_dir = os.path.join(WORK_DIR, 'tensorboard_output')\n",
"fname = os.listdir(tb_dir)[0]\n", "fname = os.listdir(tb_dir)[0]\n",
"tb_path = os.path.join(tb_dir, fname)\n", "tb_path = os.path.join(tb_dir, fname)\n",
"#\n", "#\n",
"data = read_tensorboard_file(tb_path)\n", "data = read_tensorboard_file(tb_path)\n",
"print(data.keys())\n", "print(data.keys())\n",
"_ = plot_image(data, \"loss\", 0.9)\n", "_ = plot_image(data, 'loss', 0.9)\n",
"_ = plot_image(data, \"lr\", 0)\n", "_ = plot_image(data, 'lr', 0)\n",
"_ = plot_image(data, \"evaluation/acc\", 0)\n", "_ = plot_image(data, 'evaluation/acc', 0)\n",
"_ = plot_image(data, \"evaluation/loss\", 0)" "_ = plot_image(data, 'evaluation/loss', 0)"
] ]
}, },
{ {

View File

@@ -165,6 +165,7 @@ class Models(object):
doc2bot = 'doc2bot' doc2bot = 'doc2bot'
peer = 'peer' peer = 'peer'
llama = 'llama' llama = 'llama'
llama2 = 'llama2'
chatglm_6b = 'chatglm6b' chatglm_6b = 'chatglm6b'
chatglm2_6b = 'chatglm2-6b' chatglm2_6b = 'chatglm2-6b'

View File

@@ -6,6 +6,7 @@ from typing import Callable, List, Optional, Union
import torch import torch
import torch.nn.functional as F import torch.nn.functional as F
from diffusers import AutoencoderKL, DDPMScheduler, UNet2DConditionModel from diffusers import AutoencoderKL, DDPMScheduler, UNet2DConditionModel
from packaging import version
from transformers import CLIPTextModel, CLIPTokenizer from transformers import CLIPTextModel, CLIPTokenizer
from modelscope.metainfo import Models from modelscope.metainfo import Models
@@ -34,6 +35,7 @@ class StableDiffusion(TorchModel):
""" """
super().__init__(model_dir, *args, **kwargs) super().__init__(model_dir, *args, **kwargs)
revision = kwargs.pop('revision', None) revision = kwargs.pop('revision', None)
xformers_enable = kwargs.pop('xformers_enable', False)
self.lora_tune = kwargs.pop('lora_tune', False) self.lora_tune = kwargs.pop('lora_tune', False)
self.dreambooth_tune = kwargs.pop('dreambooth_tune', False) self.dreambooth_tune = kwargs.pop('dreambooth_tune', False)
@@ -66,6 +68,18 @@ class StableDiffusion(TorchModel):
self.unet.requires_grad_(False) self.unet.requires_grad_(False)
self.unet = self.unet.to(self.device) self.unet = self.unet.to(self.device)
# xformers accelerate memory efficient attention
if xformers_enable:
import xformers
xformers_version = version.parse(xformers.__version__)
if xformers_version == version.parse('0.0.16'):
logger.warn(
'xFormers 0.0.16 cannot be used for training in some GPUs. '
'If you observe problems during training, please update xFormers to at least 0.0.17.'
)
self.unet.enable_xformers_memory_efficient_attention()
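# Illustrative note: xformers_enable is read from the constructor kwargs above, so a caller
# building this model directly can pass xformers_enable=True to switch the UNet to
# memory-efficient attention; whether a given pipeline forwards that kwarg is an assumption.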
def tokenize_caption(self, captions): def tokenize_caption(self, captions):
""" Convert caption text to token data. """ Convert caption text to token data.

View File

@@ -75,6 +75,7 @@ if TYPE_CHECKING:
DocumentGroundedDialogRerankModel) DocumentGroundedDialogRerankModel)
from .xlm_roberta import XLMRobertaConfig, XLMRobertaModel from .xlm_roberta import XLMRobertaConfig, XLMRobertaModel
from .llama import LlamaForTextGeneration, LlamaConfig, LlamaModel, LlamaTokenizer, LlamaTokenizerFast from .llama import LlamaForTextGeneration, LlamaConfig, LlamaModel, LlamaTokenizer, LlamaTokenizerFast
from .llama2 import Llama2ForTextGeneration, Llama2Config, Llama2Model, Llama2Tokenizer, Llama2TokenizerFast
else: else:
_import_structure = { _import_structure = {
@@ -170,6 +171,10 @@ else:
'LlamaForTextGeneration', 'LlamaConfig', 'LlamaModel', 'LlamaForTextGeneration', 'LlamaConfig', 'LlamaModel',
'LlamaTokenizer', 'LlamaTokenizerFast' 'LlamaTokenizer', 'LlamaTokenizerFast'
], ],
'llama2': [
'Llama2ForTextGeneration', 'Llama2Config', 'Llama2Model',
'Llama2Tokenizer', 'Llama2TokenizerFast'
],
} }
import sys import sys

View File

@@ -1,12 +1,13 @@
""" ChatGLM model configuration """ """ ChatGLM model configuration """
from transformers.configuration_utils import PretrainedConfig from transformers import PretrainedConfig
from transformers.utils import logging from transformers.utils import logging
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
class ChatGLM2Config(PretrainedConfig): class ChatGLM2Config(PretrainedConfig):
model_type = 'chatglm'
def __init__(self, def __init__(self,
num_layers=28, num_layers=28,
@@ -24,7 +25,6 @@ class ChatGLM2Config(PretrainedConfig):
post_layer_norm=True, post_layer_norm=True,
add_bias_linear=False, add_bias_linear=False,
add_qkv_bias=False, add_qkv_bias=False,
interleaved_qkv=False,
bias_dropout_fusion=True, bias_dropout_fusion=True,
multi_query_attention=False, multi_query_attention=False,
multi_query_group_num=1, multi_query_group_num=1,
@@ -32,8 +32,11 @@ class ChatGLM2Config(PretrainedConfig):
attention_softmax_in_fp32=True, attention_softmax_in_fp32=True,
fp32_residual_connection=False, fp32_residual_connection=False,
quantization_bit=0, quantization_bit=0,
pre_seq_len=None,
prefix_projection=False,
**kwargs): **kwargs):
self.num_layers = num_layers self.num_layers = num_layers
self.vocab_size = padded_vocab_size
self.padded_vocab_size = padded_vocab_size self.padded_vocab_size = padded_vocab_size
self.hidden_size = hidden_size self.hidden_size = hidden_size
self.ffn_hidden_size = ffn_hidden_size self.ffn_hidden_size = ffn_hidden_size
@@ -55,4 +58,6 @@ class ChatGLM2Config(PretrainedConfig):
self.attention_softmax_in_fp32 = attention_softmax_in_fp32 self.attention_softmax_in_fp32 = attention_softmax_in_fp32
self.fp32_residual_connection = fp32_residual_connection self.fp32_residual_connection = fp32_residual_connection
self.quantization_bit = quantization_bit self.quantization_bit = quantization_bit
self.pre_seq_len = pre_seq_len
self.prefix_projection = prefix_projection
super().__init__(**kwargs) super().__init__(**kwargs)

View File

@@ -1,11 +1,9 @@
import base64 import base64
import bz2 import bz2
import ctypes import ctypes
from functools import partial
from typing import List from typing import List
import torch import torch
from torch.nn import Linear
from torch.nn.parameter import Parameter from torch.nn.parameter import Parameter
from transformers.utils import logging from transformers.utils import logging

View File

@@ -2,10 +2,9 @@
import copy import copy
import math import math
import re
import sys import sys
import warnings import warnings
from typing import Any, Callable, Dict, List, Optional, Tuple, Union from typing import Any, Callable, Dict, List, Optional, Tuple
import torch import torch
import torch.nn.functional as F import torch.nn.functional as F
@@ -22,10 +21,11 @@ from transformers.modeling_outputs import (BaseModelOutputWithPast,
from transformers.modeling_utils import PreTrainedModel from transformers.modeling_utils import PreTrainedModel
from transformers.utils import logging from transformers.utils import logging
from modelscope import Model, TorchModel
from modelscope.metainfo import Models from modelscope.metainfo import Models
from modelscope.models import MODELS, Model, TorchModel
from modelscope.outputs import OutputKeys from modelscope.outputs import OutputKeys
from modelscope.utils.constant import Tasks from modelscope.utils.constant import Tasks
from ... import MODELS
from .configuration import ChatGLM2Config from .configuration import ChatGLM2Config
# flags required to enable jit fusion kernels # flags required to enable jit fusion kernels
@@ -61,17 +61,50 @@ class InvalidScoreLogitsProcessor(LogitsProcessor):
return scores return scores
class PrefixEncoder(torch.nn.Module):
"""
The torch.nn model to encode the prefix
Input shape: (batch-size, prefix-length)
Output shape: (batch-size, prefix-length, 2*layers*hidden)
"""
def __init__(self, config: ChatGLM2Config):
super().__init__()
self.prefix_projection = config.prefix_projection
if self.prefix_projection:
# Use a two-layer MLP to encode the prefix
kv_size = config.num_layers * config.kv_channels * config.multi_query_group_num * 2
self.embedding = torch.nn.Embedding(config.pre_seq_len, kv_size)
self.trans = torch.nn.Sequential(
torch.nn.Linear(kv_size, config.hidden_size), torch.nn.Tanh(),
torch.nn.Linear(config.hidden_size, kv_size))
else:
self.embedding = torch.nn.Embedding(
config.pre_seq_len, config.num_layers * config.kv_channels
* config.multi_query_group_num * 2)
def forward(self, prefix: torch.Tensor):
if self.prefix_projection:
prefix_tokens = self.embedding(prefix)
past_key_values = self.trans(prefix_tokens)
else:
past_key_values = self.embedding(prefix)
return past_key_values
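# Illustrative shape sketch, assuming hypothetical values pre_seq_len=16, num_layers=28,
# kv_channels=128 and multi_query_group_num=2: the non-projection branch embeds a (batch, 16)
# prefix into (batch, 16, 28 * 128 * 2 * 2) = (batch, 16, 14336), one key/value slot per layer;
# ChatGLMModel.get_prompt below reshapes this into per-layer prefix caches.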
def split_tensor_along_last_dim( def split_tensor_along_last_dim(
tensor: torch.Tensor, tensor: torch.Tensor,
num_partitions: int, num_partitions: int,
contiguous_split_chunks: bool = False, contiguous_split_chunks: bool = False,
) -> List[torch.Tensor]: ) -> List[torch.Tensor]:
"""Split a tensor along its last dimension. """Split a tensor along its last dimension.
Arguments: Arguments:
tensor: input tensor. tensor: input tensor.
num_partitions: number of partitions to split the tensor num_partitions: number of partitions to split the tensor
contiguous_split_chunks: If True, make each chunk contiguous contiguous_split_chunks: If True, make each chunk contiguous
in memory. in memory.
Returns: Returns:
A list of Tensors A list of Tensors
""" """
@@ -92,7 +125,7 @@ class RotaryEmbedding(nn.Module):
def __init__(self, dim, original_impl=False, device=None, dtype=None): def __init__(self, dim, original_impl=False, device=None, dtype=None):
super().__init__() super().__init__()
inv_freq = 1.0 / (10000**( inv_freq = 1.0 / (10000**(
torch.arange(0, dim, 2, device=device, dtype=dtype) / dim)) torch.arange(0, dim, 2, device=device).to(dtype=dtype) / dim))
self.register_buffer('inv_freq', inv_freq) self.register_buffer('inv_freq', inv_freq)
self.dim = dim self.dim = dim
self.original_impl = original_impl self.original_impl = original_impl
@@ -104,6 +137,7 @@ class RotaryEmbedding(nn.Module):
device: torch.device, device: torch.device,
base: int = 10000): base: int = 10000):
"""Enhanced Transformer with Rotary Position Embedding. """Enhanced Transformer with Rotary Position Embedding.
Derived from: https://github.com/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/ Derived from: https://github.com/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/
transformers/rope/__init__.py. MIT License: transformers/rope/__init__.py. MIT License:
https://github.com/labmlai/annotated_deep_learning_paper_implementations/blob/master/license. https://github.com/labmlai/annotated_deep_learning_paper_implementations/blob/master/license.
@@ -325,6 +359,7 @@ class CoreAttention(torch.nn.Module):
class SelfAttention(torch.nn.Module): class SelfAttention(torch.nn.Module):
"""Parallel self-attention layer abstract class. """Parallel self-attention layer abstract class.
Self-attention layer takes input with size [s, b, h] Self-attention layer takes input with size [s, b, h]
and returns output of the same size. and returns output of the same size.
""" """
@@ -421,9 +456,9 @@ class SelfAttention(torch.nn.Module):
self.num_multi_query_groups_per_partition, self.num_multi_query_groups_per_partition,
self.hidden_size_per_attention_head)) self.hidden_size_per_attention_head))
else: else:
new_tensor_shape = mixed_x_layer.size()[:-1] + ( new_tensor_shape = mixed_x_layer.size()[:-1] + \
self.num_attention_heads_per_partition, # noqa (self.num_attention_heads_per_partition, # noqa
3 * self.hidden_size_per_attention_head) # noqa 3 * self.hidden_size_per_attention_head) # noqa
mixed_x_layer = mixed_x_layer.view(*new_tensor_shape) mixed_x_layer = mixed_x_layer.view(*new_tensor_shape)
# [sq, b, np, 3 * hn] --> 3 [sq, b, np, hn] # [sq, b, np, 3 * hn] --> 3 [sq, b, np, hn]
@@ -436,11 +471,11 @@ class SelfAttention(torch.nn.Module):
key_layer = apply_rotary_pos_emb(key_layer, rotary_pos_emb) key_layer = apply_rotary_pos_emb(key_layer, rotary_pos_emb)
# adjust key and value for inference # adjust key and value for inference
if kv_cache is not None:
cache_k, cache_v = kv_cache
key_layer = torch.cat((cache_k, key_layer), dim=0)
value_layer = torch.cat((cache_v, value_layer), dim=0)
if use_cache: if use_cache:
if kv_cache is not None:
cache_k, cache_v = kv_cache
key_layer = torch.cat((cache_k, key_layer), dim=0)
value_layer = torch.cat((cache_v, value_layer), dim=0)
kv_cache = (key_layer, value_layer) kv_cache = (key_layer, value_layer)
else: else:
kv_cache = None kv_cache = None
@@ -487,6 +522,7 @@ def _config_to_kwargs(args):
class MLP(torch.nn.Module): class MLP(torch.nn.Module):
"""MLP. """MLP.
MLP will take the input with h hidden state, project it to 4*h MLP will take the input with h hidden state, project it to 4*h
hidden dimension, perform nonlinear transformation, and project the hidden dimension, perform nonlinear transformation, and project the
state back into h hidden dimension. state back into h hidden dimension.
@@ -530,6 +566,7 @@ class MLP(torch.nn.Module):
class GLMBlock(torch.nn.Module): class GLMBlock(torch.nn.Module):
"""A single transformer layer. """A single transformer layer.
Transformer layer takes input with size [s, b, h] and returns an Transformer layer takes input with size [s, b, h] and returns an
output of the same size. output of the same size.
""" """
@@ -642,6 +679,8 @@ class GLMTransformer(torch.nn.Module):
device=device, device=device,
dtype=config.torch_dtype) dtype=config.torch_dtype)
self.gradient_checkpointing = False
def _get_layer(self, layer_number): def _get_layer(self, layer_number):
return self.layers[layer_number] return self.layers[layer_number]
@@ -657,6 +696,13 @@ class GLMTransformer(torch.nn.Module):
if not kv_caches: if not kv_caches:
kv_caches = [None for _ in range(self.num_layers)] kv_caches = [None for _ in range(self.num_layers)]
presents = () if use_cache else None presents = () if use_cache else None
if self.gradient_checkpointing and self.training:
if use_cache:
logger.warning_once(
'`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...'
)
use_cache = False
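# Illustrative note: checkpointed blocks are re-run during the backward pass, so key/value
# caches produced inside them cannot be kept; the cache is therefore disabled while training
# with gradient checkpointing enabled.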
all_self_attentions = None all_self_attentions = None
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
for index in range(self.num_layers): for index in range(self.num_layers):
@@ -664,13 +710,18 @@ class GLMTransformer(torch.nn.Module):
all_hidden_states = all_hidden_states + (hidden_states, ) all_hidden_states = all_hidden_states + (hidden_states, )
layer = self._get_layer(index) layer = self._get_layer(index)
if self.gradient_checkpointing and self.training:
hidden_states, kv_cache = layer( layer_ret = torch.utils.checkpoint.checkpoint(
hidden_states, layer, hidden_states, attention_mask, rotary_pos_emb,
attention_mask, kv_caches[index], use_cache)
rotary_pos_emb, else:
kv_cache=kv_caches[index], layer_ret = layer(
use_cache=use_cache) hidden_states,
attention_mask,
rotary_pos_emb,
kv_cache=kv_caches[index],
use_cache=use_cache)
hidden_states, kv_cache = layer_ret
if use_cache: if use_cache:
presents = presents + (kv_cache, ) presents = presents + (kv_cache, )
@@ -724,7 +775,7 @@ class ChatGLMPreTrainedModel(TorchModel, PreTrainedModel):
dim=-1) # noqa dim=-1) # noqa
if padding_mask is not None: if padding_mask is not None:
full_attention_mask = full_attention_mask * padding_mask.unsqueeze( full_attention_mask = full_attention_mask * padding_mask.unsqueeze(
1) # noqa 1)
if not past_length and padding_mask is not None: if not past_length and padding_mask is not None:
full_attention_mask -= padding_mask.unsqueeze(-1) - 1 full_attention_mask -= padding_mask.unsqueeze(-1) - 1
full_attention_mask = (full_attention_mask < 0.5).bool() full_attention_mask = (full_attention_mask < 0.5).bool()
@@ -739,7 +790,7 @@ class ChatGLMPreTrainedModel(TorchModel, PreTrainedModel):
return position_ids return position_ids
def _set_gradient_checkpointing(self, module, value=False): def _set_gradient_checkpointing(self, module, value=False):
if isinstance(module, ChatGLMModel): if isinstance(module, GLMTransformer):
module.gradient_checkpointing = value module.gradient_checkpointing = value
@classmethod @classmethod
@@ -801,6 +852,9 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
if device is not None: if device is not None:
init_kwargs['device'] = device init_kwargs['device'] = device
self.embedding = init_method(Embedding, config, **init_kwargs) self.embedding = init_method(Embedding, config, **init_kwargs)
self.num_layers = config.num_layers
self.multi_query_group_num = config.multi_query_group_num
self.kv_channels = config.kv_channels
# Rotary positional embeddings # Rotary positional embeddings
self.seq_length = config.seq_length self.seq_length = config.seq_length
@@ -821,7 +875,30 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
bias=False, bias=False,
dtype=config.torch_dtype, dtype=config.torch_dtype,
**init_kwargs) **init_kwargs)
self.gradient_checkpointing = False self.pre_seq_len = config.pre_seq_len
self.prefix_projection = config.prefix_projection
if self.pre_seq_len is not None:
for param in self.parameters():
param.requires_grad = False
self.prefix_tokens = torch.arange(self.pre_seq_len).long()
self.prefix_encoder = PrefixEncoder(config)
self.dropout = torch.nn.Dropout(0.1)
def get_input_embeddings(self):
return self.embedding.word_embeddings
def get_prompt(self, batch_size, device, dtype=torch.half):
prefix_tokens = self.prefix_tokens.unsqueeze(0).expand(batch_size,
-1).to(device)
past_key_values = self.prefix_encoder(prefix_tokens).type(dtype)
past_key_values = past_key_values.view(batch_size, self.pre_seq_len,
self.num_layers * 2,
self.multi_query_group_num,
self.kv_channels)
# seq_len, b, nh, hidden_size
past_key_values = self.dropout(past_key_values)
past_key_values = past_key_values.permute([2, 1, 0, 3, 4]).split(2)
return past_key_values
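# Illustrative shape sketch: the PrefixEncoder output is viewed as
# (batch, pre_seq_len, num_layers * 2, multi_query_group_num, kv_channels), permuted to
# (num_layers * 2, pre_seq_len, batch, multi_query_group_num, kv_channels), and split(2)
# along the first dimension then yields one (2, pre_seq_len, batch, multi_query_group_num,
# kv_channels) key/value prefix per transformer layer.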
def forward( def forward(
self, self,
@@ -847,6 +924,21 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
if inputs_embeds is None: if inputs_embeds is None:
inputs_embeds = self.embedding(input_ids) inputs_embeds = self.embedding(input_ids)
if self.pre_seq_len is not None:
if past_key_values is None:
past_key_values = self.get_prompt(
batch_size=batch_size,
device=input_ids.device,
dtype=inputs_embeds.dtype)
if attention_mask is not None:
attention_mask = torch.cat(
[
attention_mask.new_ones( # noqa
(batch_size, self.pre_seq_len)),
attention_mask # noqa
], # noqa
dim=-1) # noqa
if full_attention_mask is None: if full_attention_mask is None:
if (attention_mask is not None if (attention_mask is not None
and not attention_mask.all()) or (past_key_values and not attention_mask.all()) or (past_key_values
@@ -923,7 +1015,7 @@ class ChatGLM2ForConditionalGeneration(ChatGLMPreTrainedModel):
attention_mask, # noqa attention_mask, # noqa
attention_mask.new_ones( attention_mask.new_ones(
(attention_mask.shape[0], 1)) # noqa (attention_mask.shape[0], 1)) # noqa
], ], # noqa
dim=-1) # noqa dim=-1) # noqa
# update position ids # update position ids
@@ -1032,6 +1124,7 @@ class ChatGLM2ForConditionalGeneration(ChatGLMPreTrainedModel):
This function is used to re-order the `past_key_values` cache if [`~PreTrainedModel.beam_search`] or This function is used to re-order the `past_key_values` cache if [`~PreTrainedModel.beam_search`] or
[`~PreTrainedModel.beam_sample`] is called. This is required to match `past_key_values` with the correct [`~PreTrainedModel.beam_sample`] is called. This is required to match `past_key_values` with the correct
beam_idx at every generation step. beam_idx at every generation step.
Output shares the same memory storage as `past`. Output shares the same memory storage as `past`.
""" """
return tuple(( return tuple((
@@ -1048,11 +1141,7 @@ class ChatGLM2ForConditionalGeneration(ChatGLMPreTrainedModel):
tokenizer, tokenizer,
query: str, query: str,
history: List[Tuple[str, str]] = None): history: List[Tuple[str, str]] = None):
prompt = '' prompt = tokenizer.build_prompt(query, history=history)
for i, (old_query, response) in enumerate(history):
prompt += '[Round {}]\n\n问:{}\n\n答:{}\n\n'.format(
i + 1, old_query, response)
prompt += '[Round {}]\n\n问:{}\n\n答:'.format(len(history) + 1, query)
inputs = tokenizer([prompt], return_tensors='pt') inputs = tokenizer([prompt], return_tensors='pt')
inputs = inputs.to(self.device) inputs = inputs.to(self.device)
return inputs return inputs
@@ -1080,7 +1169,7 @@ class ChatGLM2ForConditionalGeneration(ChatGLMPreTrainedModel):
tokenizer, tokenizer,
query: str, query: str,
history: List[Tuple[str, str]] = None, history: List[Tuple[str, str]] = None,
max_length: int = 2048, max_length: int = 8192,
num_beams=1, num_beams=1,
do_sample=True, do_sample=True,
top_p=0.8, top_p=0.8,
@@ -1115,7 +1204,7 @@ class ChatGLM2ForConditionalGeneration(ChatGLMPreTrainedModel):
query: str, query: str,
history: List[Tuple[str, str]] = None, history: List[Tuple[str, str]] = None,
past_key_values=None, past_key_values=None,
max_length: int = 2048, max_length: int = 8192,
do_sample=True, do_sample=True,
top_p=0.8, top_p=0.8,
temperature=0.8, temperature=0.8,
@@ -1142,6 +1231,8 @@ class ChatGLM2ForConditionalGeneration(ChatGLMPreTrainedModel):
tokenizer, query, history=history) tokenizer, query, history=history)
if past_key_values is not None: if past_key_values is not None:
past_length = past_key_values[0][0].shape[0] past_length = past_key_values[0][0].shape[0]
if self.transformer.pre_seq_len is not None:
past_length -= self.transformer.pre_seq_len
inputs.position_ids += past_length inputs.position_ids += past_length
attention_mask = inputs.attention_mask attention_mask = inputs.attention_mask
attention_mask = torch.cat( attention_mask = torch.cat(
@@ -1157,12 +1248,13 @@ class ChatGLM2ForConditionalGeneration(ChatGLMPreTrainedModel):
outputs, past_key_values = outputs outputs, past_key_values = outputs
outputs = outputs.tolist()[0][len(inputs['input_ids'][0]):] outputs = outputs.tolist()[0][len(inputs['input_ids'][0]):]
response = tokenizer.decode(outputs) response = tokenizer.decode(outputs)
response = self.process_response(response) if response and response[-1] != '�':
new_history = history + [(query, response)] response = self.process_response(response)
if return_past_key_values: new_history = history + [(query, response)]
yield response, new_history, past_key_values if return_past_key_values:
else: yield response, new_history, past_key_values
yield response, new_history else:
yield response, new_history
@torch.no_grad() @torch.no_grad()
def stream_generate( def stream_generate(
@@ -1295,7 +1387,8 @@ class ChatGLM2ForConditionalGeneration(ChatGLMPreTrainedModel):
self.transformer.encoder, self.transformer.encoder,
bits, bits,
empty_init=empty_init, empty_init=empty_init,
device=device) device=device,
**kwargs)
return self return self
def chat(self, input: Dict, tokenizer) -> Dict: def chat(self, input: Dict, tokenizer) -> Dict:

View File

@@ -1,13 +1,10 @@
"""Tokenization classes for ChatGLM."""
import os import os
from typing import Dict, List, Optional, Union from typing import Dict, List, Optional, Union
from sentencepiece import SentencePieceProcessor from sentencepiece import SentencePieceProcessor
from transformers.tokenization_utils import PreTrainedTokenizer from transformers import PreTrainedTokenizer
from transformers.tokenization_utils_base import BatchEncoding, EncodedInput from transformers.tokenization_utils_base import BatchEncoding, EncodedInput
from transformers.utils import PaddingStrategy, logging from transformers.utils import PaddingStrategy
logger = logging.get_logger(__name__)
class SPTokenizer: class SPTokenizer:
@@ -21,7 +18,7 @@ class SPTokenizer:
self.n_words: int = self.sp_model.vocab_size() self.n_words: int = self.sp_model.vocab_size()
self.bos_id: int = self.sp_model.bos_id() self.bos_id: int = self.sp_model.bos_id()
self.eos_id: int = self.sp_model.eos_id() self.eos_id: int = self.sp_model.eos_id()
self.pad_id: int = self.sp_model.eos_id() self.pad_id: int = self.sp_model.unk_id()
assert self.sp_model.vocab_size() == self.sp_model.get_piece_size() assert self.sp_model.vocab_size() == self.sp_model.get_piece_size()
special_tokens = ['[MASK]', '[gMASK]', '[sMASK]', 'sop', 'eop'] special_tokens = ['[MASK]', '[gMASK]', '[sMASK]', 'sop', 'eop']
@@ -62,7 +59,9 @@ class SPTokenizer:
def convert_id_to_token(self, index): def convert_id_to_token(self, index):
"""Converts an index (integer) in a token (str) using the vocab.""" """Converts an index (integer) in a token (str) using the vocab."""
if index in self.index_special_tokens: if index in self.index_special_tokens or index in [
self.eos_id, self.bos_id, self.pad_id
] or index < 0:
return '' return ''
return self.sp_model.IdToPiece(index) return self.sp_model.IdToPiece(index)
@@ -76,6 +75,7 @@ class ChatGLM2Tokenizer(PreTrainedTokenizer):
super().__init__(padding_side=padding_side, **kwargs) super().__init__(padding_side=padding_side, **kwargs)
self.name = 'GLMTokenizer' self.name = 'GLMTokenizer'
self.vocab_file = vocab_file
self.tokenizer = SPTokenizer(vocab_file) self.tokenizer = SPTokenizer(vocab_file)
self.special_tokens = { self.special_tokens = {
'<bos>': self.tokenizer.bos_id, '<bos>': self.tokenizer.bos_id,
@@ -91,12 +91,16 @@ class ChatGLM2Tokenizer(PreTrainedTokenizer):
@property @property
def pad_token(self) -> str: def pad_token(self) -> str:
return '</s>' return '<unk>'
@property @property
def pad_token_id(self): def pad_token_id(self):
return self.get_command('<pad>') return self.get_command('<pad>')
@property
def eos_token(self) -> str:
return '</s>'
@property @property
def eos_token_id(self): def eos_token_id(self):
return self.get_command('<eos>') return self.get_command('<eos>')
@@ -131,11 +135,13 @@ class ChatGLM2Tokenizer(PreTrainedTokenizer):
def save_vocabulary(self, save_directory, filename_prefix=None): def save_vocabulary(self, save_directory, filename_prefix=None):
""" """
Save the vocabulary and special tokens file to a directory. Save the vocabulary and special tokens file to a directory.
Args: Args:
save_directory (`str`): save_directory (`str`):
The directory in which to save the vocabulary. The directory in which to save the vocabulary.
filename_prefix (`str`, *optional*): filename_prefix (`str`, *optional*):
An optional prefix to add to the name of the saved files. An optional prefix to add to the name of the saved files.
Returns: Returns:
`Tuple(str)`: Paths to the files saved. `Tuple(str)`: Paths to the files saved.
""" """
@@ -157,6 +163,16 @@ class ChatGLM2Tokenizer(PreTrainedTokenizer):
prefix_tokens = [self.get_command('[gMASK]'), self.get_command('sop')] prefix_tokens = [self.get_command('[gMASK]'), self.get_command('sop')]
return prefix_tokens return prefix_tokens
def build_prompt(self, query, history=None):
if history is None:
history = []
prompt = ''
for i, (old_query, response) in enumerate(history):
prompt += '[Round {}]\n\n问:{}\n\n答:{}\n\n'.format(
i + 1, old_query, response)
prompt += '[Round {}]\n\n问:{}\n\n答:'.format(len(history) + 1, query)
return prompt
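# Illustrative example (hypothetical query 'hi'): with an empty history, build_prompt('hi')
# returns '[Round 1]\n\n问:hi\n\n答:'; each earlier (query, response) pair contributes its
# own numbered '[Round i]' block before the final, unanswered round.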
def build_inputs_with_special_tokens( def build_inputs_with_special_tokens(
self, self,
token_ids_0: List[int], token_ids_0: List[int],
@@ -164,13 +180,16 @@ class ChatGLM2Tokenizer(PreTrainedTokenizer):
""" """
Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
adding special tokens. A BERT sequence has the following format: adding special tokens. A BERT sequence has the following format:
- single sequence: `[CLS] X [SEP]` - single sequence: `[CLS] X [SEP]`
- pair of sequences: `[CLS] A [SEP] B [SEP]` - pair of sequences: `[CLS] A [SEP] B [SEP]`
Args: Args:
token_ids_0 (`List[int]`): token_ids_0 (`List[int]`):
List of IDs to which the special tokens will be added. List of IDs to which the special tokens will be added.
token_ids_1 (`List[int]`, *optional*): token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs. Optional second list of IDs for sequence pairs.
Returns: Returns:
`List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens. `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
""" """
@@ -192,16 +211,19 @@ class ChatGLM2Tokenizer(PreTrainedTokenizer):
) -> dict: ) -> dict:
""" """
Pad encoded inputs (on left/right and up to predefined length or max length in the batch) Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
Args: Args:
encoded_inputs: encoded_inputs:
Dictionary of tokenized inputs (`List[int]`) or batch of tokenized inputs (`List[List[int]]`). Dictionary of tokenized inputs (`List[int]`) or batch of tokenized inputs (`List[List[int]]`).
max_length: maximum length of the returned list and optionally padding length (see below). max_length: maximum length of the returned list and optionally padding length (see below).
Will truncate by taking into account the special tokens. Will truncate by taking into account the special tokens.
padding_strategy: PaddingStrategy to use for padding. padding_strategy: PaddingStrategy to use for padding.
- PaddingStrategy.LONGEST: Pad to the longest sequence in the batch - PaddingStrategy.LONGEST: Pad to the longest sequence in the batch
- PaddingStrategy.MAX_LENGTH: Pad to the max length (default) - PaddingStrategy.MAX_LENGTH: Pad to the max length (default)
- PaddingStrategy.DO_NOT_PAD: Do not pad - PaddingStrategy.DO_NOT_PAD: Do not pad
The tokenizer padding sides are defined in self.padding_side: The tokenizer padding sides are defined in self.padding_side:
- 'left': pads on the left of the sequences - 'left': pads on the left of the sequences
- 'right': pads on the right of the sequences - 'right': pads on the right of the sequences
pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value. pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.

View File

@@ -0,0 +1,29 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING
from modelscope.utils.import_utils import LazyImportModule
if TYPE_CHECKING:
from .configuration import Llama2Config
from .text_generation import Llama2ForTextGeneration
from .backbone import Llama2Model
from .tokenization import Llama2Tokenizer
from .tokenization_fast import Llama2TokenizerFast
else:
_import_structure = {
'configuration': ['Llama2Config'],
'text_generation': ['Llama2ForTextGeneration'],
'backbone': ['Llama2Model'],
'tokenization': ['Llama2Tokenizer'],
'tokenization_fast': ['Llama2TokenizerFast'],
}
import sys
sys.modules[__name__] = LazyImportModule(
__name__,
globals()['__file__'],
_import_structure,
module_spec=__spec__,
extra_objects={},
)

View File

@@ -0,0 +1,667 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
#
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
# and OPT implementations in this library. It has been modified from its
# original forms to accommodate minor architectural differences compared
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" PyTorch LLaMA model."""
from typing import List, Optional, Tuple, Union
import math
import torch
import torch.nn.functional as F
import torch.utils.checkpoint
from torch import nn
from transformers.activations import ACT2FN
from transformers.modeling_outputs import BaseModelOutputWithPast
from transformers.modeling_utils import PreTrainedModel
from modelscope import TorchModel, Model
from modelscope.metainfo import Models
from modelscope.utils.constant import Tasks
from modelscope.utils.logger import get_logger
from .configuration import Llama2Config
from ... import MODELS
logger = get_logger(__name__)
_CONFIG_FOR_DOC = 'Llama2Config'
# This file is mainly copied from the llama code of transformers
# Copied from transformers.models.bart.modeling_bart._make_causal_mask
def _make_causal_mask(
input_ids_shape: torch.Size, dtype: torch.dtype, device: torch.device, past_key_values_length: int = 0
):
"""
Make causal mask used for bi-directional self-attention.
"""
bsz, tgt_len = input_ids_shape
mask = torch.full((tgt_len, tgt_len), torch.finfo(dtype).min, device=device)
mask_cond = torch.arange(mask.size(-1), device=device)
mask.masked_fill_(mask_cond < (mask_cond + 1).view(mask.size(-1), 1), 0)
mask = mask.to(dtype)
if past_key_values_length > 0:
mask = torch.cat([torch.zeros(tgt_len, past_key_values_length, dtype=dtype, device=device), mask], dim=-1)
return mask[None, None, :, :].expand(bsz, 1, tgt_len, tgt_len + past_key_values_length)
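# Illustrative sketch: for tgt_len=3 and no cached tokens the mask is
#   [[0, min, min],
#    [0, 0,   min],
#    [0, 0,   0  ]]
# with min = torch.finfo(dtype).min, expanded to (bsz, 1, 3, 3) and added to the attention
# scores, so every position attends only to itself and earlier positions.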
# Copied from transformers.models.bart.modeling_bart._expand_mask
def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Optional[int] = None):
"""
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
"""
bsz, src_len = mask.size()
tgt_len = tgt_len if tgt_len is not None else src_len
expanded_mask = mask[:, None, None, :].expand(bsz, 1, tgt_len, src_len).to(dtype)
inverted_mask = 1.0 - expanded_mask
return inverted_mask.masked_fill(inverted_mask.to(torch.bool), torch.finfo(dtype).min)
class LlamaRMSNorm(nn.Module):
def __init__(self, hidden_size, eps=1e-6):
"""
LlamaRMSNorm is equivalent to T5LayerNorm
"""
super().__init__()
self.weight = nn.Parameter(torch.ones(hidden_size))
self.variance_epsilon = eps
def forward(self, hidden_states):
input_dtype = hidden_states.dtype
hidden_states = hidden_states.to(torch.float32)
variance = hidden_states.pow(2).mean(-1, keepdim=True)
hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
return self.weight * hidden_states.to(input_dtype)
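# Illustrative note: unlike LayerNorm, no mean is subtracted; the result is
# weight * x / sqrt(mean(x**2, dim=-1) + eps), computed in float32 and cast back to the
# input dtype.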
class LlamaRotaryEmbedding(torch.nn.Module):
def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None):
super().__init__()
self.dim = dim
self.max_position_embeddings = max_position_embeddings
self.base = base
inv_freq = 1.0 / (self.base ** (torch.arange(0, self.dim, 2).float().to(device) / self.dim))
self.register_buffer("inv_freq", inv_freq)
# Build here to make `torch.jit.trace` work.
self._set_cos_sin_cache(
seq_len=max_position_embeddings, device=self.inv_freq.device, dtype=torch.get_default_dtype()
)
def _set_cos_sin_cache(self, seq_len, device, dtype):
self.max_seq_len_cached = seq_len
t = torch.arange(self.max_seq_len_cached, device=device, dtype=self.inv_freq.dtype)
freqs = torch.einsum("i,j->ij", t, self.inv_freq)
# Different from paper, but it uses a different permutation in order to obtain the same calculation
emb = torch.cat((freqs, freqs), dim=-1)
self.register_buffer("cos_cached", emb.cos()[None, None, :, :].to(dtype), persistent=False)
self.register_buffer("sin_cached", emb.sin()[None, None, :, :].to(dtype), persistent=False)
def forward(self, x, seq_len=None):
# x: [bs, num_attention_heads, seq_len, head_size]
if seq_len > self.max_seq_len_cached:
self._set_cos_sin_cache(seq_len=seq_len, device=x.device, dtype=x.dtype)
return (
self.cos_cached[:, :, :seq_len, ...].to(dtype=x.dtype),
self.sin_cached[:, :, :seq_len, ...].to(dtype=x.dtype),
)
class LlamaLinearScalingRotaryEmbedding(LlamaRotaryEmbedding):
"""LlamaRotaryEmbedding extended with linear scaling. Credits to the Reddit user /u/kaiokendev"""
def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None, scaling_factor=1.0):
self.scaling_factor = scaling_factor
super().__init__(dim, max_position_embeddings, base, device)
def _set_cos_sin_cache(self, seq_len, device, dtype):
self.max_seq_len_cached = seq_len
t = torch.arange(self.max_seq_len_cached, device=device, dtype=self.inv_freq.dtype)
t = t / self.scaling_factor
freqs = torch.einsum("i,j->ij", t, self.inv_freq)
# Different from paper, but it uses a different permutation in order to obtain the same calculation
emb = torch.cat((freqs, freqs), dim=-1)
self.register_buffer("cos_cached", emb.cos()[None, None, :, :].to(dtype), persistent=False)
self.register_buffer("sin_cached", emb.sin()[None, None, :, :].to(dtype), persistent=False)
class LlamaDynamicNTKScalingRotaryEmbedding(LlamaRotaryEmbedding):
"""LlamaRotaryEmbedding extended with Dynamic NTK scaling. Credits to the Reddit users /u/bloc97 and /u/emozilla"""
def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None, scaling_factor=1.0):
self.scaling_factor = scaling_factor
super().__init__(dim, max_position_embeddings, base, device)
def _set_cos_sin_cache(self, seq_len, device, dtype):
self.max_seq_len_cached = seq_len
if seq_len > self.max_position_embeddings:
base = self.base * (
(self.scaling_factor * seq_len / self.max_position_embeddings) - (self.scaling_factor - 1)
) ** (self.dim / (self.dim - 2))
inv_freq = 1.0 / (base ** (torch.arange(0, self.dim, 2).float().to(device) / self.dim))
self.register_buffer("inv_freq", inv_freq)
t = torch.arange(self.max_seq_len_cached, device=device, dtype=self.inv_freq.dtype)
freqs = torch.einsum("i,j->ij", t, self.inv_freq)
# Different from paper, but it uses a different permutation in order to obtain the same calculation
emb = torch.cat((freqs, freqs), dim=-1)
self.register_buffer("cos_cached", emb.cos()[None, None, :, :].to(dtype), persistent=False)
self.register_buffer("sin_cached", emb.sin()[None, None, :, :].to(dtype), persistent=False)
def rotate_half(x):
"""Rotates half the hidden dims of the input."""
x1 = x[..., : x.shape[-1] // 2]
x2 = x[..., x.shape[-1] // 2 :]
return torch.cat((-x2, x1), dim=-1)
def apply_rotary_pos_emb(q, k, cos, sin, position_ids):
# The first two dimensions of cos and sin are always 1, so we can `squeeze` them.
cos = cos.squeeze(1).squeeze(0) # [seq_len, dim]
sin = sin.squeeze(1).squeeze(0) # [seq_len, dim]
cos = cos[position_ids].unsqueeze(1) # [bs, 1, seq_len, dim]
sin = sin[position_ids].unsqueeze(1) # [bs, 1, seq_len, dim]
q_embed = (q * cos) + (rotate_half(q) * sin)
k_embed = (k * cos) + (rotate_half(k) * sin)
return q_embed, k_embed
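# Illustrative shape sketch: with q and k of shape (bsz, num_heads, seq_len, head_dim) and
# cos, sin of shape (1, 1, seq_len, head_dim) from LlamaRotaryEmbedding, indexing by
# position_ids of shape (bsz, seq_len) broadcasts the rotation to (bsz, 1, seq_len, head_dim),
# so q_embed and k_embed keep the original (bsz, num_heads, seq_len, head_dim) shape.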
class LlamaMLP(nn.Module):
def __init__(self, config):
super().__init__()
self.pretraining_tp = config.pretraining_tp
self.hidden_size = config.hidden_size
self.intermediate_size = config.intermediate_size
self.gate_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
self.up_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
self.down_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=False)
self.act_fn = ACT2FN[config.hidden_act]
def forward(self, x):
if self.pretraining_tp > 1:
slice = self.intermediate_size // self.pretraining_tp
gate_proj_slices = self.gate_proj.weight.split(slice, dim=0)
up_proj_slices = self.up_proj.weight.split(slice, dim=0)
down_proj_slices = self.down_proj.weight.split(slice, dim=1)
gate_proj = torch.cat([F.linear(x, gate_proj_slices[i]) for i in range(self.pretraining_tp)], dim=-1)
up_proj = torch.cat([F.linear(x, up_proj_slices[i]) for i in range(self.pretraining_tp)], dim=-1)
intermediate_states = (self.act_fn(gate_proj) * up_proj).split(slice, dim=2)
down_proj = [F.linear(intermediate_states[i], down_proj_slices[i]) for i in range(self.pretraining_tp)]
down_proj = sum(down_proj)
else:
down_proj = self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))
return down_proj
def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
"""
This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
"""
batch, num_key_value_heads, slen, head_dim = hidden_states.shape
if n_rep == 1:
return hidden_states
hidden_states = hidden_states[:, :, None, :, :].expand(batch, num_key_value_heads, n_rep, slen, head_dim)
return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)
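# Illustrative equivalence sketch (hypothetical shapes):
#   x = torch.randn(2, 4, 5, 8)  # (batch, num_key_value_heads, seq_len, head_dim)
#   torch.equal(repeat_kv(x, 3), torch.repeat_interleave(x, repeats=3, dim=1))  # -> True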
class LlamaAttention(nn.Module):
"""Multi-headed attention from 'Attention Is All You Need' paper"""
def __init__(self, config: Llama2Config):
super().__init__()
self.config = config
self.hidden_size = config.hidden_size
self.num_heads = config.num_attention_heads
self.head_dim = self.hidden_size // self.num_heads
self.num_key_value_heads = config.num_key_value_heads
self.num_key_value_groups = self.num_heads // self.num_key_value_heads
self.pretraining_tp = config.pretraining_tp
self.max_position_embeddings = config.max_position_embeddings
if (self.head_dim * self.num_heads) != self.hidden_size:
raise ValueError(
f"hidden_size must be divisible by num_heads (got `hidden_size`: {self.hidden_size}"
f" and `num_heads`: {self.num_heads})."
)
self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=False)
self.k_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=False)
self.v_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=False)
self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=False)
self._init_rope()
def _init_rope(self):
if self.config.rope_scaling is None:
self.rotary_emb = LlamaRotaryEmbedding(self.head_dim, max_position_embeddings=self.max_position_embeddings)
else:
scaling_type = self.config.rope_scaling["type"]
scaling_factor = self.config.rope_scaling["factor"]
if scaling_type == "linear":
self.rotary_emb = LlamaLinearScalingRotaryEmbedding(
self.head_dim, max_position_embeddings=self.max_position_embeddings, scaling_factor=scaling_factor
)
elif scaling_type == "dynamic":
self.rotary_emb = LlamaDynamicNTKScalingRotaryEmbedding(
self.head_dim, max_position_embeddings=self.max_position_embeddings, scaling_factor=scaling_factor
)
else:
raise ValueError(f"Unknown RoPE scaling type {scaling_type}")
def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
def forward(
self,
hidden_states: torch.Tensor,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_value: Optional[Tuple[torch.Tensor]] = None,
output_attentions: bool = False,
use_cache: bool = False,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
bsz, q_len, _ = hidden_states.size()
if self.pretraining_tp > 1:
key_value_slicing = (self.num_key_value_heads * self.head_dim) // self.pretraining_tp
query_slices = self.q_proj.weight.split((self.num_heads * self.head_dim) // self.pretraining_tp, dim=0)
key_slices = self.k_proj.weight.split(key_value_slicing, dim=0)
value_slices = self.v_proj.weight.split(key_value_slicing, dim=0)
query_states = [F.linear(hidden_states, query_slices[i]) for i in range(self.pretraining_tp)]
query_states = torch.cat(query_states, dim=-1)
key_states = [F.linear(hidden_states, key_slices[i]) for i in range(self.pretraining_tp)]
key_states = torch.cat(key_states, dim=-1)
value_states = [F.linear(hidden_states, value_slices[i]) for i in range(self.pretraining_tp)]
value_states = torch.cat(value_states, dim=-1)
else:
query_states = self.q_proj(hidden_states)
key_states = self.k_proj(hidden_states)
value_states = self.v_proj(hidden_states)
query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
kv_seq_len = key_states.shape[-2]
if past_key_value is not None:
kv_seq_len += past_key_value[0].shape[-2]
cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids)
if past_key_value is not None:
# reuse k, v, self_attention
key_states = torch.cat([past_key_value[0], key_states], dim=2)
value_states = torch.cat([past_key_value[1], value_states], dim=2)
past_key_value = (key_states, value_states) if use_cache else None
# repeat k/v heads if n_kv_heads < n_heads
key_states = repeat_kv(key_states, self.num_key_value_groups)
value_states = repeat_kv(value_states, self.num_key_value_groups)
attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim)
if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len):
raise ValueError(
f"Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is"
f" {attn_weights.size()}"
)
if attention_mask is not None:
if attention_mask.size() != (bsz, 1, q_len, kv_seq_len):
raise ValueError(
f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}"
)
attn_weights = attn_weights + attention_mask
# upcast attention to fp32
attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype)
attn_output = torch.matmul(attn_weights, value_states)
if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim):
raise ValueError(
f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is"
f" {attn_output.size()}"
)
attn_output = attn_output.transpose(1, 2).contiguous()
attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
if self.pretraining_tp > 1:
attn_output = attn_output.split(self.hidden_size // self.pretraining_tp, dim=2)
o_proj_slices = self.o_proj.weight.split(self.hidden_size // self.pretraining_tp, dim=1)
attn_output = sum([F.linear(attn_output[i], o_proj_slices[i]) for i in range(self.pretraining_tp)])
else:
attn_output = self.o_proj(attn_output)
if not output_attentions:
attn_weights = None
return attn_output, attn_weights, past_key_value
class LlamaDecoderLayer(nn.Module):
def __init__(self, config: Llama2Config):
super().__init__()
self.hidden_size = config.hidden_size
self.self_attn = LlamaAttention(config=config)
self.mlp = LlamaMLP(config)
self.input_layernorm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
self.post_attention_layernorm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
def forward(
self,
hidden_states: torch.Tensor,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_value: Optional[Tuple[torch.Tensor]] = None,
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
"""
Args:
hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
attention_mask (`torch.FloatTensor`, *optional*): attention mask of size
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
output_attentions (`bool`, *optional*):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
use_cache (`bool`, *optional*):
If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
(see `past_key_values`).
past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
"""
residual = hidden_states
hidden_states = self.input_layernorm(hidden_states)
# Self Attention
hidden_states, self_attn_weights, present_key_value = self.self_attn(
hidden_states=hidden_states,
attention_mask=attention_mask,
position_ids=position_ids,
past_key_value=past_key_value,
output_attentions=output_attentions,
use_cache=use_cache,
)
hidden_states = residual + hidden_states
# Fully Connected
residual = hidden_states
hidden_states = self.post_attention_layernorm(hidden_states)
hidden_states = self.mlp(hidden_states)
hidden_states = residual + hidden_states
outputs = (hidden_states,)
if output_attentions:
outputs += (self_attn_weights,)
if use_cache:
outputs += (present_key_value,)
return outputs
class LlamaPreTrainedModel(TorchModel, PreTrainedModel):
config_class = Llama2Config
base_model_prefix = "model"
supports_gradient_checkpointing = True
_no_split_modules = ["LlamaDecoderLayer"]
_skip_keys_device_placement = "past_key_values"
def __init__(self, config, **kwargs):
super().__init__(config.name_or_path, **kwargs)
super(Model, self).__init__(config)
def _init_weights(self, module):
std = self.config.initializer_range
if isinstance(module, nn.Linear):
module.weight.data.normal_(mean=0.0, std=std)
if module.bias is not None:
module.bias.data.zero_()
elif isinstance(module, nn.Embedding):
module.weight.data.normal_(mean=0.0, std=std)
if module.padding_idx is not None:
module.weight.data[module.padding_idx].zero_()
def _set_gradient_checkpointing(self, module, value=False):
if isinstance(module, LlamaModel):
module.gradient_checkpointing = value
@classmethod
def _instantiate(cls, **kwargs):
"""Instantiate the model.
Args:
kwargs: Input args.
model_dir: The model dir used to load the checkpoint and the label information.
num_labels: An optional arg to tell the model how many classes to initialize.
Method will call utils.parse_label_mapping if num_labels not supplied.
If num_labels is not found, the model will use the default setting (2 classes).
Returns:
The loaded model, which is initialized by transformers.PreTrainedModel.from_pretrained
"""
model_dir = kwargs.pop('model_dir', None)
if model_dir is None:
config = Llama2Config(**kwargs)
model = cls(config)
else:
model = super(Model, cls).from_pretrained(
pretrained_model_name_or_path=model_dir, **kwargs)
model.model_dir = model_dir
return model
@MODELS.register_module(Tasks.backbone, module_name=Models.llama2)
class Llama2Model(LlamaPreTrainedModel):
"""
Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`LlamaDecoderLayer`]
Args:
config: Llama2Config
"""
def __init__(self, config: Llama2Config):
super().__init__(config)
self.padding_idx = config.pad_token_id
self.vocab_size = config.vocab_size
self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
self.layers = nn.ModuleList([LlamaDecoderLayer(config) for _ in range(config.num_hidden_layers)])
self.norm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
self.gradient_checkpointing = False
# Initialize weights and apply final processing
self.post_init()
def get_input_embeddings(self):
return self.embed_tokens
def set_input_embeddings(self, value):
self.embed_tokens = value
# Copied from transformers.models.bart.modeling_bart.BartDecoder._prepare_decoder_attention_mask
def _prepare_decoder_attention_mask(self, attention_mask, input_shape, inputs_embeds, past_key_values_length):
# create causal mask
# [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
combined_attention_mask = None
if input_shape[-1] > 1:
combined_attention_mask = _make_causal_mask(
input_shape,
inputs_embeds.dtype,
device=inputs_embeds.device,
past_key_values_length=past_key_values_length,
)
if attention_mask is not None:
# [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
expanded_attn_mask = _expand_mask(attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]).to(
inputs_embeds.device
)
combined_attention_mask = (
expanded_attn_mask if combined_attention_mask is None else expanded_attn_mask + combined_attention_mask
)
return combined_attention_mask
def forward(
self,
input_ids: torch.LongTensor = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[List[torch.FloatTensor]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[Tuple, BaseModelOutputWithPast]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
use_cache = use_cache if use_cache is not None else self.config.use_cache
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
# retrieve input_ids and inputs_embeds
if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time")
elif input_ids is not None:
batch_size, seq_length = input_ids.shape
elif inputs_embeds is not None:
batch_size, seq_length, _ = inputs_embeds.shape
else:
raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")
seq_length_with_past = seq_length
past_key_values_length = 0
if past_key_values is not None:
past_key_values_length = past_key_values[0][0].shape[2]
seq_length_with_past = seq_length_with_past + past_key_values_length
if position_ids is None:
device = input_ids.device if input_ids is not None else inputs_embeds.device
position_ids = torch.arange(
past_key_values_length, seq_length + past_key_values_length, dtype=torch.long, device=device
)
position_ids = position_ids.unsqueeze(0).view(-1, seq_length)
else:
position_ids = position_ids.view(-1, seq_length).long()
if inputs_embeds is None:
inputs_embeds = self.embed_tokens(input_ids)
# embed positions
if attention_mask is None:
attention_mask = torch.ones(
(batch_size, seq_length_with_past), dtype=torch.bool, device=inputs_embeds.device
)
attention_mask = self._prepare_decoder_attention_mask(
attention_mask, (batch_size, seq_length), inputs_embeds, past_key_values_length
)
hidden_states = inputs_embeds
if self.gradient_checkpointing and self.training:
if use_cache:
logger.warning_once(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache = False
# decoder layers
all_hidden_states = () if output_hidden_states else None
all_self_attns = () if output_attentions else None
next_decoder_cache = () if use_cache else None
for idx, decoder_layer in enumerate(self.layers):
if output_hidden_states:
all_hidden_states += (hidden_states,)
past_key_value = past_key_values[idx] if past_key_values is not None else None
if self.gradient_checkpointing and self.training:
def create_custom_forward(module):
def custom_forward(*inputs):
# None for past_key_value
return module(*inputs, output_attentions, None)
return custom_forward
layer_outputs = torch.utils.checkpoint.checkpoint(
create_custom_forward(decoder_layer),
hidden_states,
attention_mask,
position_ids,
None,
)
else:
layer_outputs = decoder_layer(
hidden_states,
attention_mask=attention_mask,
position_ids=position_ids,
past_key_value=past_key_value,
output_attentions=output_attentions,
use_cache=use_cache,
)
hidden_states = layer_outputs[0]
if use_cache:
next_decoder_cache += (layer_outputs[2 if output_attentions else 1],)
if output_attentions:
all_self_attns += (layer_outputs[1],)
hidden_states = self.norm(hidden_states)
# add hidden states from the last decoder layer
if output_hidden_states:
all_hidden_states += (hidden_states,)
next_cache = next_decoder_cache if use_cache else None
if not return_dict:
return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
return BaseModelOutputWithPast(
last_hidden_state=hidden_states,
past_key_values=next_cache,
hidden_states=all_hidden_states,
attentions=all_self_attns,
)
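A minimal sketch of exercising the backbone above end to end. The tiny hyper-parameters are made up so the example runs without pretrained weights, and it assumes `Llama2Config` (defined in the configuration file further below) and the decoder-layer classes of this file are importable together:

```python
import torch

# Deliberately tiny, hypothetical config -- just enough to run a forward pass on CPU.
config = Llama2Config(
    vocab_size=128, hidden_size=64, intermediate_size=128,
    num_hidden_layers=2, num_attention_heads=4, max_position_embeddings=64)
model = Llama2Model(config).eval()

input_ids = torch.randint(0, config.vocab_size, (1, 8))
with torch.no_grad():
    out = model(input_ids=input_ids, use_cache=True, return_dict=True)

print(out.last_hidden_state.shape)  # torch.Size([1, 8, 64])
print(len(out.past_key_values))     # one (key, value) pair per decoder layer -> 2
```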

View File

@@ -0,0 +1,161 @@
# coding=utf-8
# Copyright (c) Alibaba, Inc. and its affiliates.
# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
#
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
# and OPT implementations in this library. It has been modified from its
# original forms to accommodate minor architectural differences compared
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" LLaMA model configuration"""
from transformers.configuration_utils import PretrainedConfig
from modelscope.utils.logger import get_logger
logger = get_logger(__name__)
LLAMA_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
class Llama2Config(PretrainedConfig):
r"""
This is the configuration class to store the configuration of a [`LlamaModel`]. It is used to instantiate an LLaMA
model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
defaults will yield a similar configuration to that of the LLaMA-7B.
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.
Args:
vocab_size (`int`, *optional*, defaults to 32000):
Vocabulary size of the LLaMA model. Defines the number of different tokens that can be represented by the
`inputs_ids` passed when calling [`LlamaModel`]
hidden_size (`int`, *optional*, defaults to 4096):
Dimension of the hidden representations.
intermediate_size (`int`, *optional*, defaults to 11008):
Dimension of the MLP representations.
num_hidden_layers (`int`, *optional*, defaults to 32):
Number of hidden layers in the Transformer encoder.
num_attention_heads (`int`, *optional*, defaults to 32):
Number of attention heads for each attention layer in the Transformer encoder.
num_key_value_heads (`int`, *optional*):
This is the number of key_value heads that should be used to implement Grouped Query Attention. If
`num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
`num_key_value_heads=1`, the model will use Multi Query Attention (MQA); otherwise GQA is used. When
converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
by meanpooling all the original heads within that group. For more details checkout [this
paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
`num_attention_heads`.
pretraining_tp (`int`, *optional*, defaults to `1`):
Experimental feature. Tensor parallelism rank used during pretraining. Please refer to [this
document](https://huggingface.co/docs/transformers/parallelism) to understand more about it. This value is
necessary to ensure exact reproducibility of the pretraining results. Please refer to [this
issue](https://github.com/pytorch/pytorch/issues/76232).
hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
The non-linear activation function (function or string) in the decoder.
max_position_embeddings (`int`, *optional*, defaults to 2048):
The maximum sequence length that this model might ever be used with. Typically set this to something large
just in case (e.g., 512 or 1024 or 2048).
initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
rms_norm_eps (`float`, *optional*, defaults to 1e-6):
The epsilon used by the rms normalization layers.
use_cache (`bool`, *optional*, defaults to `True`):
Whether or not the model should return the last key/values attentions (not used by all models). Only
relevant if `config.is_decoder=True`.
tie_word_embeddings(`bool`, *optional*, defaults to `False`):
Whether to tie weight embeddings
rope_scaling (`Dict`, *optional*):
Dictionary containing the scaling configuration for the RoPE embeddings. Currently supports two scaling
strategies: linear and dynamic. Their scaling factor must be a float greater than 1. The expected format
is `{"type": strategy name, "factor": scaling factor}`. When using this flag, don't update
`max_position_embeddings` to the expected new maximum. See the following thread for more information on how
these scaling strategies behave:
https://www.reddit.com/r/LocalLLaMA/comments/14mrgpr/dynamically_scaled_rope_further_increases/. This is an
experimental feature, subject to breaking API changes in future versions.
"""
model_type = "llama"
keys_to_ignore_at_inference = ["past_key_values"]
def __init__(
self,
vocab_size=32000,
hidden_size=4096,
intermediate_size=11008,
num_hidden_layers=32,
num_attention_heads=32,
num_key_value_heads=None,
hidden_act="silu",
max_position_embeddings=2048,
initializer_range=0.02,
rms_norm_eps=1e-6,
use_cache=True,
pad_token_id=0,
bos_token_id=1,
eos_token_id=2,
pretraining_tp=1,
tie_word_embeddings=False,
rope_scaling=None,
**kwargs,
):
self.vocab_size = vocab_size
self.max_position_embeddings = max_position_embeddings
self.hidden_size = hidden_size
self.intermediate_size = intermediate_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
# for backward compatibility
if num_key_value_heads is None:
num_key_value_heads = num_attention_heads
self.num_key_value_heads = num_key_value_heads
self.hidden_act = hidden_act
self.initializer_range = initializer_range
self.rms_norm_eps = rms_norm_eps
self.pretraining_tp = pretraining_tp
self.use_cache = use_cache
self.rope_scaling = rope_scaling
self._rope_scaling_validation()
super().__init__(
pad_token_id=pad_token_id,
bos_token_id=bos_token_id,
eos_token_id=eos_token_id,
tie_word_embeddings=tie_word_embeddings,
**kwargs,
)
def _rope_scaling_validation(self):
"""
Validate the `rope_scaling` configuration.
"""
if self.rope_scaling is None:
return
if not isinstance(self.rope_scaling, dict) or len(self.rope_scaling) != 2:
raise ValueError(
"`rope_scaling` must be a dictionary with with two fields, `name` and `factor`, "
f"got {self.rope_scaling}"
)
rope_scaling_type = self.rope_scaling.get("type", None)
rope_scaling_factor = self.rope_scaling.get("factor", None)
if rope_scaling_type is None or rope_scaling_type not in ["linear", "dynamic"]:
raise ValueError(
f"`rope_scaling`'s name field must be one of ['linear', 'dynamic'], got {rope_scaling_type}"
)
if rope_scaling_factor is None or not isinstance(rope_scaling_factor, float) or rope_scaling_factor <= 1.0:
raise ValueError(f"`rope_scaling`'s factor field must be an float > 1, got {rope_scaling_factor}")

View File

@@ -0,0 +1,182 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
#
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
# and OPT implementations in this library. It has been modified from its
# original forms to accommodate minor architectural differences compared
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List, Optional, Tuple, Union
import torch
import torch.nn.functional as F
import torch.utils.checkpoint
from torch import nn
from torch.nn import CrossEntropyLoss
from transformers.modeling_outputs import CausalLMOutputWithPast
from modelscope.metainfo import Models
from modelscope.utils.constant import Tasks
from .backbone import LlamaPreTrainedModel, Llama2Model
from ... import MODELS
# This file is mainly copied from the llama code of transformers
@MODELS.register_module(Tasks.text_generation, module_name=Models.llama2)
class Llama2ForTextGeneration(LlamaPreTrainedModel):
_tied_weights_keys = ["lm_head.weight"]
def __init__(self, config):
super().__init__(config)
self.model = Llama2Model(config)
self.pretraining_tp = config.pretraining_tp
self.vocab_size = config.vocab_size
self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
# Initialize weights and apply final processing
self.post_init()
def get_input_embeddings(self):
return self.model.embed_tokens
def set_input_embeddings(self, value):
self.model.embed_tokens = value
def get_output_embeddings(self):
return self.lm_head
def set_output_embeddings(self, new_embeddings):
self.lm_head = new_embeddings
def set_decoder(self, decoder):
self.model = decoder
def get_decoder(self):
return self.model
def forward(
self,
input_ids: torch.LongTensor = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[List[torch.FloatTensor]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
labels: Optional[torch.LongTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[Tuple, CausalLMOutputWithPast]:
r"""
Args:
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
(masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
Returns:
"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
# decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
outputs = self.model(
input_ids=input_ids,
attention_mask=attention_mask,
position_ids=position_ids,
past_key_values=past_key_values,
inputs_embeds=inputs_embeds,
use_cache=use_cache,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
)
hidden_states = outputs[0]
if self.pretraining_tp > 1:
lm_head_slices = self.lm_head.weight.split(self.vocab_size // self.pretraining_tp, dim=0)
logits = [F.linear(hidden_states, lm_head_slices[i]) for i in range(self.pretraining_tp)]
logits = torch.cat(logits, dim=-1)
else:
logits = self.lm_head(hidden_states)
logits = logits.float()
loss = None
if labels is not None:
# Shift so that tokens < n predict n
shift_logits = logits[..., :-1, :].contiguous()
shift_labels = labels[..., 1:].contiguous()
# Flatten the tokens
loss_fct = CrossEntropyLoss()
shift_logits = shift_logits.view(-1, self.config.vocab_size)
shift_labels = shift_labels.view(-1)
# Enable model parallelism
shift_labels = shift_labels.to(shift_logits.device)
loss = loss_fct(shift_logits, shift_labels)
if not return_dict:
output = (logits,) + outputs[1:]
return (loss,) + output if loss is not None else output
return CausalLMOutputWithPast(
loss=loss,
logits=logits,
past_key_values=outputs.past_key_values,
hidden_states=outputs.hidden_states,
attentions=outputs.attentions,
)
def prepare_inputs_for_generation(
self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs
):
if past_key_values:
input_ids = input_ids[:, -1:]
position_ids = kwargs.get("position_ids", None)
if attention_mask is not None and position_ids is None:
# create position_ids on the fly for batch generation
position_ids = attention_mask.long().cumsum(-1) - 1
position_ids.masked_fill_(attention_mask == 0, 1)
if past_key_values:
position_ids = position_ids[:, -1].unsqueeze(-1)
# if `inputs_embeds` are passed, we only want to use them in the 1st generation step
if inputs_embeds is not None and past_key_values is None:
model_inputs = {"inputs_embeds": inputs_embeds}
else:
model_inputs = {"input_ids": input_ids}
model_inputs.update(
{
"position_ids": position_ids,
"past_key_values": past_key_values,
"use_cache": kwargs.get("use_cache"),
"attention_mask": attention_mask,
}
)
return model_inputs
@staticmethod
def _reorder_cache(past_key_values, beam_idx):
reordered_past = ()
for layer_past in past_key_values:
reordered_past += (
tuple(past_state.index_select(0, beam_idx.to(past_state.device)) for past_state in layer_past),
)
return reordered_past
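A minimal sketch of the label-shifting loss path above, again with a tiny made-up config so no pretrained weights are required:

```python
import torch

config = Llama2Config(
    vocab_size=128, hidden_size=64, intermediate_size=128,
    num_hidden_layers=2, num_attention_heads=4, max_position_embeddings=64)
model = Llama2ForTextGeneration(config)

input_ids = torch.randint(0, config.vocab_size, (2, 10))
# Passing labels == input_ids gives the usual next-token objective:
# logits[..., :-1, :] are scored against labels[..., 1:].
out = model(input_ids=input_ids, labels=input_ids, return_dict=True)
print(out.loss)          # scalar cross-entropy
print(out.logits.shape)  # torch.Size([2, 10, 128])
```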

View File

@@ -0,0 +1,393 @@
# coding=utf-8
# Copyright (c) Alibaba, Inc. and its affiliates.
# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
#
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
# and OPT implementations in this library. It has been modified from its
# original forms to accommodate minor architectural differences compared
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tokenization classes for LLaMA."""
import os
from shutil import copyfile
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
import sentencepiece as spm
from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer
from modelscope.utils.logger import get_logger
if TYPE_CHECKING:
from transformers.pipelines.conversational import Conversation
logger = get_logger(__name__)
VOCAB_FILES_NAMES = {"vocab_file": "tokenizer.model"}
PRETRAINED_VOCAB_FILES_MAP = {
"vocab_file": {
"hf-internal-testing/llama-tokenizer": "https://huggingface.co/hf-internal-testing/llama-tokenizer/resolve/main/tokenizer.model",
},
"tokenizer_file": {
"hf-internal-testing/llama-tokenizer": "https://huggingface.co/hf-internal-testing/llama-tokenizer/resolve/main/tokenizer_config.json",
},
}
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
"hf-internal-testing/llama-tokenizer": 2048,
}
SPIECE_UNDERLINE = "▁"
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
# fmt: off
DEFAULT_SYSTEM_PROMPT = """You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your\
answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure\
that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not\
correct. If you don't know the answer to a question, please don't share false information."""
# fmt: on
class Llama2Tokenizer(PreTrainedTokenizer):
"""
Construct a Llama tokenizer. Based on byte-level Byte-Pair-Encoding. The default padding token is unset as there is
no padding token in the original model.
Args:
vocab_file (`str`):
Path to the vocabulary file.
legacy (`bool`, *optional*, defaults to `True`):
Whether or not the `legacy` behaviour of the tokenizer should be used. Legacy is before the merge of #24622
which includes fixes to properly handle tokens that appear after special tokens. A simple example:
- `legacy=True`:
```python
>>> from transformers import T5Tokenizer
>>> tokenizer = T5Tokenizer.from_pretrained("t5-base", legacy=True)
>>> tokenizer.encode("Hello <extra_id_0>.")
[8774, 32099, 3, 5, 1]
```
- `legacy=False`:
```python
>>> from transformers import T5Tokenizer
>>> tokenizer = T5Tokenizer.from_pretrained("t5-base", legacy=False)
>>> tokenizer.encode("Hello <extra_id_0>.") # the extra space `[3]` is no longer here
[8774, 32099, 5, 1]
```
Checkout the pull request and the issue [here](https://github.com/huggingface/transformers/pull/24565) for
more details.
"""
vocab_files_names = VOCAB_FILES_NAMES
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
model_input_names = ["input_ids", "attention_mask"]
def __init__(
self,
vocab_file,
unk_token="<unk>",
bos_token="<s>",
eos_token="</s>",
pad_token=None,
sp_model_kwargs: Optional[Dict[str, Any]] = None,
add_bos_token=True,
add_eos_token=False,
clean_up_tokenization_spaces=False,
legacy=True,
**kwargs,
):
self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
bos_token = AddedToken(bos_token, lstrip=False, rstrip=False) if isinstance(bos_token, str) else bos_token
eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
super().__init__(
bos_token=bos_token,
eos_token=eos_token,
unk_token=unk_token,
pad_token=pad_token,
add_bos_token=add_bos_token,
add_eos_token=add_eos_token,
sp_model_kwargs=self.sp_model_kwargs,
clean_up_tokenization_spaces=clean_up_tokenization_spaces,
legacy=legacy,
**kwargs,
)
if legacy:
logger.warning_once(
f"You are using the legacy behaviour of the {self.__class__}. This means that tokens that come after special tokens will not be properly handled. We recommend you to"
" read the related pull request available at https://github.com/huggingface/transformers/pull/24565"
)
self.legacy = legacy
self.vocab_file = vocab_file
self.add_bos_token = add_bos_token
self.add_eos_token = add_eos_token
self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
self.sp_model.Load(vocab_file)
def __getstate__(self):
state = self.__dict__.copy()
state["sp_model"] = None
state["sp_model_proto"] = self.sp_model.serialized_model_proto()
return state
def __setstate__(self, d):
self.__dict__ = d
self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
self.sp_model.LoadFromSerializedProto(self.sp_model_proto)
@property
def vocab_size(self):
"""Returns vocab size"""
return self.sp_model.get_piece_size()
def get_vocab(self):
"""Returns vocab as a dict"""
vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
vocab.update(self.added_tokens_encoder)
return vocab
# Copied from transformers.models.t5.tokenization_t5.T5Tokenizer.tokenize
def tokenize(self, text, **kwargs) -> List[str]:
# Replace the SPIECE_UNDERLINE with a space to make sure SPIECE_UNDERLINE is only used at
# the beginning of the text
if not self.legacy:
text = SPIECE_UNDERLINE + text.replace(SPIECE_UNDERLINE, " ")
return super().tokenize(text, **kwargs)
# Copied from transformers.models.t5.tokenization_t5.T5Tokenizer._tokenize
def _tokenize(self, text):
"""
Returns a tokenized string.
Since the sentencepiece internal model always adds a SPIECE_UNDERLINE at the beginning of the provided text,
we need to remove it by hand when the current text is a subsequence. This happens whenever the `self.tokenize`
function is called with special tokens: the input is split on the special tokens, and each subsequence is
passed to `_tokenize`. Thus if a subsequence did not start with a `" "` or SPIECE_UNDERLINE, we have to remove
the extra `SPIECE_UNDERLINE` prepended.
"""
if not self.legacy:
is_first = text.startswith(SPIECE_UNDERLINE)
if is_first:
text = text[1:]
tokens = self.sp_model.encode(text, out_type=str)
if not self.legacy and not is_first and not text.startswith(" ") and tokens[0].startswith(SPIECE_UNDERLINE):
tokens = ([tokens[0][1:]] if len(tokens[0]) > 1 else []) + tokens[1:]
return tokens
def _convert_token_to_id(self, token):
"""Converts a token (str) in an id using the vocab."""
return self.sp_model.piece_to_id(token)
def _convert_id_to_token(self, index):
"""Converts an index (integer) in a token (str) using the vocab."""
token = self.sp_model.IdToPiece(index)
return token
def convert_tokens_to_string(self, tokens):
"""Converts a sequence of tokens (string) in a single string."""
current_sub_tokens = []
out_string = ""
prev_is_special = False
for i, token in enumerate(tokens):
# make sure that special tokens are not decoded using sentencepiece model
if token in self.all_special_tokens:
if not prev_is_special and i != 0:
out_string += " "
out_string += self.sp_model.decode(current_sub_tokens) + token
prev_is_special = True
current_sub_tokens = []
else:
current_sub_tokens.append(token)
prev_is_special = False
out_string += self.sp_model.decode(current_sub_tokens)
return out_string
def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
"""
Save the vocabulary and special tokens file to a directory.
Args:
save_directory (`str`):
The directory in which to save the vocabulary.
Returns:
`Tuple(str)`: Paths to the files saved.
"""
if not os.path.isdir(save_directory):
logger.error(f"Vocabulary path ({save_directory}) should be a directory")
return
out_vocab_file = os.path.join(
save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
)
if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
copyfile(self.vocab_file, out_vocab_file)
elif not os.path.isfile(self.vocab_file):
with open(out_vocab_file, "wb") as fi:
content_spiece_model = self.sp_model.serialized_model_proto()
fi.write(content_spiece_model)
return (out_vocab_file,)
def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
bos_token_id = [self.bos_token_id] if self.add_bos_token else []
eos_token_id = [self.eos_token_id] if self.add_eos_token else []
output = bos_token_id + token_ids_0 + eos_token_id
if token_ids_1 is not None:
output = output + bos_token_id + token_ids_1 + eos_token_id
return output
def get_special_tokens_mask(
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
) -> List[int]:
"""
Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
special tokens using the tokenizer `prepare_for_model` method.
Args:
token_ids_0 (`List[int]`):
List of IDs.
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
already_has_special_tokens (`bool`, *optional*, defaults to `False`):
Whether or not the token list is already formatted with special tokens for the model.
Returns:
`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
"""
if already_has_special_tokens:
return super().get_special_tokens_mask(
token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
)
bos_token_id = [1] if self.add_bos_token else []
eos_token_id = [1] if self.add_eos_token else []
if token_ids_1 is None:
return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
return (
bos_token_id
+ ([0] * len(token_ids_0))
+ eos_token_id
+ bos_token_id
+ ([0] * len(token_ids_1))
+ eos_token_id
)
def create_token_type_ids_from_sequences(
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
"""
Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT
sequence pair mask has the following format:
```
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
| first sequence | second sequence |
```
if token_ids_1 is None, only returns the first portion of the mask (0s).
Args:
token_ids_0 (`List[int]`):
List of ids.
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
Returns:
`List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s).
"""
bos_token_id = [self.bos_token_id] if self.add_bos_token else []
eos_token_id = [self.eos_token_id] if self.add_eos_token else []
output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)
if token_ids_1 is not None:
output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)
return output
def _build_conversation_input_ids(self, conversation: "Conversation") -> List[int]:
"""Builds the input ids for a conversation.
This is the format used in the provided examples. System prompts should be manually added at the beginning of
the conversation. If no system prompt is given, the `DEFAULT_SYSTEM_PROMPT` will be used.
```
<bos>[INST] B_SYS SystemPrompt E_SYS Prompt [/INST] Answer <eos>
<bos>[INST] Prompt [/INST] Answer <eos>
<bos>[INST] Prompt [/INST]
```
If you want to use your own system prompt, make sure to use both `B_SYS` and `E_SYS`, as in the following:
```python
>>> from transformers import Conversation
>>> Conversation(
... "<<SYS>>\n Only answer with emojis, and charades\n<</SYS>>\n\nHow can I build a house in 10 septs?"
... )
```
Args:
conversation (`Conversation`):
Conversation to build input ids for.
Returns:
`List[int]`:
Input ids for the conversation.
"""
dialogue = list(conversation.iter_texts())
if not all([is_user for is_user, msg in dialogue[::2]]) or not all(
[not is_user for is_user, msg in dialogue[1::2]]
):
raise ValueError(
"The model only supports 'user' and 'assistant' roles, starting with user and alternating (u/a/u/a/u...)"
)
dialog_tokens: List[int] = []
if len(conversation.past_user_inputs) > 0:
if not conversation.past_user_inputs[0].startswith(B_SYS) or E_SYS not in conversation.past_user_inputs[0]:
conversation.past_user_inputs[0] = (
B_SYS + DEFAULT_SYSTEM_PROMPT + E_SYS + conversation.past_user_inputs[0]
)
elif not dialogue[0][1].startswith(B_SYS) or E_SYS not in dialogue[0][1]:
dialogue[0] = (dialogue[0][0], B_SYS + DEFAULT_SYSTEM_PROMPT + E_SYS + dialogue[0][1])
dialog_tokens += sum(
[
[self.bos_token_id]
+ self.encode(
f"{B_INST} {(prompt[1]).strip()} {E_INST} {(answer[1]).strip()} ", add_special_tokens=False
)
+ [self.eos_token_id]
for prompt, answer in zip(dialogue[::2], dialogue[1::2])
],
[],
)
if not (dialogue[-1][0]):
raise ValueError(f"Last message must be from user, got {dialogue[-1]['role']}")
dialog_tokens += [self.bos_token_id] + self.encode(
f"{B_INST} {(dialogue[-1][1]).strip()} {E_INST}", add_special_tokens=False
)
return dialog_tokens
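To make the `<bos>[INST] ... [/INST]` layout above concrete, a sketch of the string that `_build_conversation_input_ids` encodes for a single user turn; the tokenizer.model path is hypothetical:

```python
user_msg = 'How do I boil an egg?'
# The first user turn gets the default system prompt spliced in front of it.
text = B_SYS + DEFAULT_SYSTEM_PROMPT + E_SYS + user_msg
prompt = f'{B_INST} {text.strip()} {E_INST}'

tokenizer = Llama2Tokenizer(vocab_file='/path/to/tokenizer.model')  # hypothetical path
ids = [tokenizer.bos_token_id] + tokenizer.encode(prompt, add_special_tokens=False)
print(tokenizer.decode(ids))
```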

View File

@@ -0,0 +1,249 @@
# coding=utf-8
# Copyright (c) Alibaba, Inc. and its affiliates.
# Copyright 2020 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from shutil import copyfile
from typing import TYPE_CHECKING, Optional, Tuple
from tokenizers import processors
from transformers.tokenization_utils_fast import PreTrainedTokenizerFast
from transformers.utils import is_sentencepiece_available, logging
from transformers.utils.versions import require_version
if TYPE_CHECKING:
from transformers.pipelines.conversational import Conversation
require_version("tokenizers>=0.13.3")
if is_sentencepiece_available():
from .tokenization import Llama2Tokenizer
else:
Llama2Tokenizer = None
logger = logging.get_logger(__name__)
VOCAB_FILES_NAMES = {"vocab_file": "tokenizer.model", "tokenizer_file": "tokenizer.json"}
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
# fmt: off
DEFAULT_SYSTEM_PROMPT = """You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your\
answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure\
that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not\
correct. If you don't know the answer to a question, please don't share false information."""
# fmt: on
class Llama2TokenizerFast(PreTrainedTokenizerFast):
"""
Construct a Llama tokenizer. Based on byte-level Byte-Pair-Encoding.
This uses notably ByteFallback and no normalization.
```
from transformers import LlamaTokenizerFast
tokenizer = LlamaTokenizerFast.from_pretrained("hf-internal-testing/llama-tokenizer")
tokenizer.encode("Hello this is a test")
>>> [1, 15043, 445, 338, 263, 1243]
```
If you want to change the `bos_token` or the `eos_token`, make sure to specify them when initializing the model, or
call `tokenizer.update_post_processor()` to make sure that the post-processing is correctly done (otherwise the
values of the first token and final token of an encoded sequence will not be correct). For more details, check out
the [post-processors](https://huggingface.co/docs/tokenizers/api/post-processors) documentation.
This tokenizer inherits from [`PreTrainedTokenizerFast`] which contains most of the main methods. Users should
refer to this superclass for more information regarding those methods.
Args:
vocab_file (`str`):
[SentencePiece](https://github.com/google/sentencepiece) file (generally has a .model extension) that
contains the vocabulary necessary to instantiate a tokenizer.
tokenizer_file (`str`):
[tokenizers](https://github.com/huggingface/tokenizers) file (generally has a .json extension) that
contains everything needed to load the tokenizer.
clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
Whether to clean up spaces after decoding; cleanup consists in removing potential artifacts like extra
spaces.
bos_token (`str`, *optional*, defaults to `"<s>"`):
The beginning of sequence token that was used during pretraining. Can be used as a sequence classifier token.
eos_token (`str`, *optional*, defaults to `"</s>"`):
The end of sequence token.
unk_token (`str`, *optional*, defaults to `"<unk>"`):
The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
token instead.
"""
vocab_files_names = VOCAB_FILES_NAMES
slow_tokenizer_class = Llama2Tokenizer
padding_side = "left"
model_input_names = ["input_ids", "attention_mask"]
def __init__(
self,
vocab_file=None,
tokenizer_file=None,
clean_up_tokenization_spaces=False,
unk_token="<unk>",
bos_token="<s>",
eos_token="</s>",
add_bos_token=True,
add_eos_token=False,
**kwargs,
):
super().__init__(
vocab_file=vocab_file,
tokenizer_file=tokenizer_file,
clean_up_tokenization_spaces=clean_up_tokenization_spaces,
unk_token=unk_token,
bos_token=bos_token,
eos_token=eos_token,
**kwargs,
)
self._add_bos_token = add_bos_token
self._add_eos_token = add_eos_token
self.update_post_processor()
self.vocab_file = vocab_file
self.can_save_slow_tokenizer = False if not self.vocab_file else True
def update_post_processor(self):
"""
Updates the underlying post processor with the current `bos_token` and `eos_token`.
"""
bos = self.bos_token
bos_token_id = self.bos_token_id
eos = self.eos_token
eos_token_id = self.eos_token_id
single = f"{(bos+':0 ') * self.add_bos_token}$A:0{(' '+eos+':0') * self.add_eos_token}"
pair = f"{single}{(' '+bos+':1') * self.add_bos_token} $B:1{(' '+eos+':1') * self.add_eos_token}"
special_tokens = []
if self.add_bos_token:
special_tokens.append((bos, bos_token_id))
if self.add_eos_token:
special_tokens.append((eos, eos_token_id))
self._tokenizer.post_processor = processors.TemplateProcessing(
single=single, pair=pair, special_tokens=special_tokens
)
@property
def add_eos_token(self):
return self._add_eos_token
@property
def add_bos_token(self):
return self._add_bos_token
@add_eos_token.setter
def add_eos_token(self, value):
self._add_eos_token = value
self.update_post_processor()
@add_bos_token.setter
def add_bos_token(self, value):
self._add_bos_token = value
self.update_post_processor()
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
if not self.can_save_slow_tokenizer:
raise ValueError(
"Your fast tokenizer does not have the necessary information to save the vocabulary for a slow "
"tokenizer."
)
if not os.path.isdir(save_directory):
logger.error(f"Vocabulary path ({save_directory}) should be a directory")
return
out_vocab_file = os.path.join(
save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
)
if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file):
copyfile(self.vocab_file, out_vocab_file)
return (out_vocab_file,)
def _build_conversation_input_ids(self, conversation: "Conversation"):
"""Builds the input ids for a conversation.
This is the format used in the provided examples. System prompts should be manually added at the beginning of
the conversation. If no system prompt is given, the `DEFAULT_SYSTEM_PROMPT` will be used.
```
<bos>[INST] B_SYS SystemPrompt E_SYS Prompt [/INST] Answer <eos>
<bos>[INST] Prompt [/INST] Answer <eos>
<bos>[INST] Prompt [/INST]
```
If you want to use your own system prompt, make sure to use both `B_SYS` and `E_SYS`, as in the following:
```python
>>> from transformers import Conversation
>>> Conversation(
... "<<SYS>>\n Only answer with emojis, and charades\n<</SYS>>\n\nHow can I build a house in 10 septs?"
... )
```
Args:
conversation (`Conversation`):
Conversation to build input ids for.
Returns:
`List[int]`:
Input ids for the conversation.
"""
dialogue = list(conversation.iter_texts())
if not all([is_user for is_user, msg in dialogue[::2]]) or not all(
[not is_user for is_user, msg in dialogue[1::2]]
):
raise ValueError(
"The model only supports 'user' and 'assistant' roles, starting with user and alternating (u/a/u/a/u...)"
)
dialog_tokens = []
if len(conversation.past_user_inputs) > 0:
if not conversation.past_user_inputs[0].startswith(B_SYS) or E_SYS not in conversation.past_user_inputs[0]:
conversation.past_user_inputs[0] = (
B_SYS + DEFAULT_SYSTEM_PROMPT + E_SYS + conversation.past_user_inputs[0]
)
elif not dialogue[0][1].startswith(B_SYS) or E_SYS not in dialogue[0][1]:
dialogue[0] = (dialogue[0][0], B_SYS + DEFAULT_SYSTEM_PROMPT + E_SYS + dialogue[0][1])
dialog_tokens += sum(
[
[self.bos_token_id]
+ self.encode(
f"{B_INST} {(prompt[1]).strip()} {E_INST} {(answer[1]).strip()} ", add_special_tokens=False
)
+ [self.eos_token_id]
for prompt, answer in zip(dialogue[::2], dialogue[1::2])
],
[],
)
if not (dialogue[-1][0]):
raise ValueError(f"Last message must be from user, got {dialogue[-1]['role']}")
dialog_tokens += [self.bos_token_id] + self.encode(
f"{B_INST} {(dialogue[-1][1]).strip()} {E_INST}", add_special_tokens=False
)
return dialog_tokens
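A small usage sketch of the `add_bos_token`/`add_eos_token` properties above; the file paths are hypothetical. Assigning to either property re-runs `update_post_processor`, so the change applies to the next encode:

```python
tok = Llama2TokenizerFast(
    vocab_file='/path/to/tokenizer.model',    # hypothetical paths
    tokenizer_file='/path/to/tokenizer.json')

print(tok.encode('hello'))   # [<bos id>, ...]             add_bos_token defaults to True
tok.add_eos_token = True     # the setter calls update_post_processor()
print(tok.encode('hello'))   # [<bos id>, ..., <eos id>]
```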

View File

@@ -3,6 +3,7 @@
 import os
 from modelscope.msdatasets.ms_dataset import MsDataset
+from modelscope.utils.constant import DownloadMode
 class ASRDataset(MsDataset):
@@ -29,11 +30,14 @@ class ASRDataset(MsDataset):
         return data_list
     @classmethod
-    def load(cls,
-             dataset_name,
-             namespace='speech_asr',
-             train_set='train',
-             dev_set='validation'):
+    def load(
+            cls,
+            dataset_name,
+            namespace='speech_asr',
+            train_set='train',
+            dev_set='validation',
+            download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS,
+    ):
         if os.path.exists(dataset_name):
             data_dir = dataset_name
             ds_dict = {}
@@ -43,6 +47,10 @@ class ASRDataset(MsDataset):
             return ds_dict
         else:
             from modelscope.msdatasets import MsDataset
-            ds_dict = MsDataset.load(
-                dataset_name=dataset_name, namespace=namespace)
+            ds_dict = MsDataset.load(
+                dataset_name=dataset_name,
+                namespace=namespace,
+                download_mode=download_mode,
+            )
             return ds_dict

View File

@@ -223,11 +223,23 @@ class CsvDatasetBuilder(csv.Csv):
             if field_name.endswith(':FILE'):
                 transform_fields.append(field_name)
-        base_extracted_dir = self.split_path_dict.get(split_name, '')
+        base_extracted_dir: Union[str, list] = self.split_path_dict.get(
+            split_name, '')
         for field_name in transform_fields:
-            if base_extracted_dir:
+            if isinstance(base_extracted_dir,
+                          list) and len(base_extracted_dir) > 0:
+                if df.shape[0] != len(base_extracted_dir):
+                    logger.error(
+                        f"Number of lines in meta-csv file for split '{split_name}' ({df.shape[0]}) "
+                        f'does not match number of data-files({len(base_extracted_dir)})!'
+                    )
+                else:
+                    df[field_name] = base_extracted_dir
+            elif isinstance(base_extracted_dir, str) and base_extracted_dir:
                 df[field_name] = df[field_name].apply(
                     lambda x: os.path.join(base_extracted_dir, x))
+            else:
+                logger.warning(f'Nothing to do for field {field_name}')
         pa_data = pa.Table.from_pandas(df)
         return Dataset(arrow_table=pa_data)
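A toy, pandas-only illustration of the two branches introduced above; the column name and paths are made up:

```python
import os
import pandas as pd

# str branch: one extracted directory, relative paths from the meta-csv are joined onto it.
df = pd.DataFrame({'wav:FILE': ['a.wav', 'b.wav']})
base_extracted_dir = '/tmp/extracted'
df['wav:FILE'] = df['wav:FILE'].apply(lambda x: os.path.join(base_extracted_dir, x))

# list branch: one already-resolved file path per meta-csv row, assigned wholesale,
# but only when the row count matches (mirroring the check above).
df2 = pd.DataFrame({'wav:FILE': ['', '']})
resolved = ['/tmp/extracted/a.wav', '/tmp/extracted/b.wav']
if df2.shape[0] == len(resolved):
    df2['wav:FILE'] = resolved
```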

View File

@@ -93,7 +93,7 @@ class TimestampPipeline(Pipeline):
     def __call__(self,
                  audio_in: Union[str, bytes],
-                 text_in: str = None,
+                 text_in: str,
                  audio_fs: int = None,
                  recog_type: str = None,
                  audio_format: str = None,

View File

@@ -15,7 +15,7 @@ class DiffusersPipeline(Pipeline):
""" """
use `model` to create a diffusers pipeline use `model` to create a diffusers pipeline
Args: Args:
model: model id on modelscope hub. model: model id on modelscope hub or local dir.
device: str = 'gpu' device: str = 'gpu'
""" """

View File

@@ -146,7 +146,8 @@ class _DiffuersChineseStableDiffusionPipeline(StableDiffusionPipeline):
                        do_classifier_free_guidance,
                        negative_prompt=None,
                        prompt_embeds: Optional[torch.FloatTensor] = None,
-                       negative_prompt_embeds: Optional[torch.FloatTensor] = None):
+                       negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+                       lora_scale: Optional[float] = None):
         r"""
         Encodes the prompt into text encoder hidden states.
@@ -169,7 +170,14 @@ class _DiffuersChineseStableDiffusionPipeline(StableDiffusionPipeline):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
+            lora_scale (`float`, *optional*):
+                A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
         """
+        # set lora scale so that monkey patched LoRA
+        # function of text encoder can correctly access it
+        if lora_scale is not None and isinstance(self, LoraLoaderMixin):
+            self._lora_scale = lora_scale
         if prompt is not None and isinstance(prompt, str):
             batch_size = 1
         elif prompt is not None and isinstance(prompt, list):

View File

@@ -6,8 +6,7 @@ import cv2
 import numpy as np
 import torch
 import torchvision.transforms as transforms
-from diffusers import \
-    StableDiffusionPipeline as DiffuserStableDiffusionPipeline
+from diffusers import DiffusionPipeline
 from PIL import Image
 from modelscope.metainfo import Pipelines
@@ -35,7 +34,7 @@ class StableDiffusionPipeline(DiffusersPipeline):
         self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
         # load pipeline
         torch_type = torch.float16 if self.device == 'cuda' else torch.float32
-        self.pipeline = DiffuserStableDiffusionPipeline.from_pretrained(
+        self.pipeline = DiffusionPipeline.from_pretrained(
             model, torch_dtype=torch_type)
         self.pipeline = self.pipeline.to(self.device)
         # load lora moudle to unet
@@ -48,6 +47,60 @@ class StableDiffusionPipeline(DiffusersPipeline):
     def forward(self, inputs: Dict[str, Any],
                 **forward_params) -> Dict[str, Any]:
"""
Inputs Args:
prompt (`str` or `List[str]`, *optional*):
The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
instead.
height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
expense of slower inference.
guidance_scale (`float`, *optional*, defaults to 7.5):
Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
`guidance_scale` is defined as `w` of equation 2. of [Imagen
Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
usually at the expense of lower image quality.
negative_prompt (`str` or `List[str]`, *optional*):
The prompt or prompts not to guide the image generation. If not defined, one has to pass
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
less than `1`).
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
eta (`float`, *optional*, defaults to 0.0):
Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
[`schedulers.DDIMScheduler`], will be ignored for others.
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
to make generation deterministic.
latents (`torch.FloatTensor`, *optional*):
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor will be generated by sampling using the supplied random `generator`.
prompt_embeds (`torch.FloatTensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
argument.
output_type (`str`, *optional*, defaults to `"pil"`):
The output format of the generated image. Choose between
[PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
return_dict (`bool`, *optional*, defaults to `True`):
Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a
plain tuple.
callback (`Callable`, *optional*):
A function that will be called every `callback_steps` steps during inference. The function will be
called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function will be called. If not specified, the callback will be
called at every step.
"""
         if not isinstance(inputs, dict):
             raise ValueError(
                 f'Expected the input to be a dictionary, but got {type(input)}'
@@ -57,7 +110,20 @@ class StableDiffusionPipeline(DiffusersPipeline):
             raise ValueError('input should contain "text", but not found')
         images = self.pipeline(
-            inputs['text'], num_inference_steps=30, guidance_scale=7.5)
+            prompt=inputs.get('text'),
+            height=inputs.get('height'),
+            width=inputs.get('width'),
+            num_inference_steps=inputs.get('num_inference_steps', 50),
+            guidance_scale=inputs.get('guidance_scale', 7.5),
+            negative_prompt=inputs.get('negative_prompt'),
+            num_images_per_prompt=inputs.get('num_images_per_prompt', 1),
+            eta=inputs.get('eta', 0.0),
+            generator=inputs.get('generator'),
+            latents=inputs.get('latents'),
+            output_type=inputs.get('output_type', 'pil'),
+            return_dict=inputs.get('return_dict', True),
+            callback=inputs.get('callback'),
+            callback_steps=inputs.get('callback_steps', 1))
         return images
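With this change the extra diffusers arguments are simply read from the input dict. A hedged usage sketch; the model id below is illustrative, and any ModelScope text-to-image model served by this pipeline should accept the same keys:

```python
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Illustrative model id, not an endorsement of a specific checkpoint.
pipe = pipeline(
    Tasks.text_to_image_synthesis,
    model='AI-ModelScope/stable-diffusion-v1-5')
result = pipe({
    'text': 'a watercolor painting of a lighthouse at dawn',
    'num_inference_steps': 30,
    'guidance_scale': 7.5,
    'negative_prompt': 'blurry, low quality',
})
```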

View File

@@ -50,6 +50,7 @@ class CheckpointHook(Hook):
         hub_revision (str): Which branch to push the model to, default is `master`.
         upload_strategy (str): The action adopted when the previous uploading is not done
             and the next one is coming, can be `cancel` or `wait`.
+        save_trainer_state (bool): Save the trainer state for continue training, default True.
         kwargs:
             by_epoch (bool): Same with `save_strategy`, but has a higher priority, legacy argument.
             output_sub_dir (str): The folder under the `save_dir` to save the output checkpoint for inference.
@@ -75,6 +76,7 @@ class CheckpointHook(Hook):
                  private_hub: Optional[bool] = True,
                  hub_revision: Optional[str] = DEFAULT_REPOSITORY_REVISION,
                  upload_strategy: Optional[str] = UploadStrategy.cancel,
+                 save_trainer_state: Optional[bool] = True,
                  **kwargs):
         self.interval = interval
         self.save_dir = save_dir
@@ -97,6 +99,7 @@ class CheckpointHook(Hook):
         self.private_hub = private_hub
         self.hub_revision = hub_revision
         self.upload_strategy = upload_strategy
+        self.save_trainer_state = save_trainer_state
         self.tag = -1
         self.is_model_id = None
         self.max_checkpoint_num = None
@@ -219,7 +222,8 @@ class CheckpointHook(Hook):
         checkpoint_path_prefix = os.path.join(self.save_dir, prefix)
         meta = self._create_training_state(trainer)
         self.processor.save_checkpoints(trainer, checkpoint_path_prefix,
-                                        self.output_dir, meta)
+                                        self.output_dir, meta,
+                                        self.save_trainer_state)
         self.save_evaluate_results(trainer)
         self.history_checkpoints.append(checkpoint_path_prefix)
         self._remove_obsolete_checkpoints(trainer)
@@ -399,7 +403,8 @@ class BestCkptSaverHook(CheckpointHook):
         self._best_ckpt_file = checkpoint_path_prefix
         meta = self._create_training_state(trainer)
         self.processor.save_checkpoints(trainer, checkpoint_path_prefix,
-                                        self.output_dir, meta)
+                                        self.output_dir, meta,
+                                        self.save_trainer_state)
         self.save_evaluate_results(trainer)
         self.history_checkpoints.add(checkpoint_path_prefix)
         self._remove_obsolete_checkpoints(trainer)
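For reference, a hedged sketch of how the new flag can be passed when the checkpoint hook is declared in a trainer config; whether the hook entry lives in a `train.hooks` list or a dedicated checkpoint section depends on the model's configuration.json:

```python
# Hypothetical hook entry; only 'save_trainer_state' is the newly added key.
checkpoint_hook_cfg = {
    'type': 'CheckpointHook',
    'interval': 1,
    'save_trainer_state': False,  # drop optimizer/lr_scheduler state from *_trainer_state.pth
}
# e.g. appended to cfg.train.hooks (or merged into the existing checkpoint config)
# before the trainer is built.
```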

View File

@@ -104,7 +104,8 @@ class CheckpointProcessor:
                          trainer,
                          checkpoint_path_prefix,
                          output_dir,
-                         meta=None):
+                         meta=None,
+                         save_optimizers=True):
         """Save the state dict for trainer and model.
         This is a strategic function which can be registered by other hook's function.
@@ -115,13 +116,15 @@ class CheckpointProcessor:
                 like: /tmp/test/epoch_0
             output_dir(`str`): The output dir for inference.
             meta: (`dict`): The meta info needed to be saved into files.
+            save_optimizers: (`bool`): Do save the optimizers state
         """
         model = trainer.unwrap_module(trainer.model)
         _model_file, _train_state_file = self._get_state_file_name(
             checkpoint_path_prefix)
         # Save pth file without model state_dict
-        self.save_trainer_state(trainer, model, _train_state_file, meta)
+        self.save_trainer_state(trainer, model, _train_state_file, meta,
+                                save_optimizers)
         self.save_model_state(model, _model_file)
         self.link(model, _model_file, output_dir)
@@ -175,7 +178,8 @@ class CheckpointProcessor:
                 'changing to copy the bin file, this may use more disk space.')
             shutil.copyfile(src_file, dest_file)
-    def save_trainer_state(self, trainer, model, train_state_file, meta):
+    def save_trainer_state(self, trainer, model, train_state_file, meta,
+                           save_optimizers):
         """Save the trainer state, including optimizer/lr_scheduler's state dict, random states etc.
         Args:
@@ -183,12 +187,13 @@ class CheckpointProcessor:
             model: The model instance.
             train_state_file: The target file name for saving trainer states.
             meta: Some extra meta info.
+            save_optimizers: Save optimizers state or not.
         """
         save_checkpoint(
             model,
             train_state_file,
-            trainer.optimizer,
-            trainer.lr_scheduler,
+            trainer.optimizer if save_optimizers else None,
+            trainer.lr_scheduler if save_optimizers else None,
             meta=meta,
             with_model=False)

View File

@@ -156,7 +156,8 @@ class DeepspeedProcessor(CheckpointProcessor, LrSchedulerProcessor,
                          trainer,
                          checkpoint_path_prefix,
                          output_dir,
-                         meta=None):
+                         meta=None,
+                         save_optimizers=True):
         model = trainer.unwrap_module(trainer.model)
         _train_state_file = checkpoint_path_prefix + self.rank_name(
         ) + CheckpointProcessor.TRAINER_STATE_SUFFIX

View File

@@ -57,7 +57,8 @@ class MpuProcessor(CheckpointProcessor):
                          trainer,
                          checkpoint_path_prefix,
                          output_dir,
-                         meta=None):
+                         meta=None,
+                         save_optimizers=True):
         model = trainer.unwrap_module(trainer.model)
         _train_state_file = checkpoint_path_prefix + self.rank_name(
         ) + CheckpointProcessor.TRAINER_STATE_SUFFIX
@@ -65,8 +66,8 @@ class MpuProcessor(CheckpointProcessor):
         save_checkpoint(
             model,
             _train_state_file,
-            trainer.optimizer,
-            trainer.lr_scheduler,
+            trainer.optimizer if save_optimizers else None,
+            trainer.lr_scheduler if save_optimizers else None,
             meta=meta,
             with_model=False)

View File

@@ -41,7 +41,8 @@ class DreamboothCheckpointProcessor(CheckpointProcessor):
trainer, trainer,
checkpoint_path_prefix, checkpoint_path_prefix,
output_dir, output_dir,
meta=None): meta=None,
save_optimizers=True):
"""Save the state dict for dreambooth model. """Save the state dict for dreambooth model.
""" """
pipeline_args = {} pipeline_args = {}

View File

@@ -21,7 +21,8 @@ class LoraDiffusionCheckpointProcessor(CheckpointProcessor):
trainer, trainer,
checkpoint_path_prefix, checkpoint_path_prefix,
output_dir, output_dir,
meta=None): meta=None,
save_optimizers=True):
"""Save the state dict for lora tune model. """Save the state dict for lora tune model.
""" """
trainer.model.unet = trainer.model.unet.to(torch.float32) trainer.model.unet = trainer.model.unet.to(torch.float32)

View File

@@ -168,3 +168,9 @@ TAMING_IMPORT_ERROR = """
{0} requires the taming-transformers library but it was not found in your environment. You can install it with pip: {0} requires the taming-transformers library but it was not found in your environment. You can install it with pip:
`pip install taming-transformers-rom1504` `pip install taming-transformers-rom1504`
""" """
# docstyle-ignore
XFORMERS_IMPORT_ERROR = """
{0} requires the xformers library but it was not found in your environment. You can install it with pip:
`pip install xformers>=0.0.17`
"""

View File

@@ -306,6 +306,7 @@ REQUIREMENTS_MAAPING = OrderedDict([
('mpi4py', (is_package_available('mpi4py'), MPI4PY_IMPORT_ERROR)), ('mpi4py', (is_package_available('mpi4py'), MPI4PY_IMPORT_ERROR)),
('open_clip', (is_package_available('open_clip'), OPENCLIP_IMPORT_ERROR)), ('open_clip', (is_package_available('open_clip'), OPENCLIP_IMPORT_ERROR)),
('taming', (is_package_available('taming'), TAMING_IMPORT_ERROR)), ('taming', (is_package_available('taming'), TAMING_IMPORT_ERROR)),
('xformers', (is_package_available('xformers'), XFORMERS_IMPORT_ERROR)),
]) ])
SYSTEM_PACKAGE = set(['os', 'sys', 'typing']) SYSTEM_PACKAGE = set(['os', 'sys', 'typing'])
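With both additions in place, callers can look up xformers like any other optional dependency: the mapping pairs the availability flag with the error template added above. A rough sketch of how such an entry might be consumed, assuming a hypothetical helper that guards a memory-efficient-attention path (the function name and pipeline argument are illustrative, not part of this diff):
# Hypothetical helper, shown only to illustrate how the new mapping entry is consumed.
def enable_memory_efficient_attention(pipeline):
    available, error_template = REQUIREMENTS_MAAPING['xformers']
    if not available:
        # The template's {0} slot is filled with the caller's name.
        raise ImportError(error_template.format('enable_memory_efficient_attention'))
    # diffusers pipelines expose this switch when xformers is installed.
    pipeline.enable_xformers_memory_efficient_attention()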

View File

@@ -25,7 +25,7 @@ def get_logger(log_file: Optional[str] = None,
logger_name = __name__.split('.')[0] logger_name = __name__.split('.')[0]
logger = logging.getLogger(logger_name) logger = logging.getLogger(logger_name)
logger.propagate = False
if logger_name in init_loggers: if logger_name in init_loggers:
add_file_handler_if_needed(logger, log_file, file_mode, log_level) add_file_handler_if_needed(logger, log_file, file_mode, log_level)
return logger return logger
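Setting logger.propagate = False keeps records from bubbling up to the root logger, which otherwise re-emits every message once the root logger has its own handler (for example after a user calls logging.basicConfig). A standalone illustration of the duplicate-output problem this avoids, not modelscope code:
import logging

logging.basicConfig(level=logging.INFO)      # gives the root logger a handler
logger = logging.getLogger('modelscope')
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler())   # the library's own handler

logger.info('hello')    # printed twice: once by the library handler, once via the root logger
logger.propagate = False
logger.info('world')    # printed once: propagation to the root logger is off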

View File

@@ -1,7 +1,7 @@
accelerate accelerate
cloudpickle cloudpickle
decord>=0.6.0 decord>=0.6.0
diffusers==0.15.0 diffusers==0.18.0
fairseq fairseq
ftfy>=6.0.3 ftfy>=6.0.3
librosa==0.9.2 librosa==0.9.2

View File

@@ -21,7 +21,7 @@ ignore-words-list = patten,nd,ty,mot,hist,formating,winn,gool,datas,wan,confids
[flake8] [flake8]
max-line-length = 120 max-line-length = 120
select = B,C,E,F,P,T4,W,B9 select = B,C,E,F,P,T4,W,B9
ignore = F401,F405,F821,W503,E251 ignore = F401,F403,F405,F821,W503,E251
exclude = docs/src,*.pyi,.git exclude = docs/src,*.pyi,.git
[darglint] [darglint]

View File

@@ -35,7 +35,7 @@ class TestLoraDiffusionTrainer(unittest.TestCase):
shutil.rmtree(self.tmp_dir) shutil.rmtree(self.tmp_dir)
super().tearDown() super().tearDown()
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level') @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_lora_diffusion_train(self): def test_lora_diffusion_train(self):
model_id = 'AI-ModelScope/stable-diffusion-v1-5' model_id = 'AI-ModelScope/stable-diffusion-v1-5'
model_revision = 'v1.0.9' model_revision = 'v1.0.9'
@@ -67,7 +67,7 @@ class TestLoraDiffusionTrainer(unittest.TestCase):
results_files = os.listdir(self.tmp_dir) results_files = os.listdir(self.tmp_dir)
self.assertIn(f'{trainer.timestamp}.log.json', results_files) self.assertIn(f'{trainer.timestamp}.log.json', results_files)
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level') @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_lora_diffusion_eval(self): def test_lora_diffusion_eval(self):
model_id = 'AI-ModelScope/stable-diffusion-v1-5' model_id = 'AI-ModelScope/stable-diffusion-v1-5'
model_revision = 'v1.0.9' model_revision = 'v1.0.9'