Mirror of https://github.com/modelscope/modelscope.git (synced 2025-12-23 19:49:24 +01:00)
Fix/chatglm2 (#384)
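
In summary, the commit: reworks select_device to accept either a GPU list or a CUDA_VISIBLE_DEVICES-style string (via a new _format_device helper) and to return the master device; centralizes special-token fallbacks in a shared _add_special_token; makes the model/tokenizer loaders take an explicit model_dir pinned through snapshot_download revisions; gives tokenize_function a max_length parameter and switches the prompt templates to a Human/AI format; turns the dataset split fraction and seed into arguments; fixes test-set iteration to use Dataset.select; enables gradient checkpointing for chatglm2; adds top_p sampling and an eos-based pad_token_id to the inference calls; and moves shift_labels onto the logits device in the chatglm2 loss.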
@@ -51,25 +51,15 @@ from modelscope.utils.config import Config, ConfigDict
 from modelscope.utils.registry import default_group
 
 #
-TEST_SPLIT_P = 0.01
-SPLIT_SEED = 42
-MAX_LENGTH: Optional[int] = 2048
 COLOR, COLOR_S = '#FFE2D9', '#FF7043'
 
-PROMPT = """### 用户
-{instruction}
-### AI助手
-"""
+PROMPT = """Human: {instruction}
+AI: """
 
 logger = get_logger()
 #
 
 
-def get_model_dir(model_id: str, model_revision: Optional[str] = None) -> str:
-    model_dir = snapshot_download(model_id, model_revision)
-    return model_dir
-
-
 def _get_version(work_dir: str) -> int:
     if os.path.isdir(work_dir):
         fnames = os.listdir(work_dir)
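
A quick rendering of the new template (a minimal sketch; the instruction text is an invented placeholder):

    # Hypothetical usage of the PROMPT template defined above.
    PROMPT = 'Human: {instruction}\nAI: '
    print(PROMPT.format(instruction='Tell me a joke.'))
    # Human: Tell me a joke.
    # AI: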
@@ -96,28 +86,40 @@ def get_work_dir(work_dir: str) -> str:
     return work_dir
 
 
-def select_device(device_ids: List[int]) -> Device:
+def _format_device(device: Union[List[int], str]) -> Tuple[List[int], str]:
+    if isinstance(device, list):
+        device_ids = device
+        device_str = ','.join([str(d) for d in device])
+    else:
+        device_ids = [int(d) for d in device.split(',') if d != '-1']
+        device_str = device
+    device_str = device_str.replace(' ', '')
+    return device_ids, device_str
+
+
+def select_device(device: Union[List[int], str]) -> Device:
     """Call this function before cuda is initialized.
-    Return: master device
+    device: e.g. []: 'cpu', [0], [0, 1, 2]
+        e.g. '-1': 'cpu', '0', '0,1,2'
     """
     if torch.cuda.is_initialized():
         logger.warning('CUDA has been initialized! Device selection fails!')
         return torch.device('cuda:0')
     #
+    device_ids, device_str = _format_device(device)
+    #
+    os.environ['CUDA_VISIBLE_DEVICES'] = device_str
     log_s = 'Using device: '
-    if len(device_ids) == 0:  # cpu
-        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
-        device: str = 'cpu'
-        log_s += device
+    if len(device_ids) == 0:
+        master_device: str = 'cpu'
+        log_s += 'cpu'
     else:
-        os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(
-            [str(d) for d in device_ids])
         assert torch.cuda.is_available(
         ) and torch.cuda.device_count() >= len(device_ids)
-        log_s += f"cuda:{','.join([str(d) for d in device_ids])}"  # e.g. 'cuda:1,7,8'
-        device = 'cuda:0'
+        master_device = 'cuda:0'
+        log_s += f'cuda:{device_str}'
    logger.info(log_s)
-    return torch.device(device)
+    return torch.device(master_device)
 
 
 def seed_everything(seed: Optional[int] = None, gpu_dtm: bool = False) -> int:
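
The device argument to select_device is now polymorphic; a minimal usage sketch (assuming the listed GPUs exist, and calling it before any CUDA work so CUDA_VISIBLE_DEVICES still takes effect):

    # Equivalent ways to pin two GPUs and get the master device back.
    device = select_device([0, 1])   # list form -> torch.device('cuda:0')
    device = select_device('0,1')    # string form, same effect
    device = select_device([])       # or select_device('-1') -> torch.device('cpu')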
@@ -148,7 +150,9 @@ def get_T_max(dataset_len: int, batch_size: int, max_epochs: int,
     return T_max
 
 
-def tokenize_function(example: Dict[str, str], tokenizer) -> Dict[str, Any]:
+def tokenize_function(example: Dict[str, str],
+                      tokenizer,
+                      max_length: Optional[int] = 2048) -> Dict[str, Any]:
     """Only applicable to baichuan and chatglm2. Other models need to be tested"""
     instruction = example['instruction']
     input_: str = example['input']
@@ -159,12 +163,12 @@ def tokenize_function(example: Dict[str, str], tokenizer) -> Dict[str, Any]:
     else:
         instruction = instruction + input_
     output = example['output']
-    src_text = PROMPT.format(instruction=instruction, add_special_tokens=False)
+    src_text = PROMPT.format(instruction=instruction)
     src_input_ids: List[int] = tokenizer(
         src_text, return_attention_mask=False,
         add_special_tokens=True)['input_ids']
-    # tokenizer.bos_token_id: Avoid `tgt_input_ids` being empty
-    tgt_input_ids = [tokenizer.bos_token_id]
+    #
+    tgt_input_ids = []
     if output is not None:
         tgt_input_ids += tokenizer(
             output, return_attention_mask=False,
@@ -175,10 +179,10 @@ def tokenize_function(example: Dict[str, str], tokenizer) -> Dict[str, Any]:
         labels = None
     input_ids = src_input_ids + tgt_input_ids
     #
-    if MAX_LENGTH is not None:
-        input_ids = input_ids[-MAX_LENGTH:]
+    if max_length is not None:
+        input_ids = input_ids[-max_length:]
         if labels is not None:
-            labels = labels[-MAX_LENGTH:]
+            labels = labels[-max_length:]
     #
     return {'input_ids': input_ids, 'labels': labels}
 
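
MAX_LENGTH stops being a module-level global here; callers pass max_length instead, and None disables truncation. Note the truncation slices from the right, keeping the most recent tokens; a toy illustration:

    # input_ids[-max_length:] keeps the tail of the sequence.
    input_ids = list(range(10))     # stand-in for real token ids
    max_length = 4
    print(input_ids[-max_length:])  # [6, 7, 8, 9]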
@@ -200,8 +204,10 @@ def stat_dataset(dataset: HFDataset) -> None:
 
 def print_examples(examples: Dict[str, Any], tokenizer) -> None:
     input_ids, labels = examples['input_ids'], examples['labels']
-    print(f'[INPUT_IDS] {tokenizer.decode(input_ids)}')
+    print(f'[INPUT_IDS] {input_ids}')
+    print(f'[INPUT] {tokenizer.decode(input_ids)}')
     print()
+    print(f'[LABLES_IDS] {labels}')
     print(
         f'[LABLES] {tokenizer.decode([lb if lb != -100 else 0 for lb in labels])}'
     )
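
The [LABLES] decode replaces -100 because it is not a token id at all: it is the ignore_index that CrossEntropyLoss skips, so it must be mapped to some real id (0 here) before tokenizer.decode can render a string. A minimal sketch with invented ids:

    # Invented ids; positions masked with -100 contribute nothing to the loss.
    labels = [-100, -100, 345, 678, 2]
    printable = [lb if lb != -100 else 0 for lb in labels]
    # tokenizer.decode(printable) is now safe to call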
@@ -283,16 +289,25 @@ class MyMetric(Metric):
         }
 
     def merge(self, other: 'MyMetric') -> None:
-        """This script does not support ddp"""
+        """This script does not support ddp. TODO"""
         raise NotImplementedError
 
 
-def get_baichuan7B_model_tokenizer(model_dir: Optional[str] = None,
-                                   load_model: bool = True):
-    if model_dir is None:
-        model_id = 'baichuan-inc/baichuan-7B'
-        model_dir = get_model_dir(model_id, None)
-    #
+def _add_special_token(tokenizer):
+    if tokenizer.eos_token_id is None:
+        tokenizer.eos_token_id = 2
+    if tokenizer.bos_token_id is None:
+        tokenizer.bos_token_id = 1
+    if tokenizer.pad_token_id is None:
+        tokenizer.pad_token_id = 0
+    logger.info(f'bos_token_id: {tokenizer.bos_token_id}, '
+                f'eos_token_id: {tokenizer.eos_token_id}, '
+                f'pad_token_id: {tokenizer.pad_token_id}')
+
+
+def get_baichuan7B_model_tokenizer(model_dir: str,
+                                   load_model: bool = True,
+                                   add_special_token: bool = True):
     sys.path.insert(0, model_dir)
     from configuration_baichuan import BaiChuanConfig
     from tokenization_baichuan import BaiChuanTokenizer
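
The fallback ids in _add_special_token (eos=2, bos=1, pad=0) follow the usual SentencePiece layout of these checkpoints; if in doubt, they can be checked against the loaded vocabulary. A hedged sanity-check sketch:

    # Optional check that the fallback ids map to plausible tokens;
    # the exact strings ('<s>', '</s>', ...) vary per checkpoint.
    for name, idx in [('bos', tokenizer.bos_token_id),
                      ('eos', tokenizer.eos_token_id),
                      ('pad', tokenizer.pad_token_id)]:
        print(name, idx, tokenizer.convert_ids_to_tokens(idx))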
@@ -309,15 +324,14 @@ def get_baichuan7B_model_tokenizer(model_dir: Optional[str] = None,
             device_map='auto',
             torch_dtype=torch.float16)
     #
+    if add_special_token:
+        _add_special_token(tokenizer)
     return model, tokenizer
 
 
-def get_baichuan13B_model_tokenizer(model_dir: Optional[str] = None,
-                                    load_model: bool = True):
-    if model_dir is None:
-        model_id = 'baichuan-inc/Baichuan-13B-Base'
-        model_dir = get_model_dir(model_id, 'v1.0.1')
-    #
+def get_baichuan13B_model_tokenizer(model_dir: str,
+                                    load_model: bool = True,
+                                    add_special_token: bool = True):
     sys.path.insert(0, model_dir)
     from configuration_baichuan import BaichuanConfig
     from tokenization_baichuan import BaichuanTokenizer
@@ -334,15 +348,14 @@ def get_baichuan13B_model_tokenizer(model_dir: Optional[str] = None,
             device_map='auto',
             torch_dtype=torch.float16)
     #
+    if add_special_token:
+        _add_special_token(tokenizer)
     return model, tokenizer
 
 
-def get_chatglm2_model_tokenizer(model_dir: Optional[str] = None,
-                                 load_model: bool = True):
-    if model_dir is None:
-        model_id = 'ZhipuAI/chatglm2-6b'
-        model_dir = snapshot_download(model_id, None)
-    #
+def get_chatglm2_model_tokenizer(model_dir: str,
+                                 load_model: bool = True,
+                                 add_special_token: bool = True):
     config = read_config(model_dir)
     config['model'] = ConfigDict({'type': 'chatglm2-6b'})
     tokenizer = ChatGLM2Tokenizer.from_pretrained(model_dir)
@@ -353,12 +366,16 @@ def get_chatglm2_model_tokenizer(model_dir: Optional[str] = None,
         cfg_dict=config,
         device_map='auto',
         torch_dtype=torch.float16)
+    if add_special_token:
+        _add_special_token(tokenizer)
     return model, tokenizer
 
 
 def get_alpaca_en_zh_dataset(
         tokenize_function,
-        only_val: bool = False) -> Tuple[HFDataset, HFDataset]:
+        only_val: bool = False,
+        test_split_p: float = 0.01,
+        split_seed: int = 42) -> Tuple[HFDataset, HFDataset]:
     """
     split: Literal['train', 'validation', None]
     """
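
The former TEST_SPLIT_P/SPLIT_SEED globals become keyword arguments with identical defaults, so the split can be changed per call; a minimal sketch (tokenize_func is assumed to already bind the tokenizer, e.g. via functools.partial):

    # Default split: 1% validation, seed 42 -- unchanged behavior.
    train_dataset, val_dataset = get_alpaca_en_zh_dataset(tokenize_func)
    # Hypothetical override: 5% validation with a different seed.
    train_dataset, val_dataset = get_alpaca_en_zh_dataset(
        tokenize_func, test_split_p=0.05, split_seed=0)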
@@ -371,7 +388,7 @@ def get_alpaca_en_zh_dataset(
     dataset: HFDataset = concatenate_datasets([dataset_zh, dataset_en])
     #
     # dataset = dataset.select(range(1000))  # for debug
-    dataset = dataset.train_test_split(TEST_SPLIT_P, seed=SPLIT_SEED)
+    dataset = dataset.train_test_split(test_split_p, seed=split_seed)
     if only_val:
         dataset = dataset['test']
     if tokenize_function is not None:
@@ -3,24 +3,22 @@ from _common import *
 from transformers import TextStreamer
 
 device_ids = [0, 1]
-logger.info(device_ids)
 select_device(device_ids)
+# Note: You need to set the value of `CKPT_FPATH`
+CKPT_FAPTH = '/path/to/your/iter_xxx.pth'
 
 # ### Loading Model and Tokenizer
-# Note: You need to set the value of `CKPT_FPATH`
 BAICHUAN_TYPE = '13B'  # Literal['7B', '13B']
-CKPT_FAPTH = '/path/to/your/xxx.pth'
-LORA_TARGET_MODULES = ['W_pack']
-
 if BAICHUAN_TYPE == '7B':
-    model, tokenizer = get_baichuan7B_model_tokenizer()
+    model_dir = snapshot_download('baichuan-inc/baichuan-7B', 'v1.0.5')
+    model, tokenizer = get_baichuan7B_model_tokenizer(model_dir)
 else:
-    model, tokenizer = get_baichuan13B_model_tokenizer()
-if tokenizer.pad_token_id is None:
-    tokenizer.pad_token_id = tokenizer.eos_token_id
+    model_dir = snapshot_download('baichuan-inc/Baichuan-13B-Base', 'v1.0.2')
+    model, tokenizer = get_baichuan13B_model_tokenizer(model_dir)
 model.bfloat16()  # Consistent with training
 
 # ### Preparing lora
+LORA_TARGET_MODULES = ['W_pack']
 LORA_RANK = 8
 LORA_ALPHA = 32
 LORA_DROPOUT_P = 0  # Arbitrary value
@@ -38,7 +36,8 @@ _, test_dataset = get_alpaca_en_zh_dataset(None, True)
 
 # ### Inference
 streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-for d in test_dataset[:5]:
+mini_test_dataset = test_dataset.select(range(5))
+for d in mini_test_dataset:
     output = d['output']
     d['output'] = None
     input_ids = tokenize_function(d, tokenizer)['input_ids']
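
This hunk is a genuine bug fix, not a style change: slicing a Hugging Face Dataset returns a dict of columns, so the old loop iterated over column names rather than examples; Dataset.select keeps row semantics. Sketch of the difference:

    batch = test_dataset[:5]              # dict of lists: {'instruction': [...], ...}
    rows = test_dataset.select(range(5))  # Dataset with 5 rows
    for d in rows:                        # each d is one example dict
        print(d['instruction'])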
@@ -50,9 +49,10 @@ for d in test_dataset[:5]:
         max_new_tokens=512,
         attention_mask=attention_mask,
         streamer=streamer,
-        pad_token_id=tokenizer.pad_token_id,
+        pad_token_id=tokenizer.eos_token_id,
         temperature=0.7,
         top_k=50,
+        top_p=0.7,
         do_sample=True)
     print()
     print(f'[LABELS]{output}')
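
The same two generation tweaks recur in every inference script of this commit: pad_token_id now falls back to eos_token_id inside the generate call (instead of mutating the tokenizer up front), and top_p=0.7 adds nucleus sampling alongside top_k. Condensed form of the shared call:

    # Condensed from the inference scripts above.
    generate_ids = model.generate(
        input_ids=input_ids,
        max_new_tokens=512,
        attention_mask=attention_mask,
        streamer=streamer,
        pad_token_id=tokenizer.eos_token_id,  # no tokenizer mutation needed
        temperature=0.7,
        top_k=50,
        top_p=0.7,
        do_sample=True)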
@@ -3,35 +3,27 @@
 pip install modelscope
 pip install numpy pandas matplotlib scikit-learn
 pip install transformers datasets
-pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
-pip install tqdm
-pip install tensorboard
-pip install torchmetrics
-pip install sentencepiece
-pip install accelerate
+conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
+pip install tqdm tensorboard torchmetrics sentencepiece charset_normalizer accelerate
 
 pip install numpy -U  # Resolve torchmetrics dependencies and update numpy
 """
 
 from _common import *
 
-device_ids = [0, 1, 2, 3]
-logger.info(device_ids)
+device_ids = [0, 1]
 select_device(device_ids)
 seed_everything(42)
 
 # ### Loading Model and Tokenizer
 BAICHUAN_TYPE = '13B'  # Literal['7B', '13B']
 WORK_DIR = f'runs/baichuan_{BAICHUAN_TYPE}'
-LORA_TARGET_MODULES = ['W_pack']
 #
 if BAICHUAN_TYPE == '7B':
-    model_id = 'baichuan-inc/baichuan-7B'
-    model_dir = get_model_dir(model_id, None)
+    model_dir = snapshot_download('baichuan-inc/baichuan-7B', 'v1.0.5')
     model, tokenizer = get_baichuan7B_model_tokenizer(model_dir)
 else:
-    model_id = 'baichuan-inc/Baichuan-13B-Base'
-    model_dir = get_model_dir(model_id, 'v1.0.1')
+    model_dir = snapshot_download('baichuan-inc/Baichuan-13B-Base', 'v1.0.2')
     model, tokenizer = get_baichuan13B_model_tokenizer(model_dir)
 #
 GRADIENT_CHECKPOINTING = True
@@ -46,14 +38,9 @@ if GRADIENT_CHECKPOINTING:
         model)
     model.gradient_checkpointing_enable()
     model.enable_input_require_grads()
-if tokenizer.pad_token_id is None:
-    tokenizer.pad_token_id = tokenizer.eos_token_id
-#
-logger.info(
-    f'bos_token_id: {tokenizer.bos_token_id}, eos_token_id: {tokenizer.eos_token_id}, '
-    f'pad_token_id: {tokenizer.pad_token_id}')
 
 # ### Preparing lora
+LORA_TARGET_MODULES = ['W_pack']
 LORA_RANK = 8
 LORA_ALPHA = 32
 LORA_DROPOUT_P = 0.1
@@ -3,22 +3,17 @@ from _common import *
 from transformers import TextStreamer
 
 device_ids = [0, 1]
-logger.info(device_ids)
 select_device(device_ids)
+# Note: You need to set the value of `CKPT_FPATH`
+CKPT_FAPTH = '/path/to/your/iter_xxx.pth'
 
 # ### Loading Model and Tokenizer
-# Note: You need to set the value of `CKPT_FPATH`
-CKPT_FAPTH = '/path/to/your/xxx.pth'
-LORA_TARGET_MODULES = ['query_key_value']
-
-model, tokenizer = get_chatglm2_model_tokenizer()
-if tokenizer.eos_token_id is None:
-    tokenizer.eos_token_id = tokenizer.pad_token_id
-if tokenizer.bos_token_id is None:
-    tokenizer.bos_token_id = 1
+model_dir = snapshot_download('ZhipuAI/chatglm2-6b', 'v1.0.6')
+model, tokenizer = get_chatglm2_model_tokenizer(model_dir)
 model.bfloat16()  # Consistent with training
 
 # ### Preparing lora
+LORA_TARGET_MODULES = ['query_key_value']
 LORA_RANK = 8
 LORA_ALPHA = 32
 LORA_DROPOUT_P = 0  # Arbitrary value
@@ -36,7 +31,8 @@ _, test_dataset = get_alpaca_en_zh_dataset(None, True)
 
 # ### Inference
 streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-for d in test_dataset[:5]:
+mini_test_dataset = test_dataset.select(range(5))
+for d in mini_test_dataset:
     output = d['output']
     d['output'] = None
     input_ids = tokenize_function(d, tokenizer)['input_ids']
@@ -48,9 +44,10 @@ for d in test_dataset[:5]:
         max_new_tokens=512,
         attention_mask=attention_mask,
         streamer=streamer,
-        pad_token_id=tokenizer.pad_token_id,
+        pad_token_id=tokenizer.eos_token_id,
         temperature=0.7,
         top_k=50,
+        top_p=0.7,
         do_sample=True)
     print()
     print(f'[LABELS]{output}')
@@ -3,46 +3,31 @@
 pip install modelscope
 pip install numpy pandas matplotlib scikit-learn
 pip install transformers datasets
-pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
-pip install tqdm
-pip install tensorboard
-pip install torchmetrics
-pip install sentencepiece
-pip install accelerate
+conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
+pip install tqdm tensorboard torchmetrics sentencepiece charset_normalizer accelerate
 
 pip install numpy -U  # Resolve torchmetrics dependencies and update numpy
 """
 
 from _common import *
 
-device_ids = [0, 1, 2, 3]
-logger.info(device_ids)
+device_ids = [0, 1]
 select_device(device_ids)
 seed_everything(42)
 
 # ### Loading Model and Tokenizer
-model_id = 'ZhipuAI/chatglm2-6b'
 WORK_DIR = 'runs/chatglm2'
-LORA_TARGET_MODULES = ['query_key_value']
 #
-model_dir = get_model_dir(model_id, None)
+model_dir = snapshot_download('ZhipuAI/chatglm2-6b', 'v1.0.6')
 model, tokenizer = get_chatglm2_model_tokenizer(model_dir)
-# chatglm2 does not support gradient_checkpointing
-GRADIENT_CHECKPOINTING = False
+#
+GRADIENT_CHECKPOINTING = True
 if GRADIENT_CHECKPOINTING:
     model.gradient_checkpointing_enable()
     model.enable_input_require_grads()
-logger.info(tokenizer.special_tokens)
-if tokenizer.eos_token_id is None:
-    tokenizer.eos_token_id = tokenizer.pad_token_id
-if tokenizer.bos_token_id is None:
-    tokenizer.bos_token_id = 1
-#
-logger.info(
-    f'bos_token_id: {tokenizer.bos_token_id}, eos_token_id: {tokenizer.eos_token_id}, '
-    f'pad_token_id: {tokenizer.pad_token_id}')
 
 # ### Preparing lora
+LORA_TARGET_MODULES = ['query_key_value']
 LORA_RANK = 8
 LORA_ALPHA = 32
 LORA_DROPOUT_P = 0.1
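
The stale claim that chatglm2 does not support gradient checkpointing is dropped and the flag flips to True. The pairing with enable_input_require_grads matters: with a LoRA-frozen backbone the embedding output has requires_grad=False, and the hook re-enables it so the checkpointed segments keep a gradient path:

    GRADIENT_CHECKPOINTING = True
    if GRADIENT_CHECKPOINTING:
        model.gradient_checkpointing_enable()
        model.enable_input_require_grads()  # needed when the base model is frozen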
@@ -49,11 +49,9 @@ from modelscope.utils.config import Config, ConfigDict
 from modelscope.utils.registry import default_group
 
 #
-SYSTEM_TEXT = """{system}"""
-USER_TEXT = """\n\n### 用户
-{user}"""
-ASSISTANT_PROMPT = """\n\n### 助手
-"""
+PROMPT = """System: {system}
+Human: {user}
+AI: """
 MAX_LENGTH = 2048
 TEST_MAX_LENGTH = MAX_LENGTH
 
@@ -62,11 +60,6 @@ logger = get_logger()
 #
 
 
-def get_model_dir(model_id: str, model_revision: Optional[str] = None) -> str:
-    model_dir = snapshot_download(model_id, model_revision)
-    return model_dir
-
-
 def _get_version(work_dir: str) -> int:
     if os.path.isdir(work_dir):
         fnames = os.listdir(work_dir)
@@ -93,28 +86,40 @@ def get_work_dir(work_dir: str) -> str:
     return work_dir
 
 
-def select_device(device_ids: List[int]) -> Device:
+def _format_device(device: Union[List[int], str]) -> Tuple[List[int], str]:
+    if isinstance(device, list):
+        device_ids = device
+        device_str = ','.join([str(d) for d in device])
+    else:
+        device_ids = [int(d) for d in device.split(',') if d != '-1']
+        device_str = device
+    device_str = device_str.replace(' ', '')
+    return device_ids, device_str
+
+
+def select_device(device: Union[List[int], str]) -> Device:
     """Call this function before cuda is initialized.
-    Return: master device
+    device: e.g. []: 'cpu', [0], [0, 1, 2]
+        e.g. '-1': 'cpu', '0', '0,1,2'
     """
     if torch.cuda.is_initialized():
         logger.warning('CUDA has been initialized! Device selection fails!')
         return torch.device('cuda:0')
     #
+    device_ids, device_str = _format_device(device)
+    #
+    os.environ['CUDA_VISIBLE_DEVICES'] = device_str
     log_s = 'Using device: '
-    if len(device_ids) == 0:  # cpu
-        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
-        device: str = 'cpu'
-        log_s += device
+    if len(device_ids) == 0:
+        master_device: str = 'cpu'
+        log_s += 'cpu'
     else:
-        os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(
-            [str(d) for d in device_ids])
         assert torch.cuda.is_available(
         ) and torch.cuda.device_count() >= len(device_ids)
-        log_s += f"cuda:{','.join([str(d) for d in device_ids])}"  # e.g. 'cuda:1,7,8'
-        device = 'cuda:0'
+        master_device = 'cuda:0'
+        log_s += f'cuda:{device_str}'
     logger.info(log_s)
-    return torch.device(device)
+    return torch.device(master_device)
 
 
 def seed_everything(seed: Optional[int] = None, gpu_dtm: bool = False) -> int:
@@ -148,37 +153,27 @@ def get_T_max(dataset_len: int, batch_size: int, max_epochs: int,
 def tokenize_function(system: str, user: str, assistant: Optional[str],
                       tokenizer) -> Dict[str, Any]:
     """Only applicable to baichuan and chatglm2. Other models need to be tested"""
-    system_text = SYSTEM_TEXT.format(system=system)
-    user_text = USER_TEXT.format(user=user)
-    system_text_ids: List[int] = tokenizer(
-        system_text, return_attention_mask=False,
+    src_text = PROMPT.format(system=system, user=user)
+    src_input_ids: List[int] = tokenizer(
+        src_text, return_attention_mask=False,
         add_special_tokens=True)['input_ids']
-    user_text_ids: List[int] = tokenizer(
-        user_text, return_attention_mask=False,
-        add_special_tokens=False)['input_ids']
-    assistant_p_input_ids: List[int] = tokenizer(
-        ASSISTANT_PROMPT,
-        return_attention_mask=False,
-        add_special_tokens=False)['input_ids']
-
-    # tokenizer.bos_token_id: Avoid `assistant` being empty
-    assistant_input_ids: List[int] = [tokenizer.bos_token_id]
+    #
+    tgt_input_ids: List[int] = []
     if assistant is not None:
-        assistant_input_ids += tokenizer(
+        tgt_input_ids += tokenizer(
             assistant, return_attention_mask=False,
             add_special_tokens=False)['input_ids']
-        assistant_input_ids += [tokenizer.eos_token_id]
+        tgt_input_ids += [tokenizer.eos_token_id]
+        labels = [-100] * len(src_input_ids) + tgt_input_ids
+    else:
+        labels = None
+    input_ids = src_input_ids + tgt_input_ids
     #
-    input_ids = system_text_ids + user_text_ids + assistant_p_input_ids + assistant_input_ids
-    if assistant is not None:  # train, val
+    if assistant is not None:
         if len(input_ids) > MAX_LENGTH:
             return {}
-        len_mask = len(input_ids) - len(assistant_input_ids)
-        labels = [-100] * len_mask + assistant_input_ids
-    else:  # test
+    else:
         input_ids = input_ids[-TEST_MAX_LENGTH:]
-        labels = None
 
     #
     return {'input_ids': input_ids, 'labels': labels}
 
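
The rewritten tokenize_function makes the loss masking explicit: prompt tokens receive label -100 (ignored by CrossEntropyLoss) and only the assistant tokens, plus the closing eos, are trained on. A toy illustration with invented ids:

    # Invented ids; real ones come from the tokenizer.
    src_input_ids = [5, 6, 7]      # prompt tokens, masked out of the loss
    tgt_input_ids = [8, 9, 2]      # assistant tokens + eos (2)
    input_ids = src_input_ids + tgt_input_ids
    labels = [-100] * len(src_input_ids) + tgt_input_ids
    assert len(labels) == len(input_ids)   # [-100, -100, -100, 8, 9, 2]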
@@ -305,12 +300,21 @@ class MyMetric(Metric):
         raise NotImplementedError
 
 
-def get_baichuan_model_tokenizer(model_dir: Optional[str] = None,
-                                 load_model: bool = True):
-    if model_dir is None:
-        model_id = 'baichuan-inc/baichuan-7B'
-        model_dir = get_model_dir(model_id, None)
-    #
+def _add_special_token(tokenizer):
+    if tokenizer.eos_token_id is None:
+        tokenizer.eos_token_id = 2
+    if tokenizer.bos_token_id is None:
+        tokenizer.bos_token_id = 1
+    if tokenizer.pad_token_id is None:
+        tokenizer.pad_token_id = 0
+    logger.info(f'bos_token_id: {tokenizer.bos_token_id}, '
+                f'eos_token_id: {tokenizer.eos_token_id}, '
+                f'pad_token_id: {tokenizer.pad_token_id}')
+
+
+def get_baichuan7B_model_tokenizer(model_dir: str,
+                                   load_model: bool = True,
+                                   add_special_token: bool = True):
     sys.path.insert(0, model_dir)
     from configuration_baichuan import BaiChuanConfig
     from tokenization_baichuan import BaiChuanTokenizer
@@ -327,15 +331,14 @@ def get_baichuan_model_tokenizer(model_dir: Optional[str] = None,
             device_map='auto',
             torch_dtype=torch.float16)
     #
+    if add_special_token:
+        _add_special_token(tokenizer)
     return model, tokenizer
 
 
-def get_chatglm2_model_tokenizer(model_dir: Optional[str] = None,
-                                 load_model: bool = True):
-    if model_dir is None:
-        model_id = 'ZhipuAI/chatglm2-6b'
-        model_dir = snapshot_download(model_id, None)
-    #
+def get_chatglm2_model_tokenizer(model_dir: str,
+                                 load_model: bool = True,
+                                 add_special_token: bool = True):
     config = read_config(model_dir)
     config['model'] = ConfigDict({'type': 'chatglm2-6b'})
     tokenizer = ChatGLM2Tokenizer.from_pretrained(model_dir)
@@ -346,6 +349,8 @@ def get_chatglm2_model_tokenizer(model_dir: Optional[str] = None,
         cfg_dict=config,
         device_map='auto',
         torch_dtype=torch.float16)
+    if add_special_token:
+        _add_special_token(tokenizer)
     return model, tokenizer
 
 
@@ -54,7 +54,6 @@
     "from _common import *\n",
     "from transformers import TextStreamer\n",
     "device_ids = [0, 1]\n",
-    "logger.info(device_ids)\n",
     "select_device(device_ids)"
    ]
   },
@@ -146,9 +145,8 @@
     "CKPT_FAPTH = '/home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/output_best/pytorch_model.bin'\n",
     "LORA_TARGET_MODULES = ['W_pack']\n",
     "\n",
-    "model, tokenizer = get_baichuan_model_tokenizer()\n",
-    "if tokenizer.pad_token_id is None:\n",
-    "    tokenizer.pad_token_id = tokenizer.eos_token_id\n",
+    "model_dir = snapshot_download('baichuan-inc/baichuan-7B', 'v1.0.5')\n",
+    "model, tokenizer = get_baichuan7B_model_tokenizer(model_dir)\n",
     "model.bfloat16()  # Consistent with training"
    ]
   },
@@ -451,8 +449,8 @@
     "    attention_mask = torch.ones_like(input_ids)\n",
     "    generate_ids = model.generate(input_ids=input_ids, max_new_tokens=512,\n",
     "                                  attention_mask=attention_mask,\n",
-    "                                  streamer=streamer, pad_token_id=tokenizer.pad_token_id, \n",
-    "                                  temperature=0.7, top_k=50, do_sample=True)\n",
+    "                                  streamer=streamer, pad_token_id=tokenizer.eos_token_id, \n",
+    "                                  temperature=0.7, top_k=50, top_p=0.7, do_sample=True)\n",
     "    print()\n",
     "    print(f'[LABELS]{assistant}')\n",
     "    print('-----------------------------------------------------------------------------------')\n",
@@ -33,16 +33,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# !pip install modelscope -U\n",
+    "# !pip install modelscope\n",
     "# !pip install numpy pandas matplotlib scikit-learn\n",
     "# !pip install transformers datasets\n",
-    "# !pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118\n",
-    "# !pip install tqdm\n",
-    "# !pip install tensorboard\n",
-    "# !pip install torchmetrics\n",
-    "# !pip install sentencepiece\n",
-    "# !pip install accelerate\n",
-    "#\n",
+    "# !conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia\n",
+    "# !pip install tqdm tensorboard torchmetrics sentencepiece charset_normalizer accelerate\n",
+    "\n",
     "# !pip install numpy -U  # Resolve torchmetrics dependencies and update numpy"
    ]
   },
@@ -75,8 +71,7 @@
    ],
    "source": [
     "from _common import *\n",
-    "device_ids = [0, 1, 2, 3]\n",
-    "logger.info(device_ids)\n",
+    "device_ids = [0, 1]\n",
     "select_device(device_ids)\n",
     "_ = seed_everything(42)"
    ]
@@ -132,22 +127,16 @@
    }
   ],
   "source": [
-   "model_id = 'baichuan-inc/baichuan-7B'\n",
    "WORK_DIR = 'runs/baichuan'\n",
    "LORA_TARGET_MODULES = ['W_pack']\n",
    "#\n",
-   "model_dir = get_model_dir(model_id, None)\n",
-   "model, tokenizer = get_baichuan_model_tokenizer(model_dir)\n",
+   "model_dir = snapshot_download('baichuan-inc/baichuan-7B', 'v1.0.5')\n",
+   "model, tokenizer = get_baichuan7B_model_tokenizer(model_dir)\n",
    "#\n",
    "GRADIENT_CHECKPOINTING = True\n",
    "if GRADIENT_CHECKPOINTING:\n",
    "    model.gradient_checkpointing_enable()\n",
-   "    model.enable_input_require_grads()\n",
-   "if tokenizer.pad_token_id is None:\n",
-   "    tokenizer.pad_token_id = tokenizer.eos_token_id\n",
-   "#\n",
-   "logger.info(f'bos_token_id: {tokenizer.bos_token_id}, eos_token_id: {tokenizer.eos_token_id}, '\n",
-   "            f'pad_token_id: {tokenizer.pad_token_id}')"
+   "    model.enable_input_require_grads()"
    ]
   },
   {
@@ -55,7 +55,6 @@
     "from _common import *\n",
     "from transformers import TextStreamer\n",
     "device_ids = [0, 1]\n",
-    "logger.info(device_ids)\n",
     "select_device(device_ids)"
    ]
   },
@@ -143,11 +142,8 @@
     "CKPT_FAPTH = '/home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/output_best/pytorch_model.bin'\n",
     "LORA_TARGET_MODULES = ['query_key_value']\n",
     "\n",
-    "model, tokenizer = get_chatglm2_model_tokenizer()\n",
-    "if tokenizer.eos_token_id is None:\n",
-    "    tokenizer.eos_token_id = tokenizer.pad_token_id\n",
-    "if tokenizer.bos_token_id is None:\n",
-    "    tokenizer.bos_token_id = 1\n",
+    "model_dir = snapshot_download('ZhipuAI/chatglm2-6b', 'v1.0.6')\n",
+    "model, tokenizer = get_chatglm2_model_tokenizer(model_dir)\n",
     "model.bfloat16()  # Consistent with training"
    ]
   },
@@ -484,8 +480,8 @@
     "    attention_mask = torch.ones_like(input_ids)\n",
     "    generate_ids = model.generate(input_ids=input_ids, max_new_tokens=512,\n",
     "                                  attention_mask=attention_mask,\n",
-    "                                  streamer=streamer, pad_token_id=tokenizer.pad_token_id, \n",
-    "                                  temperature=0.7, top_k=50, do_sample=True)\n",
+    "                                  streamer=streamer, pad_token_id=tokenizer.eos_token_id, \n",
+    "                                  temperature=0.7, top_k=50, top_p=0.7, do_sample=True)\n",
     "    print()\n",
     "    print(f'[LABELS]{assistant}')\n",
     "    print('-----------------------------------------------------------------------------------')\n",
@@ -40,22 +40,18 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# !pip install modelscope -U\n",
+    "# !pip install modelscope\n",
     "# !pip install numpy pandas matplotlib scikit-learn\n",
     "# !pip install transformers datasets\n",
-    "# !pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118\n",
-    "# !pip install tqdm\n",
-    "# !pip install tensorboard\n",
-    "# !pip install torchmetrics\n",
-    "# !pip install sentencepiece\n",
-    "# !pip install accelerate\n",
-    "#\n",
+    "# !conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia\n",
+    "# !pip install tqdm tensorboard torchmetrics sentencepiece charset_normalizer accelerate\n",
+    "\n",
     "# !pip install numpy -U  # Resolve torchmetrics dependencies and update numpy"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
     {
@@ -80,8 +76,7 @@
    ],
    "source": [
     "from _common import *\n",
-    "device_ids = [0, 1, 2, 3]\n",
-    "logger.info(device_ids)\n",
+    "device_ids = [0, 1]\n",
     "select_device(device_ids)\n",
     "_ = seed_everything(42)"
    ]
@@ -136,25 +131,16 @@
    }
   ],
   "source": [
-   "model_id = 'ZhipuAI/chatglm2-6b'\n",
    "WORK_DIR = 'runs/chatglm2'\n",
    "LORA_TARGET_MODULES = ['query_key_value']\n",
    "#\n",
-   "model_dir = get_model_dir(model_id, None)\n",
+   "model_dir = snapshot_download('ZhipuAI/chatglm2-6b', 'v1.0.6')\n",
    "model, tokenizer = get_chatglm2_model_tokenizer(model_dir)\n",
-   "# chatglm2 does not support gradient_checkpointing\n",
-   "GRADIENT_CHECKPOINTING = False\n",
+   "#\n",
+   "GRADIENT_CHECKPOINTING = True\n",
    "if GRADIENT_CHECKPOINTING:\n",
    "    model.gradient_checkpointing_enable()\n",
-   "    model.enable_input_require_grads()\n",
-   "logger.info(tokenizer.special_tokens)\n",
-   "if tokenizer.eos_token_id is None:\n",
-   "    tokenizer.eos_token_id = tokenizer.pad_token_id\n",
-   "if tokenizer.bos_token_id is None:\n",
-   "    tokenizer.bos_token_id = 1\n",
-   "#\n",
-   "logger.info(f'bos_token_id: {tokenizer.bos_token_id}, eos_token_id: {tokenizer.eos_token_id}, '\n",
-   "            f'pad_token_id: {tokenizer.pad_token_id}')"
+   "    model.enable_input_require_grads()"
    ]
   },
   {
@@ -1095,6 +1095,7 @@ class ChatGLM2ForConditionalGeneration(ChatGLMPreTrainedModel):
             shift_labels = labels[..., 1:].contiguous()
             # Flatten the tokens
             loss_fct = CrossEntropyLoss(ignore_index=-100)
+            shift_labels = shift_labels.to(shift_logits.device)
             loss = loss_fct(
                 shift_logits.view(-1, shift_logits.size(-1)),
                 shift_labels.view(-1))
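
This one-line modeling fix matters under device_map='auto': pipeline parallelism can leave the labels on a different GPU than the final-layer logits, and CrossEntropyLoss requires both tensors on one device. A minimal reproduction sketch (assumes two GPUs):

    import torch
    from torch.nn import CrossEntropyLoss

    logits = torch.randn(4, 10, device='cuda:1')          # produced by the last shard
    labels = torch.randint(0, 10, (4,), device='cuda:0')  # left on the first shard
    loss_fct = CrossEntropyLoss(ignore_index=-100)
    loss = loss_fct(logits, labels.to(logits.device))     # omitting .to() raises a device-mismatch RuntimeError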