# modelscope/examples/pytorch/llm/utils/utils.py
import datetime as dt
import math
import os
import random
import re
from collections import Counter
from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar
import matplotlib.pyplot as plt
import numpy as np
import torch
from datasets import Dataset as HfDataset
from tensorboard.backend.event_processing.event_accumulator import \
EventAccumulator
from torch import Tensor
from torch.nn import Module
from torch.nn.utils.rnn import pad_sequence
from torchmetrics import Accuracy, MeanMetric
from tqdm import tqdm
from transformers import GenerationConfig, HfArgumentParser, TextStreamer
from modelscope import get_logger
from modelscope.metrics.base import Metric
from modelscope.metrics.builder import METRICS
from modelscope.utils.registry import default_group
COLOR, COLOR_S = '#FFE2D9', '#FF7043'
DEFAULT_PROMPT = """Here's a conversation between a human and an AI assistant. \
The AI assistant provides detailed, friendly answers for the human.
### Human:
{instruction}
### AI:
"""
logger = get_logger()
os.environ['TOKENIZERS_PARALLELISM'] = 'true'
def _get_version(work_dir: str) -> int:
if os.path.isdir(work_dir):
fnames = os.listdir(work_dir)
else:
fnames = []
v_list = [-1]
for fname in fnames:
m = re.match(r'v(\d+)', fname)
if m is None:
continue
v = m.group(1)
v_list.append(int(v))
return max(v_list) + 1
def get_work_dir(work_dir: str) -> str:
"""add version"""
work_dir = os.path.abspath(work_dir)
version = _get_version(work_dir)
time = dt.datetime.now().strftime('%Y%m%d-%H%M%S')
work_dir = os.path.join(work_dir, f'v{version}-{time}')
logger.info(f'work_dir: {work_dir}')
return work_dir
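# Usage sketch (illustrative; the path and timestamp below are hypothetical):
#     work_dir = get_work_dir('runs/baichuan_sft')
#     # -> '/abs/path/runs/baichuan_sft/v0-20230726-181255'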
def seed_everything(seed: Optional[int] = None, gpu_dtm: bool = False) -> int:
if seed is None:
seed_max = np.iinfo(np.int32).max
seed = random.randint(0, seed_max)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
logger.info(f'Global seed set to {seed}')
if gpu_dtm:
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
logger.info(f'Setting deterministic: {True}, benchmark: {False}')
return seed
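# Usage sketch (illustrative): fix all RNGs; gpu_dtm=True additionally makes
# cuDNN deterministic, usually at some speed cost.
#     seed = seed_everything(42, gpu_dtm=True)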
def get_T_max(dataset_len: int, batch_size: int, max_epochs: int,
drop_last: bool) -> int:
"""Calculate T_max in CosineAnnealingLR"""
if drop_last:
T_max = dataset_len // batch_size
else:
T_max = math.ceil(dataset_len / batch_size)
T_max *= max_epochs
return T_max
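# Worked example (illustrative): dataset_len=1000, batch_size=16, max_epochs=3,
# drop_last=False -> ceil(1000 / 16) * 3 = 63 * 3 = 189 steps.
#     assert get_T_max(1000, 16, 3, False) == 189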
def tokenize_function(example: Dict[str, Optional[str]],
tokenizer,
prompt: str = DEFAULT_PROMPT,
max_length: Optional[int] = 2048) -> Dict[str, Any]:
instruction: str = example['instruction']
output = example.get('output')
src_text = prompt.format(instruction=instruction)
src_input_ids: List[int] = tokenizer(
src_text, return_attention_mask=False,
add_special_tokens=True)['input_ids']
tgt_input_ids = []
if output is not None:
tgt_input_ids += tokenizer(
output, return_attention_mask=False,
add_special_tokens=False)['input_ids']
tgt_input_ids += [tokenizer.eos_token_id]
labels = [-100] * len(src_input_ids) + tgt_input_ids
else:
labels = None
input_ids = src_input_ids + tgt_input_ids
if max_length is not None:
input_ids = input_ids[-max_length:]
if labels is not None:
labels = labels[-max_length:]
return {'input_ids': input_ids, 'labels': labels}
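# Usage sketch (illustrative; `train_dataset` and `tokenizer` come from the caller):
#     train_dataset = train_dataset.map(
#         lambda d: tokenize_function(d, tokenizer, max_length=2048))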
def stat_dataset(dataset: HfDataset) -> None:
"""Statistical analysis was performed on the dataset"""
_token_len = []
for d in dataset:
_token_len.append(len(d['input_ids']))
_token_len = np.array(_token_len)
mean = _token_len.mean().item()
std = _token_len.std().item()
min_ = _token_len.min().item()
max_ = _token_len.max().item()
    logger.info(
        f'Dataset Token Length: {mean:.2f}±{std:.2f}, '
        f'min={min_}, max={max_}, size={_token_len.shape[0]}')
def print_example(example: Dict[str, Any], tokenizer) -> None:
input_ids, labels = example['input_ids'], example['labels']
print(f'[INPUT_IDS] {input_ids}')
print(f'[INPUT] {tokenizer.decode(input_ids)}')
print()
n_mask = Counter(labels)[-100]
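    # The -100 labels form a contiguous prefix (see tokenize_function),
    # so only the tail after the mask is decoded below.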
print(f'[LABELS_IDS] {labels}')
print(f'[LABELS] <-100 * {n_mask}>{tokenizer.decode(labels[n_mask:])}')
def data_collate_fn(batch: List[Dict[str, Any]], tokenizer) -> Dict[str, Any]:
input_ids = [torch.tensor(b['input_ids']) for b in batch]
labels = [torch.tensor(b['labels']) for b in batch]
attention_mask = [
torch.ones(len(input_ids[i]), dtype=torch.int64)
for i in range(len(input_ids))
]
input_ids = pad_sequence(
input_ids, batch_first=True, padding_value=tokenizer.pad_token_id)
attention_mask = pad_sequence(
attention_mask, batch_first=True, padding_value=0)
labels = pad_sequence(labels, batch_first=True, padding_value=-100)
return {
'input_ids': input_ids,
'attention_mask': attention_mask,
'labels': labels
}
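# Usage sketch (illustrative): bind the tokenizer and pass the collator to a
# PyTorch DataLoader.
#     from functools import partial
#     from torch.utils.data import DataLoader
#     loader = DataLoader(dataset, batch_size=4, shuffle=True,
#                         collate_fn=partial(data_collate_fn, tokenizer=tokenizer))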
def print_model_info(model: Module, name: Optional[str] = None) -> None:
if name is None:
name = model.__class__.__name__
n_params = sum(p.numel() for p in model.parameters())
n_grads = sum(p.numel() for p in model.parameters() if p.requires_grad)
n_buffers = sum(p.numel() for p in model.buffers())
n_params /= 1e6
n_grads /= 1e6
n_buffers /= 1e6
s = [
f'{name}: ',
f'{n_params:.4f}M Params ({n_grads:.4f}M Trainable), ',
f'{n_buffers:.4f}M Buffers',
]
    s.append('.')
logger.info(''.join(s))
def show_freeze_layers(model: Module, max_lines: Optional[int] = 20) -> None:
named_p = list(model.named_parameters())
for i, (n, p) in enumerate(named_p):
if max_lines is not None and i >= max_lines:
logger.info('...')
break
logger.info(f'{n}: requires_grad={p.requires_grad}')
@METRICS.register_module(group_key=default_group, module_name='my_metric')
class MyMetric(Metric):
def __init__(self, vocab_size: int):
self.acc = Accuracy('multiclass', num_classes=vocab_size)
self.loss = MeanMetric()
def add(self, outputs: Dict[str, Any], inputs: Dict[str, Any]) -> None:
loss: Tensor = outputs.loss
self.loss.update(loss.cpu())
labels: Tensor = inputs['labels']
labels = labels[:, 1:]
labels_mask = labels != -100
logits: Tensor = outputs.logits[:, :-1]
logits = logits[labels_mask].contiguous().view(-1, logits.shape[-1])
pred = logits.argmax(dim=-1)
labels = labels[labels_mask].to(logits.device)
self.acc.update(pred.cpu(), labels.cpu())
def evaluate(self):
return {
'acc': self.acc.compute().item(),
'loss': self.loss.compute().item()
}
def merge(self, other: 'MyMetric') -> None:
"""This script does not support ddp. TODO"""
raise NotImplementedError
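# Usage sketch (illustrative): the metric can be driven directly, or selected
# in a ModelScope trainer by its registered name 'my_metric'.
#     metric = MyMetric(vocab_size=len(tokenizer))
#     metric.add(outputs, inputs)  # outputs has .loss/.logits, inputs has 'labels'
#     print(metric.evaluate())     # {'acc': ..., 'loss': ...}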
Item = Dict[str, float]
def read_tensorboard_file(fpath: str) -> Dict[str, List[Item]]:
if not os.path.isfile(fpath):
raise FileNotFoundError(f'fpath: {fpath}')
ea = EventAccumulator(fpath)
ea.Reload()
res = {}
tags = ea.Tags()['scalars']
for tag in tags:
values = ea.Scalars(tag)
r = []
for v in values:
r.append({'step': v.step, 'value': v.value})
res[tag] = r
return res
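# Usage sketch (illustrative; the event-file name is hypothetical):
#     data = read_tensorboard_file('runs/v0/tensorboard/events.out.tfevents.xxx')
#     # {'loss': [{'step': 0, 'value': 3.14}, ...], ...}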
def tensorboard_smoothing(values: List[float],
smooth: float = 0.9) -> List[float]:
norm_factor = 1
x = 0
res = []
for i in range(len(values)):
x = x * smooth + values[i] # Exponential decay
res.append(x / norm_factor)
norm_factor *= smooth
norm_factor += 1
return res
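# Worked example (illustrative): this is a bias-corrected exponential moving
# average, so early values are not pulled toward 0.
#     tensorboard_smoothing([0., 1., 2.], smooth=0.9)
#     # -> [0.0, 1/1.9 ≈ 0.526, 2.9/2.71 ≈ 1.070]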
def plot_images(tb_dir: str,
smooth_key: List[str],
smooth_val: float = 0.9,
figsize: Tuple[int, int] = (8, 5),
dpi: int = 100) -> None:
images_dir = os.path.join(os.path.dirname(tb_dir), 'images')
os.makedirs(images_dir, exist_ok=True)
fname = os.listdir(tb_dir)[0]
tb_path = os.path.join(tb_dir, fname)
data = read_tensorboard_file(tb_path)
for k in data.keys():
_data = data[k]
steps = [d['step'] for d in _data]
values = [d['value'] for d in _data]
if len(values) == 0:
continue
_, ax = plt.subplots(1, 1, squeeze=True, figsize=figsize, dpi=dpi)
ax.set_title(k)
if len(values) == 1:
ax.scatter(steps, values, color=COLOR_S)
elif k in smooth_key:
ax.plot(steps, values, color=COLOR)
values_s = tensorboard_smoothing(values, smooth_val)
ax.plot(steps, values_s, color=COLOR_S)
else:
ax.plot(steps, values, color=COLOR_S)
        fpath = os.path.join(images_dir, k.replace('/', '_'))
        plt.savefig(fpath, dpi=dpi, bbox_inches='tight')
        plt.close()  # release the figure so they don't accumulate across tags
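# Usage sketch (illustrative): plot every scalar tag, smoothing only 'loss';
# images are written next to tb_dir under 'images/'.
#     plot_images('runs/v0/tensorboard', smooth_key=['loss'])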
def inference(input_ids: List[int],
model,
tokenizer,
streamer: Optional[TextStreamer] = None,
generation_config: Optional[GenerationConfig] = None,
tag: str = '[INFERENCE]') -> str:
print(f'{tag}{tokenizer.decode(input_ids)}', end='')
input_ids = torch.tensor(input_ids)[None].cuda()
attention_mask = torch.ones_like(input_ids)
model.eval()
generate_ids = model.generate(
input_ids=input_ids,
attention_mask=attention_mask,
streamer=streamer,
generation_config=generation_config)
output_text = tokenizer.decode(generate_ids[0])
return output_text
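# Usage sketch (illustrative; `model` and `tokenizer` come from the caller, and
# the prompt should match the training template):
#     input_ids = tokenizer(
#         DEFAULT_PROMPT.format(instruction='Hello!'))['input_ids']
#     output = inference(input_ids, model, tokenizer,
#                        streamer=TextStreamer(tokenizer))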
_T = TypeVar('_T')
def parse_args(class_type: Type[_T],
argv: Optional[List[str]] = None) -> Tuple[_T, List[str]]:
parser = HfArgumentParser([class_type])
args, remaining_args = parser.parse_args_into_dataclasses(
argv, return_remaining_strings=True)
logger.info(f'args: {args}')
return args, remaining_args
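# Usage sketch (illustrative; `SftArguments` is a hypothetical dataclass of
# command-line fields):
#     args, remaining = parse_args(SftArguments)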