ok Merge branch 'master' of github.com:modelscope/modelscope into release/1.7

2026-02-25 04:30:48 +01:00 · 2023-07-05 00:07:50 +08:00
parent e750a912b8 2a79a6cee7
commit c4b7bf0171
15 changed files with 5432 additions and 45 deletions
--- a/examples/pytorch/baichuan/lora_inference.py
+++ b/examples/pytorch/baichuan/lora_inference.py
@@ -0,0 +1,28 @@
+import os.path as osp
+
+import torch
+
+from modelscope.pipelines import pipeline
+from modelscope.swift import Swift
+from modelscope.swift.lora import LoRAConfig
+from modelscope.utils.constant import Tasks
+
+# 使用源模型 model_id 初始化 pipeline
+model_id = 'baichuan-inc/baichuan-7B'
+pipe = pipeline(
+    task=Tasks.text_generation, model=model_id, model_revision='v1.0.2')
+# lora 配置，replace_modules，rank，alpha 需与训练参数相同
+lora_config = LoRAConfig(replace_modules=['pack'], rank=32, lora_alpha=32)
+# 转 bf16，需与训练精度相同
+model = pipe.model.bfloat16()
+# model 转 lora
+Swift.prepare_model(model, lora_config)
+# 加载 lora 参数，默认 link 到于 output/model 路径
+work_dir = './tmp'
+state_dict = torch.load(osp.join(work_dir, 'output/pytorch_model.bin'))
+model.load_state_dict(state_dict)
+# 使用 lora model 替换 pipeline 中的 model
+pipe.model = model
+# 使用 pipeline 推理
+result_zh = pipe('今天天气是真的')
+print(result_zh)
--- a/examples/pytorch/chatglm6b/chatglm_trainer.py
+++ b/examples/pytorch/chatglm6b/chatglm_trainer.py
@@ -16,6 +16,8 @@ class Seq2SeqTrainer(EpochBasedTrainer):
        if ignore_pad_token_for_loss:
            tokens = np.where(tokens != -100, tokens,
                              self.tokenizer.pad_token_id)
+        tokens = np.where(tokens < self.tokenizer.vocab_size, tokens,
+                          self.tokenizer.pad_token_id)
        return [
            t for t in self.tokenizer.batch_decode(
                tokens, skip_special_tokens=True) if t != '</s>'
@@ -59,7 +61,9 @@ class Seq2SeqTrainer(EpochBasedTrainer):

        gen_kwargs['input_ids'] = generation_inputs
        gen_kwargs['pad_token_id'] = self.tokenizer.pad_token_id
-        generated_tokens = self.model.generate(**gen_kwargs)
+        self.model.eval()
+        with torch.no_grad():
+            generated_tokens = self.model.generate(**gen_kwargs)
        generated_tokens = generated_tokens[:, generation_inputs.size()[-1]:]

        # in case the batch is shorter than max length, the output should be padded
--- a/examples/pytorch/chatglm6b/finetune.py
+++ b/examples/pytorch/chatglm6b/finetune.py
@@ -192,8 +192,15 @@ if config['model']['type'] == 'chatglm6b':
    model_config['model']['prefix_projection'] = args.prefix_projection

 tokenizer = ChatGLMTokenizer.from_pretrained(model_dir, trust_remote_code=True)
+
+device_map_kwargs = {}
+device_kwargs = {}
+if args.use_lora != 0:
+    device_map_kwargs['device_map'] = 'auto'
+    # No placement for model, leave the model to `device_map`
+    device_kwargs['device'] = 'cpu'
 model = Model.from_pretrained(
-    model_dir, cfg_dict=model_config, device_map='auto')
+    model_dir, cfg_dict=model_config, **device_map_kwargs)

 if args.ptuning_checkpoint is not None:
    # Evaluation
@@ -378,8 +385,7 @@ trainer = Seq2SeqTrainer(
    seed=args.seed,
    data_collator=data_collator,
    remove_unused_data=True,
-    # No placement for model, leave the model to `device_map`
-    device='cpu',
-    cfg_modify_fn=cfg_modify_fn)
+    cfg_modify_fn=cfg_modify_fn,
+    **device_kwargs)
 trainer.tokenizer = tokenizer
 trainer.train()
--- a/examples/pytorch/chatglm6b/text_generation_metric.py
+++ b/examples/pytorch/chatglm6b/text_generation_metric.py
@@ -53,7 +53,7 @@ class TextGenerationMetric(Metric):
        }
        for pred, label in zip(preds, labels):
            hypothesis = list(jieba.cut(pred))
-            if len(hypothesis) == 0:
+            if len(hypothesis) == 0 or ''.join(hypothesis) == '.':
                hypothesis = ['</s>']
            reference = list(jieba.cut(label))
            rouge = Rouge()
--- a/examples/pytorch/llm_agent/_common.py
+++ b/examples/pytorch/llm_agent/_common.py
@@ -0,0 +1,422 @@
+import ast
+import datetime as dt
+import math
+import os
+import random
+import re
+import sys
+from functools import partial
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+
+import json
+import matplotlib.pyplot as plt
+import numpy as np
+#
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from matplotlib.axes import Axes
+from matplotlib.figure import Figure
+from numpy import ndarray
+from tensorboard.backend.event_processing.event_accumulator import \
+    EventAccumulator
+from torch import Tensor
+from torch import device as Device
+from torch import dtype as Dtype
+from torch.nn import Module
+from torch.nn.parameter import Parameter
+from torch.nn.utils.rnn import pad_sequence
+from torch.optim import Optimizer
+from torch.optim import lr_scheduler as lrs
+from torch.optim.lr_scheduler import _LRScheduler as LRScheduler
+from torch.utils.data import Dataset
+#
+from torchmetrics import Accuracy, MeanMetric
+#
+from tqdm import tqdm
+
+#
+from modelscope import (Model, MsDataset, get_logger, read_config,
+                        snapshot_download)
+from modelscope.metrics.base import Metric
+from modelscope.metrics.builder import METRICS
+from modelscope.models.nlp.chatglm2 import ChatGLM2Tokenizer
+from modelscope.msdatasets.dataset_cls.custom_datasets import \
+    TorchCustomDataset
+from modelscope.swift import LoRAConfig, Swift
+from modelscope.trainers import EpochBasedTrainer
+from modelscope.utils.config import Config, ConfigDict
+from modelscope.utils.registry import default_group
+
+#
+SYSTEM_TEXT = """{system}"""
+USER_TEXT = """\n\n### 用户
+{user}"""
+ASSISTANT_PROMPT = """\n\n### 助手
+"""
+MAX_LENGTH = 2048
+TEST_MAX_LENGTH = MAX_LENGTH
+
+COLOR, COLOR_S = '#FFE2D9', '#FF7043'
+logger = get_logger()
+#
+
+
+def get_model_dir(model_id: str, model_revision: Optional[str] = None) -> str:
+    model_dir = snapshot_download(model_id, model_revision)
+    return model_dir
+
+
+def _get_version(work_dir: str) -> int:
+    if os.path.isdir(work_dir):
+        fnames = os.listdir(work_dir)
+    else:
+        fnames = []
+    v_list = [-1]
+    for fname in fnames:
+        m = re.match(r'v(\d+)', fname)
+        if m is None:
+            continue
+        v = m.group(1)
+        v_list.append(int(v))
+    return max(v_list) + 1
+
+
+def get_work_dir(work_dir: str) -> str:
+    """add version"""
+    work_dir = os.path.abspath(work_dir)
+    version = _get_version(work_dir)
+    time = dt.datetime.now().strftime('%Y%m%d-%H%M%S')
+    #
+    work_dir = os.path.join(work_dir, f'v{version}-{time}')
+    logger.info(f'work_dir: {work_dir}')
+    return work_dir
+
+
+def select_device(device_ids: List[int]) -> Device:
+    """Call this function before cuda is initialized.
+    Return: master device
+    """
+    if torch.cuda.is_initialized():
+        logger.warning('CUDA has been initialized! Device selection fails!')
+        return torch.device('cuda:0')
+    #
+    log_s = 'Using device: '
+    if len(device_ids) == 0:  # cpu
+        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
+        device: str = 'cpu'
+        log_s += device
+    else:
+        os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(
+            [str(d) for d in device_ids])
+        assert torch.cuda.is_available(
+        ) and torch.cuda.device_count() >= len(device_ids)
+        log_s += f"cuda:{','.join([str(d) for d in device_ids])}"  # e.g. "cuda:1,7,8"
+        device = 'cuda:0'
+    logger.info(log_s)
+    return torch.device(device)
+
+
+def seed_everything(seed: Optional[int] = None, gpu_dtm: bool = False) -> int:
+    if seed is None:
+        seed_max = np.iinfo(np.int32).max
+        seed = random.randint(0, seed_max)
+
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    logger.info(f'Global seed set to {seed}')
+    if gpu_dtm:
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cudnn.benchmark = False
+        logger.info(f'Setting deterministic: {True}, benchmark: {False}')
+    return seed
+
+
+def get_T_max(dataset_len: int, batch_size: int, max_epochs: int,
+              drop_last: bool) -> int:
+    """Calculate T_max in CosineAnnealingLR"""
+    if drop_last:
+        T_max = dataset_len // batch_size
+    else:
+        T_max = math.ceil(dataset_len / batch_size)
+    T_max *= max_epochs
+    return T_max
+
+
+def tokenize_function(system: str, user: str, assistant: Optional[str],
+                      tokenizer) -> Dict[str, Any]:
+    """Only applicable to baichuan and chatglm2. Other models need to be tested"""
+    system_text = SYSTEM_TEXT.format(system=system)
+    user_text = USER_TEXT.format(user=user)
+    system_text_ids: List[int] = tokenizer(
+        system_text, return_attention_mask=False,
+        add_special_tokens=True)['input_ids']
+    user_text_ids: List[int] = tokenizer(
+        user_text, return_attention_mask=False,
+        add_special_tokens=False)['input_ids']
+    assistant_p_input_ids: List[int] = tokenizer(
+        ASSISTANT_PROMPT,
+        return_attention_mask=False,
+        add_special_tokens=False)['input_ids']
+
+    # tokenizer.bos_token_id: Avoid `assistant` being empty
+    assistant_input_ids: List[int] = [tokenizer.bos_token_id]
+    if assistant is not None:
+        assistant_input_ids += tokenizer(
+            assistant, return_attention_mask=False,
+            add_special_tokens=False)['input_ids']
+        assistant_input_ids += [tokenizer.eos_token_id]
+    #
+    input_ids = system_text_ids + user_text_ids + assistant_p_input_ids + assistant_input_ids
+    if assistant is not None:  # train, val
+        if len(input_ids) > MAX_LENGTH:
+            return {}
+        len_mask = len(input_ids) - len(assistant_input_ids)
+        labels = [-100] * len_mask + assistant_input_ids
+    else:  # test
+        input_ids = input_ids[-TEST_MAX_LENGTH:]
+        labels = None
+
+    #
+    return {'input_ids': input_ids, 'labels': labels}
+
+
+class MyDataset(TorchCustomDataset):
+
+    def __init__(self, system: List[str], user: List[str],
+                 assistant: List[str], tokenize_function) -> None:
+        self._data = []
+        for i in tqdm(range(len(system))):
+            _d = tokenize_function(system[i], user[i], assistant[i])
+            if len(_d) == 0:
+                continue
+            self._data.append(_d)
+
+    def __getitem__(self, idx: int) -> Dict[str, Any]:
+        return self._data[idx]
+
+    def __len__(self) -> int:
+        return len(self._data)
+
+
+def stat_dataset(dataset: 'MyDataset') -> None:
+    """Statistical analysis was performed on the data set"""
+    _token_len = []
+    for d in dataset:
+        _token_len.append(len(d['input_ids']))
+    _token_len = np.array(_token_len)
+    mean = _token_len.mean().item()
+    std = _token_len.std().item()
+    min_ = _token_len.min().item()
+    max_ = _token_len.max().item()
+    logger.info(
+        f'Dataset Token Length: {mean:.6f}±{std:.6f}, min={min_:.6f}, max={max_:.6f}, size={_token_len.shape[0]}'
+    )
+
+
+def print_examples(examples: Dict[str, Any], tokenizer) -> None:
+    input_ids, labels = examples['input_ids'], examples['labels']
+    print(f'[INPUT_IDS] {tokenizer.decode(input_ids)}')
+    print()
+    print(
+        f'[LABLES] {tokenizer.decode([l if l != -100 else 0 for l in labels])}'
+    )
+
+
+def data_collate_fn(batch: List[Dict[str, Any]], tokenizer) -> Dict[str, Any]:
+    input_ids = [torch.tensor(b['input_ids']) for b in batch]
+    labels = [torch.tensor(b['labels']) for b in batch]
+    attention_mask = [
+        torch.ones(len(input_ids[i]), dtype=torch.int64)
+        for i in range(len(input_ids))
+    ]
+    #
+    input_ids = pad_sequence(
+        input_ids, batch_first=True, padding_value=tokenizer.pad_token_id)
+    attention_mask = pad_sequence(
+        attention_mask, batch_first=True, padding_value=0)
+    labels = pad_sequence(labels, batch_first=True, padding_value=-100)
+    return {
+        'input_ids': input_ids,
+        'attention_mask': attention_mask,
+        'labels': labels
+    }
+
+
+def print_model_info(model: Module, name: Optional[str] = None) -> None:
+    if name is None:
+        name = model.__class__.__name__
+    #
+    n_params = sum(p.numel() for p in model.parameters())
+    n_grads = sum(p.numel() for p in model.parameters() if p.requires_grad)
+    n_buffers = sum(p.numel() for p in model.buffers())
+    #
+    n_params /= 1e6
+    n_grads /= 1e6
+    n_buffers /= 1e6
+    s = [
+        f'{name}: ',
+        f'{n_params:.4f}M Params ({n_grads:.4f}M Trainable), ',
+        f'{n_buffers:.4f}M Buffers',
+    ]
+    s += '.'
+    logger.info(''.join(s))
+
+
+def show_freeze_layers(model: Module, max_lines: int = 20) -> None:
+    named_p = list(model.named_parameters())
+    for i, (n, p) in enumerate(named_p):
+        if i >= max_lines:
+            logger.info('...')
+            break
+        logger.info(f'{n}: requires_grad={p.requires_grad}')
+
+
+@METRICS.register_module(group_key=default_group, module_name='my_metric')
+class MyMetric(Metric):
+
+    def __init__(self, vocab_size: int):
+        self.acc = Accuracy('multiclass', num_classes=vocab_size)
+        self.loss = MeanMetric()
+
+    def add(self, outputs: Dict[str, Any], inputs: Dict[str, Any]) -> None:
+        loss: Tensor = outputs.loss
+        self.loss.update(loss)
+        #
+        labels: Tensor = inputs['labels']
+        labels = labels[:, 1:]
+        labels_mask = labels != -100
+        logits: Tensor = outputs.logits[:, :-1]
+        logits = logits[labels_mask].contiguous().view(-1, logits.shape[-1])
+        pred = logits.argmax(dim=-1)
+        labels = labels[labels_mask].to(logits.device)
+        self.acc.update(pred, labels)
+
+    def evaluate(self):
+        return {
+            'acc': self.acc.compute().item(),
+            'loss': self.loss.compute().item()
+        }
+
+    def merge(self, other: 'MyMetric') -> None:
+        """This script does not support ddp"""
+        raise NotImplementedError
+
+
+def get_baichuan_model_tokenizer(model_dir: Optional[str] = None,
+                                 load_model: bool = True):
+    if model_dir is None:
+        model_id = 'baichuan-inc/baichuan-7B'
+        model_dir = get_model_dir(model_id, None)
+    #
+    sys.path.insert(0, model_dir)
+    from configuration_baichuan import BaiChuanConfig
+    from tokenization_baichuan import BaiChuanTokenizer
+    from modeling_baichuan import BaiChuanForCausalLM
+    model_config = BaiChuanConfig.from_pretrained(model_dir)
+    model_config.torch_dtype = torch.float16
+    logger.info(f'model_config: {model_config}')
+    tokenizer = BaiChuanTokenizer.from_pretrained(model_dir)
+    model = None
+    if load_model:
+        model = BaiChuanForCausalLM.from_pretrained(
+            model_dir,
+            config=model_config,
+            device_map='auto',
+            torch_dtype=torch.float16)
+    #
+    return model, tokenizer
+
+
+def get_chatglm2_model_tokenizer(model_dir: Optional[str] = None,
+                                 load_model: bool = True):
+    if model_dir is None:
+        model_id = 'ZhipuAI/chatglm2-6b'
+        model_revision = 'v1.0.3'
+        model_dir = snapshot_download(model_id, model_revision)
+    #
+    config = read_config(model_dir)
+    config['model'] = ConfigDict({'type': 'chatglm2-6b'})
+    tokenizer = ChatGLM2Tokenizer.from_pretrained(model_dir)
+    model = None
+    if load_model:
+        model = Model.from_pretrained(
+            model_dir,
+            cfg_dict=config,
+            device_map='auto',
+            torch_dtype=torch.float16)
+    return model, tokenizer
+
+
+def make_dataset(
+    split: str, tokenize_function: Callable[[str, str, Optional[str]],
+                                            Dict[str, Any]]
+) -> MyDataset:
+    """
+    split: Literal["train", "validation"]
+    """
+    dataset = MsDataset.load(
+        'modelscope/ms_hackathon_23_agent_train_dev', split=split)
+    system = []
+    user = []
+    assistant = []
+    for d in dataset:
+        content = ast.literal_eval(d['conversations'])
+        s = content[0]['value']
+        assert len(content) % 2 == 1
+        for i in range(len(content) // 2):
+            system.append(s)
+            user.append(content[2 * i + 1]['value'])
+            assistant.append(content[2 * i + 2]['value'])
+    return MyDataset(system, user, assistant, tokenize_function)
+
+
+Item = Dict[str, float]
+
+
+def read_tensorboard_file(fpath: str) -> Dict[str, List[Item]]:
+    if not os.path.isfile(fpath):
+        raise FileNotFoundError(f'fpath: {fpath}')
+    ea = EventAccumulator(fpath)
+    ea.Reload()
+    res = {}
+    tags = ea.Tags()['scalars']
+    for tag in tags:
+        values = ea.Scalars(tag)
+        r = []
+        for v in values:
+            r.append({'step': v.step, 'value': v.value})
+        res[tag] = r
+    return res
+
+
+def tensorboard_smoothing(values: List[float],
+                          smooth: float = 0.9) -> List[float]:
+    norm_factor = 1
+    x = 0
+    res = []
+    for i in range(len(values)):
+        x = x * smooth + values[i]  # Exponential decay
+        res.append(x / norm_factor)
+        #
+        norm_factor *= smooth
+        norm_factor += 1
+    return res
+
+
+def plot_image(data: Dict[str, List[Item]], key_name: str,
+               smooth: float) -> Figure:
+    _data = data[key_name]
+    steps = [d['step'] for d in _data]
+    values = [d['value'] for d in _data]
+    fig, ax = plt.subplots(1, 1, squeeze=True, figsize=(8, 5), dpi=100)
+    ax.set_title(key_name)
+    if smooth != 0:
+        ax.plot(steps, values, color=COLOR)
+        values_s = tensorboard_smoothing(values, smooth)
+        ax.plot(steps, values_s, color=COLOR_S)
+    else:
+        ax.plot(steps, values, color=COLOR_S)
+    return fig
--- a/examples/pytorch/llm_agent/baichuan_infer.ipynb
+++ b/examples/pytorch/llm_agent/baichuan_infer.ipynb
@@ -0,0 +1,492 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Baichuan 推理"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 配置实验环境"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# !pip install transformers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2023-07-02 22:28:00,199] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-07-02 22:28:00,675 - modelscope - INFO - PyTorch version 2.0.1 Found.\n",
+      "2023-07-02 22:28:00,676 - modelscope - INFO - Loading ast index from /home/hackathon/.cache/modelscope/ast_indexer\n",
+      "2023-07-02 22:28:00,700 - modelscope - INFO - Loading done! Current index file version is 1.6.2, with md5 ddf811ee982377c1357284a2bfda3dec and a total number of 861 components indexed\n",
+      "2023-07-02 22:28:01,367 - modelscope - INFO - [0, 1]\n",
+      "2023-07-02 22:28:01,512 - modelscope - INFO - Using device: cuda:0,1\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "device(type='cuda', index=0)"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from _common import *\n",
+    "from transformers import TextStreamer\n",
+    "device_ids = list(range(min(4, torch.cuda.device_count())))\n",
+    "logger.info(device_ids)\n",
+    "select_device(device_ids)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 导入Model, Tokenizer\n",
+    "Note: 你需要设置CKPT_FPATH的内容, 指向`.bin`文件, 或`.pth`文件"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-07-02 22:28:03,375 - modelscope - INFO - Model revision not specified, use default: master in development mode\n",
+      "2023-07-02 22:28:03,375 - modelscope - INFO - Development mode use revision: master\n",
+      "2023-07-02 22:28:03,695 - modelscope - INFO - model_config: BaiChuanConfig {\n",
+      "  \"architectures\": [\n",
+      "    \"BaiChuanForCausalLM\"\n",
+      "  ],\n",
+      "  \"auto_map\": {\n",
+      "    \"AutoConfig\": \"configuration_baichuan.BaiChuanConfig\",\n",
+      "    \"AutoModelForCausalLM\": \"modeling_baichuan.BaiChuanForCausalLM\"\n",
+      "  },\n",
+      "  \"bos_token_id\": 1,\n",
+      "  \"eos_token_id\": 2,\n",
+      "  \"hidden_act\": \"silu\",\n",
+      "  \"hidden_size\": 4096,\n",
+      "  \"initializer_range\": 0.02,\n",
+      "  \"intermediate_size\": 11008,\n",
+      "  \"max_position_embeddings\": 4096,\n",
+      "  \"model_type\": \"baichuan\",\n",
+      "  \"num_attention_heads\": 32,\n",
+      "  \"num_hidden_layers\": 32,\n",
+      "  \"pad_token_id\": 0,\n",
+      "  \"rms_norm_eps\": 1e-06,\n",
+      "  \"tie_word_embeddings\": false,\n",
+      "  \"torch_dtype\": \"float16\",\n",
+      "  \"transformers_version\": \"4.30.2\",\n",
+      "  \"use_cache\": true,\n",
+      "  \"vocab_size\": 64000\n",
+      "}\n",
+      "\n",
+      "The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "BaiChuanForCausalLM(\n",
+       "  (model): Model(\n",
+       "    (embed_tokens): Embedding(64000, 4096, padding_idx=0)\n",
+       "    (layers): ModuleList(\n",
+       "      (0-31): 32 x DecoderLayer(\n",
+       "        (self_attn): Attention(\n",
+       "          (W_pack): Linear(in_features=4096, out_features=12288, bias=False)\n",
+       "          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
+       "          (rotary_emb): RotaryEmbedding()\n",
+       "        )\n",
+       "        (mlp): MLP(\n",
+       "          (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
+       "          (down_proj): Linear(in_features=11008, out_features=4096, bias=False)\n",
+       "          (up_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
+       "          (act_fn): SiLUActivation()\n",
+       "        )\n",
+       "        (input_layernorm): RMSNorm()\n",
+       "        (post_attention_layernorm): RMSNorm()\n",
+       "      )\n",
+       "    )\n",
+       "    (norm): RMSNorm()\n",
+       "  )\n",
+       "  (lm_head): Linear(in_features=4096, out_features=64000, bias=False)\n",
+       ")"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "CKPT_FAPTH = \"/home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/output_best/pytorch_model.bin\"\n",
+    "LORA_TARGET_MODULES = [\"W_pack\"]\n",
+    "\n",
+    "model, tokenizer = get_baichuan_model_tokenizer()\n",
+    "if tokenizer.pad_token_id is None:\n",
+    "    tokenizer.pad_token_id = tokenizer.eos_token_id\n",
+    "model.bfloat16()  # Consistent with training"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 导入Lora"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-07-02 22:28:14,108 - modelscope - INFO - lora_config: LoRAConfig(rank=8, replace_modules=['W_pack'], lora_alpha=32, lora_dropout=0, merge_weights=True, use_merged_linear=False, enable_lora=None, fan_in_fan_out=False, bias='none', only_lora_trainable=True, pretrained_weights='/home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/output_best/pytorch_model.bin')\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "BaiChuanForCausalLM(\n",
+       "  (model): Model(\n",
+       "    (embed_tokens): Embedding(64000, 4096, padding_idx=0)\n",
+       "    (layers): ModuleList(\n",
+       "      (0-31): 32 x DecoderLayer(\n",
+       "        (self_attn): Attention(\n",
+       "          (W_pack): Linear(in_features=4096, out_features=12288, bias=False)\n",
+       "          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
+       "          (rotary_emb): RotaryEmbedding()\n",
+       "        )\n",
+       "        (mlp): MLP(\n",
+       "          (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
+       "          (down_proj): Linear(in_features=11008, out_features=4096, bias=False)\n",
+       "          (up_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
+       "          (act_fn): SiLUActivation()\n",
+       "        )\n",
+       "        (input_layernorm): RMSNorm()\n",
+       "        (post_attention_layernorm): RMSNorm()\n",
+       "      )\n",
+       "    )\n",
+       "    (norm): RMSNorm()\n",
+       "  )\n",
+       "  (lm_head): Linear(in_features=4096, out_features=64000, bias=False)\n",
+       ")"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "LORA_RANK = 8\n",
+    "LORA_ALPHA = 32\n",
+    "LORA_DROPOUT_P = 0  # Arbitrary value\n",
+    "lora_config = LoRAConfig(\n",
+    "    replace_modules=LORA_TARGET_MODULES,\n",
+    "    rank=LORA_RANK,\n",
+    "    lora_alpha=LORA_ALPHA,\n",
+    "    lora_dropout=LORA_DROPOUT_P,\n",
+    "    pretrained_weights=CKPT_FAPTH)\n",
+    "logger.info(f\"lora_config: {lora_config}\")\n",
+    "Swift.prepare_model(model, lora_config)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 导入Dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-07-02 22:28:28,832 - modelscope - INFO - No subset_name specified, defaulting to the default\n",
+      "2023-07-02 22:28:29,317 - modelscope - WARNING - Reusing dataset ms_hackathon_23_agent_train_dev (/home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files)\n",
+      "2023-07-02 22:28:29,318 - modelscope - INFO - Generating dataset ms_hackathon_23_agent_train_dev (/home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files)\n",
+      "2023-07-02 22:28:29,318 - modelscope - INFO - Reusing cached meta-data file: /home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files/941b733ec0354c2172a3386d8788bb37\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "682dc9eedfce4092a25fcadc977c794a",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files: 0it [00:00, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "8e53d79d8e4845618231f3afb5bc096f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Extracting data files: 0it [00:00, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 285/285 [00:00<00:00, 1566679.74it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "test_dataset = make_dataset(\"validation\", lambda system, user, assistant:\n",
+    "                            {\"system\": system, \"user\": user, \"assistant\": assistant})"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 推理"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[TEST] 你是达摩院的ModelScopeGPT(魔搭助手)，你是个大语言模型， 是2023年达摩院的工程师训练得到的。你有多种能力，可以通过插件集成魔搭社区的模型api来回复用户的问题，还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_speech-generation\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容，用语音表示，同时可以选择是男声或者女声\", \"url\": \"http://90.49.118.175:2603/\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容，用语音表示，同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}}\n",
+      "\n",
+      "2. {\"plugin_name\": \"modelscope_speech-generation\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容，用语音表示，同时可以选择是男声或者女声\", \"url\": \"http://132.94.116.115:5983/\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容，用语音表示，同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}}\n",
+      "\n",
+      "3. {\"plugin_name\": \"modelscope_speech-generation\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容，用语音表示，同时可以选择是男声或者女声\", \"url\": \"http://94.43.176.75:1062/\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容，用语音表示，同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}} \n",
+      "\n",
+      "### 用户\n",
+      "生成一首诗歌，主题为“秋天的美景”，读出来这段话 \n",
+      "\n",
+      "### 助手\n",
+      "<s>秋天，是一个美丽的季节，是一个收获的季节，是一个充满诗意的季节。秋天的天空，湛蓝湛蓝的，像一块蓝宝石;秋天的田野，金黄色的稻谷，像一片金色的海洋;秋天的果园，硕果累累，像一幅美丽的画卷。秋天的山林，层林尽染，像一幅色彩斑斓的油画;秋天的河流，清澈见底，像一条银色的丝带。秋天的天空，湛蓝湛蓝的，像一块蓝宝石;秋天的田野，金黄色的稻谷，像一片金色的海洋;秋天的果园，硕果累累，像一幅美丽的画卷。秋天的山林，层林尽染，像一幅色彩斑斓的油画;秋天的河流，清澈见底，像一条银色的丝带。\n",
+      "\n",
+      "[LABELS]秋树红叶舞飘零，\n",
+      "山间小溪水潺潺。\n",
+      "微风拂面感清凉，\n",
+      "散步赏景心旷神怡。\n",
+      "<|startofthink|>```JSON\n",
+      "{\"api_name\": \"modelscope_speech-generation\", \"url\": \"http://90.49.118.175:2603/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"parameters\": {\"text\": \"秋树红叶舞飘零，\n",
+      "山间小溪水潺潺。\n",
+      "微风拂面感清凉，\n",
+      "散步赏景心旷神怡。\", \"gender\": \"woman\"}}\n",
+      "```<|endofthink|>\n",
+      "\n",
+      "<|startofexec|>```JSON\n",
+      "{\"result\": \"<audio id=\"audio\" controls=\"\" preload=\"none\"> <source id=\"wav\" src=\"http://xdp-expriment.oss-cn-zhangjiakou.aliyuncs.com/modelscope/audio/5c68265546564117.wav\"> </audio>\"}\n",
+      "```<|endofexec|>\n",
+      "<audio id=\"audio\" controls=\"\" preload=\"none\"> <source id=\"wav\" src=\"http://xdp-expriment.oss-cn-zhangjiakou.aliyuncs.com/modelscope/audio/5c68265546564117.wav\"> </audio>\n",
+      "-----------------------------------------------------------------------------------\n",
+      "[TEST] 你是达摩院的ModelScopeGPT(魔搭助手)，你是个大语言模型， 是2023年达摩院的工程师训练得到的。你有多种能力，可以通过插件集成魔搭社区的模型api来回复用户的问题，还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_text-address\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-address\", \"description\": \"针对中文的地址信息，识别出里面的元素，包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"url\": \"http://159.1.4.174:3210/\", \"paths\": [{\"name\": \"modelscope_text-address\", \"model_id\": \"/damo/mgeo_geographic_elements_tagging_chinese_base\", \"method\": \"post\", \"description\": \"针对中文的地址信息，识别出里面的元素，包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的地址信息\", \"required\": \"True\"}]}]}}\n",
+      "\n",
+      "2. {\"plugin_name\": \"modelscope_text-address\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-address\", \"description\": \"针对中文的地址信息，识别出里面的元素，包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"url\": \"http://172.163.158.154:5325/\", \"paths\": [{\"name\": \"modelscope_text-address\", \"model_id\": \"/damo/mgeo_geographic_elements_tagging_chinese_base\", \"method\": \"post\", \"description\": \"针对中文的地址信息，识别出里面的元素，包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的地址信息\", \"required\": \"True\"}]}]}}\n",
+      "\n",
+      "3. {\"plugin_name\": \"modelscope_text-address\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-address\", \"description\": \"针对中文的地址信息，识别出里面的元素，包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"url\": \"http://133.94.12.37:3160/\", \"paths\": [{\"name\": \"modelscope_text-address\", \"model_id\": \"/damo/mgeo_geographic_elements_tagging_chinese_base\", \"method\": \"post\", \"description\": \"针对中文的地址信息，识别出里面的元素，包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的地址信息\", \"required\": \"True\"}]}]}} \n",
+      "\n",
+      "### 用户\n",
+      "现在我给你另一条地址，请识别出里面的元素。输入地址：广东省深圳市南山区科技园北区 \n",
+      "\n",
+      "### 助手\n",
+      "<s><|startofthink|>```JSON\n",
+      "{\"api_name\": \"modelscope_text-address\", \"url\": \"http://133.94.12.37:3160/damo/mgeo_geographic_elements_tagging_chinese_base\", \"parameters\": {\"text\": \"广东省深圳市南山区科技园北区\"}}\n",
+      "```<|endofthink|>\n",
+      "\n",
+      "<|startofexec|>```JSON\n",
+      "{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"community\": \"科技园北区\"}\n",
+      "```<|endofexec|>\n",
+      "地址识别json表示：{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"community\": \"科技园北区\"}。我使用的模型是ModelScope的'damo/mgeo_geographic_elements_tagging_chinese_base'模型。这是基于达摩院联合高德发布的多任务多模态地址预训练底座MGeo模型微调得到的。\n",
+      "\n",
+      "[LABELS]<|startofthink|>```JSON\n",
+      "{\"api_name\": \"modelscope_text-address\", \"url\": \"http://159.1.4.174:3210/damo/mgeo_geographic_elements_tagging_chinese_base\", \"parameters\": {\"text\": \"广东省深圳市南山区科技园北区\"}}\n",
+      "```<|endofthink|>\n",
+      "\n",
+      "<|startofexec|>```JSON\n",
+      "{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"town\": \"\", \"community\": \"科技园北区\", \"poi\": \"\"}\n",
+      "```<|endofexec|>\n",
+      "地址识别json表示：{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"town\": \"\", \"community\": \"科技园北区\", \"poi\": \"\"}。我使用的模型是ModelScope的'damo/mgeo_geographic_elements_tagging_chinese_base'模型。这是基于达摩院联合高德发布的多任务多模态地址预训练底座MGeo模型微调得到的。\n",
+      "-----------------------------------------------------------------------------------\n",
+      "[TEST] 你是达摩院的ModelScopeGPT(魔搭助手)，你是个大语言模型， 是2023年达摩院的工程师训练得到的。你有多种能力，可以通过插件集成魔搭社区的模型api来回复用户的问题，还能解答用户使用模型遇到的问题和模型知识相关问答。目前支持的插件信息如下，请自行判断是否需要调用插件来解决当前用户问题。若需要调用插件，则需要将插件调用请求按照json格式给出，必须包含api_name、url、parameters字段，并在其前后使用<|startofthink|>和<|endofthink|>作为标志。然后你需要根据插件API调用结果生成合理的答复;若无需调用插件，则直接给出对应回复即可：\n",
+      "\n",
+      "1. {\"name\": \"modelscope_text-translation-zh2en\", \"description\": \"将输入的中文文本翻译成英文\", \"url\": \"http://api-inference.modelscope.cn/api-inference/v1/models\", \"paths\": [{\"name\": \"modelscope_text-translation-zh2en\", \"model_id\": \"/damo/nlp_csanmt_translation_zh2en\", \"method\": \"post\", \"description\": \"将输入的中文文本翻译成英文\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的中文文本\", \"required\": \"True\"}]}]}\n",
+      "\n",
+      "2. {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容，用语音表示，同时可以选择是男声或者女声\", \"url\": \"http://api-inference.modelscope.cn/api-inference/v1/models\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容，用语音表示，同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}\n",
+      "\n",
+      "3. {\"name\": \"modelscope_image-generation\", \"description\": \"针对文本输入，生成对应的图片\", \"url\": \"http://api-inference.modelscope.cn/api-inference/v1/models\", \"paths\": [{\"name\": \"modelscope_image-generation\", \"model_id\": \"/damo/image_generation\", \"method\": \"post\", \"description\": \"针对文本输入，生成对应的图片\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本信息\", \"required\": \"True\"}]}]} \n",
+      "\n",
+      "### 用户\n",
+      "歌手：古巨基\n",
+      "歌曲名：爱情马戏班\n",
+      "经典歌词：情是何等诡秘能令人使出看家把戏;恋爱就像走纲线般惊险;为你献技 像马戏班\n",
+      "请结合以上信息，编写一个智能音响的播放导语，需要有文采，字数30字以内，凸显一下即将播放该歌曲 \n",
+      "\n",
+      "### 助手\n",
+      "<s>古巨基的《爱情马戏班》，是一首经典的情歌，歌词中充满了对爱情的向往和对爱情的渴望，让人不禁沉醉其中。这首歌的旋律优美动听，歌词朗朗上口，让人听了就忍不住跟着哼唱。\n",
+      "\n",
+      "[LABELS]亲爱的主人，今天我为您带来的是古巨基的经典之作——《爱情马戏班》。这首歌曲描绘了情与爱的神秘和惊险，让人们为之倾倒。让我们一起享受这场爱情的马戏表演吧！\n",
+      "-----------------------------------------------------------------------------------\n",
+      "[TEST] 你是达摩院的ModelScopeGPT(魔搭助手)，你是个大语言模型， 是2023年达摩院的工程师训练得到的。你有多种能力，可以通过插件集成魔搭社区的模型api来回复用户的问题，还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"url\": \"http://114.42.178.183:8005/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n",
+      "\n",
+      "2. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"url\": \"http://93.82.87.89:6631/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n",
+      "\n",
+      "3. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"url\": \"http://4.105.93.165:8143/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}} \n",
+      "\n",
+      "### 用户\n",
+      "按照给定的schema抽取出下面文本对应的信息\n",
+      "schema：{\"动物\": null, \"食物\": null, \"颜色\": null}\n",
+      "这只棕色的狗狗很喜欢吃狗粮。 \n",
+      "\n",
+      "### 助手\n",
+      "<s><|startofthink|>```JSON\n",
+      "{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://4.105.93.165:8143/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n",
+      "```<|endofthink|>\n",
+      "\n",
+      "<|startofexec|>```JSON\n",
+      "{\"动物\": [\"棕色的狗狗\"], \"食物\": [\"狗粮\"], \"颜色\": [\"棕色\"]}\n",
+      "```<|endofexec|>\n",
+      "信息抽取结果：{\"动物\": [\"棕色的狗狗\"], \"食物\": [\"狗粮\"], \"颜色\": [\"棕色\"]}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n",
+      "\n",
+      "[LABELS]<|startofthink|>```JSON\n",
+      "{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://114.42.178.183:8005/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n",
+      "```<|endofthink|>\n",
+      "\n",
+      "<|startofexec|>```JSON\n",
+      "{\"动物\": [\"狗狗\"], \"食物\": [\"狗粮\"], \"颜色\": [\"棕色\"]}\n",
+      "```<|endofexec|>\n",
+      "信息抽取结果：{\"动物\": [\"狗狗\"], \"食物\": [\"狗粮\"], \"颜色\": [\"棕色\"]}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n",
+      "-----------------------------------------------------------------------------------\n",
+      "[TEST] 你是达摩院的ModelScopeGPT(魔搭助手)，你是个大语言模型， 是2023年达摩院的工程师训练得到的。你有多种能力，可以通过插件集成魔搭社区的模型api来回复用户的问题，还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"url\": \"http://28.179.171.5:6428/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n",
+      "\n",
+      "2. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"url\": \"http://100.111.18.38:6408/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n",
+      "\n",
+      "3. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"url\": \"http://144.67.18.142:6381/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}} \n",
+      "\n",
+      "### 用户\n",
+      "按照给定的schema抽取出下面文本对应的信息\n",
+      "schema：{\"人物\": null, \"地理位置\": null, \"组织机构\": null}\n",
+      "谷歌公司是一家全球知名的科技公司，总部位于美国的加利福尼亚州山景市。 \n",
+      "\n",
+      "### 助手\n",
+      "<s><|startofthink|>```JSON\n",
+      "{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://144.67.18.142:6381/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"谷歌公司是一家全球知名的科技公司，总部位于美国的加利福尼亚州山景市。\", \"schema\": \"{\\\"人物\\\": null, \\\"地理位置\\\": null, \\\"组织机构\\\": null}\"}}\n",
+      "```<|endofthink|>\n",
+      "\n",
+      "<|startofexec|>```JSON\n",
+      "{\"人物\": [\"谷歌公司\"], \"地理位置\": [\"美国\"], \"组织机构\": [\"科技公司\"]}\n",
+      "```<|endofexec|>\n",
+      "信息抽取结果：{\"人物\": [\"谷歌公司\"], \"地理位置\": [\"美国\"], \"组织机构\": [\"科技公司\"]}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n",
+      "\n",
+      "[LABELS]<|startofthink|>```JSON\n",
+      "{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://100.111.18.38:6408/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"谷歌公司是一家全球知名的科技公司，总部位于美国的加利福尼亚州山景市。\", \"schema\": \"{\\\"人物\\\": null, \\\"地理位置\\\": null, \\\"组织机构\\\": null}\"}}\n",
+      "```<|endofthink|>\n",
+      "\n",
+      "<|startofexec|>```JSON\n",
+      "{\"人物\": [], \"地理位置\": [\"美国\", \"加利福尼亚州山景市\"], \"组织机构\": [\"谷歌公司\"]}\n",
+      "```<|endofexec|>\n",
+      "信息抽取结果：{\"人物\": [], \"地理位置\": [\"美国\", \"加利福尼亚州山景市\"], \"组织机构\": [\"谷歌公司\"]}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n",
+      "-----------------------------------------------------------------------------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n",
+    "for d in test_dataset[:5]:\n",
+    "    system = d[\"system\"]\n",
+    "    user = d[\"user\"]\n",
+    "    assistant = d[\"assistant\"]\n",
+    "    input_ids = tokenize_function(system, user, None, tokenizer)[\"input_ids\"]\n",
+    "    print(f\"[TEST]{tokenizer.decode(input_ids)}\", end=\"\")\n",
+    "    input_ids = torch.tensor(input_ids)[None].cuda()\n",
+    "    attention_mask = torch.ones_like(input_ids)\n",
+    "    generate_ids = model.generate(input_ids=input_ids, max_new_tokens=512,\n",
+    "                                  attention_mask=attention_mask,\n",
+    "                                  streamer=streamer, pad_token_id=tokenizer.pad_token_id)\n",
+    "    print()\n",
+    "    print(f\"[LABELS]{assistant}\")\n",
+    "    print(\"-----------------------------------------------------------------------------------\")\n",
+    "    # input(\"next[ENTER]\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/examples/pytorch/llm_agent/baichuan_sft.ipynb
+++ b/examples/pytorch/llm_agent/baichuan_sft.ipynb
--- a/examples/pytorch/llm_agent/chatglm2_infer.ipynb
+++ b/examples/pytorch/llm_agent/chatglm2_infer.ipynb
@@ -0,0 +1,526 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## ChatGLM2 推理"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 配置实验环境\n",
+    "The following code is copied from baichuan_infer.ipynb"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# !pip install transformers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2023-07-02 21:48:47,527] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-07-02 21:48:48,006 - modelscope - INFO - PyTorch version 2.0.1 Found.\n",
+      "2023-07-02 21:48:48,007 - modelscope - INFO - Loading ast index from /home/hackathon/.cache/modelscope/ast_indexer\n",
+      "2023-07-02 21:48:48,032 - modelscope - INFO - Loading done! Current index file version is 1.6.2, with md5 ddf811ee982377c1357284a2bfda3dec and a total number of 861 components indexed\n",
+      "2023-07-02 21:48:48,708 - modelscope - INFO - [0, 1]\n",
+      "2023-07-02 21:48:48,848 - modelscope - INFO - Using device: cuda:0,1\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "device(type='cuda', index=0)"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from _common import *\n",
+    "from transformers import TextStreamer\n",
+    "device_ids = list(range(min(4, torch.cuda.device_count())))\n",
+    "logger.info(device_ids)\n",
+    "select_device(device_ids)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 导入Model, Tokenizer\n",
+    "Note: 你需要设置CKPT_FPATH的内容, 指向`.bin`文件, 或`.pth`文件"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-07-02 21:48:49,227 - modelscope - INFO - Development mode use revision: v1.0.3\n",
+      "The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. \n",
+      "The tokenizer class you load from this checkpoint is 'ChatGLMTokenizer'. \n",
+      "The class this function is called from is 'ChatGLM2Tokenizer'.\n",
+      "2023-07-02 21:48:49,572 - modelscope - INFO - initialize model from /home/hackathon/.cache/modelscope/hub/ZhipuAI/chatglm2-6b\n",
+      "Failed to load cpm_kernels:No module named 'cpm_kernels'\n",
+      "The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "b72b43e11bec49c78c8097deaffea8a7",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       "ChatGLM2ForConditionalGeneration(\n",
+       "  (transformer): ChatGLMModel(\n",
+       "    (embedding): Embedding(\n",
+       "      (word_embeddings): Embedding(65024, 4096)\n",
+       "    )\n",
+       "    (rotary_pos_emb): RotaryEmbedding()\n",
+       "    (encoder): GLMTransformer(\n",
+       "      (layers): ModuleList(\n",
+       "        (0-27): 28 x GLMBlock(\n",
+       "          (input_layernorm): RMSNorm()\n",
+       "          (self_attention): SelfAttention(\n",
+       "            (query_key_value): Linear(in_features=4096, out_features=4608, bias=True)\n",
+       "            (core_attention): CoreAttention(\n",
+       "              (attention_dropout): Dropout(p=0.0, inplace=False)\n",
+       "            )\n",
+       "            (dense): Linear(in_features=4096, out_features=4096, bias=False)\n",
+       "          )\n",
+       "          (post_attention_layernorm): RMSNorm()\n",
+       "          (mlp): MLP(\n",
+       "            (dense_h_to_4h): Linear(in_features=4096, out_features=27392, bias=False)\n",
+       "            (dense_4h_to_h): Linear(in_features=13696, out_features=4096, bias=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (final_layernorm): RMSNorm()\n",
+       "    )\n",
+       "    (output_layer): Linear(in_features=4096, out_features=65024, bias=False)\n",
+       "  )\n",
+       ")"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "CKPT_FAPTH = \"/home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/output_best/pytorch_model.bin\"\n",
+    "LORA_TARGET_MODULES = [\"query_key_value\"]\n",
+    "\n",
+    "model, tokenizer = get_chatglm2_model_tokenizer()\n",
+    "if tokenizer.eos_token_id is None:\n",
+    "    tokenizer.eos_token_id = tokenizer.pad_token_id\n",
+    "if tokenizer.bos_token_id is None:\n",
+    "    tokenizer.bos_token_id = 1\n",
+    "model.bfloat16()  # Consistent with training"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 导入Lora\n",
+    "The following code is copied from baichuan_infer.ipynb"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-07-02 21:48:56,970 - modelscope - INFO - lora_config: LoRAConfig(rank=8, replace_modules=['query_key_value'], lora_alpha=32, lora_dropout=0, merge_weights=True, use_merged_linear=False, enable_lora=None, fan_in_fan_out=False, bias='none', only_lora_trainable=True, pretrained_weights='/home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/output_best/pytorch_model.bin')\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "ChatGLM2ForConditionalGeneration(\n",
+       "  (transformer): ChatGLMModel(\n",
+       "    (embedding): Embedding(\n",
+       "      (word_embeddings): Embedding(65024, 4096)\n",
+       "    )\n",
+       "    (rotary_pos_emb): RotaryEmbedding()\n",
+       "    (encoder): GLMTransformer(\n",
+       "      (layers): ModuleList(\n",
+       "        (0-27): 28 x GLMBlock(\n",
+       "          (input_layernorm): RMSNorm()\n",
+       "          (self_attention): SelfAttention(\n",
+       "            (query_key_value): Linear(in_features=4096, out_features=4608, bias=True)\n",
+       "            (core_attention): CoreAttention(\n",
+       "              (attention_dropout): Dropout(p=0.0, inplace=False)\n",
+       "            )\n",
+       "            (dense): Linear(in_features=4096, out_features=4096, bias=False)\n",
+       "          )\n",
+       "          (post_attention_layernorm): RMSNorm()\n",
+       "          (mlp): MLP(\n",
+       "            (dense_h_to_4h): Linear(in_features=4096, out_features=27392, bias=False)\n",
+       "            (dense_4h_to_h): Linear(in_features=13696, out_features=4096, bias=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "      (final_layernorm): RMSNorm()\n",
+       "    )\n",
+       "    (output_layer): Linear(in_features=4096, out_features=65024, bias=False)\n",
+       "  )\n",
+       ")"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "LORA_RANK = 8\n",
+    "LORA_ALPHA = 32\n",
+    "LORA_DROPOUT_P = 0  # Arbitrary value\n",
+    "lora_config = LoRAConfig(\n",
+    "    replace_modules=LORA_TARGET_MODULES,\n",
+    "    rank=LORA_RANK,\n",
+    "    lora_alpha=LORA_ALPHA,\n",
+    "    lora_dropout=LORA_DROPOUT_P,\n",
+    "    pretrained_weights=CKPT_FAPTH)\n",
+    "logger.info(f\"lora_config: {lora_config}\")\n",
+    "Swift.prepare_model(model, lora_config)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 导入Dataset\n",
+    "The following code is copied from baichuan_infer.ipynb"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-07-02 21:49:01,924 - modelscope - INFO - No subset_name specified, defaulting to the default\n",
+      "2023-07-02 21:49:02,374 - modelscope - WARNING - Reusing dataset ms_hackathon_23_agent_train_dev (/home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files)\n",
+      "2023-07-02 21:49:02,375 - modelscope - INFO - Generating dataset ms_hackathon_23_agent_train_dev (/home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files)\n",
+      "2023-07-02 21:49:02,375 - modelscope - INFO - Reusing cached meta-data file: /home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files/941b733ec0354c2172a3386d8788bb37\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "277c2be395d645319f4601f1d1f1e4bf",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files: 0it [00:00, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "70946b16504c4a88883739bd273bddf6",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Extracting data files: 0it [00:00, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 285/285 [00:00<00:00, 1577014.04it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "test_dataset = make_dataset(\"validation\", lambda system, user, assistant:\n",
+    "                            {\"system\": system, \"user\": user, \"assistant\": assistant})"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 推理\n",
+    "The following code is copied from baichuan_infer.ipynb"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[TEST]你是达摩院的ModelScopeGPT（魔搭助手），你是个大语言模型， 是2023年达摩院的工程师训练得到的。你有多种能力，可以通过插件集成魔搭社区的模型api来回复用户的问题，还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_speech-generation\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容，用语音表示，同时可以选择是男声或者女声\", \"url\": \"http://90.49.118.175:2603/\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容，用语音表示，同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}}\n",
+      "\n",
+      "2. {\"plugin_name\": \"modelscope_speech-generation\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容，用语音表示，同时可以选择是男声或者女声\", \"url\": \"http://132.94.116.115:5983/\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容，用语音表示，同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}}\n",
+      "\n",
+      "3. {\"plugin_name\": \"modelscope_speech-generation\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容，用语音表示，同时可以选择是男声或者女声\", \"url\": \"http://94.43.176.75:1062/\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容，用语音表示，同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}} \n",
+      "\n",
+      "### 用户\n",
+      "生成一首诗歌，主题为“秋天的美景”，读出来这段话 \n",
+      "\n",
+      "### 助手\n",
+      "秋天是一个美丽的世界， \n",
+      "树叶在风中摇曳， \n",
+      "天空是那么的广阔， \n",
+      "秋天的美景让人陶醉。 \n",
+      "\n",
+      "树叶是那么的美丽， \n",
+      "像黄金一样闪耀， \n",
+      "像火焰一样燃烧， \n",
+      "像珍珠一样闪耀。 \n",
+      "\n",
+      "秋天的天空是那么的美丽， \n",
+      "像一面镜子， \n",
+      "像一片湖水， \n",
+      "像一片草原。 \n",
+      "\n",
+      "秋天是一个美丽的世界， \n",
+      "让我们享受它， \n",
+      "让我们欣赏它， \n",
+      "让我们感受它。\n",
+      "\n",
+      "[LABELS]秋树红叶舞飘零，\n",
+      "山间小溪水潺潺。\n",
+      "微风拂面感清凉，\n",
+      "散步赏景心旷神怡。\n",
+      "<|startofthink|>```JSON\n",
+      "{\"api_name\": \"modelscope_speech-generation\", \"url\": \"http://90.49.118.175:2603/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"parameters\": {\"text\": \"秋树红叶舞飘零，\n",
+      "山间小溪水潺潺。\n",
+      "微风拂面感清凉，\n",
+      "散步赏景心旷神怡。\", \"gender\": \"woman\"}}\n",
+      "```<|endofthink|>\n",
+      "\n",
+      "<|startofexec|>```JSON\n",
+      "{\"result\": \"<audio id=\"audio\" controls=\"\" preload=\"none\"> <source id=\"wav\" src=\"http://xdp-expriment.oss-cn-zhangjiakou.aliyuncs.com/modelscope/audio/5c68265546564117.wav\"> </audio>\"}\n",
+      "```<|endofexec|>\n",
+      "<audio id=\"audio\" controls=\"\" preload=\"none\"> <source id=\"wav\" src=\"http://xdp-expriment.oss-cn-zhangjiakou.aliyuncs.com/modelscope/audio/5c68265546564117.wav\"> </audio>\n",
+      "-----------------------------------------------------------------------------------\n",
+      "[TEST]你是达摩院的ModelScopeGPT（魔搭助手），你是个大语言模型， 是2023年达摩院的工程师训练得到的。你有多种能力，可以通过插件集成魔搭社区的模型api来回复用户的问题，还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_text-address\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-address\", \"description\": \"针对中文的地址信息，识别出里面的元素，包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"url\": \"http://159.1.4.174:3210/\", \"paths\": [{\"name\": \"modelscope_text-address\", \"model_id\": \"/damo/mgeo_geographic_elements_tagging_chinese_base\", \"method\": \"post\", \"description\": \"针对中文的地址信息，识别出里面的元素，包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的地址信息\", \"required\": \"True\"}]}]}}\n",
+      "\n",
+      "2. {\"plugin_name\": \"modelscope_text-address\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-address\", \"description\": \"针对中文的地址信息，识别出里面的元素，包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"url\": \"http://172.163.158.154:5325/\", \"paths\": [{\"name\": \"modelscope_text-address\", \"model_id\": \"/damo/mgeo_geographic_elements_tagging_chinese_base\", \"method\": \"post\", \"description\": \"针对中文的地址信息，识别出里面的元素，包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的地址信息\", \"required\": \"True\"}]}]}}\n",
+      "\n",
+      "3. {\"plugin_name\": \"modelscope_text-address\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-address\", \"description\": \"针对中文的地址信息，识别出里面的元素，包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"url\": \"http://133.94.12.37:3160/\", \"paths\": [{\"name\": \"modelscope_text-address\", \"model_id\": \"/damo/mgeo_geographic_elements_tagging_chinese_base\", \"method\": \"post\", \"description\": \"针对中文的地址信息，识别出里面的元素，包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的地址信息\", \"required\": \"True\"}]}]}} \n",
+      "\n",
+      "### 用户\n",
+      "现在我给你另一条地址，请识别出里面的元素。输入地址：广东省深圳市南山区科技园北区 \n",
+      "\n",
+      "### 助手\n",
+      "<|startofthink|>```JSON\n",
+      "{\"api_name\": \"modelscope_text-address\", \"url\": \"http://133.94.12.37:3160/damo/mgeo_geographic_elements_tagging_chinese_base\", \"parameters\": {\"text\": \"广东省深圳市南山区科技园北区\"}}\n",
+      "```<|endofthink|>\n",
+      "\n",
+      "<|startofexec|>```JSON\n",
+      "{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"town\": \"科技园北区\"}\n",
+      "```<|endofexec|>\n",
+      "地址识别结果为：{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"town\": \"科技园北区\"}。我识别出的元素包括：prov、city、district、town。\n",
+      "\n",
+      "[LABELS]<|startofthink|>```JSON\n",
+      "{\"api_name\": \"modelscope_text-address\", \"url\": \"http://159.1.4.174:3210/damo/mgeo_geographic_elements_tagging_chinese_base\", \"parameters\": {\"text\": \"广东省深圳市南山区科技园北区\"}}\n",
+      "```<|endofthink|>\n",
+      "\n",
+      "<|startofexec|>```JSON\n",
+      "{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"town\": \"\", \"community\": \"科技园北区\", \"poi\": \"\"}\n",
+      "```<|endofexec|>\n",
+      "地址识别json表示：{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"town\": \"\", \"community\": \"科技园北区\", \"poi\": \"\"}。我使用的模型是ModelScope的'damo/mgeo_geographic_elements_tagging_chinese_base'模型。这是基于达摩院联合高德发布的多任务多模态地址预训练底座MGeo模型微调得到的。\n",
+      "-----------------------------------------------------------------------------------\n",
+      "[TEST]你是达摩院的ModelScopeGPT（魔搭助手），你是个大语言模型， 是2023年达摩院的工程师训练得到的。你有多种能力，可以通过插件集成魔搭社区的模型api来回复用户的问题，还能解答用户使用模型遇到的问题和模型知识相关问答。目前支持的插件信息如下，请自行判断是否需要调用插件来解决当前用户问题。若需要调用插件，则需要将插件调用请求按照json格式给出，必须包含api_name、url、parameters字段，并在其前后使用<|startofthink|>和<|endofthink|>作为标志。然后你需要根据插件API调用结果生成合理的答复；若无需调用插件，则直接给出对应回复即可：\n",
+      "\n",
+      "1. {\"name\": \"modelscope_text-translation-zh2en\", \"description\": \"将输入的中文文本翻译成英文\", \"url\": \"http://api-inference.modelscope.cn/api-inference/v1/models\", \"paths\": [{\"name\": \"modelscope_text-translation-zh2en\", \"model_id\": \"/damo/nlp_csanmt_translation_zh2en\", \"method\": \"post\", \"description\": \"将输入的中文文本翻译成英文\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的中文文本\", \"required\": \"True\"}]}]}\n",
+      "\n",
+      "2. {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容，用语音表示，同时可以选择是男声或者女声\", \"url\": \"http://api-inference.modelscope.cn/api-inference/v1/models\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容，用语音表示，同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}\n",
+      "\n",
+      "3. {\"name\": \"modelscope_image-generation\", \"description\": \"针对文本输入，生成对应的图片\", \"url\": \"http://api-inference.modelscope.cn/api-inference/v1/models\", \"paths\": [{\"name\": \"modelscope_image-generation\", \"model_id\": \"/damo/image_generation\", \"method\": \"post\", \"description\": \"针对文本输入，生成对应的图片\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本信息\", \"required\": \"True\"}]}]} \n",
+      "\n",
+      "### 用户\n",
+      "歌手：古巨基\n",
+      "歌曲名：爱情马戏班\n",
+      "经典歌词：情是何等诡秘能令人使出看家把戏；恋爱就像走纲线般惊险；为你献技 像马戏班\n",
+      "请结合以上信息，编写一个智能音响的播放导语，需要有文采，字数30字以内，凸显一下即将播放该歌曲 \n",
+      "\n",
+      "### 助手\n",
+      "爱情马戏班，由古巨基演唱，是一首充满马戏班元素的浪漫歌曲，歌词中描述了爱情的神秘和危险，是一首值得听一听的浪漫歌曲。\n",
+      "\n",
+      "[LABELS]亲爱的主人，今天我为您带来的是古巨基的经典之作——《爱情马戏班》。这首歌曲描绘了情与爱的神秘和惊险，让人们为之倾倒。让我们一起享受这场爱情的马戏表演吧！\n",
+      "-----------------------------------------------------------------------------------\n",
+      "[TEST]你是达摩院的ModelScopeGPT（魔搭助手），你是个大语言模型， 是2023年达摩院的工程师训练得到的。你有多种能力，可以通过插件集成魔搭社区的模型api来回复用户的问题，还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"url\": \"http://114.42.178.183:8005/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n",
+      "\n",
+      "2. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"url\": \"http://93.82.87.89:6631/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n",
+      "\n",
+      "3. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"url\": \"http://4.105.93.165:8143/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}} \n",
+      "\n",
+      "### 用户\n",
+      "按照给定的schema抽取出下面文本对应的信息\n",
+      "schema：{\"动物\": null, \"食物\": null, \"颜色\": null}\n",
+      "这只棕色的狗狗很喜欢吃狗粮。 \n",
+      "\n",
+      "### 助手\n",
+      "<|startofthink|>```JSON\n",
+      "{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://4.105.93.165:8143/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n",
+      "```<|endofthink|>\n",
+      "\n",
+      "<|startofexec|>```JSON\n",
+      "{\"prov\": \"http://4.105.93.165:8143/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n",
+      "```<|endofexec|>\n",
+      "\n",
+      "<|startofexec|>```JSON\n",
+      "{\"prov\": \"http://4.105.93.165:8143/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n",
+      "```<|endofexec|>\n",
+      "<|startofexec|>```JSON\n",
+      "{\"prov\": \"http://4.105.93.165:8143/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n",
+      "```<|endofexec|>\n",
+      "<|startofexec|>```JSON\n",
+      "{\"prov\": \"http://4.105.93.165:8143/damo/nlp_structbert_siames\n",
+      "\n",
+      "[LABELS]<|startofthink|>```JSON\n",
+      "{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://114.42.178.183:8005/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n",
+      "```<|endofthink|>\n",
+      "\n",
+      "<|startofexec|>```JSON\n",
+      "{\"动物\": [\"狗狗\"], \"食物\": [\"狗粮\"], \"颜色\": [\"棕色\"]}\n",
+      "```<|endofexec|>\n",
+      "信息抽取结果：{\"动物\": [\"狗狗\"], \"食物\": [\"狗粮\"], \"颜色\": [\"棕色\"]}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n",
+      "-----------------------------------------------------------------------------------\n",
+      "[TEST]你是达摩院的ModelScopeGPT（魔搭助手），你是个大语言模型， 是2023年达摩院的工程师训练得到的。你有多种能力，可以通过插件集成魔搭社区的模型api来回复用户的问题，还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"url\": \"http://28.179.171.5:6428/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n",
+      "\n",
+      "2. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"url\": \"http://100.111.18.38:6408/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n",
+      "\n",
+      "3. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"url\": \"http://144.67.18.142:6381/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本，根据schema要抽取的内容，找出其中对应信息，并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}} \n",
+      "\n",
+      "### 用户\n",
+      "按照给定的schema抽取出下面文本对应的信息\n",
+      "schema：{\"人物\": null, \"地理位置\": null, \"组织机构\": null}\n",
+      "谷歌公司是一家全球知名的科技公司，总部位于美国的加利福尼亚州山景市。 \n",
+      "\n",
+      "### 助手\n",
+      "<|startofthink|>```JSON\n",
+      "{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://100.111.18.38:6408/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"谷歌公司是一家全球知名的科技公司，总部位于美国的加利福尼亚州山景市。\", \"schema\": \"{\\\"人物\\\": null, \\\"地理位置\\\": null, \\\"组织机构\\\": null}\"}}\n",
+      "```<|endofthink|>\n",
+      "\n",
+      "<|startofexec|>```JSON\n",
+      "{\"人物\": null, \"地理位置\": null, \"组织机构\": null}\n",
+      "```<|endofexec|>\n",
+      "信息抽取结果：{\"人物\": null, \"地理位置\": null, \"组织机构\": null}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调的通用信息抽取模型。\n",
+      "\n",
+      "[LABELS]<|startofthink|>```JSON\n",
+      "{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://100.111.18.38:6408/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"谷歌公司是一家全球知名的科技公司，总部位于美国的加利福尼亚州山景市。\", \"schema\": \"{\\\"人物\\\": null, \\\"地理位置\\\": null, \\\"组织机构\\\": null}\"}}\n",
+      "```<|endofthink|>\n",
+      "\n",
+      "<|startofexec|>```JSON\n",
+      "{\"人物\": [], \"地理位置\": [\"美国\", \"加利福尼亚州山景市\"], \"组织机构\": [\"谷歌公司\"]}\n",
+      "```<|endofexec|>\n",
+      "信息抽取结果：{\"人物\": [], \"地理位置\": [\"美国\", \"加利福尼亚州山景市\"], \"组织机构\": [\"谷歌公司\"]}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n",
+      "-----------------------------------------------------------------------------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n",
+    "for d in test_dataset[:5]:\n",
+    "    system = d[\"system\"]\n",
+    "    user = d[\"user\"]\n",
+    "    assistant = d[\"assistant\"]\n",
+    "    input_ids = tokenize_function(system, user, None, tokenizer)[\"input_ids\"]\n",
+    "    print(f\"[TEST]{tokenizer.decode(input_ids)}\", end=\"\")\n",
+    "    input_ids = torch.tensor(input_ids)[None].cuda()\n",
+    "    attention_mask = torch.ones_like(input_ids)\n",
+    "    generate_ids = model.generate(input_ids=input_ids, max_new_tokens=512,\n",
+    "                                  attention_mask=attention_mask,\n",
+    "                                  streamer=streamer, pad_token_id=tokenizer.pad_token_id)\n",
+    "    print()\n",
+    "    print(f\"[LABELS]{assistant}\")\n",
+    "    print(\"-----------------------------------------------------------------------------------\")\n",
+    "    # input(\"next[ENTER]\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "hackathon",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/examples/pytorch/llm_agent/chatglm2_sft.ipynb
+++ b/examples/pytorch/llm_agent/chatglm2_sft.ipynb
--- a/examples/pytorch/stable_diffusion/dreambooth/finetune_stable_diffusion_dreambooth.py
+++ b/examples/pytorch/stable_diffusion/dreambooth/finetune_stable_diffusion_dreambooth.py
@@ -1,3 +1,4 @@
+import os
 from dataclasses import dataclass, field

 import cv2
@@ -63,14 +64,20 @@ training_args = StableDiffusionDreamboothArguments(
    task='text-to-image-synthesis').parse_cli()
 config, args = training_args.to_config()

-train_dataset = MsDataset.load(
-    args.train_dataset_name,
-    split='train',
-    download_mode=DownloadMode.FORCE_REDOWNLOAD)
-validation_dataset = MsDataset.load(
-    args.train_dataset_name,
-    split='validation',
-    download_mode=DownloadMode.FORCE_REDOWNLOAD)
+if os.path.exists(args.train_dataset_name):
+    # Load local dataset
+    train_dataset = MsDataset.load(args.train_dataset_name)
+    validation_dataset = MsDataset.load(args.train_dataset_name)
+else:
+    # Load online dataset
+    train_dataset = MsDataset.load(
+        args.train_dataset_name,
+        split='train',
+        download_mode=DownloadMode.FORCE_REDOWNLOAD)
+    validation_dataset = MsDataset.load(
+        args.train_dataset_name,
+        split='validation',
+        download_mode=DownloadMode.FORCE_REDOWNLOAD)


 def cfg_modify_fn(cfg):
@@ -113,4 +120,6 @@ pipe = pipeline(
    model_revision=args.model_revision)

 output = pipe({'text': args.prompt})
+# visualize the result on ipynb and save it
+output
 cv2.imwrite('./dreambooth_result.png', output['output_imgs'][0])
--- a/examples/pytorch/stable_diffusion/lora/finetune_stable_diffusion_lora.py
+++ b/examples/pytorch/stable_diffusion/lora/finetune_stable_diffusion_lora.py
@@ -1,3 +1,4 @@
+import os
 from dataclasses import dataclass, field

 import cv2
@@ -23,14 +24,20 @@ training_args = StableDiffusionLoraArguments(
    task='text-to-image-synthesis').parse_cli()
 config, args = training_args.to_config()

-train_dataset = MsDataset.load(
-    args.train_dataset_name,
-    split='train',
-    download_mode=DownloadMode.FORCE_REDOWNLOAD)
-validation_dataset = MsDataset.load(
-    args.train_dataset_name,
-    split='validation',
-    download_mode=DownloadMode.FORCE_REDOWNLOAD)
+if os.path.exists(args.train_dataset_name):
+    # Load local dataset
+    train_dataset = MsDataset.load(args.train_dataset_name)
+    validation_dataset = MsDataset.load(args.train_dataset_name)
+else:
+    # Load online dataset
+    train_dataset = MsDataset.load(
+        args.train_dataset_name,
+        split='train',
+        download_mode=DownloadMode.FORCE_REDOWNLOAD)
+    validation_dataset = MsDataset.load(
+        args.train_dataset_name,
+        split='validation',
+        download_mode=DownloadMode.FORCE_REDOWNLOAD)


 def cfg_modify_fn(cfg):
@@ -66,4 +73,6 @@ pipe = pipeline(
    model_revision=args.model_revision)

 output = pipe({'text': args.prompt})
+# visualize the result on ipynb and save it
+output
 cv2.imwrite('./lora_result.png', output['output_imgs'][0])
--- a/examples/pytorch/stable_diffusion/tutorial.ipynb
+++ b/examples/pytorch/stable_diffusion/tutorial.ipynb
@@ -0,0 +1,83 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Modelscope微调Stable Diffusion教程\n",
+    "## 原理讲解\n",
+    "\n",
+    "从头训练stable diffusion需要数十万美元和一个月以上的时间，巨额的算力和时间成本让普通人难以承受。因此最理想的手段是利用开源的stable diffusion模型，通过微调手段定制化属于自己的模型。近年涌现出很多有效的微调stable diffusion手段，如[Textual Inversion](https://arxiv.org/abs/2208.01618)、[Dreambooth](https://arxiv.org/pdf/2208.12242.pdf)、[Lora](https://arxiv.org/abs/2106.09685)、[Custom Diffusion](https://arxiv.org/pdf/2302.05543.pdf)等，Modelscope目前已经支持了Dreambooth和Lora两种方法。\n",
+    "\n",
+    "### Dreambooth\n",
+    "如果我们直接使用几张图片微调Stable Diffusion模型，很容易陷入“过拟合”的状态，通常的表现为模型生成的结果同质化且损失了泛化能力。除此之外，还容易遇到语言漂移的问题，严重影响了模型性能。Dreambooth提出了重建损失和特定类别先验保留损失相结合的方法来解决这一问题。\n",
+    "\n",
+    "### Lora\n",
+    "Lora的全称是Low-Rank Adaptation，是一种低阶自适应技术。这项技术起源于微调大型语言模型，在stable diffusion上也能取得非常好的效果。因为大模型是一般是过参数化的，它们有更小的内在维度，Lora模型主要依赖于这个低的内在维度去做任务适配。通过低秩分解(先降维再升维)来模拟参数的改变量，从而以极小的参数量来实现大模型的间接训练。\n",
+    "\n",
+    "如下图所示，Lora在原先的模型层中并行插入了可训练的排序分解矩阵层，这个矩阵层是由一个降维矩阵A和一个升维矩阵B组成的。降维矩阵A采用高斯分布初始化，升维矩阵B初始化为全0，保证训练开始时旁路为0矩阵。在训练的时候原模型固定，只训练降维矩阵A和升维矩阵B；在推理的时候，将矩阵层加到原参数上。大量实验表明，对于stable diffusion我们用Lora微调Unet网络注意力层可以取得良好的效果。\n",
+    "\n",
+    "## 动手实践\n",
+    "\n",
+    "首先我们需要下载代码和安装环境。"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "vscode": {
+     "languageId": "plaintext"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "git clone https://github.com/modelscope/modelscope.git\n",
+    "cd modelscope"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "然后我们执行脚本，开始dreambooth和lora的训练和推理。"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "vscode": {
+     "languageId": "plaintext"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "bash examples/pytorch/stable_diffusion/dreambooth/run_train_dreambooth.sh"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "vscode": {
+     "languageId": "plaintext"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "bash examples/pytorch/stable_diffusion/lora/run_train_lora.sh"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/modelscope/models/nlp/chatglm/text_generation.py
+++ b/modelscope/models/nlp/chatglm/text_generation.py
@@ -931,7 +931,6 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
            self.num_attention_heads,
            self.hidden_size // self.num_attention_heads)
        # seq_len, b, nh, hidden_size
-        print('#########################:', past_key_values.device)
        past_key_values = self.dropout(past_key_values)
        past_key_values = past_key_values.permute([2, 1, 0, 3, 4]).split(2)
        # past_key_values = [(v[0], v[1]) for v in past_key_values]
--- a/modelscope/models/nlp/chatglm2/text_generation.py
+++ b/modelscope/models/nlp/chatglm2/text_generation.py
@@ -1076,17 +1076,17 @@ class ChatGLM2ForConditionalGeneration(ChatGLMPreTrainedModel):
        return inputs

    @torch.no_grad()
-    def chat(self,
-             tokenizer,
-             query: str,
-             history: List[Tuple[str, str]] = None,
-             max_length: int = 2048,
-             num_beams=1,
-             do_sample=True,
-             top_p=0.8,
-             temperature=0.8,
-             logits_processor=None,
-             **kwargs):
+    def _chat(self,
+              tokenizer,
+              query: str,
+              history: List[Tuple[str, str]] = None,
+              max_length: int = 2048,
+              num_beams=1,
+              do_sample=True,
+              top_p=0.8,
+              temperature=0.8,
+              logits_processor=None,
+              **kwargs):
        if history is None:
            history = []
        if logits_processor is None:
@@ -1107,7 +1107,7 @@ class ChatGLM2ForConditionalGeneration(ChatGLMPreTrainedModel):
        response = tokenizer.decode(outputs)
        response = self.process_response(response)
        history = history + [(query, response)]
-        return {OutputKeys.RESPONSE: response, OutputKeys.HISTORY: history}
+        return response, history

    @torch.no_grad()
    def stream_chat(self,
@@ -1295,6 +1295,41 @@ class ChatGLM2ForConditionalGeneration(ChatGLMPreTrainedModel):
            self.transformer.encoder,
            bits,
            empty_init=empty_init,
-            device=device,
-            **kwargs)
+            device=device)
        return self
+
+    def chat(self, input: Dict, tokenizer) -> Dict:
+        text = input['text']
+        history = input['history']
+        # args
+        if 'max_length' in input:
+            max_length = input['max_length']
+        else:
+            max_length = 2048
+
+        if 'temperature' in input:
+            temperature = input['temperature']
+        else:
+            temperature = 0.95
+
+        if 'num_beams' in input:
+            num_beams = input['num_beams']
+        else:
+            num_beams = 1
+
+        if 'do_sample' in input:
+            do_sample = input['do_sample']
+        else:
+            do_sample = True
+
+        if type(history) == torch.Tensor:
+            history = history.tolist()
+        response, history = self._chat(
+            tokenizer,
+            text,
+            history,
+            max_length=max_length,
+            temperature=temperature,
+            num_beams=num_beams,
+            do_sample=do_sample)
+        return {OutputKeys.RESPONSE: response, OutputKeys.HISTORY: history}
--- a/modelscope/pipelines/nlp/text_generation_pipeline.py
+++ b/modelscope/pipelines/nlp/text_generation_pipeline.py
@@ -6,6 +6,7 @@ from typing import Any, Dict, Optional, Union

 import torch

+from modelscope import snapshot_download
 from modelscope.metainfo import Pipelines
 from modelscope.models.base import Model
 from modelscope.outputs import (ModelOutputBase, OutputKeys,
@@ -65,7 +66,8 @@ class TextGenerationPipeline(Pipeline, PipelineStreamingOutputMixin):
            device=device,
            auto_collate=auto_collate,
            compile=kwargs.pop('compile', False),
-            compile_options=kwargs.pop('compile_options', {}))
+            compile_options=kwargs.pop('compile_options', {}),
+            **kwargs)

        assert isinstance(self.model, Model), \
            f'please check whether model config exists in {ModelFile.CONFIGURATION}'
@@ -192,9 +194,14 @@ class ChatGLM6bTextGenerationPipeline(Pipeline):
                 quantization_bit=None,
                 use_bf16=False,
                 **kwargs):
-        from modelscope.models.nlp.chatglm.text_generation import ChatGLMForConditionalGeneration
-        model = ChatGLMForConditionalGeneration(model) if isinstance(
-            model, str) else model
+        from modelscope.models.nlp.chatglm.text_generation import ChatGLMForConditionalGeneration, ChatGLMConfig
+        if isinstance(model, str):
+            model_dir = snapshot_download(
+                model) if not os.path.exists(model) else model
+            model = ChatGLMForConditionalGeneration.from_pretrained(
+                model_dir).half()
+            if torch.cuda.is_available():
+                model = model.cuda()
        if quantization_bit is not None:
            model = model.quantize(quantization_bit)
        if use_bf16:
@@ -204,11 +211,15 @@ class ChatGLM6bTextGenerationPipeline(Pipeline):

        super().__init__(model=model, **kwargs)

+    def _sanitize_parameters(self, **pipeline_parameters):
+        return {}, pipeline_parameters, {}
+
    def preprocess(self, inputs, **preprocess_params) -> Dict[str, Any]:
        return inputs

    # define the forward pass
    def forward(self, inputs: Dict, **forward_params) -> Dict[str, Any]:
+        inputs.update(forward_params)
        return self.model.chat(inputs)

    # format the outputs from pipeline
@@ -225,9 +236,13 @@ class ChatGLM6bV2TextGenerationPipeline(Pipeline):
                 quantization_bit=None,
                 use_bf16=False,
                 **kwargs):
-        from modelscope.models.nlp import ChatGLM2ForConditionalGeneration, ChatGLM2Tokenizer
-        model = ChatGLM2ForConditionalGeneration(model) if isinstance(
-            model, str) else model
+        from modelscope.models.nlp import ChatGLM2ForConditionalGeneration, ChatGLM2Tokenizer, ChatGLM2Config
+        if isinstance(model, str):
+            model_dir = snapshot_download(
+                model) if not os.path.exists(model) else model
+            model = ChatGLM2ForConditionalGeneration.from_pretrained(model_dir)
+            if torch.cuda.is_available():
+                model = model.cuda()
        if quantization_bit is not None:
            model = model.quantize(quantization_bit)
        if use_bf16:
@@ -239,12 +254,16 @@ class ChatGLM6bV2TextGenerationPipeline(Pipeline):

        super().__init__(model=model, **kwargs)

+    def _sanitize_parameters(self, **pipeline_parameters):
+        return {}, pipeline_parameters, {}
+
    def preprocess(self, inputs, **preprocess_params) -> Dict[str, Any]:
        return inputs

    # define the forward pass
    def forward(self, inputs: Dict, **forward_params) -> Dict[str, Any]:
-        return self.model.chat(self.tokenizer, inputs['text'])
+        inputs.update(forward_params)
+        return self.model.chat(inputs, self.tokenizer)

    # format the outputs from pipeline
    def postprocess(self, input, **kwargs) -> Dict[str, Any]: