mirror of
https://github.com/modelscope/modelscope.git
synced 2025-12-16 16:27:45 +01:00
483 lines
34 KiB
Plaintext
483 lines
34 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"attachments": {},
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Baichuan 推理"
|
||
]
|
||
},
|
||
{
|
||
"attachments": {},
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### 配置实验环境"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"[2023-07-02 22:28:00,199] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2023-07-02 22:28:00,675 - modelscope - INFO - PyTorch version 2.0.1 Found.\n",
|
||
"2023-07-02 22:28:00,676 - modelscope - INFO - Loading ast index from /home/hackathon/.cache/modelscope/ast_indexer\n",
|
||
"2023-07-02 22:28:00,700 - modelscope - INFO - Loading done! Current index file version is 1.6.2, with md5 ddf811ee982377c1357284a2bfda3dec and a total number of 861 components indexed\n",
|
||
"2023-07-02 22:28:01,367 - modelscope - INFO - [0, 1]\n",
|
||
"2023-07-02 22:28:01,512 - modelscope - INFO - Using device: cuda:0,1\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"device(type='cuda', index=0)"
|
||
]
|
||
},
|
||
"execution_count": 1,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"from _common import *\n",
|
||
"from transformers import TextStreamer\n",
|
||
"device_ids = [0, 1]\n",
|
||
"select_device(device_ids)"
|
||
]
|
||
},
|
||
{
|
||
"attachments": {},
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### 导入Model, Tokenizer\n",
|
||
"Note: 你需要设置CKPT_FAPTH的内容, 指向`.bin`文件, 或`.pth`文件"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2023-07-02 22:28:03,375 - modelscope - INFO - Model revision not specified, use default: master in development mode\n",
|
||
"2023-07-02 22:28:03,375 - modelscope - INFO - Development mode use revision: master\n",
|
||
"2023-07-02 22:28:03,695 - modelscope - INFO - model_config: BaiChuanConfig {\n",
|
||
" \"architectures\": [\n",
|
||
" \"BaiChuanForCausalLM\"\n",
|
||
" ],\n",
|
||
" \"auto_map\": {\n",
|
||
" \"AutoConfig\": \"configuration_baichuan.BaiChuanConfig\",\n",
|
||
" \"AutoModelForCausalLM\": \"modeling_baichuan.BaiChuanForCausalLM\"\n",
|
||
" },\n",
|
||
" \"bos_token_id\": 1,\n",
|
||
" \"eos_token_id\": 2,\n",
|
||
" \"hidden_act\": \"silu\",\n",
|
||
" \"hidden_size\": 4096,\n",
|
||
" \"initializer_range\": 0.02,\n",
|
||
" \"intermediate_size\": 11008,\n",
|
||
" \"max_position_embeddings\": 4096,\n",
|
||
" \"model_type\": \"baichuan\",\n",
|
||
" \"num_attention_heads\": 32,\n",
|
||
" \"num_hidden_layers\": 32,\n",
|
||
" \"pad_token_id\": 0,\n",
|
||
" \"rms_norm_eps\": 1e-06,\n",
|
||
" \"tie_word_embeddings\": false,\n",
|
||
" \"torch_dtype\": \"float16\",\n",
|
||
" \"transformers_version\": \"4.30.2\",\n",
|
||
" \"use_cache\": true,\n",
|
||
" \"vocab_size\": 64000\n",
|
||
"}\n",
|
||
"\n",
|
||
"The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"BaiChuanForCausalLM(\n",
|
||
" (model): Model(\n",
|
||
" (embed_tokens): Embedding(64000, 4096, padding_idx=0)\n",
|
||
" (layers): ModuleList(\n",
|
||
" (0-31): 32 x DecoderLayer(\n",
|
||
" (self_attn): Attention(\n",
|
||
" (W_pack): Linear(in_features=4096, out_features=12288, bias=False)\n",
|
||
" (o_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
|
||
" (rotary_emb): RotaryEmbedding()\n",
|
||
" )\n",
|
||
" (mlp): MLP(\n",
|
||
" (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
|
||
" (down_proj): Linear(in_features=11008, out_features=4096, bias=False)\n",
|
||
" (up_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
|
||
" (act_fn): SiLUActivation()\n",
|
||
" )\n",
|
||
" (input_layernorm): RMSNorm()\n",
|
||
" (post_attention_layernorm): RMSNorm()\n",
|
||
" )\n",
|
||
" )\n",
|
||
" (norm): RMSNorm()\n",
|
||
" )\n",
|
||
" (lm_head): Linear(in_features=4096, out_features=64000, bias=False)\n",
|
||
")"
|
||
]
|
||
},
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"CKPT_FAPTH = '/home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/output_best/pytorch_model.bin'\n",
|
||
"LORA_TARGET_MODULES = ['W_pack']\n",
|
||
"\n",
|
||
"model_dir = snapshot_download('baichuan-inc/baichuan-7B', 'v1.0.5')\n",
|
||
"model, tokenizer = get_baichuan7B_model_tokenizer(model_dir)\n",
|
||
"model.bfloat16() # Consistent with training"
|
||
]
|
||
},
|
||
{
|
||
"attachments": {},
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### 导入Lora"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2023-07-02 22:28:14,108 - modelscope - INFO - lora_config: LoRAConfig(rank=8, replace_modules=['W_pack'], lora_alpha=32, lora_dropout=0, merge_weights=True, use_merged_linear=False, enable_lora=None, fan_in_fan_out=False, bias='none', only_lora_trainable=True, pretrained_weights='/home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/output_best/pytorch_model.bin')\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"BaiChuanForCausalLM(\n",
|
||
" (model): Model(\n",
|
||
" (embed_tokens): Embedding(64000, 4096, padding_idx=0)\n",
|
||
" (layers): ModuleList(\n",
|
||
" (0-31): 32 x DecoderLayer(\n",
|
||
" (self_attn): Attention(\n",
|
||
" (W_pack): Linear(in_features=4096, out_features=12288, bias=False)\n",
|
||
" (o_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
|
||
" (rotary_emb): RotaryEmbedding()\n",
|
||
" )\n",
|
||
" (mlp): MLP(\n",
|
||
" (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
|
||
" (down_proj): Linear(in_features=11008, out_features=4096, bias=False)\n",
|
||
" (up_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
|
||
" (act_fn): SiLUActivation()\n",
|
||
" )\n",
|
||
" (input_layernorm): RMSNorm()\n",
|
||
" (post_attention_layernorm): RMSNorm()\n",
|
||
" )\n",
|
||
" )\n",
|
||
" (norm): RMSNorm()\n",
|
||
" )\n",
|
||
" (lm_head): Linear(in_features=4096, out_features=64000, bias=False)\n",
|
||
")"
|
||
]
|
||
},
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"LORA_RANK = 8\n",
|
||
"LORA_ALPHA = 32\n",
|
||
"LORA_DROPOUT_P = 0 # Arbitrary value\n",
|
||
"lora_config = LoRAConfig(\n",
|
||
" target_modules=LORA_TARGET_MODULES,\n",
|
||
" r=LORA_RANK,\n",
|
||
" lora_alpha=LORA_ALPHA,\n",
|
||
" lora_dropout=LORA_DROPOUT_P,\n",
|
||
" pretrained_weights=CKPT_FAPTH)\n",
|
||
"logger.info(f'lora_config: {lora_config}')\n",
|
||
"Swift.prepare_model(model, lora_config)"
|
||
]
|
||
},
|
||
{
|
||
"attachments": {},
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### 导入Dataset"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2023-07-02 22:28:28,832 - modelscope - INFO - No subset_name specified, defaulting to the default\n",
|
||
"2023-07-02 22:28:29,317 - modelscope - WARNING - Reusing dataset ms_hackathon_23_agent_train_dev (/home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files)\n",
|
||
"2023-07-02 22:28:29,318 - modelscope - INFO - Generating dataset ms_hackathon_23_agent_train_dev (/home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files)\n",
|
||
"2023-07-02 22:28:29,318 - modelscope - INFO - Reusing cached meta-data file: /home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files/941b733ec0354c2172a3386d8788bb37\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "682dc9eedfce4092a25fcadc977c794a",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
"Downloading data files: 0it [00:00, ?it/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "8e53d79d8e4845618231f3afb5bc096f",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
"Extracting data files: 0it [00:00, ?it/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"100%|██████████| 285/285 [00:00<00:00, 1566679.74it/s]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"test_dataset = make_dataset('validation', lambda system, user, assistant:\n",
|
||
" {'system': system, 'user': user, 'assistant': assistant})"
|
||
]
|
||
},
|
||
{
|
||
"attachments": {},
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### 推理"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"[TEST] 你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_speech-generation\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"url\": \"http://90.49.118.175:2603/\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}}\n",
|
||
"\n",
|
||
"2. {\"plugin_name\": \"modelscope_speech-generation\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"url\": \"http://132.94.116.115:5983/\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}}\n",
|
||
"\n",
|
||
"3. {\"plugin_name\": \"modelscope_speech-generation\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"url\": \"http://94.43.176.75:1062/\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}} \n",
|
||
"\n",
|
||
"### 用户\n",
|
||
"生成一首诗歌,主题为“秋天的美景”,读出来这段话 \n",
|
||
"\n",
|
||
"### 助手\n",
|
||
"<s>秋天,是一个美丽的季节,是一个收获的季节,是一个充满诗意的季节。秋天的天空,湛蓝湛蓝的,像一块蓝宝石;秋天的田野,金黄色的稻谷,像一片金色的海洋;秋天的果园,硕果累累,像一幅美丽的画卷。秋天的山林,层林尽染,像一幅色彩斑斓的油画;秋天的河流,清澈见底,像一条银色的丝带。秋天的天空,湛蓝湛蓝的,像一块蓝宝石;秋天的田野,金黄色的稻谷,像一片金色的海洋;秋天的果园,硕果累累,像一幅美丽的画卷。秋天的山林,层林尽染,像一幅色彩斑斓的油画;秋天的河流,清澈见底,像一条银色的丝带。\n",
|
||
"\n",
|
||
"[LABELS]秋树红叶舞飘零,\n",
|
||
"山间小溪水潺潺。\n",
|
||
"微风拂面感清凉,\n",
|
||
"散步赏景心旷神怡。\n",
|
||
"<|startofthink|>```JSON\n",
|
||
"{\"api_name\": \"modelscope_speech-generation\", \"url\": \"http://90.49.118.175:2603/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"parameters\": {\"text\": \"秋树红叶舞飘零,\n",
|
||
"山间小溪水潺潺。\n",
|
||
"微风拂面感清凉,\n",
|
||
"散步赏景心旷神怡。\", \"gender\": \"woman\"}}\n",
|
||
"```<|endofthink|>\n",
|
||
"\n",
|
||
"<|startofexec|>```JSON\n",
|
||
"{\"result\": \"<audio id=\"audio\" controls=\"\" preload=\"none\"> <source id=\"wav\" src=\"http://xdp-expriment.oss-cn-zhangjiakou.aliyuncs.com/modelscope/audio/5c68265546564117.wav\"> </audio>\"}\n",
|
||
"```<|endofexec|>\n",
|
||
"<audio id=\"audio\" controls=\"\" preload=\"none\"> <source id=\"wav\" src=\"http://xdp-expriment.oss-cn-zhangjiakou.aliyuncs.com/modelscope/audio/5c68265546564117.wav\"> </audio>\n",
|
||
"-----------------------------------------------------------------------------------\n",
|
||
"[TEST] 你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_text-address\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-address\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"url\": \"http://159.1.4.174:3210/\", \"paths\": [{\"name\": \"modelscope_text-address\", \"model_id\": \"/damo/mgeo_geographic_elements_tagging_chinese_base\", \"method\": \"post\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的地址信息\", \"required\": \"True\"}]}]}}\n",
|
||
"\n",
|
||
"2. {\"plugin_name\": \"modelscope_text-address\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-address\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"url\": \"http://172.163.158.154:5325/\", \"paths\": [{\"name\": \"modelscope_text-address\", \"model_id\": \"/damo/mgeo_geographic_elements_tagging_chinese_base\", \"method\": \"post\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的地址信息\", \"required\": \"True\"}]}]}}\n",
|
||
"\n",
|
||
"3. {\"plugin_name\": \"modelscope_text-address\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-address\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"url\": \"http://133.94.12.37:3160/\", \"paths\": [{\"name\": \"modelscope_text-address\", \"model_id\": \"/damo/mgeo_geographic_elements_tagging_chinese_base\", \"method\": \"post\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的地址信息\", \"required\": \"True\"}]}]}} \n",
|
||
"\n",
|
||
"### 用户\n",
|
||
"现在我给你另一条地址,请识别出里面的元素。输入地址:广东省深圳市南山区科技园北区 \n",
|
||
"\n",
|
||
"### 助手\n",
|
||
"<s><|startofthink|>```JSON\n",
|
||
"{\"api_name\": \"modelscope_text-address\", \"url\": \"http://133.94.12.37:3160/damo/mgeo_geographic_elements_tagging_chinese_base\", \"parameters\": {\"text\": \"广东省深圳市南山区科技园北区\"}}\n",
|
||
"```<|endofthink|>\n",
|
||
"\n",
|
||
"<|startofexec|>```JSON\n",
|
||
"{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"community\": \"科技园北区\"}\n",
|
||
"```<|endofexec|>\n",
|
||
"地址识别json表示:{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"community\": \"科技园北区\"}。我使用的模型是ModelScope的'damo/mgeo_geographic_elements_tagging_chinese_base'模型。这是基于达摩院联合高德发布的多任务多模态地址预训练底座MGeo模型微调得到的。\n",
|
||
"\n",
|
||
"[LABELS]<|startofthink|>```JSON\n",
|
||
"{\"api_name\": \"modelscope_text-address\", \"url\": \"http://159.1.4.174:3210/damo/mgeo_geographic_elements_tagging_chinese_base\", \"parameters\": {\"text\": \"广东省深圳市南山区科技园北区\"}}\n",
|
||
"```<|endofthink|>\n",
|
||
"\n",
|
||
"<|startofexec|>```JSON\n",
|
||
"{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"town\": \"\", \"community\": \"科技园北区\", \"poi\": \"\"}\n",
|
||
"```<|endofexec|>\n",
|
||
"地址识别json表示:{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"town\": \"\", \"community\": \"科技园北区\", \"poi\": \"\"}。我使用的模型是ModelScope的'damo/mgeo_geographic_elements_tagging_chinese_base'模型。这是基于达摩院联合高德发布的多任务多模态地址预训练底座MGeo模型微调得到的。\n",
|
||
"-----------------------------------------------------------------------------------\n",
|
||
"[TEST] 你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。目前支持的插件信息如下,请自行判断是否需要调用插件来解决当前用户问题。若需要调用插件,则需要将插件调用请求按照json格式给出,必须包含api_name、url、parameters字段,并在其前后使用<|startofthink|>和<|endofthink|>作为标志。然后你需要根据插件API调用结果生成合理的答复;若无需调用插件,则直接给出对应回复即可:\n",
|
||
"\n",
|
||
"1. {\"name\": \"modelscope_text-translation-zh2en\", \"description\": \"将输入的中文文本翻译成英文\", \"url\": \"http://api-inference.modelscope.cn/api-inference/v1/models\", \"paths\": [{\"name\": \"modelscope_text-translation-zh2en\", \"model_id\": \"/damo/nlp_csanmt_translation_zh2en\", \"method\": \"post\", \"description\": \"将输入的中文文本翻译成英文\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的中文文本\", \"required\": \"True\"}]}]}\n",
|
||
"\n",
|
||
"2. {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"url\": \"http://api-inference.modelscope.cn/api-inference/v1/models\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}\n",
|
||
"\n",
|
||
"3. {\"name\": \"modelscope_image-generation\", \"description\": \"针对文本输入,生成对应的图片\", \"url\": \"http://api-inference.modelscope.cn/api-inference/v1/models\", \"paths\": [{\"name\": \"modelscope_image-generation\", \"model_id\": \"/damo/image_generation\", \"method\": \"post\", \"description\": \"针对文本输入,生成对应的图片\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本信息\", \"required\": \"True\"}]}]} \n",
|
||
"\n",
|
||
"### 用户\n",
|
||
"歌手:古巨基\n",
|
||
"歌曲名:爱情马戏班\n",
|
||
"经典歌词:情是何等诡秘能令人使出看家把戏;恋爱就像走纲线般惊险;为你献技 像马戏班\n",
|
||
"请结合以上信息,编写一个智能音响的播放导语,需要有文采,字数30字以内,凸显一下即将播放该歌曲 \n",
|
||
"\n",
|
||
"### 助手\n",
|
||
"<s>古巨基的《爱情马戏班》,是一首经典的情歌,歌词中充满了对爱情的向往和对爱情的渴望,让人不禁沉醉其中。这首歌的旋律优美动听,歌词朗朗上口,让人听了就忍不住跟着哼唱。\n",
|
||
"\n",
|
||
"[LABELS]亲爱的主人,今天我为您带来的是古巨基的经典之作——《爱情马戏班》。这首歌曲描绘了情与爱的神秘和惊险,让人们为之倾倒。让我们一起享受这场爱情的马戏表演吧!\n",
|
||
"-----------------------------------------------------------------------------------\n",
|
||
"[TEST] 你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://114.42.178.183:8005/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n",
|
||
"\n",
|
||
"2. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://93.82.87.89:6631/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n",
|
||
"\n",
|
||
"3. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://4.105.93.165:8143/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}} \n",
|
||
"\n",
|
||
"### 用户\n",
|
||
"按照给定的schema抽取出下面文本对应的信息\n",
|
||
"schema:{\"动物\": null, \"食物\": null, \"颜色\": null}\n",
|
||
"这只棕色的狗狗很喜欢吃狗粮。 \n",
|
||
"\n",
|
||
"### 助手\n",
|
||
"<s><|startofthink|>```JSON\n",
|
||
"{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://4.105.93.165:8143/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n",
|
||
"```<|endofthink|>\n",
|
||
"\n",
|
||
"<|startofexec|>```JSON\n",
|
||
"{\"动物\": [\"棕色的狗狗\"], \"食物\": [\"狗粮\"], \"颜色\": [\"棕色\"]}\n",
|
||
"```<|endofexec|>\n",
|
||
"信息抽取结果:{\"动物\": [\"棕色的狗狗\"], \"食物\": [\"狗粮\"], \"颜色\": [\"棕色\"]}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n",
|
||
"\n",
|
||
"[LABELS]<|startofthink|>```JSON\n",
|
||
"{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://114.42.178.183:8005/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n",
|
||
"```<|endofthink|>\n",
|
||
"\n",
|
||
"<|startofexec|>```JSON\n",
|
||
"{\"动物\": [\"狗狗\"], \"食物\": [\"狗粮\"], \"颜色\": [\"棕色\"]}\n",
|
||
"```<|endofexec|>\n",
|
||
"信息抽取结果:{\"动物\": [\"狗狗\"], \"食物\": [\"狗粮\"], \"颜色\": [\"棕色\"]}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n",
|
||
"-----------------------------------------------------------------------------------\n",
|
||
"[TEST] 你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://28.179.171.5:6428/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n",
|
||
"\n",
|
||
"2. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://100.111.18.38:6408/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n",
|
||
"\n",
|
||
"3. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://144.67.18.142:6381/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}} \n",
|
||
"\n",
|
||
"### 用户\n",
|
||
"按照给定的schema抽取出下面文本对应的信息\n",
|
||
"schema:{\"人物\": null, \"地理位置\": null, \"组织机构\": null}\n",
|
||
"谷歌公司是一家全球知名的科技公司,总部位于美国的加利福尼亚州山景市。 \n",
|
||
"\n",
|
||
"### 助手\n",
|
||
"<s><|startofthink|>```JSON\n",
|
||
"{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://144.67.18.142:6381/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"谷歌公司是一家全球知名的科技公司,总部位于美国的加利福尼亚州山景市。\", \"schema\": \"{\\\"人物\\\": null, \\\"地理位置\\\": null, \\\"组织机构\\\": null}\"}}\n",
|
||
"```<|endofthink|>\n",
|
||
"\n",
|
||
"<|startofexec|>```JSON\n",
|
||
"{\"人物\": [\"谷歌公司\"], \"地理位置\": [\"美国\"], \"组织机构\": [\"科技公司\"]}\n",
|
||
"```<|endofexec|>\n",
|
||
"信息抽取结果:{\"人物\": [\"谷歌公司\"], \"地理位置\": [\"美国\"], \"组织机构\": [\"科技公司\"]}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n",
|
||
"\n",
|
||
"[LABELS]<|startofthink|>```JSON\n",
|
||
"{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://100.111.18.38:6408/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"谷歌公司是一家全球知名的科技公司,总部位于美国的加利福尼亚州山景市。\", \"schema\": \"{\\\"人物\\\": null, \\\"地理位置\\\": null, \\\"组织机构\\\": null}\"}}\n",
|
||
"```<|endofthink|>\n",
|
||
"\n",
|
||
"<|startofexec|>```JSON\n",
|
||
"{\"人物\": [], \"地理位置\": [\"美国\", \"加利福尼亚州山景市\"], \"组织机构\": [\"谷歌公司\"]}\n",
|
||
"```<|endofexec|>\n",
|
||
"信息抽取结果:{\"人物\": [], \"地理位置\": [\"美国\", \"加利福尼亚州山景市\"], \"组织机构\": [\"谷歌公司\"]}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n",
|
||
"-----------------------------------------------------------------------------------\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n",
|
||
"for d in test_dataset[:5]:\n",
|
||
" system = d['system']\n",
|
||
" user = d['user']\n",
|
||
" assistant = d['assistant']\n",
|
||
" input_ids = tokenize_function(system, user, None, tokenizer)['input_ids']\n",
|
||
" print(f'[TEST]{tokenizer.decode(input_ids)}', end='')\n",
|
||
" input_ids = torch.tensor(input_ids)[None].cuda()\n",
|
||
" attention_mask = torch.ones_like(input_ids)\n",
|
||
" generate_ids = model.generate(input_ids=input_ids, max_new_tokens=512,\n",
|
||
" attention_mask=attention_mask,\n",
|
||
" streamer=streamer, pad_token_id=tokenizer.eos_token_id, \n",
|
||
" temperature=0.7, top_k=50, top_p=0.7, do_sample=True)\n",
|
||
" print()\n",
|
||
" print(f'[LABELS]{assistant}')\n",
|
||
" print('-----------------------------------------------------------------------------------')\n",
|
||
" # input('next[ENTER]')"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.10.12"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|