Files
modelscope/examples/pytorch/llm_agent/baichuan_infer.ipynb
2023-07-15 09:59:53 +08:00

483 lines
34 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Baichuan 推理"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### 配置实验环境"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2023-07-02 22:28:00,199] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-07-02 22:28:00,675 - modelscope - INFO - PyTorch version 2.0.1 Found.\n",
"2023-07-02 22:28:00,676 - modelscope - INFO - Loading ast index from /home/hackathon/.cache/modelscope/ast_indexer\n",
"2023-07-02 22:28:00,700 - modelscope - INFO - Loading done! Current index file version is 1.6.2, with md5 ddf811ee982377c1357284a2bfda3dec and a total number of 861 components indexed\n",
"2023-07-02 22:28:01,367 - modelscope - INFO - [0, 1]\n",
"2023-07-02 22:28:01,512 - modelscope - INFO - Using device: cuda:0,1\n"
]
},
{
"data": {
"text/plain": [
"device(type='cuda', index=0)"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from _common import *\n",
"from transformers import TextStreamer\n",
"device_ids = [0, 1]\n",
"select_device(device_ids)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### 导入Model, Tokenizer\n",
"Note: 你需要设置CKPT_FPATH的内容, 指向`.bin`文件, 或`.pth`文件"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-07-02 22:28:03,375 - modelscope - INFO - Model revision not specified, use default: master in development mode\n",
"2023-07-02 22:28:03,375 - modelscope - INFO - Development mode use revision: master\n",
"2023-07-02 22:28:03,695 - modelscope - INFO - model_config: BaiChuanConfig {\n",
" \"architectures\": [\n",
" \"BaiChuanForCausalLM\"\n",
" ],\n",
" \"auto_map\": {\n",
" \"AutoConfig\": \"configuration_baichuan.BaiChuanConfig\",\n",
" \"AutoModelForCausalLM\": \"modeling_baichuan.BaiChuanForCausalLM\"\n",
" },\n",
" \"bos_token_id\": 1,\n",
" \"eos_token_id\": 2,\n",
" \"hidden_act\": \"silu\",\n",
" \"hidden_size\": 4096,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 11008,\n",
" \"max_position_embeddings\": 4096,\n",
" \"model_type\": \"baichuan\",\n",
" \"num_attention_heads\": 32,\n",
" \"num_hidden_layers\": 32,\n",
" \"pad_token_id\": 0,\n",
" \"rms_norm_eps\": 1e-06,\n",
" \"tie_word_embeddings\": false,\n",
" \"torch_dtype\": \"float16\",\n",
" \"transformers_version\": \"4.30.2\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 64000\n",
"}\n",
"\n",
"The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.\n"
]
},
{
"data": {
"text/plain": [
"BaiChuanForCausalLM(\n",
" (model): Model(\n",
" (embed_tokens): Embedding(64000, 4096, padding_idx=0)\n",
" (layers): ModuleList(\n",
" (0-31): 32 x DecoderLayer(\n",
" (self_attn): Attention(\n",
" (W_pack): Linear(in_features=4096, out_features=12288, bias=False)\n",
" (o_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
" (rotary_emb): RotaryEmbedding()\n",
" )\n",
" (mlp): MLP(\n",
" (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
" (down_proj): Linear(in_features=11008, out_features=4096, bias=False)\n",
" (up_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
" (act_fn): SiLUActivation()\n",
" )\n",
" (input_layernorm): RMSNorm()\n",
" (post_attention_layernorm): RMSNorm()\n",
" )\n",
" )\n",
" (norm): RMSNorm()\n",
" )\n",
" (lm_head): Linear(in_features=4096, out_features=64000, bias=False)\n",
")"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"CKPT_FAPTH = '/home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/output_best/pytorch_model.bin'\n",
"LORA_TARGET_MODULES = ['W_pack']\n",
"\n",
"model_dir = snapshot_download('baichuan-inc/baichuan-7B', 'v1.0.5')\n",
"model, tokenizer = get_baichuan7B_model_tokenizer(model_dir)\n",
"model.bfloat16() # Consistent with training"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### 导入Lora"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-07-02 22:28:14,108 - modelscope - INFO - lora_config: LoRAConfig(rank=8, replace_modules=['W_pack'], lora_alpha=32, lora_dropout=0, merge_weights=True, use_merged_linear=False, enable_lora=None, fan_in_fan_out=False, bias='none', only_lora_trainable=True, pretrained_weights='/home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/output_best/pytorch_model.bin')\n"
]
},
{
"data": {
"text/plain": [
"BaiChuanForCausalLM(\n",
" (model): Model(\n",
" (embed_tokens): Embedding(64000, 4096, padding_idx=0)\n",
" (layers): ModuleList(\n",
" (0-31): 32 x DecoderLayer(\n",
" (self_attn): Attention(\n",
" (W_pack): Linear(in_features=4096, out_features=12288, bias=False)\n",
" (o_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
" (rotary_emb): RotaryEmbedding()\n",
" )\n",
" (mlp): MLP(\n",
" (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
" (down_proj): Linear(in_features=11008, out_features=4096, bias=False)\n",
" (up_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
" (act_fn): SiLUActivation()\n",
" )\n",
" (input_layernorm): RMSNorm()\n",
" (post_attention_layernorm): RMSNorm()\n",
" )\n",
" )\n",
" (norm): RMSNorm()\n",
" )\n",
" (lm_head): Linear(in_features=4096, out_features=64000, bias=False)\n",
")"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"LORA_RANK = 8\n",
"LORA_ALPHA = 32\n",
"LORA_DROPOUT_P = 0 # Arbitrary value\n",
"lora_config = LoRAConfig(\n",
" replace_modules=LORA_TARGET_MODULES,\n",
" rank=LORA_RANK,\n",
" lora_alpha=LORA_ALPHA,\n",
" lora_dropout=LORA_DROPOUT_P,\n",
" pretrained_weights=CKPT_FAPTH)\n",
"logger.info(f'lora_config: {lora_config}')\n",
"Swift.prepare_model(model, lora_config)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### 导入Dataset"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-07-02 22:28:28,832 - modelscope - INFO - No subset_name specified, defaulting to the default\n",
"2023-07-02 22:28:29,317 - modelscope - WARNING - Reusing dataset ms_hackathon_23_agent_train_dev (/home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files)\n",
"2023-07-02 22:28:29,318 - modelscope - INFO - Generating dataset ms_hackathon_23_agent_train_dev (/home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files)\n",
"2023-07-02 22:28:29,318 - modelscope - INFO - Reusing cached meta-data file: /home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files/941b733ec0354c2172a3386d8788bb37\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "682dc9eedfce4092a25fcadc977c794a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading data files: 0it [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8e53d79d8e4845618231f3afb5bc096f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Extracting data files: 0it [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 285/285 [00:00<00:00, 1566679.74it/s]\n"
]
}
],
"source": [
"test_dataset = make_dataset('validation', lambda system, user, assistant:\n",
" {'system': system, 'user': user, 'assistant': assistant})"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### 推理"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[TEST] 你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力可以通过插件集成魔搭社区的模型api来回复用户的问题还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_speech-generation\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"url\": \"http://90.49.118.175:2603/\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}}\n",
"\n",
"2. {\"plugin_name\": \"modelscope_speech-generation\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"url\": \"http://132.94.116.115:5983/\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}}\n",
"\n",
"3. {\"plugin_name\": \"modelscope_speech-generation\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"url\": \"http://94.43.176.75:1062/\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}} \n",
"\n",
"### 用户\n",
"生成一首诗歌,主题为“秋天的美景”,读出来这段话 \n",
"\n",
"### 助手\n",
"<s>秋天,是一个美丽的季节,是一个收获的季节,是一个充满诗意的季节。秋天的天空,湛蓝湛蓝的,像一块蓝宝石;秋天的田野,金黄色的稻谷,像一片金色的海洋;秋天的果园,硕果累累,像一幅美丽的画卷。秋天的山林,层林尽染,像一幅色彩斑斓的油画;秋天的河流,清澈见底,像一条银色的丝带。秋天的天空,湛蓝湛蓝的,像一块蓝宝石;秋天的田野,金黄色的稻谷,像一片金色的海洋;秋天的果园,硕果累累,像一幅美丽的画卷。秋天的山林,层林尽染,像一幅色彩斑斓的油画;秋天的河流,清澈见底,像一条银色的丝带。\n",
"\n",
"[LABELS]秋树红叶舞飘零,\n",
"山间小溪水潺潺。\n",
"微风拂面感清凉,\n",
"散步赏景心旷神怡。\n",
"<|startofthink|>```JSON\n",
"{\"api_name\": \"modelscope_speech-generation\", \"url\": \"http://90.49.118.175:2603/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"parameters\": {\"text\": \"秋树红叶舞飘零,\n",
"山间小溪水潺潺。\n",
"微风拂面感清凉,\n",
"散步赏景心旷神怡。\", \"gender\": \"woman\"}}\n",
"```<|endofthink|>\n",
"\n",
"<|startofexec|>```JSON\n",
"{\"result\": \"<audio id=\"audio\" controls=\"\" preload=\"none\"> <source id=\"wav\" src=\"http://xdp-expriment.oss-cn-zhangjiakou.aliyuncs.com/modelscope/audio/5c68265546564117.wav\"> </audio>\"}\n",
"```<|endofexec|>\n",
"<audio id=\"audio\" controls=\"\" preload=\"none\"> <source id=\"wav\" src=\"http://xdp-expriment.oss-cn-zhangjiakou.aliyuncs.com/modelscope/audio/5c68265546564117.wav\"> </audio>\n",
"-----------------------------------------------------------------------------------\n",
"[TEST] 你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力可以通过插件集成魔搭社区的模型api来回复用户的问题还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_text-address\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-address\", \"description\": \"针对中文的地址信息识别出里面的元素包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"url\": \"http://159.1.4.174:3210/\", \"paths\": [{\"name\": \"modelscope_text-address\", \"model_id\": \"/damo/mgeo_geographic_elements_tagging_chinese_base\", \"method\": \"post\", \"description\": \"针对中文的地址信息识别出里面的元素包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的地址信息\", \"required\": \"True\"}]}]}}\n",
"\n",
"2. {\"plugin_name\": \"modelscope_text-address\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-address\", \"description\": \"针对中文的地址信息识别出里面的元素包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"url\": \"http://172.163.158.154:5325/\", \"paths\": [{\"name\": \"modelscope_text-address\", \"model_id\": \"/damo/mgeo_geographic_elements_tagging_chinese_base\", \"method\": \"post\", \"description\": \"针对中文的地址信息识别出里面的元素包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的地址信息\", \"required\": \"True\"}]}]}}\n",
"\n",
"3. {\"plugin_name\": \"modelscope_text-address\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-address\", \"description\": \"针对中文的地址信息识别出里面的元素包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"url\": \"http://133.94.12.37:3160/\", \"paths\": [{\"name\": \"modelscope_text-address\", \"model_id\": \"/damo/mgeo_geographic_elements_tagging_chinese_base\", \"method\": \"post\", \"description\": \"针对中文的地址信息识别出里面的元素包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的地址信息\", \"required\": \"True\"}]}]}} \n",
"\n",
"### 用户\n",
"现在我给你另一条地址,请识别出里面的元素。输入地址:广东省深圳市南山区科技园北区 \n",
"\n",
"### 助手\n",
"<s><|startofthink|>```JSON\n",
"{\"api_name\": \"modelscope_text-address\", \"url\": \"http://133.94.12.37:3160/damo/mgeo_geographic_elements_tagging_chinese_base\", \"parameters\": {\"text\": \"广东省深圳市南山区科技园北区\"}}\n",
"```<|endofthink|>\n",
"\n",
"<|startofexec|>```JSON\n",
"{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"community\": \"科技园北区\"}\n",
"```<|endofexec|>\n",
"地址识别json表示{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"community\": \"科技园北区\"}。我使用的模型是ModelScope的'damo/mgeo_geographic_elements_tagging_chinese_base'模型。这是基于达摩院联合高德发布的多任务多模态地址预训练底座MGeo模型微调得到的。\n",
"\n",
"[LABELS]<|startofthink|>```JSON\n",
"{\"api_name\": \"modelscope_text-address\", \"url\": \"http://159.1.4.174:3210/damo/mgeo_geographic_elements_tagging_chinese_base\", \"parameters\": {\"text\": \"广东省深圳市南山区科技园北区\"}}\n",
"```<|endofthink|>\n",
"\n",
"<|startofexec|>```JSON\n",
"{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"town\": \"\", \"community\": \"科技园北区\", \"poi\": \"\"}\n",
"```<|endofexec|>\n",
"地址识别json表示{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"town\": \"\", \"community\": \"科技园北区\", \"poi\": \"\"}。我使用的模型是ModelScope的'damo/mgeo_geographic_elements_tagging_chinese_base'模型。这是基于达摩院联合高德发布的多任务多模态地址预训练底座MGeo模型微调得到的。\n",
"-----------------------------------------------------------------------------------\n",
"[TEST] 你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力可以通过插件集成魔搭社区的模型api来回复用户的问题还能解答用户使用模型遇到的问题和模型知识相关问答。目前支持的插件信息如下请自行判断是否需要调用插件来解决当前用户问题。若需要调用插件则需要将插件调用请求按照json格式给出必须包含api_name、url、parameters字段并在其前后使用<|startofthink|>和<|endofthink|>作为标志。然后你需要根据插件API调用结果生成合理的答复;若无需调用插件,则直接给出对应回复即可:\n",
"\n",
"1. {\"name\": \"modelscope_text-translation-zh2en\", \"description\": \"将输入的中文文本翻译成英文\", \"url\": \"http://api-inference.modelscope.cn/api-inference/v1/models\", \"paths\": [{\"name\": \"modelscope_text-translation-zh2en\", \"model_id\": \"/damo/nlp_csanmt_translation_zh2en\", \"method\": \"post\", \"description\": \"将输入的中文文本翻译成英文\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的中文文本\", \"required\": \"True\"}]}]}\n",
"\n",
"2. {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"url\": \"http://api-inference.modelscope.cn/api-inference/v1/models\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}\n",
"\n",
"3. {\"name\": \"modelscope_image-generation\", \"description\": \"针对文本输入,生成对应的图片\", \"url\": \"http://api-inference.modelscope.cn/api-inference/v1/models\", \"paths\": [{\"name\": \"modelscope_image-generation\", \"model_id\": \"/damo/image_generation\", \"method\": \"post\", \"description\": \"针对文本输入,生成对应的图片\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本信息\", \"required\": \"True\"}]}]} \n",
"\n",
"### 用户\n",
"歌手:古巨基\n",
"歌曲名:爱情马戏班\n",
"经典歌词:情是何等诡秘能令人使出看家把戏;恋爱就像走纲线般惊险;为你献技 像马戏班\n",
"请结合以上信息编写一个智能音响的播放导语需要有文采字数30字以内凸显一下即将播放该歌曲 \n",
"\n",
"### 助手\n",
"<s>古巨基的《爱情马戏班》,是一首经典的情歌,歌词中充满了对爱情的向往和对爱情的渴望,让人不禁沉醉其中。这首歌的旋律优美动听,歌词朗朗上口,让人听了就忍不住跟着哼唱。\n",
"\n",
"[LABELS]亲爱的主人,今天我为您带来的是古巨基的经典之作——《爱情马戏班》。这首歌曲描绘了情与爱的神秘和惊险,让人们为之倾倒。让我们一起享受这场爱情的马戏表演吧!\n",
"-----------------------------------------------------------------------------------\n",
"[TEST] 你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力可以通过插件集成魔搭社区的模型api来回复用户的问题还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本根据schema要抽取的内容找出其中对应信息并用json格式展示\", \"url\": \"http://114.42.178.183:8005/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本根据schema要抽取的内容找出其中对应信息并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n",
"\n",
"2. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本根据schema要抽取的内容找出其中对应信息并用json格式展示\", \"url\": \"http://93.82.87.89:6631/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本根据schema要抽取的内容找出其中对应信息并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n",
"\n",
"3. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本根据schema要抽取的内容找出其中对应信息并用json格式展示\", \"url\": \"http://4.105.93.165:8143/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本根据schema要抽取的内容找出其中对应信息并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}} \n",
"\n",
"### 用户\n",
"按照给定的schema抽取出下面文本对应的信息\n",
"schema{\"动物\": null, \"食物\": null, \"颜色\": null}\n",
"这只棕色的狗狗很喜欢吃狗粮。 \n",
"\n",
"### 助手\n",
"<s><|startofthink|>```JSON\n",
"{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://4.105.93.165:8143/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n",
"```<|endofthink|>\n",
"\n",
"<|startofexec|>```JSON\n",
"{\"动物\": [\"棕色的狗狗\"], \"食物\": [\"狗粮\"], \"颜色\": [\"棕色\"]}\n",
"```<|endofexec|>\n",
"信息抽取结果:{\"动物\": [\"棕色的狗狗\"], \"食物\": [\"狗粮\"], \"颜色\": [\"棕色\"]}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n",
"\n",
"[LABELS]<|startofthink|>```JSON\n",
"{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://114.42.178.183:8005/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n",
"```<|endofthink|>\n",
"\n",
"<|startofexec|>```JSON\n",
"{\"动物\": [\"狗狗\"], \"食物\": [\"狗粮\"], \"颜色\": [\"棕色\"]}\n",
"```<|endofexec|>\n",
"信息抽取结果:{\"动物\": [\"狗狗\"], \"食物\": [\"狗粮\"], \"颜色\": [\"棕色\"]}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n",
"-----------------------------------------------------------------------------------\n",
"[TEST] 你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力可以通过插件集成魔搭社区的模型api来回复用户的问题还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本根据schema要抽取的内容找出其中对应信息并用json格式展示\", \"url\": \"http://28.179.171.5:6428/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本根据schema要抽取的内容找出其中对应信息并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n",
"\n",
"2. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本根据schema要抽取的内容找出其中对应信息并用json格式展示\", \"url\": \"http://100.111.18.38:6408/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本根据schema要抽取的内容找出其中对应信息并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n",
"\n",
"3. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本根据schema要抽取的内容找出其中对应信息并用json格式展示\", \"url\": \"http://144.67.18.142:6381/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本根据schema要抽取的内容找出其中对应信息并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}} \n",
"\n",
"### 用户\n",
"按照给定的schema抽取出下面文本对应的信息\n",
"schema{\"人物\": null, \"地理位置\": null, \"组织机构\": null}\n",
"谷歌公司是一家全球知名的科技公司,总部位于美国的加利福尼亚州山景市。 \n",
"\n",
"### 助手\n",
"<s><|startofthink|>```JSON\n",
"{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://144.67.18.142:6381/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"谷歌公司是一家全球知名的科技公司,总部位于美国的加利福尼亚州山景市。\", \"schema\": \"{\\\"人物\\\": null, \\\"地理位置\\\": null, \\\"组织机构\\\": null}\"}}\n",
"```<|endofthink|>\n",
"\n",
"<|startofexec|>```JSON\n",
"{\"人物\": [\"谷歌公司\"], \"地理位置\": [\"美国\"], \"组织机构\": [\"科技公司\"]}\n",
"```<|endofexec|>\n",
"信息抽取结果:{\"人物\": [\"谷歌公司\"], \"地理位置\": [\"美国\"], \"组织机构\": [\"科技公司\"]}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n",
"\n",
"[LABELS]<|startofthink|>```JSON\n",
"{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://100.111.18.38:6408/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"谷歌公司是一家全球知名的科技公司,总部位于美国的加利福尼亚州山景市。\", \"schema\": \"{\\\"人物\\\": null, \\\"地理位置\\\": null, \\\"组织机构\\\": null}\"}}\n",
"```<|endofthink|>\n",
"\n",
"<|startofexec|>```JSON\n",
"{\"人物\": [], \"地理位置\": [\"美国\", \"加利福尼亚州山景市\"], \"组织机构\": [\"谷歌公司\"]}\n",
"```<|endofexec|>\n",
"信息抽取结果:{\"人物\": [], \"地理位置\": [\"美国\", \"加利福尼亚州山景市\"], \"组织机构\": [\"谷歌公司\"]}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n",
"-----------------------------------------------------------------------------------\n"
]
}
],
"source": [
"streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n",
"for d in test_dataset[:5]:\n",
" system = d['system']\n",
" user = d['user']\n",
" assistant = d['assistant']\n",
" input_ids = tokenize_function(system, user, None, tokenizer)['input_ids']\n",
" print(f'[TEST]{tokenizer.decode(input_ids)}', end='')\n",
" input_ids = torch.tensor(input_ids)[None].cuda()\n",
" attention_mask = torch.ones_like(input_ids)\n",
" generate_ids = model.generate(input_ids=input_ids, max_new_tokens=512,\n",
" attention_mask=attention_mask,\n",
" streamer=streamer, pad_token_id=tokenizer.eos_token_id, \n",
" temperature=0.7, top_k=50, top_p=0.7, do_sample=True)\n",
" print()\n",
" print(f'[LABELS]{assistant}')\n",
" print('-----------------------------------------------------------------------------------')\n",
" # input('next[ENTER]')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}