{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## ChatGLM2 Inference"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure the Experiment Environment\n",
"The following code is copied from baichuan_infer.ipynb"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2023-07-02 21:48:47,527] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-07-02 21:48:48,006 - modelscope - INFO - PyTorch version 2.0.1 Found.\n",
"2023-07-02 21:48:48,007 - modelscope - INFO - Loading ast index from /home/hackathon/.cache/modelscope/ast_indexer\n",
"2023-07-02 21:48:48,032 - modelscope - INFO - Loading done! Current index file version is 1.6.2, with md5 ddf811ee982377c1357284a2bfda3dec and a total number of 861 components indexed\n",
"2023-07-02 21:48:48,708 - modelscope - INFO - [0, 1]\n",
"2023-07-02 21:48:48,848 - modelscope - INFO - Using device: cuda:0,1\n"
]
},
{
"data": {
"text/plain": [
"device(type='cuda', index=0)"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from _common import *\n",
"from transformers import TextStreamer\n",
"device_ids = [0, 1]\n",
"select_device(device_ids)"
]
},
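{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"The helpers used above (`select_device`, and later `get_chatglm2_model_tokenizer`, `make_dataset` and `tokenize_function`) come from the local `_common.py`, which is not shown in this notebook. As a rough orientation, a minimal sketch of what `select_device` presumably does is given below; the real helper may differ.\n",
"\n",
"```python\n",
"# Hypothetical sketch of select_device from _common.py (assumption, not the actual code).\n",
"import os\n",
"import torch\n",
"\n",
"def select_device(device_ids):\n",
"    # Restrict the visible GPUs, then return the first one as the default device.\n",
"    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(i) for i in device_ids)\n",
"    return torch.device('cuda:0' if torch.cuda.is_available() and device_ids else 'cpu')\n",
"```"
]
},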
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load the Model and Tokenizer\n",
"Note: you need to set CKPT_FPATH to point to a `.bin` file or a `.pth` file"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-07-02 21:48:49,227 - modelscope - INFO - Development mode use revision: v1.0.3\n",
"The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. \n",
"The tokenizer class you load from this checkpoint is 'ChatGLMTokenizer'. \n",
"The class this function is called from is 'ChatGLM2Tokenizer'.\n",
"2023-07-02 21:48:49,572 - modelscope - INFO - initialize model from /home/hackathon/.cache/modelscope/hub/ZhipuAI/chatglm2-6b\n",
"Failed to load cpm_kernels:No module named 'cpm_kernels'\n",
"The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b72b43e11bec49c78c8097deaffea8a7",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"ChatGLM2ForConditionalGeneration(\n",
" (transformer): ChatGLMModel(\n",
" (embedding): Embedding(\n",
" (word_embeddings): Embedding(65024, 4096)\n",
" )\n",
" (rotary_pos_emb): RotaryEmbedding()\n",
" (encoder): GLMTransformer(\n",
" (layers): ModuleList(\n",
" (0-27): 28 x GLMBlock(\n",
" (input_layernorm): RMSNorm()\n",
" (self_attention): SelfAttention(\n",
" (query_key_value): Linear(in_features=4096, out_features=4608, bias=True)\n",
" (core_attention): CoreAttention(\n",
" (attention_dropout): Dropout(p=0.0, inplace=False)\n",
" )\n",
" (dense): Linear(in_features=4096, out_features=4096, bias=False)\n",
" )\n",
" (post_attention_layernorm): RMSNorm()\n",
" (mlp): MLP(\n",
" (dense_h_to_4h): Linear(in_features=4096, out_features=27392, bias=False)\n",
" (dense_4h_to_h): Linear(in_features=13696, out_features=4096, bias=False)\n",
" )\n",
" )\n",
" )\n",
" (final_layernorm): RMSNorm()\n",
" )\n",
" (output_layer): Linear(in_features=4096, out_features=65024, bias=False)\n",
" )\n",
")"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"CKPT_FPATH = '/home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/output_best/pytorch_model.bin'\n",
"LORA_TARGET_MODULES = ['query_key_value']\n",
"\n",
"model_dir = snapshot_download('ZhipuAI/chatglm2-6b', 'v1.0.6')\n",
"model, tokenizer = get_chatglm2_model_tokenizer(model_dir)\n",
"model.bfloat16() # Consistent with training"
]
},
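{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"`get_chatglm2_model_tokenizer` is another `_common.py` helper that is not shown here. Judging from the logs above (remote tokenizer code, sharded checkpoint loading across the two GPUs), a minimal sketch could look like the following; treat every detail as an assumption rather than the actual implementation.\n",
"\n",
"```python\n",
"# Hypothetical sketch of get_chatglm2_model_tokenizer (assumption, not the real helper).\n",
"import torch\n",
"from transformers import AutoModel, AutoTokenizer\n",
"\n",
"def get_chatglm2_model_tokenizer(model_dir, load_model=True):\n",
"    tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)\n",
"    model = None\n",
"    if load_model:\n",
"        # device_map='auto' shards the weights over the visible GPUs (requires accelerate).\n",
"        model = AutoModel.from_pretrained(model_dir, torch_dtype=torch.float16,\n",
"                                          device_map='auto', trust_remote_code=True)\n",
"    return model, tokenizer\n",
"```"
]
},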
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load LoRA\n",
"The following code is copied from baichuan_infer.ipynb"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-07-02 21:48:56,970 - modelscope - INFO - lora_config: LoRAConfig(rank=8, replace_modules=['query_key_value'], lora_alpha=32, lora_dropout=0, merge_weights=True, use_merged_linear=False, enable_lora=None, fan_in_fan_out=False, bias='none', only_lora_trainable=True, pretrained_weights='/home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/output_best/pytorch_model.bin')\n"
]
},
{
"data": {
"text/plain": [
"ChatGLM2ForConditionalGeneration(\n",
" (transformer): ChatGLMModel(\n",
" (embedding): Embedding(\n",
" (word_embeddings): Embedding(65024, 4096)\n",
" )\n",
" (rotary_pos_emb): RotaryEmbedding()\n",
" (encoder): GLMTransformer(\n",
" (layers): ModuleList(\n",
" (0-27): 28 x GLMBlock(\n",
" (input_layernorm): RMSNorm()\n",
" (self_attention): SelfAttention(\n",
" (query_key_value): Linear(in_features=4096, out_features=4608, bias=True)\n",
" (core_attention): CoreAttention(\n",
" (attention_dropout): Dropout(p=0.0, inplace=False)\n",
" )\n",
" (dense): Linear(in_features=4096, out_features=4096, bias=False)\n",
" )\n",
" (post_attention_layernorm): RMSNorm()\n",
" (mlp): MLP(\n",
" (dense_h_to_4h): Linear(in_features=4096, out_features=27392, bias=False)\n",
" (dense_4h_to_h): Linear(in_features=13696, out_features=4096, bias=False)\n",
" )\n",
" )\n",
" )\n",
" (final_layernorm): RMSNorm()\n",
" )\n",
" (output_layer): Linear(in_features=4096, out_features=65024, bias=False)\n",
" )\n",
")"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"LORA_RANK = 8\n",
"LORA_ALPHA = 32\n",
"LORA_DROPOUT_P = 0 # Arbitrary value\n",
"lora_config = LoRAConfig(\n",
" target_modules=LORA_TARGET_MODULES,\n",
" r=LORA_RANK,\n",
" lora_alpha=LORA_ALPHA,\n",
" lora_dropout=LORA_DROPOUT_P,\n",
" pretrained_weights=CKPT_FPATH)\n",
"logger.info(f'lora_config: {lora_config}')\n",
"Swift.prepare_model(model, lora_config)"
]
},
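{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, the LoRA adapter wraps each `query_key_value` projection so that the frozen pretrained weight is kept and only two small low-rank matrices are trained: y = W x + (alpha / r) * B A x. The self-contained sketch below illustrates the idea only; MS-Swift's actual `LoRAConfig` implementation (weight merging, state-dict loading) differs in detail.\n",
"\n",
"```python\n",
"# Generic LoRA illustration (not the MS-Swift implementation).\n",
"import torch.nn as nn\n",
"\n",
"class LoRALinear(nn.Module):\n",
"    def __init__(self, base: nn.Linear, r=8, alpha=32, dropout=0.0):\n",
"        super().__init__()\n",
"        self.base = base  # frozen pretrained projection\n",
"        self.lora_A = nn.Linear(base.in_features, r, bias=False)\n",
"        self.lora_B = nn.Linear(r, base.out_features, bias=False)\n",
"        nn.init.zeros_(self.lora_B.weight)  # starts as a no-op\n",
"        self.scaling = alpha / r\n",
"        self.dropout = nn.Dropout(dropout)\n",
"\n",
"    def forward(self, x):\n",
"        return self.base(x) + self.scaling * self.lora_B(self.lora_A(self.dropout(x)))\n",
"```"
]
},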
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load the Dataset\n",
"The following code is copied from baichuan_infer.ipynb"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-07-02 21:49:01,924 - modelscope - INFO - No subset_name specified, defaulting to the default\n",
"2023-07-02 21:49:02,374 - modelscope - WARNING - Reusing dataset ms_hackathon_23_agent_train_dev (/home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files)\n",
"2023-07-02 21:49:02,375 - modelscope - INFO - Generating dataset ms_hackathon_23_agent_train_dev (/home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files)\n",
"2023-07-02 21:49:02,375 - modelscope - INFO - Reusing cached meta-data file: /home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files/941b733ec0354c2172a3386d8788bb37\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "277c2be395d645319f4601f1d1f1e4bf",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading data files: 0it [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "70946b16504c4a88883739bd273bddf6",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Extracting data files: 0it [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 285/285 [00:00<00:00, 1577014.04it/s]\n"
]
}
],
"source": [
"test_dataset = make_dataset('validation', lambda system, user, assistant:\n",
" {'system': system, 'user': user, 'assistant': assistant})"
]
},
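{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Each row of `test_dataset` is a dict with `system`, `user` and `assistant` fields. `tokenize_function` (also from `_common.py`) turns the first two into the prompt that is decoded and printed by the inference cell below; judging from that printed output, the layout is roughly the sketch shown here, though the real helper also handles labels and truncation.\n",
"\n",
"```python\n",
"# Hypothetical prompt layout inferred from the decoded prompts below (assumption).\n",
"PROMPT_TEMPLATE = '{system}\\n\\n### 用户\\n{user}\\n\\n### 助手\\n'\n",
"\n",
"def build_prompt(system: str, user: str) -> str:\n",
"    return PROMPT_TEMPLATE.format(system=system, user=user)\n",
"```"
]
},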
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Inference\n",
"The following code is copied from baichuan_infer.ipynb"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[TEST]你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_speech-generation\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"url\": \"http://90.49.118.175:2603/\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}}\n",
"\n",
"2. {\"plugin_name\": \"modelscope_speech-generation\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"url\": \"http://132.94.116.115:5983/\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}}\n",
"\n",
"3. {\"plugin_name\": \"modelscope_speech-generation\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"url\": \"http://94.43.176.75:1062/\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}} \n",
"\n",
"### 用户\n",
"生成一首诗歌,主题为“秋天的美景”,读出来这段话 \n",
"\n",
"### 助手\n",
"秋天是一个美丽的世界, \n",
"树叶在风中摇曳, \n",
"天空是那么的广阔, \n",
"秋天的美景让人陶醉。 \n",
"\n",
"树叶是那么的美丽, \n",
"像黄金一样闪耀, \n",
"像火焰一样燃烧, \n",
"像珍珠一样闪耀。 \n",
"\n",
"秋天的天空是那么的美丽, \n",
"像一面镜子, \n",
"像一片湖水, \n",
"像一片草原。 \n",
"\n",
"秋天是一个美丽的世界, \n",
"让我们享受它, \n",
"让我们欣赏它, \n",
"让我们感受它。\n",
"\n",
"[LABELS]秋树红叶舞飘零,\n",
"山间小溪水潺潺。\n",
"微风拂面感清凉,\n",
"散步赏景心旷神怡。\n",
"<|startofthink|>```JSON\n",
"{\"api_name\": \"modelscope_speech-generation\", \"url\": \"http://90.49.118.175:2603/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"parameters\": {\"text\": \"秋树红叶舞飘零,\n",
"山间小溪水潺潺。\n",
"微风拂面感清凉,\n",
"散步赏景心旷神怡。\", \"gender\": \"woman\"}}\n",
"```<|endofthink|>\n",
"\n",
"<|startofexec|>```JSON\n",
"{\"result\": \"<audio id=\"audio\" controls=\"\" preload=\"none\"> <source id=\"wav\" src=\"http://xdp-expriment.oss-cn-zhangjiakou.aliyuncs.com/modelscope/audio/5c68265546564117.wav\"> </audio>\"}\n",
"```<|endofexec|>\n",
"<audio id=\"audio\" controls=\"\" preload=\"none\"> <source id=\"wav\" src=\"http://xdp-expriment.oss-cn-zhangjiakou.aliyuncs.com/modelscope/audio/5c68265546564117.wav\"> </audio>\n",
"-----------------------------------------------------------------------------------\n",
"[TEST]你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_text-address\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-address\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"url\": \"http://159.1.4.174:3210/\", \"paths\": [{\"name\": \"modelscope_text-address\", \"model_id\": \"/damo/mgeo_geographic_elements_tagging_chinese_base\", \"method\": \"post\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的地址信息\", \"required\": \"True\"}]}]}}\n",
"\n",
"2. {\"plugin_name\": \"modelscope_text-address\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-address\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"url\": \"http://172.163.158.154:5325/\", \"paths\": [{\"name\": \"modelscope_text-address\", \"model_id\": \"/damo/mgeo_geographic_elements_tagging_chinese_base\", \"method\": \"post\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的地址信息\", \"required\": \"True\"}]}]}}\n",
"\n",
"3. {\"plugin_name\": \"modelscope_text-address\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-address\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"url\": \"http://133.94.12.37:3160/\", \"paths\": [{\"name\": \"modelscope_text-address\", \"model_id\": \"/damo/mgeo_geographic_elements_tagging_chinese_base\", \"method\": \"post\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的地址信息\", \"required\": \"True\"}]}]}} \n",
"\n",
"### 用户\n",
"现在我给你另一条地址,请识别出里面的元素。输入地址:广东省深圳市南山区科技园北区 \n",
"\n",
"### 助手\n",
"<|startofthink|>```JSON\n",
"{\"api_name\": \"modelscope_text-address\", \"url\": \"http://133.94.12.37:3160/damo/mgeo_geographic_elements_tagging_chinese_base\", \"parameters\": {\"text\": \"广东省深圳市南山区科技园北区\"}}\n",
"```<|endofthink|>\n",
"\n",
"<|startofexec|>```JSON\n",
"{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"town\": \"科技园北区\"}\n",
"```<|endofexec|>\n",
"地址识别结果为:{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"town\": \"科技园北区\"}。我识别出的元素包括:prov、city、district、town。\n",
"\n",
"[LABELS]<|startofthink|>```JSON\n",
"{\"api_name\": \"modelscope_text-address\", \"url\": \"http://159.1.4.174:3210/damo/mgeo_geographic_elements_tagging_chinese_base\", \"parameters\": {\"text\": \"广东省深圳市南山区科技园北区\"}}\n",
"```<|endofthink|>\n",
"\n",
"<|startofexec|>```JSON\n",
"{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"town\": \"\", \"community\": \"科技园北区\", \"poi\": \"\"}\n",
"```<|endofexec|>\n",
"地址识别json表示:{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"town\": \"\", \"community\": \"科技园北区\", \"poi\": \"\"}。我使用的模型是ModelScope的'damo/mgeo_geographic_elements_tagging_chinese_base'模型。这是基于达摩院联合高德发布的多任务多模态地址预训练底座MGeo模型微调得到的。\n",
"-----------------------------------------------------------------------------------\n",
"[TEST]你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。目前支持的插件信息如下,请自行判断是否需要调用插件来解决当前用户问题。若需要调用插件,则需要将插件调用请求按照json格式给出,必须包含api_name、url、parameters字段,并在其前后使用<|startofthink|>和<|endofthink|>作为标志。然后你需要根据插件API调用结果生成合理的答复;若无需调用插件,则直接给出对应回复即可:\n",
"\n",
"1. {\"name\": \"modelscope_text-translation-zh2en\", \"description\": \"将输入的中文文本翻译成英文\", \"url\": \"http://api-inference.modelscope.cn/api-inference/v1/models\", \"paths\": [{\"name\": \"modelscope_text-translation-zh2en\", \"model_id\": \"/damo/nlp_csanmt_translation_zh2en\", \"method\": \"post\", \"description\": \"将输入的中文文本翻译成英文\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的中文文本\", \"required\": \"True\"}]}]}\n",
"\n",
"2. {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"url\": \"http://api-inference.modelscope.cn/api-inference/v1/models\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}\n",
"\n",
"3. {\"name\": \"modelscope_image-generation\", \"description\": \"针对文本输入,生成对应的图片\", \"url\": \"http://api-inference.modelscope.cn/api-inference/v1/models\", \"paths\": [{\"name\": \"modelscope_image-generation\", \"model_id\": \"/damo/image_generation\", \"method\": \"post\", \"description\": \"针对文本输入,生成对应的图片\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本信息\", \"required\": \"True\"}]}]} \n",
"\n",
"### 用户\n",
"歌手:古巨基\n",
"歌曲名:爱情马戏班\n",
"经典歌词:情是何等诡秘能令人使出看家把戏;恋爱就像走纲线般惊险;为你献技 像马戏班\n",
"请结合以上信息,编写一个智能音响的播放导语,需要有文采,字数30字以内,凸显一下即将播放该歌曲 \n",
"\n",
"### 助手\n",
"爱情马戏班,由古巨基演唱,是一首充满马戏班元素的浪漫歌曲,歌词中描述了爱情的神秘和危险,是一首值得听一听的浪漫歌曲。\n",
"\n",
"[LABELS]亲爱的主人,今天我为您带来的是古巨基的经典之作——《爱情马戏班》。这首歌曲描绘了情与爱的神秘和惊险,让人们为之倾倒。让我们一起享受这场爱情的马戏表演吧!\n",
"-----------------------------------------------------------------------------------\n",
"[TEST]你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://114.42.178.183:8005/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n",
"\n",
"2. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://93.82.87.89:6631/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n",
"\n",
"3. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://4.105.93.165:8143/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}} \n",
"\n",
"### 用户\n",
"按照给定的schema抽取出下面文本对应的信息\n",
"schema:{\"动物\": null, \"食物\": null, \"颜色\": null}\n",
"这只棕色的狗狗很喜欢吃狗粮。 \n",
"\n",
"### 助手\n",
"<|startofthink|>```JSON\n",
"{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://4.105.93.165:8143/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n",
"```<|endofthink|>\n",
"\n",
"<|startofexec|>```JSON\n",
"{\"prov\": \"http://4.105.93.165:8143/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n",
"```<|endofexec|>\n",
"\n",
"<|startofexec|>```JSON\n",
"{\"prov\": \"http://4.105.93.165:8143/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n",
"```<|endofexec|>\n",
"<|startofexec|>```JSON\n",
"{\"prov\": \"http://4.105.93.165:8143/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n",
"```<|endofexec|>\n",
"<|startofexec|>```JSON\n",
"{\"prov\": \"http://4.105.93.165:8143/damo/nlp_structbert_siames\n",
"\n",
"[LABELS]<|startofthink|>```JSON\n",
"{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://114.42.178.183:8005/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n",
"```<|endofthink|>\n",
"\n",
"<|startofexec|>```JSON\n",
"{\"动物\": [\"狗狗\"], \"食物\": [\"狗粮\"], \"颜色\": [\"棕色\"]}\n",
"```<|endofexec|>\n",
"信息抽取结果:{\"动物\": [\"狗狗\"], \"食物\": [\"狗粮\"], \"颜色\": [\"棕色\"]}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n",
"-----------------------------------------------------------------------------------\n",
"[TEST]你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://28.179.171.5:6428/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n",
"\n",
"2. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://100.111.18.38:6408/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n",
"\n",
"3. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://144.67.18.142:6381/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}} \n",
"\n",
"### 用户\n",
"按照给定的schema抽取出下面文本对应的信息\n",
"schema:{\"人物\": null, \"地理位置\": null, \"组织机构\": null}\n",
"谷歌公司是一家全球知名的科技公司,总部位于美国的加利福尼亚州山景市。 \n",
"\n",
"### 助手\n",
"<|startofthink|>```JSON\n",
"{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://100.111.18.38:6408/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"谷歌公司是一家全球知名的科技公司,总部位于美国的加利福尼亚州山景市。\", \"schema\": \"{\\\"人物\\\": null, \\\"地理位置\\\": null, \\\"组织机构\\\": null}\"}}\n",
"```<|endofthink|>\n",
"\n",
"<|startofexec|>```JSON\n",
"{\"人物\": null, \"地理位置\": null, \"组织机构\": null}\n",
"```<|endofexec|>\n",
"信息抽取结果:{\"人物\": null, \"地理位置\": null, \"组织机构\": null}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调的通用信息抽取模型。\n",
"\n",
"[LABELS]<|startofthink|>```JSON\n",
"{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://100.111.18.38:6408/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"谷歌公司是一家全球知名的科技公司,总部位于美国的加利福尼亚州山景市。\", \"schema\": \"{\\\"人物\\\": null, \\\"地理位置\\\": null, \\\"组织机构\\\": null}\"}}\n",
"```<|endofthink|>\n",
"\n",
"<|startofexec|>```JSON\n",
"{\"人物\": [], \"地理位置\": [\"美国\", \"加利福尼亚州山景市\"], \"组织机构\": [\"谷歌公司\"]}\n",
"```<|endofexec|>\n",
"信息抽取结果:{\"人物\": [], \"地理位置\": [\"美国\", \"加利福尼亚州山景市\"], \"组织机构\": [\"谷歌公司\"]}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n",
"-----------------------------------------------------------------------------------\n"
]
}
],
"source": [
"streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n",
"for d in test_dataset[:5]:\n",
" system = d['system']\n",
" user = d['user']\n",
" assistant = d['assistant']\n",
" input_ids = tokenize_function(system, user, None, tokenizer)['input_ids']\n",
" print(f'[TEST]{tokenizer.decode(input_ids)}', end='')\n",
" input_ids = torch.tensor(input_ids)[None].cuda()\n",
" attention_mask = torch.ones_like(input_ids)\n",
" generate_ids = model.generate(input_ids=input_ids, max_new_tokens=512,\n",
" attention_mask=attention_mask,\n",
" streamer=streamer, pad_token_id=tokenizer.eos_token_id, \n",
" temperature=0.7, top_k=50, top_p=0.7, do_sample=True)\n",
" print()\n",
" print(f'[LABELS]{assistant}')\n",
" print('-----------------------------------------------------------------------------------')\n",
" # input('next[ENTER]')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "hackathon",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}