From e10237074e4129dbd17457e08bf21d59e496f785 Mon Sep 17 00:00:00 2001
From: "wenmeng.zwm"
Date: Wed, 18 Oct 2023 20:24:42 +0800
Subject: [PATCH] fix chatglm sp_tokenizer error

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14352495
---
 modelscope/models/nlp/chatglm/tokenization.py  | 7 ++++---
 modelscope/models/nlp/llama/text_generation.py | 2 ++
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/modelscope/models/nlp/chatglm/tokenization.py b/modelscope/models/nlp/chatglm/tokenization.py
index f5f8cd0c..6ce1b90d 100644
--- a/modelscope/models/nlp/chatglm/tokenization.py
+++ b/modelscope/models/nlp/chatglm/tokenization.py
@@ -199,6 +199,10 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
                  padding_side='left',
                  num_image_tokens=20000,
                  **kwargs) -> None:
+
+        self.sp_tokenizer = SPTokenizer(
+            vocab_file, num_image_tokens=num_image_tokens)
+
         super().__init__(
             do_lower_case=do_lower_case,
             remove_space=remove_space,
@@ -220,9 +224,6 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
         self.end_token = end_token
         self.mask_token = mask_token
         self.gmask_token = gmask_token
-
-        self.sp_tokenizer = SPTokenizer(
-            vocab_file, num_image_tokens=num_image_tokens)
         """ Initialisation """
 
     @property
diff --git a/modelscope/models/nlp/llama/text_generation.py b/modelscope/models/nlp/llama/text_generation.py
index b9cc8032..d95cae34 100644
--- a/modelscope/models/nlp/llama/text_generation.py
+++ b/modelscope/models/nlp/llama/text_generation.py
@@ -71,6 +71,8 @@ def get_chat_prompt(system: str, text: str, history: List[Tuple[str, str]],
 
 
 # This file is mainly copied from the llama code of transformers
+@MODELS.register_module(Tasks.chat, module_name=Models.llama2)
+@MODELS.register_module(Tasks.chat, module_name=Models.llama)
 @MODELS.register_module(Tasks.text_generation, module_name=Models.llama2)
 @MODELS.register_module(Tasks.chat, module_name=Models.llama2)
 @MODELS.register_module(Tasks.text_generation, module_name=Models.llama)