Mirror of https://github.com/modelscope/modelscope.git, synced 2025-12-25 12:39:25 +01:00
fix chatglm sp_tokenizer error
Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14352495
@@ -199,6 +199,10 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
                  padding_side='left',
                  num_image_tokens=20000,
                  **kwargs) -> None:
+
+        self.sp_tokenizer = SPTokenizer(
+            vocab_file, num_image_tokens=num_image_tokens)
+
         super().__init__(
             do_lower_case=do_lower_case,
             remove_space=remove_space,
@@ -220,9 +224,6 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
         self.end_token = end_token
         self.mask_token = mask_token
         self.gmask_token = gmask_token
-
-        self.sp_tokenizer = SPTokenizer(
-            vocab_file, num_image_tokens=num_image_tokens)
         """ Initialisation """

     @property
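The two hunks above move the SPTokenizer construction ahead of the super().__init__() call. The reason is inferred from the commit title and the hunk shape rather than stated in the diff itself: recent transformers releases have PreTrainedTokenizer.__init__ consult the subclass's vocabulary while registering tokens, so self.sp_tokenizer must already exist when the base class initialises; otherwise loading the tokenizer fails with an AttributeError on sp_tokenizer. Below is a minimal, self-contained sketch of that ordering constraint using toy stand-ins (names and values are hypothetical, not the real transformers or ChatGLM classes):

# Illustration only: toy classes that mimic the ordering bug this commit fixes.
class ToyPreTrainedTokenizer:
    def __init__(self, **kwargs):
        # The base class asks the subclass for its vocab size during
        # construction, so subclass state must already be in place.
        self.initial_vocab_size = self.get_vocab_size()


class ToySPTokenizer:
    def __init__(self, vocab_file, num_image_tokens=20000):
        self.vocab_file = vocab_file
        self.num_image_tokens = num_image_tokens
        self.num_text_tokens = 130344  # hypothetical value


class ToyChatGLMTokenizer(ToyPreTrainedTokenizer):
    def __init__(self, vocab_file, num_image_tokens=20000, **kwargs):
        # Fixed order, as in the diff above: build the SentencePiece wrapper
        # first, then let the base class initialise. Swapping these two
        # statements raises AttributeError: no attribute 'sp_tokenizer'.
        self.sp_tokenizer = ToySPTokenizer(vocab_file, num_image_tokens)
        super().__init__(**kwargs)

    def get_vocab_size(self):
        return (self.sp_tokenizer.num_text_tokens
                + self.sp_tokenizer.num_image_tokens)


tok = ToyChatGLMTokenizer('ice_text.model')
print(tok.initial_vocab_size)  # 150344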
@@ -71,6 +71,8 @@ def get_chat_prompt(system: str, text: str, history: List[Tuple[str, str]],

 # This file is mainly copied from the llama code of transformers
+@MODELS.register_module(Tasks.chat, module_name=Models.llama2)
 @MODELS.register_module(Tasks.chat, module_name=Models.llama)
+@MODELS.register_module(Tasks.text_generation, module_name=Models.llama2)
 @MODELS.register_module(Tasks.text_generation, module_name=Models.llama)
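The llama hunk appears to register the existing llama text-generation model for the llama2 model type as well, under both the chat and text_generation tasks. The sketch below shows the general pattern that stacked @MODELS.register_module(...) decorators follow: each call maps a (task, module_name) key to the class and returns the class unchanged. This is a toy registry written for illustration, not ModelScope's actual MODELS, Tasks, or Models objects:

# Toy registry illustrating the decorator pattern in the hunk above.
class Registry:
    def __init__(self):
        self._modules = {}

    def register_module(self, task, module_name):
        def decorator(cls):
            # Record the class under a (task, model type) key and return it
            # unchanged so several decorators can be stacked on one class.
            self._modules[(task, module_name)] = cls
            return cls
        return decorator

    def get(self, task, module_name):
        return self._modules[(task, module_name)]


MODELS = Registry()


@MODELS.register_module('chat', module_name='llama2')
@MODELS.register_module('chat', module_name='llama')
@MODELS.register_module('text-generation', module_name='llama2')
@MODELS.register_module('text-generation', module_name='llama')
class LlamaForTextGeneration:
    """Stand-in for the registered model class."""


# All four (task, model type) combinations resolve to the same class.
assert MODELS.get('chat', 'llama2') is MODELS.get('text-generation', 'llama')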