From b5b08f9718100d0a68efbf29d203a3611979312e Mon Sep 17 00:00:00 2001 From: tastelikefeet <58414341+tastelikefeet@users.noreply.github.com> Date: Mon, 6 Jan 2025 17:29:44 +0800 Subject: [PATCH] fix https://www.modelscope.cn/models/iic/nlp_structbert_address-parsing_chinese_base/feedback/issueDetail/20431 (#1170) --- .../preprocessors/nlp/token_classification_preprocessor.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modelscope/preprocessors/nlp/token_classification_preprocessor.py b/modelscope/preprocessors/nlp/token_classification_preprocessor.py index b3ff9935..902dafca 100644 --- a/modelscope/preprocessors/nlp/token_classification_preprocessor.py +++ b/modelscope/preprocessors/nlp/token_classification_preprocessor.py @@ -416,6 +416,8 @@ class TokenClassificationTransformersPreprocessor( offset_mapping = [] tokens = self.nlp_tokenizer.tokenizer.tokenize(text) offset = 0 + if getattr(self.nlp_tokenizer.tokenizer, 'do_lower_case', False): + text = text.lower() for token in tokens: is_start = (token[:2] != '##') if is_start: