From b5b08f9718100d0a68efbf29d203a3611979312e Mon Sep 17 00:00:00 2001
From: tastelikefeet <58414341+tastelikefeet@users.noreply.github.com>
Date: Mon, 6 Jan 2025 17:29:44 +0800
Subject: [PATCH] fix: lowercase text before building offset_mapping when the
 tokenizer sets do_lower_case; see
 https://www.modelscope.cn/models/iic/nlp_structbert_address-parsing_chinese_base/feedback/issueDetail/20431
 (#1170)

---
 .../preprocessors/nlp/token_classification_preprocessor.py      | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/modelscope/preprocessors/nlp/token_classification_preprocessor.py b/modelscope/preprocessors/nlp/token_classification_preprocessor.py
index b3ff9935..902dafca 100644
--- a/modelscope/preprocessors/nlp/token_classification_preprocessor.py
+++ b/modelscope/preprocessors/nlp/token_classification_preprocessor.py
@@ -416,6 +416,8 @@ class TokenClassificationTransformersPreprocessor(
         offset_mapping = []
         tokens = self.nlp_tokenizer.tokenizer.tokenize(text)
         offset = 0
+        if getattr(self.nlp_tokenizer.tokenizer, 'do_lower_case', False):
+            text = text.lower()
         for token in tokens:
             is_start = (token[:2] != '##')
             if is_start: