Fix NER tokenizer which won't accept kwargs
Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/12664244

* fix slow tokenizer
```diff
@@ -388,10 +388,14 @@ class TokenClassificationTransformersPreprocessor(
                 f'tokenizer {tokenizer_name}, please use a fast tokenizer instead, or '
                 f'try to implement a `{method}` method')
         label_mask, offset_mapping = getattr(self, method)(tokens)
-        padding = self.nlp_tokenizer.get_tokenizer_kwarg('padding')
-        max_length = self.nlp_tokenizer.get_tokenizer_kwarg('max_length')
-        special_token = 1 if self.nlp_tokenizer.get_tokenizer_kwarg(
-            'add_special_tokens') else 0
+        padding = kwargs.get('padding',
+                             self.nlp_tokenizer.get_tokenizer_kwarg('padding'))
+        max_length = kwargs.get(
+            'max_length', self.nlp_tokenizer.get_tokenizer_kwarg('max_length'))
+        special_token = 1 if kwargs.get(
+            'add_special_tokens',
+            self.nlp_tokenizer.get_tokenizer_kwarg(
+                'add_special_tokens')) else 0
         if len(label_mask) > max_length - 2 * special_token:
             label_mask = label_mask[:(max_length - 2 * special_token)]
         offset_mapping = offset_mapping[:sum(label_mask)]
```
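The core of the fix is a fallback pattern: each per-call kwarg takes precedence, with the preprocessor's stored tokenizer setting as the default, instead of the stored setting silently winning. Below is a minimal, self-contained sketch of that pattern; `NLPTokenizerStub`, its `defaults` dict, and `resolve_tokenize_args` are illustrative stand-ins rather than the real modelscope API (only `get_tokenizer_kwarg` mirrors a name used in the hunk).

```python
class NLPTokenizerStub:
    """Stand-in for the preprocessor's tokenizer wrapper: holds the
    defaults that would normally come from the tokenizer config."""

    def __init__(self, defaults):
        self._defaults = defaults

    def get_tokenizer_kwarg(self, key):
        return self._defaults.get(key)


def resolve_tokenize_args(nlp_tokenizer, **kwargs):
    # Each setting prefers the caller-supplied kwarg and falls back to
    # the stored default, mirroring the added lines in the hunk above.
    padding = kwargs.get('padding',
                         nlp_tokenizer.get_tokenizer_kwarg('padding'))
    max_length = kwargs.get(
        'max_length', nlp_tokenizer.get_tokenizer_kwarg('max_length'))
    special_token = 1 if kwargs.get(
        'add_special_tokens',
        nlp_tokenizer.get_tokenizer_kwarg('add_special_tokens')) else 0
    return padding, max_length, special_token


tokenizer = NLPTokenizerStub({
    'padding': 'max_length', 'max_length': 128, 'add_special_tokens': True})
print(resolve_tokenize_args(tokenizer))                 # ('max_length', 128, 1)
print(resolve_tokenize_args(tokenizer, max_length=64))  # ('max_length', 64, 1)
```

With no kwargs the behavior is unchanged from before the patch; passing `max_length=64` now overrides the stored value, which is exactly the bug the commit title describes.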
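The unchanged tail of the hunk then budgets for special tokens: when `add_special_tokens` resolves truthy, two slots are reserved (presumably for sequence-boundary tokens such as [CLS]/[SEP] in BERT-style models; that pairing is an assumption here) before the label mask is truncated, and the offset mapping is clipped to the surviving labels. A small worked example with made-up mask and offset values:

```python
max_length = 8
special_token = 1                       # add_special_tokens resolved to True
label_mask = [1, 1, 0, 1, 1, 0, 1, 1]   # 1 = first sub-token of a word
offset_mapping = [(0, 3), (4, 7), (8, 9), (10, 14), (15, 18), (19, 22)]

budget = max_length - 2 * special_token     # 8 - 2 = 6 token slots left
if len(label_mask) > budget:
    label_mask = label_mask[:budget]        # keep the first 6 mask entries
offset_mapping = offset_mapping[:sum(label_mask)]  # one span per kept label

print(label_mask)      # [1, 1, 0, 1, 1, 0]
print(offset_mapping)  # [(0, 3), (4, 7), (8, 9), (10, 14)]
```

Note that `sum(label_mask)` counts only word-initial sub-tokens (4 here), so the offset mapping keeps one character span per labeled word rather than per sub-token.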