mirror of
https://github.com/modelscope/modelscope.git
synced 2025-12-25 04:29:22 +01:00
[to #42322933] Fix train/inference tokenization discrepancy for es/fr: disable aggressive dash splitting when translating between en and es/fr
Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11570542 * [to #42322933] fix discrepancy between train and inference for es/fr
This commit is contained in:
@@ -101,9 +101,15 @@ class TranslationPipeline(Pipeline):
|
||||
input_tok = [' '.join(list(item)) for item in input_tok]
|
||||
else:
|
||||
input = [self._punct_normalizer.normalize(item) for item in input]
|
||||
aggressive_dash_splits = True
|
||||
if (self._src_lang in ['es', 'fr'] and self._tgt_lang == 'en') or (
|
||||
self._src_lang == 'en' and self._tgt_lang in ['es', 'fr']):
|
||||
aggressive_dash_splits = False
|
||||
input_tok = [
|
||||
self._tok.tokenize(
|
||||
item, return_str=True, aggressive_dash_splits=True)
|
||||
item,
|
||||
return_str=True,
|
||||
aggressive_dash_splits=aggressive_dash_splits)
|
||||
for item in input
|
||||
]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user