[to #42322933] Fix tokenization discrepancy between training and inference for es/fr translation

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11570542

    * [to #42322933] Fix tokenization discrepancy between training and inference for es/fr translation (disable aggressive dash splitting for es/fr<->en pairs)
This commit is contained in:
xiangpeng.wxp
2023-02-07 07:39:11 +00:00
parent 06990f90dc
commit 848d4d6c05

View File

@@ -101,9 +101,15 @@ class TranslationPipeline(Pipeline):
input_tok = [' '.join(list(item)) for item in input_tok]
else:
input = [self._punct_normalizer.normalize(item) for item in input]
aggressive_dash_splits = True
if (self._src_lang in ['es', 'fr'] and self._tgt_lang == 'en') or (
self._src_lang == 'en' and self._tgt_lang in ['es', 'fr']):
aggressive_dash_splits = False
input_tok = [
self._tok.tokenize(
item, return_str=True, aggressive_dash_splits=True)
item,
return_str=True,
aggressive_dash_splits=aggressive_dash_splits)
for item in input
]