Fix OCR fine-tuning: use accuracy metric and correct train/eval dataset splits

This commit is contained in:
翎航
2022-10-26 19:44:54 +08:00
parent 5dd9698a33
commit 022fa4948a
3 changed files with 17 additions and 71 deletions

View File

@@ -35,14 +35,13 @@ class AccuracyMetric(Metric):
eval_results = outputs[key]
break
assert type(ground_truths) == type(eval_results)
if isinstance(ground_truths, list):
self.preds.extend(eval_results)
self.labels.extend(ground_truths)
elif isinstance(ground_truths, np.ndarray):
self.preds.extend(eval_results.tolist())
self.labels.extend(ground_truths.tolist())
else:
raise Exception('only support list or np.ndarray')
for truth in ground_truths:
self.labels.append(truth)
for result in eval_results:
if isinstance(truth, str):
self.preds.append(result.strip().replace(' ', ''))
else:
self.preds.append(result)
def evaluate(self):
assert len(self.preds) == len(self.labels)

View File

@@ -56,7 +56,7 @@ class NedMetric(Metric):
@staticmethod
def _distance(pred, ref):
if pred is None or ref is None:
raise TypeError('Argument s0 is NoneType.')
raise TypeError('Argument (pred or ref) is NoneType.')
if pred == ref:
return 0.0
if len(pred) == 0:

View File

@@ -8,78 +8,23 @@ import json
from modelscope.metainfo import Metrics, Trainers
from modelscope.msdatasets import MsDataset
from modelscope.trainers import build_trainer
from modelscope.utils.constant import ModelFile
from modelscope.utils.constant import DownloadMode, ModelFile
from modelscope.utils.test_utils import test_level
class TestOfaTrainer(unittest.TestCase):
def setUp(self) -> None:
# self.finetune_cfg = \
# {'framework': 'pytorch',
# 'task': 'image-captioning',
# 'model': {'type': 'ofa',
# 'beam_search': {'beam_size': 5,
# 'max_len_b': 16,
# 'min_len': 1,
# 'no_repeat_ngram_size': 0},
# 'seed': 7,
# 'max_src_length': 256,
# 'language': 'en',
# 'gen_type': 'generation',
# 'patch_image_size': 480,
# 'max_image_size': 480,
# 'imagenet_default_mean_and_std': False},
# 'pipeline': {'type': 'image-captioning'},
# 'dataset': {'column_map': {'text': 'caption'}},
# 'train': {'work_dir': 'work/ckpts/caption',
# # 'launcher': 'pytorch',
# 'max_epochs': 1,
# 'use_fp16': True,
# 'dataloader': {'batch_size_per_gpu': 4, 'workers_per_gpu': 0},
# 'lr_scheduler': {'name': 'polynomial_decay',
# 'warmup_proportion': 0.01,
# 'lr_end': 1e-07},
# 'lr_scheduler_hook': {'type': 'LrSchedulerHook', 'by_epoch': False},
# 'optimizer': {'type': 'AdamW', 'lr': 5e-05, 'weight_decay': 0.01},
# 'optimizer_hook': {'type': 'TorchAMPOptimizerHook',
# 'cumulative_iters': 1,
# 'grad_clip': {'max_norm': 1.0, 'norm_type': 2},
# 'loss_keys': 'loss'},
# 'criterion': {'name': 'AdjustLabelSmoothedCrossEntropyCriterion',
# 'constraint_range': None,
# 'drop_worst_after': 0,
# 'drop_worst_ratio': 0.0,
# 'ignore_eos': False,
# 'ignore_prefix_size': 0,
# 'label_smoothing': 0.1,
# 'reg_alpha': 1.0,
# 'report_accuracy': False,
# 'sample_patch_num': 196,
# 'sentence_avg': False,
# 'use_rdrop': True},
# 'hooks': [{'type': 'BestCkptSaverHook',
# 'metric_key': 'bleu-4',
# 'interval': 100},
# {'type': 'TextLoggerHook', 'interval': 1},
# {'type': 'IterTimerHook'},
# {'type': 'EvaluationHook', 'by_epoch': True, 'interval': 1}]},
# 'evaluation': {'dataloader': {'batch_size_per_gpu': 4, 'workers_per_gpu': 0},
# 'metrics': [{'type': 'bleu',
# 'eval_tokenized_bleu': False,
# 'ref_name': 'labels',
# 'hyp_name': 'caption'}]},
# 'preprocessor': []}
self.finetune_cfg = \
{'framework': 'pytorch',
'task': 'ocr-recognition',
'model': {'type': 'ofa',
'beam_search': {'beam_size': 5,
'max_len_b': 16,
'max_len_b': 64,
'min_len': 1,
'no_repeat_ngram_size': 0},
'seed': 7,
'max_src_length': 256,
'max_src_length': 128,
'language': 'zh',
'gen_type': 'generation',
'patch_image_size': 480,
@@ -115,13 +60,13 @@ class TestOfaTrainer(unittest.TestCase):
'sentence_avg': False,
'use_rdrop': True},
'hooks': [{'type': 'BestCkptSaverHook',
'metric_key': 'ned',
'metric_key': 'accuracy',
'interval': 100},
{'type': 'TextLoggerHook', 'interval': 1},
{'type': 'IterTimerHook'},
{'type': 'EvaluationHook', 'by_epoch': True, 'interval': 1}]},
'evaluation': {'dataloader': {'batch_size_per_gpu': 4, 'workers_per_gpu': 0},
'metrics': [{'type': 'ned'}]},
'metrics': [{'type': 'accuracy'}]},
'preprocessor': []}
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@@ -140,12 +85,14 @@ class TestOfaTrainer(unittest.TestCase):
'ocr_fudanvi_zh',
subset_name='scene',
namespace='modelscope',
split='train[:12]'),
split='train[:1000]',
download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS),
eval_dataset=MsDataset.load(
'ocr_fudanvi_zh',
subset_name='scene',
namespace='modelscope',
split='validation[:4]'),
split='test[:100]',
download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS),
cfg_file=config_file)
trainer = build_trainer(name=Trainers.ofa, default_args=args)
trainer.train()