mirror of
https://github.com/modelscope/modelscope.git
synced 2025-12-21 02:29:23 +01:00
[to #42322933] Fix bug in UT daily
1. Fix bugs in daily test
2. Fix a bug where the learning rate was updated before the optimizer's first update step
TODO: this will still cause warnings when gradient accumulation (GA) is above 1
3. Remove the mode check in the text-classification preprocessor to fit the base trainer (bug fix)
Update some regression bins to fit the preprocessor
4. Update the regression tool so that outer code can modify atol and rtol
5. Add the default metric for text-classification task
6. Remove the unused checkpoint conversion method in bert to avoid requiring TensorFlow when loading modeling_bert
Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10430764
This commit is contained in:
@@ -29,7 +29,8 @@ class TestTrainerWithNlp(unittest.TestCase):
|
||||
os.makedirs(self.tmp_dir)
|
||||
|
||||
self.dataset = MsDataset.load(
|
||||
'afqmc_small', namespace='userxiaoming', split='train')
|
||||
'clue', subset_name='afqmc',
|
||||
split='train').to_hf_dataset().select(range(2))
|
||||
|
||||
def tearDown(self):
|
||||
shutil.rmtree(self.tmp_dir)
|
||||
@@ -73,7 +74,7 @@ class TestTrainerWithNlp(unittest.TestCase):
|
||||
output_dir = os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR)
|
||||
pipeline_sentence_similarity(output_dir)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
@unittest.skipUnless(test_level() >= 3, 'skip test in current test level')
|
||||
def test_trainer_with_backbone_head(self):
|
||||
model_id = 'damo/nlp_structbert_sentiment-classification_chinese-base'
|
||||
kwargs = dict(
|
||||
@@ -99,6 +100,8 @@ class TestTrainerWithNlp(unittest.TestCase):
|
||||
model_id = 'damo/nlp_structbert_sentiment-classification_chinese-base'
|
||||
cfg = read_config(model_id, revision='beta')
|
||||
cfg.train.max_epochs = 20
|
||||
cfg.preprocessor.train['label2id'] = {'0': 0, '1': 1}
|
||||
cfg.preprocessor.val['label2id'] = {'0': 0, '1': 1}
|
||||
cfg.train.work_dir = self.tmp_dir
|
||||
cfg_file = os.path.join(self.tmp_dir, 'config.json')
|
||||
cfg.dump(cfg_file)
|
||||
@@ -120,22 +123,24 @@ class TestTrainerWithNlp(unittest.TestCase):
|
||||
checkpoint_path=os.path.join(self.tmp_dir, 'epoch_10.pth'))
|
||||
self.assertTrue(Metrics.accuracy in eval_results)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
|
||||
def test_trainer_with_configured_datasets(self):
|
||||
model_id = 'damo/nlp_structbert_sentence-similarity_chinese-base'
|
||||
cfg: Config = read_config(model_id)
|
||||
cfg.train.max_epochs = 20
|
||||
cfg.preprocessor.train['label2id'] = {'0': 0, '1': 1}
|
||||
cfg.preprocessor.val['label2id'] = {'0': 0, '1': 1}
|
||||
cfg.train.work_dir = self.tmp_dir
|
||||
cfg.dataset = {
|
||||
'train': {
|
||||
'name': 'afqmc_small',
|
||||
'name': 'clue',
|
||||
'subset_name': 'afqmc',
|
||||
'split': 'train',
|
||||
'namespace': 'userxiaoming'
|
||||
},
|
||||
'val': {
|
||||
'name': 'afqmc_small',
|
||||
'name': 'clue',
|
||||
'subset_name': 'afqmc',
|
||||
'split': 'train',
|
||||
'namespace': 'userxiaoming'
|
||||
},
|
||||
}
|
||||
cfg_file = os.path.join(self.tmp_dir, 'config.json')
|
||||
@@ -159,6 +164,11 @@ class TestTrainerWithNlp(unittest.TestCase):
|
||||
model_id = 'damo/nlp_structbert_sentence-similarity_chinese-base'
|
||||
cfg: Config = read_config(model_id)
|
||||
cfg.train.max_epochs = 3
|
||||
cfg.preprocessor.first_sequence = 'sentence1'
|
||||
cfg.preprocessor.second_sequence = 'sentence2'
|
||||
cfg.preprocessor.label = 'label'
|
||||
cfg.preprocessor.train['label2id'] = {'0': 0, '1': 1}
|
||||
cfg.preprocessor.val['label2id'] = {'0': 0, '1': 1}
|
||||
cfg.train.work_dir = self.tmp_dir
|
||||
cfg_file = os.path.join(self.tmp_dir, 'config.json')
|
||||
cfg.dump(cfg_file)
|
||||
|
||||
Reference in New Issue
Block a user