# patch_dynamic_module/tests/trainers/cli/test_cli.py

# Copyright (c) Alibaba, Inc. and its affiliates.
import json
import os
import tempfile
import unittest

from modelscope import MsDataset, TrainingArgs, build_dataset_from_file
from modelscope.utils.test_utils import test_level


class TestCli(unittest.TestCase):
    """Tests building datasets from a mixed-dataset JSON config file.

    Every test gets its own temporary directory containing ``dataset.json``,
    so the fixture never touches the current working directory and cannot
    collide with another test run or with a pre-existing file of that name.
    """

    def setUp(self) -> None:
        """Write the two-entry dataset config into a fresh temp directory.

        The path of the written file is stored in ``self.dataset_json_file``.
        """
        # Register the cleanup before writing anything: cleanups run even when
        # the rest of setUp raises, whereas tearDown would be skipped.
        tmp_dir = tempfile.TemporaryDirectory()
        self.addCleanup(tmp_dir.cleanup)
        self.dataset_json_file = os.path.join(tmp_dir.name, 'dataset.json')

        # NOTE(review): 'column_mapping' presumably maps source column names
        # to unified ones, and 'usage' presumably selects the train fraction
        # (0.8) or the validation role ('val') - confirm against
        # build_dataset_from_file.
        content = [{
            'dataset': {
                'dataset_name': 'clue',
                'subset_name': 'cmnli',
                'split': 'train',
            },
            'column_mapping': {
                'sentence1': 'sentence1',
                'sentence2': 'sentence2',
                'label': 'label',
            },
            'usage': 0.8,
        }, {
            'dataset': {
                'dataset_name': 'glue',
                'subset_name': 'mnli',
                'split': 'validation_matched',
            },
            'column_mapping': {
                'premise': 'sentence1',
                'hypothesis': 'sentence2',
                'label': 'label',
            },
            'usage': 'val',
        }]
        with open(self.dataset_json_file, 'w', encoding='utf-8') as f:
            json.dump(content, f)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_merge_dataset_from_file(self):
        """The built train/test sets together hold every source sample."""
        # Load the two source datasets directly to get the reference sizes.
        dataset = MsDataset.load('clue', subset_name='cmnli', split='train')
        dataset2 = MsDataset.load(
            'glue', subset_name='mnli', split='validation_matched')
        training_args = TrainingArgs(
            dataset_json_file=self.dataset_json_file)
        train, test = build_dataset_from_file(training_args.dataset_json_file)
        # No sample may be dropped or duplicated by the merge.
        self.assertEqual(len(train) + len(test), len(dataset) + len(dataset2))


# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
Support FlexTrain and update the structure of trainer 1. Refactor training_args 2. Refactor hooks 3. Add train_id for push_to_hub 4. Support both output_dir/output_sub_dir for checkpoint_hooks 5. Support copy when hardlink fails when checkpointing 6. Support mixed dataset config file as a CLI argument 7. Add eval txt in output folder Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/12384253 * support the ignorance of file pattern 2023-05-13 12:12:04 +08:00			`# Copyright (c) Alibaba, Inc. and its affiliates.`
Fix CI: test merge dataset failed Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/12771691 * fix CI * add teardown 2023-05-29 10:32:33 +08:00			`import os`
Support FlexTrain and update the structure of trainer 1. Refactor training_args 2. Refactor hooks 3. Add train_id for push_to_hub 4. Support both output_dir/output_sub_dir for checkpoint_hooks 5. Support copy when hardlink fails when checkpointing 6. Support mixed dataset config file as a CLI argument 7. Add eval txt in output folder Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/12384253 * support the ignorance of file pattern 2023-05-13 12:12:04 +08:00			`import unittest`

			`import json`

			`from modelscope import MsDataset, TrainingArgs, build_dataset_from_file`
			`from modelscope.utils.test_utils import test_level`


			`class TestCli(unittest.TestCase):`

			`def setUp(self) -> None:`
			`content = [{`
			`'dataset': {`
			`'dataset_name': 'clue',`
			`'subset_name': 'cmnli',`
			`'split': 'train',`
			`},`
			`'column_mapping': {`
			`'sentence1': 'sentence1',`
			`'sentence2': 'sentence2',`
			`'label': 'label',`
			`},`
Fix CI: test merge dataset failed Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/12771691 * fix CI * add teardown 2023-05-29 10:32:33 +08:00			`'usage': 0.8,`
Support FlexTrain and update the structure of trainer 1. Refactor training_args 2. Refactor hooks 3. Add train_id for push_to_hub 4. Support both output_dir/output_sub_dir for checkpoint_hooks 5. Support copy when hardlink fails when checkpointing 6. Support mixed dataset config file as a CLI argument 7. Add eval txt in output folder Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/12384253 * support the ignorance of file pattern 2023-05-13 12:12:04 +08:00			`}, {`
			`'dataset': {`
			`'dataset_name': 'glue',`
			`'subset_name': 'mnli',`
			`'split': 'validation_matched',`
			`},`
			`'column_mapping': {`
			`'premise': 'sentence1',`
			`'hypothesis': 'sentence2',`
			`'label': 'label',`
			`},`
Fix CI: test merge dataset failed Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/12771691 * fix CI * add teardown 2023-05-29 10:32:33 +08:00			`'usage': 'val',`
Support FlexTrain and update the structure of trainer 1. Refactor training_args 2. Refactor hooks 3. Add train_id for push_to_hub 4. Support both output_dir/output_sub_dir for checkpoint_hooks 5. Support copy when hardlink fails when checkpointing 6. Support mixed dataset config file as a CLI argument 7. Add eval txt in output folder Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/12384253 * support the ignorance of file pattern 2023-05-13 12:12:04 +08:00			`}]`
			`with open('./dataset.json', 'w') as f:`
			`json.dump(content, f)`

Fix CI: test merge dataset failed Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/12771691 * fix CI * add teardown 2023-05-29 10:32:33 +08:00			`def tearDown(self) -> None:`
			`if os.path.exists('./dataset.json'):`
			`os.remove('./dataset.json')`

Support FlexTrain and update the structure of trainer 1. Refactor training_args 2. Refactor hooks 3. Add train_id for push_to_hub 4. Support both output_dir/output_sub_dir for checkpoint_hooks 5. Support copy when hardlink fails when checkpointing 6. Support mixed dataset config file as a CLI argument 7. Add eval txt in output folder Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/12384253 * support the ignorance of file pattern 2023-05-13 12:12:04 +08:00			`@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')`
			`def test_merge_dataset_from_file(self):`
			`dataset = MsDataset.load('clue', subset_name='cmnli', split='train')`
			`dataset2 = MsDataset.load(`
			`'glue', subset_name='mnli', split='validation_matched')`
			`training_args = TrainingArgs(dataset_json_file='./dataset.json')`
			`train, test = build_dataset_from_file(training_args.dataset_json_file)`
			`self.assertEqual(len(train) + len(test), len(dataset) + len(dataset2))`


			`if __name__ == '__main__':`
			`unittest.main()`