# tests/pipelines/test_text_classification.py

# Copyright (c) Alibaba, Inc. and its affiliates.
import unittest

from modelscope.models import Model
from modelscope.msdatasets import MsDataset
from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import TextClassificationPipeline
from modelscope.preprocessors import SequenceClassificationPreprocessor
from modelscope.utils.constant import Tasks
from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level


class SequenceClassificationTest(unittest.TestCase, DemoCompatibilityCheck):
    """Pipeline tests for the text-classification task.

    Every test in this class is currently disabled with ``unittest.skip``.
    The commented-out ``skipUnless`` decorators above each test record the
    test-level gate that test used before it was disabled.
    """

    sentence1 = 'i like this wonderful place'

    def setUp(self) -> None:
        """Set the model id and task for each test.

        ``model_id`` is read by the tests below. ``task`` is not read in this
        class -- presumably consumed by ``DemoCompatibilityCheck``; confirm.
        """
        self.model_id = 'damo/bert-base-sst2'
        self.task = Tasks.text_classification

    def predict(self, pipeline_ins: TextClassificationPipeline):
        """Run ``pipeline_ins`` on the first two GLUE SST-2 test sentences.

        Prints each pipeline result, then the loaded sentences (at most
        three). No test in this class calls this helper.

        Args:
            pipeline_ins: The pipeline to call with one sentence at a time.
        """
        from easynlp.appzoo import load_dataset

        # Named ``glue_sst2`` rather than ``set`` to avoid shadowing the builtin.
        glue_sst2 = load_dataset('glue', 'sst2')
        data = glue_sst2['test']['sentence'][:3]

        for sentence in data[:2]:
            print(pipeline_ins(sentence))

        print(data)

    def printDataset(self, dataset: MsDataset):
        """Print the leading records of ``dataset``.

        Iteration stops once the index exceeds 10, so at most 11 records
        are printed.

        Args:
            dataset: Any iterable of records; the tests pass the value
                returned by calling a pipeline on a dataset.
        """
        for index, record in enumerate(dataset):
            if index > 10:
                break
            print(record)

    @staticmethod
    def _load_xcopa_premises():
        """Load the dataset shared by the dataset-driven tests.

        Returns:
            The result of ``MsDataset.load`` for ``xcopa`` /
            ``translation-et`` in namespace ``damotest``, ``test`` split,
            with ``target='premise'``.
        """
        return MsDataset.load(
            'xcopa',
            subset_name='translation-et',
            namespace='damotest',
            split='test',
            target='premise')

    # @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    @unittest.skip('nlp model does not support tensor input, skipped')
    def test_run_with_model_from_modelhub(self):
        """Build the pipeline from an explicit model and preprocessor."""
        model = Model.from_pretrained(self.model_id)
        preprocessor = SequenceClassificationPreprocessor(
            model.model_dir, first_sequence='sentence', second_sequence=None)
        pipeline_ins = pipeline(
            task=Tasks.text_classification,
            model=model,
            preprocessor=preprocessor)
        print(f'sentence1: {self.sentence1}\n'
              f'pipeline1:{pipeline_ins(input=self.sentence1)}')

    # @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    @unittest.skip('nlp model does not support tensor input, skipped')
    def test_run_with_model_name(self):
        """Build the pipeline from the model id and run it on a dataset."""
        text_classification = pipeline(
            task=Tasks.text_classification, model=self.model_id)
        result = text_classification(self._load_xcopa_premises())
        self.printDataset(result)

    # @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    @unittest.skip('nlp model does not support tensor input, skipped')
    def test_run_with_default_model(self):
        """Build the pipeline without naming a model and run it on a dataset."""
        text_classification = pipeline(task=Tasks.text_classification)
        result = text_classification(self._load_xcopa_premises())
        self.printDataset(result)

    # @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    @unittest.skip('nlp model does not support tensor input, skipped')
    def test_run_with_modelscope_dataset(self):
        """Load the dataset first, then feed it to a task-default pipeline."""
        text_classification = pipeline(task=Tasks.text_classification)
        # loaded from modelscope dataset
        dataset = self._load_xcopa_premises()
        result = text_classification(dataset)
        self.printDataset(result)

    @unittest.skip('demo compatibility test is only enabled on a needed-basis')
    def test_demo_compatibility(self):
        """Run the check inherited from ``DemoCompatibilityCheck``."""
        self.compatibility_check()


# When this file is executed as a script, hand control to unittest's
# command-line runner.
if __name__ == '__main__':
    unittest.main()
[to #42322933] NLP 1030 Refactor

Features:
1. Refactor the directory structure of the NLP models. All model files are placed in either the model folder or the task_model folder.
2. Refactor all comments to Google style.
3. Add detailed comments to important tasks and NLP models, describing each model and its preprocessor and trainer.
4. Model exporting now supports a direct call to TorchModelExporter (no need to derive from it).
5. Refactor the model save_pretrained method so it can be run directly (independently of the trainer).
6. Remove the Model type check in the pipeline base class, so that externally registered models can run in our pipelines.
7. The NLP trainer now has an NLPTrainingArguments class; users can pass arguments into the dataclass and use it as a normal cfg_modify_fn, which simplifies modifying the cfg.
8. Merge BACKBONES and MODELS, so users can get a backbone with the Model.from_pretrained call.
9. Model.from_pretrained now supports a task argument, so users can take a backbone and load it with a specific task class.
10. Support the Preprocessor.from_pretrained method.
11. Add standard return classes to important NLP tasks, so that some of the pipelines and models are now independent: the return values of the models will always be tensors, and the pipelines take care of the conversion to numpy and any subsequent processing.
12. Split the NLP preprocessors file to make the directory structure clearer.

Bug fixes:
1. Fix a bug where lr_scheduler could be called earlier than the optimizer's step.
2. Fix a bug where calling Pipelines directly (not via pipeline(xxx)) throws an error.
3. Fix a bug where the trainer does not call the correct TaskDataset class.
4. Fix a bug where the internal loading of the dataset throws an error in the trainer class.

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10490585
2022-10-25 12:26:25 +08:00
			`# Copyright (c) Alibaba, Inc. and its affiliates.`
			`import unittest`

			`from modelscope.models import Model`
			`from modelscope.msdatasets import MsDataset`
			`from modelscope.pipelines import pipeline`
			`from modelscope.pipelines.nlp import TextClassificationPipeline`
			`from modelscope.preprocessors import SequenceClassificationPreprocessor`
			`from modelscope.utils.constant import Tasks`
			`from modelscope.utils.demo_utils import DemoCompatibilityCheck`
			`from modelscope.utils.test_utils import test_level`


			`class SequenceClassificationTest(unittest.TestCase, DemoCompatibilityCheck):`
			`sentence1 = 'i like this wonderful place'`

			`def setUp(self) -> None:`
			`self.model_id = 'damo/bert-base-sst2'`
			`self.task = Tasks.text_classification`

			`def predict(self, pipeline_ins: TextClassificationPipeline):`
			`from easynlp.appzoo import load_dataset`

			`set = load_dataset('glue', 'sst2')`
			`data = set['test']['sentence'][:3]`

			`results = pipeline_ins(data[0])`
			`print(results)`
			`results = pipeline_ins(data[1])`
			`print(results)`

			`print(data)`

			`def printDataset(self, dataset: MsDataset):`
			`for i, r in enumerate(dataset):`
			`if i > 10:`
			`break`
			`print(r)`

			`# @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')`
			`@unittest.skip('nlp model does not support tensor input, skipped')`
			`def test_run_with_model_from_modelhub(self):`
			`model = Model.from_pretrained(self.model_id)`
			`preprocessor = SequenceClassificationPreprocessor(`
			`model.model_dir, first_sequence='sentence', second_sequence=None)`
			`pipeline_ins = pipeline(`
			`task=Tasks.text_classification,`
			`model=model,`
			`preprocessor=preprocessor)`
			`print(f'sentence1: {self.sentence1}\n'`
			`f'pipeline1:{pipeline_ins(input=self.sentence1)}')`

			`# @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')`
			`@unittest.skip('nlp model does not support tensor input, skipped')`
			`def test_run_with_model_name(self):`
			`text_classification = pipeline(`
			`task=Tasks.text_classification, model=self.model_id)`
			`result = text_classification(`
			`MsDataset.load(`
			`'xcopa',`
			`subset_name='translation-et',`
			`namespace='damotest',`
			`split='test',`
			`target='premise'))`
			`self.printDataset(result)`

			`# @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')`
			`@unittest.skip('nlp model does not support tensor input, skipped')`
			`def test_run_with_default_model(self):`
			`text_classification = pipeline(task=Tasks.text_classification)`
			`result = text_classification(`
			`MsDataset.load(`
			`'xcopa',`
			`subset_name='translation-et',`
			`namespace='damotest',`
			`split='test',`
			`target='premise'))`
			`self.printDataset(result)`

			`# @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')`
			`@unittest.skip('nlp model does not support tensor input, skipped')`
			`def test_run_with_modelscope_dataset(self):`
			`text_classification = pipeline(task=Tasks.text_classification)`
			`# loaded from modelscope dataset`
			`dataset = MsDataset.load(`
			`'xcopa',`
			`subset_name='translation-et',`
			`namespace='damotest',`
			`split='test',`
			`target='premise')`
			`result = text_classification(dataset)`
			`self.printDataset(result)`

			`@unittest.skip('demo compatibility test is only enabled on a needed-basis')`
			`def test_demo_compatibility(self):`
			`self.compatibility_check()`


			`if __name__ == '__main__':`
			`unittest.main()`