2022-05-19 22:18:35 +08:00
|
|
|
# Copyright (c) Alibaba, Inc. and its affiliates.
|
2022-06-08 14:22:23 +08:00
|
|
|
import shutil
|
2022-05-19 22:18:35 +08:00
|
|
|
import unittest
|
|
|
|
|
|
2022-06-09 20:16:26 +08:00
|
|
|
from modelscope.models import Model
|
2022-06-27 11:09:38 +08:00
|
|
|
from modelscope.msdatasets import MsDataset
|
2022-06-09 20:16:26 +08:00
|
|
|
from modelscope.pipelines import SequenceClassificationPipeline, pipeline
|
|
|
|
|
from modelscope.preprocessors import SequenceClassificationPreprocessor
|
2022-06-13 14:15:54 +08:00
|
|
|
from modelscope.utils.constant import Hubs, Tasks
|
2022-06-15 14:53:49 +08:00
|
|
|
from modelscope.utils.test_utils import test_level
|
2022-05-19 22:18:35 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
class SequenceClassificationTest(unittest.TestCase):
    """Exercise the text-classification pipeline in several configurations.

    Every test method is currently disabled unconditionally through
    ``@unittest.skip``.  The commented-out ``skipUnless`` decorator kept
    above each one records a ``test_level()`` gate (presumably the one to
    restore when the unconditional skip is lifted -- confirm before
    re-enabling).
    """

    def setUp(self) -> None:
        """Store the id of the model used by the model-specific tests."""
        self.model_id = 'damo/bert-base-sst2'

    @staticmethod
    def _load_test_dataset():
        """Load the dataset shared by the dataset-driven tests.

        Returns:
            The result of ``MsDataset.load`` for dataset ``'xcopa'``, subset
            ``'translation-et'``, namespace ``'damotest'``, split ``'test'``
            with ``'premise'`` as the target.
        """
        return MsDataset.load(
            'xcopa',
            subset_name='translation-et',
            namespace='damotest',
            split='test',
            target='premise')

    def predict(self, pipeline_ins: SequenceClassificationPipeline):
        """Run ``pipeline_ins`` on two GLUE SST-2 test sentences and print.

        Args:
            pipeline_ins: Pipeline object; it is called once per sentence
                with that sentence as its only argument.
        """
        # Imported lazily so that merely importing this module does not
        # require easynlp.
        from easynlp.appzoo import load_dataset

        # Named ``glue_sst2`` rather than ``set`` to avoid shadowing the
        # builtin.
        glue_sst2 = load_dataset('glue', 'sst2')
        data = glue_sst2['test']['sentence'][:3]

        # Explicit indexing keeps the IndexError if fewer than two sentences
        # are available.
        for sentence in (data[0], data[1]):
            results = pipeline_ins(sentence)
            print(results)

        print(data)

    def printDataset(self, dataset: MsDataset):
        """Print at most the first 11 records obtained by iterating ``dataset``.

        Args:
            dataset: Any iterable of records; the tests pass the value
                returned by calling the pipeline on a dataset.
        """
        for i, r in enumerate(dataset):
            if i > 10:
                break
            print(r)

    # @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    @unittest.skip('nlp model does not support tensor input, skipped')
    def test_run_with_model_from_modelhub(self):
        """Build the pipeline from an explicit model and preprocessor."""
        model = Model.from_pretrained(self.model_id)
        preprocessor = SequenceClassificationPreprocessor(
            model.model_dir, first_sequence='sentence', second_sequence=None)
        pipeline_ins = pipeline(
            task=Tasks.text_classification,
            model=model,
            preprocessor=preprocessor)
        self.predict(pipeline_ins)

    # @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    @unittest.skip('nlp model does not support tensor input, skipped')
    def test_run_with_model_name(self):
        """Build the pipeline from the model id and run it on a dataset."""
        text_classification = pipeline(
            task=Tasks.text_classification, model=self.model_id)
        result = text_classification(self._load_test_dataset())
        self.printDataset(result)

    # @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    @unittest.skip('nlp model does not support tensor input, skipped')
    def test_run_with_default_model(self):
        """Build the pipeline from the task alone (no model argument)."""
        text_classification = pipeline(task=Tasks.text_classification)
        result = text_classification(self._load_test_dataset())
        self.printDataset(result)

    # @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    @unittest.skip('nlp model does not support tensor input, skipped')
    def test_run_with_modelscope_dataset(self):
        """Run the task-only pipeline on a dataset loaded beforehand."""
        text_classification = pipeline(task=Tasks.text_classification)
        # loaded from modelscope dataset
        dataset = self._load_test_dataset()
        result = text_classification(dataset)
        self.printDataset(result)
|
2022-05-31 18:27:19 +08:00
|
|
|
|
2022-05-19 22:18:35 +08:00
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Executed as a script: hand control to unittest's command-line runner.
    unittest.main()
|