# Copyright (c) Alibaba, Inc. and its affiliates.
import tempfile
import unittest
import zipfile
from pathlib import Path

from ali_maas_datasets import PyDataset

from maas_lib.fileio import File
from maas_lib.models import Model
from maas_lib.models.nlp import SequenceClassificationModel
from maas_lib.pipelines import SequenceClassificationPipeline, pipeline
from maas_lib.preprocessors import SequenceClassificationPreprocessor


class SequenceClassificationTest(unittest.TestCase):

    def predict(self, pipeline_ins: SequenceClassificationPipeline):
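        """Run the given pipeline on a few SST-2 test sentences and print the outputs."""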
        from easynlp.appzoo import load_dataset

        dataset = load_dataset('glue', 'sst2')
        data = dataset['test']['sentence'][:3]

        results = pipeline_ins(data[0])
        print(results)
        results = pipeline_ins(data[1])
        print(results)

        print(data)

    def test_run(self):
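        """Build the model, preprocessor and pipeline from a locally cached checkpoint."""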
        model_url = 'https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com' \
                    '/release/easynlp_modelzoo/alibaba-pai/bert-base-sst2.zip'
        cache_path_str = r'.cache/easynlp/bert-base-sst2.zip'
        cache_path = Path(cache_path_str)
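        # Download the model archive once and cache it locally.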
        if not cache_path.exists():
            cache_path.parent.mkdir(parents=True, exist_ok=True)
            cache_path.touch(exist_ok=True)
            with cache_path.open('wb') as ofile:
                ofile.write(File.read(model_url))
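        # Unpack the cached archive next to the zip file.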
        with zipfile.ZipFile(cache_path_str, 'r') as zipf:
            zipf.extractall(cache_path.parent)
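        # Assemble model, preprocessor and pipeline from the unpacked checkpoint.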
        path = r'.cache/easynlp/bert-base-sst2'
        model = SequenceClassificationModel(path)
        preprocessor = SequenceClassificationPreprocessor(
            path, first_sequence='sentence', second_sequence=None)
        pipeline1 = SequenceClassificationPipeline(model, preprocessor)
        self.predict(pipeline1)
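        # The same components can also be wired up through the pipeline() factory.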
        pipeline2 = pipeline(
            'text-classification', model=model, preprocessor=preprocessor)
        print(pipeline2('Hello world!'))

    def test_run_modelhub(self):
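        """Build the pipeline from a model loaded via Model.from_pretrained."""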
        model = Model.from_pretrained('damo/bert-base-sst2')
        preprocessor = SequenceClassificationPreprocessor(
            model.model_dir, first_sequence='sentence', second_sequence=None)
        pipeline_ins = pipeline(
            task='text-classification', model=model, preprocessor=preprocessor)
        self.predict(pipeline_ins)

    def test_dataset(self):
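        """Run the text-classification pipeline over a PyDataset input."""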
        model_url = 'https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com' \
                    '/release/easynlp_modelzoo/alibaba-pai/bert-base-sst2.zip'
        cache_path_str = r'.cache/easynlp/bert-base-sst2.zip'
        cache_path = Path(cache_path_str)
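        # Same download-and-unpack logic as in test_run.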
        if not cache_path.exists():
            cache_path.parent.mkdir(parents=True, exist_ok=True)
            cache_path.touch(exist_ok=True)
            with cache_path.open('wb') as ofile:
                ofile.write(File.read(model_url))

        with zipfile.ZipFile(cache_path_str, 'r') as zipf:
            zipf.extractall(cache_path.parent)
        path = r'.cache/easynlp/bert-base-sst2'
        model = SequenceClassificationModel(path)
        preprocessor = SequenceClassificationPreprocessor(
            path, first_sequence='sentence', second_sequence=None)
        text_classification = pipeline(
            'text-classification', model=model, preprocessor=preprocessor)
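        # Run the pipeline over the dataset and inspect only the first few predictions.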
        dataset = PyDataset.load('glue', name='sst2', target='sentence')
        result = text_classification(dataset)
        for i, r in enumerate(result):
            if i > 10:
                break
            print(r)


if __name__ == '__main__':
    unittest.main()