Files
modelscope/tests/pipelines/test_feature_extraction.py
zhangzhicheng.zzc d721fabb34 [to #42322933]bert with sequence classification / token classification/ fill mask refactor
1.新增支持原始bert模型(非easynlp的 backbone prefix版本)
2.支持bert的在sequence classification/fill mask /token classification上的backbone head形式
3.统一了sequence classification几个任务的pipeline到一个类
4.fill mask 支持backbone head形式
5.token classification的几个子任务(ner,word seg, part of speech)的preprocessor 统一到了一起TokenClassificationPreprocessor
6. sequence classification的几个子任务(single classification, pair classification)的preprocessor 统一到了一起SequenceClassificationPreprocessor
7. 改动register中 cls的group_key 赋值位置,之前的group_key在多个decorators的情况下,会被覆盖,obj_cls的group_key信息不正确
8. 基于backbone head形式将 原本group_key和 module同名的情况尝试做调整,如下在modelscope/pipelines/nlp/sequence_classification_pipeline.py 中 
原本
 @PIPELINES.register_module(
    Tasks.sentiment_classification, module_name=Pipelines.sentiment_classification)
改成
@PIPELINES.register_module(
    Tasks.text_classification, module_name=Pipelines.sentiment_classification)
相应的configuration.json也有改动,这样的改动更符合任务和pipeline(子任务)的关系。
9. 其他相应改动为支持上述功能
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10041463
2022-09-27 23:08:33 +08:00

68 lines
2.8 KiB
Python

# Copyright (c) Alibaba, Inc. and its affiliates.
import unittest
import numpy as np
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models import Model
from modelscope.models.nlp import FeatureExtractionModel
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import FeatureExtractionPipeline
from modelscope.preprocessors import NLPPreprocessor
from modelscope.utils.constant import Tasks
from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level
class FeatureExtractionTaskModelTest(unittest.TestCase,
                                     DemoCompatibilityCheck):
    """Smoke tests for the feature-extraction pipeline.

    Each test builds a pipeline in a different way, runs it on
    ``sentence1`` and prints the shape of the returned text embedding.
    The tests make no assertions on the embedding itself; a test fails
    only if building or running the pipeline raises.
    """

    # Input sentence shared by all tests; read through ``self.sentence1``.
    sentence1 = '测试embedding'

    def setUp(self) -> None:
        """Record the task name and the model id used by the tests."""
        self.task = Tasks.feature_extraction
        self.model_id = 'damo/pert_feature-extraction_base-test'

    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    def test_run_with_direct_file_download(self):
        """Run with an explicitly built model and preprocessor.

        The same model/preprocessor pair is exercised twice: once through
        a directly constructed ``FeatureExtractionPipeline`` and once
        through the ``pipeline`` factory.
        """
        cache_path = snapshot_download(self.model_id)
        tokenizer = NLPPreprocessor(cache_path, padding=False)
        model = FeatureExtractionModel.from_pretrained(self.model_id)
        pipeline1 = FeatureExtractionPipeline(model, preprocessor=tokenizer)
        pipeline2 = pipeline(
            Tasks.feature_extraction, model=model, preprocessor=tokenizer)
        result = pipeline1(input=self.sentence1)
        print(f'sentence1: {self.sentence1}\n'
              f'pipeline1:{np.shape(result[OutputKeys.TEXT_EMBEDDING])}')
        result = pipeline2(input=self.sentence1)
        # Label this output as pipeline2 so the two runs can be told apart.
        print(f'sentence1: {self.sentence1}\n'
              f'pipeline2: {np.shape(result[OutputKeys.TEXT_EMBEDDING])}')

    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    def test_run_with_model_from_modelhub(self):
        """Run with a ``Model`` instance loaded via ``Model.from_pretrained``.

        The preprocessor is built from the loaded model's ``model_dir``.
        """
        model = Model.from_pretrained(self.model_id)
        tokenizer = NLPPreprocessor(model.model_dir, padding=False)
        pipeline_ins = pipeline(
            task=Tasks.feature_extraction, model=model, preprocessor=tokenizer)
        result = pipeline_ins(input=self.sentence1)
        print(np.shape(result[OutputKeys.TEXT_EMBEDDING]))

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_model_name(self):
        """Run with the model given only by its id string."""
        pipeline_ins = pipeline(
            task=Tasks.feature_extraction, model=self.model_id)
        result = pipeline_ins(input=self.sentence1)
        print(np.shape(result[OutputKeys.TEXT_EMBEDDING]))

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_default_model(self):
        """Run with no model argument, passing only the task."""
        pipeline_ins = pipeline(task=Tasks.feature_extraction)
        result = pipeline_ins(input=self.sentence1)
        print(np.shape(result[OutputKeys.TEXT_EMBEDDING]))
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()