The original backbone-head abstraction did not go deep enough: the inputs and outputs of both the backbone and the head were passed as `**kwargs`, which was implicit and easy to misread, and it left many features with no room for extension. The following adjustments were therefore made:
1. Split the base models by structure into encoder-only, decoder-only, single-stage, two-stage, etc. The encoder-only model is complete; the others are still under design.
2. Derive structured task-models from the base model structures above. A structured task-model is mainly responsible for parsing the backbone/head cfg and instantiating the matching backbone and head components; some models also override the forward method of the base model (see the sketch after this list).
3. Add explicit initialization, input, and output parameters to the backbone and head classes, to lower the cost of understanding them.
4. Remove the original nncrf class and rewrite it in backbone-head form, with an LSTM backbone and a CRF head.
5. Support `model = Model.from_pretrained('bert-based-fill-mask', task='text-classification')`: this loads the backbone correctly even when the requested task differs from the one in the original configuration (see the usage sketch after this list).
6. Support loading the model through transformers' AutoModel, so that a backbone can be integrated quickly without writing code.
7. Unify the original per-model NLP task classes with the structured task-model classes; the structured task-models greatly reduce the redundant code in the original task classes. Still under refactoring.
8. Support loading the model configuration from a Hugging Face transformers config.json when the model's own configuration is missing. NLP models only.
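
To make the backbone-head split concrete, here is a purely illustrative sketch of a structured task-model composing an LSTM backbone with a head behind explicit parameters instead of `**kwargs`. All class and cfg names (`LSTMBackbone`, `LinearHead`, `TokenClassificationModel`) are hypothetical stand-ins, not ModelScope's actual internal API, and a plain linear head stands in for the CRF head to keep the sketch short.

```python
# Illustrative sketch only: names below are hypothetical, not ModelScope's API.
import torch
import torch.nn as nn


class LSTMBackbone(nn.Module):
    """Backbone with explicit inputs/outputs instead of **kwargs."""

    def __init__(self, vocab_size, embed_dim, hidden_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(
            embed_dim, hidden_dim // 2, bidirectional=True, batch_first=True)

    def forward(self, input_ids):
        # (batch, seq_len) -> (batch, seq_len, hidden_dim)
        hidden, _ = self.lstm(self.embedding(input_ids))
        return hidden


class LinearHead(nn.Module):
    """Head mapping backbone features to per-token label logits."""

    def __init__(self, hidden_dim, num_labels):
        super().__init__()
        self.classifier = nn.Linear(hidden_dim, num_labels)

    def forward(self, features):
        return self.classifier(features)


class TokenClassificationModel(nn.Module):
    """Structured task-model: parses the cfg and wires backbone to head."""

    def __init__(self, cfg):
        super().__init__()
        self.backbone = LSTMBackbone(**cfg['backbone'])
        self.head = LinearHead(**cfg['head'])

    def forward(self, input_ids):
        return self.head(self.backbone(input_ids))


cfg = {
    'backbone': {'vocab_size': 100, 'embed_dim': 32, 'hidden_dim': 64},
    'head': {'hidden_dim': 64, 'num_labels': 7},
}
model = TokenClassificationModel(cfg)
logits = model(torch.randint(0, 100, (2, 10)))  # shape: (2, 10, 7)
```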
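
For items 5 and 6, a minimal usage sketch is shown below. The model id `'bert-based-fill-mask'` is the placeholder from item 5 (not guaranteed to exist on the hub), and whether a backbone can serve a different task depends on that model's configuration; the `pipeline` call mirrors the test file further down.

```python
from modelscope.models import Model
from modelscope.pipelines import pipeline

# Item 5: load a fill-mask backbone but attach a text-classification head.
# 'bert-based-fill-mask' is the placeholder id from the list above.
model = Model.from_pretrained(
    'bert-based-fill-mask', task='text-classification')

# The resulting model plugs into a pipeline as usual.
pipe = pipeline(task='text-classification', model=model)
print(pipe(input='The weather is nice today.'))
```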
# Copyright (c) Alibaba, Inc. and its affiliates.
import shutil
import unittest

from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models import Model
from modelscope.models.nlp import (LSTMForTokenClassificationWithCRF,
                                   ModelForTokenClassification)
from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import TokenClassificationPipeline
from modelscope.preprocessors import \
    TokenClassificationTransformersPreprocessor
from modelscope.utils.constant import Tasks
from modelscope.utils.test_utils import test_level


class PartOfSpeechTest(unittest.TestCase):
    model_id = 'damo/nlp_structbert_part-of-speech_chinese-lite'
    lstmcrf_news_model_id = 'damo/nlp_lstmcrf_part-of-speech_chinese-news'
    sentence = '今天天气不错,适合出去游玩'

    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    def test_run_by_direct_model_download(self):
        cache_path = snapshot_download(self.model_id)
        tokenizer = TokenClassificationTransformersPreprocessor(cache_path)
        model = ModelForTokenClassification.from_pretrained(cache_path)
        pipeline1 = TokenClassificationPipeline(model, preprocessor=tokenizer)
        pipeline2 = pipeline(
            Tasks.part_of_speech, model=model, preprocessor=tokenizer)
        print(f'sentence: {self.sentence}\n'
              f'pipeline1:{pipeline1(input=self.sentence)}')
        print()
        print(f'pipeline2: {pipeline2(input=self.sentence)}')

    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    def test_run_lstmcrf_news_by_direct_model_download(self):
        cache_path = snapshot_download(self.lstmcrf_news_model_id)
        tokenizer = TokenClassificationTransformersPreprocessor(cache_path)
        model = LSTMForTokenClassificationWithCRF.from_pretrained(cache_path)
        pipeline1 = TokenClassificationPipeline(model, preprocessor=tokenizer)
        pipeline2 = pipeline(
            Tasks.part_of_speech, model=model, preprocessor=tokenizer)
        print(f'sentence: {self.sentence}\n'
              f'pipeline1:{pipeline1(input=self.sentence)}')
        print()
        print(f'pipeline2: {pipeline2(input=self.sentence)}')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_run_with_model_from_modelhub(self):
        model = Model.from_pretrained(self.model_id)
        tokenizer = TokenClassificationTransformersPreprocessor(
            model.model_dir)
        pipeline_ins = pipeline(
            task=Tasks.part_of_speech, model=model, preprocessor=tokenizer)
        print(pipeline_ins(input=self.sentence))

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_run_lstmcrf_news_with_model_from_modelhub(self):
        model = Model.from_pretrained(self.lstmcrf_news_model_id)
        tokenizer = TokenClassificationTransformersPreprocessor(
            model.model_dir)
        pipeline_ins = pipeline(
            task=Tasks.part_of_speech, model=model, preprocessor=tokenizer)
        print(pipeline_ins(input=self.sentence))

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_model_name(self):
        pipeline_ins = pipeline(task=Tasks.part_of_speech, model=self.model_id)
        print(pipeline_ins(input=self.sentence))

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_lstmcrf_new_with_model_name(self):
        pipeline_ins = pipeline(
            task=Tasks.part_of_speech, model=self.lstmcrf_news_model_id)
        print(pipeline_ins(input=self.sentence))

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_run_with_default_model(self):
        pipeline_ins = pipeline(task=Tasks.part_of_speech)
        print(pipeline_ins(input=self.sentence))


if __name__ == '__main__':
    unittest.main()