[to #41401401] add preprocessor, model and pipeline

* add preprocessor module * add model base and builder * update task constant * add load image preprocessor and its dependency * add pipeline interface and UT covered * support default pipeline for task * add image matting pipeline * refine nlp tokenize interface * add nlp pipeline * fix UT failed * add test for Compose Link: https://code.aone.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/8769235 * add preprocessor module * add test for Compose * fix citest error * fix abs class error * add model base and builder * update task constant * add load image preprocessor and its dependency * add pipeline interface and UT covered * support default pipeline for task * refine models and pipeline interface * add pipeline folder structure * add image matting pipeline * refine nlp tokenize interface * add nlp pipeline 1. add preprocessor, model and pipeline for nlp text classification 2. add corresponding test Link: https://code.aone.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/8757371 * new nlp pipeline * format pre-commit code * update easynlp pipeline * update model_name for easynlp pipeline; add test for maas_lib/utils/typeassert.py * update test_typeassert.py * refactor code 1. rename typeassert to type_assert 2. use lazy import to make easynlp dependency optional 3. refine image matting UT * fix linter test failed * update requirements.txt * fix UT failed * fix citest script to update requirements
2025-12-24 03:59:23 +01:00 · 2022-05-19 22:18:35 +08:00
parent 0a756f6a0d
commit 5e469008fd
39 changed files with 1053 additions and 10 deletions
--- a/maas_lib/pipelines/nlp/sequence_classification_pipeline.py
+++ b/maas_lib/pipelines/nlp/sequence_classification_pipeline.py
@@ -0,0 +1,77 @@
+import os
+import uuid
+from typing import Any, Dict
+
+import json
+import numpy as np
+
+from maas_lib.models.nlp import SequenceClassificationModel
+from maas_lib.preprocessors import SequenceClassificationPreprocessor
+from maas_lib.utils.constant import Tasks
+from ..base import Input, Pipeline
+from ..builder import PIPELINES
+
+__all__ = ['SequenceClassificationPipeline']
+
+
+@PIPELINES.register_module(
+    Tasks.text_classification, module_name=r'bert-sentiment-analysis')
+class SequenceClassificationPipeline(Pipeline):
+
+    def __init__(self, model: SequenceClassificationModel,
+                 preprocessor: SequenceClassificationPreprocessor, **kwargs):
+        """use `model` and `preprocessor` to create a nlp text classification pipeline for prediction
+
+        Args:
+            model (SequenceClassificationModel): a model instance
+            preprocessor (SequenceClassificationPreprocessor): a preprocessor instance
+        """
+
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+
+        from easynlp.utils import io
+        self.label_path = os.path.join(model.model_dir, 'label_mapping.json')
+        with io.open(self.label_path) as f:
+            self.label_mapping = json.load(f)
+        self.label_id_to_name = {
+            idx: name
+            for name, idx in self.label_mapping.items()
+        }
+
+    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
+        """process the predict results
+
+        Args:
+            inputs (Dict[str, Any]): _description_
+
+        Returns:
+            Dict[str, str]: the predict results
+        """
+
+        probs = inputs['probabilities']
+        logits = inputs['logits']
+        predictions = np.argsort(-probs, axis=-1)
+        preds = predictions[0]
+        b = 0
+        new_result = list()
+        for pred in preds:
+            new_result.append({
+                'pred': self.label_id_to_name[pred],
+                'prob': float(probs[b][pred]),
+                'logit': float(logits[b][pred])
+            })
+        new_results = list()
+        new_results.append({
+            'id':
+            inputs['id'][b] if 'id' in inputs else str(uuid.uuid4()),
+            'output':
+            new_result,
+            'predictions':
+            new_result[0]['pred'],
+            'probabilities':
+            ','.join([str(t) for t in inputs['probabilities'][b]]),
+            'logits':
+            ','.join([str(t) for t in inputs['logits'][b]])
+        })
+
+        return new_results[0]