[to #46273042]feat: pipeline trainer stat information from snapshot_download

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10837490
2025-12-25 12:39:25 +01:00 · 2022-11-28 15:06:03 +08:00
parent d5ee8aa66d
commit a4c36a2920
59 changed files with 282 additions and 412 deletions
--- a/modelscope/models/base/base_model.py
+++ b/modelscope/models/base/base_model.py
@@ -5,10 +5,10 @@ from abc import ABC, abstractmethod
 from typing import Any, Callable, Dict, List, Optional, Union

 from modelscope.hub.snapshot_download import snapshot_download
-from modelscope.models.builder import MODELS, build_model
+from modelscope.models.builder import build_model
 from modelscope.utils.checkpoint import save_checkpoint, save_pretrained
 from modelscope.utils.config import Config
-from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile, Tasks
+from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke, ModelFile
 from modelscope.utils.device import verify_device
 from modelscope.utils.logger import get_logger

@@ -94,6 +94,10 @@ class Model(ABC):
        if prefetched is not None:
            kwargs.pop('model_prefetched')

+        invoked_by = kwargs.get(Invoke.KEY)
+        if invoked_by is not None:
+            kwargs.pop(Invoke.KEY)
+
        if osp.exists(model_name_or_path):
            local_model_dir = model_name_or_path
        else:
@@ -101,7 +105,13 @@ class Model(ABC):
                raise RuntimeError(
                    'Expecting model is pre-fetched locally, but is not found.'
                )
-            local_model_dir = snapshot_download(model_name_or_path, revision)
+
+            if invoked_by is not None:
+                invoked_by = {Invoke.KEY: invoked_by}
+            else:
+                invoked_by = {Invoke.KEY: Invoke.PRETRAINED}
+            local_model_dir = snapshot_download(
+                model_name_or_path, revision, user_agent=invoked_by)
        logger.info(f'initialize model from {local_model_dir}')
        if cfg_dict is not None:
            cfg = cfg_dict
@@ -133,6 +143,7 @@ class Model(ABC):
            model.cfg = cfg

        model.name = model_name_or_path
+        model.model_dir = local_model_dir
        return model

    def save_pretrained(self,
--- a/modelscope/models/multi_modal/clip/model.py
+++ b/modelscope/models/multi_modal/clip/model.py
@@ -509,8 +509,8 @@ def convert_weights(model: nn.Module):
@MODELS.register_module(Tasks.multi_modal_embedding, module_name=Models.clip)
 class CLIPForMultiModalEmbedding(TorchModel):

-    def __init__(self, model_dir, device_id=-1):
-        super().__init__(model_dir=model_dir, device_id=device_id)
+    def __init__(self, model_dir, *args, **kwargs):
+        super().__init__(model_dir=model_dir, *args, **kwargs)

        # Initialize the model.
        vision_model_config_file = '{}/vision_model_config.json'.format(
--- a/modelscope/models/nlp/mglm/mglm_for_text_summarization.py
+++ b/modelscope/models/nlp/mglm/mglm_for_text_summarization.py
@@ -9,7 +9,6 @@ import numpy as np
 import torch
 import torch.nn.functional as F

-from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.metainfo import Models
 from modelscope.models.base import Tensor, TorchModel
 from modelscope.models.builder import MODELS
--- a/modelscope/pipelines/base.py
+++ b/modelscope/pipelines/base.py
@@ -16,7 +16,7 @@ from modelscope.outputs import TASK_OUTPUTS
 from modelscope.pipeline_inputs import TASK_INPUTS, check_input_type
 from modelscope.preprocessors import Preprocessor
 from modelscope.utils.config import Config
-from modelscope.utils.constant import Frameworks, ModelFile
+from modelscope.utils.constant import Frameworks, Invoke, ModelFile
 from modelscope.utils.device import (create_device, device_placement,
                                     verify_device)
 from modelscope.utils.hub import read_config, snapshot_download
@@ -47,8 +47,10 @@ class Pipeline(ABC):
            logger.info(f'initiate model from location {model}.')
            # expecting model has been prefetched to local cache beforehand
            return Model.from_pretrained(
-                model, model_prefetched=True,
-                device=self.device_name) if is_model(model) else model
+                model,
+                device=self.device_name,
+                model_prefetched=True,
+                invoked_by=Invoke.PIPELINE) if is_model(model) else model
        else:
            return model

@@ -383,15 +385,12 @@ class DistributedPipeline(Pipeline):
                 preprocessor: Union[Preprocessor, List[Preprocessor]] = None,
                 auto_collate=True,
                 **kwargs):
-        self.preprocessor = preprocessor
+        super().__init__(model=model, preprocessor=preprocessor, kwargs=kwargs)
        self._model_prepare = False
        self._model_prepare_lock = Lock()
        self._auto_collate = auto_collate

-        if os.path.exists(model):
-            self.model_dir = model
-        else:
-            self.model_dir = snapshot_download(model)
+        self.model_dir = self.model.model_dir
        self.cfg = read_config(self.model_dir)
        self.world_size = self.cfg.model.world_size
        self.model_pool = None
--- a/modelscope/pipelines/builder.py
+++ b/modelscope/pipelines/builder.py
@@ -7,7 +7,7 @@ from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.metainfo import Pipelines
 from modelscope.models.base import Model
 from modelscope.utils.config import ConfigDict, check_config
-from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Tasks
+from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke, Tasks
 from modelscope.utils.hub import read_config
 from modelscope.utils.registry import Registry, build_from_cfg
 from .base import Pipeline
@@ -209,6 +209,8 @@ DEFAULT_MODEL_FOR_PIPELINE = {
    Tasks.referring_video_object_segmentation:
    (Pipelines.referring_video_object_segmentation,
     'damo/cv_swin-t_referring_video-object-segmentation'),
+    Tasks.video_summarization: (Pipelines.video_summarization,
+                                'damo/cv_googlenet_pgl-video-summarization'),
 }


@@ -220,14 +222,19 @@ def normalize_model_input(model, model_revision):
        # skip revision download if model is a local directory
        if not os.path.exists(model):
            # note that if there is already a local copy, snapshot_download will check and skip downloading
-            model = snapshot_download(model, revision=model_revision)
+            model = snapshot_download(
+                model,
+                revision=model_revision,
+                user_agent={Invoke.KEY: Invoke.PIPELINE})
    elif isinstance(model, list) and isinstance(model[0], str):
        for idx in range(len(model)):
            if is_official_hub_path(
                    model[idx],
                    model_revision) and not os.path.exists(model[idx]):
                model[idx] = snapshot_download(
-                    model[idx], revision=model_revision)
+                    model[idx],
+                    revision=model_revision,
+                    user_agent={Invoke.KEY: Invoke.PIPELINE})
    return model


--- a/modelscope/pipelines/cv/animal_recognition_pipeline.py
+++ b/modelscope/pipelines/cv/animal_recognition_pipeline.py
@@ -8,14 +8,13 @@ import torch
 from PIL import Image
 from torchvision import transforms

-from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.metainfo import Pipelines
 from modelscope.models.cv.animal_recognition import Bottleneck, ResNet
 from modelscope.outputs import OutputKeys
 from modelscope.pipelines.base import Input, Pipeline
 from modelscope.pipelines.builder import PIPELINES
 from modelscope.preprocessors import LoadImage
-from modelscope.utils.constant import Tasks
+from modelscope.utils.constant import Devices, ModelFile, Tasks
 from modelscope.utils.logger import get_logger

 logger = get_logger()
@@ -67,15 +66,10 @@ class AnimalRecognitionPipeline(Pipeline):
            filter_param(src_params, own_state)
            model.load_state_dict(own_state)

-        self.model = resnest101(num_classes=8288)
-        local_model_dir = model
-        if osp.exists(model):
-            local_model_dir = model
-        else:
-            local_model_dir = snapshot_download(model)
-        self.local_path = local_model_dir
+        self.local_path = self.model
        src_params = torch.load(
-            osp.join(local_model_dir, 'pytorch_model.pt'), 'cpu')
+            osp.join(self.local_path, ModelFile.TORCH_MODEL_FILE), Devices.cpu)
+        self.model = resnest101(num_classes=8288)
        load_pretrained(self.model, src_params)
        logger.info('load model done')

--- a/modelscope/pipelines/cv/body_3d_keypoints_pipeline.py
+++ b/modelscope/pipelines/cv/body_3d_keypoints_pipeline.py
@@ -120,8 +120,7 @@ class Body3DKeypointsPipeline(Pipeline):
        """
        super().__init__(model=model, **kwargs)

-        self.keypoint_model_3d = model if isinstance(
-            model, BodyKeypointsDetection3D) else Model.from_pretrained(model)
+        self.keypoint_model_3d = self.model
        self.keypoint_model_3d.eval()

        # init human body 2D keypoints detection pipeline
--- a/modelscope/pipelines/cv/easycv_pipelines/base.py
+++ b/modelscope/pipelines/cv/easycv_pipelines/base.py
@@ -11,7 +11,7 @@ from PIL import ImageFile
 from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.pipelines.util import is_official_hub_path
 from modelscope.utils.config import Config
-from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile
+from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke, ModelFile
 from modelscope.utils.device import create_device


@@ -37,7 +37,9 @@ class EasyCVPipeline(object):
            assert is_official_hub_path(
                model), 'Only support local model path and official hub path!'
            model_dir = snapshot_download(
-                model_id=model, revision=DEFAULT_MODEL_REVISION)
+                model_id=model,
+                revision=DEFAULT_MODEL_REVISION,
+                user_agent={Invoke.KEY: Invoke.PIPELINE})

        assert osp.isdir(model_dir)
        model_files = glob.glob(
@@ -48,6 +50,7 @@ class EasyCVPipeline(object):

        model_path = model_files[0]
        self.model_path = model_path
+        self.model_dir = model_dir

        # get configuration file from source model dir
        self.config_file = os.path.join(model_dir, ModelFile.CONFIGURATION)
--- a/modelscope/pipelines/cv/easycv_pipelines/human_wholebody_keypoint_pipeline.py
+++ b/modelscope/pipelines/cv/easycv_pipelines/human_wholebody_keypoint_pipeline.py
@@ -24,7 +24,6 @@ class HumanWholebodyKeypointsPipeline(EasyCVPipeline):
            model (str): model id on modelscope hub or local model path.
            model_file_pattern (str): model file pattern.
        """
-        self.model_dir = model
        super(HumanWholebodyKeypointsPipeline, self).__init__(
            model=model,
            model_file_pattern=model_file_pattern,
--- a/modelscope/pipelines/cv/general_recognition_pipeline.py
+++ b/modelscope/pipelines/cv/general_recognition_pipeline.py
@@ -8,7 +8,6 @@ import torch
 from PIL import Image
 from torchvision import transforms

-from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.metainfo import Pipelines
 from modelscope.models.cv.animal_recognition import resnet
 from modelscope.outputs import OutputKeys
@@ -67,16 +66,12 @@ class GeneralRecognitionPipeline(Pipeline):
            filter_param(src_params, own_state)
            model.load_state_dict(own_state)

-        self.model = resnest101(num_classes=54092)
-        local_model_dir = model
        device = 'cpu'
-        if osp.exists(model):
-            local_model_dir = model
-        else:
-            local_model_dir = snapshot_download(model)
-        self.local_path = local_model_dir
+        self.local_path = self.model
        src_params = torch.load(
-            osp.join(local_model_dir, ModelFile.TORCH_MODEL_FILE), device)
+            osp.join(self.local_path, ModelFile.TORCH_MODEL_FILE), device)
+
+        self.model = resnest101(num_classes=54092)
        load_pretrained(self.model, src_params)
        logger.info('load model done')

--- a/modelscope/pipelines/cv/hand_2d_keypoints_pipeline.py
+++ b/modelscope/pipelines/cv/hand_2d_keypoints_pipeline.py
@@ -21,7 +21,6 @@ class Hand2DKeypointsPipeline(EasyCVPipeline):
            model (str): model id on modelscope hub or local model path.
            model_file_pattern (str): model file pattern.
        """
-        self.model_dir = model
        super(Hand2DKeypointsPipeline, self).__init__(
            model=model,
            model_file_pattern=model_file_pattern,
--- a/modelscope/pipelines/cv/image_classification_pipeline.py
+++ b/modelscope/pipelines/cv/image_classification_pipeline.py
@@ -1,5 +1,5 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import Any, Dict, Union
+from typing import Any, Dict, Optional, Union

 import cv2
 import numpy as np
@@ -25,22 +25,15 @@ class ImageClassificationPipeline(Pipeline):

    def __init__(self,
                 model: Union[Model, str],
-                 preprocessor: [Preprocessor] = None,
+                 preprocessor: Optional[Preprocessor] = None,
                 **kwargs):
-        super().__init__(model=model)
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
        assert isinstance(model, str) or isinstance(model, Model), \
            'model must be a single str or OfaForAllTasks'
-        if isinstance(model, str):
-            pipe_model = Model.from_pretrained(model)
-        elif isinstance(model, Model):
-            pipe_model = model
-        else:
-            raise NotImplementedError
-        pipe_model.model.eval()
-        pipe_model.to(get_device())
-        if preprocessor is None and isinstance(pipe_model, OfaForAllTasks):
-            preprocessor = OfaPreprocessor(model_dir=pipe_model.model_dir)
-        super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()
+        self.model.to(get_device())
+        if preprocessor is None and isinstance(self.model, OfaForAllTasks):
+            self.preprocessor = OfaPreprocessor(model_dir=self.model.model_dir)

    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        return inputs
--- a/modelscope/pipelines/cv/image_color_enhance_pipeline.py
+++ b/modelscope/pipelines/cv/image_color_enhance_pipeline.py
@@ -32,10 +32,8 @@ class ImageColorEnhancePipeline(Pipeline):
        Args:
            model: model id on modelscope hub.
        """
-        model = model if isinstance(
-            model, ImageColorEnhance) else Model.from_pretrained(model)
-        model.eval()
        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()

        if torch.cuda.is_available():
            self._device = torch.device('cuda')
--- a/modelscope/pipelines/cv/image_denoise_pipeline.py
+++ b/modelscope/pipelines/cv/image_denoise_pipeline.py
@@ -32,17 +32,14 @@ class ImageDenoisePipeline(Pipeline):
        Args:
            model: model id on modelscope hub.
        """
-        model = model if isinstance(
-            model, NAFNetForImageDenoise) else Model.from_pretrained(model)
-        model.eval()
        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
-        self.config = model.config
+        self.model.eval()
+        self.config = self.model.config

        if torch.cuda.is_available():
            self._device = torch.device('cuda')
        else:
            self._device = torch.device('cpu')
-        self.model = model
        logger.info('load image denoise model done')

    def preprocess(self, input: Input) -> Dict[str, Any]:
--- a/modelscope/pipelines/cv/language_guided_video_summarization_pipeline.py
+++ b/modelscope/pipelines/cv/language_guided_video_summarization_pipeline.py
@@ -44,7 +44,7 @@ class LanguageGuidedVideoSummarizationPipeline(Pipeline):
        """
        super().__init__(model=model, auto_collate=False, **kwargs)
        logger.info(f'loading model from {model}')
-        self.model_dir = model
+        self.model_dir = self.model.model_dir

        self.tmp_dir = kwargs.get('tmp_dir', None)
        if self.tmp_dir is None:
--- a/modelscope/pipelines/cv/virtual_try_on_pipeline.py
+++ b/modelscope/pipelines/cv/virtual_try_on_pipeline.py
@@ -9,7 +9,6 @@ import PIL
 import torch
 from PIL import Image

-from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.metainfo import Pipelines
 from modelscope.models.cv.virual_tryon import SDAFNet_Tryon
 from modelscope.outputs import OutputKeys
@@ -52,17 +51,12 @@ class VirtualTryonPipeline(Pipeline):
            filter_param(src_params, own_state)
            model.load_state_dict(own_state)

-        self.model = SDAFNet_Tryon(ref_in_channel=6).to(self.device)
-        local_model_dir = model
-        if osp.exists(model):
-            local_model_dir = model
-        else:
-            local_model_dir = snapshot_download(model)
-        self.local_path = local_model_dir
+        self.local_path = self.model
        src_params = torch.load(
-            osp.join(local_model_dir, ModelFile.TORCH_MODEL_FILE), 'cpu')
+            osp.join(self.local_path, ModelFile.TORCH_MODEL_FILE), 'cpu')
+        self.model = SDAFNet_Tryon(ref_in_channel=6).to(self.device)
        load_pretrained(self.model, src_params)
-        self.model = self.model.eval()
+        self.model.eval()
        self.size = 192
        from torchvision import transforms
        self.test_transforms = transforms.Compose([
--- a/modelscope/pipelines/multi_modal/image_captioning_pipeline.py
+++ b/modelscope/pipelines/multi_modal/image_captioning_pipeline.py
@@ -29,22 +29,13 @@ class ImageCaptioningPipeline(Pipeline):
        Args:
            model: model id on modelscope hub.
        """
-        super().__init__(model=model)
-        assert isinstance(model, str) or isinstance(model, Model), \
-            'model must be a single str or OfaForAllTasks'
-        if isinstance(model, str):
-            pipe_model = Model.from_pretrained(model)
-        elif isinstance(model, Model):
-            pipe_model = model
-        else:
-            raise NotImplementedError
-        pipe_model.model.eval()
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()
        if preprocessor is None:
-            if isinstance(pipe_model, OfaForAllTasks):
-                preprocessor = OfaPreprocessor(pipe_model.model_dir)
-            elif isinstance(pipe_model, MPlugForAllTasks):
-                preprocessor = MPlugPreprocessor(pipe_model.model_dir)
-        super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
+            if isinstance(self.model, OfaForAllTasks):
+                self.preprocessor = OfaPreprocessor(self.model.model_dir)
+            elif isinstance(self.model, MPlugForAllTasks):
+                self.preprocessor = MPlugPreprocessor(self.model.model_dir)

    def _batch(self, data):
        if isinstance(self.model, OfaForAllTasks):
--- a/modelscope/pipelines/multi_modal/image_text_retrieval_pipeline.py
+++ b/modelscope/pipelines/multi_modal/image_text_retrieval_pipeline.py
@@ -28,19 +28,10 @@ class ImageTextRetrievalPipeline(Pipeline):
        Args:
            model: model id on modelscope hub.
        """
-        super().__init__(model=model)
-        assert isinstance(model, str) or isinstance(model, Model), \
-            f'model must be a single str or Model, but got {type(model)}'
-        if isinstance(model, str):
-            pipe_model = Model.from_pretrained(model)
-        elif isinstance(model, Model):
-            pipe_model = model
-        else:
-            raise NotImplementedError
-        pipe_model.model.eval()
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()
        if preprocessor is None:
-            preprocessor = MPlugPreprocessor(pipe_model.model_dir)
-        super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
+            self.preprocessor = MPlugPreprocessor(self.model.model_dir)

    def forward(self, inputs: Dict[str, Any],
                **forward_params) -> Dict[str, Any]:
--- a/modelscope/pipelines/multi_modal/multi_modal_embedding_pipeline.py
+++ b/modelscope/pipelines/multi_modal/multi_modal_embedding_pipeline.py
@@ -28,21 +28,14 @@ class MultiModalEmbeddingPipeline(Pipeline):
        Args:
            model: model id on modelscope hub.
        """
-        if isinstance(model, str):
-            pipe_model = Model.from_pretrained(model)
-        elif isinstance(model, Model):
-            pipe_model = model
-        else:
-            raise NotImplementedError('model must be a single str')
-        pipe_model.eval()
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()
        if preprocessor is None:
-            if isinstance(pipe_model, CLIPForMultiModalEmbedding):
-                preprocessor = CLIPPreprocessor(pipe_model.model_dir)
+            if isinstance(self.model, CLIPForMultiModalEmbedding):
+                self.preprocessor = CLIPPreprocessor(self.model.model_dir)
            else:
                raise NotImplementedError

-        super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
-
    def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
        return self.model(self.preprocess(input))

--- a/modelscope/pipelines/multi_modal/ocr_recognition_pipeline.py
+++ b/modelscope/pipelines/multi_modal/ocr_recognition_pipeline.py
@@ -28,20 +28,11 @@ class OcrRecognitionPipeline(Pipeline):
        Args:
            model: model id on modelscope hub.
        """
-        super().__init__(model=model)
-        assert isinstance(model, str) or isinstance(model, Model), \
-            'model must be a single str or OfaForAllTasks'
-        if isinstance(model, str):
-            pipe_model = Model.from_pretrained(model)
-        elif isinstance(model, Model):
-            pipe_model = model
-        else:
-            raise NotImplementedError
-        pipe_model.model.eval()
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()
        if preprocessor is None:
-            if isinstance(pipe_model, OfaForAllTasks):
-                preprocessor = OfaPreprocessor(pipe_model.model_dir)
-        super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
+            if isinstance(self.model, OfaForAllTasks):
+                self.preprocessor = OfaPreprocessor(self.model.model_dir)

    def forward(self, inputs: Dict[str, Any],
                **forward_params) -> Dict[str, Any]:
--- a/modelscope/pipelines/multi_modal/text_to_image_synthesis_pipeline.py
+++ b/modelscope/pipelines/multi_modal/text_to_image_synthesis_pipeline.py
@@ -31,18 +31,10 @@ class TextToImageSynthesisPipeline(Pipeline):
        Args:
            model: model id on modelscope hub.
        """
-        device_id = 0 if torch.cuda.is_available() else -1
-        if isinstance(model, str):
-            pipe_model = Model.from_pretrained(model, device_id=device_id)
-        elif isinstance(model, Model):
-            pipe_model = model
-        else:
-            raise NotImplementedError(
-                f'expecting a Model instance or str, but get {type(model)}.')
-        if preprocessor is None and isinstance(pipe_model,
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        if preprocessor is None and isinstance(self.model,
                                               OfaForTextToImageSynthesis):
-            preprocessor = OfaPreprocessor(pipe_model.model_dir)
-        super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
+            self.preprocessor = OfaPreprocessor(self.model.model_dir)

    def preprocess(self, input: Input, **preprocess_params) -> Dict[str, Any]:
        if self.preprocessor is not None:
--- a/modelscope/pipelines/multi_modal/visual_entailment_pipeline.py
+++ b/modelscope/pipelines/multi_modal/visual_entailment_pipeline.py
@@ -1,5 +1,5 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import Any, Dict, Union
+from typing import Any, Dict, Optional, Union

 from modelscope.metainfo import Pipelines
 from modelscope.models.multi_modal import OfaForAllTasks
@@ -18,26 +18,17 @@ class VisualEntailmentPipeline(Pipeline):

    def __init__(self,
                 model: Union[Model, str],
-                 preprocessor: [Preprocessor] = None,
+                 preprocessor: Optional[Preprocessor] = None,
                 **kwargs):
        """
        use `model` and `preprocessor` to create a visual entailment pipeline for prediction
        Args:
            model: model id on modelscope hub.
        """
-        super().__init__(model=model)
-        assert isinstance(model, str) or isinstance(model, Model), \
-            'model must be a single str or OfaForAllTasks'
-        if isinstance(model, str):
-            pipe_model = Model.from_pretrained(model)
-        elif isinstance(model, Model):
-            pipe_model = model
-        else:
-            raise NotImplementedError
-        pipe_model.model.eval()
-        if preprocessor is None and isinstance(pipe_model, OfaForAllTasks):
-            preprocessor = OfaPreprocessor(model_dir=pipe_model.model_dir)
-        super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()
+        if preprocessor is None and isinstance(self.model, OfaForAllTasks):
+            self.preprocessor = OfaPreprocessor(model_dir=self.model.model_dir)

    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        return inputs
--- a/modelscope/pipelines/multi_modal/visual_grounding_pipeline.py
+++ b/modelscope/pipelines/multi_modal/visual_grounding_pipeline.py
@@ -1,5 +1,5 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import Any, Dict, Union
+from typing import Any, Dict, Optional, Union

 from modelscope.metainfo import Pipelines
 from modelscope.models.multi_modal import OfaForAllTasks
@@ -18,26 +18,17 @@ class VisualGroundingPipeline(Pipeline):

    def __init__(self,
                 model: Union[Model, str],
-                 preprocessor: [Preprocessor] = None,
+                 preprocessor: Optional[Preprocessor] = None,
                 **kwargs):
        """
        use `model` and `preprocessor` to create a visual grounding pipeline for prediction
        Args:
            model: model id on modelscope hub.
        """
-        super().__init__(model=model)
-        assert isinstance(model, str) or isinstance(model, Model), \
-            'model must be a single str or OfaForAllTasks'
-        if isinstance(model, str):
-            pipe_model = Model.from_pretrained(model)
-        elif isinstance(model, Model):
-            pipe_model = model
-        else:
-            raise NotImplementedError
-        pipe_model.model.eval()
-        if preprocessor is None and isinstance(pipe_model, OfaForAllTasks):
-            preprocessor = OfaPreprocessor(model_dir=pipe_model.model_dir)
-        super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.model.eval()
+        if preprocessor is None and isinstance(self.model, OfaForAllTasks):
+            self.preprocessor = OfaPreprocessor(model_dir=self.model.model_dir)

    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        return inputs
--- a/modelscope/pipelines/multi_modal/visual_question_answering_pipeline.py
+++ b/modelscope/pipelines/multi_modal/visual_question_answering_pipeline.py
@@ -31,15 +31,13 @@ class VisualQuestionAnsweringPipeline(Pipeline):
            model (MPlugForVisualQuestionAnswering): a model instance
            preprocessor (MPlugVisualQuestionAnsweringPreprocessor): a preprocessor instance
        """
-        model = model if isinstance(model,
-                                    Model) else Model.from_pretrained(model)
-        if preprocessor is None:
-            if isinstance(model, OfaForAllTasks):
-                preprocessor = OfaPreprocessor(model.model_dir)
-            elif isinstance(model, MPlugForAllTasks):
-                preprocessor = MPlugPreprocessor(model.model_dir)
-        model.model.eval()
        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        if preprocessor is None:
+            if isinstance(self.model, OfaForAllTasks):
+                self.preprocessor = OfaPreprocessor(self.model.model_dir)
+            elif isinstance(self.model, MPlugForAllTasks):
+                self.preprocessor = MPlugPreprocessor(self.model.model_dir)
+        self.model.eval()

    def forward(self, inputs: Dict[str, Any],
                **forward_params) -> Dict[str, Any]:
--- a/modelscope/pipelines/nlp/conversational_text_to_sql_pipeline.py
+++ b/modelscope/pipelines/nlp/conversational_text_to_sql_pipeline.py
@@ -32,12 +32,10 @@ class ConversationalTextToSqlPipeline(Pipeline):
            preprocessor (ConversationalTextToSqlPreprocessor):
                a preprocessor instance
        """
-        model = model if isinstance(
-            model, StarForTextToSql) else Model.from_pretrained(model)
-        if preprocessor is None:
-            preprocessor = ConversationalTextToSqlPreprocessor(model.model_dir)
-
        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        if preprocessor is None:
+            self.preprocessor = ConversationalTextToSqlPreprocessor(
+                self.model.model_dir)

    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
        """process the prediction results
--- a/modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py
+++ b/modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py
@@ -30,13 +30,11 @@ class DialogIntentPredictionPipeline(Pipeline):
            or a SpaceForDialogIntent instance.
            preprocessor (DialogIntentPredictionPreprocessor): An optional preprocessor instance.
        """
-        model = model if isinstance(
-            model, SpaceForDialogIntent) else Model.from_pretrained(model)
-        if preprocessor is None:
-            preprocessor = DialogIntentPredictionPreprocessor(model.model_dir)
-        self.model = model
        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
-        self.categories = preprocessor.categories
+        if preprocessor is None:
+            self.preprocessor = DialogIntentPredictionPreprocessor(
+                self.model.model_dir)
+        self.categories = self.preprocessor.categories

    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
        """process the prediction results
--- a/modelscope/pipelines/nlp/dialog_modeling_pipeline.py
+++ b/modelscope/pipelines/nlp/dialog_modeling_pipeline.py
@@ -29,13 +29,10 @@ class DialogModelingPipeline(Pipeline):
            or a SpaceForDialogModeling instance.
            preprocessor (DialogModelingPreprocessor): An optional preprocessor instance.
        """
-        model = model if isinstance(
-            model, SpaceForDialogModeling) else Model.from_pretrained(model)
-        self.model = model
-        if preprocessor is None:
-            preprocessor = DialogModelingPreprocessor(model.model_dir)
        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
-        self.preprocessor = preprocessor
+        if preprocessor is None:
+            self.preprocessor = DialogModelingPreprocessor(
+                self.model.model_dir)

    def postprocess(self, inputs: Dict[str, Tensor]) -> Dict[str, str]:
        """process the prediction results
--- a/modelscope/pipelines/nlp/dialog_state_tracking_pipeline.py
+++ b/modelscope/pipelines/nlp/dialog_state_tracking_pipeline.py
@@ -31,16 +31,13 @@ class DialogStateTrackingPipeline(Pipeline):
            from the model hub, or a SpaceForDialogStateTracking instance.
            preprocessor (DialogStateTrackingPreprocessor): An optional preprocessor instance.
        """
-
-        model = model if isinstance(
-            model, SpaceForDST) else Model.from_pretrained(model)
-        self.model = model
-        if preprocessor is None:
-            preprocessor = DialogStateTrackingPreprocessor(model.model_dir)
        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        if preprocessor is None:
+            self.preprocessor = DialogStateTrackingPreprocessor(
+                self.model.model_dir)

-        self.tokenizer = preprocessor.tokenizer
-        self.config = preprocessor.config
+        self.tokenizer = self.preprocessor.tokenizer
+        self.config = self.preprocessor.config

    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
        """process the prediction results
--- a/modelscope/pipelines/nlp/document_segmentation_pipeline.py
+++ b/modelscope/pipelines/nlp/document_segmentation_pipeline.py
@@ -31,27 +31,22 @@ class DocumentSegmentationPipeline(Pipeline):
                 model: Union[Model, str],
                 preprocessor: DocumentSegmentationPreprocessor = None,
                 **kwargs):
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)

-        model = model if isinstance(model,
-                                    Model) else Model.from_pretrained(model)
-
-        self.model_dir = model.model_dir
-        self.model_cfg = model.forward()
+        self.model_dir = self.model.model_dir
+        self.model_cfg = self.model.forward()

        if self.model_cfg['type'] == 'bert':
-            config = BertConfig.from_pretrained(model.model_dir, num_labels=2)
+            config = BertConfig.from_pretrained(self.model_dir, num_labels=2)
        elif self.model_cfg['type'] == 'ponet':
-            config = PoNetConfig.from_pretrained(model.model_dir, num_labels=2)
+            config = PoNetConfig.from_pretrained(self.model_dir, num_labels=2)

-        self.document_segmentation_model = model.build_with_config(
+        self.document_segmentation_model = self.model.build_with_config(
            config=config)

        if preprocessor is None:
-            preprocessor = DocumentSegmentationPreprocessor(
-                self.model_dir, config)
-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
-
-        self.preprocessor = preprocessor
+            self.preprocessor = DocumentSegmentationPreprocessor(
+                self.model.model_dir, config)

    def __call__(
            self, documents: Union[List[List[str]], List[str],
--- a/modelscope/pipelines/nlp/faq_question_answering_pipeline.py
+++ b/modelscope/pipelines/nlp/faq_question_answering_pipeline.py
@@ -21,12 +21,10 @@ class FaqQuestionAnsweringPipeline(Pipeline):
                 model: Union[str, Model],
                 preprocessor: Preprocessor = None,
                 **kwargs):
-        model = Model.from_pretrained(model) if isinstance(model,
-                                                           str) else model
-        if preprocessor is None:
-            preprocessor = Preprocessor.from_pretrained(
-                model.model_dir, **kwargs)
        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        if preprocessor is None:
+            self.preprocessor = Preprocessor.from_pretrained(
+                self.model.model_dir, **kwargs)

    def _sanitize_parameters(self, **pipeline_parameters):
        return pipeline_parameters, pipeline_parameters, pipeline_parameters
@@ -37,11 +35,11 @@ class FaqQuestionAnsweringPipeline(Pipeline):
        sentence_vecs = sentence_vecs.detach().tolist()
        return sentence_vecs

-    def forward(self, inputs: [list, Dict[str, Any]],
+    def forward(self, inputs: Union[list, Dict[str, Any]],
                **forward_params) -> Dict[str, Any]:
        return self.model(inputs)

-    def postprocess(self, inputs: [list, Dict[str, Any]],
+    def postprocess(self, inputs: Union[list, Dict[str, Any]],
                    **postprocess_params) -> Dict[str, Any]:
        scores = inputs['scores']
        labels = []
--- a/modelscope/pipelines/nlp/feature_extraction_pipeline.py
+++ b/modelscope/pipelines/nlp/feature_extraction_pipeline.py
@@ -46,21 +46,18 @@ class FeatureExtractionPipeline(Pipeline):


        """
-        model = model if isinstance(model,
-                                    Model) else Model.from_pretrained(model)
-
-        if preprocessor is None:
-            preprocessor = NLPPreprocessor(
-                model.model_dir,
-                padding=kwargs.pop('padding', False),
-                sequence_length=kwargs.pop('sequence_length', 128))
-        model.eval()
        super().__init__(model=model, preprocessor=preprocessor, **kwargs)

-        self.preprocessor = preprocessor
+        if preprocessor is None:
+            self.preprocessor = NLPPreprocessor(
+                self.model.model_dir,
+                padding=kwargs.pop('padding', False),
+                sequence_length=kwargs.pop('sequence_length', 128))
+        self.model.eval()
+
        self.config = Config.from_file(
-            os.path.join(model.model_dir, ModelFile.CONFIGURATION))
-        self.tokenizer = preprocessor.tokenizer
+            os.path.join(self.model.model_dir, ModelFile.CONFIGURATION))
+        self.tokenizer = self.preprocessor.tokenizer

    def forward(self, inputs: Dict[str, Any],
                **forward_params) -> Dict[str, Any]:
--- a/modelscope/pipelines/nlp/fill_mask_pipeline.py
+++ b/modelscope/pipelines/nlp/fill_mask_pipeline.py
@@ -53,22 +53,18 @@ class FillMaskPipeline(Pipeline):
            If the xlm-roberta(xlm-roberta, veco, etc.) based model is used, the mask token is '<mask>'.
            To view other examples plese check the tests/pipelines/test_fill_mask.py.
        """
-
-        fill_mask_model = Model.from_pretrained(model) if isinstance(
-            model, str) else model
-
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
        if preprocessor is None:
-            preprocessor = Preprocessor.from_pretrained(
-                fill_mask_model.model_dir,
+            self.preprocessor = Preprocessor.from_pretrained(
+                self.model.model_dir,
                first_sequence=first_sequence,
                second_sequence=None,
                sequence_length=kwargs.pop('sequence_length', 128))
-        fill_mask_model.eval()
-        assert hasattr(
-            preprocessor, 'mask_id'
-        ), 'The input preprocessor should have the mask_id attribute.'
-        super().__init__(
-            model=fill_mask_model, preprocessor=preprocessor, **kwargs)
+            assert hasattr(
+                self.preprocessor, 'mask_id'
+            ), 'The input preprocessor should have the mask_id attribute.'
+
+        self.model.eval()

    def forward(self, inputs: Dict[str, Any],
                **forward_params) -> Dict[str, Any]:
--- a/modelscope/pipelines/nlp/information_extraction_pipeline.py
+++ b/modelscope/pipelines/nlp/information_extraction_pipeline.py
@@ -25,15 +25,12 @@ class InformationExtractionPipeline(Pipeline):
                 model: Union[Model, str],
                 preprocessor: Optional[Preprocessor] = None,
                 **kwargs):
-
-        model = model if isinstance(model,
-                                    Model) else Model.from_pretrained(model)
-        if preprocessor is None:
-            preprocessor = RelationExtractionPreprocessor(
-                model.model_dir,
-                sequence_length=kwargs.pop('sequence_length', 512))
-        model.eval()
        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        if preprocessor is None:
+            self.preprocessor = RelationExtractionPreprocessor(
+                self.model.model_dir,
+                sequence_length=kwargs.pop('sequence_length', 512))
+        self.model.eval()

    def forward(self, inputs: Dict[str, Any],
                **forward_params) -> Dict[str, Any]:
--- a/modelscope/pipelines/nlp/mglm_text_summarization_pipeline.py
+++ b/modelscope/pipelines/nlp/mglm_text_summarization_pipeline.py
@@ -21,7 +21,7 @@ class MGLMTextSummarizationPipeline(Pipeline):

    def __init__(self,
                 model: Union[MGLMForTextSummarization, str],
-                 preprocessor: [Preprocessor] = None,
+                 preprocessor: Optional[Preprocessor] = None,
                 *args,
                 **kwargs):
        model = MGLMForTextSummarization(model) if isinstance(model,
--- a/modelscope/pipelines/nlp/named_entity_recognition_pipeline.py
+++ b/modelscope/pipelines/nlp/named_entity_recognition_pipeline.py
@@ -50,15 +50,12 @@ class NamedEntityRecognitionPipeline(TokenClassificationPipeline):

            To view other examples plese check the tests/pipelines/test_named_entity_recognition.py.
        """
-
-        model = model if isinstance(model,
-                                    Model) else Model.from_pretrained(model)
-        if preprocessor is None:
-            preprocessor = TokenClassificationPreprocessor(
-                model.model_dir,
-                sequence_length=kwargs.pop('sequence_length', 128))
-        model.eval()
        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        if preprocessor is None:
+            self.preprocessor = TokenClassificationPreprocessor(
+                self.model.model_dir,
+                sequence_length=kwargs.pop('sequence_length', 128))
+        self.model.eval()
        self.id2label = kwargs.get('id2label')
        if self.id2label is None and hasattr(self.preprocessor, 'id2label'):
            self.id2label = self.preprocessor.id2label
@@ -73,13 +70,11 @@ class NamedEntityRecognitionThaiPipeline(NamedEntityRecognitionPipeline):
                 model: Union[Model, str],
                 preprocessor: Optional[Preprocessor] = None,
                 **kwargs):
-        model = model if isinstance(model,
-                                    Model) else Model.from_pretrained(model)
-        if preprocessor is None:
-            preprocessor = NERPreprocessorThai(
-                model.model_dir,
-                sequence_length=kwargs.pop('sequence_length', 512))
        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        if preprocessor is None:
+            self.preprocessor = NERPreprocessorThai(
+                self.model.model_dir,
+                sequence_length=kwargs.pop('sequence_length', 512))


@PIPELINES.register_module(
@@ -91,10 +86,8 @@ class NamedEntityRecognitionVietPipeline(NamedEntityRecognitionPipeline):
                 model: Union[Model, str],
                 preprocessor: Optional[Preprocessor] = None,
                 **kwargs):
-        model = model if isinstance(model,
-                                    Model) else Model.from_pretrained(model)
-        if preprocessor is None:
-            preprocessor = NERPreprocessorViet(
-                model.model_dir,
-                sequence_length=kwargs.pop('sequence_length', 512))
        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        if preprocessor is None:
+            self.preprocessor = NERPreprocessorViet(
+                self.model.model_dir,
+                sequence_length=kwargs.pop('sequence_length', 512))
--- a/modelscope/pipelines/nlp/sentence_embedding_pipeline.py
+++ b/modelscope/pipelines/nlp/sentence_embedding_pipeline.py
@@ -32,14 +32,13 @@ class SentenceEmbeddingPipeline(Pipeline):
            the model if supplied.
            sequence_length: Max sequence length in the user's custom scenario. 128 will be used as a default value.
        """
-        model = Model.from_pretrained(model) if isinstance(model,
-                                                           str) else model
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
        if preprocessor is None:
-            preprocessor = Preprocessor.from_pretrained(
-                model.model_dir if isinstance(model, Model) else model,
+            self.preprocessor = Preprocessor.from_pretrained(
+                self.model.model_dir
+                if isinstance(self.model, Model) else model,
                first_sequence=first_sequence,
                sequence_length=kwargs.pop('sequence_length', 128))
-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)

    def forward(self, inputs: Dict[str, Any],
                **forward_params) -> Dict[str, Any]:
--- a/modelscope/pipelines/nlp/summarization_pipeline.py
+++ b/modelscope/pipelines/nlp/summarization_pipeline.py
@@ -1,5 +1,5 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import Any, Dict, Union
+from typing import Any, Dict, Optional, Union

 from modelscope.metainfo import Pipelines
 from modelscope.models.multi_modal import OfaForAllTasks
@@ -18,7 +18,7 @@ class SummarizationPipeline(Pipeline):

    def __init__(self,
                 model: Union[Model, str],
-                 preprocessor: [Preprocessor] = None,
+                 preprocessor: Optional[Preprocessor] = None,
                 **kwargs):
        """Use `model` and `preprocessor` to create a Summarization pipeline for prediction.

@@ -27,19 +27,10 @@ class SummarizationPipeline(Pipeline):
            or a model id from the model hub, or a model instance.
            preprocessor (Preprocessor): An optional preprocessor instance.
        """
-        super().__init__(model=model)
-        assert isinstance(model, str) or isinstance(model, Model), \
-            'model must be a single str or OfaForAllTasks'
-        if isinstance(model, str):
-            pipe_model = Model.from_pretrained(model)
-        elif isinstance(model, Model):
-            pipe_model = model
-        else:
-            raise NotImplementedError
-        pipe_model.model.eval()
-        if preprocessor is None and isinstance(pipe_model, OfaForAllTasks):
-            preprocessor = OfaPreprocessor(model_dir=pipe_model.model_dir)
-        super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()
+        if preprocessor is None and isinstance(self.model, OfaForAllTasks):
+            self.preprocessor = OfaPreprocessor(model_dir=self.model.model_dir)

    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        return inputs
--- a/modelscope/pipelines/nlp/table_question_answering_pipeline.py
+++ b/modelscope/pipelines/nlp/table_question_answering_pipeline.py
@@ -41,21 +41,22 @@ class TableQuestionAnsweringPipeline(Pipeline):
            preprocessor (TableQuestionAnsweringPreprocessor): a preprocessor instance
            db (Database): a database to store tables in the database
        """
-        model = model if isinstance(
-            model, TableQuestionAnswering) else Model.from_pretrained(model)
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
        if preprocessor is None:
-            preprocessor = TableQuestionAnsweringPreprocessor(model.model_dir)
+            self.preprocessor = TableQuestionAnsweringPreprocessor(
+                self.model.model_dir)

        # initilize tokenizer
        self.tokenizer = BertTokenizer(
-            os.path.join(model.model_dir, ModelFile.VOCAB_FILE))
+            os.path.join(self.model.model_dir, ModelFile.VOCAB_FILE))

        # initialize database
        if db is None:
            self.db = Database(
                tokenizer=self.tokenizer,
-                table_file_path=os.path.join(model.model_dir, 'table.json'),
-                syn_dict_file_path=os.path.join(model.model_dir,
+                table_file_path=os.path.join(self.model.model_dir,
+                                             'table.json'),
+                syn_dict_file_path=os.path.join(self.model.model_dir,
                                                'synonym.txt'))
        else:
            self.db = db
@@ -71,8 +72,6 @@ class TableQuestionAnsweringPipeline(Pipeline):
        self.schema_link_dict = constant.schema_link_dict
        self.limit_dict = constant.limit_dict

-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
-
    def post_process_multi_turn(self, history_sql, result, table):
        action = self.action_ops[result['action']]
        headers = table['header_name']
--- a/modelscope/pipelines/nlp/text2text_generation_pipeline.py
+++ b/modelscope/pipelines/nlp/text2text_generation_pipeline.py
@@ -63,16 +63,14 @@ class Text2TextGenerationPipeline(Pipeline):

            To view other examples plese check the tests/pipelines/test_text_generation.py.
        """
-        model = model if isinstance(model,
-                                    Model) else Model.from_pretrained(model)
-        if preprocessor is None:
-            preprocessor = Text2TextGenerationPreprocessor(
-                model.model_dir,
-                sequence_length=kwargs.pop('sequence_length', 128))
-        self.tokenizer = preprocessor.tokenizer
-        self.pipeline = model.pipeline.type
-        model.eval()
        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        if preprocessor is None:
+            self.preprocessor = Text2TextGenerationPreprocessor(
+                self.model.model_dir,
+                sequence_length=kwargs.pop('sequence_length', 128))
+        self.tokenizer = self.preprocessor.tokenizer
+        self.pipeline = self.model.pipeline.type
+        self.model.eval()

    def preprocess(self, inputs: Input, **preprocess_params) -> Dict[str, Any]:
        """ Provide specific preprocess for text2text generation pipeline in order to handl multi tasks
--- a/modelscope/pipelines/nlp/text_classification_pipeline.py
+++ b/modelscope/pipelines/nlp/text_classification_pipeline.py
@@ -53,25 +53,24 @@ class TextClassificationPipeline(Pipeline):
        NOTE: Inputs of type 'str' are also supported. In this scenario, the 'first_sequence' and 'second_sequence'
            param will have no affection.
        """
-        model = Model.from_pretrained(model) if isinstance(model,
-                                                           str) else model
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)

        if preprocessor is None:
-            if model.__class__.__name__ == 'OfaForAllTasks':
-                preprocessor = Preprocessor.from_pretrained(
-                    model_name_or_path=model.model_dir,
+            if self.model.__class__.__name__ == 'OfaForAllTasks':
+                self.preprocessor = Preprocessor.from_pretrained(
+                    model_name_or_path=self.model.model_dir,
                    type=Preprocessors.ofa_tasks_preprocessor,
                    field=Fields.multi_modal)
            else:
                first_sequence = kwargs.pop('first_sequence', 'first_sequence')
                second_sequence = kwargs.pop('second_sequence', None)
-                preprocessor = Preprocessor.from_pretrained(
-                    model if isinstance(model, str) else model.model_dir,
+                self.preprocessor = Preprocessor.from_pretrained(
+                    self.model
+                    if isinstance(self.model, str) else self.model.model_dir,
                    first_sequence=first_sequence,
                    second_sequence=second_sequence,
                    sequence_length=kwargs.pop('sequence_length', 512))

-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
        self.id2label = kwargs.get('id2label')
        if self.id2label is None and hasattr(self.preprocessor, 'id2label'):
            self.id2label = self.preprocessor.id2label
--- a/modelscope/pipelines/nlp/text_error_correction_pipeline.py
+++ b/modelscope/pipelines/nlp/text_error_correction_pipeline.py
@@ -40,15 +40,13 @@ class TextErrorCorrectionPipeline(Pipeline):

        To view other examples plese check the tests/pipelines/test_text_error_correction.py.
        """
-
-        model = model if isinstance(
-            model,
-            BartForTextErrorCorrection) else Model.from_pretrained(model)
-        if preprocessor is None:
-            preprocessor = TextErrorCorrectionPreprocessor(model.model_dir)
-        self.vocab = preprocessor.vocab
        super().__init__(model=model, preprocessor=preprocessor, **kwargs)

+        if preprocessor is None:
+            self.preprocessor = TextErrorCorrectionPreprocessor(
+                self.model.model_dir)
+        self.vocab = self.preprocessor.vocab
+
    def forward(self, inputs: Dict[str, Any],
                **forward_params) -> Dict[str, Any]:
        with torch.no_grad():
--- a/modelscope/pipelines/nlp/text_generation_pipeline.py
+++ b/modelscope/pipelines/nlp/text_generation_pipeline.py
@@ -51,15 +51,14 @@ class TextGenerationPipeline(Pipeline):

            To view other examples plese check the tests/pipelines/test_text_generation.py.
        """
-        model = model if isinstance(model,
-                                    Model) else Model.from_pretrained(model)
-        cfg = read_config(model.model_dir)
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        cfg = read_config(self.model.model_dir)
        self.postprocessor = cfg.pop('postprocessor', 'decode')
        if preprocessor is None:
            preprocessor_cfg = cfg.preprocessor
            preprocessor_cfg.update({
                'model_dir':
-                model.model_dir,
+                self.model.model_dir,
                'first_sequence':
                first_sequence,
                'second_sequence':
@@ -67,9 +66,9 @@ class TextGenerationPipeline(Pipeline):
                'sequence_length':
                kwargs.pop('sequence_length', 128)
            })
-            preprocessor = build_preprocessor(preprocessor_cfg, Fields.nlp)
-        model.eval()
-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+            self.preprocessor = build_preprocessor(preprocessor_cfg,
+                                                   Fields.nlp)
+        self.model.eval()

    def _sanitize_parameters(self, **pipeline_parameters):
        return {}, pipeline_parameters, {}
--- a/modelscope/pipelines/nlp/text_ranking_pipeline.py
+++ b/modelscope/pipelines/nlp/text_ranking_pipeline.py
@@ -32,14 +32,12 @@ class TextRankingPipeline(Pipeline):
            the model if supplied.
            sequence_length: Max sequence length in the user's custom scenario. 128 will be used as a default value.
        """
-        model = Model.from_pretrained(model) if isinstance(model,
-                                                           str) else model
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)

        if preprocessor is None:
-            preprocessor = Preprocessor.from_pretrained(
-                model.model_dir,
+            self.preprocessor = Preprocessor.from_pretrained(
+                self.model.model_dir,
                sequence_length=kwargs.pop('sequence_length', 128))
-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)

    def forward(self, inputs: Dict[str, Any],
                **forward_params) -> Dict[str, Any]:
--- a/modelscope/pipelines/nlp/token_classification_pipeline.py
+++ b/modelscope/pipelines/nlp/token_classification_pipeline.py
@@ -39,15 +39,14 @@ class TokenClassificationPipeline(Pipeline):
            model (str or Model): A model instance or a model local dir or a model id in the model hub.
            preprocessor (Preprocessor): a preprocessor instance, must not be None.
        """
-        model = Model.from_pretrained(model) if isinstance(model,
-                                                           str) else model
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)

        if preprocessor is None:
-            preprocessor = Preprocessor.from_pretrained(
-                model.model_dir,
+            self.preprocessor = Preprocessor.from_pretrained(
+                self.model.model_dir,
                sequence_length=kwargs.pop('sequence_length', 128))
-        model.eval()
-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()
+
        self.id2label = kwargs.get('id2label')
        if self.id2label is None and hasattr(self.preprocessor, 'id2label'):
            self.id2label = self.preprocessor.id2label
--- a/modelscope/pipelines/nlp/translation_quality_estimation_pipeline.py
+++ b/modelscope/pipelines/nlp/translation_quality_estimation_pipeline.py
@@ -27,10 +27,10 @@ class TranslationQualityEstimationPipeline(Pipeline):

    def __init__(self, model: str, device: str = 'gpu', **kwargs):
        super().__init__(model=model, device=device)
-        model_file = os.path.join(model, ModelFile.TORCH_MODEL_FILE)
+        model_file = os.path.join(self.model, ModelFile.TORCH_MODEL_FILE)
        with open(model_file, 'rb') as f:
            buffer = io.BytesIO(f.read())
-        self.tokenizer = XLMRobertaTokenizer.from_pretrained(model)
+        self.tokenizer = XLMRobertaTokenizer.from_pretrained(self.model)
        self.model = torch.jit.load(
            buffer, map_location=self.device).to(self.device)

--- a/modelscope/pipelines/nlp/word_segmentation_pipeline.py
+++ b/modelscope/pipelines/nlp/word_segmentation_pipeline.py
@@ -49,14 +49,13 @@ class WordSegmentationPipeline(TokenClassificationPipeline):

            To view other examples plese check the tests/pipelines/test_word_segmentation.py.
        """
-        model = model if isinstance(model,
-                                    Model) else Model.from_pretrained(model)
-        if preprocessor is None:
-            preprocessor = TokenClassificationPreprocessor(
-                model.model_dir,
-                sequence_length=kwargs.pop('sequence_length', 128))
-        model.eval()
        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        if preprocessor is None:
+            self.preprocessor = TokenClassificationPreprocessor(
+                self.model.model_dir,
+                sequence_length=kwargs.pop('sequence_length', 128))
+        self.model.eval()
+
        self.id2label = kwargs.get('id2label')
        if self.id2label is None and hasattr(self.preprocessor, 'id2label'):
            self.id2label = self.preprocessor.id2label
--- a/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py
+++ b/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py
@@ -59,16 +59,14 @@ class ZeroShotClassificationPipeline(Pipeline):
        """
        assert isinstance(model, str) or isinstance(model, Model), \
            'model must be a single str or Model'
-        model = model if isinstance(model,
-                                    Model) else Model.from_pretrained(model)
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
        self.entailment_id = 0
        self.contradiction_id = 2
        if preprocessor is None:
-            preprocessor = ZeroShotClassificationPreprocessor(
-                model.model_dir,
+            self.preprocessor = ZeroShotClassificationPreprocessor(
+                self.model.model_dir,
                sequence_length=kwargs.pop('sequence_length', 512))
-        model.eval()
-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()

    def _sanitize_parameters(self, **kwargs):
        preprocess_params = {}
--- a/modelscope/pipelines/science/protein_structure_pipeline.py
+++ b/modelscope/pipelines/science/protein_structure_pipeline.py
@@ -105,22 +105,16 @@ class ProteinStructurePipeline(Pipeline):
            >>> print(pipeline_ins(protein))

        """
-        import copy
-        model_path = copy.deepcopy(model) if isinstance(model, str) else None
-        cfg = read_config(model_path)  # only model is str
-        self.cfg = cfg
-        self.config = model_config(
-            cfg['pipeline']['model_name'])  # alphafold config
-        model = model if isinstance(
-            model, Model) else Model.from_pretrained(model_path)
-        self.postprocessor = cfg.pop('postprocessor', None)
-        if preprocessor is None:
-            preprocessor_cfg = cfg.preprocessor
-            preprocessor = build_preprocessor(preprocessor_cfg, Fields.science)
-        model.eval()
-        model.model.inference_mode()
-        model.model_dir = model_path
        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.cfg = read_config(self.model.model_dir)
+        self.config = model_config(
+            self.cfg['pipeline']['model_name'])  # alphafold config
+        self.postprocessor = self.cfg.pop('postprocessor', None)
+        if preprocessor is None:
+            preprocessor_cfg = self.cfg.preprocessor
+            self.preprocessor = build_preprocessor(preprocessor_cfg,
+                                                   Fields.science)
+        self.model.eval()

    def _sanitize_parameters(self, **pipeline_parameters):
        return pipeline_parameters, pipeline_parameters, pipeline_parameters
--- a/modelscope/preprocessors/base.py
+++ b/modelscope/preprocessors/base.py
@@ -6,7 +6,8 @@ from typing import Any, Dict, Optional, Sequence

 from modelscope.metainfo import Models, Preprocessors
 from modelscope.utils.config import Config, ConfigDict
-from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModeKeys, Tasks
+from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, Invoke,
+                                       ModeKeys, Tasks)
 from modelscope.utils.hub import read_config, snapshot_download
 from modelscope.utils.logger import get_logger
 from .builder import build_preprocessor
@@ -194,7 +195,9 @@ class Preprocessor(ABC):
        """
        if not os.path.exists(model_name_or_path):
            model_dir = snapshot_download(
-                model_name_or_path, revision=revision)
+                model_name_or_path,
+                revision=revision,
+                user_agent={Invoke.KEY: Invoke.PREPROCESSOR})
        else:
            model_dir = model_name_or_path
        if cfg_dict is None:
--- a/modelscope/preprocessors/multi_modal.py
+++ b/modelscope/preprocessors/multi_modal.py
@@ -14,7 +14,8 @@ from modelscope.metainfo import Preprocessors
 from modelscope.pipelines.base import Input
 from modelscope.preprocessors import load_image
 from modelscope.utils.config import Config
-from modelscope.utils.constant import Fields, ModeKeys, ModelFile, Tasks
+from modelscope.utils.constant import (Fields, Invoke, ModeKeys, ModelFile,
+                                       Tasks)
 from .base import Preprocessor
 from .builder import PREPROCESSORS
 from .ofa import *  # noqa
@@ -57,7 +58,7 @@ class OfaPreprocessor(Preprocessor):
            Tasks.auto_speech_recognition: OfaASRPreprocessor
        }
        model_dir = model_dir if osp.exists(model_dir) else snapshot_download(
-            model_dir)
+            model_dir, user_agent={Invoke.KEY: Invoke.PREPROCESSOR})
        self.cfg = Config.from_file(
            osp.join(model_dir, ModelFile.CONFIGURATION))
        self.preprocess = preprocess_mapping[self.cfg.task](
@@ -131,7 +132,7 @@ class CLIPPreprocessor(Preprocessor):
        """
        super().__init__(*args, **kwargs)
        model_dir = model_dir if osp.exists(model_dir) else snapshot_download(
-            model_dir)
+            model_dir, user_agent={Invoke.KEY: Invoke.PREPROCESSOR})
        self.mode = mode
        # text tokenizer
        from modelscope.models.multi_modal.clip.bert_tokenizer import FullTokenizer
--- a/modelscope/trainers/audio/kws_farfield_trainer.py
+++ b/modelscope/trainers/audio/kws_farfield_trainer.py
@@ -8,7 +8,6 @@ import torch
 from torch import nn as nn
 from torch import optim as optim

-from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.metainfo import Trainers
 from modelscope.models import Model, TorchModel
 from modelscope.msdatasets.task_datasets.audio import KWSDataLoader, KWSDataset
@@ -54,12 +53,8 @@ class KWSFarfieldTrainer(BaseTrainer):
                 **kwargs):

        if isinstance(model, str):
-            if os.path.exists(model):
-                self.model_dir = model if os.path.isdir(
-                    model) else os.path.dirname(model)
-            else:
-                self.model_dir = snapshot_download(
-                    model, revision=model_revision)
+            self.model_dir = self.get_or_download_model_dir(
+                model, model_revision)
            if cfg_file is None:
                cfg_file = os.path.join(self.model_dir,
                                        ModelFile.CONFIGURATION)
--- a/modelscope/trainers/base.py
+++ b/modelscope/trainers/base.py
@@ -1,11 +1,14 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.

+import os
 import time
 from abc import ABC, abstractmethod
 from typing import Callable, Dict, List, Optional, Tuple, Union

+from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.trainers.builder import TRAINERS
 from modelscope.utils.config import Config
+from modelscope.utils.constant import Invoke
 from .utils.log_buffer import LogBuffer


@@ -32,6 +35,17 @@ class BaseTrainer(ABC):
        self.log_buffer = LogBuffer()
        self.timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())

+    def get_or_download_model_dir(self, model, model_revision=None):
+        if os.path.exists(model):
+            model_cache_dir = model if os.path.isdir(
+                model) else os.path.dirname(model)
+        else:
+            model_cache_dir = snapshot_download(
+                model,
+                revision=model_revision,
+                user_agent={Invoke.KEY: Invoke.TRAINER})
+        return model_cache_dir
+
    @abstractmethod
    def train(self, *args, **kwargs):
        """ Train (and evaluate) process
--- a/modelscope/trainers/multi_modal/clip/clip_trainer.py
+++ b/modelscope/trainers/multi_modal/clip/clip_trainer.py
@@ -20,7 +20,7 @@ from modelscope.trainers.builder import TRAINERS
 from modelscope.trainers.optimizer.builder import build_optimizer
 from modelscope.utils.config import Config
 from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, ConfigKeys,
-                                       ModeKeys)
+                                       Invoke, ModeKeys)
 from .clip_trainer_utils import get_loss, get_optimizer_params, get_schedule


@@ -52,7 +52,8 @@ class CLIPTrainer(EpochBasedTrainer):
            model_revision: Optional[str] = DEFAULT_MODEL_REVISION,
            seed: int = 42,
            **kwargs):
-        model = Model.from_pretrained(model, revision=model_revision)
+        model = Model.from_pretrained(
+            model, revision=model_revision, invoked_by=Invoke.TRAINER)
        # for training & eval, we convert the model from FP16 back to FP32
        # to compatible with modelscope amp training
        convert_models_to_fp32(model)
--- a/modelscope/trainers/multi_modal/ofa/ofa_trainer.py
+++ b/modelscope/trainers/multi_modal/ofa/ofa_trainer.py
@@ -23,7 +23,7 @@ from modelscope.trainers.optimizer.builder import build_optimizer
 from modelscope.trainers.parallel.utils import is_parallel
 from modelscope.utils.config import Config
 from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, ConfigKeys,
-                                       ModeKeys)
+                                       Invoke, ModeKeys)
 from .ofa_trainer_utils import (AdjustLabelSmoothedCrossEntropyCriterion,
                                get_schedule)

@@ -49,7 +49,8 @@ class OFATrainer(EpochBasedTrainer):
            model_revision: Optional[str] = DEFAULT_MODEL_REVISION,
            seed: int = 42,
            **kwargs):
-        model = Model.from_pretrained(model, revision=model_revision)
+        model = Model.from_pretrained(
+            model, revision=model_revision, invoked_by=Invoke.TRAINER)
        model_dir = model.model_dir
        self.cfg_modify_fn = cfg_modify_fn
        cfg = self.rebuild_config(Config.from_file(cfg_file))
--- a/modelscope/trainers/multi_modal/team/team_trainer.py
+++ b/modelscope/trainers/multi_modal/team/team_trainer.py
@@ -7,21 +7,17 @@ from typing import Callable, Dict, Optional
 import numpy as np
 import torch
 import torch.nn as nn
-import torchvision.datasets as datasets
-import torchvision.transforms as transforms
 from sklearn.metrics import confusion_matrix
-from torch.optim import AdamW
 from torch.utils.data import DataLoader, Dataset

 from modelscope.metainfo import Trainers
 from modelscope.models.base import Model
-from modelscope.msdatasets import MsDataset
 from modelscope.trainers.base import BaseTrainer
 from modelscope.trainers.builder import TRAINERS
-from modelscope.trainers.multi_modal.team.team_trainer_utils import (
-    get_optimizer, train_mapping, val_mapping)
+from modelscope.trainers.multi_modal.team.team_trainer_utils import \
+    get_optimizer
 from modelscope.utils.config import Config
-from modelscope.utils.constant import DownloadMode, ModeKeys
+from modelscope.utils.constant import Invoke
 from modelscope.utils.logger import get_logger

 logger = get_logger()
@@ -36,7 +32,7 @@ class TEAMImgClsTrainer(BaseTrainer):
        super().__init__(cfg_file)

        self.cfg = Config.from_file(cfg_file)
-        team_model = Model.from_pretrained(model)
+        team_model = Model.from_pretrained(model, invoked_by=Invoke.TRAINER)
        image_model = team_model.model.image_model.vision_transformer
        classification_model = nn.Sequential(
            OrderedDict([('encoder', image_model),
--- a/modelscope/trainers/nlp/csanmt_translation_trainer.py
+++ b/modelscope/trainers/nlp/csanmt_translation_trainer.py
@@ -24,8 +24,7 @@ logger = get_logger()
 class CsanmtTranslationTrainer(BaseTrainer):

    def __init__(self, model: str, cfg_file: str = None, *args, **kwargs):
-        if not osp.exists(model):
-            model = snapshot_download(model)
+        model = self.get_or_download_model_dir(model)
        tf.reset_default_graph()

        self.model_dir = model
--- a/modelscope/trainers/nlp_trainer.py
+++ b/modelscope/trainers/nlp_trainer.py
@@ -10,7 +10,6 @@ import torch
 from torch import nn
 from torch.utils.data import Dataset

-from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.metainfo import Trainers
 from modelscope.metrics.builder import build_metric
 from modelscope.models.base import Model, TorchModel
@@ -478,11 +477,7 @@ class NlpEpochBasedTrainer(EpochBasedTrainer):
        """

        if isinstance(model, str):
-            if os.path.exists(model):
-                model_dir = model if os.path.isdir(model) else os.path.dirname(
-                    model)
-            else:
-                model_dir = snapshot_download(model, revision=model_revision)
+            model_dir = self.get_or_download_model_dir(model, model_revision)
            if cfg_file is None:
                cfg_file = os.path.join(model_dir, ModelFile.CONFIGURATION)
        else:
--- a/modelscope/trainers/trainer.py
+++ b/modelscope/trainers/trainer.py
@@ -14,7 +14,6 @@ from torch.utils.data import DataLoader, Dataset
 from torch.utils.data.dataloader import default_collate
 from torch.utils.data.distributed import DistributedSampler

-from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.metainfo import Trainers
 from modelscope.metrics import build_metric, task_default_metrics
 from modelscope.models.base import Model, TorchModel
@@ -98,12 +97,8 @@ class EpochBasedTrainer(BaseTrainer):
        self._seed = seed
        set_random_seed(self._seed)
        if isinstance(model, str):
-            if os.path.exists(model):
-                self.model_dir = model if os.path.isdir(
-                    model) else os.path.dirname(model)
-            else:
-                self.model_dir = snapshot_download(
-                    model, revision=model_revision)
+            self.model_dir = self.get_or_download_model_dir(
+                model, model_revision)
            if cfg_file is None:
                cfg_file = os.path.join(self.model_dir,
                                        ModelFile.CONFIGURATION)
--- a/modelscope/utils/constant.py
+++ b/modelscope/utils/constant.py
@@ -291,6 +291,14 @@ class ModelFile(object):
    TS_MODEL_FILE = 'model.ts'


+class Invoke(object):
+    KEY = 'invoked_by'
+    PRETRAINED = 'from_pretrained'
+    PIPELINE = 'pipeline'
+    TRAINER = 'trainer'
+    PREPROCESSOR = 'preprocessor'
+
+
 class ConfigFields(object):
    """ First level keyword in configuration file
    """