[to #46273042]feat: pipeline trainer stat information from snapshot_download

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10837490
This commit is contained in:
mulin.lyh
2022-11-28 15:06:03 +08:00
parent d5ee8aa66d
commit a4c36a2920
59 changed files with 282 additions and 412 deletions

View File

@@ -5,10 +5,10 @@ from abc import ABC, abstractmethod
from typing import Any, Callable, Dict, List, Optional, Union
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models.builder import MODELS, build_model
from modelscope.models.builder import build_model
from modelscope.utils.checkpoint import save_checkpoint, save_pretrained
from modelscope.utils.config import Config
from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile, Tasks
from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke, ModelFile
from modelscope.utils.device import verify_device
from modelscope.utils.logger import get_logger
@@ -94,6 +94,10 @@ class Model(ABC):
if prefetched is not None:
kwargs.pop('model_prefetched')
invoked_by = kwargs.get(Invoke.KEY)
if invoked_by is not None:
kwargs.pop(Invoke.KEY)
if osp.exists(model_name_or_path):
local_model_dir = model_name_or_path
else:
@@ -101,7 +105,13 @@ class Model(ABC):
raise RuntimeError(
'Expecting model is pre-fetched locally, but is not found.'
)
local_model_dir = snapshot_download(model_name_or_path, revision)
if invoked_by is not None:
invoked_by = {Invoke.KEY: invoked_by}
else:
invoked_by = {Invoke.KEY: Invoke.PRETRAINED}
local_model_dir = snapshot_download(
model_name_or_path, revision, user_agent=invoked_by)
logger.info(f'initialize model from {local_model_dir}')
if cfg_dict is not None:
cfg = cfg_dict
@@ -133,6 +143,7 @@ class Model(ABC):
model.cfg = cfg
model.name = model_name_or_path
model.model_dir = local_model_dir
return model
def save_pretrained(self,

View File

@@ -509,8 +509,8 @@ def convert_weights(model: nn.Module):
@MODELS.register_module(Tasks.multi_modal_embedding, module_name=Models.clip)
class CLIPForMultiModalEmbedding(TorchModel):
def __init__(self, model_dir, device_id=-1):
super().__init__(model_dir=model_dir, device_id=device_id)
def __init__(self, model_dir, *args, **kwargs):
super().__init__(model_dir=model_dir, *args, **kwargs)
# Initialize the model.
vision_model_config_file = '{}/vision_model_config.json'.format(

View File

@@ -9,7 +9,6 @@ import numpy as np
import torch
import torch.nn.functional as F
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.metainfo import Models
from modelscope.models.base import Tensor, TorchModel
from modelscope.models.builder import MODELS

View File

@@ -16,7 +16,7 @@ from modelscope.outputs import TASK_OUTPUTS
from modelscope.pipeline_inputs import TASK_INPUTS, check_input_type
from modelscope.preprocessors import Preprocessor
from modelscope.utils.config import Config
from modelscope.utils.constant import Frameworks, ModelFile
from modelscope.utils.constant import Frameworks, Invoke, ModelFile
from modelscope.utils.device import (create_device, device_placement,
verify_device)
from modelscope.utils.hub import read_config, snapshot_download
@@ -47,8 +47,10 @@ class Pipeline(ABC):
logger.info(f'initiate model from location {model}.')
# expecting model has been prefetched to local cache beforehand
return Model.from_pretrained(
model, model_prefetched=True,
device=self.device_name) if is_model(model) else model
model,
device=self.device_name,
model_prefetched=True,
invoked_by=Invoke.PIPELINE) if is_model(model) else model
else:
return model
@@ -383,15 +385,12 @@ class DistributedPipeline(Pipeline):
preprocessor: Union[Preprocessor, List[Preprocessor]] = None,
auto_collate=True,
**kwargs):
self.preprocessor = preprocessor
super().__init__(model=model, preprocessor=preprocessor, kwargs=kwargs)
self._model_prepare = False
self._model_prepare_lock = Lock()
self._auto_collate = auto_collate
if os.path.exists(model):
self.model_dir = model
else:
self.model_dir = snapshot_download(model)
self.model_dir = self.model.model_dir
self.cfg = read_config(self.model_dir)
self.world_size = self.cfg.model.world_size
self.model_pool = None

View File

@@ -7,7 +7,7 @@ from modelscope.hub.snapshot_download import snapshot_download
from modelscope.metainfo import Pipelines
from modelscope.models.base import Model
from modelscope.utils.config import ConfigDict, check_config
from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Tasks
from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke, Tasks
from modelscope.utils.hub import read_config
from modelscope.utils.registry import Registry, build_from_cfg
from .base import Pipeline
@@ -209,6 +209,8 @@ DEFAULT_MODEL_FOR_PIPELINE = {
Tasks.referring_video_object_segmentation:
(Pipelines.referring_video_object_segmentation,
'damo/cv_swin-t_referring_video-object-segmentation'),
Tasks.video_summarization: (Pipelines.video_summarization,
'damo/cv_googlenet_pgl-video-summarization'),
}
@@ -220,14 +222,19 @@ def normalize_model_input(model, model_revision):
# skip revision download if model is a local directory
if not os.path.exists(model):
# note that if there is already a local copy, snapshot_download will check and skip downloading
model = snapshot_download(model, revision=model_revision)
model = snapshot_download(
model,
revision=model_revision,
user_agent={Invoke.KEY: Invoke.PIPELINE})
elif isinstance(model, list) and isinstance(model[0], str):
for idx in range(len(model)):
if is_official_hub_path(
model[idx],
model_revision) and not os.path.exists(model[idx]):
model[idx] = snapshot_download(
model[idx], revision=model_revision)
model[idx],
revision=model_revision,
user_agent={Invoke.KEY: Invoke.PIPELINE})
return model

View File

@@ -8,14 +8,13 @@ import torch
from PIL import Image
from torchvision import transforms
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.metainfo import Pipelines
from modelscope.models.cv.animal_recognition import Bottleneck, ResNet
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Input, Pipeline
from modelscope.pipelines.builder import PIPELINES
from modelscope.preprocessors import LoadImage
from modelscope.utils.constant import Tasks
from modelscope.utils.constant import Devices, ModelFile, Tasks
from modelscope.utils.logger import get_logger
logger = get_logger()
@@ -67,15 +66,10 @@ class AnimalRecognitionPipeline(Pipeline):
filter_param(src_params, own_state)
model.load_state_dict(own_state)
self.model = resnest101(num_classes=8288)
local_model_dir = model
if osp.exists(model):
local_model_dir = model
else:
local_model_dir = snapshot_download(model)
self.local_path = local_model_dir
self.local_path = self.model
src_params = torch.load(
osp.join(local_model_dir, 'pytorch_model.pt'), 'cpu')
osp.join(self.local_path, ModelFile.TORCH_MODEL_FILE), Devices.cpu)
self.model = resnest101(num_classes=8288)
load_pretrained(self.model, src_params)
logger.info('load model done')

View File

@@ -120,8 +120,7 @@ class Body3DKeypointsPipeline(Pipeline):
"""
super().__init__(model=model, **kwargs)
self.keypoint_model_3d = model if isinstance(
model, BodyKeypointsDetection3D) else Model.from_pretrained(model)
self.keypoint_model_3d = self.model
self.keypoint_model_3d.eval()
# init human body 2D keypoints detection pipeline

View File

@@ -11,7 +11,7 @@ from PIL import ImageFile
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.pipelines.util import is_official_hub_path
from modelscope.utils.config import Config
from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile
from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke, ModelFile
from modelscope.utils.device import create_device
@@ -37,7 +37,9 @@ class EasyCVPipeline(object):
assert is_official_hub_path(
model), 'Only support local model path and official hub path!'
model_dir = snapshot_download(
model_id=model, revision=DEFAULT_MODEL_REVISION)
model_id=model,
revision=DEFAULT_MODEL_REVISION,
user_agent={Invoke.KEY: Invoke.PIPELINE})
assert osp.isdir(model_dir)
model_files = glob.glob(
@@ -48,6 +50,7 @@ class EasyCVPipeline(object):
model_path = model_files[0]
self.model_path = model_path
self.model_dir = model_dir
# get configuration file from source model dir
self.config_file = os.path.join(model_dir, ModelFile.CONFIGURATION)

View File

@@ -24,7 +24,6 @@ class HumanWholebodyKeypointsPipeline(EasyCVPipeline):
model (str): model id on modelscope hub or local model path.
model_file_pattern (str): model file pattern.
"""
self.model_dir = model
super(HumanWholebodyKeypointsPipeline, self).__init__(
model=model,
model_file_pattern=model_file_pattern,

View File

@@ -8,7 +8,6 @@ import torch
from PIL import Image
from torchvision import transforms
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.metainfo import Pipelines
from modelscope.models.cv.animal_recognition import resnet
from modelscope.outputs import OutputKeys
@@ -67,16 +66,12 @@ class GeneralRecognitionPipeline(Pipeline):
filter_param(src_params, own_state)
model.load_state_dict(own_state)
self.model = resnest101(num_classes=54092)
local_model_dir = model
device = 'cpu'
if osp.exists(model):
local_model_dir = model
else:
local_model_dir = snapshot_download(model)
self.local_path = local_model_dir
self.local_path = self.model
src_params = torch.load(
osp.join(local_model_dir, ModelFile.TORCH_MODEL_FILE), device)
osp.join(self.local_path, ModelFile.TORCH_MODEL_FILE), device)
self.model = resnest101(num_classes=54092)
load_pretrained(self.model, src_params)
logger.info('load model done')

View File

@@ -21,7 +21,6 @@ class Hand2DKeypointsPipeline(EasyCVPipeline):
model (str): model id on modelscope hub or local model path.
model_file_pattern (str): model file pattern.
"""
self.model_dir = model
super(Hand2DKeypointsPipeline, self).__init__(
model=model,
model_file_pattern=model_file_pattern,

View File

@@ -1,5 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import Any, Dict, Union
from typing import Any, Dict, Optional, Union
import cv2
import numpy as np
@@ -25,22 +25,15 @@ class ImageClassificationPipeline(Pipeline):
def __init__(self,
model: Union[Model, str],
preprocessor: [Preprocessor] = None,
preprocessor: Optional[Preprocessor] = None,
**kwargs):
super().__init__(model=model)
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
assert isinstance(model, str) or isinstance(model, Model), \
'model must be a single str or OfaForAllTasks'
if isinstance(model, str):
pipe_model = Model.from_pretrained(model)
elif isinstance(model, Model):
pipe_model = model
else:
raise NotImplementedError
pipe_model.model.eval()
pipe_model.to(get_device())
if preprocessor is None and isinstance(pipe_model, OfaForAllTasks):
preprocessor = OfaPreprocessor(model_dir=pipe_model.model_dir)
super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
self.model.eval()
self.model.to(get_device())
if preprocessor is None and isinstance(self.model, OfaForAllTasks):
self.preprocessor = OfaPreprocessor(model_dir=self.model.model_dir)
def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
return inputs

View File

@@ -32,10 +32,8 @@ class ImageColorEnhancePipeline(Pipeline):
Args:
model: model id on modelscope hub.
"""
model = model if isinstance(
model, ImageColorEnhance) else Model.from_pretrained(model)
model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.model.eval()
if torch.cuda.is_available():
self._device = torch.device('cuda')

View File

@@ -32,17 +32,14 @@ class ImageDenoisePipeline(Pipeline):
Args:
model: model id on modelscope hub.
"""
model = model if isinstance(
model, NAFNetForImageDenoise) else Model.from_pretrained(model)
model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.config = model.config
self.model.eval()
self.config = self.model.config
if torch.cuda.is_available():
self._device = torch.device('cuda')
else:
self._device = torch.device('cpu')
self.model = model
logger.info('load image denoise model done')
def preprocess(self, input: Input) -> Dict[str, Any]:

View File

@@ -44,7 +44,7 @@ class LanguageGuidedVideoSummarizationPipeline(Pipeline):
"""
super().__init__(model=model, auto_collate=False, **kwargs)
logger.info(f'loading model from {model}')
self.model_dir = model
self.model_dir = self.model.model_dir
self.tmp_dir = kwargs.get('tmp_dir', None)
if self.tmp_dir is None:

View File

@@ -9,7 +9,6 @@ import PIL
import torch
from PIL import Image
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.metainfo import Pipelines
from modelscope.models.cv.virual_tryon import SDAFNet_Tryon
from modelscope.outputs import OutputKeys
@@ -52,17 +51,12 @@ class VirtualTryonPipeline(Pipeline):
filter_param(src_params, own_state)
model.load_state_dict(own_state)
self.model = SDAFNet_Tryon(ref_in_channel=6).to(self.device)
local_model_dir = model
if osp.exists(model):
local_model_dir = model
else:
local_model_dir = snapshot_download(model)
self.local_path = local_model_dir
self.local_path = self.model
src_params = torch.load(
osp.join(local_model_dir, ModelFile.TORCH_MODEL_FILE), 'cpu')
osp.join(self.local_path, ModelFile.TORCH_MODEL_FILE), 'cpu')
self.model = SDAFNet_Tryon(ref_in_channel=6).to(self.device)
load_pretrained(self.model, src_params)
self.model = self.model.eval()
self.model.eval()
self.size = 192
from torchvision import transforms
self.test_transforms = transforms.Compose([

View File

@@ -29,22 +29,13 @@ class ImageCaptioningPipeline(Pipeline):
Args:
model: model id on modelscope hub.
"""
super().__init__(model=model)
assert isinstance(model, str) or isinstance(model, Model), \
'model must be a single str or OfaForAllTasks'
if isinstance(model, str):
pipe_model = Model.from_pretrained(model)
elif isinstance(model, Model):
pipe_model = model
else:
raise NotImplementedError
pipe_model.model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.model.eval()
if preprocessor is None:
if isinstance(pipe_model, OfaForAllTasks):
preprocessor = OfaPreprocessor(pipe_model.model_dir)
elif isinstance(pipe_model, MPlugForAllTasks):
preprocessor = MPlugPreprocessor(pipe_model.model_dir)
super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
if isinstance(self.model, OfaForAllTasks):
self.preprocessor = OfaPreprocessor(self.model.model_dir)
elif isinstance(self.model, MPlugForAllTasks):
self.preprocessor = MPlugPreprocessor(self.model.model_dir)
def _batch(self, data):
if isinstance(self.model, OfaForAllTasks):

View File

@@ -28,19 +28,10 @@ class ImageTextRetrievalPipeline(Pipeline):
Args:
model: model id on modelscope hub.
"""
super().__init__(model=model)
assert isinstance(model, str) or isinstance(model, Model), \
f'model must be a single str or Model, but got {type(model)}'
if isinstance(model, str):
pipe_model = Model.from_pretrained(model)
elif isinstance(model, Model):
pipe_model = model
else:
raise NotImplementedError
pipe_model.model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.model.eval()
if preprocessor is None:
preprocessor = MPlugPreprocessor(pipe_model.model_dir)
super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
self.preprocessor = MPlugPreprocessor(self.model.model_dir)
def forward(self, inputs: Dict[str, Any],
**forward_params) -> Dict[str, Any]:

View File

@@ -28,21 +28,14 @@ class MultiModalEmbeddingPipeline(Pipeline):
Args:
model: model id on modelscope hub.
"""
if isinstance(model, str):
pipe_model = Model.from_pretrained(model)
elif isinstance(model, Model):
pipe_model = model
else:
raise NotImplementedError('model must be a single str')
pipe_model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.model.eval()
if preprocessor is None:
if isinstance(pipe_model, CLIPForMultiModalEmbedding):
preprocessor = CLIPPreprocessor(pipe_model.model_dir)
if isinstance(self.model, CLIPForMultiModalEmbedding):
self.preprocessor = CLIPPreprocessor(self.model.model_dir)
else:
raise NotImplementedError
super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
return self.model(self.preprocess(input))

View File

@@ -28,20 +28,11 @@ class OcrRecognitionPipeline(Pipeline):
Args:
model: model id on modelscope hub.
"""
super().__init__(model=model)
assert isinstance(model, str) or isinstance(model, Model), \
'model must be a single str or OfaForAllTasks'
if isinstance(model, str):
pipe_model = Model.from_pretrained(model)
elif isinstance(model, Model):
pipe_model = model
else:
raise NotImplementedError
pipe_model.model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.model.eval()
if preprocessor is None:
if isinstance(pipe_model, OfaForAllTasks):
preprocessor = OfaPreprocessor(pipe_model.model_dir)
super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
if isinstance(self.model, OfaForAllTasks):
self.preprocessor = OfaPreprocessor(self.model.model_dir)
def forward(self, inputs: Dict[str, Any],
**forward_params) -> Dict[str, Any]:

View File

@@ -31,18 +31,10 @@ class TextToImageSynthesisPipeline(Pipeline):
Args:
model: model id on modelscope hub.
"""
device_id = 0 if torch.cuda.is_available() else -1
if isinstance(model, str):
pipe_model = Model.from_pretrained(model, device_id=device_id)
elif isinstance(model, Model):
pipe_model = model
else:
raise NotImplementedError(
f'expecting a Model instance or str, but get {type(model)}.')
if preprocessor is None and isinstance(pipe_model,
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
if preprocessor is None and isinstance(self.model,
OfaForTextToImageSynthesis):
preprocessor = OfaPreprocessor(pipe_model.model_dir)
super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
self.preprocessor = OfaPreprocessor(self.model.model_dir)
def preprocess(self, input: Input, **preprocess_params) -> Dict[str, Any]:
if self.preprocessor is not None:

View File

@@ -1,5 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import Any, Dict, Union
from typing import Any, Dict, Optional, Union
from modelscope.metainfo import Pipelines
from modelscope.models.multi_modal import OfaForAllTasks
@@ -18,26 +18,17 @@ class VisualEntailmentPipeline(Pipeline):
def __init__(self,
model: Union[Model, str],
preprocessor: [Preprocessor] = None,
preprocessor: Optional[Preprocessor] = None,
**kwargs):
"""
use `model` and `preprocessor` to create a visual entailment pipeline for prediction
Args:
model: model id on modelscope hub.
"""
super().__init__(model=model)
assert isinstance(model, str) or isinstance(model, Model), \
'model must be a single str or OfaForAllTasks'
if isinstance(model, str):
pipe_model = Model.from_pretrained(model)
elif isinstance(model, Model):
pipe_model = model
else:
raise NotImplementedError
pipe_model.model.eval()
if preprocessor is None and isinstance(pipe_model, OfaForAllTasks):
preprocessor = OfaPreprocessor(model_dir=pipe_model.model_dir)
super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.model.eval()
if preprocessor is None and isinstance(self.model, OfaForAllTasks):
self.preprocessor = OfaPreprocessor(model_dir=self.model.model_dir)
def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
return inputs

View File

@@ -1,5 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import Any, Dict, Union
from typing import Any, Dict, Optional, Union
from modelscope.metainfo import Pipelines
from modelscope.models.multi_modal import OfaForAllTasks
@@ -18,26 +18,17 @@ class VisualGroundingPipeline(Pipeline):
def __init__(self,
model: Union[Model, str],
preprocessor: [Preprocessor] = None,
preprocessor: Optional[Preprocessor] = None,
**kwargs):
"""
use `model` and `preprocessor` to create a visual grounding pipeline for prediction
Args:
model: model id on modelscope hub.
"""
super().__init__(model=model)
assert isinstance(model, str) or isinstance(model, Model), \
'model must be a single str or OfaForAllTasks'
if isinstance(model, str):
pipe_model = Model.from_pretrained(model)
elif isinstance(model, Model):
pipe_model = model
else:
raise NotImplementedError
pipe_model.model.eval()
if preprocessor is None and isinstance(pipe_model, OfaForAllTasks):
preprocessor = OfaPreprocessor(model_dir=pipe_model.model_dir)
super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.model.model.eval()
if preprocessor is None and isinstance(self.model, OfaForAllTasks):
self.preprocessor = OfaPreprocessor(model_dir=self.model.model_dir)
def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
return inputs

View File

@@ -31,15 +31,13 @@ class VisualQuestionAnsweringPipeline(Pipeline):
model (MPlugForVisualQuestionAnswering): a model instance
preprocessor (MPlugVisualQuestionAnsweringPreprocessor): a preprocessor instance
"""
model = model if isinstance(model,
Model) else Model.from_pretrained(model)
if preprocessor is None:
if isinstance(model, OfaForAllTasks):
preprocessor = OfaPreprocessor(model.model_dir)
elif isinstance(model, MPlugForAllTasks):
preprocessor = MPlugPreprocessor(model.model_dir)
model.model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
if preprocessor is None:
if isinstance(self.model, OfaForAllTasks):
self.preprocessor = OfaPreprocessor(self.model.model_dir)
elif isinstance(self.model, MPlugForAllTasks):
self.preprocessor = MPlugPreprocessor(self.model.model_dir)
self.model.eval()
def forward(self, inputs: Dict[str, Any],
**forward_params) -> Dict[str, Any]:

View File

@@ -32,12 +32,10 @@ class ConversationalTextToSqlPipeline(Pipeline):
preprocessor (ConversationalTextToSqlPreprocessor):
a preprocessor instance
"""
model = model if isinstance(
model, StarForTextToSql) else Model.from_pretrained(model)
if preprocessor is None:
preprocessor = ConversationalTextToSqlPreprocessor(model.model_dir)
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
if preprocessor is None:
self.preprocessor = ConversationalTextToSqlPreprocessor(
self.model.model_dir)
def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
"""process the prediction results

View File

@@ -30,13 +30,11 @@ class DialogIntentPredictionPipeline(Pipeline):
or a SpaceForDialogIntent instance.
preprocessor (DialogIntentPredictionPreprocessor): An optional preprocessor instance.
"""
model = model if isinstance(
model, SpaceForDialogIntent) else Model.from_pretrained(model)
if preprocessor is None:
preprocessor = DialogIntentPredictionPreprocessor(model.model_dir)
self.model = model
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.categories = preprocessor.categories
if preprocessor is None:
self.preprocessor = DialogIntentPredictionPreprocessor(
self.model.model_dir)
self.categories = self.preprocessor.categories
def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
"""process the prediction results

View File

@@ -29,13 +29,10 @@ class DialogModelingPipeline(Pipeline):
or a SpaceForDialogModeling instance.
preprocessor (DialogModelingPreprocessor): An optional preprocessor instance.
"""
model = model if isinstance(
model, SpaceForDialogModeling) else Model.from_pretrained(model)
self.model = model
if preprocessor is None:
preprocessor = DialogModelingPreprocessor(model.model_dir)
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.preprocessor = preprocessor
if preprocessor is None:
self.preprocessor = DialogModelingPreprocessor(
self.model.model_dir)
def postprocess(self, inputs: Dict[str, Tensor]) -> Dict[str, str]:
"""process the prediction results

View File

@@ -31,16 +31,13 @@ class DialogStateTrackingPipeline(Pipeline):
from the model hub, or a SpaceForDialogStateTracking instance.
preprocessor (DialogStateTrackingPreprocessor): An optional preprocessor instance.
"""
model = model if isinstance(
model, SpaceForDST) else Model.from_pretrained(model)
self.model = model
if preprocessor is None:
preprocessor = DialogStateTrackingPreprocessor(model.model_dir)
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
if preprocessor is None:
self.preprocessor = DialogStateTrackingPreprocessor(
self.model.model_dir)
self.tokenizer = preprocessor.tokenizer
self.config = preprocessor.config
self.tokenizer = self.preprocessor.tokenizer
self.config = self.preprocessor.config
def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
"""process the prediction results

View File

@@ -31,27 +31,22 @@ class DocumentSegmentationPipeline(Pipeline):
model: Union[Model, str],
preprocessor: DocumentSegmentationPreprocessor = None,
**kwargs):
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
model = model if isinstance(model,
Model) else Model.from_pretrained(model)
self.model_dir = model.model_dir
self.model_cfg = model.forward()
self.model_dir = self.model.model_dir
self.model_cfg = self.model.forward()
if self.model_cfg['type'] == 'bert':
config = BertConfig.from_pretrained(model.model_dir, num_labels=2)
config = BertConfig.from_pretrained(self.model_dir, num_labels=2)
elif self.model_cfg['type'] == 'ponet':
config = PoNetConfig.from_pretrained(model.model_dir, num_labels=2)
config = PoNetConfig.from_pretrained(self.model_dir, num_labels=2)
self.document_segmentation_model = model.build_with_config(
self.document_segmentation_model = self.model.build_with_config(
config=config)
if preprocessor is None:
preprocessor = DocumentSegmentationPreprocessor(
self.model_dir, config)
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.preprocessor = preprocessor
self.preprocessor = DocumentSegmentationPreprocessor(
self.model.model_dir, config)
def __call__(
self, documents: Union[List[List[str]], List[str],

View File

@@ -21,12 +21,10 @@ class FaqQuestionAnsweringPipeline(Pipeline):
model: Union[str, Model],
preprocessor: Preprocessor = None,
**kwargs):
model = Model.from_pretrained(model) if isinstance(model,
str) else model
if preprocessor is None:
preprocessor = Preprocessor.from_pretrained(
model.model_dir, **kwargs)
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
if preprocessor is None:
self.preprocessor = Preprocessor.from_pretrained(
self.model.model_dir, **kwargs)
def _sanitize_parameters(self, **pipeline_parameters):
return pipeline_parameters, pipeline_parameters, pipeline_parameters
@@ -37,11 +35,11 @@ class FaqQuestionAnsweringPipeline(Pipeline):
sentence_vecs = sentence_vecs.detach().tolist()
return sentence_vecs
def forward(self, inputs: [list, Dict[str, Any]],
def forward(self, inputs: Union[list, Dict[str, Any]],
**forward_params) -> Dict[str, Any]:
return self.model(inputs)
def postprocess(self, inputs: [list, Dict[str, Any]],
def postprocess(self, inputs: Union[list, Dict[str, Any]],
**postprocess_params) -> Dict[str, Any]:
scores = inputs['scores']
labels = []

View File

@@ -46,21 +46,18 @@ class FeatureExtractionPipeline(Pipeline):
"""
model = model if isinstance(model,
Model) else Model.from_pretrained(model)
if preprocessor is None:
preprocessor = NLPPreprocessor(
model.model_dir,
padding=kwargs.pop('padding', False),
sequence_length=kwargs.pop('sequence_length', 128))
model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.preprocessor = preprocessor
if preprocessor is None:
self.preprocessor = NLPPreprocessor(
self.model.model_dir,
padding=kwargs.pop('padding', False),
sequence_length=kwargs.pop('sequence_length', 128))
self.model.eval()
self.config = Config.from_file(
os.path.join(model.model_dir, ModelFile.CONFIGURATION))
self.tokenizer = preprocessor.tokenizer
os.path.join(self.model.model_dir, ModelFile.CONFIGURATION))
self.tokenizer = self.preprocessor.tokenizer
def forward(self, inputs: Dict[str, Any],
**forward_params) -> Dict[str, Any]:

View File

@@ -53,22 +53,18 @@ class FillMaskPipeline(Pipeline):
If the xlm-roberta(xlm-roberta, veco, etc.) based model is used, the mask token is '<mask>'.
To view other examples plese check the tests/pipelines/test_fill_mask.py.
"""
fill_mask_model = Model.from_pretrained(model) if isinstance(
model, str) else model
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
if preprocessor is None:
preprocessor = Preprocessor.from_pretrained(
fill_mask_model.model_dir,
self.preprocessor = Preprocessor.from_pretrained(
self.model.model_dir,
first_sequence=first_sequence,
second_sequence=None,
sequence_length=kwargs.pop('sequence_length', 128))
fill_mask_model.eval()
assert hasattr(
preprocessor, 'mask_id'
), 'The input preprocessor should have the mask_id attribute.'
super().__init__(
model=fill_mask_model, preprocessor=preprocessor, **kwargs)
assert hasattr(
self.preprocessor, 'mask_id'
), 'The input preprocessor should have the mask_id attribute.'
self.model.eval()
def forward(self, inputs: Dict[str, Any],
**forward_params) -> Dict[str, Any]:

View File

@@ -25,15 +25,12 @@ class InformationExtractionPipeline(Pipeline):
model: Union[Model, str],
preprocessor: Optional[Preprocessor] = None,
**kwargs):
model = model if isinstance(model,
Model) else Model.from_pretrained(model)
if preprocessor is None:
preprocessor = RelationExtractionPreprocessor(
model.model_dir,
sequence_length=kwargs.pop('sequence_length', 512))
model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
if preprocessor is None:
self.preprocessor = RelationExtractionPreprocessor(
self.model.model_dir,
sequence_length=kwargs.pop('sequence_length', 512))
self.model.eval()
def forward(self, inputs: Dict[str, Any],
**forward_params) -> Dict[str, Any]:

View File

@@ -21,7 +21,7 @@ class MGLMTextSummarizationPipeline(Pipeline):
def __init__(self,
model: Union[MGLMForTextSummarization, str],
preprocessor: [Preprocessor] = None,
preprocessor: Optional[Preprocessor] = None,
*args,
**kwargs):
model = MGLMForTextSummarization(model) if isinstance(model,

View File

@@ -50,15 +50,12 @@ class NamedEntityRecognitionPipeline(TokenClassificationPipeline):
To view other examples plese check the tests/pipelines/test_named_entity_recognition.py.
"""
model = model if isinstance(model,
Model) else Model.from_pretrained(model)
if preprocessor is None:
preprocessor = TokenClassificationPreprocessor(
model.model_dir,
sequence_length=kwargs.pop('sequence_length', 128))
model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
if preprocessor is None:
self.preprocessor = TokenClassificationPreprocessor(
self.model.model_dir,
sequence_length=kwargs.pop('sequence_length', 128))
self.model.eval()
self.id2label = kwargs.get('id2label')
if self.id2label is None and hasattr(self.preprocessor, 'id2label'):
self.id2label = self.preprocessor.id2label
@@ -73,13 +70,11 @@ class NamedEntityRecognitionThaiPipeline(NamedEntityRecognitionPipeline):
model: Union[Model, str],
preprocessor: Optional[Preprocessor] = None,
**kwargs):
model = model if isinstance(model,
Model) else Model.from_pretrained(model)
if preprocessor is None:
preprocessor = NERPreprocessorThai(
model.model_dir,
sequence_length=kwargs.pop('sequence_length', 512))
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
if preprocessor is None:
self.preprocessor = NERPreprocessorThai(
self.model.model_dir,
sequence_length=kwargs.pop('sequence_length', 512))
@PIPELINES.register_module(
@@ -91,10 +86,8 @@ class NamedEntityRecognitionVietPipeline(NamedEntityRecognitionPipeline):
model: Union[Model, str],
preprocessor: Optional[Preprocessor] = None,
**kwargs):
model = model if isinstance(model,
Model) else Model.from_pretrained(model)
if preprocessor is None:
preprocessor = NERPreprocessorViet(
model.model_dir,
sequence_length=kwargs.pop('sequence_length', 512))
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
if preprocessor is None:
self.preprocessor = NERPreprocessorViet(
self.model.model_dir,
sequence_length=kwargs.pop('sequence_length', 512))

View File

@@ -32,14 +32,13 @@ class SentenceEmbeddingPipeline(Pipeline):
the model if supplied.
sequence_length: Max sequence length in the user's custom scenario. 128 will be used as a default value.
"""
model = Model.from_pretrained(model) if isinstance(model,
str) else model
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
if preprocessor is None:
preprocessor = Preprocessor.from_pretrained(
model.model_dir if isinstance(model, Model) else model,
self.preprocessor = Preprocessor.from_pretrained(
self.model.model_dir
if isinstance(self.model, Model) else model,
first_sequence=first_sequence,
sequence_length=kwargs.pop('sequence_length', 128))
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
def forward(self, inputs: Dict[str, Any],
**forward_params) -> Dict[str, Any]:

View File

@@ -1,5 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import Any, Dict, Union
from typing import Any, Dict, Optional, Union
from modelscope.metainfo import Pipelines
from modelscope.models.multi_modal import OfaForAllTasks
@@ -18,7 +18,7 @@ class SummarizationPipeline(Pipeline):
def __init__(self,
model: Union[Model, str],
preprocessor: [Preprocessor] = None,
preprocessor: Optional[Preprocessor] = None,
**kwargs):
"""Use `model` and `preprocessor` to create a Summarization pipeline for prediction.
@@ -27,19 +27,10 @@ class SummarizationPipeline(Pipeline):
or a model id from the model hub, or a model instance.
preprocessor (Preprocessor): An optional preprocessor instance.
"""
super().__init__(model=model)
assert isinstance(model, str) or isinstance(model, Model), \
'model must be a single str or OfaForAllTasks'
if isinstance(model, str):
pipe_model = Model.from_pretrained(model)
elif isinstance(model, Model):
pipe_model = model
else:
raise NotImplementedError
pipe_model.model.eval()
if preprocessor is None and isinstance(pipe_model, OfaForAllTasks):
preprocessor = OfaPreprocessor(model_dir=pipe_model.model_dir)
super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.model.eval()
if preprocessor is None and isinstance(self.model, OfaForAllTasks):
self.preprocessor = OfaPreprocessor(model_dir=self.model.model_dir)
def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
return inputs

View File

@@ -41,21 +41,22 @@ class TableQuestionAnsweringPipeline(Pipeline):
preprocessor (TableQuestionAnsweringPreprocessor): a preprocessor instance
db (Database): a database to store tables in the database
"""
model = model if isinstance(
model, TableQuestionAnswering) else Model.from_pretrained(model)
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
if preprocessor is None:
preprocessor = TableQuestionAnsweringPreprocessor(model.model_dir)
self.preprocessor = TableQuestionAnsweringPreprocessor(
self.model.model_dir)
# initilize tokenizer
self.tokenizer = BertTokenizer(
os.path.join(model.model_dir, ModelFile.VOCAB_FILE))
os.path.join(self.model.model_dir, ModelFile.VOCAB_FILE))
# initialize database
if db is None:
self.db = Database(
tokenizer=self.tokenizer,
table_file_path=os.path.join(model.model_dir, 'table.json'),
syn_dict_file_path=os.path.join(model.model_dir,
table_file_path=os.path.join(self.model.model_dir,
'table.json'),
syn_dict_file_path=os.path.join(self.model.model_dir,
'synonym.txt'))
else:
self.db = db
@@ -71,8 +72,6 @@ class TableQuestionAnsweringPipeline(Pipeline):
self.schema_link_dict = constant.schema_link_dict
self.limit_dict = constant.limit_dict
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
def post_process_multi_turn(self, history_sql, result, table):
action = self.action_ops[result['action']]
headers = table['header_name']

View File

@@ -63,16 +63,14 @@ class Text2TextGenerationPipeline(Pipeline):
To view other examples plese check the tests/pipelines/test_text_generation.py.
"""
model = model if isinstance(model,
Model) else Model.from_pretrained(model)
if preprocessor is None:
preprocessor = Text2TextGenerationPreprocessor(
model.model_dir,
sequence_length=kwargs.pop('sequence_length', 128))
self.tokenizer = preprocessor.tokenizer
self.pipeline = model.pipeline.type
model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
if preprocessor is None:
self.preprocessor = Text2TextGenerationPreprocessor(
self.model.model_dir,
sequence_length=kwargs.pop('sequence_length', 128))
self.tokenizer = self.preprocessor.tokenizer
self.pipeline = self.model.pipeline.type
self.model.eval()
def preprocess(self, inputs: Input, **preprocess_params) -> Dict[str, Any]:
""" Provide specific preprocess for text2text generation pipeline in order to handl multi tasks

View File

@@ -53,25 +53,24 @@ class TextClassificationPipeline(Pipeline):
NOTE: Inputs of type 'str' are also supported. In this scenario, the 'first_sequence' and 'second_sequence'
param will have no affection.
"""
model = Model.from_pretrained(model) if isinstance(model,
str) else model
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
if preprocessor is None:
if model.__class__.__name__ == 'OfaForAllTasks':
preprocessor = Preprocessor.from_pretrained(
model_name_or_path=model.model_dir,
if self.model.__class__.__name__ == 'OfaForAllTasks':
self.preprocessor = Preprocessor.from_pretrained(
model_name_or_path=self.model.model_dir,
type=Preprocessors.ofa_tasks_preprocessor,
field=Fields.multi_modal)
else:
first_sequence = kwargs.pop('first_sequence', 'first_sequence')
second_sequence = kwargs.pop('second_sequence', None)
preprocessor = Preprocessor.from_pretrained(
model if isinstance(model, str) else model.model_dir,
self.preprocessor = Preprocessor.from_pretrained(
self.model
if isinstance(self.model, str) else self.model.model_dir,
first_sequence=first_sequence,
second_sequence=second_sequence,
sequence_length=kwargs.pop('sequence_length', 512))
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.id2label = kwargs.get('id2label')
if self.id2label is None and hasattr(self.preprocessor, 'id2label'):
self.id2label = self.preprocessor.id2label

View File

@@ -40,15 +40,13 @@ class TextErrorCorrectionPipeline(Pipeline):
To view other examples plese check the tests/pipelines/test_text_error_correction.py.
"""
model = model if isinstance(
model,
BartForTextErrorCorrection) else Model.from_pretrained(model)
if preprocessor is None:
preprocessor = TextErrorCorrectionPreprocessor(model.model_dir)
self.vocab = preprocessor.vocab
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
if preprocessor is None:
self.preprocessor = TextErrorCorrectionPreprocessor(
self.model.model_dir)
self.vocab = self.preprocessor.vocab
def forward(self, inputs: Dict[str, Any],
**forward_params) -> Dict[str, Any]:
with torch.no_grad():

View File

@@ -51,15 +51,14 @@ class TextGenerationPipeline(Pipeline):
To view other examples plese check the tests/pipelines/test_text_generation.py.
"""
model = model if isinstance(model,
Model) else Model.from_pretrained(model)
cfg = read_config(model.model_dir)
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
cfg = read_config(self.model.model_dir)
self.postprocessor = cfg.pop('postprocessor', 'decode')
if preprocessor is None:
preprocessor_cfg = cfg.preprocessor
preprocessor_cfg.update({
'model_dir':
model.model_dir,
self.model.model_dir,
'first_sequence':
first_sequence,
'second_sequence':
@@ -67,9 +66,9 @@ class TextGenerationPipeline(Pipeline):
'sequence_length':
kwargs.pop('sequence_length', 128)
})
preprocessor = build_preprocessor(preprocessor_cfg, Fields.nlp)
model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.preprocessor = build_preprocessor(preprocessor_cfg,
Fields.nlp)
self.model.eval()
def _sanitize_parameters(self, **pipeline_parameters):
return {}, pipeline_parameters, {}

View File

@@ -32,14 +32,12 @@ class TextRankingPipeline(Pipeline):
the model if supplied.
sequence_length: Max sequence length in the user's custom scenario. 128 will be used as a default value.
"""
model = Model.from_pretrained(model) if isinstance(model,
str) else model
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
if preprocessor is None:
preprocessor = Preprocessor.from_pretrained(
model.model_dir,
self.preprocessor = Preprocessor.from_pretrained(
self.model.model_dir,
sequence_length=kwargs.pop('sequence_length', 128))
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
def forward(self, inputs: Dict[str, Any],
**forward_params) -> Dict[str, Any]:

View File

@@ -39,15 +39,14 @@ class TokenClassificationPipeline(Pipeline):
model (str or Model): A model instance or a model local dir or a model id in the model hub.
preprocessor (Preprocessor): a preprocessor instance, must not be None.
"""
model = Model.from_pretrained(model) if isinstance(model,
str) else model
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
if preprocessor is None:
preprocessor = Preprocessor.from_pretrained(
model.model_dir,
self.preprocessor = Preprocessor.from_pretrained(
self.model.model_dir,
sequence_length=kwargs.pop('sequence_length', 128))
model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.model.eval()
self.id2label = kwargs.get('id2label')
if self.id2label is None and hasattr(self.preprocessor, 'id2label'):
self.id2label = self.preprocessor.id2label

View File

@@ -27,10 +27,10 @@ class TranslationQualityEstimationPipeline(Pipeline):
def __init__(self, model: str, device: str = 'gpu', **kwargs):
super().__init__(model=model, device=device)
model_file = os.path.join(model, ModelFile.TORCH_MODEL_FILE)
model_file = os.path.join(self.model, ModelFile.TORCH_MODEL_FILE)
with open(model_file, 'rb') as f:
buffer = io.BytesIO(f.read())
self.tokenizer = XLMRobertaTokenizer.from_pretrained(model)
self.tokenizer = XLMRobertaTokenizer.from_pretrained(self.model)
self.model = torch.jit.load(
buffer, map_location=self.device).to(self.device)

View File

@@ -49,14 +49,13 @@ class WordSegmentationPipeline(TokenClassificationPipeline):
To view other examples plese check the tests/pipelines/test_word_segmentation.py.
"""
model = model if isinstance(model,
Model) else Model.from_pretrained(model)
if preprocessor is None:
preprocessor = TokenClassificationPreprocessor(
model.model_dir,
sequence_length=kwargs.pop('sequence_length', 128))
model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
if preprocessor is None:
self.preprocessor = TokenClassificationPreprocessor(
self.model.model_dir,
sequence_length=kwargs.pop('sequence_length', 128))
self.model.eval()
self.id2label = kwargs.get('id2label')
if self.id2label is None and hasattr(self.preprocessor, 'id2label'):
self.id2label = self.preprocessor.id2label

View File

@@ -59,16 +59,14 @@ class ZeroShotClassificationPipeline(Pipeline):
"""
assert isinstance(model, str) or isinstance(model, Model), \
'model must be a single str or Model'
model = model if isinstance(model,
Model) else Model.from_pretrained(model)
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.entailment_id = 0
self.contradiction_id = 2
if preprocessor is None:
preprocessor = ZeroShotClassificationPreprocessor(
model.model_dir,
self.preprocessor = ZeroShotClassificationPreprocessor(
self.model.model_dir,
sequence_length=kwargs.pop('sequence_length', 512))
model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.model.eval()
def _sanitize_parameters(self, **kwargs):
preprocess_params = {}

View File

@@ -105,22 +105,16 @@ class ProteinStructurePipeline(Pipeline):
>>> print(pipeline_ins(protein))
"""
import copy
model_path = copy.deepcopy(model) if isinstance(model, str) else None
cfg = read_config(model_path) # only model is str
self.cfg = cfg
self.config = model_config(
cfg['pipeline']['model_name']) # alphafold config
model = model if isinstance(
model, Model) else Model.from_pretrained(model_path)
self.postprocessor = cfg.pop('postprocessor', None)
if preprocessor is None:
preprocessor_cfg = cfg.preprocessor
preprocessor = build_preprocessor(preprocessor_cfg, Fields.science)
model.eval()
model.model.inference_mode()
model.model_dir = model_path
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.cfg = read_config(self.model.model_dir)
self.config = model_config(
self.cfg['pipeline']['model_name']) # alphafold config
self.postprocessor = self.cfg.pop('postprocessor', None)
if preprocessor is None:
preprocessor_cfg = self.cfg.preprocessor
self.preprocessor = build_preprocessor(preprocessor_cfg,
Fields.science)
self.model.eval()
def _sanitize_parameters(self, **pipeline_parameters):
return pipeline_parameters, pipeline_parameters, pipeline_parameters

View File

@@ -6,7 +6,8 @@ from typing import Any, Dict, Optional, Sequence
from modelscope.metainfo import Models, Preprocessors
from modelscope.utils.config import Config, ConfigDict
from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModeKeys, Tasks
from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, Invoke,
ModeKeys, Tasks)
from modelscope.utils.hub import read_config, snapshot_download
from modelscope.utils.logger import get_logger
from .builder import build_preprocessor
@@ -194,7 +195,9 @@ class Preprocessor(ABC):
"""
if not os.path.exists(model_name_or_path):
model_dir = snapshot_download(
model_name_or_path, revision=revision)
model_name_or_path,
revision=revision,
user_agent={Invoke.KEY: Invoke.PREPROCESSOR})
else:
model_dir = model_name_or_path
if cfg_dict is None:

View File

@@ -14,7 +14,8 @@ from modelscope.metainfo import Preprocessors
from modelscope.pipelines.base import Input
from modelscope.preprocessors import load_image
from modelscope.utils.config import Config
from modelscope.utils.constant import Fields, ModeKeys, ModelFile, Tasks
from modelscope.utils.constant import (Fields, Invoke, ModeKeys, ModelFile,
Tasks)
from .base import Preprocessor
from .builder import PREPROCESSORS
from .ofa import * # noqa
@@ -57,7 +58,7 @@ class OfaPreprocessor(Preprocessor):
Tasks.auto_speech_recognition: OfaASRPreprocessor
}
model_dir = model_dir if osp.exists(model_dir) else snapshot_download(
model_dir)
model_dir, user_agent={Invoke.KEY: Invoke.PREPROCESSOR})
self.cfg = Config.from_file(
osp.join(model_dir, ModelFile.CONFIGURATION))
self.preprocess = preprocess_mapping[self.cfg.task](
@@ -131,7 +132,7 @@ class CLIPPreprocessor(Preprocessor):
"""
super().__init__(*args, **kwargs)
model_dir = model_dir if osp.exists(model_dir) else snapshot_download(
model_dir)
model_dir, user_agent={Invoke.KEY: Invoke.PREPROCESSOR})
self.mode = mode
# text tokenizer
from modelscope.models.multi_modal.clip.bert_tokenizer import FullTokenizer

View File

@@ -8,7 +8,6 @@ import torch
from torch import nn as nn
from torch import optim as optim
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.metainfo import Trainers
from modelscope.models import Model, TorchModel
from modelscope.msdatasets.task_datasets.audio import KWSDataLoader, KWSDataset
@@ -54,12 +53,8 @@ class KWSFarfieldTrainer(BaseTrainer):
**kwargs):
if isinstance(model, str):
if os.path.exists(model):
self.model_dir = model if os.path.isdir(
model) else os.path.dirname(model)
else:
self.model_dir = snapshot_download(
model, revision=model_revision)
self.model_dir = self.get_or_download_model_dir(
model, model_revision)
if cfg_file is None:
cfg_file = os.path.join(self.model_dir,
ModelFile.CONFIGURATION)

View File

@@ -1,11 +1,14 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import time
from abc import ABC, abstractmethod
from typing import Callable, Dict, List, Optional, Tuple, Union
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.trainers.builder import TRAINERS
from modelscope.utils.config import Config
from modelscope.utils.constant import Invoke
from .utils.log_buffer import LogBuffer
@@ -32,6 +35,17 @@ class BaseTrainer(ABC):
self.log_buffer = LogBuffer()
self.timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
def get_or_download_model_dir(self, model, model_revision=None):
if os.path.exists(model):
model_cache_dir = model if os.path.isdir(
model) else os.path.dirname(model)
else:
model_cache_dir = snapshot_download(
model,
revision=model_revision,
user_agent={Invoke.KEY: Invoke.TRAINER})
return model_cache_dir
@abstractmethod
def train(self, *args, **kwargs):
""" Train (and evaluate) process

View File

@@ -20,7 +20,7 @@ from modelscope.trainers.builder import TRAINERS
from modelscope.trainers.optimizer.builder import build_optimizer
from modelscope.utils.config import Config
from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, ConfigKeys,
ModeKeys)
Invoke, ModeKeys)
from .clip_trainer_utils import get_loss, get_optimizer_params, get_schedule
@@ -52,7 +52,8 @@ class CLIPTrainer(EpochBasedTrainer):
model_revision: Optional[str] = DEFAULT_MODEL_REVISION,
seed: int = 42,
**kwargs):
model = Model.from_pretrained(model, revision=model_revision)
model = Model.from_pretrained(
model, revision=model_revision, invoked_by=Invoke.TRAINER)
# for training & eval, we convert the model from FP16 back to FP32
# to compatible with modelscope amp training
convert_models_to_fp32(model)

View File

@@ -23,7 +23,7 @@ from modelscope.trainers.optimizer.builder import build_optimizer
from modelscope.trainers.parallel.utils import is_parallel
from modelscope.utils.config import Config
from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, ConfigKeys,
ModeKeys)
Invoke, ModeKeys)
from .ofa_trainer_utils import (AdjustLabelSmoothedCrossEntropyCriterion,
get_schedule)
@@ -49,7 +49,8 @@ class OFATrainer(EpochBasedTrainer):
model_revision: Optional[str] = DEFAULT_MODEL_REVISION,
seed: int = 42,
**kwargs):
model = Model.from_pretrained(model, revision=model_revision)
model = Model.from_pretrained(
model, revision=model_revision, invoked_by=Invoke.TRAINER)
model_dir = model.model_dir
self.cfg_modify_fn = cfg_modify_fn
cfg = self.rebuild_config(Config.from_file(cfg_file))

View File

@@ -7,21 +7,17 @@ from typing import Callable, Dict, Optional
import numpy as np
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from sklearn.metrics import confusion_matrix
from torch.optim import AdamW
from torch.utils.data import DataLoader, Dataset
from modelscope.metainfo import Trainers
from modelscope.models.base import Model
from modelscope.msdatasets import MsDataset
from modelscope.trainers.base import BaseTrainer
from modelscope.trainers.builder import TRAINERS
from modelscope.trainers.multi_modal.team.team_trainer_utils import (
get_optimizer, train_mapping, val_mapping)
from modelscope.trainers.multi_modal.team.team_trainer_utils import \
get_optimizer
from modelscope.utils.config import Config
from modelscope.utils.constant import DownloadMode, ModeKeys
from modelscope.utils.constant import Invoke
from modelscope.utils.logger import get_logger
logger = get_logger()
@@ -36,7 +32,7 @@ class TEAMImgClsTrainer(BaseTrainer):
super().__init__(cfg_file)
self.cfg = Config.from_file(cfg_file)
team_model = Model.from_pretrained(model)
team_model = Model.from_pretrained(model, invoked_by=Invoke.TRAINER)
image_model = team_model.model.image_model.vision_transformer
classification_model = nn.Sequential(
OrderedDict([('encoder', image_model),

View File

@@ -24,8 +24,7 @@ logger = get_logger()
class CsanmtTranslationTrainer(BaseTrainer):
def __init__(self, model: str, cfg_file: str = None, *args, **kwargs):
if not osp.exists(model):
model = snapshot_download(model)
model = self.get_or_download_model_dir(model)
tf.reset_default_graph()
self.model_dir = model

View File

@@ -10,7 +10,6 @@ import torch
from torch import nn
from torch.utils.data import Dataset
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.metainfo import Trainers
from modelscope.metrics.builder import build_metric
from modelscope.models.base import Model, TorchModel
@@ -478,11 +477,7 @@ class NlpEpochBasedTrainer(EpochBasedTrainer):
"""
if isinstance(model, str):
if os.path.exists(model):
model_dir = model if os.path.isdir(model) else os.path.dirname(
model)
else:
model_dir = snapshot_download(model, revision=model_revision)
model_dir = self.get_or_download_model_dir(model, model_revision)
if cfg_file is None:
cfg_file = os.path.join(model_dir, ModelFile.CONFIGURATION)
else:

View File

@@ -14,7 +14,6 @@ from torch.utils.data import DataLoader, Dataset
from torch.utils.data.dataloader import default_collate
from torch.utils.data.distributed import DistributedSampler
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.metainfo import Trainers
from modelscope.metrics import build_metric, task_default_metrics
from modelscope.models.base import Model, TorchModel
@@ -98,12 +97,8 @@ class EpochBasedTrainer(BaseTrainer):
self._seed = seed
set_random_seed(self._seed)
if isinstance(model, str):
if os.path.exists(model):
self.model_dir = model if os.path.isdir(
model) else os.path.dirname(model)
else:
self.model_dir = snapshot_download(
model, revision=model_revision)
self.model_dir = self.get_or_download_model_dir(
model, model_revision)
if cfg_file is None:
cfg_file = os.path.join(self.model_dir,
ModelFile.CONFIGURATION)

View File

@@ -291,6 +291,14 @@ class ModelFile(object):
TS_MODEL_FILE = 'model.ts'
class Invoke(object):
KEY = 'invoked_by'
PRETRAINED = 'from_pretrained'
PIPELINE = 'pipeline'
TRAINER = 'trainer'
PREPROCESSOR = 'preprocessor'
class ConfigFields(object):
""" First level keyword in configuration file
"""