From 76c6ff6329da1625c602abe670b8d6bd9b52e279 Mon Sep 17 00:00:00 2001
From: "mulin.lyh" <mulin.lyh@taobao.com>
Date: Tue, 21 Jun 2022 20:04:25 +0800
Subject: [PATCH 1/3] [to #42675838]merge model hub code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

合并model hub 代码
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9096493
---
 modelscope/hub/__init__.py                    |   0
 modelscope/hub/api.py                         | 265 ++++++++++++++++
 modelscope/hub/constants.py                   |   8 +
 modelscope/hub/errors.py                      |  30 ++
 modelscope/hub/file_download.py               | 254 +++++++++++++++
 modelscope/hub/git.py                         |  82 +++++
 modelscope/hub/repository.py                  | 173 +++++++++++
 modelscope/hub/snapshot_download.py           | 125 ++++++++
 modelscope/hub/utils/__init__.py              |   0
 modelscope/hub/utils/_subprocess.py           |  40 +++
 modelscope/hub/utils/caching.py               | 294 ++++++++++++++++++
 modelscope/hub/utils/utils.py                 |  39 +++
 modelscope/models/base.py                     |   8 +-
 modelscope/pipelines/base.py                  |   8 +-
 modelscope/pipelines/util.py                  |   3 +-
 modelscope/preprocessors/multi_model.py       |   7 +-
 modelscope/utils/hub.py                       |  11 +-
 requirements/runtime.txt                      |   5 +-
 tests/hub/__init__.py                         |   0
 tests/hub/test_hub_operation.py               | 157 ++++++++++
 tests/pipelines/test_image_matting.py         |   6 -
 tests/pipelines/test_ocr_detection.py         |   2 +-
 tests/pipelines/test_sentence_similarity.py   |  11 +-
 tests/pipelines/test_speech_signal_process.py |   6 -
 tests/pipelines/test_text_classification.py   |   6 -
 tests/pipelines/test_text_generation.py       |   3 +-
 tests/pipelines/test_word_segmentation.py     |  11 +-
 tests/run.py                                  |   2 +-
 tests/utils/test_hub_operation.py             |  50 ---
 29 files changed, 1487 insertions(+), 119 deletions(-)
 create mode 100644 modelscope/hub/__init__.py
 create mode 100644 modelscope/hub/api.py
 create mode 100644 modelscope/hub/constants.py
 create mode 100644 modelscope/hub/errors.py
 create mode 100644 modelscope/hub/file_download.py
 create mode 100644 modelscope/hub/git.py
 create mode 100644 modelscope/hub/repository.py
 create mode 100644 modelscope/hub/snapshot_download.py
 create mode 100644 modelscope/hub/utils/__init__.py
 create mode 100644 modelscope/hub/utils/_subprocess.py
 create mode 100644 modelscope/hub/utils/caching.py
 create mode 100644 modelscope/hub/utils/utils.py
 create mode 100644 tests/hub/__init__.py
 create mode 100644 tests/hub/test_hub_operation.py
 delete mode 100644 tests/utils/test_hub_operation.py

diff --git a/modelscope/hub/__init__.py b/modelscope/hub/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py
new file mode 100644
index 00000000..104eafbd
--- /dev/null
+++ b/modelscope/hub/api.py
@@ -0,0 +1,265 @@
+import imp
+import os
+import pickle
+import subprocess
+from http.cookiejar import CookieJar
+from os.path import expanduser
+from typing import List, Optional, Tuple, Union
+
+import requests
+
+from modelscope.utils.logger import get_logger
+from .constants import LOGGER_NAME
+from .errors import NotExistError, is_ok, raise_on_error
+from .utils.utils import get_endpoint, model_id_to_group_owner_name
+
+logger = get_logger()
+
+
+class HubApi:
+
+    def __init__(self, endpoint=None):
+        self.endpoint = endpoint if endpoint is not None else get_endpoint()
+
+    def login(
+        self,
+        user_name: str,
+        password: str,
+    ) -> tuple():
+        """
+        Login with username and password
+
+        Args:
+            username(`str`): user name on modelscope
+            password(`str`): password
+
+        Returns:
+            cookies: to authenticate yourself to ModelScope open-api
+            gitlab token: to access private repos
+
+        <Tip>
+            You only have to login once within 30 days.
+        </Tip>
+
+        TODO: handle cookies expire
+
+        """
+        path = f'{self.endpoint}/api/v1/login'
+        r = requests.post(
+            path, json={
+                'username': user_name,
+                'password': password
+            })
+        r.raise_for_status()
+        d = r.json()
+        raise_on_error(d)
+
+        token = d['Data']['AccessToken']
+        cookies = r.cookies
+
+        # save token and cookie
+        ModelScopeConfig.save_token(token)
+        ModelScopeConfig.save_cookies(cookies)
+        ModelScopeConfig.write_to_git_credential(user_name, password)
+
+        return d['Data']['AccessToken'], cookies
+
+    def create_model(self, model_id: str, chinese_name: str, visibility: int,
+                     license: str) -> str:
+        """
+        Create model repo at ModelScopeHub
+
+        Args:
+            model_id:(`str`): The model id
+            chinese_name(`str`): chinese name of the model
+            visibility(`int`): visibility of the model(1-private, 3-internal, 5-public)
+            license(`str`): license of the model, candidates can be found at: TBA
+
+        Returns:
+            name of the model created
+
+        <Tip>
+            model_id = {owner}/{name}
+        </Tip>
+        """
+        cookies = ModelScopeConfig.get_cookies()
+        if cookies is None:
+            raise ValueError('Token does not exist, please login first.')
+
+        path = f'{self.endpoint}/api/v1/models'
+        owner_or_group, name = model_id_to_group_owner_name(model_id)
+        r = requests.post(
+            path,
+            json={
+                'Path': owner_or_group,
+                'Name': name,
+                'ChineseName': chinese_name,
+                'Visibility': visibility,
+                'License': license
+            },
+            cookies=cookies)
+        r.raise_for_status()
+        raise_on_error(r.json())
+        d = r.json()
+        return d['Data']['Name']
+
+    def delete_model(self, model_id):
+        """_summary_
+
+        Args:
+            model_id (str): The model id.
+        <Tip>
+            model_id = {owner}/{name}
+        </Tip>
+        """
+        cookies = ModelScopeConfig.get_cookies()
+        path = f'{self.endpoint}/api/v1/models/{model_id}'
+
+        r = requests.delete(path, cookies=cookies)
+        r.raise_for_status()
+        raise_on_error(r.json())
+
+    def get_model_url(self, model_id):
+        return f'{self.endpoint}/api/v1/models/{model_id}.git'
+
+    def get_model(
+        self,
+        model_id: str,
+        revision: str = 'master',
+    ) -> str:
+        """
+        Get model information at modelscope_hub
+
+        Args:
+            model_id(`str`): The model id.
+            revision(`str`): revision of model
+        Returns:
+            The model details information.
+        Raises:
+            NotExistError: If the model is not exist, will throw NotExistError
+        <Tip>
+            model_id = {owner}/{name}
+        </Tip>
+        """
+        cookies = ModelScopeConfig.get_cookies()
+        owner_or_group, name = model_id_to_group_owner_name(model_id)
+        path = f'{self.endpoint}/api/v1/models/{owner_or_group}/{name}?{revision}'
+
+        r = requests.get(path, cookies=cookies)
+        if r.status_code == 200:
+            if is_ok(r.json()):
+                return r.json()['Data']
+            else:
+                raise NotExistError(r.json()['Message'])
+        else:
+            r.raise_for_status()
+
+    def get_model_branches_and_tags(
+        self,
+        model_id: str,
+    ) -> Tuple[List[str], List[str]]:
+        cookies = ModelScopeConfig.get_cookies()
+
+        path = f'{self.endpoint}/api/v1/models/{model_id}/revisions'
+        r = requests.get(path, cookies=cookies)
+        r.raise_for_status()
+        d = r.json()
+        raise_on_error(d)
+        info = d['Data']
+        branches = [x['Revision'] for x in info['RevisionMap']['Branches']
+                    ] if info['RevisionMap']['Branches'] else []
+        tags = [x['Revision'] for x in info['RevisionMap']['Tags']
+                ] if info['RevisionMap']['Tags'] else []
+        return branches, tags
+
+    def get_model_files(
+            self,
+            model_id: str,
+            revision: Optional[str] = 'master',
+            root: Optional[str] = None,
+            recursive: Optional[str] = False,
+            use_cookies: Union[bool, CookieJar] = False) -> List[dict]:
+
+        cookies = None
+        if isinstance(use_cookies, CookieJar):
+            cookies = use_cookies
+        elif use_cookies:
+            cookies = ModelScopeConfig.get_cookies()
+            if cookies is None:
+                raise ValueError('Token does not exist, please login first.')
+
+        path = f'{self.endpoint}/api/v1/models/{model_id}/repo/files?Revision={revision}&Recursive={recursive}'
+        if root is not None:
+            path = path + f'&Root={root}'
+
+        r = requests.get(path, cookies=cookies)
+
+        r.raise_for_status()
+        d = r.json()
+        raise_on_error(d)
+
+        files = []
+        for file in d['Data']['Files']:
+            if file['Name'] == '.gitignore' or file['Name'] == '.gitattributes':
+                continue
+
+            files.append(file)
+        return files
+
+
+class ModelScopeConfig:
+    path_credential = expanduser('~/.modelscope/credentials')
+    os.makedirs(path_credential, exist_ok=True)
+
+    @classmethod
+    def save_cookies(cls, cookies: CookieJar):
+        with open(os.path.join(cls.path_credential, 'cookies'), 'wb+') as f:
+            pickle.dump(cookies, f)
+
+    @classmethod
+    def get_cookies(cls):
+        try:
+            with open(os.path.join(cls.path_credential, 'cookies'), 'rb') as f:
+                return pickle.load(f)
+        except FileNotFoundError:
+            logger.warn("Auth token does not exist, you'll get authentication \
+                error when downloading private model files. Please login first"
+                        )
+
+    @classmethod
+    def save_token(cls, token: str):
+        with open(os.path.join(cls.path_credential, 'token'), 'w+') as f:
+            f.write(token)
+
+    @classmethod
+    def get_token(cls) -> Optional[str]:
+        """
+        Get token or None if not existent.
+
+        Returns:
+            `str` or `None`: The token, `None` if it doesn't exist.
+
+        """
+        token = None
+        try:
+            with open(os.path.join(cls.path_credential, 'token'), 'r') as f:
+                token = f.read()
+        except FileNotFoundError:
+            pass
+        return token
+
+    @staticmethod
+    def write_to_git_credential(username: str, password: str):
+        with subprocess.Popen(
+                'git credential-store store'.split(),
+                stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+        ) as process:
+            input_username = f'username={username.lower()}'
+            input_password = f'password={password}'
+
+            process.stdin.write(
+                f'url={get_endpoint()}\n{input_username}\n{input_password}\n\n'
+                .encode('utf-8'))
+            process.stdin.flush()
diff --git a/modelscope/hub/constants.py b/modelscope/hub/constants.py
new file mode 100644
index 00000000..a38f9afb
--- /dev/null
+++ b/modelscope/hub/constants.py
@@ -0,0 +1,8 @@
+MODELSCOPE_URL_SCHEME = 'http://'
+DEFAULT_MODELSCOPE_DOMAIN = '101.201.119.157:32330'
+DEFAULT_MODELSCOPE_GITLAB_DOMAIN = '101.201.119.157:31102'
+
+DEFAULT_MODELSCOPE_GROUP = 'damo'
+MODEL_ID_SEPARATOR = '/'
+
+LOGGER_NAME = 'ModelScopeHub'
diff --git a/modelscope/hub/errors.py b/modelscope/hub/errors.py
new file mode 100644
index 00000000..13ea709f
--- /dev/null
+++ b/modelscope/hub/errors.py
@@ -0,0 +1,30 @@
+class NotExistError(Exception):
+    pass
+
+
+class RequestError(Exception):
+    pass
+
+
+def is_ok(rsp):
+    """ Check the request is ok
+
+    Args:
+        rsp (_type_): The request response body
+        Failed: {'Code': 10010101004, 'Message': 'get model info failed, err: unauthorized permission',
+                 'RequestId': '', 'Success': False}
+        Success: {'Code': 200, 'Data': {}, 'Message': 'success', 'RequestId': '', 'Success': True}
+    """
+    return rsp['Code'] == 200 and rsp['Success']
+
+
+def raise_on_error(rsp):
+    """If response error, raise exception
+
+    Args:
+        rsp (_type_): The server response
+    """
+    if rsp['Code'] == 200 and rsp['Success']:
+        return True
+    else:
+        raise RequestError(rsp['Message'])
diff --git a/modelscope/hub/file_download.py b/modelscope/hub/file_download.py
new file mode 100644
index 00000000..e5c64f1c
--- /dev/null
+++ b/modelscope/hub/file_download.py
@@ -0,0 +1,254 @@
+import copy
+import fnmatch
+import logging
+import os
+import sys
+import tempfile
+import time
+from functools import partial
+from hashlib import sha256
+from pathlib import Path
+from typing import BinaryIO, Dict, Optional, Union
+from uuid import uuid4
+
+import json
+import requests
+from filelock import FileLock
+from requests.exceptions import HTTPError
+from tqdm import tqdm
+
+from modelscope import __version__
+from modelscope.utils.logger import get_logger
+from .api import HubApi, ModelScopeConfig
+from .constants import (DEFAULT_MODELSCOPE_GROUP, LOGGER_NAME,
+                        MODEL_ID_SEPARATOR)
+from .errors import NotExistError, RequestError, raise_on_error
+from .utils.caching import ModelFileSystemCache
+from .utils.utils import (get_cache_dir, get_endpoint,
+                          model_id_to_group_owner_name)
+
+SESSION_ID = uuid4().hex
+logger = get_logger()
+
+
+def model_file_download(
+    model_id: str,
+    file_path: str,
+    revision: Optional[str] = 'master',
+    cache_dir: Optional[str] = None,
+    user_agent: Union[Dict, str, None] = None,
+    local_files_only: Optional[bool] = False,
+) -> Optional[str]:  # pragma: no cover
+    """
+    Download from a given URL and cache it if it's not already present in the
+    local cache.
+
+    Given a URL, this function looks for the corresponding file in the local
+    cache. If it's not there, download it. Then return the path to the cached
+    file.
+
+    Args:
+        model_id (`str`):
+            The model to whom the file to be downloaded belongs.
+        file_path(`str`):
+            Path of the file to be downloaded, relative to the root of model repo
+        revision(`str`, *optional*):
+            revision of the model file to be downloaded.
+            Can be any of a branch, tag or commit hash, default to `master`
+        cache_dir (`str`, `Path`, *optional*):
+            Path to the folder where cached files are stored.
+        user_agent (`dict`, `str`, *optional*):
+            The user-agent info in the form of a dictionary or a string.
+        local_files_only (`bool`, *optional*, defaults to `False`):
+            If `True`, avoid downloading the file and return the path to the
+            local cached file if it exists.
+            if `False`, download the file anyway even it exists
+
+    Returns:
+        Local path (string) of file or if networking is off, last version of
+        file cached on disk.
+
+    <Tip>
+
+    Raises the following errors:
+
+        - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
+          if `use_auth_token=True` and the token cannot be found.
+        - [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError)
+          if ETag cannot be determined.
+        - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+          if some parameter value is invalid
+
+    </Tip>
+    """
+    if cache_dir is None:
+        cache_dir = get_cache_dir()
+    if isinstance(cache_dir, Path):
+        cache_dir = str(cache_dir)
+
+    group_or_owner, name = model_id_to_group_owner_name(model_id)
+
+    cache = ModelFileSystemCache(cache_dir, group_or_owner, name)
+
+    # if local_files_only is `True` and the file already exists in cached_path
+    # return the cached path
+    if local_files_only:
+        cached_file_path = cache.get_file_by_path(file_path)
+        if cached_file_path is not None:
+            logger.warning(
+                "File exists in local cache, but we're not sure it's up to date"
+            )
+            return cached_file_path
+        else:
+            raise ValueError(
+                'Cannot find the requested files in the cached path and outgoing'
+                ' traffic has been disabled. To enable model look-ups and downloads'
+                " online, set 'local_files_only' to False.")
+
+    _api = HubApi()
+    headers = {'user-agent': http_user_agent(user_agent=user_agent, )}
+    branches, tags = _api.get_model_branches_and_tags(model_id)
+    file_to_download_info = None
+    is_commit_id = False
+    if revision in branches or revision in tags:  # The revision is version or tag,
+        # we need to confirm the version is up to date
+        # we need to get the file list to check if the lateast version is cached, if so return, otherwise download
+        model_files = _api.get_model_files(
+            model_id=model_id,
+            revision=revision,
+            recursive=True,
+        )
+
+        for model_file in model_files:
+            if model_file['Type'] == 'tree':
+                continue
+
+            if model_file['Path'] == file_path:
+                model_file['Branch'] = revision
+                if cache.exists(model_file):
+                    return cache.get_file_by_info(model_file)
+                else:
+                    file_to_download_info = model_file
+
+        if file_to_download_info is None:
+            raise NotExistError('The file path: %s not exist in: %s' %
+                                (file_path, model_id))
+    else:  # the revision is commit id.
+        cached_file_path = cache.get_file_by_path_and_commit_id(
+            file_path, revision)
+        if cached_file_path is not None:
+            logger.info('The specified file is in cache, skip downloading!')
+            return cached_file_path  # the file is in cache.
+        is_commit_id = True
+    # we need to download again
+    # TODO: skip using JWT for authorization, use cookie instead
+    cookies = ModelScopeConfig.get_cookies()
+    url_to_download = get_file_download_url(model_id, file_path, revision)
+    file_to_download_info = {
+        'Path': file_path,
+        'Revision':
+        revision if is_commit_id else file_to_download_info['Revision']
+    }
+    # Prevent parallel downloads of the same file with a lock.
+    lock_path = cache.get_root_location() + '.lock'
+
+    with FileLock(lock_path):
+        temp_file_name = next(tempfile._get_candidate_names())
+        http_get_file(
+            url_to_download,
+            cache_dir,
+            temp_file_name,
+            headers=headers,
+            cookies=None if cookies is None else cookies.get_dict())
+        return cache.put_file(file_to_download_info,
+                              os.path.join(cache_dir, temp_file_name))
+
+
+def http_user_agent(user_agent: Union[Dict, str, None] = None, ) -> str:
+    """Formats a user-agent string with basic info about a request.
+
+    Args:
+        user_agent (`str`, `dict`, *optional*):
+            The user agent info in the form of a dictionary or a single string.
+
+    Returns:
+        The formatted user-agent string.
+    """
+    ua = f'modelscope/{__version__}; python/{sys.version.split()[0]}; session_id/{SESSION_ID}'
+
+    if isinstance(user_agent, dict):
+        ua = '; '.join(f'{k}/{v}' for k, v in user_agent.items())
+    elif isinstance(user_agent, str):
+        ua = user_agent
+    return ua
+
+
+def get_file_download_url(model_id: str, file_path: str, revision: str):
+    """
+    Format file download url according to `model_id`, `revision` and `file_path`.
+    e.g., Given `model_id=john/bert`, `revision=master`, `file_path=README.md`,
+    the resulted download url is: https://maas.co/api/v1/models/john/bert/repo?Revision=master&FilePath=README.md
+    """
+    download_url_template = '{endpoint}/api/v1/models/{model_id}/repo?Revision={revision}&FilePath={file_path}'
+    return download_url_template.format(
+        endpoint=get_endpoint(),
+        model_id=model_id,
+        revision=revision,
+        file_path=file_path,
+    )
+
+
+def http_get_file(
+    url: str,
+    local_dir: str,
+    file_name: str,
+    cookies: Dict[str, str],
+    headers: Optional[Dict[str, str]] = None,
+):
+    """
+    Download remote file. Do not gobble up errors.
+    This method is only used by snapshot_download, since the behavior is quite different with single file download
+    TODO: consolidate with http_get_file() to avoild duplicate code
+
+    Args:
+        url(`str`):
+            actual download url of the file
+        local_dir(`str`):
+            local directory where the downloaded file stores
+        file_name(`str`):
+            name of the file stored in `local_dir`
+        cookies(`Dict[str, str]`):
+            cookies used to authentication the user, which is used for downloading private repos
+        headers(`Optional[Dict[str, str]] = None`):
+            http headers to carry necessary info when requesting the remote file
+
+    """
+    temp_file_manager = partial(
+        tempfile.NamedTemporaryFile, mode='wb', dir=local_dir, delete=False)
+
+    with temp_file_manager() as temp_file:
+        logger.info('downloading %s to %s', url, temp_file.name)
+        headers = copy.deepcopy(headers)
+
+        r = requests.get(url, stream=True, headers=headers, cookies=cookies)
+        r.raise_for_status()
+
+        content_length = r.headers.get('Content-Length')
+        total = int(content_length) if content_length is not None else None
+
+        progress = tqdm(
+            unit='B',
+            unit_scale=True,
+            unit_divisor=1024,
+            total=total,
+            initial=0,
+            desc='Downloading',
+        )
+        for chunk in r.iter_content(chunk_size=1024):
+            if chunk:  # filter out keep-alive new chunks
+                progress.update(len(chunk))
+                temp_file.write(chunk)
+        progress.close()
+
+    logger.info('storing %s in cache at %s', url, local_dir)
+    os.replace(temp_file.name, os.path.join(local_dir, file_name))
diff --git a/modelscope/hub/git.py b/modelscope/hub/git.py
new file mode 100644
index 00000000..5f079105
--- /dev/null
+++ b/modelscope/hub/git.py
@@ -0,0 +1,82 @@
+from threading import local
+from tkinter.messagebox import NO
+from typing import Union
+
+from modelscope.utils.logger import get_logger
+from .constants import LOGGER_NAME
+from .utils._subprocess import run_subprocess
+
+logger = get_logger
+
+
+def git_clone(
+    local_dir: str,
+    repo_url: str,
+):
+    # TODO: use "git clone" or "git lfs clone" according to git version
+    # TODO: print stderr when subprocess fails
+    run_subprocess(
+        f'git clone {repo_url}'.split(),
+        local_dir,
+        True,
+    )
+
+
+def git_checkout(
+    local_dir: str,
+    revsion: str,
+):
+    run_subprocess(f'git checkout {revsion}'.split(), local_dir)
+
+
+def git_add(local_dir: str, ):
+    run_subprocess(
+        'git add .'.split(),
+        local_dir,
+        True,
+    )
+
+
+def git_commit(local_dir: str, commit_message: str):
+    run_subprocess(
+        'git commit -v -m'.split() + [commit_message],
+        local_dir,
+        True,
+    )
+
+
+def git_push(local_dir: str, branch: str):
+    # check current branch
+    cur_branch = git_current_branch(local_dir)
+    if cur_branch != branch:
+        logger.error(
+            "You're trying to push to a different branch, please double check")
+        return
+
+    run_subprocess(
+        f'git push origin {branch}'.split(),
+        local_dir,
+        True,
+    )
+
+
+def git_current_branch(local_dir: str) -> Union[str, None]:
+    """
+    Get current branch name
+
+    Args:
+        local_dir(`str`): local model repo directory
+
+    Returns
+        branch name you're currently on
+    """
+    try:
+        process = run_subprocess(
+            'git rev-parse --abbrev-ref HEAD'.split(),
+            local_dir,
+            True,
+        )
+
+        return str(process.stdout).strip()
+    except Exception as e:
+        raise e
diff --git a/modelscope/hub/repository.py b/modelscope/hub/repository.py
new file mode 100644
index 00000000..6367f903
--- /dev/null
+++ b/modelscope/hub/repository.py
@@ -0,0 +1,173 @@
+import os
+import subprocess
+from pathlib import Path
+from typing import Optional, Union
+
+from modelscope.utils.logger import get_logger
+from .api import ModelScopeConfig
+from .constants import MODELSCOPE_URL_SCHEME
+from .git import git_add, git_checkout, git_clone, git_commit, git_push
+from .utils._subprocess import run_subprocess
+from .utils.utils import get_gitlab_domain
+
+logger = get_logger()
+
+
+class Repository:
+
+    def __init__(
+        self,
+        local_dir: str,
+        clone_from: Optional[str] = None,
+        auth_token: Optional[str] = None,
+        private: Optional[bool] = False,
+        revision: Optional[str] = 'master',
+    ):
+        """
+        Instantiate a Repository object by cloning the remote ModelScopeHub repo
+        Args:
+            local_dir(`str`):
+                local directory to store the model files
+            clone_from(`Optional[str] = None`):
+                model id in ModelScope-hub from which git clone
+                You should ignore this parameter when `local_dir` is already a git repo
+            auth_token(`Optional[str]`):
+                token obtained when calling `HubApi.login()`. Usually you can safely ignore the parameter
+                as the token is already saved when you login the first time
+            private(`Optional[bool]`):
+                whether the model is private, default to False
+            revision(`Optional[str]`):
+                revision of the model you want to clone from. Can be any of a branch, tag or commit hash
+        """
+        logger.info('Instantiating Repository object...')
+
+        # Create local directory if not exist
+        os.makedirs(local_dir, exist_ok=True)
+        self.local_dir = os.path.join(os.getcwd(), local_dir)
+
+        self.private = private
+
+        # Check git and git-lfs installation
+        self.check_git_versions()
+
+        # Retrieve auth token
+        if not private and isinstance(auth_token, str):
+            logger.warning(
+                'cloning a public repo with a token, which will be ignored')
+            self.token = None
+        else:
+            if isinstance(auth_token, str):
+                self.token = auth_token
+            else:
+                self.token = ModelScopeConfig.get_token()
+
+            if self.token is None:
+                raise EnvironmentError(
+                    'Token does not exist, the clone will fail for private repo.'
+                    'Please login first.')
+
+        # git clone
+        if clone_from is not None:
+            self.model_id = clone_from
+            logger.info('cloning model repo to %s ...', self.local_dir)
+            git_clone(self.local_dir, self.get_repo_url())
+        else:
+            if is_git_repo(self.local_dir):
+                logger.debug('[Repository] is a valid git repo')
+            else:
+                raise ValueError(
+                    'If not specifying `clone_from`, you need to pass Repository a'
+                    ' valid git clone.')
+
+        # git checkout
+        if isinstance(revision, str) and revision != 'master':
+            git_checkout(revision)
+
+    def push_to_hub(self,
+                    commit_message: str,
+                    revision: Optional[str] = 'master'):
+        """
+        Push changes changes to hub
+
+        Args:
+            commit_message(`str`):
+                commit message describing the changes, it's mandatory
+            revision(`Optional[str]`):
+                remote branch you want to push to, default to `master`
+
+        <Tip>
+            The function complains when local and remote branch are different, please be careful
+        </Tip>
+
+        """
+        git_add(self.local_dir)
+        git_commit(self.local_dir, commit_message)
+
+        logger.info('Pushing changes to repo...')
+        git_push(self.local_dir, revision)
+
+        # TODO: if git push fails, how to retry?
+
+    def check_git_versions(self):
+        """
+        Checks that `git` and `git-lfs` can be run.
+
+        Raises:
+            `EnvironmentError`: if `git` or `git-lfs` are not installed.
+        """
+        try:
+            git_version = run_subprocess('git --version'.split(),
+                                         self.local_dir).stdout.strip()
+        except FileNotFoundError:
+            raise EnvironmentError(
+                'Looks like you do not have git installed, please install.')
+
+        try:
+            lfs_version = run_subprocess('git-lfs --version'.split(),
+                                         self.local_dir).stdout.strip()
+        except FileNotFoundError:
+            raise EnvironmentError(
+                'Looks like you do not have git-lfs installed, please install.'
+                ' You can install from https://git-lfs.github.com/.'
+                ' Then run `git lfs install` (you only have to do this once).')
+        logger.info(git_version + '\n' + lfs_version)
+
+    def get_repo_url(self) -> str:
+        """
+        Get repo url to clone, according whether the repo is private or not
+        """
+        url = None
+
+        if self.private:
+            url = f'{MODELSCOPE_URL_SCHEME}oauth2:{self.token}@{get_gitlab_domain()}/{self.model_id}'
+        else:
+            url = f'{MODELSCOPE_URL_SCHEME}{get_gitlab_domain()}/{self.model_id}'
+
+        if not url:
+            raise ValueError(
+                'Empty repo url, please check clone_from parameter')
+
+        logger.debug('url to clone: %s', str(url))
+
+        return url
+
+
+def is_git_repo(folder: Union[str, Path]) -> bool:
+    """
+    Check if the folder is the root or part of a git repository
+
+    Args:
+        folder (`str`):
+            The folder in which to run the command.
+
+    Returns:
+        `bool`: `True` if the repository is part of a repository, `False`
+        otherwise.
+    """
+    folder_exists = os.path.exists(os.path.join(folder, '.git'))
+    git_branch = subprocess.run(
+        'git branch'.split(),
+        cwd=folder,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE)
+    return folder_exists and git_branch.returncode == 0
diff --git a/modelscope/hub/snapshot_download.py b/modelscope/hub/snapshot_download.py
new file mode 100644
index 00000000..90d850f4
--- /dev/null
+++ b/modelscope/hub/snapshot_download.py
@@ -0,0 +1,125 @@
+import os
+import tempfile
+from glob import glob
+from pathlib import Path
+from typing import Dict, Optional, Union
+
+from modelscope.utils.logger import get_logger
+from .api import HubApi, ModelScopeConfig
+from .constants import DEFAULT_MODELSCOPE_GROUP, MODEL_ID_SEPARATOR
+from .errors import NotExistError, RequestError, raise_on_error
+from .file_download import (get_file_download_url, http_get_file,
+                            http_user_agent)
+from .utils.caching import ModelFileSystemCache
+from .utils.utils import get_cache_dir, model_id_to_group_owner_name
+
+logger = get_logger()
+
+
+def snapshot_download(model_id: str,
+                      revision: Optional[str] = 'master',
+                      cache_dir: Union[str, Path, None] = None,
+                      user_agent: Optional[Union[Dict, str]] = None,
+                      local_files_only: Optional[bool] = False,
+                      private: Optional[bool] = False) -> str:
+    """Download all files of a repo.
+    Downloads a whole snapshot of a repo's files at the specified revision. This
+    is useful when you want all files from a repo, because you don't know which
+    ones you will need a priori. All files are nested inside a folder in order
+    to keep their actual filename relative to that folder.
+
+    An alternative would be to just clone a repo but this would require that the
+    user always has git and git-lfs installed, and properly configured.
+    Args:
+        model_id (`str`):
+            A user or an organization name and a repo name separated by a `/`.
+        revision (`str`, *optional*):
+            An optional Git revision id which can be a branch name, a tag, or a
+            commit hash. NOTE: currently only branch and tag name is supported
+        cache_dir (`str`, `Path`, *optional*):
+            Path to the folder where cached files are stored.
+        user_agent (`str`, `dict`, *optional*):
+            The user-agent info in the form of a dictionary or a string.
+        local_files_only (`bool`, *optional*, defaults to `False`):
+            If `True`, avoid downloading the file and return the path to the
+            local cached file if it exists.
+    Returns:
+        Local folder path (string) of repo snapshot
+
+    <Tip>
+    Raises the following errors:
+    - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
+      if `use_auth_token=True` and the token cannot be found.
+    - [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) if
+      ETag cannot be determined.
+    - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+      if some parameter value is invalid
+    </Tip>
+    """
+
+    if cache_dir is None:
+        cache_dir = get_cache_dir()
+    if isinstance(cache_dir, Path):
+        cache_dir = str(cache_dir)
+
+    group_or_owner, name = model_id_to_group_owner_name(model_id)
+
+    cache = ModelFileSystemCache(cache_dir, group_or_owner, name)
+    if local_files_only:
+        if len(cache.cached_files) == 0:
+            raise ValueError(
+                'Cannot find the requested files in the cached path and outgoing'
+                ' traffic has been disabled. To enable model look-ups and downloads'
+                " online, set 'local_files_only' to False.")
+        logger.warn('We can not confirm the cached file is for revision: %s'
+                    % revision)
+        return cache.get_root_location(
+        )  # we can not confirm the cached file is for snapshot 'revision'
+    else:
+        # make headers
+        headers = {'user-agent': http_user_agent(user_agent=user_agent, )}
+        _api = HubApi()
+        # get file list from model repo
+        branches, tags = _api.get_model_branches_and_tags(model_id)
+        if revision not in branches and revision not in tags:
+            raise NotExistError('The specified branch or tag : %s not exist!'
+                                % revision)
+
+        model_files = _api.get_model_files(
+            model_id=model_id,
+            revision=revision,
+            recursive=True,
+            use_cookies=private)
+
+        cookies = None
+        if private:
+            cookies = ModelScopeConfig.get_cookies()
+
+        for model_file in model_files:
+            if model_file['Type'] == 'tree':
+                continue
+            # check model_file is exist in cache, if exist, skip download, otherwise download
+            if cache.exists(model_file):
+                logger.info(
+                    'The specified file is in cache, skip downloading!')
+                continue
+
+            # get download url
+            url = get_file_download_url(
+                model_id=model_id,
+                file_path=model_file['Path'],
+                revision=revision)
+
+            # First download to /tmp
+            http_get_file(
+                url=url,
+                local_dir=tempfile.gettempdir(),
+                file_name=model_file['Name'],
+                headers=headers,
+                cookies=None if cookies is None else cookies.get_dict())
+            # put file to cache
+            cache.put_file(
+                model_file,
+                os.path.join(tempfile.gettempdir(), model_file['Name']))
+
+        return os.path.join(cache.get_root_location())
diff --git a/modelscope/hub/utils/__init__.py b/modelscope/hub/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/modelscope/hub/utils/_subprocess.py b/modelscope/hub/utils/_subprocess.py
new file mode 100644
index 00000000..77e9fc48
--- /dev/null
+++ b/modelscope/hub/utils/_subprocess.py
@@ -0,0 +1,40 @@
+import subprocess
+from typing import List
+
+
+def run_subprocess(command: List[str],
+                   folder: str,
+                   check=True,
+                   **kwargs) -> subprocess.CompletedProcess:
+    """
+    Method to run subprocesses. Calling this will capture the `stderr` and `stdout`,
+    please call `subprocess.run` manually in case you would like for them not to
+    be captured.
+
+    Args:
+        command (`List[str]`):
+            The command to execute as a list of strings.
+        folder (`str`):
+            The folder in which to run the command.
+        check (`bool`, *optional*, defaults to `True`):
+            Setting `check` to `True` will raise a `subprocess.CalledProcessError`
+            when the subprocess has a non-zero exit code.
+        kwargs (`Dict[str]`):
+            Keyword arguments to be passed to the `subprocess.run` underlying command.
+
+    Returns:
+        `subprocess.CompletedProcess`: The completed process.
+    """
+    if isinstance(command, str):
+        raise ValueError(
+            '`run_subprocess` should be called with a list of strings.')
+
+    return subprocess.run(
+        command,
+        stderr=subprocess.PIPE,
+        stdout=subprocess.PIPE,
+        check=check,
+        encoding='utf-8',
+        cwd=folder,
+        **kwargs,
+    )
diff --git a/modelscope/hub/utils/caching.py b/modelscope/hub/utils/caching.py
new file mode 100644
index 00000000..ac258385
--- /dev/null
+++ b/modelscope/hub/utils/caching.py
@@ -0,0 +1,294 @@
+import hashlib
+import logging
+import os
+import pickle
+import tempfile
+import time
+from shutil import move, rmtree
+
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+
+class FileSystemCache(object):
+    KEY_FILE_NAME = '.msc'
+    """Local file cache.
+    """
+
+    def __init__(
+        self,
+        cache_root_location: str,
+        **kwargs,
+    ):
+        """
+        Parameters
+        ----------
+        cache_location: str
+            The root location to store files.
+        """
+        os.makedirs(cache_root_location, exist_ok=True)
+        self.cache_root_location = cache_root_location
+        self.load_cache()
+
+    def get_root_location(self):
+        return self.cache_root_location
+
+    def load_cache(self):
+        """Read set of stored blocks from file
+        Args:
+            owner(`str`): individual or group username at modelscope, can be empty for official models
+            name(`str`): name of the model
+        Returns:
+            The model details information.
+        Raises:
+            NotExistError: If the model is not exist, will throw NotExistError
+            TODO: Error based error code.
+        <Tip>
+            model_id = {owner}/{name}
+        </Tip>
+        """
+        self.cached_files = []
+        cache_keys_file_path = os.path.join(self.cache_root_location,
+                                            FileSystemCache.KEY_FILE_NAME)
+        if os.path.exists(cache_keys_file_path):
+            with open(cache_keys_file_path, 'rb') as f:
+                self.cached_files = pickle.load(f)
+
+    def save_cached_files(self):
+        """Save cache metadata."""
+        # save new meta to tmp and move to KEY_FILE_NAME
+        cache_keys_file_path = os.path.join(self.cache_root_location,
+                                            FileSystemCache.KEY_FILE_NAME)
+        # TODO: Sync file write
+        fd, fn = tempfile.mkstemp()
+        with open(fd, 'wb') as f:
+            pickle.dump(self.cached_files, f)
+        move(fn, cache_keys_file_path)
+
+    def get_file(self, key):
+        """Check the key is in the cache, if exist, return the file, otherwise return None.
+        Args:
+            key(`str`): The cache key.
+        Returns:
+            If file exist, return the cached file location, otherwise None.
+        Raises:
+            None
+        <Tip>
+            model_id = {owner}/{name}
+        </Tip>
+        """
+        pass
+
+    def put_file(self, key, location):
+        """Put file to the cache,
+        Args:
+            key(`str`): The cache key
+            location(`str`): Location of the file, we will move the file to cache.
+        Returns:
+            The cached file path of the file.
+        Raises:
+            None
+        <Tip>
+            model_id = {owner}/{name}
+        </Tip>
+        """
+        pass
+
+    def remove_key(self, key):
+        """Remove cache key in index, The file is removed manually
+
+        Args:
+            key (dict): The cache key.
+        """
+        self.cached_files.remove(key)
+        self.save_cached_files()
+
+    def exists(self, key):
+        for cache_file in self.cached_files:
+            if cache_file == key:
+                return True
+
+        return False
+
+    def clear_cache(self):
+        """Remove all files and metadat from the cache
+
+        In the case of multiple cache locations, this clears only the last one,
+        which is assumed to be the read/write one.
+        """
+        rmtree(self.cache_root_location)
+        self.load_cache()
+
+    def hash_name(self, key):
+        return hashlib.sha256(key.encode()).hexdigest()
+
+
+class ModelFileSystemCache(FileSystemCache):
+    """Local cache file layout
+       cache_root/owner/model_name/|individual cached files
+                                   |.mk: file, The cache index file
+       Save only one version for each file.
+    """
+
+    def __init__(self, cache_root, owner, name):
+        """Put file to the cache
+        Args:
+            cache_root(`str`): The modelscope local cache root(default: ~/.modelscope/cache/models/)
+            owner(`str`): The model owner.
+            name('str'): The name of the model
+            branch('str'): The branch of model
+            tag('str'): The tag of model
+        Returns:
+        Raises:
+            None
+        <Tip>
+            model_id = {owner}/{name}
+        </Tip>
+        """
+        super().__init__(os.path.join(cache_root, owner, name))
+
+    def get_file_by_path(self, file_path):
+        """Retrieve the cache if there is file match the path.
+        Args:
+            file_path (str): The file path in the model.
+        Returns:
+            path: the full path of the file.
+        """
+        for cached_file in self.cached_files:
+            if file_path == cached_file['Path']:
+                cached_file_path = os.path.join(self.cache_root_location,
+                                                cached_file['Path'])
+                if os.path.exists(cached_file_path):
+                    return cached_file_path
+                else:
+                    self.remove_key(cached_file)
+
+        return None
+
+    def get_file_by_path_and_commit_id(self, file_path, commit_id):
+        """Retrieve the cache if there is file match the path.
+        Args:
+            file_path (str): The file path in the model.
+            commit_id (str): The commit id of the file
+        Returns:
+            path: the full path of the file.
+        """
+        for cached_file in self.cached_files:
+            if file_path == cached_file['Path'] and \
+               (cached_file['Revision'].startswith(commit_id) or commit_id.startswith(cached_file['Revision'])):
+                cached_file_path = os.path.join(self.cache_root_location,
+                                                cached_file['Path'])
+                if os.path.exists(cached_file_path):
+                    return cached_file_path
+                else:
+                    self.remove_key(cached_file)
+
+        return None
+
+    def get_file_by_info(self, model_file_info):
+        """Check if exist cache file.
+
+        Args:
+            model_file_info (ModelFileInfo): The file information of the file.
+
+        Returns:
+            _type_: _description_
+        """
+        cache_key = self.__get_cache_key(model_file_info)
+        for cached_file in self.cached_files:
+            if cached_file == cache_key:
+                orig_path = os.path.join(self.cache_root_location,
+                                         cached_file['Path'])
+                if os.path.exists(orig_path):
+                    return orig_path
+                else:
+                    self.remove_key(cached_file)
+
+        return None
+
+    def __get_cache_key(self, model_file_info):
+        cache_key = {
+            'Path': model_file_info['Path'],
+            'Revision': model_file_info['Revision'],  # commit id
+        }
+        return cache_key
+
+    def exists(self, model_file_info):
+        """Check the file is cached or not.
+
+        Args:
+            model_file_info (CachedFileInfo): The cached file info
+
+        Returns:
+            bool: If exists return True otherwise False
+        """
+        key = self.__get_cache_key(model_file_info)
+        is_exists = False
+        for cached_key in self.cached_files:
+            if cached_key['Path'] == key['Path'] and (
+                    cached_key['Revision'].startswith(key['Revision'])
+                    or key['Revision'].startswith(cached_key['Revision'])):
+                is_exists = True
+        file_path = os.path.join(self.cache_root_location,
+                                 model_file_info['Path'])
+        if is_exists:
+            if os.path.exists(file_path):
+                return True
+            else:
+                self.remove_key(
+                    model_file_info)  # sameone may manual delete the file
+        return False
+
+    def remove_if_exists(self, model_file_info):
+        """We in cache, remove it.
+
+        Args:
+            model_file_info (ModelFileInfo): The model file information from server.
+        """
+        for cached_file in self.cached_files:
+            if cached_file['Path'] == model_file_info['Path']:
+                self.remove_key(cached_file)
+                file_path = os.path.join(self.cache_root_location,
+                                         cached_file['Path'])
+                if os.path.exists(file_path):
+                    os.remove(file_path)
+
+    def put_file(self, model_file_info, model_file_location):
+        """Put model on model_file_location to cache, the model first download to /tmp, and move to cache.
+
+        Args:
+            model_file_info (str): The file description returned by get_model_files
+                                      sample:
+                                    {
+                                        "CommitMessage": "add model\n",
+                                        "CommittedDate": 1654857567,
+                                        "CommitterName": "mulin.lyh",
+                                        "IsLFS": false,
+                                        "Mode": "100644",
+                                        "Name": "resnet18.pth",
+                                        "Path": "resnet18.pth",
+                                        "Revision": "09b68012b27de0048ba74003690a890af7aff192",
+                                        "Size": 46827520,
+                                        "Type": "blob"
+                                    }
+            model_file_location (str): The location of the temporary file.
+        Raises:
+            NotImplementedError: _description_
+
+        Returns:
+            str: The location of the cached file.
+        """
+        self.remove_if_exists(model_file_info)  # backup old revision
+        cache_key = self.__get_cache_key(model_file_info)
+        cache_full_path = os.path.join(
+            self.cache_root_location,
+            cache_key['Path'])  # Branch and Tag do not have same name.
+        cache_file_dir = os.path.dirname(cache_full_path)
+        if not os.path.exists(cache_file_dir):
+            os.makedirs(cache_file_dir, exist_ok=True)
+        # We can't make operation transaction
+        move(model_file_location, cache_full_path)
+        self.cached_files.append(cache_key)
+        self.save_cached_files()
+        return cache_full_path
diff --git a/modelscope/hub/utils/utils.py b/modelscope/hub/utils/utils.py
new file mode 100644
index 00000000..d0704de8
--- /dev/null
+++ b/modelscope/hub/utils/utils.py
@@ -0,0 +1,39 @@
+import os
+
+from modelscope.hub.constants import (DEFAULT_MODELSCOPE_DOMAIN,
+                                      DEFAULT_MODELSCOPE_GITLAB_DOMAIN,
+                                      DEFAULT_MODELSCOPE_GROUP,
+                                      MODEL_ID_SEPARATOR,
+                                      MODELSCOPE_URL_SCHEME)
+
+
+def model_id_to_group_owner_name(model_id):
+    if MODEL_ID_SEPARATOR in model_id:
+        group_or_owner = model_id.split(MODEL_ID_SEPARATOR)[0]
+        name = model_id.split(MODEL_ID_SEPARATOR)[1]
+    else:
+        group_or_owner = DEFAULT_MODELSCOPE_GROUP
+        name = model_id
+    return group_or_owner, name
+
+
+def get_cache_dir():
+    """
+    cache dir precedence:
+        function parameter > enviroment > ~/.cache/modelscope/hub
+    """
+    default_cache_dir = os.path.expanduser(
+        os.path.join('~/.cache', 'modelscope'))
+    return os.getenv('MODELSCOPE_CACHE', os.path.join(default_cache_dir,
+                                                      'hub'))
+
+
+def get_endpoint():
+    modelscope_domain = os.getenv('MODELSCOPE_DOMAIN',
+                                  DEFAULT_MODELSCOPE_DOMAIN)
+    return MODELSCOPE_URL_SCHEME + modelscope_domain
+
+
+def get_gitlab_domain():
+    return os.getenv('MODELSCOPE_GITLAB_DOMAIN',
+                     DEFAULT_MODELSCOPE_GITLAB_DOMAIN)
diff --git a/modelscope/models/base.py b/modelscope/models/base.py
index ab0d22cc..99309a7e 100644
--- a/modelscope/models/base.py
+++ b/modelscope/models/base.py
@@ -4,12 +4,10 @@ import os.path as osp
 from abc import ABC, abstractmethod
 from typing import Dict, Union
 
-from maas_hub.snapshot_download import snapshot_download
-
+from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.models.builder import build_model
 from modelscope.utils.config import Config
 from modelscope.utils.constant import ModelFile
-from modelscope.utils.hub import get_model_cache_dir
 
 Tensor = Union['torch.Tensor', 'tf.Tensor']
 
@@ -47,9 +45,7 @@ class Model(ABC):
         if osp.exists(model_name_or_path):
             local_model_dir = model_name_or_path
         else:
-            cache_path = get_model_cache_dir(model_name_or_path)
-            local_model_dir = cache_path if osp.exists(
-                cache_path) else snapshot_download(model_name_or_path)
+            local_model_dir = snapshot_download(model_name_or_path)
             # else:
             #     raise ValueError(
             #         'Remote model repo {model_name_or_path} does not exists')
diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py
index 1da65213..59bd298b 100644
--- a/modelscope/pipelines/base.py
+++ b/modelscope/pipelines/base.py
@@ -4,13 +4,11 @@ import os.path as osp
 from abc import ABC, abstractmethod
 from typing import Any, Dict, Generator, List, Union
 
-from maas_hub.snapshot_download import snapshot_download
-
+from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.models.base import Model
 from modelscope.preprocessors import Preprocessor
 from modelscope.pydatasets import PyDataset
 from modelscope.utils.config import Config
-from modelscope.utils.hub import get_model_cache_dir
 from modelscope.utils.logger import get_logger
 from .outputs import TASK_OUTPUTS
 from .util import is_model_name
@@ -32,9 +30,7 @@ class Pipeline(ABC):
         # TODO @wenmeng.zwm replace model.startswith('damo/') with get_model
         if isinstance(model, str) and model.startswith('damo/'):
             if not osp.exists(model):
-                cache_path = get_model_cache_dir(model)
-                model = cache_path if osp.exists(
-                    cache_path) else snapshot_download(model)
+                model = snapshot_download(model)
             return Model.from_pretrained(model) if is_model_name(
                 model) else model
         elif isinstance(model, Model):
diff --git a/modelscope/pipelines/util.py b/modelscope/pipelines/util.py
index 37c9c929..6fe6e9fd 100644
--- a/modelscope/pipelines/util.py
+++ b/modelscope/pipelines/util.py
@@ -2,8 +2,7 @@
 import os.path as osp
 from typing import List, Union
 
-from maas_hub.file_download import model_file_download
-
+from modelscope.hub.file_download import model_file_download
 from modelscope.utils.config import Config
 from modelscope.utils.constant import ModelFile
 from modelscope.utils.logger import get_logger
diff --git a/modelscope/preprocessors/multi_model.py b/modelscope/preprocessors/multi_model.py
index de211611..ea2e7493 100644
--- a/modelscope/preprocessors/multi_model.py
+++ b/modelscope/preprocessors/multi_model.py
@@ -4,11 +4,10 @@ from typing import Any, Dict, Union
 
 import numpy as np
 import torch
-from maas_hub.snapshot_download import snapshot_download
 from PIL import Image
 
+from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.utils.constant import Fields, ModelFile
-from modelscope.utils.hub import get_model_cache_dir
 from modelscope.utils.type_assert import type_assert
 from .base import Preprocessor
 from .builder import PREPROCESSORS
@@ -34,9 +33,7 @@ class OfaImageCaptionPreprocessor(Preprocessor):
         if osp.exists(model_dir):
             local_model_dir = model_dir
         else:
-            cache_path = get_model_cache_dir(model_dir)
-            local_model_dir = cache_path if osp.exists(
-                cache_path) else snapshot_download(model_dir)
+            local_model_dir = snapshot_download(model_dir)
         local_model = osp.join(local_model_dir, ModelFile.TORCH_MODEL_FILE)
         bpe_dir = local_model_dir
 
diff --git a/modelscope/utils/hub.py b/modelscope/utils/hub.py
index 2f61b148..245642d1 100644
--- a/modelscope/utils/hub.py
+++ b/modelscope/utils/hub.py
@@ -2,13 +2,10 @@
 
 import os
 
-from maas_hub.constants import MODEL_ID_SEPARATOR
+from modelscope.hub.constants import MODEL_ID_SEPARATOR
+from modelscope.hub.utils.utils import get_cache_dir
 
 
 # temp solution before the hub-cache is in place
-def get_model_cache_dir(model_id: str, branch: str = 'master'):
-    model_id_expanded = model_id.replace('/',
-                                         MODEL_ID_SEPARATOR) + '.' + branch
-    default_cache_dir = os.path.expanduser(os.path.join('~/.cache', 'maas'))
-    return os.getenv('MAAS_CACHE',
-                     os.path.join(default_cache_dir, 'hub', model_id_expanded))
+def get_model_cache_dir(model_id: str):
+    return os.path.join(get_cache_dir(), model_id)
diff --git a/requirements/runtime.txt b/requirements/runtime.txt
index e97352aa..6580de53 100644
--- a/requirements/runtime.txt
+++ b/requirements/runtime.txt
@@ -1,13 +1,16 @@
 addict
 datasets
 easydict
-https://mindscope.oss-cn-hangzhou.aliyuncs.com/sdklib/maas_hub-0.2.4.dev0-py3-none-any.whl
+filelock>=3.3.0
 numpy
 opencv-python-headless
 Pillow>=6.2.0
 pyyaml
 requests
+requests==2.27.1
 scipy
+setuptools==58.0.4
 tokenizers<=0.10.3
+tqdm>=4.64.0
 transformers<=4.16.2
 yapf
diff --git a/tests/hub/__init__.py b/tests/hub/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/hub/test_hub_operation.py b/tests/hub/test_hub_operation.py
new file mode 100644
index 00000000..2277860b
--- /dev/null
+++ b/tests/hub/test_hub_operation.py
@@ -0,0 +1,157 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+import os.path as osp
+import subprocess
+import tempfile
+import unittest
+import uuid
+
+from modelscope.hub.api import HubApi, ModelScopeConfig
+from modelscope.hub.file_download import model_file_download
+from modelscope.hub.repository import Repository
+from modelscope.hub.snapshot_download import snapshot_download
+from modelscope.hub.utils.utils import get_gitlab_domain
+
+USER_NAME = 'maasadmin'
+PASSWORD = '12345678'
+
+model_chinese_name = '达摩卡通化模型'
+model_org = 'unittest'
+DEFAULT_GIT_PATH = 'git'
+
+
+class GitError(Exception):
+    pass
+
+
+# TODO make thest git operation to git library after merge code.
+def run_git_command(git_path, *args) -> subprocess.CompletedProcess:
+    response = subprocess.run([git_path, *args], capture_output=True)
+    try:
+        response.check_returncode()
+        return response.stdout.decode('utf8')
+    except subprocess.CalledProcessError as error:
+        raise GitError(error.stderr.decode('utf8'))
+
+
+# for public project, token can None, private repo, there must token.
+def clone(local_dir: str, token: str, url: str):
+    url = url.replace('//', '//oauth2:%s@' % token)
+    clone_args = '-C %s clone %s' % (local_dir, url)
+    clone_args = clone_args.split(' ')
+    stdout = run_git_command(DEFAULT_GIT_PATH, *clone_args)
+    print('stdout: %s' % stdout)
+
+
+def push(local_dir: str, token: str, url: str):
+    url = url.replace('//', '//oauth2:%s@' % token)
+    push_args = '-C %s push %s' % (local_dir, url)
+    push_args = push_args.split(' ')
+    stdout = run_git_command(DEFAULT_GIT_PATH, *push_args)
+    print('stdout: %s' % stdout)
+
+
+sample_model_url = 'https://mindscope.oss-cn-hangzhou.aliyuncs.com/test_models/mnist-12.onnx'
+download_model_file_name = 'mnist-12.onnx'
+
+
+class HubOperationTest(unittest.TestCase):
+
+    def setUp(self):
+        self.old_cwd = os.getcwd()
+        self.api = HubApi()
+        # note this is temporary before official account management is ready
+        self.api.login(USER_NAME, PASSWORD)
+        self.model_name = uuid.uuid4().hex
+        self.model_id = '%s/%s' % (model_org, self.model_name)
+        self.api.create_model(
+            model_id=self.model_id,
+            chinese_name=model_chinese_name,
+            visibility=5,  # 1-private, 5-public
+            license='apache-2.0')
+
+    def tearDown(self):
+        os.chdir(self.old_cwd)
+        self.api.delete_model(model_id=self.model_id)
+
+    def test_model_repo_creation(self):
+        # change to proper model names before use
+        try:
+            info = self.api.get_model(model_id=self.model_id)
+            assert info['Name'] == self.model_name
+        except KeyError as ke:
+            if ke.args[0] == 'name':
+                print(f'model {self.model_name} already exists, ignore')
+            else:
+                raise
+
+    # Note that this can be done via git operation once model repo
+    # has been created. Git-Op is the RECOMMENDED model upload approach
+    def test_model_upload(self):
+        url = f'http://{get_gitlab_domain()}/{self.model_id}'
+        print(url)
+        temporary_dir = tempfile.mkdtemp()
+        os.chdir(temporary_dir)
+        cmd_args = 'clone %s' % url
+        cmd_args = cmd_args.split(' ')
+        out = run_git_command('git', *cmd_args)
+        print(out)
+        repo_dir = os.path.join(temporary_dir, self.model_name)
+        os.chdir(repo_dir)
+        os.system('touch file1')
+        os.system('git add file1')
+        os.system("git commit -m 'Test'")
+        token = ModelScopeConfig.get_token()
+        push(repo_dir, token, url)
+
+    def test_download_single_file(self):
+        url = f'http://{get_gitlab_domain()}/{self.model_id}'
+        print(url)
+        temporary_dir = tempfile.mkdtemp()
+        os.chdir(temporary_dir)
+        os.system('git clone %s' % url)
+        repo_dir = os.path.join(temporary_dir, self.model_name)
+        os.chdir(repo_dir)
+        os.system('wget %s' % sample_model_url)
+        os.system('git add .')
+        os.system("git commit -m 'Add file'")
+        token = ModelScopeConfig.get_token()
+        push(repo_dir, token, url)
+        assert os.path.exists(
+            os.path.join(temporary_dir, self.model_name,
+                         download_model_file_name))
+        downloaded_file = model_file_download(
+            model_id=self.model_id, file_path=download_model_file_name)
+        mdtime1 = os.path.getmtime(downloaded_file)
+        # download again
+        downloaded_file = model_file_download(
+            model_id=self.model_id, file_path=download_model_file_name)
+        mdtime2 = os.path.getmtime(downloaded_file)
+        assert mdtime1 == mdtime2
+
+    def test_snapshot_download(self):
+        url = f'http://{get_gitlab_domain()}/{self.model_id}'
+        print(url)
+        temporary_dir = tempfile.mkdtemp()
+        os.chdir(temporary_dir)
+        os.system('git clone %s' % url)
+        repo_dir = os.path.join(temporary_dir, self.model_name)
+        os.chdir(repo_dir)
+        os.system('wget %s' % sample_model_url)
+        os.system('git add .')
+        os.system("git commit -m 'Add file'")
+        token = ModelScopeConfig.get_token()
+        push(repo_dir, token, url)
+        snapshot_path = snapshot_download(model_id=self.model_id)
+        downloaded_file_path = os.path.join(snapshot_path,
+                                            download_model_file_name)
+        assert os.path.exists(downloaded_file_path)
+        mdtime1 = os.path.getmtime(downloaded_file_path)
+        # download again
+        snapshot_path = snapshot_download(model_id=self.model_id)
+        mdtime2 = os.path.getmtime(downloaded_file_path)
+        assert mdtime1 == mdtime2
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py
index e557ba86..751b6975 100644
--- a/tests/pipelines/test_image_matting.py
+++ b/tests/pipelines/test_image_matting.py
@@ -10,7 +10,6 @@ from modelscope.fileio import File
 from modelscope.pipelines import pipeline
 from modelscope.pydatasets import PyDataset
 from modelscope.utils.constant import ModelFile, Tasks
-from modelscope.utils.hub import get_model_cache_dir
 from modelscope.utils.test_utils import test_level
 
 
@@ -18,11 +17,6 @@ class ImageMattingTest(unittest.TestCase):
 
     def setUp(self) -> None:
         self.model_id = 'damo/cv_unet_image-matting'
-        # switch to False if downloading everytime is not desired
-        purge_cache = True
-        if purge_cache:
-            shutil.rmtree(
-                get_model_cache_dir(self.model_id), ignore_errors=True)
 
     @unittest.skip('deprecated, download model from model hub instead')
     def test_run_with_direct_file_download(self):
diff --git a/tests/pipelines/test_ocr_detection.py b/tests/pipelines/test_ocr_detection.py
index 62fcedd3..986961b7 100644
--- a/tests/pipelines/test_ocr_detection.py
+++ b/tests/pipelines/test_ocr_detection.py
@@ -27,7 +27,7 @@ class OCRDetectionTest(unittest.TestCase):
         print('ocr detection results: ')
         print(result)
 
-    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_run_modelhub_default_model(self):
         ocr_detection = pipeline(Tasks.ocr_detection)
         self.pipeline_inference(ocr_detection, self.test_image)
diff --git a/tests/pipelines/test_sentence_similarity.py b/tests/pipelines/test_sentence_similarity.py
index ac2ff4fb..43e585ba 100644
--- a/tests/pipelines/test_sentence_similarity.py
+++ b/tests/pipelines/test_sentence_similarity.py
@@ -2,14 +2,12 @@
 import shutil
 import unittest
 
-from maas_hub.snapshot_download import snapshot_download
-
+from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.models import Model
 from modelscope.models.nlp import SbertForSentenceSimilarity
 from modelscope.pipelines import SentenceSimilarityPipeline, pipeline
 from modelscope.preprocessors import SequenceClassificationPreprocessor
 from modelscope.utils.constant import Tasks
-from modelscope.utils.hub import get_model_cache_dir
 from modelscope.utils.test_utils import test_level
 
 
@@ -18,13 +16,6 @@ class SentenceSimilarityTest(unittest.TestCase):
     sentence1 = '今天气温比昨天高么？'
     sentence2 = '今天湿度比昨天高么？'
 
-    def setUp(self) -> None:
-        # switch to False if downloading everytime is not desired
-        purge_cache = True
-        if purge_cache:
-            shutil.rmtree(
-                get_model_cache_dir(self.model_id), ignore_errors=True)
-
     @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_run(self):
         cache_path = snapshot_download(self.model_id)
diff --git a/tests/pipelines/test_speech_signal_process.py b/tests/pipelines/test_speech_signal_process.py
index 8b5c9468..f1369a2f 100644
--- a/tests/pipelines/test_speech_signal_process.py
+++ b/tests/pipelines/test_speech_signal_process.py
@@ -5,7 +5,6 @@ import unittest
 from modelscope.fileio import File
 from modelscope.pipelines import pipeline
 from modelscope.utils.constant import Tasks
-from modelscope.utils.hub import get_model_cache_dir
 
 NEAREND_MIC_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/AEC/sample_audio/nearend_mic.wav'
 FAREND_SPEECH_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/AEC/sample_audio/farend_speech.wav'
@@ -30,11 +29,6 @@ class SpeechSignalProcessTest(unittest.TestCase):
 
     def setUp(self) -> None:
         self.model_id = 'damo/speech_dfsmn_aec_psm_16k'
-        # switch to False if downloading everytime is not desired
-        purge_cache = True
-        if purge_cache:
-            shutil.rmtree(
-                get_model_cache_dir(self.model_id), ignore_errors=True)
         # A temporary hack to provide c++ lib. Download it first.
         download(AEC_LIB_URL, AEC_LIB_FILE)
 
diff --git a/tests/pipelines/test_text_classification.py b/tests/pipelines/test_text_classification.py
index bb24fece..8ecd9ed4 100644
--- a/tests/pipelines/test_text_classification.py
+++ b/tests/pipelines/test_text_classification.py
@@ -11,7 +11,6 @@ from modelscope.pipelines import SequenceClassificationPipeline, pipeline
 from modelscope.preprocessors import SequenceClassificationPreprocessor
 from modelscope.pydatasets import PyDataset
 from modelscope.utils.constant import Hubs, Tasks
-from modelscope.utils.hub import get_model_cache_dir
 from modelscope.utils.test_utils import test_level
 
 
@@ -19,11 +18,6 @@ class SequenceClassificationTest(unittest.TestCase):
 
     def setUp(self) -> None:
         self.model_id = 'damo/bert-base-sst2'
-        # switch to False if downloading everytime is not desired
-        purge_cache = True
-        if purge_cache:
-            shutil.rmtree(
-                get_model_cache_dir(self.model_id), ignore_errors=True)
 
     def predict(self, pipeline_ins: SequenceClassificationPipeline):
         from easynlp.appzoo import load_dataset
diff --git a/tests/pipelines/test_text_generation.py b/tests/pipelines/test_text_generation.py
index fbdd165f..cb5194c2 100644
--- a/tests/pipelines/test_text_generation.py
+++ b/tests/pipelines/test_text_generation.py
@@ -1,8 +1,7 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 import unittest
 
-from maas_hub.snapshot_download import snapshot_download
-
+from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.models import Model
 from modelscope.models.nlp import PalmForTextGeneration
 from modelscope.pipelines import TextGenerationPipeline, pipeline
diff --git a/tests/pipelines/test_word_segmentation.py b/tests/pipelines/test_word_segmentation.py
index 4ec2bf29..7c57d9ad 100644
--- a/tests/pipelines/test_word_segmentation.py
+++ b/tests/pipelines/test_word_segmentation.py
@@ -2,14 +2,12 @@
 import shutil
 import unittest
 
-from maas_hub.snapshot_download import snapshot_download
-
+from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.models import Model
 from modelscope.models.nlp import StructBertForTokenClassification
 from modelscope.pipelines import WordSegmentationPipeline, pipeline
 from modelscope.preprocessors import TokenClassifcationPreprocessor
 from modelscope.utils.constant import Tasks
-from modelscope.utils.hub import get_model_cache_dir
 from modelscope.utils.test_utils import test_level
 
 
@@ -17,13 +15,6 @@ class WordSegmentationTest(unittest.TestCase):
     model_id = 'damo/nlp_structbert_word-segmentation_chinese-base'
     sentence = '今天天气不错，适合出去游玩'
 
-    def setUp(self) -> None:
-        # switch to False if downloading everytime is not desired
-        purge_cache = True
-        if purge_cache:
-            shutil.rmtree(
-                get_model_cache_dir(self.model_id), ignore_errors=True)
-
     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_run_by_direct_model_download(self):
         cache_path = snapshot_download(self.model_id)
diff --git a/tests/run.py b/tests/run.py
index a904ba8e..38c5a897 100644
--- a/tests/run.py
+++ b/tests/run.py
@@ -61,7 +61,7 @@ if __name__ == '__main__':
     parser.add_argument(
         '--test_dir', default='tests', help='directory to be tested')
     parser.add_argument(
-        '--level', default=0, help='2 -- all, 1 -- p1, 0 -- p0')
+        '--level', default=0, type=int, help='2 -- all, 1 -- p1, 0 -- p0')
     args = parser.parse_args()
     set_test_level(args.level)
     logger.info(f'TEST LEVEL: {test_level()}')
diff --git a/tests/utils/test_hub_operation.py b/tests/utils/test_hub_operation.py
deleted file mode 100644
index f432a60c..00000000
--- a/tests/utils/test_hub_operation.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-import os.path as osp
-import unittest
-
-from maas_hub.maas_api import MaasApi
-from maas_hub.repository import Repository
-
-USER_NAME = 'maasadmin'
-PASSWORD = '12345678'
-
-
-class HubOperationTest(unittest.TestCase):
-
-    def setUp(self):
-        self.api = MaasApi()
-        # note this is temporary before official account management is ready
-        self.api.login(USER_NAME, PASSWORD)
-
-    @unittest.skip('to be used for local test only')
-    def test_model_repo_creation(self):
-        # change to proper model names before use
-        model_name = 'cv_unet_person-image-cartoon_compound-models'
-        model_chinese_name = '达摩卡通化模型'
-        model_org = 'damo'
-        try:
-            self.api.create_model(
-                owner=model_org,
-                name=model_name,
-                chinese_name=model_chinese_name,
-                visibility=5,  # 1-private, 5-public
-                license='apache-2.0')
-        # TODO: support proper name duplication checking
-        except KeyError as ke:
-            if ke.args[0] == 'name':
-                print(f'model {self.model_name} already exists, ignore')
-            else:
-                raise
-
-    # Note that this can be done via git operation once model repo
-    # has been created. Git-Op is the RECOMMENDED model upload approach
-    @unittest.skip('to be used for local test only')
-    def test_model_upload(self):
-        local_path = '/path/to/local/model/directory'
-        assert osp.exists(local_path), 'Local model directory not exist.'
-        repo = Repository(local_dir=local_path)
-        repo.push_to_hub(commit_message='Upload model files')
-
-
-if __name__ == '__main__':
-    unittest.main()

From e288cf076e791ccfd23eb165b21a6fdbeb958abb Mon Sep 17 00:00:00 2001
From: "wenmeng.zwm" <wenmeng.zwm@alibaba-inc.com>
Date: Wed, 22 Jun 2022 14:15:32 +0800
Subject: [PATCH 2/3] [to #42362853] refactor pipeline and standardize
 module_name

* using get_model to validate hub path
* support reading pipeline info from configuration file
* add metainfo const
* update model type and pipeline type and fix UT
* relax requimrent for protobuf
* skip two dataset tests due to temporal failure

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9118154
---
 modelscope/metainfo.py                        | 94 +++++++++++++++++++
 .../models/audio/tts/am/sambert_hifi_16k.py   |  4 +-
 .../generic_text_to_speech_frontend.py        |  3 +-
 .../models/audio/tts/vocoder/hifigan16k.py    |  3 +-
 modelscope/models/base.py                     | 19 +++-
 .../multi_model/image_captioning_model.py     |  4 +-
 .../nlp/bert_for_sequence_classification.py   |  4 +-
 .../models/nlp/palm_for_text_generation.py    |  3 +-
 .../nlp/sbert_for_sentence_similarity.py      |  4 +-
 .../nlp/sbert_for_token_classification.py     |  5 +-
 .../pipelines/audio/linear_aec_pipeline.py    |  4 +-
 .../audio/text_to_speech_pipeline.py          |  3 +-
 modelscope/pipelines/base.py                  | 12 +--
 modelscope/pipelines/builder.py               | 65 ++++++++-----
 .../pipelines/cv/image_cartoon_pipeline.py    |  3 +-
 .../pipelines/cv/image_matting_pipeline.py    |  3 +-
 .../pipelines/cv/ocr_detection_pipeline.py    |  3 +-
 .../multi_modal/image_captioning_pipeline.py  |  4 +-
 .../nlp/sentence_similarity_pipeline.py       |  4 +-
 .../nlp/sequence_classification_pipeline.py   |  3 +-
 .../pipelines/nlp/text_generation_pipeline.py |  4 +-
 .../nlp/word_segmentation_pipeline.py         |  4 +-
 modelscope/pipelines/util.py                  | 53 +++++++++--
 modelscope/preprocessors/image.py             |  3 +-
 modelscope/preprocessors/multi_model.py       |  3 +-
 modelscope/preprocessors/nlp.py               |  8 +-
 modelscope/preprocessors/text_to_speech.py    |  5 +-
 modelscope/utils/hub.py                       | 40 +++++++-
 requirements/audio.txt                        | 10 +-
 tests/pipelines/test_image_matting.py         |  2 +-
 tests/pipelines/test_speech_signal_process.py |  3 +-
 tests/pipelines/test_text_classification.py   | 25 -----
 tests/pipelines/test_text_to_speech.py        |  5 +-
 tests/preprocessors/test_text_to_speech.py    |  3 +-
 tests/pydatasets/test_py_dataset.py           |  2 +
 35 files changed, 303 insertions(+), 114 deletions(-)
 create mode 100644 modelscope/metainfo.py

diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py
new file mode 100644
index 00000000..63af2ec4
--- /dev/null
+++ b/modelscope/metainfo.py
@@ -0,0 +1,94 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+
+class Models(object):
+    """ Names for different models.
+
+        Holds the standard model name to use for identifying different model.
+    This should be used to register models.
+
+        Model name should only contain model info but not task info.
+    """
+    # vision models
+
+    # nlp models
+    bert = 'bert'
+    palm2_0 = 'palm2.0'
+    structbert = 'structbert'
+
+    # audio models
+    sambert_hifi_16k = 'sambert-hifi-16k'
+    generic_tts_frontend = 'generic-tts-frontend'
+    hifigan16k = 'hifigan16k'
+
+    # multi-modal models
+    ofa = 'ofa'
+
+
+class Pipelines(object):
+    """ Names for different pipelines.
+
+        Holds the standard pipline name to use for identifying different pipeline.
+    This should be used to register pipelines.
+
+        For pipeline which support different models and implements the common function, we
+    should use task name for this pipeline.
+        For pipeline which suuport only one model, we should use ${Model}-${Task} as its name.
+    """
+    # vision tasks
+    image_matting = 'unet-image-matting'
+    person_image_cartoon = 'unet-person-image-cartoon'
+    ocr_detection = 'resnet18-ocr-detection'
+
+    # nlp tasks
+    sentence_similarity = 'sentence-similarity'
+    word_segmentation = 'word-segmentation'
+    text_generation = 'text-generation'
+    sentiment_analysis = 'sentiment-analysis'
+
+    # audio tasks
+    sambert_hifigan_16k_tts = 'sambert-hifigan-16k-tts'
+    speech_dfsmn_aec_psm_16k = 'speech-dfsmn-aec-psm-16k'
+
+    # multi-modal tasks
+    image_caption = 'image-caption'
+
+
+class Trainers(object):
+    """ Names for different trainer.
+
+        Holds the standard trainer name to use for identifying different trainer.
+    This should be used to register trainers.
+
+        For a general Trainer, you can use easynlp-trainer/ofa-trainer/sofa-trainer.
+        For a model specific Trainer, you can use ${ModelName}-${Task}-trainer.
+    """
+
+    default = 'Trainer'
+
+
+class Preprocessors(object):
+    """ Names for different preprocessor.
+
+        Holds the standard preprocessor name to use for identifying different preprocessor.
+    This should be used to register preprocessors.
+
+        For a general preprocessor, just use the function name as preprocessor name such as
+    resize-image, random-crop
+        For a model-specific preprocessor, use ${modelname}-${fuction}
+    """
+
+    # cv preprocessor
+    load_image = 'load-image'
+
+    # nlp preprocessor
+    bert_seq_cls_tokenizer = 'bert-seq-cls-tokenizer'
+    palm_text_gen_tokenizer = 'palm-text-gen-tokenizer'
+    sbert_token_cls_tokenizer = 'sbert-token-cls-tokenizer'
+
+    # audio preprocessor
+    linear_aec_fbank = 'linear-aec-fbank'
+    text_to_tacotron_symbols = 'text-to-tacotron-symbols'
+
+    # multi-modal
+    ofa_image_caption = 'ofa-image-caption'
diff --git a/modelscope/models/audio/tts/am/sambert_hifi_16k.py b/modelscope/models/audio/tts/am/sambert_hifi_16k.py
index 2db9abc6..415e88b3 100644
--- a/modelscope/models/audio/tts/am/sambert_hifi_16k.py
+++ b/modelscope/models/audio/tts/am/sambert_hifi_16k.py
@@ -6,6 +6,7 @@ import numpy as np
 import tensorflow as tf
 from sklearn.preprocessing import MultiLabelBinarizer
 
+from modelscope.metainfo import Models
 from modelscope.models.base import Model
 from modelscope.models.builder import MODELS
 from modelscope.utils.constant import ModelFile, Tasks
@@ -26,7 +27,8 @@ def multi_label_symbol_to_sequence(my_classes, my_symbol):
     return one_hot.fit_transform(sequences)
 
 
-@MODELS.register_module(Tasks.text_to_speech, module_name=r'sambert_hifi_16k')
+@MODELS.register_module(
+    Tasks.text_to_speech, module_name=Models.sambert_hifi_16k)
 class SambertNetHifi16k(Model):
 
     def __init__(self,
diff --git a/modelscope/models/audio/tts/frontend/generic_text_to_speech_frontend.py b/modelscope/models/audio/tts/frontend/generic_text_to_speech_frontend.py
index c6aabf75..9f13f36f 100644
--- a/modelscope/models/audio/tts/frontend/generic_text_to_speech_frontend.py
+++ b/modelscope/models/audio/tts/frontend/generic_text_to_speech_frontend.py
@@ -2,6 +2,7 @@ import os
 import zipfile
 from typing import Any, Dict, List
 
+from modelscope.metainfo import Models
 from modelscope.models.base import Model
 from modelscope.models.builder import MODELS
 from modelscope.utils.audio.tts_exceptions import (
@@ -13,7 +14,7 @@ __all__ = ['GenericTtsFrontend']
 
 
 @MODELS.register_module(
-    Tasks.text_to_speech, module_name=r'generic_tts_frontend')
+    Tasks.text_to_speech, module_name=Models.generic_tts_frontend)
 class GenericTtsFrontend(Model):
 
     def __init__(self, model_dir='.', lang_type='pinyin', *args, **kwargs):
diff --git a/modelscope/models/audio/tts/vocoder/hifigan16k.py b/modelscope/models/audio/tts/vocoder/hifigan16k.py
index 0d917dbe..b3fd9cf6 100644
--- a/modelscope/models/audio/tts/vocoder/hifigan16k.py
+++ b/modelscope/models/audio/tts/vocoder/hifigan16k.py
@@ -10,6 +10,7 @@ import numpy as np
 import torch
 from scipy.io.wavfile import write
 
+from modelscope.metainfo import Models
 from modelscope.models.base import Model
 from modelscope.models.builder import MODELS
 from modelscope.utils.audio.tts_exceptions import \
@@ -36,7 +37,7 @@ class AttrDict(dict):
         self.__dict__ = self
 
 
-@MODELS.register_module(Tasks.text_to_speech, module_name=r'hifigan16k')
+@MODELS.register_module(Tasks.text_to_speech, module_name=Models.hifigan16k)
 class Hifigan16k(Model):
 
     def __init__(self, model_dir, *args, **kwargs):
diff --git a/modelscope/models/base.py b/modelscope/models/base.py
index 99309a7e..cb6d2b0e 100644
--- a/modelscope/models/base.py
+++ b/modelscope/models/base.py
@@ -8,6 +8,9 @@ from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.models.builder import build_model
 from modelscope.utils.config import Config
 from modelscope.utils.constant import ModelFile
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
 
 Tensor = Union['torch.Tensor', 'tf.Tensor']
 
@@ -46,18 +49,24 @@ class Model(ABC):
             local_model_dir = model_name_or_path
         else:
             local_model_dir = snapshot_download(model_name_or_path)
-            # else:
-            #     raise ValueError(
-            #         'Remote model repo {model_name_or_path} does not exists')
-
+        logger.info(f'initialize model from {local_model_dir}')
         cfg = Config.from_file(
             osp.join(local_model_dir, ModelFile.CONFIGURATION))
         task_name = cfg.task
         model_cfg = cfg.model
+        assert hasattr(
+            cfg, 'pipeline'), 'pipeline config is missing from config file.'
+        pipeline_cfg = cfg.pipeline
         # TODO @wenmeng.zwm may should manually initialize model after model building
         if hasattr(model_cfg, 'model_type') and not hasattr(model_cfg, 'type'):
             model_cfg.type = model_cfg.model_type
+
         model_cfg.model_dir = local_model_dir
+
         for k, v in kwargs.items():
             model_cfg.k = v
-        return build_model(model_cfg, task_name)
+        model = build_model(model_cfg, task_name)
+
+        # dynamically add pipeline info to model for pipeline inference
+        model.pipeline = pipeline_cfg
+        return model
diff --git a/modelscope/models/multi_model/image_captioning_model.py b/modelscope/models/multi_model/image_captioning_model.py
index fad0663e..79ab2b5f 100644
--- a/modelscope/models/multi_model/image_captioning_model.py
+++ b/modelscope/models/multi_model/image_captioning_model.py
@@ -3,6 +3,7 @@ from typing import Any, Dict
 
 from PIL import Image
 
+from modelscope.metainfo import Models
 from modelscope.utils.constant import ModelFile, Tasks
 from ..base import Model
 from ..builder import MODELS
@@ -10,8 +11,7 @@ from ..builder import MODELS
 __all__ = ['OfaForImageCaptioning']
 
 
-@MODELS.register_module(
-    Tasks.image_captioning, module_name=r'ofa-image-captioning')
+@MODELS.register_module(Tasks.image_captioning, module_name=Models.ofa)
 class OfaForImageCaptioning(Model):
 
     def __init__(self, model_dir, *args, **kwargs):
diff --git a/modelscope/models/nlp/bert_for_sequence_classification.py b/modelscope/models/nlp/bert_for_sequence_classification.py
index a3cc4b68..7d85fa28 100644
--- a/modelscope/models/nlp/bert_for_sequence_classification.py
+++ b/modelscope/models/nlp/bert_for_sequence_classification.py
@@ -4,6 +4,7 @@ from typing import Any, Dict
 import json
 import numpy as np
 
+from modelscope.metainfo import Models
 from modelscope.utils.constant import Tasks
 from ..base import Model
 from ..builder import MODELS
@@ -11,8 +12,7 @@ from ..builder import MODELS
 __all__ = ['BertForSequenceClassification']
 
 
-@MODELS.register_module(
-    Tasks.text_classification, module_name=r'bert-sentiment-analysis')
+@MODELS.register_module(Tasks.text_classification, module_name=Models.bert)
 class BertForSequenceClassification(Model):
 
     def __init__(self, model_dir: str, *args, **kwargs):
diff --git a/modelscope/models/nlp/palm_for_text_generation.py b/modelscope/models/nlp/palm_for_text_generation.py
index e5799feb..f4518d4f 100644
--- a/modelscope/models/nlp/palm_for_text_generation.py
+++ b/modelscope/models/nlp/palm_for_text_generation.py
@@ -1,5 +1,6 @@
 from typing import Dict
 
+from modelscope.metainfo import Models
 from modelscope.utils.constant import Tasks
 from ..base import Model, Tensor
 from ..builder import MODELS
@@ -7,7 +8,7 @@ from ..builder import MODELS
 __all__ = ['PalmForTextGeneration']
 
 
-@MODELS.register_module(Tasks.text_generation, module_name=r'palm2.0')
+@MODELS.register_module(Tasks.text_generation, module_name=Models.palm2_0)
 class PalmForTextGeneration(Model):
 
     def __init__(self, model_dir: str, *args, **kwargs):
diff --git a/modelscope/models/nlp/sbert_for_sentence_similarity.py b/modelscope/models/nlp/sbert_for_sentence_similarity.py
index 98daac92..cbcef1ce 100644
--- a/modelscope/models/nlp/sbert_for_sentence_similarity.py
+++ b/modelscope/models/nlp/sbert_for_sentence_similarity.py
@@ -8,6 +8,7 @@ from sofa import SbertModel
 from sofa.models.sbert.modeling_sbert import SbertPreTrainedModel
 from torch import nn
 
+from modelscope.metainfo import Models
 from modelscope.utils.constant import Tasks
 from ..base import Model, Tensor
 from ..builder import MODELS
@@ -38,8 +39,7 @@ class SbertTextClassifier(SbertPreTrainedModel):
 
 
 @MODELS.register_module(
-    Tasks.sentence_similarity,
-    module_name=r'sbert-base-chinese-sentence-similarity')
+    Tasks.sentence_similarity, module_name=Models.structbert)
 class SbertForSentenceSimilarity(Model):
 
     def __init__(self, model_dir: str, *args, **kwargs):
diff --git a/modelscope/models/nlp/sbert_for_token_classification.py b/modelscope/models/nlp/sbert_for_token_classification.py
index b918dc37..fdf5afaf 100644
--- a/modelscope/models/nlp/sbert_for_token_classification.py
+++ b/modelscope/models/nlp/sbert_for_token_classification.py
@@ -4,6 +4,7 @@ import numpy as np
 import torch
 from sofa import SbertConfig, SbertForTokenClassification
 
+from modelscope.metainfo import Models
 from modelscope.utils.constant import Tasks
 from ..base import Model, Tensor
 from ..builder import MODELS
@@ -11,9 +12,7 @@ from ..builder import MODELS
 __all__ = ['StructBertForTokenClassification']
 
 
-@MODELS.register_module(
-    Tasks.word_segmentation,
-    module_name=r'structbert-chinese-word-segmentation')
+@MODELS.register_module(Tasks.word_segmentation, module_name=Models.structbert)
 class StructBertForTokenClassification(Model):
 
     def __init__(self, model_dir: str, *args, **kwargs):
diff --git a/modelscope/pipelines/audio/linear_aec_pipeline.py b/modelscope/pipelines/audio/linear_aec_pipeline.py
index 528d8d47..70562b19 100644
--- a/modelscope/pipelines/audio/linear_aec_pipeline.py
+++ b/modelscope/pipelines/audio/linear_aec_pipeline.py
@@ -7,6 +7,7 @@ import scipy.io.wavfile as wav
 import torch
 import yaml
 
+from modelscope.metainfo import Pipelines
 from modelscope.preprocessors.audio import LinearAECAndFbank
 from modelscope.utils.constant import ModelFile, Tasks
 from ..base import Pipeline
@@ -39,7 +40,8 @@ def initialize_config(module_cfg):
 
 
 @PIPELINES.register_module(
-    Tasks.speech_signal_process, module_name=r'speech_dfsmn_aec_psm_16k')
+    Tasks.speech_signal_process,
+    module_name=Pipelines.speech_dfsmn_aec_psm_16k)
 class LinearAECPipeline(Pipeline):
     r"""AEC Inference Pipeline only support 16000 sample rate.
 
diff --git a/modelscope/pipelines/audio/text_to_speech_pipeline.py b/modelscope/pipelines/audio/text_to_speech_pipeline.py
index ecd9daac..22586d3e 100644
--- a/modelscope/pipelines/audio/text_to_speech_pipeline.py
+++ b/modelscope/pipelines/audio/text_to_speech_pipeline.py
@@ -3,6 +3,7 @@ from typing import Any, Dict, List
 
 import numpy as np
 
+from modelscope.metainfo import Pipelines
 from modelscope.models import Model
 from modelscope.models.audio.tts.am import SambertNetHifi16k
 from modelscope.models.audio.tts.vocoder import Hifigan16k
@@ -15,7 +16,7 @@ __all__ = ['TextToSpeechSambertHifigan16kPipeline']
 
 
 @PIPELINES.register_module(
-    Tasks.text_to_speech, module_name=r'tts-sambert-hifigan-16k')
+    Tasks.text_to_speech, module_name=Pipelines.sambert_hifigan_16k_tts)
 class TextToSpeechSambertHifigan16kPipeline(Pipeline):
 
     def __init__(self,
diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py
index 59bd298b..7e32f543 100644
--- a/modelscope/pipelines/base.py
+++ b/modelscope/pipelines/base.py
@@ -11,7 +11,7 @@ from modelscope.pydatasets import PyDataset
 from modelscope.utils.config import Config
 from modelscope.utils.logger import get_logger
 from .outputs import TASK_OUTPUTS
-from .util import is_model_name
+from .util import is_model, is_official_hub_path
 
 Tensor = Union['torch.Tensor', 'tf.Tensor']
 Input = Union[str, tuple, PyDataset, 'PIL.Image.Image', 'numpy.ndarray']
@@ -27,12 +27,10 @@ class Pipeline(ABC):
 
     def initiate_single_model(self, model):
         logger.info(f'initiate model from {model}')
-        # TODO @wenmeng.zwm replace model.startswith('damo/') with get_model
-        if isinstance(model, str) and model.startswith('damo/'):
-            if not osp.exists(model):
-                model = snapshot_download(model)
-            return Model.from_pretrained(model) if is_model_name(
-                model) else model
+        if isinstance(model, str) and is_official_hub_path(model):
+            model = snapshot_download(
+                model) if not osp.exists(model) else model
+            return Model.from_pretrained(model) if is_model(model) else model
         elif isinstance(model, Model):
             return model
         else:
diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py
index 5e1fbd87..90d613f8 100644
--- a/modelscope/pipelines/builder.py
+++ b/modelscope/pipelines/builder.py
@@ -3,32 +3,39 @@
 import os.path as osp
 from typing import List, Union
 
+from attr import has
+
+from modelscope.metainfo import Pipelines
 from modelscope.models.base import Model
 from modelscope.utils.config import Config, ConfigDict
-from modelscope.utils.constant import Tasks
+from modelscope.utils.constant import ModelFile, Tasks
+from modelscope.utils.hub import read_config
 from modelscope.utils.registry import Registry, build_from_cfg
 from .base import Pipeline
+from .util import is_official_hub_path
 
 PIPELINES = Registry('pipelines')
 
 DEFAULT_MODEL_FOR_PIPELINE = {
     # TaskName: (pipeline_module_name, model_repo)
     Tasks.word_segmentation:
-    ('structbert-chinese-word-segmentation',
+    (Pipelines.word_segmentation,
      'damo/nlp_structbert_word-segmentation_chinese-base'),
     Tasks.sentence_similarity:
-    ('sbert-base-chinese-sentence-similarity',
+    (Pipelines.sentence_similarity,
      'damo/nlp_structbert_sentence-similarity_chinese-base'),
-    Tasks.image_matting: ('image-matting', 'damo/cv_unet_image-matting'),
-    Tasks.text_classification:
-    ('bert-sentiment-analysis', 'damo/bert-base-sst2'),
-    Tasks.text_generation: ('palm2.0',
+    Tasks.image_matting:
+    (Pipelines.image_matting, 'damo/cv_unet_image-matting'),
+    Tasks.text_classification: (Pipelines.sentiment_analysis,
+                                'damo/bert-base-sst2'),
+    Tasks.text_generation: (Pipelines.text_generation,
                             'damo/nlp_palm2.0_text-generation_chinese-base'),
-    Tasks.image_captioning: ('ofa', 'damo/ofa_image-caption_coco_large_en'),
+    Tasks.image_captioning: (Pipelines.image_caption,
+                             'damo/ofa_image-caption_coco_large_en'),
     Tasks.image_generation:
-    ('person-image-cartoon',
+    (Pipelines.person_image_cartoon,
      'damo/cv_unet_person-image-cartoon_compound-models'),
-    Tasks.ocr_detection: ('ocr-detection',
+    Tasks.ocr_detection: (Pipelines.ocr_detection,
                           'damo/cv_resnet18_ocr-detection-line-level_damo'),
 }
 
@@ -86,30 +93,40 @@ def pipeline(task: str = None,
     if task is None and pipeline_name is None:
         raise ValueError('task or pipeline_name is required')
 
+    assert isinstance(model, (type(None), str, Model, list)), \
+        f'model should be either None, str, List[str], Model, or List[Model], but got {type(model)}'
+
     if pipeline_name is None:
         # get default pipeline for this task
         if isinstance(model, str) \
            or (isinstance(model, list) and isinstance(model[0], str)):
-
-            # if is_model_name(model):
-            if (isinstance(model, str) and model.startswith('damo/')) \
-               or (isinstance(model, list) and model[0].startswith('damo/')) \
-               or (isinstance(model, str) and osp.exists(model)):
-                # TODO @wenmeng.zwm  add support when model is a str of modelhub address
-                # read pipeline info from modelhub configuration file.
-                pipeline_name, default_model_repo = get_default_pipeline_info(
-                    task)
+            if is_official_hub_path(model):
+                # read config file from hub and parse
+                cfg = read_config(model) if isinstance(
+                    model, str) else read_config(model[0])
+                assert hasattr(
+                    cfg,
+                    'pipeline'), 'pipeline config is missing from config file.'
+                pipeline_name = cfg.pipeline.type
             else:
+                # used for test case, when model is str and is not hub path
                 pipeline_name = get_pipeline_by_model_name(task, model)
+        elif isinstance(model, Model) or \
+                (isinstance(model, list) and isinstance(model[0], Model)):
+            # get pipeline info from Model object
+            first_model = model[0] if isinstance(model, list) else model
+            if not hasattr(first_model, 'pipeline'):
+                # model is instantiated by user, we should parse config again
+                cfg = read_config(first_model.model_dir)
+                assert hasattr(
+                    cfg,
+                    'pipeline'), 'pipeline config is missing from config file.'
+                first_model.pipeline = cfg.pipeline
+            pipeline_name = first_model.pipeline.type
         else:
             pipeline_name, default_model_repo = get_default_pipeline_info(task)
-
-        if model is None:
             model = default_model_repo
 
-    assert isinstance(model, (type(None), str, Model, list)), \
-        f'model should be either None, str, List[str], Model, or List[Model], but got {type(model)}'
-
     cfg = ConfigDict(type=pipeline_name, model=model)
 
     if kwargs:
diff --git a/modelscope/pipelines/cv/image_cartoon_pipeline.py b/modelscope/pipelines/cv/image_cartoon_pipeline.py
index d253eaf5..717336e9 100644
--- a/modelscope/pipelines/cv/image_cartoon_pipeline.py
+++ b/modelscope/pipelines/cv/image_cartoon_pipeline.py
@@ -6,6 +6,7 @@ import numpy as np
 import PIL
 import tensorflow as tf
 
+from modelscope.metainfo import Pipelines
 from modelscope.models.cv.cartoon.facelib.facer import FaceAna
 from modelscope.models.cv.cartoon.mtcnn_pytorch.src.align_trans import (
     get_reference_facial_points, warp_and_crop_face)
@@ -25,7 +26,7 @@ logger = get_logger()
 
 
 @PIPELINES.register_module(
-    Tasks.image_generation, module_name='person-image-cartoon')
+    Tasks.image_generation, module_name=Pipelines.person_image_cartoon)
 class ImageCartoonPipeline(Pipeline):
 
     def __init__(self, model: str):
diff --git a/modelscope/pipelines/cv/image_matting_pipeline.py b/modelscope/pipelines/cv/image_matting_pipeline.py
index 0c60dfa7..b3e27e4b 100644
--- a/modelscope/pipelines/cv/image_matting_pipeline.py
+++ b/modelscope/pipelines/cv/image_matting_pipeline.py
@@ -5,6 +5,7 @@ import cv2
 import numpy as np
 import PIL
 
+from modelscope.metainfo import Pipelines
 from modelscope.pipelines.base import Input
 from modelscope.preprocessors import load_image
 from modelscope.utils.constant import ModelFile, Tasks
@@ -16,7 +17,7 @@ logger = get_logger()
 
 
 @PIPELINES.register_module(
-    Tasks.image_matting, module_name=Tasks.image_matting)
+    Tasks.image_matting, module_name=Pipelines.image_matting)
 class ImageMattingPipeline(Pipeline):
 
     def __init__(self, model: str):
diff --git a/modelscope/pipelines/cv/ocr_detection_pipeline.py b/modelscope/pipelines/cv/ocr_detection_pipeline.py
index 9728e441..0502fe36 100644
--- a/modelscope/pipelines/cv/ocr_detection_pipeline.py
+++ b/modelscope/pipelines/cv/ocr_detection_pipeline.py
@@ -10,6 +10,7 @@ import PIL
 import tensorflow as tf
 import tf_slim as slim
 
+from modelscope.metainfo import Pipelines
 from modelscope.pipelines.base import Input
 from modelscope.preprocessors import load_image
 from modelscope.utils.constant import ModelFile, Tasks
@@ -38,7 +39,7 @@ tf.app.flags.DEFINE_float('link_threshold', 0.6,
 
 
 @PIPELINES.register_module(
-    Tasks.ocr_detection, module_name=Tasks.ocr_detection)
+    Tasks.ocr_detection, module_name=Pipelines.ocr_detection)
 class OCRDetectionPipeline(Pipeline):
 
     def __init__(self, model: str):
diff --git a/modelscope/pipelines/multi_modal/image_captioning_pipeline.py b/modelscope/pipelines/multi_modal/image_captioning_pipeline.py
index f0b1f53c..9f32caf4 100644
--- a/modelscope/pipelines/multi_modal/image_captioning_pipeline.py
+++ b/modelscope/pipelines/multi_modal/image_captioning_pipeline.py
@@ -1,5 +1,6 @@
 from typing import Any, Dict, Union
 
+from modelscope.metainfo import Pipelines
 from modelscope.preprocessors import OfaImageCaptionPreprocessor, Preprocessor
 from modelscope.utils.constant import Tasks
 from modelscope.utils.logger import get_logger
@@ -9,7 +10,8 @@ from ..builder import PIPELINES
 logger = get_logger()
 
 
-@PIPELINES.register_module(Tasks.image_captioning, module_name='ofa')
+@PIPELINES.register_module(
+    Tasks.image_captioning, module_name=Pipelines.image_caption)
 class ImageCaptionPipeline(Pipeline):
 
     def __init__(self,
diff --git a/modelscope/pipelines/nlp/sentence_similarity_pipeline.py b/modelscope/pipelines/nlp/sentence_similarity_pipeline.py
index 1b630c10..71df86e2 100644
--- a/modelscope/pipelines/nlp/sentence_similarity_pipeline.py
+++ b/modelscope/pipelines/nlp/sentence_similarity_pipeline.py
@@ -2,6 +2,7 @@ from typing import Any, Dict, Union
 
 import numpy as np
 
+from modelscope.metainfo import Pipelines
 from modelscope.models.nlp import SbertForSentenceSimilarity
 from modelscope.preprocessors import SequenceClassificationPreprocessor
 from modelscope.utils.constant import Tasks
@@ -13,8 +14,7 @@ __all__ = ['SentenceSimilarityPipeline']
 
 
 @PIPELINES.register_module(
-    Tasks.sentence_similarity,
-    module_name=r'sbert-base-chinese-sentence-similarity')
+    Tasks.sentence_similarity, module_name=Pipelines.sentence_similarity)
 class SentenceSimilarityPipeline(Pipeline):
 
     def __init__(self,
diff --git a/modelscope/pipelines/nlp/sequence_classification_pipeline.py b/modelscope/pipelines/nlp/sequence_classification_pipeline.py
index 1dbe2efd..43c81d60 100644
--- a/modelscope/pipelines/nlp/sequence_classification_pipeline.py
+++ b/modelscope/pipelines/nlp/sequence_classification_pipeline.py
@@ -2,6 +2,7 @@ from typing import Any, Dict, Union
 
 import numpy as np
 
+from modelscope.metainfo import Pipelines
 from modelscope.models.nlp import BertForSequenceClassification
 from modelscope.preprocessors import SequenceClassificationPreprocessor
 from modelscope.utils.constant import Tasks
@@ -13,7 +14,7 @@ __all__ = ['SequenceClassificationPipeline']
 
 
 @PIPELINES.register_module(
-    Tasks.text_classification, module_name=r'bert-sentiment-analysis')
+    Tasks.text_classification, module_name=Pipelines.sentiment_analysis)
 class SequenceClassificationPipeline(Pipeline):
 
     def __init__(self,
diff --git a/modelscope/pipelines/nlp/text_generation_pipeline.py b/modelscope/pipelines/nlp/text_generation_pipeline.py
index 881e7ea6..ebd4be8e 100644
--- a/modelscope/pipelines/nlp/text_generation_pipeline.py
+++ b/modelscope/pipelines/nlp/text_generation_pipeline.py
@@ -1,5 +1,6 @@
 from typing import Dict, Optional, Union
 
+from modelscope.metainfo import Pipelines
 from modelscope.models import Model
 from modelscope.models.nlp import PalmForTextGeneration
 from modelscope.preprocessors import TextGenerationPreprocessor
@@ -10,7 +11,8 @@ from ..builder import PIPELINES
 __all__ = ['TextGenerationPipeline']
 
 
-@PIPELINES.register_module(Tasks.text_generation, module_name=r'palm2.0')
+@PIPELINES.register_module(
+    Tasks.text_generation, module_name=Pipelines.text_generation)
 class TextGenerationPipeline(Pipeline):
 
     def __init__(self,
diff --git a/modelscope/pipelines/nlp/word_segmentation_pipeline.py b/modelscope/pipelines/nlp/word_segmentation_pipeline.py
index 1cc08a38..a45dafc3 100644
--- a/modelscope/pipelines/nlp/word_segmentation_pipeline.py
+++ b/modelscope/pipelines/nlp/word_segmentation_pipeline.py
@@ -1,5 +1,6 @@
 from typing import Any, Dict, Optional, Union
 
+from modelscope.metainfo import Pipelines
 from modelscope.models import Model
 from modelscope.models.nlp import StructBertForTokenClassification
 from modelscope.preprocessors import TokenClassifcationPreprocessor
@@ -11,8 +12,7 @@ __all__ = ['WordSegmentationPipeline']
 
 
 @PIPELINES.register_module(
-    Tasks.word_segmentation,
-    module_name=r'structbert-chinese-word-segmentation')
+    Tasks.word_segmentation, module_name=Pipelines.word_segmentation)
 class WordSegmentationPipeline(Pipeline):
 
     def __init__(self,
diff --git a/modelscope/pipelines/util.py b/modelscope/pipelines/util.py
index 6fe6e9fd..d034a7d4 100644
--- a/modelscope/pipelines/util.py
+++ b/modelscope/pipelines/util.py
@@ -2,6 +2,7 @@
 import os.path as osp
 from typing import List, Union
 
+from modelscope.hub.api import HubApi
 from modelscope.hub.file_download import model_file_download
 from modelscope.utils.config import Config
 from modelscope.utils.constant import ModelFile
@@ -19,31 +20,63 @@ def is_config_has_model(cfg_file):
         return False
 
 
-def is_model_name(model: Union[str, List]):
-    """ whether model is a valid modelhub path
+def is_official_hub_path(path: Union[str, List]):
+    """ Whether path is a official hub name or a valid local
+    path to official hub directory.
     """
 
-    def is_model_name_impl(model):
-        if osp.exists(model):
-            cfg_file = osp.join(model, ModelFile.CONFIGURATION)
+    def is_official_hub_impl(path):
+        if osp.exists(path):
+            cfg_file = osp.join(path, ModelFile.CONFIGURATION)
+            return osp.exists(cfg_file)
+        else:
+            try:
+                _ = HubApi().get_model(path)
+                return True
+            except Exception:
+                return False
+
+    if isinstance(path, str):
+        return is_official_hub_impl(path)
+    else:
+        results = [is_official_hub_impl(m) for m in path]
+        all_true = all(results)
+        any_true = any(results)
+        if any_true and not all_true:
+            raise ValueError(
+                f'some model are hub address, some are not, model list: {path}'
+            )
+
+        return all_true
+
+
+def is_model(path: Union[str, List]):
+    """ whether path is a valid modelhub path and containing model config
+    """
+
+    def is_modelhub_path_impl(path):
+        if osp.exists(path):
+            cfg_file = osp.join(path, ModelFile.CONFIGURATION)
             if osp.exists(cfg_file):
                 return is_config_has_model(cfg_file)
             else:
                 return False
         else:
             try:
-                cfg_file = model_file_download(model, ModelFile.CONFIGURATION)
+                cfg_file = model_file_download(path, ModelFile.CONFIGURATION)
                 return is_config_has_model(cfg_file)
             except Exception:
                 return False
 
-    if isinstance(model, str):
-        return is_model_name_impl(model)
+    if isinstance(path, str):
+        return is_modelhub_path_impl(path)
     else:
-        results = [is_model_name_impl(m) for m in model]
+        results = [is_modelhub_path_impl(m) for m in path]
         all_true = all(results)
         any_true = any(results)
         if any_true and not all_true:
-            raise ValueError('some model are hub address, some are not')
+            raise ValueError(
+                f'some models are hub address, some are not, model list: {path}'
+            )
 
         return all_true
diff --git a/modelscope/preprocessors/image.py b/modelscope/preprocessors/image.py
index 6bd8aed5..b2123fb7 100644
--- a/modelscope/preprocessors/image.py
+++ b/modelscope/preprocessors/image.py
@@ -5,11 +5,12 @@ from typing import Dict, Union
 from PIL import Image, ImageOps
 
 from modelscope.fileio import File
+from modelscope.metainfo import Preprocessors
 from modelscope.utils.constant import Fields
 from .builder import PREPROCESSORS
 
 
-@PREPROCESSORS.register_module(Fields.cv)
+@PREPROCESSORS.register_module(Fields.cv, Preprocessors.load_image)
 class LoadImage:
     """Load an image from file or url.
     Added or updated keys are "filename", "img", "img_shape",
diff --git a/modelscope/preprocessors/multi_model.py b/modelscope/preprocessors/multi_model.py
index ea2e7493..aa0bc8a7 100644
--- a/modelscope/preprocessors/multi_model.py
+++ b/modelscope/preprocessors/multi_model.py
@@ -7,6 +7,7 @@ import torch
 from PIL import Image
 
 from modelscope.hub.snapshot_download import snapshot_download
+from modelscope.metainfo import Preprocessors
 from modelscope.utils.constant import Fields, ModelFile
 from modelscope.utils.type_assert import type_assert
 from .base import Preprocessor
@@ -19,7 +20,7 @@ __all__ = [
 
 
 @PREPROCESSORS.register_module(
-    Fields.multi_modal, module_name=r'ofa-image-caption')
+    Fields.multi_modal, module_name=Preprocessors.ofa_image_caption)
 class OfaImageCaptionPreprocessor(Preprocessor):
 
     def __init__(self, model_dir: str, *args, **kwargs):
diff --git a/modelscope/preprocessors/nlp.py b/modelscope/preprocessors/nlp.py
index 0abb01cc..7a47a866 100644
--- a/modelscope/preprocessors/nlp.py
+++ b/modelscope/preprocessors/nlp.py
@@ -5,6 +5,7 @@ from typing import Any, Dict, Union
 
 from transformers import AutoTokenizer
 
+from modelscope.metainfo import Preprocessors
 from modelscope.utils.constant import Fields, InputFields
 from modelscope.utils.type_assert import type_assert
 from .base import Preprocessor
@@ -31,7 +32,7 @@ class Tokenize(Preprocessor):
 
 
 @PREPROCESSORS.register_module(
-    Fields.nlp, module_name=r'bert-sequence-classification')
+    Fields.nlp, module_name=Preprocessors.bert_seq_cls_tokenizer)
 class SequenceClassificationPreprocessor(Preprocessor):
 
     def __init__(self, model_dir: str, *args, **kwargs):
@@ -124,7 +125,8 @@ class SequenceClassificationPreprocessor(Preprocessor):
         return rst
 
 
-@PREPROCESSORS.register_module(Fields.nlp, module_name=r'palm2.0')
+@PREPROCESSORS.register_module(
+    Fields.nlp, module_name=Preprocessors.palm_text_gen_tokenizer)
 class TextGenerationPreprocessor(Preprocessor):
 
     def __init__(self, model_dir: str, tokenizer, *args, **kwargs):
@@ -180,7 +182,7 @@ class TextGenerationPreprocessor(Preprocessor):
 
 
 @PREPROCESSORS.register_module(
-    Fields.nlp, module_name=r'bert-token-classification')
+    Fields.nlp, module_name=Preprocessors.sbert_token_cls_tokenizer)
 class TokenClassifcationPreprocessor(Preprocessor):
 
     def __init__(self, model_dir: str, *args, **kwargs):
diff --git a/modelscope/preprocessors/text_to_speech.py b/modelscope/preprocessors/text_to_speech.py
index 8b8dae14..9d8af6fa 100644
--- a/modelscope/preprocessors/text_to_speech.py
+++ b/modelscope/preprocessors/text_to_speech.py
@@ -3,6 +3,7 @@ import io
 from typing import Any, Dict, Union
 
 from modelscope.fileio import File
+from modelscope.metainfo import Preprocessors
 from modelscope.models.audio.tts.frontend import GenericTtsFrontend
 from modelscope.models.base import Model
 from modelscope.utils.audio.tts_exceptions import *  # noqa F403
@@ -10,11 +11,11 @@ from modelscope.utils.constant import Fields
 from .base import Preprocessor
 from .builder import PREPROCESSORS
 
-__all__ = ['TextToTacotronSymbols', 'text_to_tacotron_symbols']
+__all__ = ['TextToTacotronSymbols']
 
 
 @PREPROCESSORS.register_module(
-    Fields.audio, module_name=r'text_to_tacotron_symbols')
+    Fields.audio, module_name=Preprocessors.text_to_tacotron_symbols)
 class TextToTacotronSymbols(Preprocessor):
     """extract tacotron symbols from text.
 
diff --git a/modelscope/utils/hub.py b/modelscope/utils/hub.py
index 245642d1..01a1b1b0 100644
--- a/modelscope/utils/hub.py
+++ b/modelscope/utils/hub.py
@@ -1,11 +1,49 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 
 import os
+import os.path as osp
+from typing import List, Union
 
-from modelscope.hub.constants import MODEL_ID_SEPARATOR
+from numpy import deprecate
+
+from modelscope.hub.file_download import model_file_download
+from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.hub.utils.utils import get_cache_dir
+from modelscope.utils.config import Config
+from modelscope.utils.constant import ModelFile
 
 
 # temp solution before the hub-cache is in place
+@deprecate
 def get_model_cache_dir(model_id: str):
     return os.path.join(get_cache_dir(), model_id)
+
+
+def read_config(model_id_or_path: str):
+    """ Read config from hub or local path
+
+    Args:
+        model_id_or_path (str): Model repo name or local directory path.
+
+    Return:
+        config (:obj:`Config`): config object
+    """
+    if not os.path.exists(model_id_or_path):
+        local_path = model_file_download(model_id_or_path,
+                                         ModelFile.CONFIGURATION)
+    else:
+        local_path = os.path.join(model_id_or_path, ModelFile.CONFIGURATION)
+
+    return Config.from_file(local_path)
+
+
+def auto_load(model: Union[str, List[str]]):
+    if isinstance(model, str):
+        if not osp.exists(model):
+            model = snapshot_download(model)
+    else:
+        model = [
+            snapshot_download(m) if not osp.exists(m) else m for m in model
+        ]
+
+    return model
diff --git a/requirements/audio.txt b/requirements/audio.txt
index 140836a8..3b625261 100644
--- a/requirements/audio.txt
+++ b/requirements/audio.txt
@@ -1,10 +1,10 @@
 #tts
 h5py==2.10.0
-#https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.1-cp36-cp36m-linux_x86_64.whl
-https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.1-cp37-cp37m-linux_x86_64.whl
+https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.1-cp36-cp36m-linux_x86_64.whl; python_version=='3.6'
+https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.1-cp37-cp37m-linux_x86_64.whl; python_version=='3.7'
+https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.1-cp38-cp38-linux_x86_64.whl; python_version=='3.8'
+https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.1-cp39-cp39-linux_x86_64.whl; python_version=='3.9'
 https://swap.oss-cn-hangzhou.aliyuncs.com/Jiaqi%2Fmaas%2Ftts%2Frequirements%2Fpytorch_wavelets-1.3.0-py3-none-any.whl?Expires=1685688388&OSSAccessKeyId=LTAI4Ffebq4d9jTVDwiSbY4L&Signature=jcQbg5EZ%2Bdys3%2F4BRn3srrKLdIg%3D
-#https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.1-cp38-cp38-linux_x86_64.whl
-#https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.1-cp39-cp39-linux_x86_64.whl
 inflect
 keras==2.2.4
 librosa
@@ -12,7 +12,7 @@ lxml
 matplotlib
 nara_wpe
 numpy==1.18.*
-protobuf==3.20.*
+protobuf>3,<=3.20
 ptflops
 PyWavelets>=1.0.0
 scikit-learn==0.23.2
diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py
index 751b6975..23ea678b 100644
--- a/tests/pipelines/test_image_matting.py
+++ b/tests/pipelines/test_image_matting.py
@@ -60,7 +60,7 @@ class ImageMattingTest(unittest.TestCase):
         cv2.imwrite('result.png', result['output_png'])
         print(f'Output written to {osp.abspath("result.png")}')
 
-    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_run_with_modelscope_dataset(self):
         dataset = PyDataset.load('beans', split='train', target='image')
         img_matting = pipeline(Tasks.image_matting, model=self.model_id)
diff --git a/tests/pipelines/test_speech_signal_process.py b/tests/pipelines/test_speech_signal_process.py
index f1369a2f..23939f8e 100644
--- a/tests/pipelines/test_speech_signal_process.py
+++ b/tests/pipelines/test_speech_signal_process.py
@@ -3,6 +3,7 @@ import shutil
 import unittest
 
 from modelscope.fileio import File
+from modelscope.metainfo import Pipelines
 from modelscope.pipelines import pipeline
 from modelscope.utils.constant import Tasks
 
@@ -42,7 +43,7 @@ class SpeechSignalProcessTest(unittest.TestCase):
         aec = pipeline(
             Tasks.speech_signal_process,
             model=self.model_id,
-            pipeline_name=r'speech_dfsmn_aec_psm_16k')
+            pipeline_name=Pipelines.speech_dfsmn_aec_psm_16k)
         aec(input, output_path='output.wav')
 
 
diff --git a/tests/pipelines/test_text_classification.py b/tests/pipelines/test_text_classification.py
index 8ecd9ed4..2581c220 100644
--- a/tests/pipelines/test_text_classification.py
+++ b/tests/pipelines/test_text_classification.py
@@ -38,31 +38,6 @@ class SequenceClassificationTest(unittest.TestCase):
                 break
             print(r)
 
-    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
-    def test_run(self):
-        model_url = 'https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com' \
-                    '/release/easynlp_modelzoo/alibaba-pai/bert-base-sst2.zip'
-        cache_path_str = r'.cache/easynlp/bert-base-sst2.zip'
-        cache_path = Path(cache_path_str)
-
-        if not cache_path.exists():
-            cache_path.parent.mkdir(parents=True, exist_ok=True)
-            cache_path.touch(exist_ok=True)
-            with cache_path.open('wb') as ofile:
-                ofile.write(File.read(model_url))
-
-        with zipfile.ZipFile(cache_path_str, 'r') as zipf:
-            zipf.extractall(cache_path.parent)
-        path = r'.cache/easynlp/'
-        model = BertForSequenceClassification(path)
-        preprocessor = SequenceClassificationPreprocessor(
-            path, first_sequence='sentence', second_sequence=None)
-        pipeline1 = SequenceClassificationPipeline(model, preprocessor)
-        self.predict(pipeline1)
-        pipeline2 = pipeline(
-            Tasks.text_classification, model=model, preprocessor=preprocessor)
-        print(pipeline2('Hello world!'))
-
     @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_run_with_model_from_modelhub(self):
         model = Model.from_pretrained(self.model_id)
diff --git a/tests/pipelines/test_text_to_speech.py b/tests/pipelines/test_text_to_speech.py
index c9b988a1..0d76cbac 100644
--- a/tests/pipelines/test_text_to_speech.py
+++ b/tests/pipelines/test_text_to_speech.py
@@ -11,6 +11,7 @@ import torch
 from scipy.io.wavfile import write
 
 from modelscope.fileio import File
+from modelscope.metainfo import Pipelines, Preprocessors
 from modelscope.models import Model, build_model
 from modelscope.models.audio.tts.am import SambertNetHifi16k
 from modelscope.models.audio.tts.vocoder import AttrDict, Hifigan16k
@@ -32,7 +33,7 @@ class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase):
         voc_model_id = 'damo/speech_hifigan16k_tts_zhitian_emo'
 
         cfg_preprocessor = dict(
-            type='text_to_tacotron_symbols',
+            type=Preprocessors.text_to_tacotron_symbols,
             model_name=preprocessor_model_id,
             lang_type=lang_type)
         preprocessor = build_preprocessor(cfg_preprocessor, Fields.audio)
@@ -45,7 +46,7 @@ class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase):
         self.assertTrue(voc is not None)
 
         sambert_tts = pipeline(
-            pipeline_name='tts-sambert-hifigan-16k',
+            pipeline_name=Pipelines.sambert_hifigan_16k_tts,
             config_file='',
             model=[am, voc],
             preprocessor=preprocessor)
diff --git a/tests/preprocessors/test_text_to_speech.py b/tests/preprocessors/test_text_to_speech.py
index 18b66987..fd2473fd 100644
--- a/tests/preprocessors/test_text_to_speech.py
+++ b/tests/preprocessors/test_text_to_speech.py
@@ -1,6 +1,7 @@
 import shutil
 import unittest
 
+from modelscope.metainfo import Preprocessors
 from modelscope.preprocessors import build_preprocessor
 from modelscope.utils.constant import Fields, InputFields
 from modelscope.utils.logger import get_logger
@@ -14,7 +15,7 @@ class TtsPreprocessorTest(unittest.TestCase):
         lang_type = 'pinyin'
         text = '今天天气不错，我们去散步吧。'
         cfg = dict(
-            type='text_to_tacotron_symbols',
+            type=Preprocessors.text_to_tacotron_symbols,
             model_name='damo/speech_binary_tts_frontend_resource',
             lang_type=lang_type)
         preprocessor = build_preprocessor(cfg, Fields.audio)
diff --git a/tests/pydatasets/test_py_dataset.py b/tests/pydatasets/test_py_dataset.py
index 4ad767fa..bc38e369 100644
--- a/tests/pydatasets/test_py_dataset.py
+++ b/tests/pydatasets/test_py_dataset.py
@@ -33,6 +33,8 @@ class ImgPreprocessor(Preprocessor):
 
 class PyDatasetTest(unittest.TestCase):
 
+    @unittest.skipUnless(test_level() >= 2,
+                         'skip test due to dataset api problem')
     def test_ds_basic(self):
         ms_ds_full = PyDataset.load('squad')
         ms_ds_full_hf = hfdata.load_dataset('squad')

From 849410e107c2ce27d00f69641cb14436b3507388 Mon Sep 17 00:00:00 2001
From: "luoyiyun.lyy" <luoyiyun.lyy@alibaba-inc.com>
Date: Wed, 22 Jun 2022 15:49:58 +0800
Subject: [PATCH 3/3] [to #41474818]fix: fix errors in task name definition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

修改constant.py中task name定义的错误
- sentence-similarity重复定义
- fill-mask多加了一个空格
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9125956

    * [to #41474818]fix: fix errors in task name definition
---
 modelscope/utils/constant.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py
index c26a9e24..3eb2890a 100644
--- a/modelscope/utils/constant.py
+++ b/modelscope/utils/constant.py
@@ -43,8 +43,7 @@ class Tasks(object):
     text_generation = 'text-generation'
     table_question_answering = 'table-question-answering'
     feature_extraction = 'feature-extraction'
-    sentence_similarity = 'sentence-similarity'
-    fill_mask = 'fill-mask '
+    fill_mask = 'fill-mask'
     summarization = 'summarization'
     question_answering = 'question-answering'