From 82ee20f4473bcab2e5492430bfc74b712d2f8ff9 Mon Sep 17 00:00:00 2001 From: liuyhwangyh Date: Thu, 23 May 2024 20:34:52 +0800 Subject: [PATCH] fix issue #845 (#861) * fix #845 Co-authored-by: mulin.lyh --- modelscope/hub/api.py | 2 ++ modelscope/hub/file_download.py | 7 ++-- modelscope/hub/git.py | 12 ++++--- modelscope/hub/snapshot_download.py | 5 +-- modelscope/hub/utils/utils.py | 19 +--------- modelscope/utils/ast_utils.py | 5 +-- modelscope/utils/audio/audio_utils.py | 5 ++- modelscope/utils/config_ds.py | 9 ++--- modelscope/utils/deploy_checker.py | 4 --- modelscope/utils/file_utils.py | 36 ++++++++++++++++++- modelscope/utils/plugins.py | 4 +-- modelscope/version.py | 2 +- tests/json_call_test.py | 7 ++-- tests/pipelines/test_ofa_tasks.py | 4 ++- tests/run_analysis.py | 9 +++-- .../test_image_defrcn_fewshot_trainer.py | 4 +-- 16 files changed, 75 insertions(+), 59 deletions(-) diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index ff921699..d0bb9c1a 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -267,6 +267,8 @@ class HubApi: This function must be called before calling HubApi's login with a valid token which can be obtained from ModelScope's website. + If any error, please upload via git commands. + Args: model_id (str): The model id to be uploaded, caller must have write permission for it. diff --git a/modelscope/hub/file_download.py b/modelscope/hub/file_download.py index 8a204487..c925f306 100644 --- a/modelscope/hub/file_download.py +++ b/modelscope/hub/file_download.py @@ -21,11 +21,12 @@ from modelscope.hub.constants import ( API_FILE_DOWNLOAD_TIMEOUT, FILE_HASH, MODELSCOPE_DOWNLOAD_PARALLELS, MODELSCOPE_PARALLEL_DOWNLOAD_THRESHOLD_MB) from modelscope.utils.constant import DEFAULT_MODEL_REVISION +from modelscope.utils.file_utils import get_model_cache_root from modelscope.utils.logger import get_logger from .errors import FileDownloadError, NotExistError from .utils.caching import ModelFileSystemCache -from .utils.utils import (file_integrity_validation, get_cache_dir, - get_endpoint, model_id_to_group_owner_name) +from .utils.utils import (file_integrity_validation, get_endpoint, + model_id_to_group_owner_name) logger = get_logger() @@ -75,7 +76,7 @@ def model_file_download( if some parameter value is invalid """ if cache_dir is None: - cache_dir = get_cache_dir() + cache_dir = get_model_cache_root() if isinstance(cache_dir, Path): cache_dir = str(cache_dir) temporary_cache_dir = os.path.join(cache_dir, 'temp') diff --git a/modelscope/hub/git.py b/modelscope/hub/git.py index b0fae148..581f248f 100644 --- a/modelscope/hub/git.py +++ b/modelscope/hub/git.py @@ -45,8 +45,9 @@ class GitCommandWrapper(metaclass=Singleton): logger.debug(' '.join(args)) git_env = os.environ.copy() git_env['GIT_TERMINAL_PROMPT'] = '0' + command = [self.git_path, *args] response = subprocess.run( - [self.git_path, *args], + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=git_env, @@ -55,10 +56,11 @@ class GitCommandWrapper(metaclass=Singleton): response.check_returncode() return response except subprocess.CalledProcessError as error: - logger.error('There are error run git command.') - raise GitError( - 'stdout: %s, stderr: %s' % - (response.stdout.decode('utf8'), error.stderr.decode('utf8'))) + output = 'stdout: %s, stderr: %s' % ( + response.stdout.decode('utf8'), error.stderr.decode('utf8')) + logger.error('Running git command: %s failed, output: %s.' % + (command, output)) + raise GitError(output) def config_auth_token(self, repo_dir, auth_token): url = self.get_repo_remote_url(repo_dir) diff --git a/modelscope/hub/snapshot_download.py b/modelscope/hub/snapshot_download.py index 7000b850..128a251d 100644 --- a/modelscope/hub/snapshot_download.py +++ b/modelscope/hub/snapshot_download.py @@ -9,13 +9,14 @@ from typing import Dict, List, Optional, Union from modelscope.hub.api import HubApi, ModelScopeConfig from modelscope.utils.constant import DEFAULT_MODEL_REVISION +from modelscope.utils.file_utils import get_model_cache_root from modelscope.utils.logger import get_logger from .constants import (FILE_HASH, MODELSCOPE_DOWNLOAD_PARALLELS, MODELSCOPE_PARALLEL_DOWNLOAD_THRESHOLD_MB) from .file_download import (get_file_download_url, http_get_file, parallel_download) from .utils.caching import ModelFileSystemCache -from .utils.utils import (file_integrity_validation, get_cache_dir, +from .utils.utils import (file_integrity_validation, model_id_to_group_owner_name) logger = get_logger() @@ -65,7 +66,7 @@ def snapshot_download(model_id: str, """ if cache_dir is None: - cache_dir = get_cache_dir() + cache_dir = get_model_cache_root() if isinstance(cache_dir, Path): cache_dir = str(cache_dir) temporary_cache_dir = os.path.join(cache_dir, 'temp') diff --git a/modelscope/hub/utils/utils.py b/modelscope/hub/utils/utils.py index 31e6e72c..64d9f5bb 100644 --- a/modelscope/hub/utils/utils.py +++ b/modelscope/hub/utils/utils.py @@ -12,7 +12,7 @@ from modelscope.hub.constants import (DEFAULT_MODELSCOPE_DOMAIN, MODEL_ID_SEPARATOR, MODELSCOPE_SDK_DEBUG, MODELSCOPE_URL_SCHEME) from modelscope.hub.errors import FileIntegrityError -from modelscope.utils.file_utils import get_default_cache_dir +from modelscope.utils.file_utils import get_default_modelscope_cache_dir from modelscope.utils.logger import get_logger logger = get_logger() @@ -28,23 +28,6 @@ def model_id_to_group_owner_name(model_id): return group_or_owner, name -def get_cache_dir(model_id: Optional[str] = None): - """cache dir precedence: - function parameter > environment > ~/.cache/modelscope/hub - - Args: - model_id (str, optional): The model id. - - Returns: - str: the model_id dir if model_id not None, otherwise cache root dir. - """ - default_cache_dir = get_default_cache_dir() - base_path = os.getenv('MODELSCOPE_CACHE', - os.path.join(default_cache_dir, 'hub')) - return base_path if model_id is None else os.path.join( - base_path, model_id + '/') - - def get_release_datetime(): if MODELSCOPE_SDK_DEBUG in os.environ: rt = int(round(datetime.now().timestamp())) diff --git a/modelscope/utils/ast_utils.py b/modelscope/utils/ast_utils.py index 1aca1ce1..05e2e237 100644 --- a/modelscope/utils/ast_utils.py +++ b/modelscope/utils/ast_utils.py @@ -14,11 +14,12 @@ import gast import json from modelscope.fileio.file import LocalStorage +# do not delete from modelscope.metainfo import (CustomDatasets, Heads, Hooks, LR_Schedulers, Metrics, Models, Optimizers, Pipelines, Preprocessors, TaskModels, Trainers) from modelscope.utils.constant import Fields, Tasks -from modelscope.utils.file_utils import get_default_cache_dir +from modelscope.utils.file_utils import get_modelscope_cache_dir from modelscope.utils.logger import get_logger from modelscope.utils.registry import default_group @@ -29,7 +30,7 @@ p = Path(__file__) # get the path of package 'modelscope' SKIP_FUNCTION_SCANNING = True MODELSCOPE_PATH = p.resolve().parents[1] -INDEXER_FILE_DIR = get_default_cache_dir() +INDEXER_FILE_DIR = get_modelscope_cache_dir() REGISTER_MODULE = 'register_module' IGNORED_PACKAGES = ['modelscope', '.'] SCAN_SUB_FOLDERS = [ diff --git a/modelscope/utils/audio/audio_utils.py b/modelscope/utils/audio/audio_utils.py index 562769b8..5b53bf6c 100644 --- a/modelscope/utils/audio/audio_utils.py +++ b/modelscope/utils/audio/audio_utils.py @@ -1,7 +1,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import os import re -import shutil import struct import sys import tempfile @@ -11,7 +10,7 @@ from urllib.parse import urlparse import numpy as np from modelscope.fileio.file import HTTPStorage -from modelscope.hub.utils.utils import get_cache_dir +from modelscope.utils.file_utils import get_model_cache_root from modelscope.utils.hub import snapshot_download from modelscope.utils.logger import get_logger @@ -334,7 +333,7 @@ def update_local_model(model_config, model_path, extra_args): model_revision = extra_args['update_model'] if model_config.__contains__('model'): model_name = model_config['model'] - dst_dir_root = get_cache_dir() + dst_dir_root = get_model_cache_root() if isinstance(model_path, str) and os.path.exists( model_path) and not model_path.startswith(dst_dir_root): try: diff --git a/modelscope/utils/config_ds.py b/modelscope/utils/config_ds.py index fce823c4..72a25887 100644 --- a/modelscope/utils/config_ds.py +++ b/modelscope/utils/config_ds.py @@ -5,14 +5,11 @@ from pathlib import Path # Cache location from modelscope.hub.constants import DEFAULT_MODELSCOPE_DATA_ENDPOINT +from modelscope.utils.file_utils import get_modelscope_cache_dir -DEFAULT_CACHE_HOME = Path.home().joinpath('.cache') -CACHE_HOME = os.getenv('CACHE_HOME', DEFAULT_CACHE_HOME) -DEFAULT_MS_CACHE_HOME = os.path.join(CACHE_HOME, 'modelscope', 'hub') -MS_CACHE_HOME = os.path.expanduser( - os.getenv('MS_CACHE_HOME', DEFAULT_MS_CACHE_HOME)) +MS_CACHE_HOME = get_modelscope_cache_dir() -DEFAULT_MS_DATASETS_CACHE = os.path.join(MS_CACHE_HOME, 'datasets') +DEFAULT_MS_DATASETS_CACHE = os.path.join(MS_CACHE_HOME, 'hub', 'datasets') MS_DATASETS_CACHE = Path( os.getenv('MS_DATASETS_CACHE', DEFAULT_MS_DATASETS_CACHE)) diff --git a/modelscope/utils/deploy_checker.py b/modelscope/utils/deploy_checker.py index c57f7d64..9d2ea54a 100644 --- a/modelscope/utils/deploy_checker.py +++ b/modelscope/utils/deploy_checker.py @@ -1,13 +1,9 @@ import argparse -import os import traceback from typing import List, Union -import json - from modelscope.hub.api import HubApi from modelscope.hub.file_download import model_file_download -from modelscope.hub.utils.utils import get_cache_dir from modelscope.pipelines import pipeline from modelscope.utils.config import Config from modelscope.utils.constant import ModelFile diff --git a/modelscope/utils/file_utils.py b/modelscope/utils/file_utils.py index 6bf37698..56c32441 100644 --- a/modelscope/utils/file_utils.py +++ b/modelscope/utils/file_utils.py @@ -31,7 +31,7 @@ def func_receive_dict_inputs(func): return False -def get_default_cache_dir(): +def get_default_modelscope_cache_dir(): """ default base dir: '~/.cache/modelscope' """ @@ -39,6 +39,40 @@ def get_default_cache_dir(): return default_cache_dir +def get_modelscope_cache_dir() -> str: + """Get modelscope cache dir, default location or + setting with MODELSCOPE_CACHE + + Returns: + str: the modelscope cache root. + """ + return os.getenv('MODELSCOPE_CACHE', get_default_modelscope_cache_dir()) + + +def get_model_cache_root() -> str: + """Get model cache root path. + + Returns: + str: the modelscope cache root. + """ + return os.path.join(get_modelscope_cache_dir(), 'hub') + + +def get_model_cache_dir(model_id: str) -> str: + """cache dir precedence: + function parameter > environment > ~/.cache/modelscope/hub/model_id + + Args: + model_id (str, optional): The model id. + + Returns: + str: the model_id dir if model_id not None, otherwise cache root dir. + """ + root_path = get_model_cache_root() + return root_path if model_id is None else os.path.join( + root_path, model_id + '/') + + def read_file(path): with open(path, 'r') as f: diff --git a/modelscope/utils/plugins.py b/modelscope/utils/plugins.py index b4485830..e0731c8c 100644 --- a/modelscope/utils/plugins.py +++ b/modelscope/utils/plugins.py @@ -20,14 +20,14 @@ import pkg_resources from modelscope.fileio.file import LocalStorage from modelscope.utils.ast_utils import FilesAstScanning from modelscope.utils.constant import DEFAULT_MODEL_REVISION -from modelscope.utils.file_utils import get_default_cache_dir +from modelscope.utils.file_utils import get_modelscope_cache_dir from modelscope.utils.hub import read_config, snapshot_download from modelscope.utils.logger import get_logger logger = get_logger() storage = LocalStorage() -MODELSCOPE_FILE_DIR = get_default_cache_dir() +MODELSCOPE_FILE_DIR = get_modelscope_cache_dir() MODELSCOPE_DYNAMIC_MODULE = 'modelscope_modules' BASE_MODULE_DIR = os.path.join(MODELSCOPE_FILE_DIR, MODELSCOPE_DYNAMIC_MODULE) diff --git a/modelscope/version.py b/modelscope/version.py index fb0e01f3..031a86b4 100644 --- a/modelscope/version.py +++ b/modelscope/version.py @@ -1,5 +1,5 @@ # Make sure to modify __release_datetime__ to release time when making official release. -__version__ = '1.9.4' +__version__ = '2.0.0' # default release datetime for branches under active development is set # to be a time far-far-away-into-the-future __release_datetime__ = '2099-09-06 00:00:00' diff --git a/tests/json_call_test.py b/tests/json_call_test.py index 7073a90d..df3f3146 100644 --- a/tests/json_call_test.py +++ b/tests/json_call_test.py @@ -4,10 +4,10 @@ import json from modelscope.hub.api import HubApi from modelscope.hub.file_download import model_file_download -from modelscope.hub.utils.utils import get_cache_dir from modelscope.pipelines import pipeline from modelscope.utils.config import Config from modelscope.utils.constant import ModelFile +from modelscope.utils.file_utils import get_model_cache_dir from modelscope.utils.input_output import ( call_pipeline_with_json, get_pipeline_information_by_pipeline, get_task_input_examples, pipeline_output_to_service_base64_output) @@ -20,9 +20,8 @@ class ModelJsonTest: def test_single(self, model_id: str, model_revision=None): # get model_revision & task info - cache_root = get_cache_dir() - configuration_file = os.path.join(cache_root, model_id, - ModelFile.CONFIGURATION) + configuration_file = os.path.join( + get_model_cache_dir(model_id), ModelFile.CONFIGURATION) if not model_revision: model_revision = self.api.list_model_revisions( model_id=model_id)[0] diff --git a/tests/pipelines/test_ofa_tasks.py b/tests/pipelines/test_ofa_tasks.py index 55c3ae65..5d4709ad 100644 --- a/tests/pipelines/test_ofa_tasks.py +++ b/tests/pipelines/test_ofa_tasks.py @@ -316,7 +316,9 @@ class OfaTasksTest(unittest.TestCase): result[OutputKeys.OUTPUT_IMG].save('result.png') print(f'Output written to {osp.abspath("result.png")}') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipUnless( + test_level() >= 1, + 'skip test in current test level, model has no text2phone_dict.txt') def test_run_with_asr_with_name(self): model = 'damo/ofa_mmspeech_pretrain_base_zh' ofa_pipe = pipeline(Tasks.auto_speech_recognition, model=model) diff --git a/tests/run_analysis.py b/tests/run_analysis.py index 76a665ff..a10b2e03 100644 --- a/tests/run_analysis.py +++ b/tests/run_analysis.py @@ -12,10 +12,10 @@ from utils.source_file_analyzer import (get_all_register_modules, from modelscope.hub.api import HubApi from modelscope.hub.file_download import model_file_download -from modelscope.hub.utils.utils import (get_cache_dir, - model_id_to_group_owner_name) +from modelscope.hub.utils.utils import model_id_to_group_owner_name from modelscope.utils.config import Config from modelscope.utils.constant import ModelFile +from modelscope.utils.file_utils import get_model_cache_dir from modelscope.utils.logger import get_logger logger = get_logger() @@ -36,12 +36,11 @@ def get_models_info(groups: list) -> dict: if len(models) >= total_count: break page += 1 - cache_root = get_cache_dir() models_info = {} # key model id, value model info for model_info in models: model_id = '%s/%s' % (group, model_info['Name']) - configuration_file = os.path.join(cache_root, model_id, - ModelFile.CONFIGURATION) + configuration_file = os.path.join( + get_model_cache_dir(model_id), ModelFile.CONFIGURATION) if not os.path.exists(configuration_file): try: model_revisions = api.list_model_revisions(model_id=model_id) diff --git a/tests/trainers/test_image_defrcn_fewshot_trainer.py b/tests/trainers/test_image_defrcn_fewshot_trainer.py index 440849f1..d042fc23 100644 --- a/tests/trainers/test_image_defrcn_fewshot_trainer.py +++ b/tests/trainers/test_image_defrcn_fewshot_trainer.py @@ -6,11 +6,11 @@ import sys import tempfile import unittest -from modelscope.hub.utils.utils import get_cache_dir from modelscope.metainfo import Trainers from modelscope.msdatasets import MsDataset from modelscope.trainers import build_trainer from modelscope.utils.constant import DownloadMode +from modelscope.utils.file_utils import get_model_cache_dir from modelscope.utils.test_utils import test_level @@ -57,7 +57,7 @@ class TestImageDefrcnFewShotTrainer(unittest.TestCase): cfg.model.roi_heads.freeze_feat = False cfg.model.roi_heads.cls_dropout = False cfg.model.weights = os.path.join( - get_cache_dir(), self.model_id, + get_model_cache_dir(self.model_id), 'ImageNetPretrained/MSRA/R-101.pkl') cfg.datasets.root = self.data_dir