diff --git a/modelscope/utils/config_ds.py b/modelscope/utils/config_ds.py
index 72a25887..eae85888 100644
--- a/modelscope/utils/config_ds.py
+++ b/modelscope/utils/config_ds.py
@@ -5,13 +5,14 @@ from pathlib import Path
 
 # Cache location
 from modelscope.hub.constants import DEFAULT_MODELSCOPE_DATA_ENDPOINT
-from modelscope.utils.file_utils import get_modelscope_cache_dir
+from modelscope.utils.file_utils import (get_dataset_cache_root,
+                                         get_modelscope_cache_dir)
 
 MS_CACHE_HOME = get_modelscope_cache_dir()
 
-DEFAULT_MS_DATASETS_CACHE = os.path.join(MS_CACHE_HOME, 'hub', 'datasets')
-MS_DATASETS_CACHE = Path(
-    os.getenv('MS_DATASETS_CACHE', DEFAULT_MS_DATASETS_CACHE))
+# NOTE: removed `MS_DATASETS_CACHE` env,
+# default is `~/.cache/modelscope/hub/datasets`
+MS_DATASETS_CACHE = get_dataset_cache_root()
 
 DOWNLOADED_DATASETS_DIR = 'downloads'
 DEFAULT_DOWNLOADED_DATASETS_PATH = os.path.join(MS_DATASETS_CACHE,
diff --git a/modelscope/utils/file_utils.py b/modelscope/utils/file_utils.py
index c00e8d26..50f66e34 100644
--- a/modelscope/utils/file_utils.py
+++ b/modelscope/utils/file_utils.py
@@ -60,11 +60,16 @@ def get_model_cache_root() -> str:
 
 def get_dataset_cache_root() -> str:
     """Get dataset raw file cache root path.
+    if `MODELSCOPE_CACHE` is set, return `MODELSCOPE_CACHE/datasets`,
+    else return `~/.cache/modelscope/hub/datasets`
 
     Returns:
         str: the modelscope dataset raw file cache root.
     """
-    return os.path.join(get_modelscope_cache_dir(), 'datasets')
+    if os.getenv('MODELSCOPE_CACHE'):
+        return os.path.join(get_modelscope_cache_dir(), 'datasets')
+    else:
+        return os.path.join(get_modelscope_cache_dir(), 'hub', 'datasets')
 
 
 def get_dataset_cache_dir(dataset_id: str) -> str: