Unify datasets cache dir (#1178)

* fix cache

* fix lint

* fix dataset cache

* fix lint

* remove
This commit is contained in:
Yunlin Mao
2025-01-10 17:56:05 +08:00
committed by suluyana
parent 9441eb7310
commit cfd32abab2
2 changed files with 11 additions and 5 deletions

View File

@@ -5,13 +5,14 @@ from pathlib import Path
# Cache location
from modelscope.hub.constants import DEFAULT_MODELSCOPE_DATA_ENDPOINT
from modelscope.utils.file_utils import get_modelscope_cache_dir
from modelscope.utils.file_utils import (get_dataset_cache_root,
get_modelscope_cache_dir)
MS_CACHE_HOME = get_modelscope_cache_dir()
DEFAULT_MS_DATASETS_CACHE = os.path.join(MS_CACHE_HOME, 'hub', 'datasets')
MS_DATASETS_CACHE = Path(
os.getenv('MS_DATASETS_CACHE', DEFAULT_MS_DATASETS_CACHE))
# NOTE: the `MS_DATASETS_CACHE` environment variable is no longer read;
# the default location is `~/.cache/modelscope/hub/datasets`.
MS_DATASETS_CACHE = get_dataset_cache_root()
DOWNLOADED_DATASETS_DIR = 'downloads'
DEFAULT_DOWNLOADED_DATASETS_PATH = os.path.join(MS_DATASETS_CACHE,

View File

@@ -60,11 +60,16 @@ def get_model_cache_root() -> str:
def get_dataset_cache_root() -> str:
    """Get dataset raw file cache root path.

    If the `MODELSCOPE_CACHE` environment variable is set, return
    `MODELSCOPE_CACHE/datasets`; otherwise return
    `~/.cache/modelscope/hub/datasets`.

    Returns:
        str: the modelscope dataset raw file cache root.
    """
    cache_dir = get_modelscope_cache_dir()
    # With an explicit `MODELSCOPE_CACHE`, datasets sit directly under the
    # cache dir; without it they are nested under the `hub` sub-directory.
    if os.getenv('MODELSCOPE_CACHE'):
        return os.path.join(cache_dir, 'datasets')
    return os.path.join(cache_dir, 'hub', 'datasets')
def get_dataset_cache_dir(dataset_id: str) -> str: