diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index 0460d2bd..1955976c 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -82,7 +82,8 @@ from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, DownloadChannel, DownloadMode, Frameworks, ModelFile, Tasks, VirgoDatasetConfig) -from modelscope.utils.file_utils import get_file_hash, get_file_size +from modelscope.utils.file_utils import (get_file_hash, get_file_size, + is_relative_path) from modelscope.utils.logger import get_logger from modelscope.utils.repo_utils import (DATASET_LFS_SUFFIX, DEFAULT_IGNORE_PATTERNS, @@ -1461,7 +1462,6 @@ class HubApi: >>> for commit in commit_history.commits: ... print(f"{commit.short_id}: {commit.title}") """ - from datasets.utils.file_utils import is_relative_path if is_relative_path(repo_id) and repo_id.count('/') == 1: _owner, _dataset_name = repo_id.split('/') @@ -1520,7 +1520,6 @@ class HubApi: List: The response containing the dataset repository tree information. e.g. [{'CommitId': None, 'CommitMessage': '...', 'Size': 0, 'Type': 'tree'}, ...] 
""" - from datasets.utils.file_utils import is_relative_path if is_relative_path(repo_id) and repo_id.count('/') == 1: _owner, _dataset_name = repo_id.split('/') diff --git a/modelscope/msdatasets/download/download_manager.py b/modelscope/msdatasets/download/download_manager.py index 5e36cdce..bc75641d 100644 --- a/modelscope/msdatasets/download/download_manager.py +++ b/modelscope/msdatasets/download/download_manager.py @@ -3,10 +3,11 @@ from datasets.download.download_manager import DownloadManager from datasets.download.streaming_download_manager import \ StreamingDownloadManager -from datasets.utils.file_utils import cached_path, is_relative_path +from datasets.utils.file_utils import cached_path from modelscope.msdatasets.download.download_config import DataDownloadConfig from modelscope.msdatasets.utils.oss_utils import OssUtilities +from modelscope.utils.file_utils import is_relative_path class DataDownloadManager(DownloadManager): diff --git a/modelscope/msdatasets/ms_dataset.py b/modelscope/msdatasets/ms_dataset.py index 81f65652..a6d223f3 100644 --- a/modelscope/msdatasets/ms_dataset.py +++ b/modelscope/msdatasets/ms_dataset.py @@ -9,7 +9,6 @@ import numpy as np from datasets import (Dataset, DatasetDict, Features, IterableDataset, IterableDatasetDict) from datasets.packaged_modules import _PACKAGED_DATASETS_MODULES -from datasets.utils.file_utils import is_relative_path from modelscope.hub.repository import DatasetRepository from modelscope.msdatasets.context.dataset_context_config import \ @@ -32,6 +31,7 @@ from modelscope.utils.constant import (DEFAULT_DATASET_NAMESPACE, REPO_TYPE_DATASET, ConfigFields, DatasetFormations, DownloadMode, Hubs, ModeKeys, Tasks, UploadMode) +from modelscope.utils.file_utils import is_relative_path from modelscope.utils.import_utils import is_tf_available, is_torch_available from modelscope.utils.logger import get_logger diff --git a/modelscope/msdatasets/utils/hf_datasets_util.py 
b/modelscope/msdatasets/utils/hf_datasets_util.py
index e275fc87..993714fe 100644
--- a/modelscope/msdatasets/utils/hf_datasets_util.py
+++ b/modelscope/msdatasets/utils/hf_datasets_util.py
@@ -46,7 +46,6 @@ from datasets.packaged_modules import (_EXTENSION_TO_MODULE,
 from datasets.utils import file_utils
 from datasets.utils.file_utils import (_raise_if_offline_mode_is_enabled,
                                        cached_path, is_local_path,
-                                       is_relative_path,
                                        relative_to_absolute_path)
 from datasets.utils.info_utils import is_small_dataset
 from datasets.utils.metadata import MetadataConfigs
@@ -68,6 +67,7 @@ from modelscope.msdatasets.utils.hf_file_utils import get_from_cache_ms
 from modelscope.utils.config_ds import MS_DATASETS_CACHE
 from modelscope.utils.constant import DEFAULT_DATASET_REVISION, REPO_TYPE_DATASET
+from modelscope.utils.file_utils import is_relative_path
 from modelscope.utils.import_utils import has_attr_in_class
 from modelscope.utils.logger import get_logger
 
 logger = get_logger()
diff --git a/modelscope/utils/file_utils.py b/modelscope/utils/file_utils.py
index 7afea306..2f50e7af 100644
--- a/modelscope/utils/file_utils.py
+++ b/modelscope/utils/file_utils.py
@@ -6,6 +6,7 @@ import os
 from pathlib import Path
 from shutil import Error, copy2, copystat
 from typing import BinaryIO, Optional, Union
+from urllib.parse import urlparse
 
 
 # TODO: remove this api, unify to flattened args
@@ -274,3 +275,21 @@ def get_file_hash(
         'chunk_nums': len(chunk_hash_list),
         'chunk_hash_list': chunk_hash_list,
     }
+
+
+def is_relative_path(url_or_filename: str) -> bool:
+    """Check whether a string is a relative local path.
+
+    Local replacement for the helper of the same name that was previously
+    imported from ``datasets.utils.file_utils``.
+
+    Args:
+        url_or_filename (str): A URL, a local filesystem path, or a repo id
+            such as ``'owner/name'``.
+
+    Returns:
+        bool: True if the input has no URL scheme and is not an absolute
+            filesystem path; False otherwise.
+    """
+    has_scheme = urlparse(url_or_filename).scheme != ''
+    return not has_scheme and not os.path.isabs(url_or_filename)