diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py
index 5c8599b0..ff921699 100644
--- a/modelscope/hub/api.py
+++ b/modelscope/hub/api.py
@@ -893,6 +893,31 @@ class HubApi:
         # else:
         #     return file_url
 
+    def get_dataset_file_url_origin(
+            self,
+            file_name: str,
+            dataset_name: str,
+            namespace: str,
+            revision: Optional[str] = DEFAULT_DATASET_REVISION) -> str:
+        """Build the repo URL for a dataset meta file.
+
+        Args:
+            file_name: File path, sent as the ``FilePath`` query parameter.
+            dataset_name: Dataset name, used in the URL path.
+            namespace: Dataset namespace, used in the URL path.
+            revision: Dataset revision, sent as the ``Revision`` query
+                parameter.
+
+        Returns:
+            The ``{endpoint}/api/v1/datasets/...`` URL when ``file_name`` is
+            non-empty and its extension is in ``META_FILES_FORMAT``; otherwise
+            ``file_name`` is returned unchanged.
+        """
+        if file_name and os.path.splitext(file_name)[-1] in META_FILES_FORMAT:
+            file_name = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/repo?' \
+                        f'Revision={revision}&FilePath={file_name}'
+        return file_name
+
     def get_dataset_access_config(
             self,
             dataset_name: str,
diff --git a/modelscope/msdatasets/utils/dataset_utils.py b/modelscope/msdatasets/utils/dataset_utils.py
index 6d939ef1..960693c1 100644
--- a/modelscope/msdatasets/utils/dataset_utils.py
+++ b/modelscope/msdatasets/utils/dataset_utils.py
@@ -195,7 +195,7 @@ def get_dataset_files(subset_split_into: dict,
 
     for split, info in subset_split_into.items():
         custom_type_map[split] = info.get('custom', '')
-        meta_map[split] = modelscope_api.get_dataset_file_url(
+        meta_map[split] = modelscope_api.get_dataset_file_url_origin(
             info.get('meta', ''), dataset_name, namespace, revision)
         if info.get('file'):
             file_map[split] = info['file']