Merge pull request #811 from modelscope/dev/fix_datasets_load

Fix API `get_dataset_file_url`
This commit is contained in:
Xingjun.Wang
2024-03-26 17:46:01 +08:00
committed by GitHub
2 changed files with 12 additions and 1 deletions

View File

@@ -893,6 +893,17 @@ class HubApi:
# else:
# return file_url
def get_dataset_file_url_origin(
        self,
        file_name: str,
        dataset_name: str,
        namespace: str,
        revision: Optional[str] = DEFAULT_DATASET_REVISION):
    """Build the dataset repo URL for a meta file, else pass the name through.

    Args:
        file_name: Name or path of the file inside the dataset repo.
        dataset_name: Name of the dataset.
        namespace: Namespace that owns the dataset.
        revision: Dataset revision placed in the ``Revision`` query
            parameter. Defaults to ``DEFAULT_DATASET_REVISION``.

    Returns:
        A URL under ``self.endpoint`` when ``file_name`` is non-empty and its
        extension is listed in ``META_FILES_FORMAT``; otherwise ``file_name``
        is returned unchanged (including empty or ``None`` values).
    """
    # Nothing to resolve for an empty/None name: hand it back untouched.
    if not file_name:
        return file_name

    # Only files whose extension is a recognised meta format get a repo URL.
    extension = os.path.splitext(file_name)[-1]
    if extension not in META_FILES_FORMAT:
        return file_name

    # Values are interpolated into the query string as-is (no URL-encoding).
    return (f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/repo?'
            f'Revision={revision}&FilePath={file_name}')
def get_dataset_access_config(
self,
dataset_name: str,

View File

@@ -195,7 +195,7 @@ def get_dataset_files(subset_split_into: dict,
for split, info in subset_split_into.items():
custom_type_map[split] = info.get('custom', '')
meta_map[split] = modelscope_api.get_dataset_file_url(
meta_map[split] = modelscope_api.get_dataset_file_url_origin(
info.get('meta', ''), dataset_name, namespace, revision)
if info.get('file'):
file_map[split] = info['file']