Unify dataset download log and remove tqdm disable option (#997)

* Use `tqdm.auto` for progress bars

* Unify the dataset download log messages and remove the `disable_tqdm` option

---------

Co-authored-by: Yingda Chen <yingda.chen@alibaba-inc.com>
This commit is contained in:
Yingda Chen
2024-09-24 21:38:36 +08:00
committed by GitHub
parent d5c9c82340
commit 058df0e34c
2 changed files with 6 additions and 37 deletions

View File

@@ -555,7 +555,7 @@ def get_module_without_script(self) -> DatasetModule:
download_config = self.download_config.copy() download_config = self.download_config.copy()
if download_config.download_desc is None: if download_config.download_desc is None:
download_config.download_desc = 'Downloading readme' download_config.download_desc = 'Downloading [README.md]'
try: try:
url_or_filename = _ms_api.get_dataset_file_url( url_or_filename = _ms_api.get_dataset_file_url(
file_name='README.md', file_name='README.md',

View File

@@ -42,7 +42,6 @@ def get_from_cache_ms(
ignore_url_params=False, ignore_url_params=False,
storage_options=None, storage_options=None,
download_desc=None, download_desc=None,
disable_tqdm=False,
) -> str: ) -> str:
""" """
Given a URL, look for the corresponding file in the local cache. Given a URL, look for the corresponding file in the local cache.
@@ -88,6 +87,8 @@ def get_from_cache_ms(
# if we don't ask for 'force_download' then we spare a request # if we don't ask for 'force_download' then we spare a request
filename = hash_url_to_filename(cached_url, etag=None) filename = hash_url_to_filename(cached_url, etag=None)
cache_path = os.path.join(cache_dir, filename) cache_path = os.path.join(cache_dir, filename)
if download_desc is None:
download_desc = 'Downloading [' + filename + ']'
if os.path.exists(cache_path) and not force_download and not use_etag: if os.path.exists(cache_path) and not force_download and not use_etag:
return cache_path return cache_path
@@ -211,42 +212,10 @@ def get_from_cache_ms(
if scheme == 'ftp': if scheme == 'ftp':
ftp_get(url, temp_file) ftp_get(url, temp_file)
elif scheme not in ('http', 'https'): elif scheme not in ('http', 'https'):
fsspec_get_sig = inspect.signature(fsspec_get)
if 'disable_tqdm' in fsspec_get_sig.parameters:
fsspec_get(url,
temp_file,
storage_options=storage_options,
desc=download_desc,
disable_tqdm=disable_tqdm
)
else:
fsspec_get(url, temp_file, storage_options=storage_options, desc=download_desc) fsspec_get(url, temp_file, storage_options=storage_options, desc=download_desc)
else: else:
http_get_sig = inspect.signature(http_get) http_get(url, temp_file=temp_file, proxies=proxies, resume_size=resume_size,
headers=headers, cookies=cookies, max_retries=max_retries, desc=download_desc)
if 'disable_tqdm' in http_get_sig.parameters:
http_get(
url,
temp_file=temp_file,
proxies=proxies,
resume_size=resume_size,
headers=headers,
cookies=cookies,
max_retries=max_retries,
desc=download_desc,
disable_tqdm=disable_tqdm,
)
else:
http_get(
url,
temp_file=temp_file,
proxies=proxies,
resume_size=resume_size,
headers=headers,
cookies=cookies,
max_retries=max_retries,
desc=download_desc,
)
logger.info(f'storing {url} in cache at {cache_path}') logger.info(f'storing {url} in cache at {cache_path}')
shutil.move(temp_file.name, cache_path) shutil.move(temp_file.name, cache_path)