Mirror of https://github.com/modelscope/modelscope.git, synced 2025-12-25 12:39:25 +01:00
add download failed retry (#523)
* add download failed retry
* fix lint issue
@@ -19,7 +19,7 @@ REQUESTS_API_HTTP_METHOD = ['get', 'head', 'post', 'put', 'patch', 'delete']
 API_HTTP_CLIENT_TIMEOUT = 60
 API_RESPONSE_FIELD_DATA = 'Data'
 API_FILE_DOWNLOAD_RETRY_TIMES = 5
-API_FILE_DOWNLOAD_TIMEOUT = 60 * 5
+API_FILE_DOWNLOAD_TIMEOUT = 30
 API_FILE_DOWNLOAD_CHUNK_SIZE = 1024 * 1024 * 16
 API_RESPONSE_FIELD_GIT_ACCESS_TOKEN = 'AccessToken'
 API_RESPONSE_FIELD_USERNAME = 'Username'
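The retry behaviour implied by these constants comes from urllib3's Retry helper, which the new download code below configures with total=API_FILE_DOWNLOAD_RETRY_TIMES and backoff_factor=1. As a rough sketch (not part of this commit), the loop below consumes the retry budget the same way the download worker does and prints the back-off delay urllib3 would sleep between attempts; the URL and the simulated error are placeholders, and the exact delay values depend on the installed urllib3 version (allowed_methods needs urllib3 >= 1.26).

# Sketch only: simulate consuming API_FILE_DOWNLOAD_RETRY_TIMES retries and
# inspect the back-off urllib3 would apply (values vary by urllib3 version).
from urllib3.util.retry import Retry

API_FILE_DOWNLOAD_RETRY_TIMES = 5

retry = Retry(
    total=API_FILE_DOWNLOAD_RETRY_TIMES,
    backoff_factor=1,
    allowed_methods=['GET'])

for attempt in range(API_FILE_DOWNLOAD_RETRY_TIMES):
    # increment() returns a new Retry with one attempt consumed; once the
    # budget is exhausted it raises MaxRetryError instead.
    retry = retry.increment(
        'GET', 'https://example.com/file', error=OSError('simulated failure'))
    print('attempt %d, next back-off: %ss' % (attempt + 1, retry.get_backoff_time()))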
@@ -187,23 +187,36 @@ def get_file_download_url(model_id: str, file_path: str, revision: str):
     )


-def download_part(params):
+def download_part_with_retry(params):
     # unpack parameters
     progress, start, end, url, file_name, cookies, headers = params
     get_headers = {} if headers is None else copy.deepcopy(headers)
     get_headers['Range'] = 'bytes=%s-%s' % (start, end)
-    with open(file_name, 'rb+') as f:
-        f.seek(start)
-        r = requests.get(
-            url,
-            stream=True,
-            headers=get_headers,
-            cookies=cookies,
-            timeout=API_FILE_DOWNLOAD_TIMEOUT)
-        for chunk in r.iter_content(chunk_size=API_FILE_DOWNLOAD_CHUNK_SIZE):
-            if chunk:  # filter out keep-alive new chunks
-                f.write(chunk)
-                progress.update(len(chunk))
+    retry = Retry(
+        total=API_FILE_DOWNLOAD_RETRY_TIMES,
+        backoff_factor=1,
+        allowed_methods=['GET'])
+    while True:
+        try:
+            with open(file_name, 'rb+') as f:
+                f.seek(start)
+                r = requests.get(
+                    url,
+                    stream=True,
+                    headers=get_headers,
+                    cookies=cookies,
+                    timeout=API_FILE_DOWNLOAD_TIMEOUT)
+                for chunk in r.iter_content(
+                        chunk_size=API_FILE_DOWNLOAD_CHUNK_SIZE):
+                    if chunk:  # filter out keep-alive new chunks
+                        f.write(chunk)
+            progress.update(end - start)
+            break
+        except (Exception) as e:  # no matter what exception, we will retry.
+            retry = retry.increment('GET', url, error=e)
+            logger.warning('Download file from: %s to: %s failed, will retry' %
+                           (start, end))
+            retry.sleep()


 def parallel_download(
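For reference, a hedged usage sketch (not from the commit) of driving download_part_with_retry for a single byte range. The URL and local path are placeholders, and it assumes the caller has already created the destination file, since the worker opens it with 'rb+' before seeking.

# Sketch only: invoke download_part_with_retry for one part.
# The params tuple order matches the unpacking at the top of the function.
from tqdm import tqdm

url = 'https://example.com/some/model/file.bin'  # placeholder download URL
file_name = '/tmp/file.bin'                      # placeholder local target
part_size = 160 * 1024 * 1024

# Pre-create the target file so the worker can open it with 'rb+' and seek.
with open(file_name, 'wb') as f:
    f.truncate(part_size)

progress = tqdm(total=part_size, unit='B', unit_scale=True, desc='Downloading')
params = (progress, 0, part_size - 1, url, file_name, None, None)  # cookies, headers
download_part_with_retry(params)
progress.close()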
@@ -226,7 +239,7 @@ def parallel_download(
         initial=0,
         desc='Downloading',
     )
-    PART_SIZE = 160 * 1024 * 1012  # every part is 160M
+    PART_SIZE = 160 * 1024 * 1024  # every part is 160M
     tasks = []
     for idx in range(int(file_size / PART_SIZE)):
         start = idx * PART_SIZE
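The corrected constant (1024 * 1012 changed to 1024 * 1024) makes each part exactly 160 MiB. Below is a small sketch of the resulting byte-range math; the handling of the final partial part is an assumption about the surrounding code, which this hunk does not show.

# Sketch only: split a file into inclusive HTTP byte ranges of PART_SIZE each.
PART_SIZE = 160 * 1024 * 1024      # 167,772,160 bytes per part
file_size = 500 * 1024 * 1024      # e.g. a 500 MiB file

ranges = []
for idx in range(int(file_size / PART_SIZE)):
    start = idx * PART_SIZE
    end = start + PART_SIZE - 1    # Range headers are inclusive on both ends
    ranges.append((start, end))

covered = ranges[-1][1] + 1 if ranges else 0
if covered < file_size:            # assumed tail handling for the last partial part
    ranges.append((covered, file_size - 1))

for start, end in ranges:
    print('Range: bytes=%s-%s' % (start, end))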
@@ -240,7 +253,7 @@ def parallel_download(
     with ThreadPoolExecutor(
             max_workers=parallels,
             thread_name_prefix='download') as executor:
-        list(executor.map(download_part, tasks))
+        list(executor.map(download_part_with_retry, tasks))

     progress.close()
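One observation (not part of the commit) on the unchanged list() wrapper: executor.map yields results lazily, so draining it with list() forces every part to finish and re-raises in the caller whatever a worker eventually gives up with, e.g. urllib3's MaxRetryError once a part's retry budget is spent. A tiny illustration with a stand-in worker:

# Sketch only: list(executor.map(...)) drains all results and propagates worker errors.
from concurrent.futures import ThreadPoolExecutor

def flaky_part(idx):
    # stand-in for download_part_with_retry; pretend part 2 exhausted its retries
    if idx == 2:
        raise RuntimeError('part %d failed after retries' % idx)
    return idx

with ThreadPoolExecutor(max_workers=4, thread_name_prefix='download') as executor:
    try:
        list(executor.map(flaky_part, range(4)))
    except RuntimeError as e:
        print('caught:', e)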