Mirror of https://github.com/modelscope/modelscope.git, synced 2025-12-16 16:27:45 +01:00
Add download retry reason message and some optimizations (#734)

1. Optimize the download retry message. 2. Fix the input_output pipeline_info bug on Python 3.8.

Co-authored-by: mulin.lyh <mulin.lyh@taobao.com>
modelscope/hub/constants.py
@@ -19,7 +19,7 @@ REQUESTS_API_HTTP_METHOD = ['get', 'head', 'post', 'put', 'patch', 'delete']
 API_HTTP_CLIENT_TIMEOUT = 60
 API_RESPONSE_FIELD_DATA = 'Data'
 API_FILE_DOWNLOAD_RETRY_TIMES = 5
-API_FILE_DOWNLOAD_TIMEOUT = 30
+API_FILE_DOWNLOAD_TIMEOUT = 60
 API_FILE_DOWNLOAD_CHUNK_SIZE = 1024 * 1024 * 16
 API_RESPONSE_FIELD_GIT_ACCESS_TOKEN = 'AccessToken'
 API_RESPONSE_FIELD_USERNAME = 'Username'
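The functional change above doubles the per-request download timeout from 30 to 60 seconds. As a rough sketch of how such a constant is typically wired into a streaming GET (the fetch helper below is our illustration, not ModelScope's actual call site; only the constant comes from the diff):

import requests

API_FILE_DOWNLOAD_TIMEOUT = 60  # seconds; raised from 30 by this commit

def fetch(url: str) -> bytes:
    # A single float timeout bounds the connect phase and each socket
    # read, not the transfer as a whole.
    response = requests.get(url, stream=True, timeout=API_FILE_DOWNLOAD_TIMEOUT)
    response.raise_for_status()
    return b''.join(response.iter_content(chunk_size=1024 * 1024))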
@@ -29,8 +29,6 @@ MODELSCOPE_CLOUD_ENVIRONMENT = 'MODELSCOPE_ENVIRONMENT'
 MODELSCOPE_CLOUD_USERNAME = 'MODELSCOPE_USERNAME'
 MODELSCOPE_SDK_DEBUG = 'MODELSCOPE_SDK_DEBUG'
 ONE_YEAR_SECONDS = 24 * 365 * 60 * 60
-MODEL_META_FILE_NAME = '.mdl'
-MODEL_META_MODEL_ID = 'id'
 MODELSCOPE_REQUEST_ID = 'X-Request-ID'
 
 
modelscope/hub/file_download.py
@@ -190,7 +190,7 @@ def get_file_download_url(model_id: str, file_path: str, revision: str):
 
 def download_part_with_retry(params):
     # unpack parameters
-    progress, start, end, url, file_name, cookies, headers = params
+    model_file_name, progress, start, end, url, file_name, cookies, headers = params
     get_headers = {} if headers is None else copy.deepcopy(headers)
     get_headers['Range'] = 'bytes=%s-%s' % (start, end)
     get_headers['X-Request-ID'] = str(uuid.uuid4().hex)
@@ -216,8 +216,8 @@ def download_part_with_retry(params):
                 break
         except (Exception) as e:  # no matter what exception, we will retry.
             retry = retry.increment('GET', url, error=e)
-            logger.warning('Download file from: %s to: %s failed, will retry' %
-                           (start, end))
+            logger.warning('Downloading: %s failed, reason: %s will retry' %
+                           (model_file_name, e))
             retry.sleep()
 
 
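The rewritten warning names the failing file and the exception instead of the byte range, which is what the commit title means by a retry reason message. The surrounding loop leans on urllib3's Retry helper: increment() consumes one attempt and raises MaxRetryError once the budget is gone, and sleep() applies the backoff. A self-contained sketch of the same pattern, with the hypothetical fetch_range standing in for the real download body:

import logging

import requests
from urllib3.util.retry import Retry

logger = logging.getLogger(__name__)

def fetch_range(url: str, start: int, end: int, retries: int = 5) -> bytes:
    retry = Retry(total=retries, backoff_factor=1)
    while True:
        try:
            headers = {'Range': 'bytes=%s-%s' % (start, end)}
            response = requests.get(url, headers=headers, timeout=60)
            response.raise_for_status()
            return response.content
        except Exception as e:  # as in the diff: retry on any error
            # increment() returns a new Retry with one fewer attempt
            # left and raises MaxRetryError when exhausted.
            retry = retry.increment('GET', url, error=e)
            logger.warning('Downloading: %s failed, reason: %s will retry',
                           url, e)
            retry.sleep()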
@@ -246,10 +246,10 @@ def parallel_download(
         for idx in range(int(file_size / PART_SIZE)):
             start = idx * PART_SIZE
             end = (idx + 1) * PART_SIZE - 1
-            tasks.append(
-                (progress, start, end, url, temp_file.name, cookies, headers))
+            tasks.append((file_name, progress, start, end, url, temp_file.name,
+                          cookies, headers))
         if end + 1 < file_size:
-            tasks.append((progress, end + 1, file_size - 1, url,
+            tasks.append((file_name, progress, end + 1, file_size - 1, url,
                           temp_file.name, cookies, headers))
     parallels = MODELSCOPE_DOWNLOAD_PARALLELS if MODELSCOPE_DOWNLOAD_PARALLELS <= 4 else 4
     with ThreadPoolExecutor(
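Each task tuple now carries file_name first so download_part_with_retry can name it in the log. The range math around those tuples is easy to misread, so here it is as a standalone sketch; split_ranges is our name, and unlike the original loop it guards against files smaller than one part with end = -1:

def split_ranges(file_size: int, part_size: int):
    # Full parts cover [idx * part_size, (idx + 1) * part_size - 1].
    ranges = []
    end = -1
    for idx in range(int(file_size / part_size)):
        start = idx * part_size
        end = (idx + 1) * part_size - 1
        ranges.append((start, end))
    if end + 1 < file_size:  # leftover tail shorter than one part
        ranges.append((end + 1, file_size - 1))
    return ranges

# 2.5 parts of 100 bytes -> three ranges, the last one short
assert split_ranges(250, 100) == [(0, 99), (100, 199), (200, 249)]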
modelscope/hub/snapshot_download.py
@@ -103,6 +103,10 @@ def snapshot_download(model_id: str,
                 'Snapshot': 'True'
             }
         }
+        if cache.cached_model_revision is not None:
+            snapshot_header[
+                'cached_model_revision'] = cache.cached_model_revision
+
         model_files = _api.get_model_files(
             model_id=model_id,
             revision=revision,
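The added header lets the client advertise which revision it already holds; cache.cached_model_revision is read from the new .mv marker introduced in caching.py below. Reduced to a pure function (build_snapshot_header is our name, not a ModelScope API):

def build_snapshot_header(cached_model_revision=None):
    header = {'Snapshot': 'True'}
    # Only advertise a revision when a complete snapshot is cached.
    if cached_model_revision is not None:
        header['cached_model_revision'] = cached_model_revision
    return header

assert build_snapshot_header() == {'Snapshot': 'True'}
assert build_snapshot_header('v1.0.2')['cached_model_revision'] == 'v1.0.2'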
@@ -158,7 +162,9 @@ def snapshot_download(model_id: str,
             temp_file = os.path.join(temp_cache_dir, model_file['Name'])
             if FILE_HASH in model_file:
                 file_integrity_validation(temp_file, model_file[FILE_HASH])
-            # put file to cache
+            # put file into the cache
             cache.put_file(model_file, temp_file)
 
+        cache.save_model_version(revision=revision)
+
         return os.path.join(cache.get_root_location())
modelscope/hub/utils/caching.py
@@ -6,7 +6,6 @@ import pickle
 import tempfile
 from shutil import move, rmtree
 
-from modelscope.hub.constants import MODEL_META_FILE_NAME, MODEL_META_MODEL_ID
 from modelscope.utils.logger import get_logger
 
 logger = get_logger()
@@ -16,6 +15,9 @@ logger = get_logger()
 
 class FileSystemCache(object):
     KEY_FILE_NAME = '.msc'
+    MODEL_META_FILE_NAME = '.mdl'
+    MODEL_META_MODEL_ID = 'id'
+    MODEL_VERSION_FILE_NAME = '.mv'
     """Local file cache.
     """
 
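Together with the constants.py hunk above, this moves the meta-file names from module scope into the class and adds the new .mv marker name. The net effect, sketched outside the real class:

class FileSystemCache(object):
    KEY_FILE_NAME = '.msc'
    MODEL_META_FILE_NAME = '.mdl'    # was modelscope.hub.constants.MODEL_META_FILE_NAME
    MODEL_META_MODEL_ID = 'id'       # was modelscope.hub.constants.MODEL_META_MODEL_ID
    MODEL_VERSION_FILE_NAME = '.mv'  # new: revision marker file

# callers qualify through the class; no hub.constants import needed
assert FileSystemCache.MODEL_META_FILE_NAME == '.mdl'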
@@ -133,24 +135,42 @@ class ModelFileSystemCache(FileSystemCache):
             self.load_model_meta()
         else:
             super().__init__(os.path.join(cache_root, owner, name))
-            self.model_meta = {MODEL_META_MODEL_ID: '%s/%s' % (owner, name)}
+            self.model_meta = {
+                FileSystemCache.MODEL_META_MODEL_ID: '%s/%s' % (owner, name)
+            }
             self.save_model_meta()
+        self.cached_model_revision = self.load_model_version()
 
     def load_model_meta(self):
         meta_file_path = os.path.join(self.cache_root_location,
-                                      MODEL_META_FILE_NAME)
+                                      FileSystemCache.MODEL_META_FILE_NAME)
         if os.path.exists(meta_file_path):
             with open(meta_file_path, 'rb') as f:
                 self.model_meta = pickle.load(f)
         else:
-            self.model_meta = {MODEL_META_MODEL_ID: 'unknown'}
+            self.model_meta = {FileSystemCache.MODEL_META_MODEL_ID: 'unknown'}
 
+    def load_model_version(self):
+        model_version_file_path = os.path.join(
+            self.cache_root_location, FileSystemCache.MODEL_VERSION_FILE_NAME)
+        if os.path.exists(model_version_file_path):
+            with open(model_version_file_path, 'r') as f:
+                return f.read().strip()
+        else:
+            return None
+
+    def save_model_version(self, revision: str):
+        model_version_file_path = os.path.join(
+            self.cache_root_location, FileSystemCache.MODEL_VERSION_FILE_NAME)
+        with open(model_version_file_path, 'w') as f:
+            f.write(revision)
+
     def get_model_id(self):
-        return self.model_meta[MODEL_META_MODEL_ID]
+        return self.model_meta[FileSystemCache.MODEL_META_MODEL_ID]
 
     def save_model_meta(self):
         meta_file_path = os.path.join(self.cache_root_location,
-                                      MODEL_META_FILE_NAME)
+                                      FileSystemCache.MODEL_META_FILE_NAME)
         with open(meta_file_path, 'wb') as f:
             pickle.dump(self.model_meta, f)
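The .mv marker is a one-line text file recording the revision of the last complete snapshot: load_model_version() returns None until save_model_version() runs, and snapshot_download only calls the latter after every file has been validated and cached, so a partial download never looks complete. The same round trip as standalone functions (names ours):

import os
import tempfile

MODEL_VERSION_FILE_NAME = '.mv'

def save_model_version(cache_root: str, revision: str) -> None:
    with open(os.path.join(cache_root, MODEL_VERSION_FILE_NAME), 'w') as f:
        f.write(revision)

def load_model_version(cache_root: str):
    path = os.path.join(cache_root, MODEL_VERSION_FILE_NAME)
    if os.path.exists(path):
        with open(path, 'r') as f:
            return f.read().strip()
    return None

root = tempfile.mkdtemp()
assert load_model_version(root) is None  # no marker yet
save_model_version(root, 'v1.0.2')
assert load_model_version(root) == 'v1.0.2'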
modelscope/utils/input_output.py
@@ -547,6 +547,9 @@ class PipelineInfomation():
             },
         }
 
+    def __getitem__(self, key):
+        return self.__dict__.get('_%s' % key)
+
 
 def is_url(url: str):
     """Check the input url is valid url.
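The new __getitem__ maps subscript access onto the underscore-prefixed private attributes, so pipeline_info['task_name'] reads self._task_name and missing keys come back as None. A minimal sketch of the mechanism (the real class carries more fields and, judging by the next hunk, also exposes task_name as an attribute or property):

class PipelineInfomation:  # class name spelled as in the source

    def __init__(self, task_name: str):
        self._task_name = task_name

    def __getitem__(self, key):
        # '_%s' % key turns 'task_name' into '_task_name'
        return self.__dict__.get('_%s' % key)

info = PipelineInfomation('text-classification')
assert info['task_name'] == 'text-classification'
assert info['missing'] is None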
@@ -645,7 +648,7 @@ def call_pipeline_with_json(pipeline_info: PipelineInfomation,
     #     result = pipeline(**pipeline_inputs)
     # else:
     pipeline_inputs, parameters = service_base64_input_to_pipeline_input(
-        pipeline_info['task_name'], body)
+        pipeline_info.task_name, body)
     result = pipeline(pipeline_inputs, **parameters)
 
     return result