mirror of
https://github.com/modelscope/modelscope.git
synced 2025-12-25 04:29:22 +01:00
add hash verification into cache file existence check (#1116)
* add hash check into cache file existence check Co-authored-by: Yingda Chen <yingda.chen@alibaba-inc.com>
This commit is contained in:
@@ -214,7 +214,7 @@ def _repo_file_download(
|
||||
if repo_file['Path'] == file_path:
|
||||
if cache.exists(repo_file):
|
||||
logger.debug(
|
||||
f'File {repo_file["Name"]} already in cache, skip downloading!'
|
||||
f'File {repo_file["Name"]} already in cache with identical hash, skip downloading!'
|
||||
)
|
||||
return cache.get_file_by_info(repo_file)
|
||||
else:
|
||||
@@ -251,7 +251,7 @@ def _repo_file_download(
|
||||
if repo_file['Path'] == file_path:
|
||||
if cache.exists(repo_file):
|
||||
logger.debug(
|
||||
f'File {repo_file["Name"]} already in cache, skip downloading!'
|
||||
f'File {repo_file["Name"]} already in cache with identical hash, skip downloading!'
|
||||
)
|
||||
return cache.get_file_by_info(repo_file)
|
||||
else:
|
||||
|
||||
@@ -468,7 +468,8 @@ def _download_file_lists(
|
||||
if cache.exists(repo_file):
|
||||
file_name = os.path.basename(repo_file['Name'])
|
||||
logger.debug(
|
||||
f'File {file_name} already in cache, skip downloading!')
|
||||
f'File {file_name} already in cache with identical hash, skip downloading!'
|
||||
)
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.warning('The file pattern is invalid : %s' % e)
|
||||
|
||||
@@ -7,6 +7,8 @@ import tempfile
|
||||
from shutil import move, rmtree
|
||||
from typing import Dict
|
||||
|
||||
from modelscope.hub.constants import FILE_HASH
|
||||
from modelscope.hub.utils.utils import compute_hash
|
||||
from modelscope.utils.logger import get_logger
|
||||
|
||||
logger = get_logger()
|
||||
@@ -252,26 +254,36 @@ class ModelFileSystemCache(FileSystemCache):
|
||||
return cache_key
|
||||
|
||||
def exists(self, model_file_info):
|
||||
"""Check the file is cached or not.
|
||||
"""Check the file is cached or not. Note existence check will also cover digest check
|
||||
|
||||
Args:
|
||||
model_file_info (CachedFileInfo): The cached file info
|
||||
|
||||
Returns:
|
||||
bool: If exists return True otherwise False
|
||||
bool: If exists and has the same hash, return True otherwise False
|
||||
"""
|
||||
key = self.__get_cache_key(model_file_info)
|
||||
is_exists = False
|
||||
file_path = key['Path']
|
||||
cache_file_path = os.path.join(self.cache_root_location,
|
||||
model_file_info['Path'])
|
||||
for cached_key in self.cached_files:
|
||||
if cached_key['Path'] == key['Path'] and (
|
||||
if cached_key['Path'] == file_path and (
|
||||
cached_key['Revision'].startswith(key['Revision'])
|
||||
or key['Revision'].startswith(cached_key['Revision'])):
|
||||
is_exists = True
|
||||
break
|
||||
file_path = os.path.join(self.cache_root_location,
|
||||
model_file_info['Path'])
|
||||
expected_hash = model_file_info[FILE_HASH]
|
||||
if expected_hash is not None and os.path.exists(
|
||||
cache_file_path):
|
||||
cache_file_sha256 = compute_hash(cache_file_path)
|
||||
if expected_hash == cache_file_sha256:
|
||||
is_exists = True
|
||||
break
|
||||
else:
|
||||
logger.info(
|
||||
f'File [{file_path}] exists in cache but with a mismatched hash, will re-download.'
|
||||
)
|
||||
if is_exists:
|
||||
if os.path.exists(file_path):
|
||||
if os.path.exists(cache_file_path):
|
||||
return True
|
||||
else:
|
||||
self.remove_key(
|
||||
|
||||
Reference in New Issue
Block a user