mirror of
https://github.com/modelscope/modelscope.git
synced 2026-05-18 13:15:06 +02:00
ok Merge branch 'master' of github.com:modelscope/modelscope into release/1.35
This commit is contained in:
@@ -145,7 +145,8 @@ class DownloadCMD(CLICommand):
|
||||
cache_dir=self.args.cache_dir,
|
||||
local_dir=self.args.local_dir,
|
||||
revision=self.args.revision,
|
||||
cookies=cookies)
|
||||
cookies=cookies,
|
||||
token=self.args.token)
|
||||
elif len(
|
||||
self.args.files) > 1: # download specified multiple files.
|
||||
snapshot_download(
|
||||
@@ -155,7 +156,8 @@ class DownloadCMD(CLICommand):
|
||||
local_dir=self.args.local_dir,
|
||||
allow_file_pattern=self.args.files,
|
||||
max_workers=self.args.max_workers,
|
||||
cookies=cookies)
|
||||
cookies=cookies,
|
||||
token=self.args.token)
|
||||
else: # download repo
|
||||
snapshot_download(
|
||||
self.args.model,
|
||||
@@ -165,7 +167,8 @@ class DownloadCMD(CLICommand):
|
||||
allow_file_pattern=convert_patterns(self.args.include),
|
||||
ignore_file_pattern=convert_patterns(self.args.exclude),
|
||||
max_workers=self.args.max_workers,
|
||||
cookies=cookies)
|
||||
cookies=cookies,
|
||||
token=self.args.token)
|
||||
print(f'\nSuccessfully Downloaded from model {self.args.model}.\n')
|
||||
elif self.args.dataset:
|
||||
dataset_revision: str = self.args.revision if self.args.revision else DEFAULT_DATASET_REVISION
|
||||
@@ -176,7 +179,8 @@ class DownloadCMD(CLICommand):
|
||||
cache_dir=self.args.cache_dir,
|
||||
local_dir=self.args.local_dir,
|
||||
revision=dataset_revision,
|
||||
cookies=cookies)
|
||||
cookies=cookies,
|
||||
token=self.args.token)
|
||||
elif len(
|
||||
self.args.files) > 1: # download specified multiple files.
|
||||
dataset_snapshot_download(
|
||||
@@ -186,7 +190,8 @@ class DownloadCMD(CLICommand):
|
||||
local_dir=self.args.local_dir,
|
||||
allow_file_pattern=self.args.files,
|
||||
max_workers=self.args.max_workers,
|
||||
cookies=cookies)
|
||||
cookies=cookies,
|
||||
token=self.args.token)
|
||||
else: # download repo
|
||||
dataset_snapshot_download(
|
||||
self.args.dataset,
|
||||
@@ -196,7 +201,8 @@ class DownloadCMD(CLICommand):
|
||||
allow_file_pattern=convert_patterns(self.args.include),
|
||||
ignore_file_pattern=convert_patterns(self.args.exclude),
|
||||
max_workers=self.args.max_workers,
|
||||
cookies=cookies)
|
||||
cookies=cookies,
|
||||
token=self.args.token)
|
||||
print(
|
||||
f'\nSuccessfully Downloaded from dataset {self.args.dataset}.\n'
|
||||
)
|
||||
|
||||
@@ -446,6 +446,7 @@ def download_part_with_retry(params):
|
||||
headers=get_headers,
|
||||
cookies=cookies,
|
||||
timeout=API_FILE_DOWNLOAD_TIMEOUT)
|
||||
r.raise_for_status()
|
||||
for chunk in r.iter_content(
|
||||
chunk_size=API_FILE_DOWNLOAD_CHUNK_SIZE):
|
||||
if chunk: # filter out keep-alive new chunks
|
||||
@@ -738,15 +739,21 @@ def download_file(
|
||||
temp_file = os.path.join(temporary_cache_dir, file_meta['Path'])
|
||||
if FILE_HASH in file_meta:
|
||||
expected_hash = file_meta[FILE_HASH]
|
||||
# if a real-time hash has been computed
|
||||
if file_digest is not None:
|
||||
# if real-time hash mismatched, try to compute it again
|
||||
if file_digest != expected_hash:
|
||||
print(
|
||||
'Mismatched real-time digest found, falling back to lump-sum hash computation'
|
||||
)
|
||||
file_integrity_validation(temp_file, expected_hash)
|
||||
logger.warning(
|
||||
'Mismatched real-time digest for %s, falling back to full hash check',
|
||||
file_meta['Path'])
|
||||
if not file_integrity_validation(temp_file, expected_hash):
|
||||
raise FileDownloadError(
|
||||
'File %s hash validation failed after download, '
|
||||
'the file may be corrupted. Please retry.'
|
||||
% file_meta['Path'])
|
||||
else:
|
||||
file_integrity_validation(temp_file, expected_hash)
|
||||
if not file_integrity_validation(temp_file, expected_hash):
|
||||
raise FileDownloadError(
|
||||
'File %s hash validation failed after download, '
|
||||
'the file may be corrupted. Please retry.'
|
||||
% file_meta['Path'])
|
||||
# put the file into the cache
|
||||
return cache.put_file(file_meta, temp_file)
|
||||
|
||||
@@ -308,7 +308,7 @@ def _snapshot_download(
|
||||
|
||||
_api = HubApi(token=token)
|
||||
endpoint = _api.get_endpoint_for_read(
|
||||
repo_id=repo_id, repo_type=repo_type)
|
||||
repo_id=repo_id, repo_type=repo_type, token=token)
|
||||
if cookies is None:
|
||||
cookies = _api.get_cookies()
|
||||
if repo_type == REPO_TYPE_MODEL:
|
||||
@@ -393,8 +393,8 @@ def _snapshot_download(
|
||||
revision_detail = revision or DEFAULT_DATASET_REVISION
|
||||
|
||||
logger.info('Fetching dataset repo file list...')
|
||||
repo_files = fetch_repo_files(_api, repo_id, revision_detail,
|
||||
endpoint)
|
||||
repo_files = fetch_repo_files(
|
||||
_api, repo_id, revision_detail, endpoint, token=token)
|
||||
|
||||
if repo_files is None:
|
||||
logger.error(
|
||||
@@ -427,10 +427,13 @@ def _snapshot_download(
|
||||
return cache_root_path
|
||||
|
||||
|
||||
def fetch_repo_files(_api, repo_id, revision, endpoint):
|
||||
def fetch_repo_files(_api, repo_id, revision, endpoint, token=None):
|
||||
_owner, _dataset_name = repo_id.split('/')
|
||||
_hub_id, _ = _api.get_dataset_id_and_type(
|
||||
dataset_name=_dataset_name, namespace=_owner, endpoint=endpoint)
|
||||
dataset_name=_dataset_name,
|
||||
namespace=_owner,
|
||||
endpoint=endpoint,
|
||||
token=token)
|
||||
|
||||
page_number = 1
|
||||
page_size = 150
|
||||
@@ -446,6 +449,7 @@ def fetch_repo_files(_api, repo_id, revision, endpoint):
|
||||
page_number=page_number,
|
||||
page_size=page_size,
|
||||
endpoint=endpoint,
|
||||
token=token,
|
||||
dataset_hub_id=_hub_id)
|
||||
except Exception as e:
|
||||
logger.error(f'Error fetching dataset files: {e}')
|
||||
|
||||
@@ -54,8 +54,47 @@ class FileSystemCache(object):
|
||||
cache_keys_file_path = os.path.join(self.cache_root_location,
|
||||
FileSystemCache.KEY_FILE_NAME)
|
||||
if os.path.exists(cache_keys_file_path):
|
||||
with open(cache_keys_file_path, 'rb') as f:
|
||||
self.cached_files = pickle.load(f)
|
||||
try:
|
||||
with open(cache_keys_file_path, 'rb') as f:
|
||||
data = pickle.load(f)
|
||||
if isinstance(data, list):
|
||||
self.cached_files = data
|
||||
else:
|
||||
logger.warning(
|
||||
'Cache index %s has unexpected type %s, resetting.',
|
||||
cache_keys_file_path,
|
||||
type(data).__name__)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
'Failed to load cache index %s: %s. '
|
||||
'Resetting — already-downloaded files will be re-validated on next run.',
|
||||
cache_keys_file_path, e)
|
||||
try:
|
||||
os.replace(cache_keys_file_path,
|
||||
cache_keys_file_path + '.corrupted')
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
def _save_cached_files_unlocked(self):
    """Write the cache index (.msc) atomically. Caller must hold ``_cache_lock``.

    A snapshot of ``self.cached_files`` is pickled into a temporary file
    created inside ``self.cache_root_location``, flushed and fsynced, and
    then moved over the index file with ``os.replace`` so the index path
    is only ever swapped to a fully written pickle.

    Raises:
        Exception: Any error raised while writing or replacing the file is
            re-raised after the temporary file has been removed.
    """
    cache_keys_file_path = os.path.join(self.cache_root_location,
                                        FileSystemCache.KEY_FILE_NAME)
    fd, temp_filename = tempfile.mkstemp(
        suffix='.tmp', dir=self.cache_root_location)
    try:
        try:
            f = os.fdopen(fd, 'wb')
        except Exception:
            # fdopen did not hand back a file object, so the raw descriptor
            # from mkstemp may still be open and must be released here.
            try:
                os.close(fd)
            except OSError:
                pass
            raise
        # From here on the file object owns the descriptor: leaving the
        # ``with`` block closes it on success and on error alike. Calling
        # os.close(fd) again would be a double close, and in a
        # multi-threaded download the same fd number may already have been
        # handed to an unrelated file.
        with f:
            pickle.dump(list(self.cached_files), f)
            f.flush()
            os.fsync(f.fileno())
        os.replace(temp_filename, cache_keys_file_path)
    except Exception:
        # Best-effort removal of the temp file; the original error is the
        # one that must reach the caller, so cleanup failures are ignored.
        try:
            os.unlink(temp_filename)
        except OSError:
            pass
        raise
|
||||
|
||||
def save_cached_files(self):
|
||||
"""
|
||||
@@ -65,23 +104,7 @@ class FileSystemCache(object):
|
||||
[{'Path': 'configuration.json', 'Revision': 'f01dxxx'}, {'Path': 'model.bin', 'Revision': '1159xxx'}, ...]
|
||||
"""
|
||||
with self._cache_lock:
|
||||
cache_keys_file_path = os.path.join(self.cache_root_location,
|
||||
FileSystemCache.KEY_FILE_NAME)
|
||||
fd, temp_filename = tempfile.mkstemp(
|
||||
suffix='.tmp', dir=self.cache_root_location)
|
||||
|
||||
try:
|
||||
with os.fdopen(fd, 'wb') as f:
|
||||
pickle.dump(self.cached_files, f)
|
||||
move(temp_filename, cache_keys_file_path)
|
||||
except Exception:
|
||||
try:
|
||||
os.close(fd)
|
||||
except OSError:
|
||||
pass
|
||||
if os.path.exists(temp_filename):
|
||||
os.unlink(temp_filename)
|
||||
raise
|
||||
self._save_cached_files_unlocked()
|
||||
|
||||
def get_file(self, key):
|
||||
"""Check the key is in the cache, if exist, return the file, otherwise return None.
|
||||
@@ -170,10 +193,22 @@ class ModelFileSystemCache(FileSystemCache):
|
||||
meta_file_path = os.path.join(self.cache_root_location,
|
||||
FileSystemCache.MODEL_META_FILE_NAME)
|
||||
if os.path.exists(meta_file_path):
|
||||
with open(meta_file_path, 'rb') as f:
|
||||
self.model_meta = pickle.load(f)
|
||||
else:
|
||||
self.model_meta = {FileSystemCache.MODEL_META_MODEL_ID: 'unknown'}
|
||||
try:
|
||||
with open(meta_file_path, 'rb') as f:
|
||||
data = pickle.load(f)
|
||||
if isinstance(data, dict):
|
||||
self.model_meta = data
|
||||
return
|
||||
logger.warning('Model meta %s has unexpected type, resetting.',
|
||||
meta_file_path)
|
||||
except Exception as e:
|
||||
logger.warning('Failed to load model meta %s: %s. Resetting.',
|
||||
meta_file_path, e)
|
||||
try:
|
||||
os.replace(meta_file_path, meta_file_path + '.corrupted')
|
||||
except OSError:
|
||||
pass
|
||||
self.model_meta = {FileSystemCache.MODEL_META_MODEL_ID: 'unknown'}
|
||||
|
||||
def load_model_version(self):
|
||||
model_version_file_path = os.path.join(
|
||||
@@ -341,16 +376,15 @@ class ModelFileSystemCache(FileSystemCache):
|
||||
Returns:
|
||||
str: The location of the cached file.
|
||||
"""
|
||||
self.remove_if_exists(model_file_info) # backup old revision
|
||||
self.remove_if_exists(model_file_info)
|
||||
cache_key = self.__get_cache_key(model_file_info)
|
||||
cache_full_path = os.path.join(
|
||||
self.cache_root_location,
|
||||
cache_key['Path']) # Branch and Tag do not have same name.
|
||||
cache_full_path = os.path.join(self.cache_root_location,
|
||||
cache_key['Path'])
|
||||
cache_file_dir = os.path.dirname(cache_full_path)
|
||||
if not os.path.exists(cache_file_dir):
|
||||
os.makedirs(cache_file_dir, exist_ok=True)
|
||||
# We can't make this operation transactional
|
||||
move(model_file_location, cache_full_path)
|
||||
self.cached_files.append(cache_key)
|
||||
self.save_cached_files()
|
||||
with self._cache_lock:
|
||||
self.cached_files.append(cache_key)
|
||||
self._save_cached_files_unlocked()
|
||||
return cache_full_path
|
||||
|
||||
@@ -12,7 +12,7 @@ from modelscope.hub.repository import Repository
|
||||
from modelscope.utils.logger import get_logger
|
||||
from modelscope.utils.test_utils import (TEST_ACCESS_TOKEN1,
|
||||
TEST_MODEL_CHINESE_NAME,
|
||||
TEST_MODEL_ORG)
|
||||
TEST_MODEL_ORG, test_level)
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
@@ -58,21 +58,25 @@ class DownloadCMDTest(unittest.TestCase):
|
||||
logger.warning(f'Error deleting model {self.model_id}: {e}')
|
||||
super().tearDown()
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_download(self):
    # Downloading the whole repo selected with --model must exit with status 0.
    command = f'python -m modelscope.cli.cli download --model {self.model_id}'
    exit_status, _ = subprocess.getstatusoutput(command)
    self.assertEqual(exit_status, 0)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_download_with_position_arg(self):
    # The repo id given as a bare positional argument must also exit with status 0.
    command = f'python -m modelscope.cli.cli download {self.model_id}'
    exit_status, _ = subprocess.getstatusoutput(command)
    self.assertEqual(exit_status, 0)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_download_file(self):
    # Downloading one named file from the repo must exit with status 0.
    command = f'python -m modelscope.cli.cli download --model {self.model_id} {download_model_file_name}'
    exit_status, _ = subprocess.getstatusoutput(command)
    self.assertEqual(exit_status, 0)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
def test_download_with_cache(self):
|
||||
cmd = f'python -m modelscope.cli.cli download --model {self.model_id} --cache_dir {self.tmp_dir}'
|
||||
stat, output = subprocess.getstatusoutput(cmd)
|
||||
@@ -83,6 +87,7 @@ class DownloadCMDTest(unittest.TestCase):
|
||||
osp.exists(
|
||||
f'{self.tmp_dir}/{self.model_id}/{download_model_file_name}'))
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
def test_download_with_revision(self):
|
||||
cmd = f'python -m modelscope.cli.cli download --model {self.model_id} --revision {self.revision}'
|
||||
stat, output = subprocess.getstatusoutput(cmd)
|
||||
@@ -91,5 +96,58 @@ class DownloadCMDTest(unittest.TestCase):
|
||||
self.assertEqual(stat, 0)
|
||||
|
||||
|
||||
class DownloadCMDTokenTest(unittest.TestCase):
    """CLI download tests that pass ``--token`` for private model/dataset repos."""

    def setUp(self):
        """Create the private model repo and the private dataset repo."""
        banner = 'Testing %s.%s' % (type(self).__name__, self._testMethodName)
        print(banner)
        self.api = HubApi()

        # Settings shared by both private repos.
        private_repo_options = dict(
            visibility='private',
            license=Licenses.APACHE_V2,
            exist_ok=True,
        )

        # Private model repo.
        self.model_id = '%s/%s' % (TEST_MODEL_ORG, 'test_model_with_token')
        self.api.create_repo(
            repo_id=self.model_id,
            repo_type='model',
            chinese_name=TEST_MODEL_CHINESE_NAME,
            **private_repo_options,
        )

        # Private dataset repo.
        self.dataset_id = '%s/%s' % (TEST_MODEL_ORG, 'test_dataset_with_token')
        self.api.create_repo(
            repo_id=self.dataset_id,
            repo_type='dataset',
            **private_repo_options,
        )

    def tearDown(self):
        """Delete both repos; a failed deletion is logged, not raised."""
        try:
            self.api.delete_model(model_id=self.model_id)
        except Exception as err:
            logger.warning(f'Error deleting model {self.model_id}: {err}')

        try:
            self.api.delete_dataset(dataset_id=self.dataset_id)
        except Exception as err:
            logger.warning(f'Error deleting dataset {self.dataset_id}: {err}')

        super().tearDown()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_download_model_with_token(self):
        # The private model download with an explicit token must exit with status 0.
        command = f'python -m modelscope.cli.cli download --model {self.model_id} --token {TEST_ACCESS_TOKEN1}'
        exit_status, cli_output = subprocess.getstatusoutput(command)
        print(cli_output)
        self.assertEqual(exit_status, 0)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_download_dataset_with_token(self):
        # The private dataset download with an explicit token must exit with status 0.
        command = f'python -m modelscope.cli.cli download --dataset {self.dataset_id} --token {TEST_ACCESS_TOKEN1}'
        exit_status, cli_output = subprocess.getstatusoutput(command)
        print(cli_output)
        self.assertEqual(exit_status, 0)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -15,7 +15,7 @@ class MCPApiTest(unittest.TestCase):
|
||||
self.api = MCPApi()
|
||||
self.api.login(TEST_ACCESS_TOKEN1)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
|
||||
def test_list_mcp_servers(self):
|
||||
"""Test list_mcp_servers functionality and validation."""
|
||||
result = self.api.list_mcp_servers(total_count=5)
|
||||
@@ -31,7 +31,7 @@ class MCPApiTest(unittest.TestCase):
|
||||
for field in ['name', 'id', 'description']:
|
||||
self.assertIn(field, server)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
|
||||
def test_list_operational_mcp_servers(self):
|
||||
"""Test list_operational_mcp_servers functionality."""
|
||||
result = self.api.list_operational_mcp_servers()
|
||||
@@ -53,7 +53,7 @@ class MCPApiTest(unittest.TestCase):
|
||||
self.assertIn('url', first_config)
|
||||
self.assertTrue(first_config['url'].startswith('https://'))
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
|
||||
def test_get_mcp_server(self):
|
||||
"""Test get_mcp_server functionality and validation."""
|
||||
result = self.api.get_mcp_server('@modelcontextprotocol/fetch')
|
||||
|
||||
Reference in New Issue
Block a user