ok Merge branch 'master' of github.com:modelscope/modelscope into release/1.35

This commit is contained in:
班扬
2026-04-11 14:09:38 +08:00
6 changed files with 161 additions and 52 deletions

View File

@@ -145,7 +145,8 @@ class DownloadCMD(CLICommand):
cache_dir=self.args.cache_dir,
local_dir=self.args.local_dir,
revision=self.args.revision,
cookies=cookies)
cookies=cookies,
token=self.args.token)
elif len(
self.args.files) > 1: # download specified multiple files.
snapshot_download(
@@ -155,7 +156,8 @@ class DownloadCMD(CLICommand):
local_dir=self.args.local_dir,
allow_file_pattern=self.args.files,
max_workers=self.args.max_workers,
cookies=cookies)
cookies=cookies,
token=self.args.token)
else: # download repo
snapshot_download(
self.args.model,
@@ -165,7 +167,8 @@ class DownloadCMD(CLICommand):
allow_file_pattern=convert_patterns(self.args.include),
ignore_file_pattern=convert_patterns(self.args.exclude),
max_workers=self.args.max_workers,
cookies=cookies)
cookies=cookies,
token=self.args.token)
print(f'\nSuccessfully Downloaded from model {self.args.model}.\n')
elif self.args.dataset:
dataset_revision: str = self.args.revision if self.args.revision else DEFAULT_DATASET_REVISION
@@ -176,7 +179,8 @@ class DownloadCMD(CLICommand):
cache_dir=self.args.cache_dir,
local_dir=self.args.local_dir,
revision=dataset_revision,
cookies=cookies)
cookies=cookies,
token=self.args.token)
elif len(
self.args.files) > 1: # download specified multiple files.
dataset_snapshot_download(
@@ -186,7 +190,8 @@ class DownloadCMD(CLICommand):
local_dir=self.args.local_dir,
allow_file_pattern=self.args.files,
max_workers=self.args.max_workers,
cookies=cookies)
cookies=cookies,
token=self.args.token)
else: # download repo
dataset_snapshot_download(
self.args.dataset,
@@ -196,7 +201,8 @@ class DownloadCMD(CLICommand):
allow_file_pattern=convert_patterns(self.args.include),
ignore_file_pattern=convert_patterns(self.args.exclude),
max_workers=self.args.max_workers,
cookies=cookies)
cookies=cookies,
token=self.args.token)
print(
f'\nSuccessfully Downloaded from dataset {self.args.dataset}.\n'
)

View File

@@ -446,6 +446,7 @@ def download_part_with_retry(params):
headers=get_headers,
cookies=cookies,
timeout=API_FILE_DOWNLOAD_TIMEOUT)
r.raise_for_status()
for chunk in r.iter_content(
chunk_size=API_FILE_DOWNLOAD_CHUNK_SIZE):
if chunk: # filter out keep-alive new chunks
@@ -738,15 +739,21 @@ def download_file(
temp_file = os.path.join(temporary_cache_dir, file_meta['Path'])
if FILE_HASH in file_meta:
expected_hash = file_meta[FILE_HASH]
# if a real-time hash has been computed
if file_digest is not None:
# if real-time hash mismatched, try to compute it again
if file_digest != expected_hash:
print(
'Mismatched real-time digest found, falling back to lump-sum hash computation'
)
file_integrity_validation(temp_file, expected_hash)
logger.warning(
'Mismatched real-time digest for %s, falling back to full hash check',
file_meta['Path'])
if not file_integrity_validation(temp_file, expected_hash):
raise FileDownloadError(
'File %s hash validation failed after download, '
'the file may be corrupted. Please retry.'
% file_meta['Path'])
else:
file_integrity_validation(temp_file, expected_hash)
if not file_integrity_validation(temp_file, expected_hash):
raise FileDownloadError(
'File %s hash validation failed after download, '
'the file may be corrupted. Please retry.'
% file_meta['Path'])
# put file into cache
return cache.put_file(file_meta, temp_file)

View File

@@ -308,7 +308,7 @@ def _snapshot_download(
_api = HubApi(token=token)
endpoint = _api.get_endpoint_for_read(
repo_id=repo_id, repo_type=repo_type)
repo_id=repo_id, repo_type=repo_type, token=token)
if cookies is None:
cookies = _api.get_cookies()
if repo_type == REPO_TYPE_MODEL:
@@ -393,8 +393,8 @@ def _snapshot_download(
revision_detail = revision or DEFAULT_DATASET_REVISION
logger.info('Fetching dataset repo file list...')
repo_files = fetch_repo_files(_api, repo_id, revision_detail,
endpoint)
repo_files = fetch_repo_files(
_api, repo_id, revision_detail, endpoint, token=token)
if repo_files is None:
logger.error(
@@ -427,10 +427,13 @@ def _snapshot_download(
return cache_root_path
def fetch_repo_files(_api, repo_id, revision, endpoint):
def fetch_repo_files(_api, repo_id, revision, endpoint, token=None):
_owner, _dataset_name = repo_id.split('/')
_hub_id, _ = _api.get_dataset_id_and_type(
dataset_name=_dataset_name, namespace=_owner, endpoint=endpoint)
dataset_name=_dataset_name,
namespace=_owner,
endpoint=endpoint,
token=token)
page_number = 1
page_size = 150
@@ -446,6 +449,7 @@ def fetch_repo_files(_api, repo_id, revision, endpoint):
page_number=page_number,
page_size=page_size,
endpoint=endpoint,
token=token,
dataset_hub_id=_hub_id)
except Exception as e:
logger.error(f'Error fetching dataset files: {e}')

View File

@@ -54,8 +54,47 @@ class FileSystemCache(object):
cache_keys_file_path = os.path.join(self.cache_root_location,
FileSystemCache.KEY_FILE_NAME)
if os.path.exists(cache_keys_file_path):
with open(cache_keys_file_path, 'rb') as f:
self.cached_files = pickle.load(f)
try:
with open(cache_keys_file_path, 'rb') as f:
data = pickle.load(f)
if isinstance(data, list):
self.cached_files = data
else:
logger.warning(
'Cache index %s has unexpected type %s, resetting.',
cache_keys_file_path,
type(data).__name__)
except Exception as e:
logger.warning(
'Failed to load cache index %s: %s. '
'Resetting — already-downloaded files will be re-validated on next run.',
cache_keys_file_path, e)
try:
os.replace(cache_keys_file_path,
cache_keys_file_path + '.corrupted')
except OSError:
pass
def _save_cached_files_unlocked(self):
    """Write the cache index (.msc) atomically. Caller must hold ``_cache_lock``.

    A snapshot of ``self.cached_files`` is pickled into a temporary file
    created in ``self.cache_root_location``, flushed and fsynced, and then
    moved over the index file with ``os.replace`` so readers never observe
    a partially written index.

    Raises:
        Exception: Any error raised while writing or replacing the index
            is re-raised after the temporary file has been removed on a
            best-effort basis.
    """
    cache_keys_file_path = os.path.join(self.cache_root_location,
                                        FileSystemCache.KEY_FILE_NAME)
    fd, temp_filename = tempfile.mkstemp(
        suffix='.tmp', dir=self.cache_root_location)
    try:
        try:
            f = os.fdopen(fd, 'wb')
        except Exception:
            # fdopen failed, so no file object owns the descriptor yet and
            # it has to be closed by hand.
            try:
                os.close(fd)
            except OSError:
                pass
            raise
        # From here on the file object owns ``fd``; the ``with`` block is
        # the only place that closes it. Closing the raw descriptor again
        # on the error path could hit a descriptor number that another
        # thread has reused in the meantime.
        with f:
            pickle.dump(list(self.cached_files), f)
            f.flush()
            os.fsync(f.fileno())
        # Replace only after the temp file is closed.
        os.replace(temp_filename, cache_keys_file_path)
    except Exception:
        # Best-effort cleanup: a failure to remove the temp file must not
        # hide the error that brought us here.
        try:
            os.unlink(temp_filename)
        except OSError:
            pass
        raise
def save_cached_files(self):
"""
@@ -65,23 +104,7 @@ class FileSystemCache(object):
[{'Path': 'configuration.json', 'Revision': 'f01dxxx'}, {'Path': 'model.bin', 'Revision': '1159xxx'}, ...]
"""
with self._cache_lock:
cache_keys_file_path = os.path.join(self.cache_root_location,
FileSystemCache.KEY_FILE_NAME)
fd, temp_filename = tempfile.mkstemp(
suffix='.tmp', dir=self.cache_root_location)
try:
with os.fdopen(fd, 'wb') as f:
pickle.dump(self.cached_files, f)
move(temp_filename, cache_keys_file_path)
except Exception:
try:
os.close(fd)
except OSError:
pass
if os.path.exists(temp_filename):
os.unlink(temp_filename)
raise
self._save_cached_files_unlocked()
def get_file(self, key):
"""Check the key is in the cache, if exist, return the file, otherwise return None.
@@ -170,10 +193,22 @@ class ModelFileSystemCache(FileSystemCache):
meta_file_path = os.path.join(self.cache_root_location,
FileSystemCache.MODEL_META_FILE_NAME)
if os.path.exists(meta_file_path):
with open(meta_file_path, 'rb') as f:
self.model_meta = pickle.load(f)
else:
self.model_meta = {FileSystemCache.MODEL_META_MODEL_ID: 'unknown'}
try:
with open(meta_file_path, 'rb') as f:
data = pickle.load(f)
if isinstance(data, dict):
self.model_meta = data
return
logger.warning('Model meta %s has unexpected type, resetting.',
meta_file_path)
except Exception as e:
logger.warning('Failed to load model meta %s: %s. Resetting.',
meta_file_path, e)
try:
os.replace(meta_file_path, meta_file_path + '.corrupted')
except OSError:
pass
self.model_meta = {FileSystemCache.MODEL_META_MODEL_ID: 'unknown'}
def load_model_version(self):
model_version_file_path = os.path.join(
@@ -341,16 +376,15 @@ class ModelFileSystemCache(FileSystemCache):
Returns:
str: The location of the cached file.
"""
self.remove_if_exists(model_file_info) # backup old revision
self.remove_if_exists(model_file_info)
cache_key = self.__get_cache_key(model_file_info)
cache_full_path = os.path.join(
self.cache_root_location,
cache_key['Path']) # Branch and Tag do not have same name.
cache_full_path = os.path.join(self.cache_root_location,
cache_key['Path'])
cache_file_dir = os.path.dirname(cache_full_path)
if not os.path.exists(cache_file_dir):
os.makedirs(cache_file_dir, exist_ok=True)
# We can't make this operation transactional
move(model_file_location, cache_full_path)
self.cached_files.append(cache_key)
self.save_cached_files()
with self._cache_lock:
self.cached_files.append(cache_key)
self._save_cached_files_unlocked()
return cache_full_path

View File

@@ -12,7 +12,7 @@ from modelscope.hub.repository import Repository
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import (TEST_ACCESS_TOKEN1,
TEST_MODEL_CHINESE_NAME,
TEST_MODEL_ORG)
TEST_MODEL_ORG, test_level)
logger = get_logger()
@@ -58,21 +58,25 @@ class DownloadCMDTest(unittest.TestCase):
logger.warning(f'Error deleting model {self.model_id}: {e}')
super().tearDown()
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_download(self):
cmd = f'python -m modelscope.cli.cli download --model {self.model_id}'
stat, output = subprocess.getstatusoutput(cmd)
self.assertEqual(stat, 0)
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_download_with_position_arg(self):
cmd = f'python -m modelscope.cli.cli download {self.model_id}'
stat, output = subprocess.getstatusoutput(cmd)
self.assertEqual(stat, 0)
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_download_file(self):
cmd = f'python -m modelscope.cli.cli download --model {self.model_id} {download_model_file_name}'
stat, output = subprocess.getstatusoutput(cmd)
self.assertEqual(stat, 0)
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_download_with_cache(self):
cmd = f'python -m modelscope.cli.cli download --model {self.model_id} --cache_dir {self.tmp_dir}'
stat, output = subprocess.getstatusoutput(cmd)
@@ -83,6 +87,7 @@ class DownloadCMDTest(unittest.TestCase):
osp.exists(
f'{self.tmp_dir}/{self.model_id}/{download_model_file_name}'))
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_download_with_revision(self):
cmd = f'python -m modelscope.cli.cli download --model {self.model_id} --revision {self.revision}'
stat, output = subprocess.getstatusoutput(cmd)
@@ -91,5 +96,58 @@ class DownloadCMDTest(unittest.TestCase):
self.assertEqual(stat, 0)
class DownloadCMDTokenTest(unittest.TestCase):
    """CLI download tests that pass the access token via ``--token``.

    ``setUp`` creates one private model repo and one private dataset repo;
    ``tearDown`` deletes both again and only logs a warning if a deletion
    fails.
    """

    def setUp(self):
        """Create the private model and dataset repos used by the tests."""
        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
        self.api = HubApi()

        # Keyword arguments common to both create_repo calls.
        private_repo_options = dict(
            visibility='private',
            license=Licenses.APACHE_V2,
            exist_ok=True,
        )

        # Private model repo.
        self.model_id = '%s/%s' % (TEST_MODEL_ORG, 'test_model_with_token')
        self.api.create_repo(
            repo_id=self.model_id,
            repo_type='model',
            chinese_name=TEST_MODEL_CHINESE_NAME,
            **private_repo_options,
        )

        # Private dataset repo.
        self.dataset_id = '%s/%s' % (TEST_MODEL_ORG, 'test_dataset_with_token')
        self.api.create_repo(
            repo_id=self.dataset_id,
            repo_type='dataset',
            **private_repo_options,
        )

    def tearDown(self):
        """Delete both repos; a failed deletion is logged, not raised."""
        try:
            self.api.delete_model(model_id=self.model_id)
        except Exception as e:
            logger.warning(f'Error deleting model {self.model_id}: {e}')

        try:
            self.api.delete_dataset(dataset_id=self.dataset_id)
        except Exception as e:
            logger.warning(f'Error deleting dataset {self.dataset_id}: {e}')

        super().tearDown()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_download_model_with_token(self):
        """The CLI exits with status 0 when downloading the model with --token."""
        cmd = f'python -m modelscope.cli.cli download --model {self.model_id} --token {TEST_ACCESS_TOKEN1}'
        exit_status, cli_output = subprocess.getstatusoutput(cmd)
        print(cli_output)
        self.assertEqual(exit_status, 0)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_download_dataset_with_token(self):
        """The CLI exits with status 0 when downloading the dataset with --token."""
        cmd = f'python -m modelscope.cli.cli download --dataset {self.dataset_id} --token {TEST_ACCESS_TOKEN1}'
        exit_status, cli_output = subprocess.getstatusoutput(cmd)
        print(cli_output)
        self.assertEqual(exit_status, 0)
if __name__ == '__main__':
unittest.main()

View File

@@ -15,7 +15,7 @@ class MCPApiTest(unittest.TestCase):
self.api = MCPApi()
self.api.login(TEST_ACCESS_TOKEN1)
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_list_mcp_servers(self):
"""Test list_mcp_servers functionality and validation."""
result = self.api.list_mcp_servers(total_count=5)
@@ -31,7 +31,7 @@ class MCPApiTest(unittest.TestCase):
for field in ['name', 'id', 'description']:
self.assertIn(field, server)
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_list_operational_mcp_servers(self):
"""Test list_operational_mcp_servers functionality."""
result = self.api.list_operational_mcp_servers()
@@ -53,7 +53,7 @@ class MCPApiTest(unittest.TestCase):
self.assertIn('url', first_config)
self.assertTrue(first_config['url'].startswith('https://'))
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_get_mcp_server(self):
"""Test get_mcp_server functionality and validation."""
result = self.api.get_mcp_server('@modelcontextprotocol/fetch')