ok Merge branch 'master' of github.com:modelscope/modelscope into release/1.35

2026-05-18 13:15:06 +02:00 · 2026-04-11 14:09:38 +08:00
parent 5207517f7a 32d7c7062c
commit 13e6de5c0a
6 changed files with 161 additions and 52 deletions
--- a/modelscope/cli/download.py
+++ b/modelscope/cli/download.py
@@ -145,7 +145,8 @@ class DownloadCMD(CLICommand):
                    cache_dir=self.args.cache_dir,
                    local_dir=self.args.local_dir,
                    revision=self.args.revision,
-                    cookies=cookies)
+                    cookies=cookies,
+                    token=self.args.token)
            elif len(
                    self.args.files) > 1:  # download specified multiple files.
                snapshot_download(
@@ -155,7 +156,8 @@ class DownloadCMD(CLICommand):
                    local_dir=self.args.local_dir,
                    allow_file_pattern=self.args.files,
                    max_workers=self.args.max_workers,
-                    cookies=cookies)
+                    cookies=cookies,
+                    token=self.args.token)
            else:  # download repo
                snapshot_download(
                    self.args.model,
@@ -165,7 +167,8 @@ class DownloadCMD(CLICommand):
                    allow_file_pattern=convert_patterns(self.args.include),
                    ignore_file_pattern=convert_patterns(self.args.exclude),
                    max_workers=self.args.max_workers,
-                    cookies=cookies)
+                    cookies=cookies,
+                    token=self.args.token)
            print(f'\nSuccessfully Downloaded from model {self.args.model}.\n')
        elif self.args.dataset:
            dataset_revision: str = self.args.revision if self.args.revision else DEFAULT_DATASET_REVISION
@@ -176,7 +179,8 @@ class DownloadCMD(CLICommand):
                    cache_dir=self.args.cache_dir,
                    local_dir=self.args.local_dir,
                    revision=dataset_revision,
-                    cookies=cookies)
+                    cookies=cookies,
+                    token=self.args.token)
            elif len(
                    self.args.files) > 1:  # download specified multiple files.
                dataset_snapshot_download(
@@ -186,7 +190,8 @@ class DownloadCMD(CLICommand):
                    local_dir=self.args.local_dir,
                    allow_file_pattern=self.args.files,
                    max_workers=self.args.max_workers,
-                    cookies=cookies)
+                    cookies=cookies,
+                    token=self.args.token)
            else:  # download repo
                dataset_snapshot_download(
                    self.args.dataset,
@@ -196,7 +201,8 @@ class DownloadCMD(CLICommand):
                    allow_file_pattern=convert_patterns(self.args.include),
                    ignore_file_pattern=convert_patterns(self.args.exclude),
                    max_workers=self.args.max_workers,
-                    cookies=cookies)
+                    cookies=cookies,
+                    token=self.args.token)
            print(
                f'\nSuccessfully Downloaded from dataset {self.args.dataset}.\n'
            )
--- a/modelscope/hub/file_download.py
+++ b/modelscope/hub/file_download.py
@@ -446,6 +446,7 @@ def download_part_with_retry(params):
                    headers=get_headers,
                    cookies=cookies,
                    timeout=API_FILE_DOWNLOAD_TIMEOUT)
+                r.raise_for_status()
                for chunk in r.iter_content(
                        chunk_size=API_FILE_DOWNLOAD_CHUNK_SIZE):
                    if chunk:  # filter out keep-alive new chunks
@@ -738,15 +739,21 @@ def download_file(
    temp_file = os.path.join(temporary_cache_dir, file_meta['Path'])
    if FILE_HASH in file_meta:
        expected_hash = file_meta[FILE_HASH]
-        # if a real-time hash has been computed
        if file_digest is not None:
-            # if real-time hash mismatched, try to compute it again
            if file_digest != expected_hash:
-                print(
-                    'Mismatched real-time digest found, falling back to lump-sum hash computation'
-                )
-                file_integrity_validation(temp_file, expected_hash)
+                logger.warning(
+                    'Mismatched real-time digest for %s, falling back to full hash check',
+                    file_meta['Path'])
+                if not file_integrity_validation(temp_file, expected_hash):
+                    raise FileDownloadError(
+                        'File %s hash validation failed after download, '
+                        'the file may be corrupted. Please retry.'
+                        % file_meta['Path'])
        else:
-            file_integrity_validation(temp_file, expected_hash)
+            if not file_integrity_validation(temp_file, expected_hash):
+                raise FileDownloadError(
+                    'File %s hash validation failed after download, '
+                    'the file may be corrupted. Please retry.'
+                    % file_meta['Path'])
    # put file into to cache
    return cache.put_file(file_meta, temp_file)
--- a/modelscope/hub/snapshot_download.py
+++ b/modelscope/hub/snapshot_download.py
@@ -308,7 +308,7 @@ def _snapshot_download(

        _api = HubApi(token=token)
        endpoint = _api.get_endpoint_for_read(
-            repo_id=repo_id, repo_type=repo_type)
+            repo_id=repo_id, repo_type=repo_type, token=token)
        if cookies is None:
            cookies = _api.get_cookies()
        if repo_type == REPO_TYPE_MODEL:
@@ -393,8 +393,8 @@ def _snapshot_download(
            revision_detail = revision or DEFAULT_DATASET_REVISION

            logger.info('Fetching dataset repo file list...')
-            repo_files = fetch_repo_files(_api, repo_id, revision_detail,
-                                          endpoint)
+            repo_files = fetch_repo_files(
+                _api, repo_id, revision_detail, endpoint, token=token)

            if repo_files is None:
                logger.error(
@@ -427,10 +427,13 @@ def _snapshot_download(
        return cache_root_path


-def fetch_repo_files(_api, repo_id, revision, endpoint):
+def fetch_repo_files(_api, repo_id, revision, endpoint, token=None):
    _owner, _dataset_name = repo_id.split('/')
    _hub_id, _ = _api.get_dataset_id_and_type(
-        dataset_name=_dataset_name, namespace=_owner, endpoint=endpoint)
+        dataset_name=_dataset_name,
+        namespace=_owner,
+        endpoint=endpoint,
+        token=token)

    page_number = 1
    page_size = 150
@@ -446,6 +449,7 @@ def fetch_repo_files(_api, repo_id, revision, endpoint):
                page_number=page_number,
                page_size=page_size,
                endpoint=endpoint,
+                token=token,
                dataset_hub_id=_hub_id)
        except Exception as e:
            logger.error(f'Error fetching dataset files: {e}')
--- a/modelscope/hub/utils/caching.py
+++ b/modelscope/hub/utils/caching.py
@@ -54,8 +54,47 @@ class FileSystemCache(object):
        cache_keys_file_path = os.path.join(self.cache_root_location,
                                            FileSystemCache.KEY_FILE_NAME)
        if os.path.exists(cache_keys_file_path):
-            with open(cache_keys_file_path, 'rb') as f:
-                self.cached_files = pickle.load(f)
+            try:
+                with open(cache_keys_file_path, 'rb') as f:
+                    data = pickle.load(f)
+                if isinstance(data, list):
+                    self.cached_files = data
+                else:
+                    logger.warning(
+                        'Cache index %s has unexpected type %s, resetting.',
+                        cache_keys_file_path,
+                        type(data).__name__)
+            except Exception as e:
+                logger.warning(
+                    'Failed to load cache index %s: %s. '
+                    'Resetting — already-downloaded files will be re-validated on next run.',
+                    cache_keys_file_path, e)
+                try:
+                    os.replace(cache_keys_file_path,
+                               cache_keys_file_path + '.corrupted')
+                except OSError:
+                    pass
+
+    def _save_cached_files_unlocked(self):
+        """Atomically persist the cache index; caller must hold ``_cache_lock``."""
+        cache_keys_file_path = os.path.join(self.cache_root_location,
+                                            FileSystemCache.KEY_FILE_NAME)
+        fd, temp_filename = tempfile.mkstemp(
+            suffix='.tmp', dir=self.cache_root_location)
+        try:
+            # os.fdopen takes ownership of fd and the ``with`` closes it exactly
+            # once; never os.close(fd) again afterwards, since that number may
+            # already have been reused by another thread's file or socket.
+            with os.fdopen(fd, 'wb') as f:
+                pickle.dump(list(self.cached_files), f)
+                f.flush()
+                os.fsync(f.fileno())  # flush to disk before the rename
+            os.replace(temp_filename, cache_keys_file_path)
+        except Exception:
+            # Remove the partial temp file, then propagate the original error.
+            if os.path.exists(temp_filename):
+                os.unlink(temp_filename)
+            raise

    def save_cached_files(self):
        """
@@ -65,23 +104,7 @@ class FileSystemCache(object):
            [{'Path': 'configuration.json', 'Revision': 'f01dxxx'}, {'Path': 'model.bin', 'Revision': '1159xxx'}, ...]
        """
        with self._cache_lock:
-            cache_keys_file_path = os.path.join(self.cache_root_location,
-                                                FileSystemCache.KEY_FILE_NAME)
-            fd, temp_filename = tempfile.mkstemp(
-                suffix='.tmp', dir=self.cache_root_location)
-
-            try:
-                with os.fdopen(fd, 'wb') as f:
-                    pickle.dump(self.cached_files, f)
-                move(temp_filename, cache_keys_file_path)
-            except Exception:
-                try:
-                    os.close(fd)
-                except OSError:
-                    pass
-                if os.path.exists(temp_filename):
-                    os.unlink(temp_filename)
-                raise
+            self._save_cached_files_unlocked()

    def get_file(self, key):
        """Check the key is in the cache, if exist, return the file, otherwise return None.
@@ -170,10 +193,22 @@ class ModelFileSystemCache(FileSystemCache):
        meta_file_path = os.path.join(self.cache_root_location,
                                      FileSystemCache.MODEL_META_FILE_NAME)
        if os.path.exists(meta_file_path):
-            with open(meta_file_path, 'rb') as f:
-                self.model_meta = pickle.load(f)
-        else:
-            self.model_meta = {FileSystemCache.MODEL_META_MODEL_ID: 'unknown'}
+            try:
+                with open(meta_file_path, 'rb') as f:
+                    data = pickle.load(f)
+                if isinstance(data, dict):
+                    self.model_meta = data
+                    return
+                logger.warning('Model meta %s has unexpected type, resetting.',
+                               meta_file_path)
+            except Exception as e:
+                logger.warning('Failed to load model meta %s: %s. Resetting.',
+                               meta_file_path, e)
+                try:
+                    os.replace(meta_file_path, meta_file_path + '.corrupted')
+                except OSError:
+                    pass
+        self.model_meta = {FileSystemCache.MODEL_META_MODEL_ID: 'unknown'}

    def load_model_version(self):
        model_version_file_path = os.path.join(
@@ -341,16 +376,15 @@ class ModelFileSystemCache(FileSystemCache):
        Returns:
            str: The location of the cached file.
        """
-        self.remove_if_exists(model_file_info)  # backup old revision
+        self.remove_if_exists(model_file_info)
        cache_key = self.__get_cache_key(model_file_info)
-        cache_full_path = os.path.join(
-            self.cache_root_location,
-            cache_key['Path'])  # Branch and Tag do not have same name.
+        cache_full_path = os.path.join(self.cache_root_location,
+                                       cache_key['Path'])
        cache_file_dir = os.path.dirname(cache_full_path)
        if not os.path.exists(cache_file_dir):
            os.makedirs(cache_file_dir, exist_ok=True)
-        # We can't make operation transaction
        move(model_file_location, cache_full_path)
-        self.cached_files.append(cache_key)
-        self.save_cached_files()
+        with self._cache_lock:
+            self.cached_files.append(cache_key)
+            self._save_cached_files_unlocked()
        return cache_full_path
--- a/tests/cli/test_download_cmd.py
+++ b/tests/cli/test_download_cmd.py
@@ -12,7 +12,7 @@ from modelscope.hub.repository import Repository
 from modelscope.utils.logger import get_logger
 from modelscope.utils.test_utils import (TEST_ACCESS_TOKEN1,
                                         TEST_MODEL_CHINESE_NAME,
-                                         TEST_MODEL_ORG)
+                                         TEST_MODEL_ORG, test_level)

 logger = get_logger()

@@ -58,21 +58,25 @@ class DownloadCMDTest(unittest.TestCase):
            logger.warning(f'Error deleting model {self.model_id}: {e}')
        super().tearDown()

+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_download(self):
        cmd = f'python -m modelscope.cli.cli download --model {self.model_id}'
        stat, output = subprocess.getstatusoutput(cmd)
        self.assertEqual(stat, 0)

+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_download_with_position_arg(self):
        cmd = f'python -m modelscope.cli.cli download {self.model_id}'
        stat, output = subprocess.getstatusoutput(cmd)
        self.assertEqual(stat, 0)

+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_download_file(self):
        cmd = f'python -m modelscope.cli.cli download --model {self.model_id} {download_model_file_name}'
        stat, output = subprocess.getstatusoutput(cmd)
        self.assertEqual(stat, 0)

+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_download_with_cache(self):
        cmd = f'python -m modelscope.cli.cli download --model {self.model_id} --cache_dir {self.tmp_dir}'
        stat, output = subprocess.getstatusoutput(cmd)
@@ -83,6 +87,7 @@ class DownloadCMDTest(unittest.TestCase):
            osp.exists(
                f'{self.tmp_dir}/{self.model_id}/{download_model_file_name}'))

+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_download_with_revision(self):
        cmd = f'python -m modelscope.cli.cli download --model {self.model_id} --revision {self.revision}'
        stat, output = subprocess.getstatusoutput(cmd)
@@ -91,5 +96,58 @@ class DownloadCMDTest(unittest.TestCase):
        self.assertEqual(stat, 0)


+class DownloadCMDTokenTest(unittest.TestCase):
+    """Exercise ``download --token`` against private model and dataset repos."""
+
+    def setUp(self):
+        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
+        self.api = HubApi()
+        # Settings shared by both repos; each is created with private visibility.
+        common = dict(
+            visibility='private',
+            license=Licenses.APACHE_V2,
+            exist_ok=True,
+        )
+        # Model repo; additionally passes chinese_name.
+        self.model_id = '%s/%s' % (TEST_MODEL_ORG, 'test_model_with_token')
+        self.api.create_repo(
+            repo_id=self.model_id,
+            repo_type='model',
+            chinese_name=TEST_MODEL_CHINESE_NAME,
+            **common,
+        )
+
+        # Dataset repo.
+        self.dataset_id = '%s/%s' % (TEST_MODEL_ORG, 'test_dataset_with_token')
+        self.api.create_repo(
+            repo_id=self.dataset_id, repo_type='dataset', **common)
+
+    def tearDown(self):
+        # Best-effort cleanup: a failed delete is logged, never raised.
+        try:
+            self.api.delete_model(model_id=self.model_id)
+        except Exception as e:
+            logger.warning(f'Error deleting model {self.model_id}: {e}')
+        try:
+            self.api.delete_dataset(dataset_id=self.dataset_id)
+        except Exception as e:
+            logger.warning(f'Error deleting dataset {self.dataset_id}: {e}')
+        super().tearDown()
+
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    def test_download_model_with_token(self):
+        cmd = f'python -m modelscope.cli.cli download --model {self.model_id} --token {TEST_ACCESS_TOKEN1}'
+        exit_code, cli_output = subprocess.getstatusoutput(cmd)
+        print(cli_output)
+        self.assertEqual(exit_code, 0)
+
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    def test_download_dataset_with_token(self):
+        cmd = f'python -m modelscope.cli.cli download --dataset {self.dataset_id} --token {TEST_ACCESS_TOKEN1}'
+        exit_code, cli_output = subprocess.getstatusoutput(cmd)
+        print(cli_output)
+        self.assertEqual(exit_code, 0)
+
+
 if __name__ == '__main__':
    unittest.main()
--- a/tests/mcp/test_mcp_api.py
+++ b/tests/mcp/test_mcp_api.py
@@ -15,7 +15,7 @@ class MCPApiTest(unittest.TestCase):
        self.api = MCPApi()
        self.api.login(TEST_ACCESS_TOKEN1)

-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    def test_list_mcp_servers(self):
        """Test list_mcp_servers functionality and validation."""
        result = self.api.list_mcp_servers(total_count=5)
@@ -31,7 +31,7 @@ class MCPApiTest(unittest.TestCase):
        for field in ['name', 'id', 'description']:
            self.assertIn(field, server)

-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    def test_list_operational_mcp_servers(self):
        """Test list_operational_mcp_servers functionality."""
        result = self.api.list_operational_mcp_servers()
@@ -53,7 +53,7 @@ class MCPApiTest(unittest.TestCase):
                self.assertIn('url', first_config)
                self.assertTrue(first_config['url'].startswith('https://'))

-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    def test_get_mcp_server(self):
        """Test get_mcp_server functionality and validation."""
        result = self.api.get_mcp_server('@modelcontextprotocol/fetch')