From ac7404ce8733e199118d75be9e9c6db13620b89c Mon Sep 17 00:00:00 2001 From: hjh0119 Date: Fri, 4 Jul 2025 11:35:38 +0800 Subject: [PATCH 01/13] bump version --- modelscope/version.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modelscope/version.py b/modelscope/version.py index 031a86b4..2bb6af92 100644 --- a/modelscope/version.py +++ b/modelscope/version.py @@ -1,5 +1,5 @@ # Make sure to modify __release_datetime__ to release time when making official release. -__version__ = '2.0.0' +__version__ = '1.28.0' # default release datetime for branches under active development is set # to be a time far-far-away-into-the-future -__release_datetime__ = '2099-09-06 00:00:00' +__release_datetime__ = '2025-07-08 00:00:00' From 5a8f97cbd109a4e57d8d259b25b4a715a285e41e Mon Sep 17 00:00:00 2001 From: hjh0119 Date: Mon, 7 Jul 2025 10:34:30 +0800 Subject: [PATCH 02/13] update docker image for 1.28 --- docker/build_image.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docker/build_image.py b/docker/build_image.py index 844eff9d..cade7adb 100644 --- a/docker/build_image.py +++ b/docker/build_image.py @@ -269,19 +269,19 @@ class LLMImageBuilder(Builder): # A mirrored image of nvidia/cuda:12.4.0-devel-ubuntu22.04 args.base_image = 'nvidia/cuda:12.4.0-devel-ubuntu22.04' if not args.torch_version: - args.torch_version = '2.6.0' - args.torchaudio_version = '2.6.0' - args.torchvision_version = '0.21.0' + args.torch_version = '2.7.1' + args.torchaudio_version = '2.7.1' + args.torchvision_version = '0.22.1' if not args.cuda_version: args.cuda_version = '12.4.0' if not args.vllm_version: - args.vllm_version = '0.8.5.post1' + args.vllm_version = '0.9.1' if not args.lmdeploy_version: - args.lmdeploy_version = '0.7.2.post1' + args.lmdeploy_version = '0.9.1' if not args.autogptq_version: args.autogptq_version = '0.7.1' if not args.flashattn_version: - args.flashattn_version = '2.7.1.post4' + args.flashattn_version = '2.8.0.post2' return args def generate_dockerfile(self) -> str: From 48c777f4c3b61bbe73f11bf6e2bb887835eb92e4 Mon Sep 17 00:00:00 2001 From: hjh0119 Date: Mon, 7 Jul 2025 14:01:51 +0800 Subject: [PATCH 03/13] revert vllm&torch dep --- docker/build_image.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docker/build_image.py b/docker/build_image.py index cade7adb..3f556ef3 100644 --- a/docker/build_image.py +++ b/docker/build_image.py @@ -269,13 +269,13 @@ class LLMImageBuilder(Builder): # A mirrored image of nvidia/cuda:12.4.0-devel-ubuntu22.04 args.base_image = 'nvidia/cuda:12.4.0-devel-ubuntu22.04' if not args.torch_version: - args.torch_version = '2.7.1' - args.torchaudio_version = '2.7.1' - args.torchvision_version = '0.22.1' + args.torch_version = '2.6.0' + args.torchaudio_version = '2.6.0' + args.torchvision_version = '0.21.0' if not args.cuda_version: args.cuda_version = '12.4.0' if not args.vllm_version: - args.vllm_version = '0.9.1' + args.vllm_version = '0.8.5.post1' if not args.lmdeploy_version: args.lmdeploy_version = '0.9.1' if not args.autogptq_version: From 339b06ca417bc09a10f722456f386886831e0801 Mon Sep 17 00:00:00 2001 From: hjh0119 Date: Mon, 7 Jul 2025 17:07:44 +0800 Subject: [PATCH 04/13] revert flash-attn dep --- docker/build_image.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/build_image.py b/docker/build_image.py index 3f556ef3..bfef7be5 100644 --- a/docker/build_image.py +++ b/docker/build_image.py @@ -281,7 +281,7 @@ class LLMImageBuilder(Builder): if not args.autogptq_version: args.autogptq_version = '0.7.1' if not args.flashattn_version: - args.flashattn_version = '2.8.0.post2' + args.flashattn_version = '2.7.1.post4' return args def generate_dockerfile(self) -> str: From c63952e90c1e760fb70a3cfaa13723fa49669105 Mon Sep 17 00:00:00 2001 From: hjh0119 Date: Tue, 8 Jul 2025 21:14:31 +0800 Subject: [PATCH 05/13] fix vaehook custom_group_norm shape dismatch --- modelscope/pipelines/multi_modal/diffusers_wrapped/vaehook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelscope/pipelines/multi_modal/diffusers_wrapped/vaehook.py b/modelscope/pipelines/multi_modal/diffusers_wrapped/vaehook.py index 711a287e..5455bd18 100644 --- a/modelscope/pipelines/multi_modal/diffusers_wrapped/vaehook.py +++ b/modelscope/pipelines/multi_modal/diffusers_wrapped/vaehook.py @@ -277,7 +277,7 @@ def custom_group_norm(input, """ b, c, h, w = input.shape channel_in_group = c // num_groups - input_reshaped = input.reshape(1, b * num_groups, channel_in_group, h, w) + input_reshaped = input.reshape(b * num_groups, channel_in_group, h, w) out = F.batch_norm( input_reshaped, From 3d11b891ca04442fdabbe368385374ce1330cf7a Mon Sep 17 00:00:00 2001 From: Koko-ry <2024104299@ruc.edu.cn> Date: Wed, 16 Jul 2025 14:33:31 +0800 Subject: [PATCH 06/13] Fix RCE issue for plugins (#1415) 1. Add `trust_remote_code` in func `from_pretrained` with plugins application --- modelscope/models/base/base_model.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/modelscope/models/base/base_model.py b/modelscope/models/base/base_model.py index b6fea4d0..b294a30a 100644 --- a/modelscope/models/base/base_model.py +++ b/modelscope/models/base/base_model.py @@ -85,6 +85,7 @@ class Model(ABC): revision: Optional[str] = DEFAULT_MODEL_REVISION, cfg_dict: Config = None, device: str = None, + trust_remote_code: Optional[bool] = False, **kwargs): """Instantiate a model from local directory or remote model repo. Note that when loading from remote, the model revision can be specified. @@ -96,6 +97,7 @@ class Model(ABC): cfg_dict(Config, `optional`): An optional model config. If provided, it will replace the config read out of the `model_name_or_path` device(str, `optional`): The device to load the model. + trust_remote_code(bool, `optional`): Whether to trust and allow execution of remote code. Default is False. **kwargs: task(str, `optional`): The `Tasks` enumeration value to replace the task value read out of config in the `model_name_or_path`. This is useful when the model to be loaded is not @@ -181,10 +183,21 @@ class Model(ABC): f'`{ModelFile.CONFIGURATION}` file not found.') model_cfg.model_dir = local_model_dir - # install and import remote repos before build - register_plugins_repo(cfg.safe_get('plugins')) - register_modelhub_repo(local_model_dir, cfg.get('allow_remote', False)) - + # Security check: Only allow execution of remote code or plugins if trust_remote_code is True + plugins = cfg.safe_get('plugins') + if plugins and not trust_remote_code: + raise RuntimeError( + 'Detected plugins field in the model configuration file, but ' + 'trust_remote_code=True was not explicitly set.\n' + 'To prevent potential execution of malicious code, loading has been refused.\n' + 'If you trust this model repository, please pass trust_remote_code=True to from_pretrained.' + ) + if plugins and trust_remote_code: + logger.warning( + 'Use trust_remote_code=True. Will invoke codes or install plugins from remote model repo. ' + 'Please make sure that you can trust the external codes.') + register_modelhub_repo(local_model_dir, allow_remote=trust_remote_code) + register_plugins_repo(plugins) for k, v in kwargs.items(): model_cfg[k] = v if device is not None: From d031f3e20b6e42485e67e1af21d3a11079358e86 Mon Sep 17 00:00:00 2001 From: tastelikefeet <58414341+tastelikefeet@users.noreply.github.com> Date: Thu, 17 Jul 2025 18:38:24 +0800 Subject: [PATCH 07/13] weak file lock (#1417) --- modelscope/hub/snapshot_download.py | 110 ++++++++++++++++++--------- modelscope/hub/utils/utils.py | 59 +++++++++++++- tests/hub/test_download_file_lock.py | 79 +++++++++++++++++++ 3 files changed, 211 insertions(+), 37 deletions(-) create mode 100644 tests/hub/test_download_file_lock.py diff --git a/modelscope/hub/snapshot_download.py b/modelscope/hub/snapshot_download.py index e64b2f67..a2e91d7e 100644 --- a/modelscope/hub/snapshot_download.py +++ b/modelscope/hub/snapshot_download.py @@ -4,17 +4,11 @@ import fnmatch import os import re import uuid +from contextlib import nullcontext from http.cookiejar import CookieJar from pathlib import Path from typing import Dict, List, Optional, Type, Union -from modelscope.hub.api import HubApi, ModelScopeConfig -from modelscope.hub.errors import InvalidParameter -from modelscope.hub.file_download import (create_temporary_directory_and_cache, - download_file, get_file_download_url) -from modelscope.hub.utils.caching import ModelFileSystemCache -from modelscope.hub.utils.utils import (get_model_masked_directory, - model_id_to_group_owner_name) from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, DEFAULT_MODEL_REVISION, INTRA_CLOUD_ACCELERATION, @@ -23,7 +17,15 @@ from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, from modelscope.utils.file_utils import get_modelscope_cache_dir from modelscope.utils.logger import get_logger from modelscope.utils.thread_utils import thread_executor +from .api import HubApi, ModelScopeConfig from .callback import ProgressCallback +from .errors import InvalidParameter +from .file_download import (create_temporary_directory_and_cache, + download_file, get_file_download_url) +from .utils.caching import ModelFileSystemCache +from .utils.utils import (get_model_masked_directory, + model_id_to_group_owner_name, strtobool, + weak_file_lock) logger = get_logger() @@ -43,6 +45,7 @@ def snapshot_download( max_workers: int = 8, repo_id: str = None, repo_type: Optional[str] = REPO_TYPE_MODEL, + enable_file_lock: Optional[bool] = None, progress_callbacks: List[Type[ProgressCallback]] = None, ) -> str: """Download all files of a repo. @@ -79,6 +82,9 @@ def snapshot_download( If provided, files matching any of the patterns are not downloaded, priority over ignore_file_pattern. For hugging-face compatibility. max_workers (`int`): The maximum number of workers to download files, default 8. + enable_file_lock (`bool`): Enable file lock, this is useful in multiprocessing downloading, default `True`. + If you find something wrong with file lock and have a problem modifying your code, + change `MODELSCOPE_HUB_FILE_LOCK` env to `false`. progress_callbacks (`List[Type[ProgressCallback]]`, **optional**, default to `None`): progress callbacks to track the download progress. Raises: @@ -109,21 +115,35 @@ def snapshot_download( if revision is None: revision = DEFAULT_DATASET_REVISION if repo_type == REPO_TYPE_DATASET else DEFAULT_MODEL_REVISION - return _snapshot_download( - repo_id, - repo_type=repo_type, - revision=revision, - cache_dir=cache_dir, - user_agent=user_agent, - local_files_only=local_files_only, - cookies=cookies, - ignore_file_pattern=ignore_file_pattern, - allow_file_pattern=allow_file_pattern, - local_dir=local_dir, - ignore_patterns=ignore_patterns, - allow_patterns=allow_patterns, - max_workers=max_workers, - progress_callbacks=progress_callbacks) + if enable_file_lock is None: + enable_file_lock = strtobool( + os.environ.get('MODELSCOPE_HUB_FILE_LOCK', 'true')) + + if enable_file_lock: + system_cache = cache_dir if cache_dir is not None else get_modelscope_cache_dir( + ) + os.makedirs(os.path.join(system_cache, '.lock'), exist_ok=True) + lock_file = os.path.join(system_cache, '.lock', + repo_id.replace('/', '___')) + context = weak_file_lock(lock_file) + else: + context = nullcontext() + with context: + return _snapshot_download( + repo_id, + repo_type=repo_type, + revision=revision, + cache_dir=cache_dir, + user_agent=user_agent, + local_files_only=local_files_only, + cookies=cookies, + ignore_file_pattern=ignore_file_pattern, + allow_file_pattern=allow_file_pattern, + local_dir=local_dir, + ignore_patterns=ignore_patterns, + allow_patterns=allow_patterns, + max_workers=max_workers, + progress_callbacks=progress_callbacks) def dataset_snapshot_download( @@ -138,6 +158,7 @@ def dataset_snapshot_download( allow_file_pattern: Optional[Union[str, List[str]]] = None, allow_patterns: Optional[Union[List[str], str]] = None, ignore_patterns: Optional[Union[List[str], str]] = None, + enable_file_lock: Optional[bool] = None, max_workers: int = 8, ) -> str: """Download raw files of a dataset. @@ -171,6 +192,9 @@ def dataset_snapshot_download( ignore_patterns (`str` or `List`, *optional*, default to `None`): If provided, files matching any of the patterns are not downloaded, priority over ignore_file_pattern. For hugging-face compatibility. + enable_file_lock (`bool`): Enable file lock, this is useful in multiprocessing downloading, default `True`. + If you find something wrong with file lock and have a problem modifying your code, + change `MODELSCOPE_HUB_FILE_LOCK` env to `false`. max_workers (`int`): The maximum number of workers to download files, default 8. Raises: ValueError: the value details. @@ -187,20 +211,34 @@ def dataset_snapshot_download( - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) if some parameter value is invalid """ - return _snapshot_download( - dataset_id, - repo_type=REPO_TYPE_DATASET, - revision=revision, - cache_dir=cache_dir, - user_agent=user_agent, - local_files_only=local_files_only, - cookies=cookies, - ignore_file_pattern=ignore_file_pattern, - allow_file_pattern=allow_file_pattern, - local_dir=local_dir, - ignore_patterns=ignore_patterns, - allow_patterns=allow_patterns, - max_workers=max_workers) + if enable_file_lock is None: + enable_file_lock = strtobool( + os.environ.get('MODELSCOPE_HUB_FILE_LOCK', 'true')) + + if enable_file_lock: + system_cache = cache_dir if cache_dir is not None else get_modelscope_cache_dir( + ) + os.makedirs(os.path.join(system_cache, '.lock'), exist_ok=True) + lock_file = os.path.join(system_cache, '.lock', + dataset_id.replace('/', '___')) + context = weak_file_lock(lock_file) + else: + context = nullcontext() + with context: + return _snapshot_download( + dataset_id, + repo_type=REPO_TYPE_DATASET, + revision=revision, + cache_dir=cache_dir, + user_agent=user_agent, + local_files_only=local_files_only, + cookies=cookies, + ignore_file_pattern=ignore_file_pattern, + allow_file_pattern=allow_file_pattern, + local_dir=local_dir, + ignore_patterns=ignore_patterns, + allow_patterns=allow_patterns, + max_workers=max_workers) def _snapshot_download( diff --git a/modelscope/hub/utils/utils.py b/modelscope/hub/utils/utils.py index 3c5ee67a..28bcdbf2 100644 --- a/modelscope/hub/utils/utils.py +++ b/modelscope/hub/utils/utils.py @@ -1,12 +1,15 @@ # Copyright (c) Alibaba, Inc. and its affiliates. +import contextlib import hashlib import os import sys import time from datetime import datetime from pathlib import Path -from typing import List, Optional, Union +from typing import Generator, List, Optional, Union + +from filelock import BaseFileLock, FileLock, SoftFileLock, Timeout from modelscope.hub.constants import (DEFAULT_MODELSCOPE_DOMAIN, DEFAULT_MODELSCOPE_GROUP, @@ -242,3 +245,57 @@ def tabulate(rows: List[List[Union[str, int]]], headers: List[str]) -> str: for row in rows: lines.append(row_format.format(*row)) return '\n'.join(lines) + + +# Part of the code borrowed from the awesome work of huggingface_hub/transformers +def strtobool(val): + val = val.lower() + if val in {'y', 'yes', 't', 'true', 'on', '1'}: + return 1 + if val in {'n', 'no', 'f', 'false', 'off', '0'}: + return 0 + raise ValueError(f'invalid truth value {val!r}') + + +@contextlib.contextmanager +def weak_file_lock(lock_file: Union[str, Path], + *, + timeout: Optional[float] = None + ) -> Generator[BaseFileLock, None, None]: + default_interval = 60 + lock = FileLock(lock_file, timeout=default_interval) + start_time = time.time() + + while True: + elapsed_time = time.time() - start_time + if timeout is not None and elapsed_time >= timeout: + raise Timeout(str(lock_file)) + + try: + lock.acquire( + timeout=min(default_interval, timeout - elapsed_time) + if timeout else default_interval) # noqa + except Timeout: + logger.info( + f'Still waiting to acquire lock on {lock_file} (elapsed: {time.time() - start_time:.1f} seconds)' + ) + except NotImplementedError as e: + if 'use SoftFileLock instead' in str(e): + logger.warning( + 'FileSystem does not appear to support flock. Falling back to SoftFileLock for %s', + lock_file) + lock = SoftFileLock(lock_file, timeout=default_interval) + continue + else: + break + + try: + yield lock + finally: + try: + lock.release() + except OSError: + try: + Path(lock_file).unlink() + except OSError: + pass diff --git a/tests/hub/test_download_file_lock.py b/tests/hub/test_download_file_lock.py new file mode 100644 index 00000000..4a6fe803 --- /dev/null +++ b/tests/hub/test_download_file_lock.py @@ -0,0 +1,79 @@ +import hashlib +import multiprocessing +import os +import tempfile +import unittest + +from modelscope import snapshot_download + + +def download_model(model_name, cache_dir, enable_lock): + if not enable_lock: + os.environ['MODELSCOPE_HUB_FILE_LOCK'] = 'false' + snapshot_download(model_name, cache_dir=cache_dir) + + +class FileLockDownloadingTest(unittest.TestCase): + + def setUp(self): + self.temp_dir = tempfile.TemporaryDirectory() + + def tearDown(self): + self.temp_dir.cleanup() + + def test_multi_processing_file_lock(self): + + models = [ + 'iic/nlp_bert_relation-extraction_chinese-base', + 'iic/nlp_bert_relation-extraction_chinese-base', + 'iic/nlp_bert_relation-extraction_chinese-base', + ] + args_list = [(model, self.temp_dir.name, True) for model in models] + + with multiprocessing.Pool(processes=3) as pool: + pool.starmap(download_model, args_list) + + def get_file_sha256(file_path): + sha256_hash = hashlib.sha256() + with open(file_path, 'rb') as f: + for chunk in iter(lambda: f.read(4096), b''): + sha256_hash.update(chunk) + return sha256_hash.hexdigest() + + tensor_file = os.path.join( + self.temp_dir.name, 'iic', + 'nlp_bert_relation-extraction_chinese-base', 'pytorch_model.bin') + sha256 = '2b623d2c06c8101c1283657d35bc22d69bcc10f62ded0ba6d0606e4130f9c8af' + self.assertTrue(get_file_sha256(tensor_file) == sha256) + + def test_multi_processing_disabled(self): + try: + models = [ + 'iic/nlp_bert_backbone_base_std', + 'iic/nlp_bert_backbone_base_std', + 'iic/nlp_bert_backbone_base_std', + ] + args_list = [(model, self.temp_dir.name, False) + for model in models] + + with multiprocessing.Pool(processes=3) as pool: + pool.starmap(download_model, args_list) + + def get_file_sha256(file_path): + sha256_hash = hashlib.sha256() + with open(file_path, 'rb') as f: + for chunk in iter(lambda: f.read(4096), b''): + sha256_hash.update(chunk) + return sha256_hash.hexdigest() + + tensor_file = os.path.join(self.temp_dir.name, 'iic', + 'nlp_bert_backbone_base_std', + 'pytorch_model.bin') + sha256 = 'c6a293a8091f7eaa1ac7ecf88fd6f4cc00f6957188b2730d34faa787f15d3caa' + self.assertTrue(get_file_sha256(tensor_file) != sha256) + except Exception: # noqa + pass + + +if __name__ == '__main__': + unittest.main() From f3d014d3ce49cf47d16746fc5e64a83ced575440 Mon Sep 17 00:00:00 2001 From: Koko-ry <2024104299@ruc.edu.cn> Date: Thu, 17 Jul 2025 18:57:44 +0800 Subject: [PATCH 08/13] Skip UT TextToSpeechSambertHifigan16kPipelineTest temporarily (#1420) --- tests/pipelines/test_text_to_speech.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipelines/test_text_to_speech.py b/tests/pipelines/test_text_to_speech.py index 30654780..d458dda3 100644 --- a/tests/pipelines/test_text_to_speech.py +++ b/tests/pipelines/test_text_to_speech.py @@ -94,7 +94,7 @@ class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase): 'text': self.en_text }] - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_pipeline(self): for i in range(len(self.test_models)): logger.info('test %s' % self.test_model_name[i]) From 62078f1796438e65ce31271e7dfae6fd32b6a741 Mon Sep 17 00:00:00 2001 From: "Xingjun.Wang" Date: Thu, 17 Jul 2025 19:27:39 +0800 Subject: [PATCH 09/13] Fix dataset infos (#1414) --- modelscope/hub/api.py | 133 ++++++++++----- modelscope/hub/file_download.py | 35 ++-- modelscope/hub/snapshot_download.py | 38 ++--- .../msdatasets/utils/hf_datasets_util.py | 156 +++++++----------- 4 files changed, 191 insertions(+), 171 deletions(-) diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index ff423838..e4de81b0 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -374,7 +374,7 @@ class HubApi: by a `/`. repo_type (`str`, *optional*): `None` or `"model"` if getting repository info from a model. Default is `None`. - TODO: support dataset and studio + TODO: support studio endpoint(`str`): None or specific endpoint to use, when None, use the default endpoint set in HubApi class (self.endpoint) @@ -886,6 +886,9 @@ class HubApi: raise_on_error(d) files = [] + if not d[API_RESPONSE_FIELD_DATA]['Files']: + logger.warning(f'No files found in model {model_id} at revision {revision}.') + return files for file in d[API_RESPONSE_FIELD_DATA]['Files']: if file['Name'] == '.gitignore' or file['Name'] == '.gitattributes': continue @@ -993,29 +996,6 @@ class HubApi: dataset_type = resp['Data']['Type'] return dataset_id, dataset_type - def get_dataset_infos(self, - dataset_hub_id: str, - revision: str, - files_metadata: bool = False, - timeout: float = 100, - recursive: str = 'True', - endpoint: Optional[str] = None): - """ - Get dataset infos. - """ - if not endpoint: - endpoint = self.endpoint - datahub_url = f'{endpoint}/api/v1/datasets/{dataset_hub_id}/repo/tree' - params = {'Revision': revision, 'Root': None, 'Recursive': recursive} - cookies = ModelScopeConfig.get_cookies() - if files_metadata: - params['blobs'] = True - r = self.session.get(datahub_url, params=params, cookies=cookies, timeout=timeout) - resp = r.json() - datahub_raise_on_error(datahub_url, resp, r) - - return resp - def list_repo_tree(self, dataset_name: str, namespace: str, @@ -1025,6 +1005,11 @@ class HubApi: page_number: int = 1, page_size: int = 100, endpoint: Optional[str] = None): + """ + @deprecated: Use `get_dataset_files` instead. + """ + warnings.warn('The function `list_repo_tree` is deprecated, use `get_dataset_files` instead.', + DeprecationWarning) dataset_hub_id, dataset_type = self.get_dataset_id_and_type( dataset_name=dataset_name, namespace=namespace, endpoint=endpoint) @@ -1044,6 +1029,59 @@ class HubApi: return resp + def get_dataset_files(self, + repo_id: str, + *, + revision: str = DEFAULT_REPOSITORY_REVISION, + root_path: str = '/', + recursive: bool = True, + page_number: int = 1, + page_size: int = 100, + endpoint: Optional[str] = None): + """ + Get the dataset files. + + Args: + repo_id (str): The repository id, in the format of `namespace/dataset_name`. + revision (str): The branch or tag name. Defaults to `DEFAULT_REPOSITORY_REVISION`. + root_path (str): The root path to list. Defaults to '/'. + recursive (bool): Whether to list recursively. Defaults to True. + page_number (int): The page number for pagination. Defaults to 1. + page_size (int): The number of items per page. Defaults to 100. + endpoint (Optional[str]): The endpoint to use, defaults to None to use the endpoint specified in the class. + + Returns: + List: The response containing the dataset repository tree information. + e.g. [{'CommitId': None, 'CommitMessage': '...', 'Size': 0, 'Type': 'tree'}, ...] + """ + from datasets.utils.file_utils import is_relative_path + + if is_relative_path(repo_id) and repo_id.count('/') == 1: + _owner, _dataset_name = repo_id.split('/') + else: + raise ValueError(f'Invalid repo_id: {repo_id} !') + + dataset_hub_id, dataset_type = self.get_dataset_id_and_type( + dataset_name=_dataset_name, namespace=_owner, endpoint=endpoint) + + if not endpoint: + endpoint = self.endpoint + datahub_url = f'{endpoint}/api/v1/datasets/{dataset_hub_id}/repo/tree' + params = { + 'Revision': revision, + 'Root': root_path, + 'Recursive': 'True' if recursive else 'False', + 'PageNumber': page_number, + 'PageSize': page_size + } + cookies = ModelScopeConfig.get_cookies() + + r = self.session.get(datahub_url, params=params, cookies=cookies) + resp = r.json() + datahub_raise_on_error(datahub_url, resp, r) + + return resp['Data']['Files'] + def get_dataset_meta_file_list(self, dataset_name: str, namespace: str, dataset_id: str, revision: str, endpoint: Optional[str] = None): """ Get the meta file-list of the dataset. """ @@ -2150,22 +2188,40 @@ class HubApi: recursive=True, endpoint=endpoint ) - file_list = [f['Path'] for f in files] + file_paths = [f['Path'] for f in files] + elif repo_type == REPO_TYPE_DATASET: + file_paths = [] + page_number = 1 + page_size = 100 + while True: + try: + dataset_files: List[Dict[str, Any]] = self.get_dataset_files( + repo_id=repo_id, + revision=revision or DEFAULT_DATASET_REVISION, + recursive=True, + page_number=page_number, + page_size=page_size, + endpoint=endpoint, + ) + except Exception as e: + logger.error(f'Get dataset: {repo_id} file list failed, message: {str(e)}') + break + + # Parse data (Type: 'tree' or 'blob') + for file_info_d in dataset_files: + if file_info_d['Type'] != 'tree': + file_paths.append(file_info_d['Path']) + + if len(dataset_files) < page_size: + break + + page_number += 1 else: - namespace, dataset_name = repo_id.split('/') - dataset_hub_id, _ = self.get_dataset_id_and_type(dataset_name, namespace, endpoint=endpoint) - dataset_info = self.get_dataset_infos( - dataset_hub_id, - revision or DEFAULT_DATASET_REVISION, - recursive='True', - endpoint=endpoint - ) - files = dataset_info.get('Data', {}).get('Files', []) - file_list = [f['Path'] for f in files] + raise ValueError(f'Unsupported repo_type: {repo_type}, supported repos: {REPO_TYPE_SUPPORT}') # Glob pattern matching to_delete = [] - for path in file_list: + for path in file_paths: for delete_pattern in delete_patterns: if fnmatch.fnmatch(path, delete_pattern): to_delete.append(path) @@ -2181,12 +2237,15 @@ class HubApi: 'Revision': revision or DEFAULT_MODEL_REVISION, 'FilePath': path } - else: + elif repo_type == REPO_TYPE_DATASET: owner, dataset_name = repo_id.split('/') url = f'{endpoint}/api/v1/datasets/{owner}/{dataset_name}/repo' params = { 'FilePath': path } + else: + raise ValueError(f'Unsupported repo_type: {repo_type}, supported repos: {REPO_TYPE_SUPPORT}') + r = self.session.delete(url, params=params, cookies=cookies, headers=headers) raise_for_http_status(r) resp = r.json() diff --git a/modelscope/hub/file_download.py b/modelscope/hub/file_download.py index 872c7f4c..eeb0d414 100644 --- a/modelscope/hub/file_download.py +++ b/modelscope/hub/file_download.py @@ -234,25 +234,22 @@ def _repo_file_download( page_number = 1 page_size = 100 while True: - files_list_tree = _api.list_repo_tree( - dataset_name=name, - namespace=group_or_owner, - revision=revision, - root_path='/', - recursive=True, - page_number=page_number, - page_size=page_size, - endpoint=endpoint) - if not ('Code' in files_list_tree - and files_list_tree['Code'] == 200): - print( - 'Get dataset: %s file list failed, request_id: %s, message: %s' - % (repo_id, files_list_tree['RequestId'], - files_list_tree['Message'])) - return None - repo_files = files_list_tree['Data']['Files'] + try: + dataset_files = _api.get_dataset_files( + repo_id=repo_id, + revision=revision, + root_path='/', + recursive=True, + page_number=page_number, + page_size=page_size, + endpoint=endpoint) + except Exception as e: + logger.error( + f'Get dataset: {repo_id} file list failed, error: {e}') + break + is_exist = False - for repo_file in repo_files: + for repo_file in dataset_files: if repo_file['Type'] == 'tree': continue @@ -267,7 +264,7 @@ def _repo_file_download( file_to_download_meta = repo_file is_exist = True break - if len(repo_files) < page_size or is_exist: + if len(dataset_files) < page_size or is_exist: break page_number += 1 diff --git a/modelscope/hub/snapshot_download.py b/modelscope/hub/snapshot_download.py index a2e91d7e..f30c9312 100644 --- a/modelscope/hub/snapshot_download.py +++ b/modelscope/hub/snapshot_download.py @@ -381,8 +381,8 @@ def _snapshot_download( revision_detail = revision or DEFAULT_DATASET_REVISION logger.info('Fetching dataset repo file list...') - repo_files = fetch_repo_files(_api, name, group_or_owner, - revision_detail, endpoint) + repo_files = fetch_repo_files(_api, repo_id, revision_detail, + endpoint) if repo_files is None: logger.error( @@ -415,32 +415,28 @@ def _snapshot_download( return cache_root_path -def fetch_repo_files(_api, name, group_or_owner, revision, endpoint): +def fetch_repo_files(_api, repo_id, revision, endpoint): page_number = 1 page_size = 150 repo_files = [] while True: - files_list_tree = _api.list_repo_tree( - dataset_name=name, - namespace=group_or_owner, - revision=revision, - root_path='/', - recursive=True, - page_number=page_number, - page_size=page_size, - endpoint=endpoint) + try: + dataset_files = _api.get_dataset_files( + repo_id=repo_id, + revision=revision, + root_path='/', + recursive=True, + page_number=page_number, + page_size=page_size, + endpoint=endpoint) + except Exception as e: + logger.error(f'Error fetching dataset files: {e}') + break - if not ('Code' in files_list_tree and files_list_tree['Code'] == 200): - logger.error(f'Get dataset file list failed, request_id: \ - {files_list_tree["RequestId"]}, message: {files_list_tree["Message"]}' - ) - return None + repo_files.extend(dataset_files) - cur_repo_files = files_list_tree['Data']['Files'] - repo_files.extend(cur_repo_files) - - if len(cur_repo_files) < page_size: + if len(dataset_files) < page_size: break page_number += 1 diff --git a/modelscope/msdatasets/utils/hf_datasets_util.py b/modelscope/msdatasets/utils/hf_datasets_util.py index a76bfbce..224964f4 100644 --- a/modelscope/msdatasets/utils/hf_datasets_util.py +++ b/modelscope/msdatasets/utils/hf_datasets_util.py @@ -158,56 +158,46 @@ def _dataset_info( """ - _api = HubApi() - _namespace, _dataset_name = repo_id.split('/') - endpoint = _api.get_endpoint_for_read( - repo_id=repo_id, repo_type=REPO_TYPE_DATASET) - dataset_hub_id, dataset_type = _api.get_dataset_id_and_type( - dataset_name=_dataset_name, namespace=_namespace, endpoint=endpoint) + # Note: refer to `_list_repo_tree()`, for patching `HfApi.list_repo_tree` + repo_info_iter = self.list_repo_tree( + repo_id=repo_id, + path_in_repo='/', + revision=revision, + recursive=False, + expand=expand, + token=token, + repo_type=REPO_TYPE_DATASET, + ) - revision: str = revision or DEFAULT_DATASET_REVISION - data = _api.get_dataset_infos(dataset_hub_id=dataset_hub_id, - revision=revision, - files_metadata=files_metadata, - timeout=timeout, - endpoint=endpoint) - - # Parse data - data_d: dict = data['Data'] - data_file_list: list = data_d['Files'] - # commit_info: dict = data_d['LatestCommitter'] - - # Update data # TODO: columns align with HfDatasetInfo - data['id'] = repo_id - data['private'] = False - data['author'] = repo_id.split('/')[0] if repo_id else None - data['sha'] = revision - data['lastModified'] = None - data['gated'] = False - data['disabled'] = False - data['downloads'] = 0 - data['likes'] = 0 - data['tags'] = [] - data['cardData'] = [] - data['createdAt'] = None + # Update data_info + data_info = dict({}) + data_info['id'] = repo_id + data_info['private'] = False + data_info['author'] = repo_id.split('/')[0] if repo_id else None + data_info['sha'] = revision + data_info['lastModified'] = None + data_info['gated'] = False + data_info['disabled'] = False + data_info['downloads'] = 0 + data_info['likes'] = 0 + data_info['tags'] = [] + data_info['cardData'] = [] + data_info['createdAt'] = None # e.g. {'rfilename': 'xxx', 'blobId': 'xxx', 'size': 0, 'lfs': {'size': 0, 'sha256': 'xxx', 'pointerSize': 0}} - data['siblings'] = [] - for file_info_d in data_file_list: - file_info = { - 'rfilename': file_info_d['Path'], - 'blobId': file_info_d['Id'], - 'size': file_info_d['Size'], - 'type': 'directory' if file_info_d['Type'] == 'tree' else 'file', - 'lfs': { - 'size': file_info_d['Size'], - 'sha256': file_info_d['Sha256'], - 'pointerSize': 0 - } - } - data['siblings'].append(file_info) + data_siblings = [] + for info_item in repo_info_iter: + if isinstance(info_item, RepoFile): + data_siblings.append( + dict( + rfilename=info_item.rfilename, + blobId=info_item.blob_id, + size=info_item.size, + ) + ) + data_info['siblings'] = data_siblings - return HfDatasetInfo(**data) + return HfDatasetInfo(**data_info) def _list_repo_tree( @@ -225,35 +215,26 @@ def _list_repo_tree( _api = HubApi(timeout=3 * 60, max_retries=3) endpoint = _api.get_endpoint_for_read( repo_id=repo_id, repo_type=REPO_TYPE_DATASET) - if is_relative_path(repo_id) and repo_id.count('/') == 1: - _namespace, _dataset_name = repo_id.split('/') - elif is_relative_path(repo_id) and repo_id.count('/') == 0: - logger.warning(f'Got a relative path: {repo_id} without namespace, ' - f'Use default namespace: {DEFAULT_DATASET_NAMESPACE}') - _namespace, _dataset_name = DEFAULT_DATASET_NAMESPACE, repo_id - else: - raise ValueError(f'Invalid repo_id: {repo_id} !') + # List all files in the repo page_number = 1 page_size = 100 while True: - data: dict = _api.list_repo_tree(dataset_name=_dataset_name, - namespace=_namespace, - revision=revision or DEFAULT_DATASET_REVISION, - root_path=path_in_repo or None, - recursive=True, - page_number=page_number, - page_size=page_size, - endpoint=endpoint - ) - if not ('Code' in data and data['Code'] == 200): - logger.error(f'Get dataset: {repo_id} file list failed, message: {data["Message"]}') - return None + try: + dataset_files = _api.get_dataset_files( + repo_id=repo_id, + revision=revision or DEFAULT_DATASET_REVISION, + root_path=path_in_repo or '/', + recursive=recursive, + page_number=page_number, + page_size=page_size, + endpoint=endpoint, + ) + except Exception as e: + logger.error(f'Get dataset: {repo_id} file list failed, message: {e}') + break - # Parse data (Type: 'tree' or 'blob') - data_file_list: list = data['Data']['Files'] - - for file_info_d in data_file_list: + for file_info_d in dataset_files: path_info = {} path_info['type'] = 'directory' if file_info_d['Type'] == 'tree' else 'file' path_info['path'] = file_info_d['Path'] @@ -262,7 +243,7 @@ def _list_repo_tree( yield RepoFile(**path_info) if path_info['type'] == 'file' else RepoFolder(**path_info) - if len(data_file_list) < page_size: + if len(dataset_files) < page_size: break page_number += 1 @@ -278,30 +259,17 @@ def _get_paths_info( token: Optional[Union[bool, str]] = None, ) -> List[Union[RepoFile, RepoFolder]]: - _api = HubApi() - _namespace, _dataset_name = repo_id.split('/') - endpoint = _api.get_endpoint_for_read( - repo_id=repo_id, repo_type=REPO_TYPE_DATASET) - dataset_hub_id, dataset_type = _api.get_dataset_id_and_type( - dataset_name=_dataset_name, namespace=_namespace, endpoint=endpoint) + # Refer to func: `_list_repo_tree()`, for patching `HfApi.list_repo_tree` + repo_info_iter = self.list_repo_tree( + repo_id=repo_id, + recursive=False, + expand=expand, + revision=revision, + repo_type=repo_type, + token=token, + ) - revision: str = revision or DEFAULT_DATASET_REVISION - data = _api.get_dataset_infos(dataset_hub_id=dataset_hub_id, - revision=revision, - files_metadata=False, - recursive='False') - data_d: dict = data['Data'] - data_file_list: list = data_d['Files'] - - return [ - RepoFile(path=item_d['Name'], - size=item_d['Size'], - oid=item_d['Revision'], - lfs=None, # TODO: lfs type to be supported - last_commit=None, # TODO: lfs type to be supported - security=None - ) for item_d in data_file_list if item_d['Name'] == 'README.md' - ] + return [item_info for item_info in repo_info_iter] def _download_repo_file(repo_id: str, path_in_repo: str, download_config: DownloadConfig, revision: str): From e173fd96192ca50bc1291b1eb0e915a400417473 Mon Sep 17 00:00:00 2001 From: "Xingjun.Wang" Date: Tue, 22 Jul 2025 14:22:00 +0800 Subject: [PATCH 10/13] Add `MODELSCOPE_CREDENTIALS_PATH` for multi-users env (#1423) --- modelscope/hub/api.py | 4 ++-- modelscope/hub/constants.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index e4de81b0..b55d21cd 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -35,10 +35,10 @@ from modelscope.hub.constants import (API_HTTP_CLIENT_MAX_RETRIES, API_RESPONSE_FIELD_GIT_ACCESS_TOKEN, API_RESPONSE_FIELD_MESSAGE, API_RESPONSE_FIELD_USERNAME, - DEFAULT_CREDENTIALS_PATH, DEFAULT_MAX_WORKERS, MODELSCOPE_CLOUD_ENVIRONMENT, MODELSCOPE_CLOUD_USERNAME, + MODELSCOPE_CREDENTIALS_PATH, MODELSCOPE_DOMAIN, MODELSCOPE_PREFER_AI_SITE, MODELSCOPE_REQUEST_ID, @@ -2263,7 +2263,7 @@ class HubApi: class ModelScopeConfig: - path_credential = expanduser(DEFAULT_CREDENTIALS_PATH) + path_credential = expanduser(MODELSCOPE_CREDENTIALS_PATH) COOKIES_FILE_NAME = 'cookies' GIT_TOKEN_FILE_NAME = 'git_token' USER_INFO_FILE_NAME = 'user' diff --git a/modelscope/hub/constants.py b/modelscope/hub/constants.py index ba8f7b17..5210b1ed 100644 --- a/modelscope/hub/constants.py +++ b/modelscope/hub/constants.py @@ -16,6 +16,8 @@ MODEL_ID_SEPARATOR = '/' FILE_HASH = 'Sha256' LOGGER_NAME = 'ModelScopeHub' DEFAULT_CREDENTIALS_PATH = Path.home().joinpath('.modelscope', 'credentials') +MODELSCOPE_CREDENTIALS_PATH = os.environ.get( + 'MODELSCOPE_CREDENTIALS_PATH', DEFAULT_CREDENTIALS_PATH.as_posix()) REQUESTS_API_HTTP_METHOD = ['get', 'head', 'post', 'put', 'patch', 'delete'] API_HTTP_CLIENT_TIMEOUT = 60 API_HTTP_CLIENT_MAX_RETRIES = 2 From 595f3ea263f569759a7d52f6a43bfa9f0aa54505 Mon Sep 17 00:00:00 2001 From: "xingjun.wxj" Date: Fri, 25 Jul 2025 14:36:26 +0800 Subject: [PATCH 11/13] merge master and set version to 1.28.1 --- modelscope/version.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modelscope/version.py b/modelscope/version.py index 2bb6af92..340bb69c 100644 --- a/modelscope/version.py +++ b/modelscope/version.py @@ -1,5 +1,5 @@ # Make sure to modify __release_datetime__ to release time when making official release. -__version__ = '1.28.0' +__version__ = '1.28.1' # default release datetime for branches under active development is set # to be a time far-far-away-into-the-future -__release_datetime__ = '2025-07-08 00:00:00' +__release_datetime__ = '2025-07-26 00:00:00' From 341857cc8d4e24076d5c4c4ee284024b1b9e8fe5 Mon Sep 17 00:00:00 2001 From: "Xingjun.Wang" Date: Wed, 6 Aug 2025 15:36:24 +0800 Subject: [PATCH 12/13] fix features for datasets<=3.6.0 (#1447) --- .../msdatasets/utils/hf_datasets_util.py | 83 ++++++++++++++++++- 1 file changed, 80 insertions(+), 3 deletions(-) diff --git a/modelscope/msdatasets/utils/hf_datasets_util.py b/modelscope/msdatasets/utils/hf_datasets_util.py index 224964f4..9053d062 100644 --- a/modelscope/msdatasets/utils/hf_datasets_util.py +++ b/modelscope/msdatasets/utils/hf_datasets_util.py @@ -6,9 +6,10 @@ import contextlib import inspect import os import warnings +from dataclasses import dataclass, field, fields from functools import partial from pathlib import Path -from typing import Dict, Iterable, List, Mapping, Optional, Sequence, Union, Tuple, Literal +from typing import Dict, Iterable, List, Mapping, Optional, Sequence, Union, Tuple, Literal, Any, ClassVar from urllib.parse import urlencode @@ -16,7 +17,9 @@ import requests from datasets import (BuilderConfig, Dataset, DatasetBuilder, DatasetDict, DownloadConfig, DownloadManager, DownloadMode, Features, IterableDataset, IterableDatasetDict, Split, - VerificationMode, Version, config, data_files) + VerificationMode, Version, config, data_files, LargeList, Sequence as SequenceHf) +from datasets.features import features +from datasets.features.features import _FEATURE_TYPES from datasets.data_files import ( FILES_TO_IGNORE, DataFilesDict, EmptyDatasetError, _get_data_files_patterns, _is_inside_unrequested_special_dir, @@ -49,6 +52,7 @@ from datasets.utils.info_utils import is_small_dataset from datasets.utils.metadata import MetadataConfigs from datasets.utils.py_utils import get_imports from datasets.utils.track import tracked_str + from fsspec import filesystem from fsspec.core import _un_chain from fsspec.utils import stringify_path @@ -62,7 +66,7 @@ from modelscope import HubApi from modelscope.hub.utils.utils import get_endpoint from modelscope.msdatasets.utils.hf_file_utils import get_from_cache_ms from modelscope.utils.config_ds import MS_DATASETS_CACHE -from modelscope.utils.constant import DEFAULT_DATASET_NAMESPACE, DEFAULT_DATASET_REVISION, REPO_TYPE_DATASET +from modelscope.utils.constant import DEFAULT_DATASET_REVISION, REPO_TYPE_DATASET from modelscope.utils.import_utils import has_attr_in_class from modelscope.utils.logger import get_logger @@ -89,6 +93,76 @@ ExpandDatasetProperty_T = Literal[ ] +# Patch datasets features +@dataclass(repr=False) +class ListMs(SequenceHf): + """Feature type for large list data composed of child feature data type. + + It is backed by `pyarrow.ListType`, which uses 32-bit offsets or a fixed length. + + Args: + feature ([`FeatureType`]): + Child feature data type of each item within the large list. + length (optional `int`, default to -1): + Length of the list if it is fixed. + Defaults to -1 which means an arbitrary length. + """ + + feature: Any + length: int = -1 + id: Optional[str] = field(default=None, repr=False) + # Automatically constructed + pa_type: ClassVar[Any] = None + _type: str = field(default='List', init=False, repr=False) + + def __repr__(self): + if self.length != -1: + return f'{type(self).__name__}({self.feature}, length={self.length})' + else: + return f'{type(self).__name__}({self.feature})' + + +_FEATURE_TYPES['List'] = ListMs + + +def generate_from_dict_ms(obj: Any): + """Regenerate the nested feature object from a deserialized dict. + We use the '_type' fields to get the dataclass name to load. + + generate_from_dict is the recursive helper for Features.from_dict, and allows for a convenient constructor syntax + to define features from deserialized JSON dictionaries. This function is used in particular when deserializing + a :class:`DatasetInfo` that was dumped to a JSON object. This acts as an analogue to + :meth:`Features.from_arrow_schema` and handles the recursive field-by-field instantiation, but doesn't require any + mapping to/from pyarrow, except for the fact that it takes advantage of the mapping of pyarrow primitive dtypes + that :class:`Value` automatically performs. + """ + # Nested structures: we allow dict, list/tuples, sequences + if isinstance(obj, list): + return [generate_from_dict_ms(value) for value in obj] + # Otherwise we have a dict or a dataclass + if '_type' not in obj or isinstance(obj['_type'], dict): + return {key: generate_from_dict_ms(value) for key, value in obj.items()} + obj = dict(obj) + _type = obj.pop('_type') + class_type = _FEATURE_TYPES.get(_type, None) or globals().get(_type, None) + + if class_type is None: + raise ValueError(f"Feature type '{_type}' not found. Available feature types: {list(_FEATURE_TYPES.keys())}") + + if class_type == LargeList: + feature = obj.pop('feature') + return LargeList(generate_from_dict_ms(feature), **obj) + if class_type == ListMs: + feature = obj.pop('feature') + return ListMs(generate_from_dict_ms(feature), **obj) + if class_type == SequenceHf: # backward compatibility, this translates to a List or a dict + feature = obj.pop('feature') + return SequenceHf(feature=generate_from_dict_ms(feature), **obj) + + field_names = {f.name for f in fields(class_type)} + return class_type(**{k: v for k, v in obj.items() if k in field_names}) + + def _download_ms(self, url_or_filename: str, download_config: DownloadConfig) -> str: url_or_filename = str(url_or_filename) # for temp val @@ -1377,6 +1451,7 @@ def load_dataset_with_ctx(*args, **kwargs): resolve_pattern_origin = data_files.resolve_pattern get_module_without_script_origin = HubDatasetModuleFactoryWithoutScript.get_module get_module_with_script_origin = HubDatasetModuleFactoryWithScript.get_module + generate_from_dict_origin = features.generate_from_dict # Monkey patching with modelscope functions config.HF_ENDPOINT = get_endpoint() @@ -1392,6 +1467,7 @@ def load_dataset_with_ctx(*args, **kwargs): data_files.resolve_pattern = _resolve_pattern HubDatasetModuleFactoryWithoutScript.get_module = get_module_without_script HubDatasetModuleFactoryWithScript.get_module = get_module_with_script + features.generate_from_dict = generate_from_dict_ms streaming = kwargs.get('streaming', False) @@ -1402,6 +1478,7 @@ def load_dataset_with_ctx(*args, **kwargs): # Restore the original functions config.HF_ENDPOINT = hf_endpoint_origin file_utils.get_from_cache = get_from_cache_origin + features.generate_from_dict = generate_from_dict_origin # Keep the context during the streaming iteration if not streaming: config.HF_ENDPOINT = hf_endpoint_origin From 2f792d936fe4fc67d87cbed5bc4470130fe1bf4d Mon Sep 17 00:00:00 2001 From: "xingjun.wxj" Date: Wed, 6 Aug 2025 15:38:06 +0800 Subject: [PATCH 13/13] set version to 1.28.2 --- modelscope/version.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modelscope/version.py b/modelscope/version.py index 340bb69c..a502fbd2 100644 --- a/modelscope/version.py +++ b/modelscope/version.py @@ -1,5 +1,5 @@ # Make sure to modify __release_datetime__ to release time when making official release. -__version__ = '1.28.1' +__version__ = '1.28.2' # default release datetime for branches under active development is set # to be a time far-far-away-into-the-future -__release_datetime__ = '2025-07-26 00:00:00' +__release_datetime__ = '2025-08-06 16:00:00'