From 1366dcf479da6fe3d0d80c199ef8db2fd33466a4 Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Tue, 10 Dec 2024 19:28:51 +0800 Subject: [PATCH 01/36] fix ut --- tests/trainers/test_finetune_vision_efficient_tuning_swift.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/trainers/test_finetune_vision_efficient_tuning_swift.py b/tests/trainers/test_finetune_vision_efficient_tuning_swift.py index 56a5b6fc..6ab6211c 100644 --- a/tests/trainers/test_finetune_vision_efficient_tuning_swift.py +++ b/tests/trainers/test_finetune_vision_efficient_tuning_swift.py @@ -51,13 +51,14 @@ class TestVisionEfficientTuningSwiftTrainer(unittest.TestCase): cfg.model.finetune = True cfg.train.max_epochs = self.max_epochs cfg.train.lr_scheduler.T_max = self.max_epochs + cfg.train.dataloader.workers_per_gpu = 0 + cfg.evaluation.dataloader.workers_per_gpu = 0 cfg.model.backbone.lora_length = 0 return cfg lora_config = LoRAConfig( r=self.tune_length, target_modules=['qkv'], - merge_weights=False, use_merged_linear=True, enable_lora=[True]) From 6e68cb92c1cc9316fbe0f2afc2464eab683b578a Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Wed, 25 Dec 2024 20:29:44 +0800 Subject: [PATCH 02/36] add whoami --- modelscope/utils/hf_util.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py index 8f7c06da..499319dd 100644 --- a/modelscope/utils/hf_util.py +++ b/modelscope/utils/hf_util.py @@ -2,7 +2,7 @@ import os from pathlib import Path from types import MethodType -from typing import Optional, Union +from typing import Optional, Union, Dict from transformers import AutoConfig as AutoConfigHF from transformers import AutoFeatureExtractor as AutoFeatureExtractorHF @@ -163,6 +163,10 @@ def _file_download(repo_id: str, revision=revision) +def _whoami(self, token: Union[bool, str, None] = None) -> Dict: + return 'unknown' + + def _patch_pretrained_class(): def get_model_dir(pretrained_model_name_or_path, ignore_file_pattern, @@ -300,6 +304,10 @@ def patch_hub(): huggingface_hub.file_exists = hf_api.file_exists huggingface_hub.hf_api.file_exists = hf_api.file_exists + hf_api.whoami = MethodType(_whoami, api) + huggingface_hub.whoami = hf_api.whoami + huggingface_hub.hf_api.whoami = hf_api.whoami + _patch_pretrained_class() From 3a5505f50e6c1662a76a8b653c2c6268a5fb13a8 Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Wed, 25 Dec 2024 23:29:15 +0800 Subject: [PATCH 03/36] wip --- modelscope/hub/api.py | 60 +++++++++++++++++++------------ modelscope/hub/create_model.py | 60 +++++++++++++++++++++++++++++++ modelscope/hub/push_to_hub.py | 50 ++++++++++++++++++++++++++ modelscope/hub/utils/utils.py | 66 +++++++++++++++++++++++++++++++++- modelscope/utils/hf_util.py | 60 +++++++++++++++++++++++++++++-- 5 files changed, 269 insertions(+), 27 deletions(-) create mode 100644 modelscope/hub/create_model.py diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index b2118ea8..0e4b4cde 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -36,8 +36,8 @@ from modelscope.hub.constants import (API_HTTP_CLIENT_MAX_RETRIES, TEMPORARY_FOLDER_NAME, DatasetVisibility, Licenses, ModelVisibility) from modelscope.hub.errors import (InvalidParameter, NotExistError, - NotLoginException, NoValidRevisionError, - RequestError, datahub_raise_on_error, + NotLoginException, RequestError, + datahub_raise_on_error, handle_http_post_error, handle_http_response, is_ok, raise_for_http_status, raise_on_error) @@ -62,6 +62,7 @@ logger = 
get_logger() class HubApi: """Model hub api interface. """ + def __init__(self, endpoint: Optional[str] = None, timeout=API_HTTP_CLIENT_TIMEOUT, @@ -94,14 +95,14 @@ class HubApi: timeout=timeout)) def login( - self, - access_token: str, + self, + access_token: Optional[str] = None, ): """Login with your SDK access token, which can be obtained from https://www.modelscope.cn user center. Args: - access_token (str): user access token on modelscope. + access_token (str): user access token on modelscope, set this argument or set `MODELSCOPE_API_TOKEN`. Returns: cookies: to authenticate yourself to ModelScope open-api @@ -110,6 +111,9 @@ class HubApi: Note: You only have to login once within 30 days. """ + if access_token is None: + access_token = os.environ.get('MODELSCOPE_API_TOKEN') + assert access_token is not None, 'Please pass in access_token or set `MODELSCOPE_API_TOKEN`' path = f'{self.endpoint}/api/v1/login' r = self.session.post( path, @@ -132,6 +136,15 @@ class HubApi: return d[API_RESPONSE_FIELD_DATA][ API_RESPONSE_FIELD_GIT_ACCESS_TOKEN], cookies + def try_login(self, access_token: Optional[str] = None) -> bool: + """Wraps the `login` method and returns bool. + """ + try: + self.login(access_token) + return True + except AssertionError: + return False + def create_model(self, model_id: str, visibility: Optional[int] = ModelVisibility.PUBLIC, @@ -211,9 +224,9 @@ class HubApi: return f'{self.endpoint}/api/v1/models/{model_id}.git' def get_model( - self, - model_id: str, - revision: Optional[str] = DEFAULT_MODEL_REVISION, + self, + model_id: str, + revision: Optional[str] = DEFAULT_MODEL_REVISION, ) -> str: """Get model information at ModelScope @@ -249,10 +262,10 @@ class HubApi: raise_for_http_status(r) def repo_exists( - self, - repo_id: str, - *, - repo_type: Optional[str] = None, + self, + repo_id: str, + *, + repo_type: Optional[str] = None, ) -> bool: """ Checks if a repository exists on ModelScope @@ -460,7 +473,7 @@ class HubApi: r = self.session.put( path, data='{"Path":"%s", "PageNumber":%s, "PageSize": %s}' % - (owner_or_group, page_number, page_size), + (owner_or_group, page_number, page_size), cookies=cookies, headers=self.builder_headers(self.headers)) handle_http_response(r, logger, cookies, owner_or_group) @@ -476,7 +489,7 @@ class HubApi: def _check_cookie(self, use_cookies: Union[bool, - CookieJar] = False) -> CookieJar: + CookieJar] = False) -> CookieJar: cookies = None if isinstance(use_cookies, CookieJar): cookies = use_cookies @@ -587,7 +600,8 @@ class HubApi: else: if revision is None: # user not specified revision, use latest revision before release time revisions_detail = [x for x in - all_tags_detail if x['CreatedAt'] <= release_timestamp] if all_tags_detail else [] # noqa E501 + all_tags_detail if + x['CreatedAt'] <= release_timestamp] if all_tags_detail else [] # noqa E501 if len(revisions_detail) > 0: revision = revisions_detail[0]['Revision'] # use latest revision before release time. revision_detail = revisions_detail[0] @@ -621,9 +635,9 @@ class HubApi: cookies=cookies)['Revision'] def get_model_branches_and_tags_details( - self, - model_id: str, - use_cookies: Union[bool, CookieJar] = False, + self, + model_id: str, + use_cookies: Union[bool, CookieJar] = False, ) -> Tuple[List[str], List[str]]: """Get model branch and tags. 
@@ -647,9 +661,9 @@ class HubApi: return info['RevisionMap']['Branches'], info['RevisionMap']['Tags'] def get_model_branches_and_tags( - self, - model_id: str, - use_cookies: Union[bool, CookieJar] = False, + self, + model_id: str, + use_cookies: Union[bool, CookieJar] = False, ) -> Tuple[List[str], List[str]]: """Get model branch and tags. @@ -1087,7 +1101,7 @@ class HubApi: def list_oss_dataset_objects(self, dataset_name, namespace, max_limit, is_recursive, is_filter_dir, revision): url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/oss/tree/?' \ - f'MaxLimit={max_limit}&Revision={revision}&Recursive={is_recursive}&FilterDir={is_filter_dir}' + f'MaxLimit={max_limit}&Revision={revision}&Recursive={is_recursive}&FilterDir={is_filter_dir}' cookies = ModelScopeConfig.get_cookies() resp = self.session.get(url=url, cookies=cookies, timeout=1800) @@ -1116,7 +1130,7 @@ class HubApi: raise ValueError('Args cannot be empty!') url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/oss/prefix?Prefix={object_name}/' \ - f'&Revision={revision}' + f'&Revision={revision}' cookies = ModelScopeConfig.get_cookies() resp = self.session.delete(url=url, cookies=cookies) diff --git a/modelscope/hub/create_model.py b/modelscope/hub/create_model.py new file mode 100644 index 00000000..7722ce3d --- /dev/null +++ b/modelscope/hub/create_model.py @@ -0,0 +1,60 @@ +import json +import tempfile +from typing import Dict, Optional, Any +from urllib.error import HTTPError + +from modelscope.hub.api import ModelScopeConfig, HubApi + +from modelscope.hub.constants import (ModelVisibility) +from .utils.utils import (add_patterns_to_gitattributes, + add_patterns_to_file) +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +def create_model_repo(repo_id: str, + token: Optional[str] = None, + private: bool = False, + config_json: Optional[Dict[str, Any]] = None) -> str: + """Create model repo and create .gitattributes file and .gitignore file + + Args: + repo_id(str): The repo id + token(str, Optional): The access token of the user + private(bool): If is a private repo + config_json(Dict[str, Any]): An optional config_json to fill into the configuration.json file, + If None, the default content will be uploaded: + ```json + {"framework": "pytorch", "task": "text-generation", "allow_remote": True} + ``` + You can manually modify this in the modelhub. 
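+
+        Example (a minimal usage sketch; the repo name and token below are placeholders):
+
+            >>> from modelscope.hub.create_model import create_model_repo
+            >>> create_model_repo('my-test-model', token='xxx', private=True)
+            >>> # Creates `<user_name>/my-test-model` with a default configuration.json
+            >>> # and LFS rules for `*.safetensors`/`*.bin`/`*.pt`/`*.gguf` in .gitattributes.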
+ """ + api = HubApi() + assert repo_id is not None, 'Please enter a valid hub_model_id' + api.try_login(token) + visibility = ModelVisibility.PRIVATE if private else ModelVisibility.PUBLIC + if '/' not in repo_id: + user_name = ModelScopeConfig.get_user_info()[0] + assert isinstance(user_name, str) + hub_model_id = f'{user_name}/{repo_id}' + logger.info(f"'/' not in hub_model_id, pushing to personal repo {hub_model_id}") + try: + api.create_model(repo_id, visibility) + except HTTPError: + # The remote repository has been created + pass + + with tempfile.TemporaryDirectory() as temp_cache_dir: + from modelscope.hub.repository import Repository + repo = Repository(temp_cache_dir, repo_id) + add_patterns_to_gitattributes(repo, ['*.safetensors', '*.bin', '*.pt', '*.gguf']) + default_config = {"framework": "pytorch", "task": "text-generation", "allow_remote": True} + if not config_json: + config_json = {} + config = {**default_config, **config_json} + add_patterns_to_file( + repo, + 'configuration.json', [json.dumps(config)], + ignore_push_error=True) + return repo_id \ No newline at end of file diff --git a/modelscope/hub/push_to_hub.py b/modelscope/hub/push_to_hub.py index 2b2b4091..3a52afbb 100644 --- a/modelscope/hub/push_to_hub.py +++ b/modelscope/hub/push_to_hub.py @@ -4,6 +4,10 @@ import concurrent.futures import os import shutil from multiprocessing import Manager, Process, Value +from pathlib import Path +from typing import List, Optional, Union + +import json from modelscope.hub.api import HubApi from modelscope.hub.constants import ModelVisibility @@ -19,6 +23,52 @@ _tasks = dict() _manager = None +def push_model_to_hub(repo_id: str, + folder_path: Union[str, Path], + path_in_repo: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, + token: Union[str, bool, None] = None, + private: bool = False, + revision: Optional[str] = 'master', + ignore_patterns: Optional[Union[List[str], str]] = None, + **kwargs): + from modelscope.hub.create_model import create_model_repo + create_model_repo(repo_id, token, private) + from modelscope import push_to_hub + commit_message = commit_message or 'Upload folder using api' + if commit_description: + commit_message = commit_message + '\n' + commit_description + if not os.path.exists(os.path.join(folder_path, 'configuration.json')): + default_config = { + 'framework': 'pytorch', + 'task': 'text-generation', + 'allow_remote': True + } + config_json = kwargs.get('config_json') or {} + config = {**default_config, **config_json} + with open(os.path.join(folder_path, 'configuration.json'), 'w') as f: + f.write(json.dumps(config)) + if ignore_patterns: + ignore_patterns = [p for p in ignore_patterns if p != '_*'] + if path_in_repo: + # We don't support part submit for now + path_in_repo = os.path.basename(folder_path) + folder_path = os.path.dirname(folder_path) + ignore_patterns = [] + if revision is None or revision == 'main': + revision = 'master' + push_to_hub( + repo_id, + folder_path, + token, + private, + commit_message=commit_message, + ignore_patterns=ignore_patterns, + revision=revision, + tag=path_in_repo) + + def _api_push_to_hub(repo_name, output_dir, token, diff --git a/modelscope/hub/utils/utils.py b/modelscope/hub/utils/utils.py index bb38f26a..b8c6320a 100644 --- a/modelscope/hub/utils/utils.py +++ b/modelscope/hub/utils/utils.py @@ -4,7 +4,7 @@ import hashlib import os from datetime import datetime from pathlib import Path -from typing import Optional +from typing import List, Optional 
import requests @@ -125,3 +125,67 @@ def file_integrity_validation(file_path, expected_sha256): file_path, expected_sha256, file_sha256) logger.error(msg) raise FileIntegrityError(msg) + + +def add_patterns_to_file(repo, + file_name: str, + patterns: List[str], + commit_message: Optional[str] = None, + ignore_push_error=False) -> None: + if isinstance(patterns, str): + patterns = [patterns] + if commit_message is None: + commit_message = f'Add `{patterns[0]}` patterns to {file_name}' + + # Get current file content + repo_dir = repo.model_dir + file_path = os.path.join(repo_dir, file_name) + if os.path.exists(file_path): + with open(file_path, 'r', encoding='utf-8') as f: + current_content = f.read() + else: + current_content = '' + # Add the patterns to file + content = current_content + for pattern in patterns: + if pattern not in content: + if len(content) > 0 and not content.endswith('\n'): + content += '\n' + content += f'{pattern}\n' + + # Write the file if it has changed + if content != current_content: + with open(file_path, 'w', encoding='utf-8') as f: + logger.debug(f'Writing {file_name} file. Content: {content}') + f.write(content) + try: + repo.push(commit_message) + except Exception as e: + if ignore_push_error: + pass + else: + raise e + + +def add_patterns_to_gitignore(repo, + patterns: List[str], + commit_message: Optional[str] = None) -> None: + add_patterns_to_file( + repo, '.gitignore', patterns, commit_message, ignore_push_error=True) + + +def add_patterns_to_gitattributes( + repo, + patterns: List[str], + commit_message: Optional[str] = None) -> None: + new_patterns = [] + suffix = 'filter=lfs diff=lfs merge=lfs -text' + for pattern in patterns: + if suffix not in pattern: + pattern = f'{pattern} {suffix}' + new_patterns.append(pattern) + file_name = '.gitattributes' + if commit_message is None: + commit_message = f'Add `{patterns[0]}` patterns to {file_name}' + add_patterns_to_file( + repo, file_name, new_patterns, commit_message, ignore_push_error=True) diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py index 499319dd..7ecd10f2 100644 --- a/modelscope/utils/hf_util.py +++ b/modelscope/utils/hf_util.py @@ -1,9 +1,13 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import os +import tempfile +from functools import partial from pathlib import Path from types import MethodType -from typing import Optional, Union, Dict +from typing import Dict, List, Optional, Union +from urllib.error import HTTPError +from huggingface_hub.hf_api import CommitInfo, future_compatible from transformers import AutoConfig as AutoConfigHF from transformers import AutoFeatureExtractor as AutoFeatureExtractorHF from transformers import AutoImageProcessor as AutoImageProcessorHF @@ -64,7 +68,7 @@ from transformers import (PretrainedConfig, PreTrainedModel, from transformers import T5EncoderModel as T5EncoderModelHF from transformers import __version__ as transformers_version -from modelscope import snapshot_download +from modelscope import push_to_hub, snapshot_download from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke from .logger import get_logger @@ -164,7 +168,8 @@ def _file_download(repo_id: str, def _whoami(self, token: Union[bool, str, None] = None) -> Dict: - return 'unknown' + from modelscope.hub.api import ModelScopeConfig + return {'name': ModelScopeConfig.get_user_info()[0] or 'unknown'} def _patch_pretrained_class(): @@ -308,6 +313,55 @@ def patch_hub(): huggingface_hub.whoami = hf_api.whoami huggingface_hub.hf_api.whoami = hf_api.whoami + def create_repo(repo_id: str, + *, + token: Union[str, bool, None] = None, + private: bool = False, + **kwargs) -> 'RepoUrl': + """ + Create a new repository on the hub. + + Args: + repo_id: The ID of the repository to create. + token: The authentication token to use. + private: Whether the repository should be private. + **kwargs: Additional arguments. + + Returns: + RepoUrl: The URL of the created repository. + """ + from modelscope.hub.create_model import create_model_repo + hub_model_id = create_model_repo(repo_id, token, private) + from huggingface_hub import RepoUrl + return RepoUrl(url=hub_model_id, ) + + @future_compatible + def upload_folder( + *, + repo_id: str, + folder_path: Union[str, Path], + path_in_repo: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, + token: Union[str, bool, None] = None, + revision: Optional[str] = 'master', + ignore_patterns: Optional[Union[List[str], str]] = None, + **kwargs, + ): + from modelscope.hub.push_to_hub import push_model_to_hub + push_model_to_hub(repo_id, folder_path, path_in_repo, commit_message, + commit_description, token, True, revision, + ignore_patterns) + return CommitInfo( + commit_url=f'https://www.modelscope.cn/models/{repo_id}/files', + commit_message=commit_message, + commit_description=commit_description, + oid=None, + ) + + huggingface_hub.create_repo = create_repo + huggingface_hub.upload_folder = partial(upload_folder, api) + _patch_pretrained_class() From 7769531e9ec264d6dad9573fffe44fd3d8e41f08 Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Thu, 26 Dec 2024 23:16:25 +0800 Subject: [PATCH 04/36] add upload files --- modelscope/hub/push_to_hub.py | 31 +++++++++++++++++++++++++++++++ modelscope/hub/utils/utils.py | 4 +++- modelscope/utils/hf_util.py | 30 +++++++++++++++++++++++++++++- 3 files changed, 63 insertions(+), 2 deletions(-) diff --git a/modelscope/hub/push_to_hub.py b/modelscope/hub/push_to_hub.py index 3a52afbb..d8b98087 100644 --- a/modelscope/hub/push_to_hub.py +++ b/modelscope/hub/push_to_hub.py @@ -3,6 +3,7 @@ import concurrent.futures import os import shutil +import tempfile from multiprocessing import Manager, Process, Value from pathlib import Path from typing import 
List, Optional, Union @@ -23,6 +24,36 @@ _tasks = dict() _manager = None +def push_files_to_hub( + path_or_fileobj: Union[str, Path], + path_in_repo: str, + repo_id: str, + token: Union[str, bool, None] = None, + revision: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, +): + if not os.path.exists(path_or_fileobj): + return + + from modelscope import HubApi + api = HubApi() + api.login(token) + if not commit_message: + commit_message = 'Updating files' + if commit_description: + commit_message = commit_message + '\n' + commit_description + with tempfile.TemporaryDirectory() as temp_cache_dir: + from modelscope.hub.repository import Repository + repo = Repository(temp_cache_dir, repo_id, revision=revision) + sub_folder = os.path.join(temp_cache_dir, path_in_repo) + os.makedirs(sub_folder, exist_ok=True) + if os.path.isfile(path_or_fileobj): + shutil.copyfile(path_or_fileobj, sub_folder) + else: + shutil.copytree(path_or_fileobj, sub_folder, dirs_exist_ok=True) + repo.push(commit_message) + def push_model_to_hub(repo_id: str, folder_path: Union[str, Path], path_in_repo: Optional[str] = None, diff --git a/modelscope/hub/utils/utils.py b/modelscope/hub/utils/utils.py index b8c6320a..5ee928c4 100644 --- a/modelscope/hub/utils/utils.py +++ b/modelscope/hub/utils/utils.py @@ -2,9 +2,11 @@ import hashlib import os +import shutil +import tempfile from datetime import datetime from pathlib import Path -from typing import List, Optional +from typing import List, Optional, Union, BinaryIO import requests diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py index 7ecd10f2..cc63f214 100644 --- a/modelscope/utils/hf_util.py +++ b/modelscope/utils/hf_util.py @@ -4,7 +4,7 @@ import tempfile from functools import partial from pathlib import Path from types import MethodType -from typing import Dict, List, Optional, Union +from typing import Dict, List, Optional, Union, BinaryIO from urllib.error import HTTPError from huggingface_hub.hf_api import CommitInfo, future_compatible @@ -313,6 +313,9 @@ def patch_hub(): huggingface_hub.whoami = hf_api.whoami huggingface_hub.hf_api.whoami = hf_api.whoami + from huggingface_hub.repocard import RepoCard + RepoCard.validate = lambda *args, **kwargs: None + def create_repo(repo_id: str, *, token: Union[str, bool, None] = None, @@ -359,9 +362,34 @@ def patch_hub(): oid=None, ) + @future_compatible + def upload_file( + self, + *, + path_or_fileobj: Union[str, Path, bytes, BinaryIO], + path_in_repo: str, + repo_id: str, + token: Union[str, bool, None] = None, + revision: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, + **kwargs, + ): + + from modelscope.hub.push_to_hub import push_files_to_hub + push_files_to_hub(path_or_fileobj, path_in_repo, repo_id, token, revision, commit_message, commit_description) + + huggingface_hub.create_repo = create_repo huggingface_hub.upload_folder = partial(upload_folder, api) + hf_api.upload_file = MethodType(upload_file, api) + huggingface_hub.upload_file = hf_api.upload_file + huggingface_hub.hf_api.upload_file = hf_api.upload_file + + from transformers.utils import hub + hub.create_repo = create_repo + _patch_pretrained_class() From d39f0b12ce1f090bbb5bc379da63c15874e68f10 Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Thu, 26 Dec 2024 23:43:14 +0800 Subject: [PATCH 05/36] fix --- modelscope/hub/create_model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/modelscope/hub/create_model.py b/modelscope/hub/create_model.py index 7722ce3d..505268f5 100644 --- a/modelscope/hub/create_model.py +++ b/modelscope/hub/create_model.py @@ -1,7 +1,7 @@ import json import tempfile from typing import Dict, Optional, Any -from urllib.error import HTTPError +from requests.exceptions import HTTPError from modelscope.hub.api import ModelScopeConfig, HubApi @@ -37,8 +37,8 @@ def create_model_repo(repo_id: str, if '/' not in repo_id: user_name = ModelScopeConfig.get_user_info()[0] assert isinstance(user_name, str) - hub_model_id = f'{user_name}/{repo_id}' - logger.info(f"'/' not in hub_model_id, pushing to personal repo {hub_model_id}") + repo_id = f'{user_name}/{repo_id}' + logger.info(f"'/' not in hub_model_id, pushing to personal repo {repo_id}") try: api.create_model(repo_id, visibility) except HTTPError: From 402904333ae5f489671eedd8e468da41b974fa62 Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Fri, 27 Dec 2024 00:04:59 +0800 Subject: [PATCH 06/36] fix --- modelscope/utils/hf_util.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py index cc63f214..788619ec 100644 --- a/modelscope/utils/hf_util.py +++ b/modelscope/utils/hf_util.py @@ -313,8 +313,8 @@ def patch_hub(): huggingface_hub.whoami = hf_api.whoami huggingface_hub.hf_api.whoami = hf_api.whoami - from huggingface_hub.repocard import RepoCard - RepoCard.validate = lambda *args, **kwargs: None + from huggingface_hub import repocard + repocard.RepoCard.validate = lambda *args, **kwargs: None def create_repo(repo_id: str, *, @@ -386,6 +386,7 @@ def patch_hub(): hf_api.upload_file = MethodType(upload_file, api) huggingface_hub.upload_file = hf_api.upload_file huggingface_hub.hf_api.upload_file = hf_api.upload_file + repocard.upload_file = hf_api.upload_file from transformers.utils import hub hub.create_repo = create_repo From 560a21e2a58042668a4b166b63d992bd0e7a21f0 Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Sat, 28 Dec 2024 23:08:23 +0800 Subject: [PATCH 07/36] wip --- modelscope/hub/create_model.py | 27 +++-- modelscope/hub/push_to_hub.py | 15 +-- modelscope/hub/utils/utils.py | 2 +- modelscope/utils/hf_util.py | 182 +++++++++++++-------------------- 4 files changed, 96 insertions(+), 130 deletions(-) diff --git a/modelscope/hub/create_model.py b/modelscope/hub/create_model.py index 505268f5..93ee248d 100644 --- a/modelscope/hub/create_model.py +++ b/modelscope/hub/create_model.py @@ -1,14 +1,13 @@ -import json import tempfile -from typing import Dict, Optional, Any +from typing import Any, Dict, Optional + +import json from requests.exceptions import HTTPError -from modelscope.hub.api import ModelScopeConfig, HubApi - -from modelscope.hub.constants import (ModelVisibility) -from .utils.utils import (add_patterns_to_gitattributes, - add_patterns_to_file) +from modelscope.hub.api import HubApi, ModelScopeConfig +from modelscope.hub.constants import ModelVisibility from modelscope.utils.logger import get_logger +from .utils.utils import add_patterns_to_file, add_patterns_to_gitattributes logger = get_logger() @@ -38,7 +37,8 @@ def create_model_repo(repo_id: str, user_name = ModelScopeConfig.get_user_info()[0] assert isinstance(user_name, str) repo_id = f'{user_name}/{repo_id}' - logger.info(f"'/' not in hub_model_id, pushing to personal repo {repo_id}") + logger.info( + f"'/' not in hub_model_id, pushing to personal repo {repo_id}") try: api.create_model(repo_id, visibility) except HTTPError: @@ -48,8 +48,13 @@ def 
create_model_repo(repo_id: str, with tempfile.TemporaryDirectory() as temp_cache_dir: from modelscope.hub.repository import Repository repo = Repository(temp_cache_dir, repo_id) - add_patterns_to_gitattributes(repo, ['*.safetensors', '*.bin', '*.pt', '*.gguf']) - default_config = {"framework": "pytorch", "task": "text-generation", "allow_remote": True} + add_patterns_to_gitattributes( + repo, ['*.safetensors', '*.bin', '*.pt', '*.gguf']) + default_config = { + 'framework': 'pytorch', + 'task': 'text-generation', + 'allow_remote': True + } if not config_json: config_json = {} config = {**default_config, **config_json} @@ -57,4 +62,4 @@ def create_model_repo(repo_id: str, repo, 'configuration.json', [json.dumps(config)], ignore_push_error=True) - return repo_id \ No newline at end of file + return repo_id diff --git a/modelscope/hub/push_to_hub.py b/modelscope/hub/push_to_hub.py index d8b98087..47c7cc69 100644 --- a/modelscope/hub/push_to_hub.py +++ b/modelscope/hub/push_to_hub.py @@ -25,13 +25,13 @@ _manager = None def push_files_to_hub( - path_or_fileobj: Union[str, Path], - path_in_repo: str, - repo_id: str, - token: Union[str, bool, None] = None, - revision: Optional[str] = None, - commit_message: Optional[str] = None, - commit_description: Optional[str] = None, + path_or_fileobj: Union[str, Path], + path_in_repo: str, + repo_id: str, + token: Union[str, bool, None] = None, + revision: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, ): if not os.path.exists(path_or_fileobj): return @@ -54,6 +54,7 @@ def push_files_to_hub( shutil.copytree(path_or_fileobj, sub_folder, dirs_exist_ok=True) repo.push(commit_message) + def push_model_to_hub(repo_id: str, folder_path: Union[str, Path], path_in_repo: Optional[str] = None, diff --git a/modelscope/hub/utils/utils.py b/modelscope/hub/utils/utils.py index 5ee928c4..d8d963d4 100644 --- a/modelscope/hub/utils/utils.py +++ b/modelscope/hub/utils/utils.py @@ -6,7 +6,7 @@ import shutil import tempfile from datetime import datetime from pathlib import Path -from typing import List, Optional, Union, BinaryIO +from typing import BinaryIO, List, Optional, Union import requests diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py index 788619ec..f1861473 100644 --- a/modelscope/utils/hf_util.py +++ b/modelscope/utils/hf_util.py @@ -1,11 +1,9 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
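 # This module monkey-patches Hugging Face Hub and Transformers entry points
 # (downloads, uploads, repo creation and queries) so that they are served by
 # the ModelScope hub.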
import os -import tempfile from functools import partial from pathlib import Path from types import MethodType -from typing import Dict, List, Optional, Union, BinaryIO -from urllib.error import HTTPError +from typing import BinaryIO, Dict, List, Optional, Union from huggingface_hub.hf_api import CommitInfo, future_compatible from transformers import AutoConfig as AutoConfigHF @@ -68,7 +66,7 @@ from transformers import (PretrainedConfig, PreTrainedModel, from transformers import T5EncoderModel as T5EncoderModelHF from transformers import __version__ as transformers_version -from modelscope import push_to_hub, snapshot_download +from modelscope import snapshot_download from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke from .logger import get_logger @@ -79,13 +77,29 @@ except ImportError: GPTQConfigHF = None AwqConfigHF = None +try: + from peft import ( + PeftConfig as PeftConfigHF, + PeftModel as PeftModelHF, + PeftModelForCausalLM as PeftModelForCausalLMHF, + PeftModelForSequenceClassification as + PeftModelForSequenceClassificationHF, + PeftMixedModel as PeftMixedModelHF, + ) +except ImportError: + PeftConfigHF = None + PeftModelHF = None + PeftModelForCausalLMHF = None + PeftModelForSequenceClassificationHF = None + PeftMixedModelHF = None + logger = get_logger() class UnsupportedAutoClass: def __init__(self, name: str): - self.error_msg =\ + self.error_msg = \ f'{name} is not supported with your installed Transformers version {transformers_version}. ' + \ 'Please update your Transformers by "pip install transformers -U".' @@ -186,113 +200,56 @@ def _patch_pretrained_class(): model_dir = pretrained_model_name_or_path return model_dir - def patch_tokenizer_base(): - """ Monkey patch PreTrainedTokenizerBase.from_pretrained to adapt to modelscope hub. 
- """ - ori_from_pretrained = PreTrainedTokenizerBase.from_pretrained.__func__ + ignore_file_pattern = [ + r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt' + ] - @classmethod - def from_pretrained(cls, pretrained_model_name_or_path, *model_args, - **kwargs): - ignore_file_pattern = [ - r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt' - ] - model_dir = get_model_dir(pretrained_model_name_or_path, - ignore_file_pattern, **kwargs) - return ori_from_pretrained(cls, model_dir, *model_args, **kwargs) + def patch_pretrained_model_name_or_path(cls, pretrained_model_name_or_path, + *model_args, **kwargs): + model_dir = get_model_dir(pretrained_model_name_or_path, + kwargs.pop('ignore_file_pattern', None), + **kwargs) + return kwargs.pop('ori_func')(cls, model_dir, *model_args, **kwargs) - PreTrainedTokenizerBase.from_pretrained = from_pretrained + PreTrainedTokenizerBase.from_pretrained = partial( + patch_pretrained_model_name_or_path, + ori_func=PreTrainedTokenizerBase.from_pretrained, + ignore_file_pattern=ignore_file_pattern) + PretrainedConfig.from_pretrained = partial( + patch_pretrained_model_name_or_path, + ori_func=PretrainedConfig.from_pretrained, + ignore_file_pattern=ignore_file_pattern) + PretrainedConfig.get_config_dict = partial( + patch_pretrained_model_name_or_path, + ori_func=PretrainedConfig.get_config_dict, + ignore_file_pattern=ignore_file_pattern) + if PeftConfigHF is not None: + PeftConfigHF.from_pretrained = partial( + patch_pretrained_model_name_or_path, + ori_func=PeftConfigHF.from_pretrained, + ignore_file_pattern=ignore_file_pattern) + PreTrainedModel.from_pretrained = partial( + patch_pretrained_model_name_or_path, + ori_func=PreTrainedModel.from_pretrained) + AutoImageProcessorHF.from_pretrained = partial( + patch_pretrained_model_name_or_path, + ori_func=PreTrainedModel.from_pretrained) + AutoProcessorHF.from_pretrained = partial( + patch_pretrained_model_name_or_path, + ori_func=AutoProcessorHF.from_pretrained) + AutoFeatureExtractorHF.from_pretrained = partial( + patch_pretrained_model_name_or_path, + ori_func=AutoFeatureExtractorHF.from_pretrained) - def patch_config_base(): - """ Monkey patch PretrainedConfig.from_pretrained to adapt to modelscope hub. - """ - ori_from_pretrained = PretrainedConfig.from_pretrained.__func__ - ori_get_config_dict = PretrainedConfig.get_config_dict.__func__ + def _get_peft_type(cls, model_id, **kwargs): + model_dir = get_model_dir(model_id, ignore_file_pattern, **kwargs) + return kwargs.pop('ori_func')(cls, model_dir, **kwargs) - @classmethod - def from_pretrained(cls, pretrained_model_name_or_path, *model_args, - **kwargs): - ignore_file_pattern = [ - r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt' - ] - model_dir = get_model_dir(pretrained_model_name_or_path, - ignore_file_pattern, **kwargs) - return ori_from_pretrained(cls, model_dir, *model_args, **kwargs) - - @classmethod - def get_config_dict(cls, pretrained_model_name_or_path, **kwargs): - ignore_file_pattern = [ - r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt' - ] - model_dir = get_model_dir(pretrained_model_name_or_path, - ignore_file_pattern, **kwargs) - return ori_get_config_dict(cls, model_dir, **kwargs) - - PretrainedConfig.from_pretrained = from_pretrained - PretrainedConfig.get_config_dict = get_config_dict - - def patch_model_base(): - """ Monkey patch PreTrainedModel.from_pretrained to adapt to modelscope hub. 
- """ - ori_from_pretrained = PreTrainedModel.from_pretrained.__func__ - - @classmethod - def from_pretrained(cls, pretrained_model_name_or_path, *model_args, - **kwargs): - model_dir = get_model_dir(pretrained_model_name_or_path, None, - **kwargs) - return ori_from_pretrained(cls, model_dir, *model_args, **kwargs) - - PreTrainedModel.from_pretrained = from_pretrained - - def patch_image_processor_base(): - """ Monkey patch AutoImageProcessorHF.from_pretrained to adapt to modelscope hub. - """ - ori_from_pretrained = AutoImageProcessorHF.from_pretrained.__func__ - - @classmethod - def from_pretrained(cls, pretrained_model_name_or_path, *model_args, - **kwargs): - model_dir = get_model_dir(pretrained_model_name_or_path, None, - **kwargs) - return ori_from_pretrained(cls, model_dir, *model_args, **kwargs) - - AutoImageProcessorHF.from_pretrained = from_pretrained - - def patch_auto_processor_base(): - """ Monkey patch AutoProcessorHF.from_pretrained to adapt to modelscope hub. - """ - ori_from_pretrained = AutoProcessorHF.from_pretrained.__func__ - - @classmethod - def from_pretrained(cls, pretrained_model_name_or_path, *model_args, - **kwargs): - model_dir = get_model_dir(pretrained_model_name_or_path, None, - **kwargs) - return ori_from_pretrained(cls, model_dir, *model_args, **kwargs) - - AutoProcessorHF.from_pretrained = from_pretrained - - def patch_feature_extractor_base(): - """ Monkey patch AutoFeatureExtractorHF.from_pretrained to adapt to modelscope hub. - """ - ori_from_pretrained = AutoFeatureExtractorHF.from_pretrained.__func__ - - @classmethod - def from_pretrained(cls, pretrained_model_name_or_path, *model_args, - **kwargs): - model_dir = get_model_dir(pretrained_model_name_or_path, None, - **kwargs) - return ori_from_pretrained(cls, model_dir, *model_args, **kwargs) - - AutoFeatureExtractorHF.from_pretrained = from_pretrained - - patch_tokenizer_base() - patch_config_base() - patch_model_base() - patch_image_processor_base() - patch_auto_processor_base() - patch_feature_extractor_base() + if PeftConfigHF is not None: + PeftConfigHF._get_peft_type = partial( + _get_peft_type, + ori_func=PeftConfigHF._get_peft_type, + ignore_file_pattern=ignore_file_pattern) def patch_hub(): @@ -375,10 +332,9 @@ def patch_hub(): commit_description: Optional[str] = None, **kwargs, ): - from modelscope.hub.push_to_hub import push_files_to_hub - push_files_to_hub(path_or_fileobj, path_in_repo, repo_id, token, revision, commit_message, commit_description) - + push_files_to_hub(path_or_fileobj, path_in_repo, repo_id, token, + revision, commit_message, commit_description) huggingface_hub.create_repo = create_repo huggingface_hub.upload_folder = partial(upload_folder, api) @@ -467,12 +423,14 @@ AutoModelForZeroShotImageClassification = get_wrapped_class( AutoModelForZeroShotImageClassificationHF) try: from transformers import AutoModelForImageToImage as AutoModelForImageToImageHF + AutoModelForImageToImage = get_wrapped_class(AutoModelForImageToImageHF) except ImportError: AutoModelForImageToImage = UnsupportedAutoClass('AutoModelForImageToImage') try: from transformers import AutoModelForImageTextToText as AutoModelForImageTextToTextHF + AutoModelForImageTextToText = get_wrapped_class( AutoModelForImageTextToTextHF) except ImportError: @@ -481,6 +439,7 @@ except ImportError: try: from transformers import AutoModelForKeypointDetection as AutoModelForKeypointDetectionHF + AutoModelForKeypointDetection = get_wrapped_class( AutoModelForKeypointDetectionHF) except ImportError: @@ -517,6 +476,7 @@ 
T5EncoderModel = get_wrapped_class(T5EncoderModelHF) try: from transformers import \ Qwen2VLForConditionalGeneration as Qwen2VLForConditionalGenerationHF + Qwen2VLForConditionalGeneration = get_wrapped_class( Qwen2VLForConditionalGenerationHF) except ImportError: From 9f7484bb82de6eaa8aa64d134a780a82e2ea377a Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Sun, 29 Dec 2024 13:21:01 +0800 Subject: [PATCH 08/36] wip --- modelscope/hub/check_model.py | 7 +--- modelscope/hub/create_model.py | 4 +- modelscope/utils/hf_util.py | 69 +++++++++++++++++++++++----------- 3 files changed, 51 insertions(+), 29 deletions(-) diff --git a/modelscope/hub/check_model.py b/modelscope/hub/check_model.py index 2cbfc5ef..7d094779 100644 --- a/modelscope/hub/check_model.py +++ b/modelscope/hub/check_model.py @@ -99,15 +99,12 @@ def check_local_model_is_latest( pass # ignore -def check_model_is_id(model_id: str, token=None): - if token is None: - token = os.environ.get('MODELSCOPE_API_TOKEN') +def check_model_is_id(model_id: str, token: Optional[str] = None): if model_id is None or os.path.exists(model_id): return False else: _api = HubApi() - if token is not None: - _api.login(token) + _api.try_login(token) try: _api.get_model(model_id=model_id, ) return True diff --git a/modelscope/hub/create_model.py b/modelscope/hub/create_model.py index 93ee248d..b1811acc 100644 --- a/modelscope/hub/create_model.py +++ b/modelscope/hub/create_model.py @@ -21,7 +21,7 @@ def create_model_repo(repo_id: str, Args: repo_id(str): The repo id token(str, Optional): The access token of the user - private(bool): If is a private repo + private(bool): If is a private repo, default False config_json(Dict[str, Any]): An optional config_json to fill into the configuration.json file, If None, the default content will be uploaded: ```json @@ -30,7 +30,7 @@ def create_model_repo(repo_id: str, You can manually modify this in the modelhub. """ api = HubApi() - assert repo_id is not None, 'Please enter a valid hub_model_id' + assert repo_id is not None, 'Please enter a valid repo id' api.try_login(token) visibility = ModelVisibility.PRIVATE if private else ModelVisibility.PUBLIC if '/' not in repo_id: diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py index f1861473..859f1db5 100644 --- a/modelscope/utils/hf_util.py +++ b/modelscope/utils/hf_util.py @@ -118,15 +118,6 @@ def user_agent(invoked_by=None): return uagent -def _try_login(token: Optional[str] = None): - from modelscope.hub.api import HubApi - api = HubApi() - if token is None: - token = os.environ.get('MODELSCOPE_API_TOKEN') - if token: - api.login(token) - - def _file_exists( self, repo_id: str, @@ -141,9 +132,9 @@ def _file_exists( logger.warning( 'The passed in repo_type will not be used in modelscope. Now only model repo can be queried.' 
) - _try_login(token) from modelscope.hub.api import HubApi api = HubApi() + api.try_login(token) return api.file_exists(repo_id, filename, revision=revision) @@ -171,7 +162,9 @@ def _file_download(repo_id: str, from modelscope.hub.file_download import model_file_download as file_download else: from modelscope.hub.file_download import dataset_file_download as file_download - _try_login(token) + from modelscope import HubApi + api = HubApi() + api.try_login(token) return file_download( repo_id, file_path=os.path.join(subfolder, filename) if subfolder else filename, @@ -223,11 +216,6 @@ def _patch_pretrained_class(): patch_pretrained_model_name_or_path, ori_func=PretrainedConfig.get_config_dict, ignore_file_pattern=ignore_file_pattern) - if PeftConfigHF is not None: - PeftConfigHF.from_pretrained = partial( - patch_pretrained_model_name_or_path, - ori_func=PeftConfigHF.from_pretrained, - ignore_file_pattern=ignore_file_pattern) PreTrainedModel.from_pretrained = partial( patch_pretrained_model_name_or_path, ori_func=PreTrainedModel.from_pretrained) @@ -240,6 +228,31 @@ def _patch_pretrained_class(): AutoFeatureExtractorHF.from_pretrained = partial( patch_pretrained_model_name_or_path, ori_func=AutoFeatureExtractorHF.from_pretrained) + if PeftConfigHF is not None: + PeftConfigHF.from_pretrained = partial( + patch_pretrained_model_name_or_path, + ori_func=PeftConfigHF.from_pretrained, + ignore_file_pattern=ignore_file_pattern) + + def patch_peft_model_id(cls, model, model_id, *model_args, **kwargs): + model_dir = get_model_dir(model_id, + kwargs.pop('ignore_file_pattern', None), + **kwargs) + return kwargs.pop('ori_func')(cls, model, model_dir, *model_args, **kwargs) + + if PeftModelHF is not None: + PeftModelHF.from_pretrained = partial( + patch_peft_model_id, + ori_func=PeftModelHF.from_pretrained) + PeftModelForCausalLMHF.from_pretrained = partial( + patch_peft_model_id, + ori_func=PeftModelForCausalLMHF.from_pretrained) + PeftModelForSequenceClassificationHF.from_pretrained = partial( + patch_peft_model_id, + ori_func=PeftModelForSequenceClassificationHF.from_pretrained) + PeftMixedModelHF.from_pretrained = partial( + patch_peft_model_id, + ori_func=PeftMixedModelHF.from_pretrained) def _get_peft_type(cls, model_id, **kwargs): model_dir = get_model_dir(model_id, ignore_file_pattern, **kwargs) @@ -259,21 +272,26 @@ def patch_hub(): from huggingface_hub import hf_api from huggingface_hub.hf_api import api + # Patch hf_hub_download huggingface_hub.hf_hub_download = _file_download huggingface_hub.file_download.hf_hub_download = _file_download + # Patch file_exists hf_api.file_exists = MethodType(_file_exists, api) huggingface_hub.file_exists = hf_api.file_exists huggingface_hub.hf_api.file_exists = hf_api.file_exists + # Patch whoami hf_api.whoami = MethodType(_whoami, api) huggingface_hub.whoami = hf_api.whoami huggingface_hub.hf_api.whoami = hf_api.whoami + # Patch repocard.validate from huggingface_hub import repocard repocard.RepoCard.validate = lambda *args, **kwargs: None - def create_repo(repo_id: str, + def create_repo(self, + repo_id: str, *, token: Union[str, bool, None] = None, private: bool = False, @@ -336,17 +354,24 @@ def patch_hub(): push_files_to_hub(path_or_fileobj, path_in_repo, repo_id, token, revision, commit_message, commit_description) - huggingface_hub.create_repo = create_repo - huggingface_hub.upload_folder = partial(upload_folder, api) + # Patch create_repo + from transformers.utils import hub + hf_api.create_repo = MethodType(create_repo, api) + 
huggingface_hub.create_repo = hf_api.create_repo + huggingface_hub.hf_api.create_repo = hf_api.create_repo + hub.create_repo = create_repo + # Patch upload_folder + hf_api.upload_folder = MethodType(upload_folder, api) + huggingface_hub.upload_folder = hf_api.upload_folder + huggingface_hub.hf_api.upload_folder = hf_api.upload_folder + + # Patch upload_file hf_api.upload_file = MethodType(upload_file, api) huggingface_hub.upload_file = hf_api.upload_file huggingface_hub.hf_api.upload_file = hf_api.upload_file repocard.upload_file = hf_api.upload_file - from transformers.utils import hub - hub.create_repo = create_repo - _patch_pretrained_class() From 225d1058e375aeecffb38e6df35e1943978f7af1 Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Sun, 29 Dec 2024 13:32:23 +0800 Subject: [PATCH 09/36] lint --- modelscope/hub/api.py | 4 +--- modelscope/utils/hf_util.py | 9 ++++----- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index 0e4b4cde..14d22874 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -487,9 +487,7 @@ class HubApi: raise_for_http_status(r) return None - def _check_cookie(self, - use_cookies: Union[bool, - CookieJar] = False) -> CookieJar: + def _check_cookie(self, use_cookies: Union[bool, CookieJar] = False) -> CookieJar: # noqa cookies = None if isinstance(use_cookies, CookieJar): cookies = use_cookies diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py index 859f1db5..3ef131cc 100644 --- a/modelscope/utils/hf_util.py +++ b/modelscope/utils/hf_util.py @@ -238,12 +238,12 @@ def _patch_pretrained_class(): model_dir = get_model_dir(model_id, kwargs.pop('ignore_file_pattern', None), **kwargs) - return kwargs.pop('ori_func')(cls, model, model_dir, *model_args, **kwargs) + return kwargs.pop('ori_func')(cls, model, model_dir, *model_args, + **kwargs) if PeftModelHF is not None: PeftModelHF.from_pretrained = partial( - patch_peft_model_id, - ori_func=PeftModelHF.from_pretrained) + patch_peft_model_id, ori_func=PeftModelHF.from_pretrained) PeftModelForCausalLMHF.from_pretrained = partial( patch_peft_model_id, ori_func=PeftModelForCausalLMHF.from_pretrained) @@ -251,8 +251,7 @@ def _patch_pretrained_class(): patch_peft_model_id, ori_func=PeftModelForSequenceClassificationHF.from_pretrained) PeftMixedModelHF.from_pretrained = partial( - patch_peft_model_id, - ori_func=PeftMixedModelHF.from_pretrained) + patch_peft_model_id, ori_func=PeftMixedModelHF.from_pretrained) def _get_peft_type(cls, model_id, **kwargs): model_dir = get_model_dir(model_id, ignore_file_pattern, **kwargs) From c42a0aca1f28527fe7c9893e15c5c776dc695ebf Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Sun, 29 Dec 2024 14:41:36 +0800 Subject: [PATCH 10/36] wip --- modelscope/utils/hf_util.py | 543 ------------------------- modelscope/utils/hf_util/__init__.py | 0 modelscope/utils/hf_util/auto_class.py | 187 +++++++++ modelscope/utils/hf_util/patcher.py | 268 ++++++++++++ tests/hub/test_patch_hf.py | 19 + 5 files changed, 474 insertions(+), 543 deletions(-) delete mode 100644 modelscope/utils/hf_util.py create mode 100644 modelscope/utils/hf_util/__init__.py create mode 100644 modelscope/utils/hf_util/auto_class.py create mode 100644 modelscope/utils/hf_util/patcher.py create mode 100644 tests/hub/test_patch_hf.py diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py deleted file mode 100644 index 3ef131cc..00000000 --- a/modelscope/utils/hf_util.py +++ /dev/null @@ -1,543 +0,0 @@ -# Copyright (c) Alibaba, Inc. 
and its affiliates. -import os -from functools import partial -from pathlib import Path -from types import MethodType -from typing import BinaryIO, Dict, List, Optional, Union - -from huggingface_hub.hf_api import CommitInfo, future_compatible -from transformers import AutoConfig as AutoConfigHF -from transformers import AutoFeatureExtractor as AutoFeatureExtractorHF -from transformers import AutoImageProcessor as AutoImageProcessorHF -from transformers import AutoModel as AutoModelHF -from transformers import \ - AutoModelForAudioClassification as AutoModelForAudioClassificationHF -from transformers import AutoModelForCausalLM as AutoModelForCausalLMHF -from transformers import \ - AutoModelForDocumentQuestionAnswering as \ - AutoModelForDocumentQuestionAnsweringHF -from transformers import \ - AutoModelForImageClassification as AutoModelForImageClassificationHF -from transformers import \ - AutoModelForImageSegmentation as AutoModelForImageSegmentationHF -from transformers import \ - AutoModelForInstanceSegmentation as AutoModelForInstanceSegmentationHF -from transformers import \ - AutoModelForMaskedImageModeling as AutoModelForMaskedImageModelingHF -from transformers import AutoModelForMaskedLM as AutoModelForMaskedLMHF -from transformers import \ - AutoModelForMaskGeneration as AutoModelForMaskGenerationHF -from transformers import \ - AutoModelForObjectDetection as AutoModelForObjectDetectionHF -from transformers import AutoModelForPreTraining as AutoModelForPreTrainingHF -from transformers import \ - AutoModelForQuestionAnswering as AutoModelForQuestionAnsweringHF -from transformers import \ - AutoModelForSemanticSegmentation as AutoModelForSemanticSegmentationHF -from transformers import AutoModelForSeq2SeqLM as AutoModelForSeq2SeqLMHF -from transformers import \ - AutoModelForSequenceClassification as AutoModelForSequenceClassificationHF -from transformers import \ - AutoModelForSpeechSeq2Seq as AutoModelForSpeechSeq2SeqHF -from transformers import \ - AutoModelForTableQuestionAnswering as AutoModelForTableQuestionAnsweringHF -from transformers import AutoModelForTextEncoding as AutoModelForTextEncodingHF -from transformers import \ - AutoModelForTokenClassification as AutoModelForTokenClassificationHF -from transformers import \ - AutoModelForUniversalSegmentation as AutoModelForUniversalSegmentationHF -from transformers import AutoModelForVision2Seq as AutoModelForVision2SeqHF -from transformers import \ - AutoModelForVisualQuestionAnswering as \ - AutoModelForVisualQuestionAnsweringHF -from transformers import \ - AutoModelForZeroShotImageClassification as \ - AutoModelForZeroShotImageClassificationHF -from transformers import \ - AutoModelForZeroShotObjectDetection as \ - AutoModelForZeroShotObjectDetectionHF -from transformers import AutoProcessor as AutoProcessorHF -from transformers import AutoTokenizer as AutoTokenizerHF -from transformers import BatchFeature as BatchFeatureHF -from transformers import BitsAndBytesConfig as BitsAndBytesConfigHF -from transformers import GenerationConfig as GenerationConfigHF -from transformers import (PretrainedConfig, PreTrainedModel, - PreTrainedTokenizerBase) -from transformers import T5EncoderModel as T5EncoderModelHF -from transformers import __version__ as transformers_version - -from modelscope import snapshot_download -from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke -from .logger import get_logger - -try: - from transformers import GPTQConfig as GPTQConfigHF - from transformers import AwqConfig as AwqConfigHF 
-except ImportError: - GPTQConfigHF = None - AwqConfigHF = None - -try: - from peft import ( - PeftConfig as PeftConfigHF, - PeftModel as PeftModelHF, - PeftModelForCausalLM as PeftModelForCausalLMHF, - PeftModelForSequenceClassification as - PeftModelForSequenceClassificationHF, - PeftMixedModel as PeftMixedModelHF, - ) -except ImportError: - PeftConfigHF = None - PeftModelHF = None - PeftModelForCausalLMHF = None - PeftModelForSequenceClassificationHF = None - PeftMixedModelHF = None - -logger = get_logger() - - -class UnsupportedAutoClass: - - def __init__(self, name: str): - self.error_msg = \ - f'{name} is not supported with your installed Transformers version {transformers_version}. ' + \ - 'Please update your Transformers by "pip install transformers -U".' - - def from_pretrained(self, pretrained_model_name_or_path, *model_args, - **kwargs): - raise ImportError(self.error_msg) - - def from_config(self, cls, config): - raise ImportError(self.error_msg) - - -def user_agent(invoked_by=None): - if invoked_by is None: - invoked_by = Invoke.PRETRAINED - uagent = '%s/%s' % (Invoke.KEY, invoked_by) - return uagent - - -def _file_exists( - self, - repo_id: str, - filename: str, - *, - repo_type: Optional[str] = None, - revision: Optional[str] = None, - token: Union[str, bool, None] = None, -): - """Patch huggingface_hub.file_exists""" - if repo_type is not None: - logger.warning( - 'The passed in repo_type will not be used in modelscope. Now only model repo can be queried.' - ) - from modelscope.hub.api import HubApi - api = HubApi() - api.try_login(token) - return api.file_exists(repo_id, filename, revision=revision) - - -def _file_download(repo_id: str, - filename: str, - *, - subfolder: Optional[str] = None, - repo_type: Optional[str] = None, - revision: Optional[str] = None, - cache_dir: Union[str, Path, None] = None, - local_dir: Union[str, Path, None] = None, - token: Union[bool, str, None] = None, - local_files_only: bool = False, - **kwargs): - """Patch huggingface_hub.hf_hub_download""" - if len(kwargs) > 0: - logger.warning( - 'The passed in library_name,library_version,user_agent,force_download,proxies' - 'etag_timeout,headers,endpoint ' - 'will not be used in modelscope.') - assert repo_type in ( - None, 'model', - 'dataset'), f'repo_type={repo_type} is not supported in ModelScope' - if repo_type in (None, 'model'): - from modelscope.hub.file_download import model_file_download as file_download - else: - from modelscope.hub.file_download import dataset_file_download as file_download - from modelscope import HubApi - api = HubApi() - api.try_login(token) - return file_download( - repo_id, - file_path=os.path.join(subfolder, filename) if subfolder else filename, - cache_dir=cache_dir, - local_dir=local_dir, - local_files_only=local_files_only, - revision=revision) - - -def _whoami(self, token: Union[bool, str, None] = None) -> Dict: - from modelscope.hub.api import ModelScopeConfig - return {'name': ModelScopeConfig.get_user_info()[0] or 'unknown'} - - -def _patch_pretrained_class(): - - def get_model_dir(pretrained_model_name_or_path, ignore_file_pattern, - **kwargs): - if not os.path.exists(pretrained_model_name_or_path): - revision = kwargs.pop('revision', None) - model_dir = snapshot_download( - pretrained_model_name_or_path, - revision=revision, - ignore_file_pattern=ignore_file_pattern) - else: - model_dir = pretrained_model_name_or_path - return model_dir - - ignore_file_pattern = [ - r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt' - ] - - def 
patch_pretrained_model_name_or_path(cls, pretrained_model_name_or_path, - *model_args, **kwargs): - model_dir = get_model_dir(pretrained_model_name_or_path, - kwargs.pop('ignore_file_pattern', None), - **kwargs) - return kwargs.pop('ori_func')(cls, model_dir, *model_args, **kwargs) - - PreTrainedTokenizerBase.from_pretrained = partial( - patch_pretrained_model_name_or_path, - ori_func=PreTrainedTokenizerBase.from_pretrained, - ignore_file_pattern=ignore_file_pattern) - PretrainedConfig.from_pretrained = partial( - patch_pretrained_model_name_or_path, - ori_func=PretrainedConfig.from_pretrained, - ignore_file_pattern=ignore_file_pattern) - PretrainedConfig.get_config_dict = partial( - patch_pretrained_model_name_or_path, - ori_func=PretrainedConfig.get_config_dict, - ignore_file_pattern=ignore_file_pattern) - PreTrainedModel.from_pretrained = partial( - patch_pretrained_model_name_or_path, - ori_func=PreTrainedModel.from_pretrained) - AutoImageProcessorHF.from_pretrained = partial( - patch_pretrained_model_name_or_path, - ori_func=PreTrainedModel.from_pretrained) - AutoProcessorHF.from_pretrained = partial( - patch_pretrained_model_name_or_path, - ori_func=AutoProcessorHF.from_pretrained) - AutoFeatureExtractorHF.from_pretrained = partial( - patch_pretrained_model_name_or_path, - ori_func=AutoFeatureExtractorHF.from_pretrained) - if PeftConfigHF is not None: - PeftConfigHF.from_pretrained = partial( - patch_pretrained_model_name_or_path, - ori_func=PeftConfigHF.from_pretrained, - ignore_file_pattern=ignore_file_pattern) - - def patch_peft_model_id(cls, model, model_id, *model_args, **kwargs): - model_dir = get_model_dir(model_id, - kwargs.pop('ignore_file_pattern', None), - **kwargs) - return kwargs.pop('ori_func')(cls, model, model_dir, *model_args, - **kwargs) - - if PeftModelHF is not None: - PeftModelHF.from_pretrained = partial( - patch_peft_model_id, ori_func=PeftModelHF.from_pretrained) - PeftModelForCausalLMHF.from_pretrained = partial( - patch_peft_model_id, - ori_func=PeftModelForCausalLMHF.from_pretrained) - PeftModelForSequenceClassificationHF.from_pretrained = partial( - patch_peft_model_id, - ori_func=PeftModelForSequenceClassificationHF.from_pretrained) - PeftMixedModelHF.from_pretrained = partial( - patch_peft_model_id, ori_func=PeftMixedModelHF.from_pretrained) - - def _get_peft_type(cls, model_id, **kwargs): - model_dir = get_model_dir(model_id, ignore_file_pattern, **kwargs) - return kwargs.pop('ori_func')(cls, model_dir, **kwargs) - - if PeftConfigHF is not None: - PeftConfigHF._get_peft_type = partial( - _get_peft_type, - ori_func=PeftConfigHF._get_peft_type, - ignore_file_pattern=ignore_file_pattern) - - -def patch_hub(): - """Patch hf hub, which to make users can download models from modelscope to speed up. 
- """ - import huggingface_hub - from huggingface_hub import hf_api - from huggingface_hub.hf_api import api - - # Patch hf_hub_download - huggingface_hub.hf_hub_download = _file_download - huggingface_hub.file_download.hf_hub_download = _file_download - - # Patch file_exists - hf_api.file_exists = MethodType(_file_exists, api) - huggingface_hub.file_exists = hf_api.file_exists - huggingface_hub.hf_api.file_exists = hf_api.file_exists - - # Patch whoami - hf_api.whoami = MethodType(_whoami, api) - huggingface_hub.whoami = hf_api.whoami - huggingface_hub.hf_api.whoami = hf_api.whoami - - # Patch repocard.validate - from huggingface_hub import repocard - repocard.RepoCard.validate = lambda *args, **kwargs: None - - def create_repo(self, - repo_id: str, - *, - token: Union[str, bool, None] = None, - private: bool = False, - **kwargs) -> 'RepoUrl': - """ - Create a new repository on the hub. - - Args: - repo_id: The ID of the repository to create. - token: The authentication token to use. - private: Whether the repository should be private. - **kwargs: Additional arguments. - - Returns: - RepoUrl: The URL of the created repository. - """ - from modelscope.hub.create_model import create_model_repo - hub_model_id = create_model_repo(repo_id, token, private) - from huggingface_hub import RepoUrl - return RepoUrl(url=hub_model_id, ) - - @future_compatible - def upload_folder( - *, - repo_id: str, - folder_path: Union[str, Path], - path_in_repo: Optional[str] = None, - commit_message: Optional[str] = None, - commit_description: Optional[str] = None, - token: Union[str, bool, None] = None, - revision: Optional[str] = 'master', - ignore_patterns: Optional[Union[List[str], str]] = None, - **kwargs, - ): - from modelscope.hub.push_to_hub import push_model_to_hub - push_model_to_hub(repo_id, folder_path, path_in_repo, commit_message, - commit_description, token, True, revision, - ignore_patterns) - return CommitInfo( - commit_url=f'https://www.modelscope.cn/models/{repo_id}/files', - commit_message=commit_message, - commit_description=commit_description, - oid=None, - ) - - @future_compatible - def upload_file( - self, - *, - path_or_fileobj: Union[str, Path, bytes, BinaryIO], - path_in_repo: str, - repo_id: str, - token: Union[str, bool, None] = None, - revision: Optional[str] = None, - commit_message: Optional[str] = None, - commit_description: Optional[str] = None, - **kwargs, - ): - from modelscope.hub.push_to_hub import push_files_to_hub - push_files_to_hub(path_or_fileobj, path_in_repo, repo_id, token, - revision, commit_message, commit_description) - - # Patch create_repo - from transformers.utils import hub - hf_api.create_repo = MethodType(create_repo, api) - huggingface_hub.create_repo = hf_api.create_repo - huggingface_hub.hf_api.create_repo = hf_api.create_repo - hub.create_repo = create_repo - - # Patch upload_folder - hf_api.upload_folder = MethodType(upload_folder, api) - huggingface_hub.upload_folder = hf_api.upload_folder - huggingface_hub.hf_api.upload_folder = hf_api.upload_folder - - # Patch upload_file - hf_api.upload_file = MethodType(upload_file, api) - huggingface_hub.upload_file = hf_api.upload_file - huggingface_hub.hf_api.upload_file = hf_api.upload_file - repocard.upload_file = hf_api.upload_file - - _patch_pretrained_class() - - -def get_wrapped_class(module_class, - ignore_file_pattern=[], - file_filter=None, - **kwargs): - """Get a custom wrapper class for auto classes to download the models from the ModelScope hub - Args: - module_class: The actual module class - 
-        ignore_file_pattern (`str` or `List`, *optional*, default to `None`):
-            Any file pattern to be ignored in downloading, like exact file names or file extensions.
-    Returns:
-        The wrapper
-    """
-    default_ignore_file_pattern = ignore_file_pattern
-    default_file_filter = file_filter
-
-    class ClassWrapper(module_class):
-
-        @classmethod
-        def from_pretrained(cls, pretrained_model_name_or_path, *model_args,
-                            **kwargs):
-            ignore_file_pattern = kwargs.pop('ignore_file_pattern',
-                                             default_ignore_file_pattern)
-            subfolder = kwargs.pop('subfolder', default_file_filter)
-            file_filter = None
-            if subfolder:
-                file_filter = f'{subfolder}/*'
-            if not os.path.exists(pretrained_model_name_or_path):
-                revision = kwargs.pop('revision', DEFAULT_MODEL_REVISION)
-                if file_filter is None:
-                    model_dir = snapshot_download(
-                        pretrained_model_name_or_path,
-                        revision=revision,
-                        ignore_file_pattern=ignore_file_pattern,
-                        user_agent=user_agent())
-                else:
-                    model_dir = os.path.join(
-                        snapshot_download(
-                            pretrained_model_name_or_path,
-                            revision=revision,
-                            ignore_file_pattern=ignore_file_pattern,
-                            allow_file_pattern=file_filter,
-                            user_agent=user_agent()), subfolder)
-            else:
-                model_dir = pretrained_model_name_or_path
-
-            module_obj = module_class.from_pretrained(model_dir, *model_args,
-                                                      **kwargs)
-
-            if module_class.__name__.startswith('AutoModel'):
-                module_obj.model_dir = model_dir
-            return module_obj
-
-    ClassWrapper.__name__ = module_class.__name__
-    ClassWrapper.__qualname__ = module_class.__qualname__
-    return ClassWrapper
-
-
-AutoModel = get_wrapped_class(AutoModelHF)
-AutoModelForCausalLM = get_wrapped_class(AutoModelForCausalLMHF)
-AutoModelForSeq2SeqLM = get_wrapped_class(AutoModelForSeq2SeqLMHF)
-AutoModelForVision2Seq = get_wrapped_class(AutoModelForVision2SeqHF)
-AutoModelForSequenceClassification = get_wrapped_class(
-    AutoModelForSequenceClassificationHF)
-AutoModelForTokenClassification = get_wrapped_class(
-    AutoModelForTokenClassificationHF)
-AutoModelForImageSegmentation = get_wrapped_class(
-    AutoModelForImageSegmentationHF)
-AutoModelForImageClassification = get_wrapped_class(
-    AutoModelForImageClassificationHF)
-AutoModelForZeroShotImageClassification = get_wrapped_class(
-    AutoModelForZeroShotImageClassificationHF)
-try:
-    from transformers import AutoModelForImageToImage as AutoModelForImageToImageHF
-
-    AutoModelForImageToImage = get_wrapped_class(AutoModelForImageToImageHF)
-except ImportError:
-    AutoModelForImageToImage = UnsupportedAutoClass('AutoModelForImageToImage')
-
-try:
-    from transformers import AutoModelForImageTextToText as AutoModelForImageTextToTextHF
-
-    AutoModelForImageTextToText = get_wrapped_class(
-        AutoModelForImageTextToTextHF)
-except ImportError:
-    AutoModelForImageTextToText = UnsupportedAutoClass(
-        'AutoModelForImageTextToText')
-
-try:
-    from transformers import AutoModelForKeypointDetection as AutoModelForKeypointDetectionHF
-
-    AutoModelForKeypointDetection = get_wrapped_class(
-        AutoModelForKeypointDetectionHF)
-except ImportError:
-    AutoModelForKeypointDetection = UnsupportedAutoClass(
-        'AutoModelForKeypointDetection')
-
-AutoModelForQuestionAnswering = get_wrapped_class(
-    AutoModelForQuestionAnsweringHF)
-AutoModelForTableQuestionAnswering = get_wrapped_class(
-    AutoModelForTableQuestionAnsweringHF)
-AutoModelForVisualQuestionAnswering = get_wrapped_class(
-    AutoModelForVisualQuestionAnsweringHF)
-AutoModelForDocumentQuestionAnswering = get_wrapped_class(
-    AutoModelForDocumentQuestionAnsweringHF)
-AutoModelForSemanticSegmentation = get_wrapped_class(
-    AutoModelForSemanticSegmentationHF)
-AutoModelForUniversalSegmentation = get_wrapped_class(
-    AutoModelForUniversalSegmentationHF)
-AutoModelForInstanceSegmentation = get_wrapped_class(
-    AutoModelForInstanceSegmentationHF)
-AutoModelForObjectDetection = get_wrapped_class(AutoModelForObjectDetectionHF)
-AutoModelForZeroShotObjectDetection = get_wrapped_class(
-    AutoModelForZeroShotObjectDetectionHF)
-AutoModelForAudioClassification = get_wrapped_class(
-    AutoModelForAudioClassificationHF)
-AutoModelForSpeechSeq2Seq = get_wrapped_class(AutoModelForSpeechSeq2SeqHF)
-AutoModelForMaskedImageModeling = get_wrapped_class(
-    AutoModelForMaskedImageModelingHF)
-AutoModelForMaskedLM = get_wrapped_class(AutoModelForMaskedLMHF)
-AutoModelForMaskGeneration = get_wrapped_class(AutoModelForMaskGenerationHF)
-AutoModelForPreTraining = get_wrapped_class(AutoModelForPreTrainingHF)
-AutoModelForTextEncoding = get_wrapped_class(AutoModelForTextEncodingHF)
-T5EncoderModel = get_wrapped_class(T5EncoderModelHF)
-try:
-    from transformers import \
-        Qwen2VLForConditionalGeneration as Qwen2VLForConditionalGenerationHF
-
-    Qwen2VLForConditionalGeneration = get_wrapped_class(
-        Qwen2VLForConditionalGenerationHF)
-except ImportError:
-    Qwen2VLForConditionalGeneration = UnsupportedAutoClass(
-        'Qwen2VLForConditionalGeneration')
-
-AutoTokenizer = get_wrapped_class(
-    AutoTokenizerHF,
-    ignore_file_pattern=[
-        r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5'
-    ])
-AutoProcessor = get_wrapped_class(
-    AutoProcessorHF,
-    ignore_file_pattern=[
-        r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5'
-    ])
-AutoConfig = get_wrapped_class(
-    AutoConfigHF,
-    ignore_file_pattern=[
-        r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5'
-    ])
-GenerationConfig = get_wrapped_class(
-    GenerationConfigHF,
-    ignore_file_pattern=[
-        r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5'
-    ])
-BitsAndBytesConfig = get_wrapped_class(
-    BitsAndBytesConfigHF,
-    ignore_file_pattern=[
-        r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5'
-    ])
-AutoImageProcessor = get_wrapped_class(
-    AutoImageProcessorHF,
-    ignore_file_pattern=[
-        r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5'
-    ])
-
-GPTQConfig = GPTQConfigHF
-AwqConfig = AwqConfigHF
-BatchFeature = get_wrapped_class(BatchFeatureHF)
diff --git a/modelscope/utils/hf_util/__init__.py b/modelscope/utils/hf_util/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/modelscope/utils/hf_util/auto_class.py b/modelscope/utils/hf_util/auto_class.py
new file mode 100644
index 00000000..e12471a1
--- /dev/null
+++ b/modelscope/utils/hf_util/auto_class.py
@@ -0,0 +1,187 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
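A minimal usage sketch of what this new module enables (illustrative only, not part of the patch; the repo id comes from this series' own tests, and `model_dir` is the attribute the wrapper attaches to AutoModel* classes):

# Python, assuming network access to the ModelScope hub:
from modelscope import AutoModel

# Resolves the id against the ModelScope hub, downloads a snapshot,
# then delegates to the original transformers class with a local path.
model = AutoModel.from_pretrained('AI-ModelScope/bert-base-uncased')
print(model.model_dir)  # local snapshot directory set by the wrapper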
+import inspect
+import os
+import sys
+from functools import partial
+from pathlib import Path
+import importlib
+from types import MethodType
+from typing import BinaryIO, Dict, List, Optional, Union
+
+from huggingface_hub.hf_api import CommitInfo, future_compatible
+from modelscope import snapshot_download
+from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke
+from modelscope.utils.logger import get_logger
+
+try:
+    from transformers import AutoModelForImageToImage as AutoModelForImageToImageHF
+
+    AutoModelForImageToImage = get_wrapped_class(AutoModelForImageToImageHF)
+except ImportError:
+    AutoModelForImageToImage = UnsupportedAutoClass('AutoModelForImageToImage')
+
+try:
+    from transformers import AutoModelForImageTextToText as AutoModelForImageTextToTextHF
+
+    AutoModelForImageTextToText = get_wrapped_class(
+        AutoModelForImageTextToTextHF)
+except ImportError:
+    AutoModelForImageTextToText = UnsupportedAutoClass(
+        'AutoModelForImageTextToText')
+
+try:
+    from transformers import AutoModelForKeypointDetection as AutoModelForKeypointDetectionHF
+
+    AutoModelForKeypointDetection = get_wrapped_class(
+        AutoModelForKeypointDetectionHF)
+except ImportError:
+    AutoModelForKeypointDetection = UnsupportedAutoClass(
+        'AutoModelForKeypointDetection')
+
+try:
+    from transformers import \
+        Qwen2VLForConditionalGeneration as Qwen2VLForConditionalGenerationHF
+
+    Qwen2VLForConditionalGeneration = get_wrapped_class(
+        Qwen2VLForConditionalGenerationHF)
+except ImportError:
+    Qwen2VLForConditionalGeneration = UnsupportedAutoClass(
+        'Qwen2VLForConditionalGeneration')
+
+
+logger = get_logger()
+
+
+def get_wrapped_class(module_class,
+                      ignore_file_pattern=[],
+                      file_filter=None,
+                      **kwargs):
+    """Get a custom wrapper class for auto classes to download the models from the ModelScope hub
+    Args:
+        module_class: The actual module class
+        ignore_file_pattern (`str` or `List`, *optional*, default to `None`):
+            Any file pattern to be ignored in downloading, like exact file names or file extensions.
+    Returns:
+        The wrapper
+    """
+    default_ignore_file_pattern = ignore_file_pattern
+    default_file_filter = file_filter
+
+    class ClassWrapper(module_class):
+
+        @classmethod
+        def from_pretrained(cls, pretrained_model_name_or_path, *model_args,
+                            **kwargs):
+            ignore_file_pattern = kwargs.pop('ignore_file_pattern',
+                                             default_ignore_file_pattern)
+            subfolder = kwargs.pop('subfolder', default_file_filter)
+            file_filter = None
+            if subfolder:
+                file_filter = f'{subfolder}/*'
+            if not os.path.exists(pretrained_model_name_or_path):
+                revision = kwargs.pop('revision', DEFAULT_MODEL_REVISION)
+                if file_filter is None:
+                    model_dir = snapshot_download(
+                        pretrained_model_name_or_path,
+                        revision=revision,
+                        ignore_file_pattern=ignore_file_pattern,
+                        user_agent=user_agent())
+                else:
+                    model_dir = os.path.join(
+                        snapshot_download(
+                            pretrained_model_name_or_path,
+                            revision=revision,
+                            ignore_file_pattern=ignore_file_pattern,
+                            allow_file_pattern=file_filter,
+                            user_agent=user_agent()), subfolder)
+            else:
+                model_dir = pretrained_model_name_or_path
+
+            module_obj = module_class.from_pretrained(model_dir, *model_args,
+                                                      **kwargs)
+
+            if module_class.__name__.startswith('AutoModel'):
+                module_obj.model_dir = model_dir
+            return module_obj
+
+    ClassWrapper.__name__ = module_class.__name__
+    ClassWrapper.__qualname__ = module_class.__qualname__
+    return ClassWrapper
+
+
+AutoModel = get_wrapped_class(AutoModelHF)
+AutoModelForCausalLM = get_wrapped_class(AutoModelForCausalLMHF)
+AutoModelForSeq2SeqLM = get_wrapped_class(AutoModelForSeq2SeqLMHF)
+AutoModelForVision2Seq = get_wrapped_class(AutoModelForVision2SeqHF)
+AutoModelForSequenceClassification = get_wrapped_class(
+    AutoModelForSequenceClassificationHF)
+AutoModelForTokenClassification = get_wrapped_class(
+    AutoModelForTokenClassificationHF)
+AutoModelForImageSegmentation = get_wrapped_class(
+    AutoModelForImageSegmentationHF)
+AutoModelForImageClassification = get_wrapped_class(
+    AutoModelForImageClassificationHF)
+AutoModelForZeroShotImageClassification = get_wrapped_class(
+    AutoModelForZeroShotImageClassificationHF)
+AutoModelForQuestionAnswering = get_wrapped_class(
+    AutoModelForQuestionAnsweringHF)
+AutoModelForTableQuestionAnswering = get_wrapped_class(
+    AutoModelForTableQuestionAnsweringHF)
+AutoModelForVisualQuestionAnswering = get_wrapped_class(
+    AutoModelForVisualQuestionAnsweringHF)
+AutoModelForDocumentQuestionAnswering = get_wrapped_class(
+    AutoModelForDocumentQuestionAnsweringHF)
+AutoModelForSemanticSegmentation = get_wrapped_class(
+    AutoModelForSemanticSegmentationHF)
+AutoModelForUniversalSegmentation = get_wrapped_class(
+    AutoModelForUniversalSegmentationHF)
+AutoModelForInstanceSegmentation = get_wrapped_class(
+    AutoModelForInstanceSegmentationHF)
+AutoModelForObjectDetection = get_wrapped_class(AutoModelForObjectDetectionHF)
+AutoModelForZeroShotObjectDetection = get_wrapped_class(
+    AutoModelForZeroShotObjectDetectionHF)
+AutoModelForAudioClassification = get_wrapped_class(
+    AutoModelForAudioClassificationHF)
+AutoModelForSpeechSeq2Seq = get_wrapped_class(AutoModelForSpeechSeq2SeqHF)
+AutoModelForMaskedImageModeling = get_wrapped_class(
+    AutoModelForMaskedImageModelingHF)
+AutoModelForMaskedLM = get_wrapped_class(AutoModelForMaskedLMHF)
+AutoModelForMaskGeneration = get_wrapped_class(AutoModelForMaskGenerationHF)
+AutoModelForPreTraining = get_wrapped_class(AutoModelForPreTrainingHF)
+AutoModelForTextEncoding = get_wrapped_class(AutoModelForTextEncodingHF)
+T5EncoderModel = get_wrapped_class(T5EncoderModelHF)
+
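A short sketch of the download filtering the tokenizer/config wrappers below rely on (illustrative only; the repo id is a placeholder, while both keyword arguments appear in the wrapper code above):

from modelscope import snapshot_download

# Skip large weight files when only tokenizer/config files are needed:
snapshot_download('org/some-model',  # hypothetical repo id
                  ignore_file_pattern=[r'\w+\.bin', r'\w+\.safetensors'])

# Or restrict the snapshot to one folder, as the `subfolder` kwarg does:
snapshot_download('org/some-model', allow_file_pattern='tokenizer/*')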
+AutoTokenizer = get_wrapped_class(
+    AutoTokenizerHF,
+    ignore_file_pattern=[
+        r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5'
+    ])
+AutoProcessor = get_wrapped_class(
+    AutoProcessorHF,
+    ignore_file_pattern=[
+        r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5'
+    ])
+AutoConfig = get_wrapped_class(
+    AutoConfigHF,
+    ignore_file_pattern=[
+        r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5'
+    ])
+GenerationConfig = get_wrapped_class(
+    GenerationConfigHF,
+    ignore_file_pattern=[
+        r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5'
+    ])
+BitsAndBytesConfig = get_wrapped_class(
+    BitsAndBytesConfigHF,
+    ignore_file_pattern=[
+        r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5'
+    ])
+AutoImageProcessor = get_wrapped_class(
+    AutoImageProcessorHF,
+    ignore_file_pattern=[
+        r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5'
+    ])
+
+GPTQConfig = GPTQConfigHF
+AwqConfig = AwqConfigHF
+BatchFeature = get_wrapped_class(BatchFeatureHF)
diff --git a/modelscope/utils/hf_util/patcher.py b/modelscope/utils/hf_util/patcher.py
new file mode 100644
index 00000000..23a0174c
--- /dev/null
+++ b/modelscope/utils/hf_util/patcher.py
@@ -0,0 +1,268 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import inspect
+import os
+import sys
+from functools import partial
+from pathlib import Path
+import importlib
+from types import MethodType
+from typing import BinaryIO, Dict, List, Optional, Union
+
+from huggingface_hub.hf_api import CommitInfo, future_compatible
+from modelscope import snapshot_download
+from modelscope.utils.constant import Invoke
+from modelscope.utils.logger import get_logger
+
+
+logger = get_logger()
+
+
+extra_modules = ['T5']
+lazy_module = sys.modules['transformers']
+all_modules = lazy_module._modules
+all_imported_modules = []
+for module in all_modules:
+    if 'auto' in module.lower() or any(m in module for m in extra_modules):
+        all_imported_modules.append(importlib.import_module(f'transformers.{module}'))
+
+
+def user_agent(invoked_by=None):
+    if invoked_by is None:
+        invoked_by = Invoke.PRETRAINED
+    uagent = '%s/%s' % (Invoke.KEY, invoked_by)
+    return uagent
+
+
+def _patch_pretrained_class():
+
+    def get_model_dir(pretrained_model_name_or_path, ignore_file_pattern,
+                      **kwargs):
+        if not os.path.exists(pretrained_model_name_or_path):
+            revision = kwargs.pop('revision', None)
+            model_dir = snapshot_download(
+                pretrained_model_name_or_path,
+                revision=revision,
+                ignore_file_pattern=ignore_file_pattern)
+        else:
+            model_dir = pretrained_model_name_or_path
+        return model_dir
+
+    ignore_file_pattern = [
+        r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt'
+    ]
+
+    def patch_pretrained_model_name_or_path(cls, pretrained_model_name_or_path,
+                                            *model_args, **kwargs):
+        model_dir = get_model_dir(pretrained_model_name_or_path,
+                                  kwargs.pop('ignore_file_pattern', None),
+                                  **kwargs)
+        return kwargs.pop('ori_func')(cls, model_dir, *model_args, **kwargs)
+
+    def patch_peft_model_id(cls, model, model_id, *model_args, **kwargs):
+        model_dir = get_model_dir(model_id,
+                                  kwargs.pop('ignore_file_pattern', None),
+                                  **kwargs)
+        return kwargs.pop('ori_func')(cls, model, model_dir, *model_args,
+                                      **kwargs)
+
+    def _get_peft_type(cls, model_id, **kwargs):
+        model_dir = get_model_dir(model_id, ignore_file_pattern, **kwargs)
+        return kwargs.pop('ori_func')(cls, model_dir, **kwargs)
+
+    for var in all_imported_modules:
+        if var is None:
+            continue
+        name = var.__name__
+        need_model = 'model' in name.lower() or 'processor' in name.lower() or 'extractor' in name.lower()
+        if need_model:
+            ignore_file_pattern_kwargs = {}
+        else:
+            ignore_file_pattern_kwargs = {'ignore_file_pattern': ignore_file_pattern}
+
+        if name.endswith('HF'):
+            has_from_pretrained = hasattr(var, 'from_pretrained')
+            has_get_peft_type = hasattr(var, '_get_peft_type')
+            parameters = inspect.signature(var.from_pretrained).parameters
+            is_peft = 'model' in parameters and 'model_id' in parameters
+            if has_from_pretrained:
+                if not is_peft:
+                    var.from_pretrained = partial(patch_pretrained_model_name_or_path,
+                                                  ori_func=var.from_pretrained,
+                                                  **ignore_file_pattern_kwargs)
+                else:
+                    var.from_pretrained = partial(patch_peft_model_id,
+                                                  ori_func=var.from_pretrained,
+                                                  **ignore_file_pattern_kwargs)
+            if has_get_peft_type:
+                var._get_peft_type = partial(_get_peft_type,
+                                             ori_func=var._get_peft_type,
+                                             **ignore_file_pattern_kwargs)
+
+
+def _patch_hub():
+    import huggingface_hub
+    from huggingface_hub import hf_api
+    from huggingface_hub.hf_api import api
+
+    def _file_exists(
+        self,
+        repo_id: str,
+        filename: str,
+        *,
+        repo_type: Optional[str] = None,
+        revision: Optional[str] = None,
+        token: Union[str, bool, None] = None,
+    ):
+        """Patch huggingface_hub.file_exists"""
+        if repo_type is not None:
+            logger.warning(
+                'The passed in repo_type will not be used in modelscope. Now only model repo can be queried.'
+            )
+        from modelscope.hub.api import HubApi
+        api = HubApi()
+        api.try_login(token)
+        return api.file_exists(repo_id, filename, revision=revision)
+
+    def _file_download(repo_id: str,
+                       filename: str,
+                       *,
+                       subfolder: Optional[str] = None,
+                       repo_type: Optional[str] = None,
+                       revision: Optional[str] = None,
+                       cache_dir: Union[str, Path, None] = None,
+                       local_dir: Union[str, Path, None] = None,
+                       token: Union[bool, str, None] = None,
+                       local_files_only: bool = False,
+                       **kwargs):
+        """Patch huggingface_hub.hf_hub_download"""
+        if len(kwargs) > 0:
+            logger.warning(
+                'The passed in library_name,library_version,user_agent,force_download,proxies,'
+                'etag_timeout,headers,endpoint '
+                'will not be used in modelscope.')
+        assert repo_type in (
+            None, 'model',
+            'dataset'), f'repo_type={repo_type} is not supported in ModelScope'
+        if repo_type in (None, 'model'):
+            from modelscope.hub.file_download import model_file_download as file_download
+        else:
+            from modelscope.hub.file_download import dataset_file_download as file_download
+        from modelscope import HubApi
+        api = HubApi()
+        api.try_login(token)
+        return file_download(
+            repo_id,
+            file_path=os.path.join(subfolder, filename) if subfolder else filename,
+            cache_dir=cache_dir,
+            local_dir=local_dir,
+            local_files_only=local_files_only,
+            revision=revision)
+
+    def _whoami(self, token: Union[bool, str, None] = None) -> Dict:
+        from modelscope.hub.api import ModelScopeConfig
+        return {'name': ModelScopeConfig.get_user_info()[0] or 'unknown'}
+
+    # Patch hf_hub_download
+    huggingface_hub.hf_hub_download = _file_download
+    huggingface_hub.file_download.hf_hub_download = _file_download
+
+    # Patch file_exists
+    hf_api.file_exists = MethodType(_file_exists, api)
+    huggingface_hub.file_exists = hf_api.file_exists
+    huggingface_hub.hf_api.file_exists = hf_api.file_exists
+
+    # Patch whoami
+    hf_api.whoami = MethodType(_whoami, api)
+    huggingface_hub.whoami = hf_api.whoami
+    huggingface_hub.hf_api.whoami = hf_api.whoami
+
+    # Patch repocard.validate
+    from huggingface_hub import repocard
+    repocard.RepoCard.validate = lambda *args, **kwargs: None
+
+    def create_repo(self,
+                    repo_id: str,
+                    *,
+                    token: Union[str, bool, None] = None,
+                    private: bool = False,
+                    **kwargs) -> 'RepoUrl':
+        """
+        Create a new repository on the hub.
+
+        Args:
+            repo_id: The ID of the repository to create.
+            token: The authentication token to use.
+            private: Whether the repository should be private.
+            **kwargs: Additional arguments.
+
+        Returns:
+            RepoUrl: The URL of the created repository.
+        """
+        from modelscope.hub.create_model import create_model_repo
+        hub_model_id = create_model_repo(repo_id, token, private)
+        from huggingface_hub import RepoUrl
+        return RepoUrl(url=hub_model_id, )
+
+    @future_compatible
+    def upload_folder(
+        *,
+        repo_id: str,
+        folder_path: Union[str, Path],
+        path_in_repo: Optional[str] = None,
+        commit_message: Optional[str] = None,
+        commit_description: Optional[str] = None,
+        token: Union[str, bool, None] = None,
+        revision: Optional[str] = 'master',
+        ignore_patterns: Optional[Union[List[str], str]] = None,
+        **kwargs,
+    ):
+        from modelscope.hub.push_to_hub import push_model_to_hub
+        push_model_to_hub(repo_id, folder_path, path_in_repo, commit_message,
+                          commit_description, token, True, revision,
+                          ignore_patterns)
+        return CommitInfo(
+            commit_url=f'https://www.modelscope.cn/models/{repo_id}/files',
+            commit_message=commit_message,
+            commit_description=commit_description,
+            oid=None,
+        )
+
+    @future_compatible
+    def upload_file(
+        self,
+        *,
+        path_or_fileobj: Union[str, Path, bytes, BinaryIO],
+        path_in_repo: str,
+        repo_id: str,
+        token: Union[str, bool, None] = None,
+        revision: Optional[str] = None,
+        commit_message: Optional[str] = None,
+        commit_description: Optional[str] = None,
+        **kwargs,
+    ):
+        from modelscope.hub.push_to_hub import push_files_to_hub
+        push_files_to_hub(path_or_fileobj, path_in_repo, repo_id, token,
+                          revision, commit_message, commit_description)
+
+    # Patch create_repo
+    from transformers.utils import hub
+    hf_api.create_repo = MethodType(create_repo, api)
+    huggingface_hub.create_repo = hf_api.create_repo
+    huggingface_hub.hf_api.create_repo = hf_api.create_repo
+    hub.create_repo = create_repo
+
+    # Patch upload_folder
+    hf_api.upload_folder = MethodType(upload_folder, api)
+    huggingface_hub.upload_folder = hf_api.upload_folder
+    huggingface_hub.hf_api.upload_folder = hf_api.upload_folder
+
+    # Patch upload_file
+    hf_api.upload_file = MethodType(upload_file, api)
+    huggingface_hub.upload_file = hf_api.upload_file
+    huggingface_hub.hf_api.upload_file = hf_api.upload_file
+    repocard.upload_file = hf_api.upload_file
+
+
+def patch_hub():
+    _patch_hub()
+    _patch_pretrained_class()
diff --git a/tests/hub/test_patch_hf.py b/tests/hub/test_patch_hf.py
new file mode 100644
index 00000000..dbaf2c11
--- /dev/null
+++ b/tests/hub/test_patch_hf.py
@@ -0,0 +1,19 @@
+import unittest
+
+from modelscope.msdatasets import MsDataset
+from modelscope.utils.test_utils import test_level
+
+
+class DownloadDatasetTest(unittest.TestCase):
+
+    def setUp(self):
+        from modelscope.utils.hf_util import patch_hub
+        patch_hub()
+
+    def test_automodel_download(self):
+        from transformers import AutoModel
+        model = AutoModel.from_pretrained('AI-ModelScope/bert-base-uncased')
+        self.assertTrue(model is not None)
+
+
+

From 640b3bd49bbb4629ff09276386ffc54f481b6e47 Mon Sep 17 00:00:00 2001
From: "yuze.zyz"
Date: Sun, 29 Dec 2024 18:13:05 +0800
Subject: [PATCH 11/36] wip

---
 modelscope/utils/hf_util/auto_class.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modelscope/utils/hf_util/auto_class.py b/modelscope/utils/hf_util/auto_class.py
index e12471a1..2a43f0e9 100644
--- a/modelscope/utils/hf_util/auto_class.py
+++ b/modelscope/utils/hf_util/auto_class.py
@@ -48,7 +48,7 @@ except ImportError:
     Qwen2VLForConditionalGeneration = UnsupportedAutoClass(
         'Qwen2VLForConditionalGeneration')
-
+
 logger = get_logger()

From c8f958182d9bed6ab8bcd5385449098b4a55215f Mon Sep 17 00:00:00 2001
From: "yuze.zyz"
Date: Wed, 8 Jan 2025 20:50:07 +0800
Subject: [PATCH 12/36] wip

---
 modelscope/utils/hf_util/patcher.py | 210 ++++++++++++++++++++--------
 1 file changed, 149 insertions(+), 61 deletions(-)

diff --git a/modelscope/utils/hf_util/patcher.py b/modelscope/utils/hf_util/patcher.py
index 23a0174c..fd5103f1 100644
--- a/modelscope/utils/hf_util/patcher.py
+++ b/modelscope/utils/hf_util/patcher.py
@@ -1,18 +1,17 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
+import importlib
 import inspect
 import os
 import sys
 from functools import partial
 from pathlib import Path
-import importlib
 from types import MethodType
 from typing import BinaryIO, Dict, List, Optional, Union
 
-from huggingface_hub.hf_api import CommitInfo, future_compatible
-from modelscope import snapshot_download
-from modelscope.utils.constant import Invoke
-from modelscope.utils.logger import get_logger
+from modelscope import snapshot_download
+from modelscope.utils.logger import get_logger
 
 logger = get_logger()
@@ -26,13 +25,6 @@ for module in all_modules:
         all_imported_modules.append(importlib.import_module(f'transformers.{module}'))
 
 
-def user_agent(invoked_by=None):
-    if invoked_by is None:
-        invoked_by = Invoke.PRETRAINED
-    uagent = '%s/%s' % (Invoke.KEY, invoked_by)
-    return uagent
-
-
 def _patch_pretrained_class():
 
     def get_model_dir(pretrained_model_name_or_path, ignore_file_pattern,
@@ -79,24 +71,51 @@ def _patch_pretrained_class():
         else:
             ignore_file_pattern_kwargs = {'ignore_file_pattern': ignore_file_pattern}
 
-        if name.endswith('HF'):
-            has_from_pretrained = hasattr(var, 'from_pretrained')
-            has_get_peft_type = hasattr(var, '_get_peft_type')
-            parameters = inspect.signature(var.from_pretrained).parameters
-            is_peft = 'model' in parameters and 'model_id' in parameters
-            if has_from_pretrained:
-                if not is_peft:
-                    var.from_pretrained = partial(patch_pretrained_model_name_or_path,
-                                                  ori_func=var.from_pretrained,
-                                                  **ignore_file_pattern_kwargs)
-                else:
-                    var.from_pretrained = partial(patch_peft_model_id,
-                                                  ori_func=var.from_pretrained,
-                                                  **ignore_file_pattern_kwargs)
-            if has_get_peft_type:
-                var._get_peft_type = partial(_get_peft_type,
-                                             ori_func=var._get_peft_type,
+        has_from_pretrained = hasattr(var, 'from_pretrained')
+        has_get_peft_type = hasattr(var, '_get_peft_type')
+        has_get_config_dict = hasattr(var, 'get_config_dict')
+        parameters = inspect.signature(var.from_pretrained).parameters
+        is_peft = 'model' in parameters and 'model_id' in parameters
+        if has_from_pretrained and not hasattr(var, '_from_pretrained_origin'):
+            var._from_pretrained_origin = var.from_pretrained
+            if not is_peft:
+                var.from_pretrained = partial(patch_pretrained_model_name_or_path,
+                                              ori_func=var._from_pretrained_origin,
                                               **ignore_file_pattern_kwargs)
+            else:
+                var.from_pretrained = partial(patch_peft_model_id,
+                                              ori_func=var._from_pretrained_origin,
+                                              **ignore_file_pattern_kwargs)
+            delattr(var, '_from_pretrained_origin')
+        if has_get_peft_type and not hasattr(var, '_get_peft_type_origin'):
+            var._get_peft_type_origin = var._get_peft_type
+            var._get_peft_type = partial(_get_peft_type,
+                                         ori_func=var._get_peft_type_origin,
+                                         **ignore_file_pattern_kwargs)
+            delattr(var, '_get_peft_type_origin')
+
+        if has_get_config_dict and not hasattr(var, '_get_config_dict_origin'):
+            var._get_config_dict_origin = var.get_config_dict
+            var.get_config_dict = partial(patch_pretrained_model_name_or_path,
+                                          ori_func=var._get_config_dict_origin,
+                                          **ignore_file_pattern_kwargs)
+            delattr(var, '_get_config_dict_origin')
+
+
+def _unpatch_pretrained_class():
+    for var in all_imported_modules:
+        if var is None:
+            continue
+
+        has_from_pretrained = hasattr(var, 'from_pretrained')
+        has_get_peft_type = hasattr(var, '_get_peft_type')
+        has_get_config_dict = hasattr(var, 'get_config_dict')
+        if has_from_pretrained and hasattr(var, '_from_pretrained_origin'):
+            var.from_pretrained = var._from_pretrained_origin
+        if has_get_peft_type and hasattr(var, '_get_peft_type_origin'):
+            var._get_peft_type = var._get_peft_type_origin
+        if has_get_config_dict and hasattr(var, '_get_config_dict_origin'):
+            var.get_config_dict = var._get_config_dict_origin
 
 
 def _patch_hub():
@@ -160,26 +179,11 @@ def _patch_hub():
     def _whoami(self, token: Union[bool, str, None] = None) -> Dict:
         from modelscope.hub.api import ModelScopeConfig
+        from modelscope.hub.api import HubApi
+        api = HubApi()
+        api.try_login(token)
         return {'name': ModelScopeConfig.get_user_info()[0] or 'unknown'}
 
-    # Patch hf_hub_download
-    huggingface_hub.hf_hub_download = _file_download
-    huggingface_hub.file_download.hf_hub_download = _file_download
-
-    # Patch file_exists
-    hf_api.file_exists = MethodType(_file_exists, api)
-    huggingface_hub.file_exists = hf_api.file_exists
-    huggingface_hub.hf_api.file_exists = hf_api.file_exists
-
-    # Patch whoami
-    hf_api.whoami = MethodType(_whoami, api)
-    huggingface_hub.whoami = hf_api.whoami
-    huggingface_hub.hf_api.whoami = hf_api.whoami
-
-    # Patch repocard.validate
-    from huggingface_hub import repocard
-    repocard.RepoCard.validate = lambda *args, **kwargs: None
-
     def create_repo(self,
                     repo_id: str,
                     *,
@@ -244,25 +248,109 @@ def _patch_hub():
         push_files_to_hub(path_or_fileobj, path_in_repo, repo_id, token,
                           revision, commit_message, commit_description)
 
-    # Patch create_repo
-    from transformers.utils import hub
-    hf_api.create_repo = MethodType(create_repo, api)
-    huggingface_hub.create_repo = hf_api.create_repo
-    huggingface_hub.hf_api.create_repo = hf_api.create_repo
-    hub.create_repo = create_repo
+    # Patch repocard.validate
+    from huggingface_hub import repocard
+    if not hasattr(repocard.RepoCard, '_validate_origin'):
+        repocard.RepoCard._validate_origin = repocard.RepoCard.validate
+        repocard.RepoCard.validate = lambda *args, **kwargs: None
 
-    # Patch upload_folder
-    hf_api.upload_folder = MethodType(upload_folder, api)
-    huggingface_hub.upload_folder = hf_api.upload_folder
-    huggingface_hub.hf_api.upload_folder = hf_api.upload_folder
+    if not hasattr(hf_api, '_hf_hub_download_origin'):
+        # Patch hf_hub_download
+        hf_api._hf_hub_download_origin = huggingface_hub.file_download.hf_hub_download
+        huggingface_hub.hf_hub_download = _file_download
+        huggingface_hub.file_download.hf_hub_download = _file_download
 
-    # Patch upload_file
-    hf_api.upload_file = MethodType(upload_file, api)
-    huggingface_hub.upload_file = hf_api.upload_file
-    huggingface_hub.hf_api.upload_file = hf_api.upload_file
-    repocard.upload_file = hf_api.upload_file
+    if not hasattr(hf_api, '_file_exists_origin'):
+        # Patch file_exists
+        hf_api._file_exists_origin = hf_api.file_exists
+        hf_api.file_exists = MethodType(_file_exists, api)
+        huggingface_hub.file_exists = hf_api.file_exists
+        huggingface_hub.hf_api.file_exists = hf_api.file_exists
+
+    if not hasattr(hf_api, '_whoami_origin'):
+        # Patch whoami
+        hf_api._whoami_origin = hf_api.whoami
+        hf_api.whoami = MethodType(_whoami, api)
+        huggingface_hub.whoami = hf_api.whoami
+        huggingface_hub.hf_api.whoami = hf_api.whoami
+
+    if not hasattr(hf_api, '_create_repo_origin'):
+        # Patch create_repo
+        from transformers.utils import hub
+        hf_api._create_repo_origin = hf_api.create_repo
+        hf_api.create_repo = MethodType(create_repo, api)
+        huggingface_hub.create_repo = hf_api.create_repo
+        huggingface_hub.hf_api.create_repo = hf_api.create_repo
+        hub.create_repo = hf_api.create_repo
+
+    if not hasattr(hf_api, '_upload_folder_origin'):
+        # Patch upload_folder
+        hf_api._upload_folder_origin = hf_api.upload_folder
+        hf_api.upload_folder = MethodType(upload_folder, api)
+        huggingface_hub.upload_folder = hf_api.upload_folder
+        huggingface_hub.hf_api.upload_folder = hf_api.upload_folder
+
+    if not hasattr(hf_api, '_upload_file_origin'):
+        # Patch upload_file
+        hf_api._upload_file_origin = hf_api.upload_file
+        hf_api.upload_file = MethodType(upload_file, api)
+        huggingface_hub.upload_file = hf_api.upload_file
+        huggingface_hub.hf_api.upload_file = hf_api.upload_file
+        repocard.upload_file = hf_api.upload_file
+
+
+def _unpatch_hub():
+    import huggingface_hub
+    from huggingface_hub import hf_api
+
+    from huggingface_hub import repocard
+    if hasattr(repocard.RepoCard, '_validate_origin'):
+        repocard.RepoCard.validate = repocard.RepoCard._validate_origin
+        delattr(repocard.RepoCard, '_validate_origin')
+
+    if hasattr(hf_api, '_hf_hub_download_origin'):
+        huggingface_hub.file_download.hf_hub_download = hf_api._hf_hub_download_origin
+        huggingface_hub.hf_hub_download = hf_api._hf_hub_download_origin
+        huggingface_hub.file_download.hf_hub_download = hf_api._hf_hub_download_origin
+        delattr(hf_api, '_hf_hub_download_origin')
+
+    if hasattr(hf_api, '_file_exists_origin'):
+        hf_api.file_exists = hf_api._file_exists_origin
+        huggingface_hub.file_exists = hf_api.file_exists
+        huggingface_hub.hf_api.file_exists = hf_api.file_exists
+        delattr(hf_api, '_file_exists_origin')
+
+    if hasattr(hf_api, '_whoami_origin'):
+        hf_api.whoami = hf_api._whoami_origin
+        huggingface_hub.whoami = hf_api.whoami
+        huggingface_hub.hf_api.whoami = hf_api.whoami
+        delattr(hf_api, '_whoami_origin')
+
+    if hasattr(hf_api, '_create_repo_origin'):
+        from transformers.utils import hub
+        hf_api.create_repo = hf_api._create_repo_origin
+        huggingface_hub.create_repo = hf_api.create_repo
+        huggingface_hub.hf_api.create_repo = hf_api.create_repo
+        hub.create_repo = hf_api.create_repo
+        delattr(hf_api, '_create_repo_origin')
+
+    if hasattr(hf_api, '_upload_folder_origin'):
+        hf_api.upload_folder = hf_api._upload_folder_origin
+        huggingface_hub.upload_folder = hf_api.upload_folder
+        huggingface_hub.hf_api.upload_folder = hf_api.upload_folder
+        delattr(hf_api, '_upload_folder_origin')
+
+    if hasattr(hf_api, '_upload_file_origin'):
+        hf_api.upload_file = hf_api._upload_file_origin
+        huggingface_hub.upload_file = hf_api.upload_file
+        huggingface_hub.hf_api.upload_file = hf_api.upload_file
+        repocard.upload_file = hf_api.upload_file
+        delattr(hf_api, '_upload_file_origin')
 
 def patch_hub():
     _patch_hub()
     _patch_pretrained_class()
+
+
+def unpatch_hub():
+    _unpatch_pretrained_class()

From defb668defb609bc4c7a2dbe0206971be225f2b4 Mon Sep 17 00:00:00 2001
From: luyan
Date: Tue, 21 Jan 2025 10:32:59 +0800
Subject: [PATCH 13/36] tmp

---
 modelscope/__init__.py | 5 +++--
 modelscope/pipelines/builder.py | 20 ++++++++++++++++++--
 modelscope/utils/hf_util.py | 1 +
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/modelscope/__init__.py b/modelscope/__init__.py
index c969be68..8c461cf8 100644
--- a/modelscope/__init__.py
+++ b/modelscope/__init__.py
@@ -54,7 +54,7 @@ if TYPE_CHECKING:
         AutoModelForMaskedLM, AutoTokenizer, AutoModelForMaskGeneration,
         AutoModelForPreTraining, AutoModelForTextEncoding, AutoImageProcessor,
         BatchFeature, Qwen2VLForConditionalGeneration,
-        T5EncoderModel)
+        T5EncoderModel, hf_pipeline)
 else:
     print(
         'transformer is not installed, please install it if you want to use related modules'
     )
@@ -131,7 +131,8 @@ else:
             'AutoModelForMaskedLM', 'AutoTokenizer', 'AutoModelForMaskGeneration',
             'AutoModelForPreTraining', 'AutoModelForTextEncoding',
             'AutoImageProcessor', 'BatchFeature',
-            'Qwen2VLForConditionalGeneration', 'T5EncoderModel'
+            'Qwen2VLForConditionalGeneration', 'T5EncoderModel',
+            'hf_pipeline'
         ]
 
     import sys
diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py
index 596d6d22..b07ad315 100644
--- a/modelscope/pipelines/builder.py
+++ b/modelscope/pipelines/builder.py
@@ -13,10 +13,14 @@ from modelscope.utils.hub import read_config
 from modelscope.utils.plugins import (register_modelhub_repo,
                                       register_plugins_repo)
 from modelscope.utils.registry import Registry, build_from_cfg
+from modelscope.utils.logger import get_logger
+from modelscope.utils.import_utils import is_transformers_available
+
 from .base import Pipeline
 from .util import is_official_hub_path
 
 PIPELINES = Registry('pipelines')
+logger = get_logger()
 
 
 def normalize_model_input(model,
@@ -109,6 +113,7 @@ def pipeline(task: str = None,
     if task is None and pipeline_name is None:
         raise ValueError('task or pipeline_name is required')
 
+    pipeline_props = None
     if pipeline_name is None:
         # get default pipeline for this task
         if isinstance(model, str) \
@@ -157,8 +162,11 @@ def pipeline(task: str = None,
             if pipeline_name:
                 pipeline_props = {'type': pipeline_name}
             else:
-                check_config(cfg)
-                pipeline_props = cfg.pipeline
+                try:
+                    check_config(cfg)
+                    pipeline_props = cfg.pipeline
+                except AssertionError as e:
+                    logger.info(str(e))
     elif model is not None:
         # get pipeline info from Model object
@@ -176,6 +184,14 @@ def pipeline(task: str = None,
     else:
         pipeline_props = {'type': pipeline_name}
 
+    if not pipeline_props and is_transformers_available():
+        from modelscope.utils.hf_util import hf_pipeline
+        return hf_pipeline(task=task,
+                           model=model,
+                           framework=framework,
+                           device=device,
+                           **kwargs)
+
     pipeline_props['model'] = model
     pipeline_props['device'] = device
     cfg = ConfigDict(pipeline_props)
diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py
index 8f7c06da..9b9d4e2e 100644
--- a/modelscope/utils/hf_util.py
+++ b/modelscope/utils/hf_util.py
@@ -63,6 +63,7 @@ from transformers import (PretrainedConfig, PreTrainedModel,
                           PreTrainedTokenizerBase)
 from transformers import T5EncoderModel as T5EncoderModelHF
 from transformers import __version__ as transformers_version
+from transformers import pipeline as hf_pipeline
 
 from modelscope import snapshot_download
 from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke

From 2eda82949937d9bb8a263dbeab59de4bbf435853 Mon Sep 17 00:00:00 2001
From: "yuze.zyz"
Date: Wed, 22 Jan 2025 23:06:20 +0800
Subject: [PATCH 14/36] fix

---
 modelscope/__init__.py | 10 +-
 modelscope/utils/hf_util/__init__.py | 2 +
 modelscope/utils/hf_util/auto_class.py | 288 +++++++++++--------------
 modelscope/utils/hf_util/patcher.py | 214 +++++++++++++++++--------
 modelscope/utils/import_utils.py | 4 +
 tests/hub/test_patch_hf.py | 3 -
 tests/utils/test_hf_util.py | 69 +++++-
 7 files changed, 334 insertions(+), 256 deletions(-)

diff --git a/modelscope/__init__.py b/modelscope/__init__.py
index c969be68..0f0469b0 100644
--- a/modelscope/__init__.py
+++ b/modelscope/__init__.py
@@ -134,6 +134,14 @@ else:
             'Qwen2VLForConditionalGeneration', 'T5EncoderModel'
         ]
 
+    from modelscope.utils import hf_util
+
+    extra_objects = {}
+    attributes = dir(hf_util)
+    imports = [attr for attr in attributes if not attr.startswith('__')]
+    for _import in imports:
+        extra_objects[_import] = getattr(hf_util, _import)
+
     import sys
 
     sys.modules[__name__] = LazyImportModule(
@@ -141,5 +149,5 @@ else:
         globals()['__file__'],
         _import_structure,
         module_spec=__spec__,
-        extra_objects={},
+        extra_objects=extra_objects,
     )
diff --git a/modelscope/utils/hf_util/__init__.py b/modelscope/utils/hf_util/__init__.py
index e69de29b..a138ff7a 100644
--- a/modelscope/utils/hf_util/__init__.py
+++ b/modelscope/utils/hf_util/__init__.py
@@ -0,0 +1,2 @@
+from .auto_class import *
+from .patcher import patch_context, patch_hub, unpatch_hub
diff --git a/modelscope/utils/hf_util/auto_class.py b/modelscope/utils/hf_util/auto_class.py
index 2a43f0e9..157158fd 100644
--- a/modelscope/utils/hf_util/auto_class.py
+++ b/modelscope/utils/hf_util/auto_class.py
@@ -1,187 +1,143 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 import inspect
 import os
-import sys
-from functools import partial
-from pathlib import Path
-import importlib
-from types import MethodType
-from typing import BinaryIO, Dict, List, Optional, Union
+from typing import TYPE_CHECKING
 
-from huggingface_hub.hf_api import CommitInfo, future_compatible
-from modelscope import snapshot_download
-from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke
-from modelscope.utils.logger import get_logger
+if TYPE_CHECKING:
+    from transformers import __version__ as transformers_version
 
+    try:
+        from transformers import Qwen2VLForConditionalGeneration
+    except ImportError:
+        pass
 
-try:
-    from transformers import AutoModelForImageToImage as AutoModelForImageToImageHF
+    try:
+        from transformers import GPTQConfig
+        from transformers import AwqConfig
+    except ImportError:
+        pass
 
-    AutoModelForImageToImage = get_wrapped_class(AutoModelForImageToImageHF)
-except ImportError:
-    AutoModelForImageToImage = UnsupportedAutoClass('AutoModelForImageToImage')
+    try:
+        from transformers import AutoModelForImageToImage
+    except ImportError:
+        pass
 
-try:
-    from transformers import AutoModelForImageTextToText as AutoModelForImageTextToTextHF
+    try:
+        from transformers import AutoModelForImageTextToText
+    except ImportError:
+        pass
 
-    AutoModelForImageTextToText = get_wrapped_class(
-        AutoModelForImageTextToTextHF)
-except ImportError:
-    AutoModelForImageTextToText = UnsupportedAutoClass(
-        'AutoModelForImageTextToText')
+    try:
+        from transformers import AutoModelForKeypointDetection
+    except ImportError:
+        pass
 
-try:
-    from transformers import AutoModelForKeypointDetection as AutoModelForKeypointDetectionHF
+else:
 
-    AutoModelForKeypointDetection = get_wrapped_class(
-        AutoModelForKeypointDetectionHF)
-except ImportError:
-    AutoModelForKeypointDetection = UnsupportedAutoClass(
-        'AutoModelForKeypointDetection')
+    class UnsupportedAutoClass:
 
-try:
-    from transformers import \
-        Qwen2VLForConditionalGeneration as Qwen2VLForConditionalGenerationHF
+        def __init__(self, name: str):
+            self.error_msg =\
+                f'{name} is not supported with your installed Transformers version {transformers_version}. ' + \
+                'Please update your Transformers by "pip install transformers -U".'
 
-    Qwen2VLForConditionalGeneration = get_wrapped_class(
-        Qwen2VLForConditionalGenerationHF)
-except ImportError:
-    Qwen2VLForConditionalGeneration = UnsupportedAutoClass(
-        'Qwen2VLForConditionalGeneration')
-
-
-logger = get_logger()
-
-
-def get_wrapped_class(module_class,
-                      ignore_file_pattern=[],
-                      file_filter=None,
-                      **kwargs):
-    """Get a custom wrapper class for auto classes to download the models from the ModelScope hub
-    Args:
-        module_class: The actual module class
-        ignore_file_pattern (`str` or `List`, *optional*, default to `None`):
-            Any file pattern to be ignored in downloading, like exact file names or file extensions.
-    Returns:
-        The wrapper
-    """
-    default_ignore_file_pattern = ignore_file_pattern
-    default_file_filter = file_filter
-
-    class ClassWrapper(module_class):
-
-        @classmethod
-        def from_pretrained(cls, pretrained_model_name_or_path, *model_args,
+        def from_pretrained(self, pretrained_model_name_or_path, *model_args,
                             **kwargs):
-            ignore_file_pattern = kwargs.pop('ignore_file_pattern',
-                                             default_ignore_file_pattern)
-            subfolder = kwargs.pop('subfolder', default_file_filter)
-            file_filter = None
-            if subfolder:
-                file_filter = f'{subfolder}/*'
-            if not os.path.exists(pretrained_model_name_or_path):
-                revision = kwargs.pop('revision', DEFAULT_MODEL_REVISION)
-                if file_filter is None:
-                    model_dir = snapshot_download(
-                        pretrained_model_name_or_path,
-                        revision=revision,
-                        ignore_file_pattern=ignore_file_pattern,
-                        user_agent=user_agent())
-                else:
-                    model_dir = os.path.join(
-                        snapshot_download(
-                            pretrained_model_name_or_path,
-                            revision=revision,
-                            ignore_file_pattern=ignore_file_pattern,
-                            allow_file_pattern=file_filter,
-                            user_agent=user_agent()), subfolder)
-            else:
-                model_dir = pretrained_model_name_or_path
+            raise ImportError(self.error_msg)
+
+        def from_config(self, cls, config):
+            raise ImportError(self.error_msg)
+
+    def user_agent(invoked_by=None):
+        from modelscope.utils.constant import Invoke
+
+        if invoked_by is None:
+            invoked_by = Invoke.PRETRAINED
+        uagent = '%s/%s' % (Invoke.KEY, invoked_by)
+        return uagent
+
+    def get_wrapped_class(module_class,
+                          ignore_file_pattern=[],
+                          file_filter=None,
+                          **kwargs):
+        """Get a custom wrapper class for auto classes to download the models from the ModelScope hub
+        Args:
+            module_class: The actual module class
+            ignore_file_pattern (`str` or `List`, *optional*, default to `None`):
+                Any file pattern to be ignored in downloading, like exact file names or file extensions.
+        Returns:
+            The wrapper
+        """
+        default_ignore_file_pattern = ignore_file_pattern
+        default_file_filter = file_filter
+
+        class ClassWrapper(module_class):
+
+            @classmethod
+            def from_pretrained(cls, pretrained_model_name_or_path,
+                                *model_args, **kwargs):
+
+                from modelscope import snapshot_download
+                from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke
+
+                ignore_file_pattern = kwargs.pop('ignore_file_pattern',
+                                                 default_ignore_file_pattern)
+                subfolder = kwargs.pop('subfolder', default_file_filter)
+                file_filter = None
+                if subfolder:
+                    file_filter = f'{subfolder}/*'
+                if not os.path.exists(pretrained_model_name_or_path):
+                    revision = kwargs.pop('revision', DEFAULT_MODEL_REVISION)
+                    if file_filter is None:
+                        model_dir = snapshot_download(
                             pretrained_model_name_or_path,
                             revision=revision,
                             ignore_file_pattern=ignore_file_pattern,
+                            user_agent=user_agent())
+                    else:
+                        model_dir = os.path.join(
+                            snapshot_download(
+                                pretrained_model_name_or_path,
+                                revision=revision,
+                                ignore_file_pattern=ignore_file_pattern,
+                                allow_file_pattern=file_filter,
+                                user_agent=user_agent()), subfolder)
+                else:
+                    model_dir = pretrained_model_name_or_path
 
-            module_obj = module_class.from_pretrained(model_dir, *model_args,
-                                                      **kwargs)
+                module_obj = module_class.from_pretrained(
+                    model_dir, *model_args, **kwargs)
 
-            if module_class.__name__.startswith('AutoModel'):
-                module_obj.model_dir = model_dir
-            return module_obj
+                if module_class.__name__.startswith('AutoModel'):
+                    module_obj.model_dir = model_dir
+                return module_obj
 
-    ClassWrapper.__name__ = module_class.__name__
-    ClassWrapper.__qualname__ = module_class.__qualname__
-    return ClassWrapper
+        ClassWrapper.__name__ = module_class.__name__
+        ClassWrapper.__qualname__ = module_class.__qualname__
+        return ClassWrapper
 
+    from .patcher import get_all_imported_modules
+    all_imported_modules = get_all_imported_modules()
+    all_available_modules = []
+    large_file_free = ['config', 'tokenizer']
+    for module in all_imported_modules:
+        try:
+            if (hasattr(module, 'from_pretrained')
+                    and 'pretrained_model_name_or_path' in inspect.signature(
+                        module.from_pretrained).parameters):
+                if any(lf in module.__name__.lower()
+                       for lf in large_file_free):
+                    ignore_file_patterns = {
+                        'ignore_file_pattern': [
+                            r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth',
+                            r'\w+\.pt', r'\w+\.h5'
+                        ]
+                    }
+                else:
+                    ignore_file_patterns = {}
+                all_available_modules.append(
+                    get_wrapped_class(module, **ignore_file_patterns))
+        except (ImportError, AttributeError):
+            pass
 
-AutoModel = get_wrapped_class(AutoModelHF)
-AutoModelForCausalLM = get_wrapped_class(AutoModelForCausalLMHF)
-AutoModelForSeq2SeqLM = get_wrapped_class(AutoModelForSeq2SeqLMHF)
-AutoModelForVision2Seq = get_wrapped_class(AutoModelForVision2SeqHF)
-AutoModelForSequenceClassification = get_wrapped_class(
-    AutoModelForSequenceClassificationHF)
-AutoModelForTokenClassification = get_wrapped_class(
-    AutoModelForTokenClassificationHF)
-AutoModelForImageSegmentation = get_wrapped_class(
-    AutoModelForImageSegmentationHF)
-AutoModelForImageClassification = get_wrapped_class(
-    AutoModelForImageClassificationHF)
-AutoModelForZeroShotImageClassification = get_wrapped_class(
-    AutoModelForZeroShotImageClassificationHF)
-AutoModelForQuestionAnswering = get_wrapped_class(
-    AutoModelForQuestionAnsweringHF)
-AutoModelForTableQuestionAnswering = get_wrapped_class(
-    AutoModelForTableQuestionAnsweringHF)
-AutoModelForVisualQuestionAnswering = get_wrapped_class(
-    AutoModelForVisualQuestionAnsweringHF)
-AutoModelForDocumentQuestionAnswering = get_wrapped_class(
-    AutoModelForDocumentQuestionAnsweringHF)
-AutoModelForSemanticSegmentation = get_wrapped_class(
-    AutoModelForSemanticSegmentationHF)
-AutoModelForUniversalSegmentation = get_wrapped_class(
-    AutoModelForUniversalSegmentationHF)
-AutoModelForInstanceSegmentation = get_wrapped_class(
-    AutoModelForInstanceSegmentationHF)
-AutoModelForObjectDetection = get_wrapped_class(AutoModelForObjectDetectionHF)
-AutoModelForZeroShotObjectDetection = get_wrapped_class(
-    AutoModelForZeroShotObjectDetectionHF)
-AutoModelForAudioClassification = get_wrapped_class(
-    AutoModelForAudioClassificationHF)
-AutoModelForSpeechSeq2Seq = get_wrapped_class(AutoModelForSpeechSeq2SeqHF)
-AutoModelForMaskedImageModeling = get_wrapped_class(
-    AutoModelForMaskedImageModelingHF)
-AutoModelForMaskedLM = get_wrapped_class(AutoModelForMaskedLMHF)
-AutoModelForMaskGeneration = get_wrapped_class(AutoModelForMaskGenerationHF)
-AutoModelForPreTraining = get_wrapped_class(AutoModelForPreTrainingHF)
-AutoModelForTextEncoding = get_wrapped_class(AutoModelForTextEncodingHF)
-T5EncoderModel = get_wrapped_class(T5EncoderModelHF)
-
-AutoTokenizer = get_wrapped_class(
-    AutoTokenizerHF,
-    ignore_file_pattern=[
-        r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5'
-    ])
-AutoProcessor = get_wrapped_class(
-    AutoProcessorHF,
-    ignore_file_pattern=[
-        r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5'
-    ])
-AutoConfig = get_wrapped_class(
-    AutoConfigHF,
-    ignore_file_pattern=[
-        r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5'
-    ])
-GenerationConfig = get_wrapped_class(
-    GenerationConfigHF,
-    ignore_file_pattern=[
-        r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5'
-    ])
-BitsAndBytesConfig = get_wrapped_class(
-    BitsAndBytesConfigHF,
-    ignore_file_pattern=[
-        r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5'
-    ])
-AutoImageProcessor = get_wrapped_class(
-    AutoImageProcessorHF,
-    ignore_file_pattern=[
-        r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5'
-    ])
-
-GPTQConfig = GPTQConfigHF
-AwqConfig = AwqConfigHF
-BatchFeature = get_wrapped_class(BatchFeatureHF)
+    for module in all_available_modules:
+        globals()[module.__name__] = module
diff --git a/modelscope/utils/hf_util/patcher.py b/modelscope/utils/hf_util/patcher.py
index fd5103f1..c81c70d9 100644
--- a/modelscope/utils/hf_util/patcher.py
+++ b/modelscope/utils/hf_util/patcher.py
@@ -1,4 +1,5 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
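The rework below is built around one save/restore idiom: stash the original callable on an `_*_origin` attribute, install the replacement, and put the original back on unpatch. A minimal standalone sketch of that idiom (illustrative only, not part of the patch):

import contextlib

@contextlib.contextmanager
def patched(obj, name, replacement):
    # Save the original, install the replacement, restore on exit --
    # the same pattern the `_*_origin` attributes below apply to
    # huggingface_hub and the transformers/peft/diffusers classes.
    original = getattr(obj, name)
    setattr(obj, name, replacement)
    try:
        yield
    finally:
        setattr(obj, name, original)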
+import contextlib import importlib import inspect import os @@ -8,27 +9,59 @@ from pathlib import Path from types import MethodType from typing import BinaryIO, Dict, List, Optional, Union -from huggingface_hub.hf_api import CommitInfo, future_compatible -from modelscope import snapshot_download -from modelscope.utils.logger import get_logger +def get_all_imported_modules(): + all_imported_modules = [] + if importlib.util.find_spec('transformers') is not None: + import transformers + extra_modules = ['T5'] + lazy_module = sys.modules['transformers'] + _import_structure = lazy_module._import_structure + for key in _import_structure: + values = _import_structure[key] + for value in values: + # pretrained + if 'auto' in value.lower() or any(m in value + for m in extra_modules): + try: + module = importlib.import_module( + f'.{key}', transformers.__name__) + value = getattr(module, value) + all_imported_modules.append(value) + except (ImportError, AttributeError): + pass -logger = get_logger() + if importlib.util.find_spec('peft') is not None: + import peft + attributes = dir(peft) + imports = [attr for attr in attributes if not attr.startswith('__')] + all_imported_modules.extend( + [getattr(peft, _import) for _import in imports]) + + if importlib.util.find_spec('diffusers') is not None: + import diffusers + if importlib.util.find_spec('diffusers') is not None: + lazy_module = sys.modules['diffusers'] + _import_structure = lazy_module._import_structure + for key in _import_structure: + values = _import_structure[key] + for value in values: + if 'pipeline' in value.lower(): + try: + module = importlib.import_module( + f'.{key}', diffusers.__name__) + value = getattr(module, value) + all_imported_modules.append(value) + except (ImportError, AttributeError): + pass + return all_imported_modules -extra_modules = ['T5'] -lazy_module = sys.modules['transformers'] -all_modules = lazy_module._modules -all_imported_modules = [] -for module in all_modules: - if 'auto' in module.lower() or any(m in module for m in extra_modules): - all_imported_modules.append(importlib.import_module(f'transformers.{module}')) - - -def _patch_pretrained_class(): +def _patch_pretrained_class(all_imported_modules): def get_model_dir(pretrained_model_name_or_path, ignore_file_pattern, **kwargs): + from modelscope import snapshot_download if not os.path.exists(pretrained_model_name_or_path): revision = kwargs.pop('revision', None) model_dir = snapshot_download( @@ -43,94 +76,109 @@ def _patch_pretrained_class(): r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt' ] - def patch_pretrained_model_name_or_path(cls, pretrained_model_name_or_path, + def patch_pretrained_model_name_or_path(pretrained_model_name_or_path, *model_args, **kwargs): model_dir = get_model_dir(pretrained_model_name_or_path, kwargs.pop('ignore_file_pattern', None), **kwargs) - return kwargs.pop('ori_func')(cls, model_dir, *model_args, **kwargs) + return kwargs.pop('ori_func')(model_dir, *model_args, **kwargs) - def patch_peft_model_id(cls, model, model_id, *model_args, **kwargs): + def patch_peft_model_id(model, model_id, *model_args, **kwargs): model_dir = get_model_dir(model_id, kwargs.pop('ignore_file_pattern', None), **kwargs) - return kwargs.pop('ori_func')(cls, model, model_dir, *model_args, - **kwargs) + return kwargs.pop('ori_func')(model, model_dir, *model_args, **kwargs) - def _get_peft_type(cls, model_id, **kwargs): + def _get_peft_type(model_id, **kwargs): model_dir = get_model_dir(model_id, ignore_file_pattern, **kwargs) - return 
kwargs.pop('ori_func')(cls, model_dir, **kwargs) + return kwargs.pop('ori_func')(model_dir, **kwargs) for var in all_imported_modules: - if var is None: + if var is None or not hasattr(var, '__name__'): continue name = var.__name__ - need_model = 'model' in name.lower() or 'processor' in name.lower() or 'extractor' in name.lower() + need_model = 'model' in name.lower() or 'processor' in name.lower( + ) or 'extractor' in name.lower() if need_model: ignore_file_pattern_kwargs = {} else: - ignore_file_pattern_kwargs = {'ignore_file_pattern': ignore_file_pattern} + ignore_file_pattern_kwargs = { + 'ignore_file_pattern': ignore_file_pattern + } - has_from_pretrained = hasattr(var, 'from_pretrained') - has_get_peft_type = hasattr(var, '_get_peft_type') - has_get_config_dict = hasattr(var, 'get_config_dict') - parameters = inspect.signature(var.from_pretrained).parameters - is_peft = 'model' in parameters and 'model_id' in parameters + try: + has_from_pretrained = hasattr(var, 'from_pretrained') + has_get_peft_type = hasattr(var, '_get_peft_type') + has_get_config_dict = hasattr(var, 'get_config_dict') + except ImportError: + continue if has_from_pretrained and not hasattr(var, '_from_pretrained_origin'): + parameters = inspect.signature(var.from_pretrained).parameters + is_peft = 'model' in parameters and 'model_id' in parameters var._from_pretrained_origin = var.from_pretrained if not is_peft: - var.from_pretrained = partial(patch_pretrained_model_name_or_path, - ori_func=var._from_pretrained_origin, - **ignore_file_pattern_kwargs) + var.from_pretrained = partial( + patch_pretrained_model_name_or_path, + ori_func=var._from_pretrained_origin, + **ignore_file_pattern_kwargs) else: - var.from_pretrained = partial(patch_peft_model_id, - ori_func=var._from_pretrained_origin, - **ignore_file_pattern_kwargs) - delattr(var, '_from_pretrained_origin') + var.from_pretrained = partial( + patch_peft_model_id, + ori_func=var._from_pretrained_origin, + **ignore_file_pattern_kwargs) if has_get_peft_type and not hasattr(var, '_get_peft_type_origin'): var._get_peft_type_origin = var._get_peft_type - var._get_peft_type = partial(_get_peft_type, - ori_func=var._get_peft_type_origin, - **ignore_file_pattern_kwargs) - delattr(var, '_get_peft_type_origin') + var._get_peft_type = partial( + _get_peft_type, + ori_func=var._get_peft_type_origin, + **ignore_file_pattern_kwargs) if has_get_config_dict and not hasattr(var, '_get_config_dict_origin'): var._get_config_dict_origin = var.get_config_dict - var.get_config_dict = partial(patch_pretrained_model_name_or_path, - ori_func=var._get_config_dict_origin, - **ignore_file_pattern_kwargs) - delattr(var, '_get_config_dict_origin') + var.get_config_dict = partial( + patch_pretrained_model_name_or_path, + ori_func=var._get_config_dict_origin, + **ignore_file_pattern_kwargs) -def _unpatch_pretrained_class(): +def _unpatch_pretrained_class(all_imported_modules): for var in all_imported_modules: if var is None: continue - has_from_pretrained = hasattr(var, 'from_pretrained') - has_get_peft_type = hasattr(var, '_get_peft_type') - has_get_config_dict = hasattr(var, 'get_config_dict') + try: + has_from_pretrained = hasattr(var, 'from_pretrained') + has_get_peft_type = hasattr(var, '_get_peft_type') + has_get_config_dict = hasattr(var, 'get_config_dict') + except ImportError: + continue if has_from_pretrained and hasattr(var, '_from_pretrained_origin'): var.from_pretrained = var._from_pretrained_origin + delattr(var, '_from_pretrained_origin') if has_get_peft_type and hasattr(var, 
'_get_peft_type_origin'): var._get_peft_type = var._get_peft_type_origin + delattr(var, '_get_peft_type_origin') if has_get_config_dict and hasattr(var, '_get_config_dict_origin'): var.get_config_dict = var._get_config_dict_origin + delattr(var, '_get_config_dict_origin') def _patch_hub(): import huggingface_hub from huggingface_hub import hf_api from huggingface_hub.hf_api import api + from huggingface_hub.hf_api import CommitInfo, future_compatible + from modelscope import get_logger + logger = get_logger() def _file_exists( - self, - repo_id: str, - filename: str, - *, - repo_type: Optional[str] = None, - revision: Optional[str] = None, - token: Union[str, bool, None] = None, + self, + repo_id: str, + filename: str, + *, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + token: Union[str, bool, None] = None, ): """Patch huggingface_hub.file_exists""" if repo_type is not None: @@ -171,7 +219,8 @@ def _patch_hub(): api.try_login(token) return file_download( repo_id, - file_path=os.path.join(subfolder, filename) if subfolder else filename, + file_path=os.path.join(subfolder, filename) + if subfolder else filename, cache_dir=cache_dir, local_dir=local_dir, local_files_only=local_files_only, @@ -209,16 +258,16 @@ def _patch_hub(): @future_compatible def upload_folder( - *, - repo_id: str, - folder_path: Union[str, Path], - path_in_repo: Optional[str] = None, - commit_message: Optional[str] = None, - commit_description: Optional[str] = None, - token: Union[str, bool, None] = None, - revision: Optional[str] = 'master', - ignore_patterns: Optional[Union[List[str], str]] = None, - **kwargs, + *, + repo_id: str, + folder_path: Union[str, Path], + path_in_repo: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, + token: Union[str, bool, None] = None, + revision: Optional[str] = 'master', + ignore_patterns: Optional[Union[List[str], str]] = None, + **kwargs, ): from modelscope.hub.push_to_hub import push_model_to_hub push_model_to_hub(repo_id, folder_path, path_in_repo, commit_message, @@ -233,16 +282,16 @@ def _patch_hub(): @future_compatible def upload_file( - self, - *, - path_or_fileobj: Union[str, Path, bytes, BinaryIO], - path_in_repo: str, - repo_id: str, - token: Union[str, bool, None] = None, - revision: Optional[str] = None, - commit_message: Optional[str] = None, - commit_description: Optional[str] = None, - **kwargs, + self, + *, + path_or_fileobj: Union[str, Path, bytes, BinaryIO], + path_in_repo: str, + repo_id: str, + token: Union[str, bool, None] = None, + revision: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, + **kwargs, ): from modelscope.hub.push_to_hub import push_files_to_hub push_files_to_hub(path_or_fileobj, path_in_repo, repo_id, token, @@ -347,10 +396,19 @@ def _unpatch_hub(): repocard.upload_file = hf_api.upload_file delattr(hf_api, '_upload_file_origin') + def patch_hub(): _patch_hub() - _patch_pretrained_class() + _patch_pretrained_class(get_all_imported_modules()) def unpatch_hub(): - _unpatch_pretrained_class() + _unpatch_pretrained_class(get_all_imported_modules()) + _unpatch_hub() + + +@contextlib.contextmanager +def patch_context(): + patch_hub() + yield + unpatch_hub() diff --git a/modelscope/utils/import_utils.py b/modelscope/utils/import_utils.py index 984df7af..51ff7a96 100644 --- a/modelscope/utils/import_utils.py +++ b/modelscope/utils/import_utils.py @@ -282,6 +282,10 @@ def is_transformers_available(): return 
importlib.util.find_spec('transformers') is not None +def is_diffusers_available(): + return importlib.util.find_spec('diffusers') is not None + + def is_tensorrt_llm_available(): return importlib.util.find_spec('tensorrt_llm') is not None diff --git a/tests/hub/test_patch_hf.py b/tests/hub/test_patch_hf.py index dbaf2c11..13754923 100644 --- a/tests/hub/test_patch_hf.py +++ b/tests/hub/test_patch_hf.py @@ -14,6 +14,3 @@ class DownloadDatasetTest(unittest.TestCase): from transformers import AutoModel model = AutoModel.from_pretrained('AI-ModelScope/bert-base-uncased') self.assertTrue(model is not None) - - - diff --git a/tests/utils/test_hf_util.py b/tests/utils/test_hf_util.py index 9d6b61bd..03de5aea 100644 --- a/tests/utils/test_hf_util.py +++ b/tests/utils/test_hf_util.py @@ -2,8 +2,7 @@ import unittest -from modelscope import (AutoConfig, AutoModel, AutoModelForCausalLM, - AutoTokenizer, GenerationConfig) +from modelscope.utils.hf_util.patcher import patch_context class HFUtilTest(unittest.TestCase): @@ -15,6 +14,7 @@ class HFUtilTest(unittest.TestCase): pass def test_auto_tokenizer(self): + from modelscope import AutoTokenizer tokenizer = AutoTokenizer.from_pretrained( 'baichuan-inc/Baichuan2-7B-Chat', trust_remote_code=True, @@ -28,11 +28,13 @@ class HFUtilTest(unittest.TestCase): self.assertTrue(BitsAndBytesConfig is not None) def test_auto_model(self): + from modelscope import AutoModelForCausalLM model = AutoModelForCausalLM.from_pretrained( 'baichuan-inc/baichuan-7B', trust_remote_code=True) self.assertTrue(model is not None) def test_auto_config(self): + from modelscope import AutoConfig, GenerationConfig config = AutoConfig.from_pretrained( 'baichuan-inc/Baichuan-13B-Chat', trust_remote_code=True, @@ -45,12 +47,63 @@ class HFUtilTest(unittest.TestCase): self.assertEqual(gen_config.assistant_token_id, 196) def test_transformer_patch(self): - tokenizer = AutoTokenizer.from_pretrained( - 'iic/nlp_structbert_sentiment-classification_chinese-base') - self.assertIsNotNone(tokenizer) - model = AutoModelForCausalLM.from_pretrained( - 'iic/nlp_structbert_sentiment-classification_chinese-base') - self.assertIsNotNone(model) + with patch_context(): + from transformers import AutoTokenizer, AutoModelForCausalLM + tokenizer = AutoTokenizer.from_pretrained( + 'iic/nlp_structbert_sentiment-classification_chinese-base') + self.assertIsNotNone(tokenizer) + model = AutoModelForCausalLM.from_pretrained( + 'iic/nlp_structbert_sentiment-classification_chinese-base') + self.assertIsNotNone(model) + + def test_patch_model(self): + from modelscope.utils.hf_util.patcher import patch_context + with patch_context(): + from transformers import AutoModel + model = AutoModel.from_pretrained( + 'iic/nlp_structbert_sentiment-classification_chinese-tiny') + self.assertTrue(model is not None) + try: + model = AutoModel.from_pretrained( + 'iic/nlp_structbert_sentiment-classification_chinese-tiny') + except Exception: + pass + else: + self.assertTrue(False) + + def test_patch_config(self): + with patch_context(): + from transformers import AutoConfig + config = AutoConfig.from_pretrained( + 'iic/nlp_structbert_sentiment-classification_chinese-tiny') + self.assertTrue(config is not None) + try: + config = AutoConfig.from_pretrained( + 'iic/nlp_structbert_sentiment-classification_chinese-tiny') + except Exception: + pass + else: + self.assertTrue(False) + + def test_patch_diffusers(self): + with patch_context(): + from diffusers import StableDiffusionPipeline + pipe = StableDiffusionPipeline.from_pretrained( + 
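One caveat worth noting about the `patch_context` manager introduced above: if the `with` body raises, `unpatch_hub()` is skipped and the hub stays patched. A more defensive variant is easy to write (a sketch, not the shipped code):

```python
import contextlib

from modelscope.utils.hf_util import patch_hub, unpatch_hub


@contextlib.contextmanager
def safe_patch_context():
    patch_hub()
    try:
        yield
    finally:
        # Runs even when the body raises, so patching never leaks.
        unpatch_hub()
```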
'AI-ModelScope/stable-diffusion-v1-5') + self.assertTrue(pipe is not None) + try: + pipe = StableDiffusionPipeline.from_pretrained( + 'AI-ModelScope/stable-diffusion-v1-5') + except Exception: + pass + else: + self.assertTrue(False) + + def test_patch_peft(self): + with patch_context(): + from peft import PeftModel + self.assertTrue(hasattr(PeftModel, '_from_pretrained_origin')) + self.assertFalse(hasattr(PeftModel, '_from_pretrained_origin')) if __name__ == '__main__': From 4723e5c0fff2f803d2cb04016cca74eb0e010ff6 Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Sun, 26 Jan 2025 16:26:37 +0800 Subject: [PATCH 15/36] fix --- modelscope/__init__.py | 29 +--- modelscope/hub/push_to_hub.py | 54 +------ modelscope/utils/hf_util/auto_class.py | 121 +++++----------- modelscope/utils/hf_util/patcher.py | 193 +++++++++++++++++++------ modelscope/utils/test_utils.py | 2 +- tests/utils/test_hf_util.py | 112 +++++++++++++- 6 files changed, 306 insertions(+), 205 deletions(-) diff --git a/modelscope/__init__.py b/modelscope/__init__.py index 0f0469b0..88323cc3 100644 --- a/modelscope/__init__.py +++ b/modelscope/__init__.py @@ -31,6 +31,7 @@ if TYPE_CHECKING: from .trainers import (EpochBasedTrainer, Hook, Priority, TrainingArgs, build_dataset_from_file) from .utils.constant import Tasks + from .utils.hf_util import patch_hub, patch_context, unpatch_hub if is_transformers_available(): from .utils.hf_util import ( AutoModel, AutoProcessor, AutoFeatureExtractor, GenerationConfig, @@ -106,34 +107,6 @@ else: 'msdatasets': ['MsDataset'] } - if is_transformers_available(): - _import_structure['utils.hf_util'] = [ - 'AutoModel', 'AutoProcessor', 'AutoFeatureExtractor', - 'GenerationConfig', 'AutoConfig', 'GPTQConfig', 'AwqConfig', - 'BitsAndBytesConfig', 'AutoModelForCausalLM', - 'AutoModelForSeq2SeqLM', 'AutoModelForVision2Seq', - 'AutoModelForSequenceClassification', - 'AutoModelForTokenClassification', - 'AutoModelForImageClassification', 'AutoModelForImageToImage', - 'AutoModelForImageTextToText', - 'AutoModelForZeroShotImageClassification', - 'AutoModelForKeypointDetection', - 'AutoModelForDocumentQuestionAnswering', - 'AutoModelForSemanticSegmentation', - 'AutoModelForUniversalSegmentation', - 'AutoModelForInstanceSegmentation', 'AutoModelForObjectDetection', - 'AutoModelForZeroShotObjectDetection', - 'AutoModelForAudioClassification', 'AutoModelForSpeechSeq2Seq', - 'AutoModelForMaskedImageModeling', - 'AutoModelForVisualQuestionAnswering', - 'AutoModelForTableQuestionAnswering', - 'AutoModelForImageSegmentation', 'AutoModelForQuestionAnswering', - 'AutoModelForMaskedLM', 'AutoTokenizer', - 'AutoModelForMaskGeneration', 'AutoModelForPreTraining', - 'AutoModelForTextEncoding', 'AutoImageProcessor', 'BatchFeature', - 'Qwen2VLForConditionalGeneration', 'T5EncoderModel' - ] - from modelscope.utils import hf_util extra_objects = {} diff --git a/modelscope/hub/push_to_hub.py b/modelscope/hub/push_to_hub.py index 47c7cc69..fdd4a17f 100644 --- a/modelscope/hub/push_to_hub.py +++ b/modelscope/hub/push_to_hub.py @@ -29,7 +29,7 @@ def push_files_to_hub( path_in_repo: str, repo_id: str, token: Union[str, bool, None] = None, - revision: Optional[str] = None, + revision: Optional[str] = DEFAULT_REPOSITORY_REVISION, commit_message: Optional[str] = None, commit_description: Optional[str] = None, ): @@ -49,56 +49,12 @@ def push_files_to_hub( sub_folder = os.path.join(temp_cache_dir, path_in_repo) os.makedirs(sub_folder, exist_ok=True) if os.path.isfile(path_or_fileobj): - shutil.copyfile(path_or_fileobj, sub_folder) + 
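The `dest_file` replacement in the next lines exists because `shutil.copyfile` needs a full destination file path, not a directory. A quick self-contained demonstration of the failure mode and the fix (temporary paths only):

```python
import os
import shutil
import tempfile

staging = tempfile.mkdtemp()
src = os.path.join(staging, 'src.json')
with open(src, 'w') as f:
    f.write('{}')

sub_folder = os.path.join(staging, 'repo')
os.makedirs(sub_folder, exist_ok=True)

# shutil.copyfile(src, sub_folder) raises IsADirectoryError (PermissionError
# on Windows) because the destination must be a file path:
dest_file = os.path.join(sub_folder, os.path.basename(src))
shutil.copyfile(src, dest_file)
assert os.path.isfile(dest_file)
```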
dest_file = os.path.join(sub_folder, + os.path.basename(path_or_fileobj)) + shutil.copyfile(path_or_fileobj, dest_file) else: shutil.copytree(path_or_fileobj, sub_folder, dirs_exist_ok=True) - repo.push(commit_message) - - -def push_model_to_hub(repo_id: str, - folder_path: Union[str, Path], - path_in_repo: Optional[str] = None, - commit_message: Optional[str] = None, - commit_description: Optional[str] = None, - token: Union[str, bool, None] = None, - private: bool = False, - revision: Optional[str] = 'master', - ignore_patterns: Optional[Union[List[str], str]] = None, - **kwargs): - from modelscope.hub.create_model import create_model_repo - create_model_repo(repo_id, token, private) - from modelscope import push_to_hub - commit_message = commit_message or 'Upload folder using api' - if commit_description: - commit_message = commit_message + '\n' + commit_description - if not os.path.exists(os.path.join(folder_path, 'configuration.json')): - default_config = { - 'framework': 'pytorch', - 'task': 'text-generation', - 'allow_remote': True - } - config_json = kwargs.get('config_json') or {} - config = {**default_config, **config_json} - with open(os.path.join(folder_path, 'configuration.json'), 'w') as f: - f.write(json.dumps(config)) - if ignore_patterns: - ignore_patterns = [p for p in ignore_patterns if p != '_*'] - if path_in_repo: - # We don't support part submit for now - path_in_repo = os.path.basename(folder_path) - folder_path = os.path.dirname(folder_path) - ignore_patterns = [] - if revision is None or revision == 'main': - revision = 'master' - push_to_hub( - repo_id, - folder_path, - token, - private, - commit_message=commit_message, - ignore_patterns=ignore_patterns, - revision=revision, - tag=path_in_repo) + repo.push(commit_message) def _api_push_to_hub(repo_name, diff --git a/modelscope/utils/hf_util/auto_class.py b/modelscope/utils/hf_util/auto_class.py index 157158fd..24786188 100644 --- a/modelscope/utils/hf_util/auto_class.py +++ b/modelscope/utils/hf_util/auto_class.py @@ -5,6 +5,44 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: from transformers import __version__ as transformers_version + + from transformers import AutoConfig + from transformers import AutoFeatureExtractor + from transformers import AutoImageProcessor + from transformers import AutoModel + from transformers import AutoModelForAudioClassification + from transformers import AutoModelForCausalLM + from transformers import AutoModelForDocumentQuestionAnswering + from transformers import AutoModelForImageClassification + from transformers import AutoModelForImageSegmentation + from transformers import AutoModelForInstanceSegmentation + from transformers import AutoModelForMaskedImageModeling + from transformers import AutoModelForMaskedLM + from transformers import AutoModelForMaskGeneration + from transformers import AutoModelForObjectDetection + from transformers import AutoModelForPreTraining + from transformers import AutoModelForQuestionAnswering + from transformers import AutoModelForSemanticSegmentation + from transformers import AutoModelForSeq2SeqLM + from transformers import AutoModelForSequenceClassification + from transformers import AutoModelForSpeechSeq2Seq + from transformers import AutoModelForTableQuestionAnswering + from transformers import AutoModelForTextEncoding + from transformers import AutoModelForTokenClassification + from transformers import AutoModelForUniversalSegmentation + from transformers import AutoModelForVision2Seq + from transformers import 
AutoModelForVisualQuestionAnswering + from transformers import AutoModelForZeroShotImageClassification + from transformers import AutoModelForZeroShotObjectDetection + from transformers import AutoProcessor + from transformers import AutoTokenizer + from transformers import BatchFeature + from transformers import BitsAndBytesConfig + from transformers import GenerationConfig + from transformers import (PretrainedConfig, PreTrainedModel, + PreTrainedTokenizerBase) + from transformers import T5EncoderModel + try: from transformers import Qwen2VLForConditionalGeneration except ImportError: @@ -55,89 +93,10 @@ else: uagent = '%s/%s' % (Invoke.KEY, invoked_by) return uagent - def get_wrapped_class(module_class, - ignore_file_pattern=[], - file_filter=None, - **kwargs): - """Get a custom wrapper class for auto classes to download the models from the ModelScope hub - Args: - module_class: The actual module class - ignore_file_pattern (`str` or `List`, *optional*, default to `None`): - Any file pattern to be ignored in downloading, like exact file names or file extensions. - Returns: - The wrapper - """ - default_ignore_file_pattern = ignore_file_pattern - default_file_filter = file_filter + from .patcher import get_all_imported_modules, _patch_pretrained_class - class ClassWrapper(module_class): - - @classmethod - def from_pretrained(cls, pretrained_model_name_or_path, - *model_args, **kwargs): - - from modelscope import snapshot_download - from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke - - ignore_file_pattern = kwargs.pop('ignore_file_pattern', - default_ignore_file_pattern) - subfolder = kwargs.pop('subfolder', default_file_filter) - file_filter = None - if subfolder: - file_filter = f'{subfolder}/*' - if not os.path.exists(pretrained_model_name_or_path): - revision = kwargs.pop('revision', DEFAULT_MODEL_REVISION) - if file_filter is None: - model_dir = snapshot_download( - pretrained_model_name_or_path, - revision=revision, - ignore_file_pattern=ignore_file_pattern, - user_agent=user_agent()) - else: - model_dir = os.path.join( - snapshot_download( - pretrained_model_name_or_path, - revision=revision, - ignore_file_pattern=ignore_file_pattern, - allow_file_pattern=file_filter, - user_agent=user_agent()), subfolder) - else: - model_dir = pretrained_model_name_or_path - - module_obj = module_class.from_pretrained( - model_dir, *model_args, **kwargs) - - if module_class.__name__.startswith('AutoModel'): - module_obj.model_dir = model_dir - return module_obj - - ClassWrapper.__name__ = module_class.__name__ - ClassWrapper.__qualname__ = module_class.__qualname__ - return ClassWrapper - - from .patcher import get_all_imported_modules all_imported_modules = get_all_imported_modules() - all_available_modules = [] - large_file_free = ['config', 'tokenizer'] - for module in all_imported_modules: - try: - if (hasattr(module, 'from_pretrained') - and 'pretrained_model_name_or_path' in inspect.signature( - module.from_pretrained).parameters): - if any(lf in module.__name__.lower() - for lf in large_file_free): - ignore_file_patterns = { - 'ignore_file_pattern': [ - r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', - r'\w+\.pt', r'\w+\.h5' - ] - } - else: - ignore_file_patterns = {} - all_available_modules.append( - get_wrapped_class(module, **ignore_file_patterns)) - except (ImportError, AttributeError): - pass + all_available_modules = _patch_pretrained_class(all_imported_modules, wrap=True) for module in all_available_modules: globals()[module.__name__] = module diff --git 
a/modelscope/utils/hf_util/patcher.py b/modelscope/utils/hf_util/patcher.py index c81c70d9..f88e7d2a 100644 --- a/modelscope/utils/hf_util/patcher.py +++ b/modelscope/utils/hf_util/patcher.py @@ -11,18 +11,20 @@ from typing import BinaryIO, Dict, List, Optional, Union def get_all_imported_modules(): + """Find all modules in transformers/peft/diffusers""" all_imported_modules = [] + transformers_include_names = ['Auto', 'T5', 'BitsAndBytes', 'GenerationConfig', + 'Quant', 'Awq', 'GPTQ', 'BatchFeature', 'Qwen2'] + diffusers_include_names = ['Pipeline'] if importlib.util.find_spec('transformers') is not None: import transformers - extra_modules = ['T5'] lazy_module = sys.modules['transformers'] _import_structure = lazy_module._import_structure for key in _import_structure: values = _import_structure[key] for value in values: # pretrained - if 'auto' in value.lower() or any(m in value - for m in extra_modules): + if any([name in value for name in transformers_include_names]): try: module = importlib.import_module( f'.{key}', transformers.__name__) @@ -46,7 +48,7 @@ def get_all_imported_modules(): for key in _import_structure: values = _import_structure[key] for value in values: - if 'pipeline' in value.lower(): + if any([name in value for name in diffusers_include_names]): try: module = importlib.import_module( f'.{key}', diffusers.__name__) @@ -57,9 +59,11 @@ def get_all_imported_modules(): return all_imported_modules -def _patch_pretrained_class(all_imported_modules): +def _patch_pretrained_class(all_imported_modules, wrap=False): - def get_model_dir(pretrained_model_name_or_path, ignore_file_pattern, + def get_model_dir(pretrained_model_name_or_path, + ignore_file_pattern=None, + allow_file_pattern=None, **kwargs): from modelscope import snapshot_download if not os.path.exists(pretrained_model_name_or_path): @@ -67,14 +71,13 @@ def _patch_pretrained_class(all_imported_modules): model_dir = snapshot_download( pretrained_model_name_or_path, revision=revision, - ignore_file_pattern=ignore_file_pattern) + ignore_file_pattern=ignore_file_pattern, + allow_file_pattern=allow_file_pattern) else: model_dir = pretrained_model_name_or_path return model_dir - ignore_file_pattern = [ - r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt' - ] + ignore_file_pattern = [r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5'] def patch_pretrained_model_name_or_path(pretrained_model_name_or_path, *model_args, **kwargs): @@ -93,6 +96,88 @@ def _patch_pretrained_class(all_imported_modules): model_dir = get_model_dir(model_id, ignore_file_pattern, **kwargs) return kwargs.pop('ori_func')(model_dir, **kwargs) + def get_wrapped_class(module_class: 'PreTrainedModel', + ignore_file_pattern: Optional[Union[str, List[str]]] = None, + allow_file_pattern: Optional[Union[str, List[str]]] = None, + **kwargs): + """Get a custom wrapper class for auto classes to download the models from the ModelScope hub + Args: + module_class (`PreTrainedModel`): The actual module class + ignore_file_pattern (`str` or `List`, *optional*, default to `None`): + Any file pattern to be ignored, like exact file names or file extensions. + allow_file_pattern (`str` or `List`, *optional*, default to `None`): + Any file pattern to be included, like exact file names or file extensions. 
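`get_all_imported_modules` above leans on the `_import_structure` dict that transformers/diffusers lazy modules expose. A simplified sketch of that scan (names and filters are illustrative):

```python
import importlib
import sys

import transformers  # ensure sys.modules['transformers'] exists


def collect_symbols(package: str, include_names):
    found = []
    lazy_module = sys.modules[package]
    for submodule, names in getattr(lazy_module, '_import_structure',
                                    {}).items():
        for name in names:
            if any(key in name for key in include_names):
                try:
                    mod = importlib.import_module(f'.{submodule}', package)
                    found.append(getattr(mod, name, None))
                except ImportError:
                    pass
    return [obj for obj in found if obj is not None]


# e.g. every Auto* class plus GenerationConfig:
symbols = collect_symbols('transformers', ['Auto', 'GenerationConfig'])
```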
+ Returns: + The wrapper + """ + + def from_pretrained(model, model_id, *model_args, **kwargs): + model_dir = get_model_dir(model_id, + ignore_file_pattern=ignore_file_pattern, + allow_file_pattern=allow_file_pattern, + **kwargs) + + module_obj = module_class.from_pretrained( + model, model_dir, *model_args, **kwargs) + + return module_obj + + class ClassWrapper(module_class): + + @classmethod + def from_pretrained(cls, pretrained_model_name_or_path, + *model_args, **kwargs): + model_dir = get_model_dir(pretrained_model_name_or_path, + ignore_file_pattern=ignore_file_pattern, + allow_file_pattern=allow_file_pattern, + **kwargs) + + module_obj = module_class.from_pretrained( + model_dir, *model_args, **kwargs) + + if module_class.__name__.startswith('AutoModel'): + module_obj.model_dir = model_dir + return module_obj + + @classmethod + def _get_peft_type(cls, model_id, **kwargs): + model_dir = get_model_dir(model_id, + kwargs.pop('ignore_file_pattern', None), + **kwargs) + + module_obj = module_class._get_peft_type( + model_dir, **kwargs) + return module_obj + + @classmethod + def get_config_dict(cls, pretrained_model_name_or_path, *model_args, **kwargs): + model_dir = get_model_dir(pretrained_model_name_or_path, + kwargs.pop('ignore_file_pattern', None), + **kwargs) + + module_obj = module_class.get_config_dict( + model_dir, *model_args, **kwargs) + return module_obj + + if not hasattr(module_class, 'from_pretrained'): + del ClassWrapper.from_pretrained + else: + parameters = inspect.signature(var.from_pretrained).parameters + if 'model' in parameters and 'model_id' in parameters: + # peft + ClassWrapper.from_pretrained = from_pretrained + + if not hasattr(module_class, '_get_peft_type'): + del ClassWrapper._get_peft_type + + if not hasattr(module_class, 'get_config_dict'): + del ClassWrapper.get_config_dict + + ClassWrapper.__name__ = module_class.__name__ + ClassWrapper.__qualname__ = module_class.__qualname__ + return ClassWrapper + + all_available_modules = [] for var in all_imported_modules: if var is None or not hasattr(var, '__name__'): continue @@ -107,38 +192,53 @@ def _patch_pretrained_class(all_imported_modules): } try: + # some TFxxx classes has import errors has_from_pretrained = hasattr(var, 'from_pretrained') has_get_peft_type = hasattr(var, '_get_peft_type') has_get_config_dict = hasattr(var, 'get_config_dict') except ImportError: continue - if has_from_pretrained and not hasattr(var, '_from_pretrained_origin'): - parameters = inspect.signature(var.from_pretrained).parameters - is_peft = 'model' in parameters and 'model_id' in parameters - var._from_pretrained_origin = var.from_pretrained - if not is_peft: - var.from_pretrained = partial( - patch_pretrained_model_name_or_path, - ori_func=var._from_pretrained_origin, - **ignore_file_pattern_kwargs) - else: - var.from_pretrained = partial( - patch_peft_model_id, - ori_func=var._from_pretrained_origin, - **ignore_file_pattern_kwargs) - if has_get_peft_type and not hasattr(var, '_get_peft_type_origin'): - var._get_peft_type_origin = var._get_peft_type - var._get_peft_type = partial( - _get_peft_type, - ori_func=var._get_peft_type_origin, - **ignore_file_pattern_kwargs) - if has_get_config_dict and not hasattr(var, '_get_config_dict_origin'): - var._get_config_dict_origin = var.get_config_dict - var.get_config_dict = partial( - patch_pretrained_model_name_or_path, - ori_func=var._get_config_dict_origin, - **ignore_file_pattern_kwargs) + if wrap: + try: + if not has_from_pretrained and not has_get_config_dict and not 
has_get_peft_type: + all_available_modules.append(var) + else: + all_available_modules.append(get_wrapped_class(var, ignore_file_pattern)) + except Exception: + all_available_modules.append(var) + else: + if has_from_pretrained and not hasattr(var, '_from_pretrained_origin'): + parameters = inspect.signature(var.from_pretrained).parameters + # different argument names + is_peft = 'model' in parameters and 'model_id' in parameters + var._from_pretrained_origin = var.from_pretrained + if not is_peft: + var.from_pretrained = partial( + patch_pretrained_model_name_or_path, + ori_func=var._from_pretrained_origin, + **ignore_file_pattern_kwargs) + else: + var.from_pretrained = partial( + patch_peft_model_id, + ori_func=var._from_pretrained_origin, + **ignore_file_pattern_kwargs) + if has_get_peft_type and not hasattr(var, '_get_peft_type_origin'): + var._get_peft_type_origin = var._get_peft_type + var._get_peft_type = partial( + _get_peft_type, + ori_func=var._get_peft_type_origin, + **ignore_file_pattern_kwargs) + + if has_get_config_dict and not hasattr(var, '_get_config_dict_origin'): + var._get_config_dict_origin = var.get_config_dict + var.get_config_dict = partial( + patch_pretrained_model_name_or_path, + ori_func=var._get_config_dict_origin, + **ignore_file_pattern_kwargs) + + all_available_modules.append(var) + return all_available_modules def _unpatch_pretrained_class(all_imported_modules): @@ -167,7 +267,7 @@ def _patch_hub(): import huggingface_hub from huggingface_hub import hf_api from huggingface_hub.hf_api import api - from huggingface_hub.hf_api import CommitInfo, future_compatible + from huggingface_hub.hf_api import future_compatible from modelscope import get_logger logger = get_logger() @@ -258,6 +358,7 @@ def _patch_hub(): @future_compatible def upload_folder( + self, *, repo_id: str, folder_path: Union[str, Path], @@ -269,10 +370,16 @@ def _patch_hub(): ignore_patterns: Optional[Union[List[str], str]] = None, **kwargs, ): - from modelscope.hub.push_to_hub import push_model_to_hub - push_model_to_hub(repo_id, folder_path, path_in_repo, commit_message, - commit_description, token, True, revision, - ignore_patterns) + from modelscope.hub.push_to_hub import push_files_to_hub + push_files_to_hub( + path_or_fileobj=folder_path, + path_in_repo=path_in_repo, + repo_id=repo_id, + commit_message=commit_message, + commit_description=commit_description, + revision=revision, + token=token) + from modelscope.utils.repo_utils import CommitInfo return CommitInfo( commit_url=f'https://www.modelscope.cn/models/{repo_id}/files', commit_message=commit_message, @@ -280,6 +387,8 @@ def _patch_hub(): oid=None, ) + from modelscope.utils.constant import DEFAULT_REPOSITORY_REVISION + @future_compatible def upload_file( self, @@ -288,7 +397,7 @@ def _patch_hub(): path_in_repo: str, repo_id: str, token: Union[str, bool, None] = None, - revision: Optional[str] = None, + revision: Optional[str] = DEFAULT_REPOSITORY_REVISION, commit_message: Optional[str] = None, commit_description: Optional[str] = None, **kwargs, diff --git a/modelscope/utils/test_utils.py b/modelscope/utils/test_utils.py index 3859be61..35c1ad5d 100644 --- a/modelscope/utils/test_utils.py +++ b/modelscope/utils/test_utils.py @@ -29,7 +29,7 @@ TEST_ACCESS_TOKEN1 = os.environ.get('TEST_ACCESS_TOKEN_CITEST', None) TEST_ACCESS_TOKEN2 = os.environ.get('TEST_ACCESS_TOKEN_SDKDEV', None) TEST_MODEL_CHINESE_NAME = '内部测试模型' -TEST_MODEL_ORG = 'citest' +TEST_MODEL_ORG = 'tastelikefeet' def delete_credential(): diff --git 
a/tests/utils/test_hf_util.py b/tests/utils/test_hf_util.py index 03de5aea..ba2b24d9 100644 --- a/tests/utils/test_hf_util.py +++ b/tests/utils/test_hf_util.py @@ -1,17 +1,51 @@ # Copyright (c) Alibaba, Inc. and its affiliates. - +import os +import shutil +import tempfile import unittest +import uuid +from huggingface_hub import CommitInfo, RepoUrl + +from modelscope import HubApi from modelscope.utils.hf_util.patcher import patch_context +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import TEST_MODEL_ORG + +logger = get_logger() class HFUtilTest(unittest.TestCase): def setUp(self): - pass + logger.info('SetUp') + self.api = HubApi() + self.user = TEST_MODEL_ORG + print(self.user) + self.create_model_name = '%s/%s_%s' % (self.user, 'test_model_upload', + uuid.uuid4().hex) + logger.info('create %s' % self.create_model_name) + temporary_dir = tempfile.mkdtemp() + self.work_dir = temporary_dir + self.model_dir = os.path.join(temporary_dir, self.create_model_name) + self.repo_path = os.path.join(self.work_dir, 'repo_path') + self.test_folder = os.path.join(temporary_dir, 'test_folder') + self.test_file1 = os.path.join( + os.path.join(temporary_dir, 'test_folder', '1.json')) + self.test_file2 = os.path.join(os.path.join(temporary_dir, '2.json')) + os.makedirs(self.test_folder, exist_ok=True) + with open(self.test_file1, 'w') as f: + f.write('{}') + with open(self.test_file2, 'w') as f: + f.write('{}') def tearDown(self): - pass + logger.info('TearDown') + shutil.rmtree(self.model_dir, ignore_errors=True) + try: + self.api.delete_model(model_id=self.create_model_name) + except Exception: + pass def test_auto_tokenizer(self): from modelscope import AutoTokenizer @@ -24,7 +58,7 @@ class HFUtilTest(unittest.TestCase): self.assertFalse(tokenizer.is_fast) def test_quantization_import(self): - from modelscope import GPTQConfig, BitsAndBytesConfig + from modelscope import BitsAndBytesConfig self.assertTrue(BitsAndBytesConfig is not None) def test_auto_model(self): @@ -71,6 +105,16 @@ class HFUtilTest(unittest.TestCase): else: self.assertTrue(False) + def test_patch_config_bert(self): + from transformers import BertConfig + try: + BertConfig.from_pretrained( + 'iic/nlp_structbert_sentiment-classification_chinese-tiny') + except Exception: + pass + else: + self.assertTrue(False) + def test_patch_config(self): with patch_context(): from transformers import AutoConfig @@ -85,6 +129,13 @@ class HFUtilTest(unittest.TestCase): else: self.assertTrue(False) + # Test patch again + with patch_context(): + from transformers import AutoConfig + config = AutoConfig.from_pretrained( + 'iic/nlp_structbert_sentiment-classification_chinese-tiny') + self.assertTrue(config is not None) + def test_patch_diffusers(self): with patch_context(): from diffusers import StableDiffusionPipeline @@ -105,6 +156,59 @@ class HFUtilTest(unittest.TestCase): self.assertTrue(hasattr(PeftModel, '_from_pretrained_origin')) self.assertFalse(hasattr(PeftModel, '_from_pretrained_origin')) + def test_patch_file_exists(self): + with patch_context(): + from huggingface_hub import file_exists + self.assertTrue( + file_exists('AI-ModelScope/stable-diffusion-v1-5', + 'feature_extractor/preprocessor_config.json')) + try: + # Import again + from huggingface_hub import file_exists # noqa + file_exists('AI-ModelScope/stable-diffusion-v1-5', + 'feature_extractor/preprocessor_config.json') + except Exception: + pass + else: + self.assertTrue(False) + + def test_patch_file_download(self): + with patch_context(): + from 
huggingface_hub import hf_hub_download + local_dir = hf_hub_download( + 'AI-ModelScope/stable-diffusion-v1-5', + 'feature_extractor/preprocessor_config.json') + logger.info('patch file_download dir: ' + local_dir) + self.assertTrue(local_dir is not None) + + def test_patch_create_repo(self): + with patch_context(): + from huggingface_hub import create_repo + repo_url: RepoUrl = create_repo(self.create_model_name) + logger.info('patch create repo result: ' + repo_url.repo_id) + self.assertTrue(repo_url is not None) + from huggingface_hub import upload_folder + commit_info: CommitInfo = upload_folder( + repo_id=self.create_model_name, + folder_path=self.test_folder, + path_in_repo='') + logger.info('patch create repo result: ' + commit_info.commit_url) + self.assertTrue(commit_info is not None) + from huggingface_hub import file_exists + self.assertTrue(file_exists(self.create_model_name, '1.json')) + from huggingface_hub import upload_file + commit_info: CommitInfo = upload_file( + path_or_fileobj=self.test_file2, + path_in_repo='test_folder2', + repo_id=self.create_model_name) + self.assertTrue( + file_exists(self.create_model_name, 'test_folder2/2.json')) + + def test_who_am_i(self): + with patch_context(): + from huggingface_hub import whoami + self.assertTrue(whoami()['name'] == self.user) + if __name__ == '__main__': unittest.main() From 1900b574506336b38af0dd1314fa669a3a7a517a Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Sun, 26 Jan 2025 16:53:50 +0800 Subject: [PATCH 16/36] fix --- modelscope/utils/hf_util/auto_class.py | 29 +------- modelscope/utils/hf_util/patcher.py | 92 +++++++++++++++----------- tests/utils/test_hf_util.py | 6 +- 3 files changed, 60 insertions(+), 67 deletions(-) diff --git a/modelscope/utils/hf_util/auto_class.py b/modelscope/utils/hf_util/auto_class.py index 24786188..996d6bd9 100644 --- a/modelscope/utils/hf_util/auto_class.py +++ b/modelscope/utils/hf_util/auto_class.py @@ -1,6 +1,4 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -import inspect -import os from typing import TYPE_CHECKING if TYPE_CHECKING: @@ -71,32 +69,9 @@ if TYPE_CHECKING: else: - class UnsupportedAutoClass: - - def __init__(self, name: str): - self.error_msg =\ - f'{name} is not supported with your installed Transformers version {transformers_version}. ' + \ - 'Please update your Transformers by "pip install transformers -U".' 
- - def from_pretrained(self, pretrained_model_name_or_path, *model_args, - **kwargs): - raise ImportError(self.error_msg) - - def from_config(self, cls, config): - raise ImportError(self.error_msg) - - def user_agent(invoked_by=None): - from modelscope.utils.constant import Invoke - - if invoked_by is None: - invoked_by = Invoke.PRETRAINED - uagent = '%s/%s' % (Invoke.KEY, invoked_by) - return uagent - from .patcher import get_all_imported_modules, _patch_pretrained_class - - all_imported_modules = get_all_imported_modules() - all_available_modules = _patch_pretrained_class(all_imported_modules, wrap=True) + all_available_modules = _patch_pretrained_class( + get_all_imported_modules(), wrap=True) for module in all_available_modules: globals()[module.__name__] = module diff --git a/modelscope/utils/hf_util/patcher.py b/modelscope/utils/hf_util/patcher.py index f88e7d2a..cc874683 100644 --- a/modelscope/utils/hf_util/patcher.py +++ b/modelscope/utils/hf_util/patcher.py @@ -13,8 +13,10 @@ from typing import BinaryIO, Dict, List, Optional, Union def get_all_imported_modules(): """Find all modules in transformers/peft/diffusers""" all_imported_modules = [] - transformers_include_names = ['Auto', 'T5', 'BitsAndBytes', 'GenerationConfig', - 'Quant', 'Awq', 'GPTQ', 'BatchFeature', 'Qwen2'] + transformers_include_names = [ + 'Auto', 'T5', 'BitsAndBytes', 'GenerationConfig', 'Quant', 'Awq', + 'GPTQ', 'BatchFeature', 'Qwen2' + ] diffusers_include_names = ['Pipeline'] if importlib.util.find_spec('transformers') is not None: import transformers @@ -48,7 +50,8 @@ def get_all_imported_modules(): for key in _import_structure: values = _import_structure[key] for value in values: - if any([name in value for name in diffusers_include_names]): + if any([name in value + for name in diffusers_include_names]): try: module = importlib.import_module( f'.{key}', diffusers.__name__) @@ -60,6 +63,14 @@ def get_all_imported_modules(): def _patch_pretrained_class(all_imported_modules, wrap=False): + """Patch all class to download from modelscope + + Args: + wrap: Wrap the class or monkey patch the original class + + Returns: + The classes after patched + """ def get_model_dir(pretrained_model_name_or_path, ignore_file_pattern=None, @@ -67,39 +78,40 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): **kwargs): from modelscope import snapshot_download if not os.path.exists(pretrained_model_name_or_path): - revision = kwargs.pop('revision', None) model_dir = snapshot_download( pretrained_model_name_or_path, - revision=revision, + revision=kwargs.pop('revision', None), ignore_file_pattern=ignore_file_pattern, allow_file_pattern=allow_file_pattern) else: model_dir = pretrained_model_name_or_path return model_dir - ignore_file_pattern = [r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5'] + ignore_file_pattern = [ + r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5' + ] def patch_pretrained_model_name_or_path(pretrained_model_name_or_path, *model_args, **kwargs): - model_dir = get_model_dir(pretrained_model_name_or_path, - kwargs.pop('ignore_file_pattern', None), - **kwargs) + """Patch all from_pretrained/get_config_dict""" + model_dir = get_model_dir(pretrained_model_name_or_path, **kwargs) return kwargs.pop('ori_func')(model_dir, *model_args, **kwargs) def patch_peft_model_id(model, model_id, *model_args, **kwargs): - model_dir = get_model_dir(model_id, - kwargs.pop('ignore_file_pattern', None), - **kwargs) + """Patch all peft.from_pretrained""" + model_dir = 
get_model_dir(model_id, **kwargs) return kwargs.pop('ori_func')(model, model_dir, *model_args, **kwargs) def _get_peft_type(model_id, **kwargs): - model_dir = get_model_dir(model_id, ignore_file_pattern, **kwargs) + """Patch all _get_peft_type""" + model_dir = get_model_dir(model_id, **kwargs) return kwargs.pop('ori_func')(model_dir, **kwargs) - def get_wrapped_class(module_class: 'PreTrainedModel', - ignore_file_pattern: Optional[Union[str, List[str]]] = None, - allow_file_pattern: Optional[Union[str, List[str]]] = None, - **kwargs): + def get_wrapped_class( + module_class: 'PreTrainedModel', + ignore_file_pattern: Optional[Union[str, List[str]]] = None, + allow_file_pattern: Optional[Union[str, List[str]]] = None, + **kwargs): """Get a custom wrapper class for auto classes to download the models from the ModelScope hub Args: module_class (`PreTrainedModel`): The actual module class @@ -108,17 +120,19 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): allow_file_pattern (`str` or `List`, *optional*, default to `None`): Any file pattern to be included, like exact file names or file extensions. Returns: - The wrapper + The wrapped class """ def from_pretrained(model, model_id, *model_args, **kwargs): - model_dir = get_model_dir(model_id, - ignore_file_pattern=ignore_file_pattern, - allow_file_pattern=allow_file_pattern, - **kwargs) + # model is an instance + model_dir = get_model_dir( + model_id, + ignore_file_pattern=ignore_file_pattern, + allow_file_pattern=allow_file_pattern, + **kwargs) - module_obj = module_class.from_pretrained( - model, model_dir, *model_args, **kwargs) + module_obj = module_class.from_pretrained(model, model_dir, + *model_args, **kwargs) return module_obj @@ -127,10 +141,11 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): @classmethod def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): - model_dir = get_model_dir(pretrained_model_name_or_path, - ignore_file_pattern=ignore_file_pattern, - allow_file_pattern=allow_file_pattern, - **kwargs) + model_dir = get_model_dir( + pretrained_model_name_or_path, + ignore_file_pattern=ignore_file_pattern, + allow_file_pattern=allow_file_pattern, + **kwargs) module_obj = module_class.from_pretrained( model_dir, *model_args, **kwargs) @@ -141,18 +156,14 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): @classmethod def _get_peft_type(cls, model_id, **kwargs): - model_dir = get_model_dir(model_id, - kwargs.pop('ignore_file_pattern', None), - **kwargs) - - module_obj = module_class._get_peft_type( - model_dir, **kwargs) + model_dir = get_model_dir(model_id, **kwargs) + module_obj = module_class._get_peft_type(model_dir, **kwargs) return module_obj @classmethod - def get_config_dict(cls, pretrained_model_name_or_path, *model_args, **kwargs): + def get_config_dict(cls, pretrained_model_name_or_path, + *model_args, **kwargs): model_dir = get_model_dir(pretrained_model_name_or_path, - kwargs.pop('ignore_file_pattern', None), **kwargs) module_obj = module_class.get_config_dict( @@ -204,11 +215,13 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): if not has_from_pretrained and not has_get_config_dict and not has_get_peft_type: all_available_modules.append(var) else: - all_available_modules.append(get_wrapped_class(var, ignore_file_pattern)) + all_available_modules.append( + get_wrapped_class(var, ignore_file_pattern)) except Exception: all_available_modules.append(var) else: - if has_from_pretrained and not hasattr(var, '_from_pretrained_origin'): + 
if has_from_pretrained and not hasattr(var, + '_from_pretrained_origin'): parameters = inspect.signature(var.from_pretrained).parameters # different argument names is_peft = 'model' in parameters and 'model_id' in parameters @@ -230,7 +243,8 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): ori_func=var._get_peft_type_origin, **ignore_file_pattern_kwargs) - if has_get_config_dict and not hasattr(var, '_get_config_dict_origin'): + if has_get_config_dict and not hasattr(var, + '_get_config_dict_origin'): var._get_config_dict_origin = var.get_config_dict var.get_config_dict = partial( patch_pretrained_model_name_or_path, diff --git a/tests/utils/test_hf_util.py b/tests/utils/test_hf_util.py index ba2b24d9..6b5d39ed 100644 --- a/tests/utils/test_hf_util.py +++ b/tests/utils/test_hf_util.py @@ -152,8 +152,12 @@ class HFUtilTest(unittest.TestCase): def test_patch_peft(self): with patch_context(): + from transformers import AutoModelForCausalLM from peft import PeftModel - self.assertTrue(hasattr(PeftModel, '_from_pretrained_origin')) + model = AutoModelForCausalLM.from_pretrained('OpenBMB/MiniCPM3-4B') + model = PeftModel.from_pretrained(model, + 'OpenBMB/MiniCPM3-RAG-LoRA') + self.assertTrue(model is not None) self.assertFalse(hasattr(PeftModel, '_from_pretrained_origin')) def test_patch_file_exists(self): From 22b7b25f44dc2b84edec82146e439b5210042c8e Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Sun, 26 Jan 2025 17:04:26 +0800 Subject: [PATCH 17/36] lint --- modelscope/utils/hf_util/auto_class.py | 2 -- modelscope/utils/test_utils.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/modelscope/utils/hf_util/auto_class.py b/modelscope/utils/hf_util/auto_class.py index 996d6bd9..bdf972cb 100644 --- a/modelscope/utils/hf_util/auto_class.py +++ b/modelscope/utils/hf_util/auto_class.py @@ -2,8 +2,6 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from transformers import __version__ as transformers_version - from transformers import AutoConfig from transformers import AutoFeatureExtractor from transformers import AutoImageProcessor diff --git a/modelscope/utils/test_utils.py b/modelscope/utils/test_utils.py index 35c1ad5d..718ef414 100644 --- a/modelscope/utils/test_utils.py +++ b/modelscope/utils/test_utils.py @@ -29,7 +29,7 @@ TEST_ACCESS_TOKEN1 = os.environ.get('TEST_ACCESS_TOKEN_CITEST', None) TEST_ACCESS_TOKEN2 = os.environ.get('TEST_ACCESS_TOKEN_SDKDEV', None) TEST_MODEL_CHINESE_NAME = '内部测试模型' -TEST_MODEL_ORG = 'tastelikefeet' +TEST_MODEL_ORG = os.environ.get('TEST_MODEL_ORG', 'citest') def delete_credential(): From d0cccf64afba01a321f0abdba047b322f5144327 Mon Sep 17 00:00:00 2001 From: tastelikefeet Date: Sun, 26 Jan 2025 18:31:41 +0800 Subject: [PATCH 18/36] fix --- modelscope/utils/hf_util/patcher.py | 38 +++++++++++++++++++++-------- tests/utils/test_hf_util.py | 7 +++--- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/modelscope/utils/hf_util/patcher.py b/modelscope/utils/hf_util/patcher.py index cc874683..a51f8911 100644 --- a/modelscope/utils/hf_util/patcher.py +++ b/modelscope/utils/hf_util/patcher.py @@ -78,9 +78,10 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): **kwargs): from modelscope import snapshot_download if not os.path.exists(pretrained_model_name_or_path): + revision = kwargs.pop('revision', None) model_dir = snapshot_download( pretrained_model_name_or_path, - revision=kwargs.pop('revision', None), + revision=revision, ignore_file_pattern=ignore_file_pattern, 
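The `is_peft` check above distinguishes the two `from_pretrained` calling conventions purely by parameter names; a minimal reproduction of that detection:

```python
import inspect


class TransformersStyle:

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        ...


class PeftStyle:

    @classmethod
    def from_pretrained(cls, model, model_id, **kwargs):
        ...


def is_peft_style(klass) -> bool:
    parameters = inspect.signature(klass.from_pretrained).parameters
    return 'model' in parameters and 'model_id' in parameters


assert not is_peft_style(TransformersStyle)
assert is_peft_style(PeftStyle)
```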
allow_file_pattern=allow_file_pattern) else: @@ -88,23 +89,33 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): return model_dir ignore_file_pattern = [ - r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5' + r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5', + r'\w+\.ckpt' ] def patch_pretrained_model_name_or_path(pretrained_model_name_or_path, *model_args, **kwargs): """Patch all from_pretrained/get_config_dict""" - model_dir = get_model_dir(pretrained_model_name_or_path, **kwargs) + model_dir = get_model_dir(pretrained_model_name_or_path, + kwargs.pop('ignore_file_pattern', None), + kwargs.pop('allow_file_pattern', None), + **kwargs) return kwargs.pop('ori_func')(model_dir, *model_args, **kwargs) def patch_peft_model_id(model, model_id, *model_args, **kwargs): """Patch all peft.from_pretrained""" - model_dir = get_model_dir(model_id, **kwargs) + model_dir = get_model_dir(model_id, + kwargs.pop('ignore_file_pattern', None), + kwargs.pop('allow_file_pattern', None), + **kwargs) return kwargs.pop('ori_func')(model, model_dir, *model_args, **kwargs) def _get_peft_type(model_id, **kwargs): """Patch all _get_peft_type""" - model_dir = get_model_dir(model_id, **kwargs) + model_dir = get_model_dir(model_id, + kwargs.pop('ignore_file_pattern', None), + kwargs.pop('allow_file_pattern', None), + **kwargs) return kwargs.pop('ori_func')(model_dir, **kwargs) def get_wrapped_class( @@ -156,15 +167,22 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): @classmethod def _get_peft_type(cls, model_id, **kwargs): - model_dir = get_model_dir(model_id, **kwargs) + model_dir = get_model_dir( + model_id, + ignore_file_pattern=ignore_file_pattern, + allow_file_pattern=allow_file_pattern, + **kwargs) module_obj = module_class._get_peft_type(model_dir, **kwargs) return module_obj @classmethod def get_config_dict(cls, pretrained_model_name_or_path, *model_args, **kwargs): - model_dir = get_model_dir(pretrained_model_name_or_path, - **kwargs) + model_dir = get_model_dir( + pretrained_model_name_or_path, + ignore_file_pattern=ignore_file_pattern, + allow_file_pattern=allow_file_pattern, + **kwargs) module_obj = module_class.get_config_dict( model_dir, *model_args, **kwargs) @@ -194,7 +212,7 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): continue name = var.__name__ need_model = 'model' in name.lower() or 'processor' in name.lower( - ) or 'extractor' in name.lower() + ) or 'extractor' in name.lower() or 'pipeline' in name.lower() if need_model: ignore_file_pattern_kwargs = {} else: @@ -216,7 +234,7 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): all_available_modules.append(var) else: all_available_modules.append( - get_wrapped_class(var, ignore_file_pattern)) + get_wrapped_class(var, **ignore_file_pattern_kwargs)) except Exception: all_available_modules.append(var) else: diff --git a/tests/utils/test_hf_util.py b/tests/utils/test_hf_util.py index 6b5d39ed..87650c5c 100644 --- a/tests/utils/test_hf_util.py +++ b/tests/utils/test_hf_util.py @@ -154,9 +154,10 @@ class HFUtilTest(unittest.TestCase): with patch_context(): from transformers import AutoModelForCausalLM from peft import PeftModel - model = AutoModelForCausalLM.from_pretrained('OpenBMB/MiniCPM3-4B') - model = PeftModel.from_pretrained(model, - 'OpenBMB/MiniCPM3-RAG-LoRA') + model = AutoModelForCausalLM.from_pretrained( + 'OpenBMB/MiniCPM3-4B', trust_remote_code=True) + model = PeftModel.from_pretrained( + model, 'OpenBMB/MiniCPM3-RAG-LoRA', 
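For reference, how the extended `ignore_file_pattern` list behaves against typical file names (illustration only; `re.fullmatch` is used here, the hub code may anchor its patterns differently):

```python
import re

ignore_file_pattern = [
    r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5',
    r'\w+\.ckpt'
]

for name in ('pytorch_model.bin', 'model.ckpt', 'config.json'):
    skipped = any(re.fullmatch(p, name) for p in ignore_file_pattern)
    print(f'{name}: {"skipped" if skipped else "downloaded"}')
# config-only classes skip the weight files; config.json is still fetched
```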
trust_remote_code=True) self.assertTrue(model is not None) self.assertFalse(hasattr(PeftModel, '_from_pretrained_origin')) From cf6aa132ded2537e802c0f5fcfcfb9c7bae86a69 Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Sun, 26 Jan 2025 18:35:45 +0800 Subject: [PATCH 19/36] remove useless file --- tests/hub/test_patch_hf.py | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 tests/hub/test_patch_hf.py diff --git a/tests/hub/test_patch_hf.py b/tests/hub/test_patch_hf.py deleted file mode 100644 index 13754923..00000000 --- a/tests/hub/test_patch_hf.py +++ /dev/null @@ -1,16 +0,0 @@ -import unittest - -from modelscope.msdatasets import MsDataset -from modelscope.utils.test_utils import test_level - - -class DownloadDatasetTest(unittest.TestCase): - - def setUp(self): - from modelscope.utils.hf_util import patch_hub - patch_hub() - - def test_automodel_download(self): - from transformers import AutoModel - model = AutoModel.from_pretrained('AI-ModelScope/bert-base-uncased') - self.assertTrue(model is not None) From 565e45370e3b828f7d11b6202ae25a443324cae8 Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Sun, 26 Jan 2025 18:50:28 +0800 Subject: [PATCH 20/36] add tests --- tests/utils/test_hf_util.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/utils/test_hf_util.py b/tests/utils/test_hf_util.py index 87650c5c..f93969fa 100644 --- a/tests/utils/test_hf_util.py +++ b/tests/utils/test_hf_util.py @@ -150,6 +150,11 @@ class HFUtilTest(unittest.TestCase): else: self.assertTrue(False) + from modelscope import StableDiffusionPipeline + pipe = StableDiffusionPipeline.from_pretrained( + 'AI-ModelScope/stable-diffusion-v1-5') + self.assertTrue(pipe is not None) + def test_patch_peft(self): with patch_context(): from transformers import AutoModelForCausalLM From 816efec5d17ddb7c7aeb97d72c71df48ca19f918 Mon Sep 17 00:00:00 2001 From: suluyan Date: Mon, 27 Jan 2025 10:19:35 +0800 Subject: [PATCH 21/36] tmp for hf wrapper --- modelscope/pipelines/builder.py | 14 ++- modelscope/utils/hf_util.py | 166 +++++++++++++++++++++++++++++++- 2 files changed, 175 insertions(+), 5 deletions(-) diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py index b07ad315..6f2de064 100644 --- a/modelscope/pipelines/builder.py +++ b/modelscope/pipelines/builder.py @@ -76,7 +76,7 @@ def pipeline(task: str = None, config_file: str = None, pipeline_name: str = None, framework: str = None, - device: str = 'gpu', + device: str = None, model_revision: Optional[str] = DEFAULT_MODEL_REVISION, ignore_file_pattern: List[str] = None, **kwargs) -> Pipeline: @@ -174,9 +174,13 @@ def pipeline(task: str = None, if not hasattr(first_model, 'pipeline'): # model is instantiated by user, we should parse config again cfg = read_config(first_model.model_dir) - check_config(cfg) - first_model.pipeline = cfg.pipeline - pipeline_props = first_model.pipeline + try: + check_config(cfg) + first_model.pipeline = cfg.pipeline + except AssertionError as e: + logger.info(str(e)) + if first_model.__dict__.get('pipeline'): + pipeline_props = first_model.pipeline else: pipeline_name, default_model_repo = get_default_pipeline_info(task) model = normalize_model_input(default_model_repo, model_revision) @@ -192,6 +196,8 @@ def pipeline(task: str = None, device=device, **kwargs) + if not device: + device = 'gpu' pipeline_props['model'] = model pipeline_props['device'] = device cfg = ConfigDict(pipeline_props) diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py index 9b9d4e2e..0459a217 100644 
--- a/modelscope/utils/hf_util.py
+++ b/modelscope/utils/hf_util.py
@@ -63,11 +63,19 @@ from transformers import (PretrainedConfig, PreTrainedModel,
                           PreTrainedTokenizerBase)
 from transformers import T5EncoderModel as T5EncoderModelHF
 from transformers import __version__ as transformers_version
-from transformers import pipeline as hf_pipeline
+from transformers import pipeline
+from transformers import Pipeline as PipelineHF
+# Needed by the hub helpers (_upload_modified_files/_create_repo) below.
+from huggingface_hub import (CommitOperationAdd, create_branch,
+                             create_commit, create_repo)
+from huggingface_hub.utils import HfHubHTTPError
+from transformers.utils import HUGGINGFACE_CO_RESOLVE_ENDPOINT
+import warnings
 
 from modelscope import snapshot_download
 from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke
 from .logger import get_logger
+from ..pipelines.multi_modal.disco_guided_diffusion_pipeline.utils import NoneClass
 
 try:
     from transformers import GPTQConfig as GPTQConfigHF
@@ -78,6 +86,174 @@ except ImportError:
 
 logger = get_logger()
 
+def _get_hf_device(device):
+    # Normalize ModelScope's device alias to what transformers expects
+    # (assumed mapping; the original WIP line was syntactically incomplete).
+    # Other values (None, int, 'cuda:0', ...) pass through unchanged.
+    if device == 'gpu':
+        device = 'cuda'
+    return device
+
+def _get_hf_pipeline_class():
+    return NoneClass
+
+def _wrapper_hf_pipeline_class(hf_pipeline_class: PipelineHF):
+    class HFPipelineWrapper(PipelineHF):
+        def save_pretrained(
+            self,
+            save_directory: Union[str, os.PathLike],
+            safe_serialization: bool = True,
+            **kwargs,
+        ):
+            push_to_hub = kwargs.get('push_to_hub', False)
+            if push_to_hub:
+                kwargs.pop('push_to_hub')
+
+            # Always save locally first; pushing is handled separately.
+            super().save_pretrained(
+                save_directory=save_directory,
+                safe_serialization=safe_serialization,
+                push_to_hub=False,
+                **kwargs)
+
+        def _upload_modified_files(
+            self,
+            working_dir: Union[str, os.PathLike],
+            repo_id: str,
+            files_timestamps: Dict[str, float],
+            commit_message: Optional[str] = None,
+            token: Optional[Union[bool, str]] = None,
+            create_pr: bool = False,
+            revision: str = None,
+            commit_description: str = None,
+        ):
+            """
+            Uploads all modified files in `working_dir` to `repo_id`, based on `files_timestamps`.
+            """
+            if commit_message is None:
+                if "Model" in self.__class__.__name__:
+                    commit_message = "Upload model"
+                elif "Config" in self.__class__.__name__:
+                    commit_message = "Upload config"
+                elif "Tokenizer" in self.__class__.__name__:
+                    commit_message = "Upload tokenizer"
+                elif "FeatureExtractor" in self.__class__.__name__:
+                    commit_message = "Upload feature extractor"
+                elif "Processor" in self.__class__.__name__:
+                    commit_message = "Upload processor"
+                else:
+                    commit_message = f"Upload {self.__class__.__name__}"
+            modified_files = [
+                f
+                for f in os.listdir(working_dir)
+                if f not in files_timestamps or os.path.getmtime(os.path.join(working_dir, f)) > files_timestamps[f]
+            ]
+
+            # filter for actual files + folders at the root level
+            modified_files = [
+                f
+                for f in modified_files
+                if os.path.isfile(os.path.join(working_dir, f)) or os.path.isdir(os.path.join(working_dir, f))
+            ]
+
+            operations = []
+            # upload standalone files
+            for file in modified_files:
+                if os.path.isdir(os.path.join(working_dir, file)):
+                    # go over individual files of folder
+                    for f in os.listdir(os.path.join(working_dir, file)):
+                        operations.append(
+                            CommitOperationAdd(
+                                path_or_fileobj=os.path.join(working_dir, file, f), path_in_repo=os.path.join(file, f)
+                            )
+                        )
+                else:
+                    operations.append(
+                        CommitOperationAdd(path_or_fileobj=os.path.join(working_dir, file), path_in_repo=file)
+                    )
+
+            if revision is not None and not revision.startswith("refs/pr"):
+                try:
+                    create_branch(repo_id=repo_id, branch=revision, token=token, exist_ok=True)
+                except HfHubHTTPError as e:
+                    if e.response.status_code == 403 and create_pr:
+                        # If we are creating a PR on a repo we don't have access to, we can't create the branch.
+                        # so let's assume the branch already exists. If it's not the case, an error will be raised when
+                        # calling `create_commit` below.
+                        pass
+                    else:
+                        raise
+
+            logger.info(f"Uploading the following files to {repo_id}: {','.join(modified_files)}")
+            return create_commit(
+                repo_id=repo_id,
+                operations=operations,
+                commit_message=commit_message,
+                commit_description=commit_description,
+                token=token,
+                create_pr=create_pr,
+                revision=revision,
+            )
+
+        def _create_repo(self,
+            repo_id: str,
+            private: Optional[bool] = None,
+            token: Optional[Union[bool, str]] = None,
+            repo_url: Optional[str] = None,
+            organization: Optional[str] = None,
+        ) -> str:
+            """
+            Create the repo if needed, cleans up repo_id with deprecated kwargs `repo_url` and `organization`, retrieves
+            the token.
+            """
+            if repo_url is not None:
+                warnings.warn(
+                    "The `repo_url` argument is deprecated and will be removed in v5 of Transformers. Use `repo_id` "
+                    "instead."
+                )
+                if repo_id is not None:
+                    raise ValueError(
+                        "`repo_id` and `repo_url` are both specified. Please set only the argument `repo_id`."
+                    )
+                repo_id = repo_url.replace(f"{HUGGINGFACE_CO_RESOLVE_ENDPOINT}/", "")
+            if organization is not None:
+                warnings.warn(
+                    "The `organization` argument is deprecated and will be removed in v5 of Transformers. Set your "
+                    "organization directly in the `repo_id` passed instead (`repo_id={organization}/{model_id}`)."
+                )
+                if not repo_id.startswith(organization):
+                    if "/" in repo_id:
+                        repo_id = repo_id.split("/")[-1]
+                    repo_id = f"{organization}/{repo_id}"
+
+            url = create_repo(repo_id=repo_id, token=token, private=private, exist_ok=True)
+            return url.repo_id
+
+    # The WIP draft dropped this return, which left hf_pipeline() below
+    # with an undefined pipeline class.
+    return HFPipelineWrapper
+
+def hf_pipeline(
+    task: str = None,
+    model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None,
+    framework: Optional[str] = None,
+    device: Optional[Union[int, str, "torch.device"]] = None,
+    **kwargs,
+) -> PipelineHF:
+    if isinstance(model, str):
+        if not os.path.exists(model):
+            model = snapshot_download(model)
+
+    framework = 'pt' if framework == 'pytorch' else framework
+
+    device = _get_hf_device(device)
+    hf_pipeline_class = _get_hf_pipeline_class()
+    wrapped_pipeline_class = _wrapper_hf_pipeline_class(hf_pipeline_class)
+
+    return pipeline(task=task,
+                    model=model,
+                    framework=framework,
+                    device=device,
+                    pipeline_class=wrapped_pipeline_class,
+                    **kwargs)
 
 class UnsupportedAutoClass:

From eaf4ea8f2198f5c940484c088efde99ac19b2d08 Mon Sep 17 00:00:00 2001
From: "yuze.zyz"
Date: Mon, 27 Jan 2025 21:46:34 +0800
Subject: [PATCH 22/36] fix comments

---
 modelscope/hub/api.py               | 40 ++++++++++++++----
 modelscope/hub/create_model.py      | 65 -----------------------------
 modelscope/hub/push_to_hub.py       |  7 +++-
 modelscope/utils/hf_util/patcher.py | 18 ++++----
 4 files changed, 49 insertions(+), 81 deletions(-)
 delete mode 100644 modelscope/hub/create_model.py

diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py
index 63f954d6..694c56f5 100644
--- a/modelscope/hub/api.py
+++ b/modelscope/hub/api.py
@@ -9,6 +9,7 @@ import pickle
 import platform
 import re
 import shutil
+import tempfile
 import uuid
 from collections import defaultdict
 from http import HTTPStatus
@@ -47,7 +48,9 @@ from modelscope.hub.errors import (InvalidParameter, NotExistError,
                                    raise_for_http_status, raise_on_error)
 from modelscope.hub.git import GitCommandWrapper
 from modelscope.hub.repository import Repository
-from modelscope.hub.utils.utils import (get_endpoint, get_readable_folder_size,
+from modelscope.hub.utils.utils import (add_patterns_to_file,
+                                        add_patterns_to_gitattributes,
+                                        get_endpoint, get_readable_folder_size,
get_release_datetime, model_id_to_group_owner_name) from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, @@ -158,6 +161,7 @@ class HubApi: self.login(access_token) return True except AssertionError: + logger.warning('Login failed.') return False def create_model(self, @@ -1210,21 +1214,22 @@ class HubApi: repo_type: Optional[str] = REPO_TYPE_MODEL, chinese_name: Optional[str] = '', license: Optional[str] = Licenses.APACHE_V2, + **kwargs, ) -> str: # TODO: exist_ok - if not repo_id: raise ValueError('Repo id cannot be empty!') - if token: - self.login(access_token=token) - else: - logger.warning('No token provided, will use the cached token.') + self.try_login(token) + if '/' not in repo_id: + user_name = ModelScopeConfig.get_user_info()[0] + assert isinstance(user_name, str) + repo_id = f'{user_name}/{repo_id}' + logger.info( + f"'/' not in hub_model_id, pushing to personal repo {repo_id}") repo_id_list = repo_id.split('/') - if len(repo_id_list) != 2: - raise ValueError('Invalid repo id, should be in the format of `owner_name/repo_name`') namespace, repo_name = repo_id_list if repo_type == REPO_TYPE_MODEL: @@ -1240,6 +1245,25 @@ class HubApi: chinese_name=chinese_name, ) + with tempfile.TemporaryDirectory() as temp_cache_dir: + from modelscope.hub.repository import Repository + repo = Repository(temp_cache_dir, repo_id) + add_patterns_to_gitattributes( + repo, ['*.safetensors', '*.bin', '*.pt', '*.gguf']) + default_config = { + 'framework': 'pytorch', + 'task': 'text-generation', + 'allow_remote': True + } + config_json = kwargs.get('config_json') + if not config_json: + config_json = {} + config = {**default_config, **config_json} + add_patterns_to_file( + repo, + 'configuration.json', [json.dumps(config)], + ignore_push_error=True) + elif repo_type == REPO_TYPE_DATASET: visibilities = {k: v for k, v in DatasetVisibility.__dict__.items() if not k.startswith('__')} visibility: int = visibilities.get(visibility.upper()) diff --git a/modelscope/hub/create_model.py b/modelscope/hub/create_model.py deleted file mode 100644 index b1811acc..00000000 --- a/modelscope/hub/create_model.py +++ /dev/null @@ -1,65 +0,0 @@ -import tempfile -from typing import Any, Dict, Optional - -import json -from requests.exceptions import HTTPError - -from modelscope.hub.api import HubApi, ModelScopeConfig -from modelscope.hub.constants import ModelVisibility -from modelscope.utils.logger import get_logger -from .utils.utils import add_patterns_to_file, add_patterns_to_gitattributes - -logger = get_logger() - - -def create_model_repo(repo_id: str, - token: Optional[str] = None, - private: bool = False, - config_json: Optional[Dict[str, Any]] = None) -> str: - """Create model repo and create .gitattributes file and .gitignore file - - Args: - repo_id(str): The repo id - token(str, Optional): The access token of the user - private(bool): If is a private repo, default False - config_json(Dict[str, Any]): An optional config_json to fill into the configuration.json file, - If None, the default content will be uploaded: - ```json - {"framework": "pytorch", "task": "text-generation", "allow_remote": True} - ``` - You can manually modify this in the modelhub. 
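The repo bootstrap above writes a `configuration.json` by merging any caller-supplied `config_json` over fixed defaults; the merge semantics in isolation (the override value is hypothetical):

```python
import json

default_config = {
    'framework': 'pytorch',
    'task': 'text-generation',
    'allow_remote': True
}
config_json = {'task': 'text-classification'}  # hypothetical user override
config = {**default_config, **config_json}
print(json.dumps(config))
# {"framework": "pytorch", "task": "text-classification", "allow_remote": true}
```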
- """ - api = HubApi() - assert repo_id is not None, 'Please enter a valid repo id' - api.try_login(token) - visibility = ModelVisibility.PRIVATE if private else ModelVisibility.PUBLIC - if '/' not in repo_id: - user_name = ModelScopeConfig.get_user_info()[0] - assert isinstance(user_name, str) - repo_id = f'{user_name}/{repo_id}' - logger.info( - f"'/' not in hub_model_id, pushing to personal repo {repo_id}") - try: - api.create_model(repo_id, visibility) - except HTTPError: - # The remote repository has been created - pass - - with tempfile.TemporaryDirectory() as temp_cache_dir: - from modelscope.hub.repository import Repository - repo = Repository(temp_cache_dir, repo_id) - add_patterns_to_gitattributes( - repo, ['*.safetensors', '*.bin', '*.pt', '*.gguf']) - default_config = { - 'framework': 'pytorch', - 'task': 'text-generation', - 'allow_remote': True - } - if not config_json: - config_json = {} - config = {**default_config, **config_json} - add_patterns_to_file( - repo, - 'configuration.json', [json.dumps(config)], - ignore_push_error=True) - return repo_id diff --git a/modelscope/hub/push_to_hub.py b/modelscope/hub/push_to_hub.py index fdd4a17f..3dc70b1d 100644 --- a/modelscope/hub/push_to_hub.py +++ b/modelscope/hub/push_to_hub.py @@ -24,7 +24,7 @@ _tasks = dict() _manager = None -def push_files_to_hub( +def _push_files_to_hub( path_or_fileobj: Union[str, Path], path_in_repo: str, repo_id: str, @@ -33,6 +33,11 @@ def push_files_to_hub( commit_message: Optional[str] = None, commit_description: Optional[str] = None, ): + """Push files to model hub incrementally + + This function if used for patch_hub, user is not recommended to call this. + This function will be merged to push_to_hub in later sprints. + """ if not os.path.exists(path_or_fileobj): return diff --git a/modelscope/utils/hf_util/patcher.py b/modelscope/utils/hf_util/patcher.py index a51f8911..93d0af1b 100644 --- a/modelscope/utils/hf_util/patcher.py +++ b/modelscope/utils/hf_util/patcher.py @@ -383,8 +383,12 @@ def _patch_hub(): Returns: RepoUrl: The URL of the created repository. 
""" - from modelscope.hub.create_model import create_model_repo - hub_model_id = create_model_repo(repo_id, token, private) + from modelscope.hub.api import HubApi + api = HubApi() + from modelscope.hub.constants import ModelVisibility + visibility = ModelVisibility.PRIVATE if private else ModelVisibility.PUBLIC + hub_model_id = api.create_repo( + repo_id, token=token, visibility=visibility, **kwargs) from huggingface_hub import RepoUrl return RepoUrl(url=hub_model_id, ) @@ -402,8 +406,8 @@ def _patch_hub(): ignore_patterns: Optional[Union[List[str], str]] = None, **kwargs, ): - from modelscope.hub.push_to_hub import push_files_to_hub - push_files_to_hub( + from modelscope.hub.push_to_hub import _push_files_to_hub + _push_files_to_hub( path_or_fileobj=folder_path, path_in_repo=path_in_repo, repo_id=repo_id, @@ -434,9 +438,9 @@ def _patch_hub(): commit_description: Optional[str] = None, **kwargs, ): - from modelscope.hub.push_to_hub import push_files_to_hub - push_files_to_hub(path_or_fileobj, path_in_repo, repo_id, token, - revision, commit_message, commit_description) + from modelscope.hub.push_to_hub import _push_files_to_hub + _push_files_to_hub(path_or_fileobj, path_in_repo, repo_id, token, + revision, commit_message, commit_description) # Patch repocard.validate from huggingface_hub import repocard From 462eaab3cff83d7fadc7ed0846c8911a11ced15b Mon Sep 17 00:00:00 2001 From: suluyan Date: Tue, 11 Feb 2025 10:25:41 +0800 Subject: [PATCH 23/36] runable --- modelscope/hub/git.py | 1 + modelscope/utils/hf_util/__init__.py | 1 + modelscope/utils/hf_util/patcher.py | 106 ++++++++++++++++++- modelscope/utils/hf_util/pipeline_builder.py | 49 +++++++++ 4 files changed, 153 insertions(+), 4 deletions(-) create mode 100644 modelscope/utils/hf_util/pipeline_builder.py diff --git a/modelscope/hub/git.py b/modelscope/hub/git.py index 144d9d69..d03ca773 100644 --- a/modelscope/hub/git.py +++ b/modelscope/hub/git.py @@ -46,6 +46,7 @@ class GitCommandWrapper(metaclass=Singleton): git_env = os.environ.copy() git_env['GIT_TERMINAL_PROMPT'] = '0' command = [self.git_path, *args] + command = [item for item in command if item] response = subprocess.run( command, stdout=subprocess.PIPE, diff --git a/modelscope/utils/hf_util/__init__.py b/modelscope/utils/hf_util/__init__.py index a138ff7a..c9d1883b 100644 --- a/modelscope/utils/hf_util/__init__.py +++ b/modelscope/utils/hf_util/__init__.py @@ -1,2 +1,3 @@ from .auto_class import * from .patcher import patch_context, patch_hub, unpatch_hub +from .pipeline_builder import hf_pipeline \ No newline at end of file diff --git a/modelscope/utils/hf_util/patcher.py b/modelscope/utils/hf_util/patcher.py index 4eb82acb..5fcd390e 100644 --- a/modelscope/utils/hf_util/patcher.py +++ b/modelscope/utils/hf_util/patcher.py @@ -26,7 +26,7 @@ def get_all_imported_modules(): all_imported_modules = [] transformers_include_names = [ 'Auto', 'T5', 'BitsAndBytes', 'GenerationConfig', 'Quant', 'Awq', - 'GPTQ', 'BatchFeature', 'Qwen', 'Llama' + 'GPTQ', 'BatchFeature', 'Qwen', 'Llama', 'Pipeline' ] diffusers_include_names = ['Pipeline'] if importlib.util.find_spec('transformers') is not None: @@ -144,6 +144,35 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): **kwargs) return kwargs.pop('ori_func')(model_dir, **kwargs) + def save_pretrained(save_directory: Union[str, os.PathLike], + safe_serialization: bool = True, + **kwargs): + obj = kwargs.pop('obj') + push_to_hub = kwargs.pop('push_to_hub', False) + + obj._save_pretrained_origin(obj, + 
save_directory=save_directory, + safe_serialization=safe_serialization, + push_to_hub=False, + **kwargs) + + # Class members may be unpatched, so push_to_hub is done separately here + if push_to_hub: + from modelscope.hub.push_to_hub import push_to_hub + from modelscope.hub.api import HubApi + api = HubApi() + + token = kwargs.get("token") + commit_message = kwargs.pop("commit_message", None) + repo_name = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1]) + api.create_repo(repo_name, **kwargs) + + push_to_hub(repo_name=repo_name, + output_dir=save_directory, + commit_message=commit_message, + token=token) + #return kwargs.pop('ori_func')(obj, save_directory, safe_serialization, **kwargs) + def get_wrapped_class( module_class: 'PreTrainedModel', ignore_file_pattern: Optional[Union[str, List[str]]] = None, @@ -214,6 +243,56 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): model_dir, *model_args, **kwargs) return module_obj + def save_pretrained( + self, + save_directory: Union[str, os.PathLike], + safe_serialization: bool = True, + **kwargs, + ): + push_to_hub = kwargs.pop('push_to_hub', False) + if push_to_hub: + import json + from modelscope.hub.repository import Repository + from modelscope.hub.utils.utils import add_content_to_file + from modelscope.hub.push_to_hub import push_to_hub + from modelscope.hub.api import HubApi + api = HubApi() + + token = kwargs.get("token") + api.login(token) + commit_message = kwargs.pop("commit_message", None) + repo_name = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1]) + api.create_repo(repo_name) + repo = Repository(save_directory, repo_name) + default_config = { + 'framework': 'pytorch', + 'task': 'text-generation', + 'allow_remote': True + } + config_json = kwargs.get('config_json') + if not config_json: + config_json = {} + config = {**default_config, **config_json} + add_content_to_file( + repo, + 'configuration.json', [json.dumps(config)], + ignore_push_error=True) + super().save_pretrained(save_directory=save_directory, + safe_serialization=safe_serialization, + push_to_hub=False, + **kwargs) + + # Class members may be unpatched, so push_to_hub is done separately here + if push_to_hub: + + #api.create_repo(repo_name, **kwargs) + + + push_to_hub(repo_name=repo_name, + output_dir=save_directory, + commit_message=commit_message, + token=token) + if not hasattr(module_class, 'from_pretrained'): del ClassWrapper.from_pretrained else: @@ -228,6 +307,9 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): if not hasattr(module_class, 'get_config_dict'): del ClassWrapper.get_config_dict + if not hasattr(module_class, 'save_pretrained'): + del ClassWrapper.save_pretrained + ClassWrapper.__name__ = module_class.__name__ ClassWrapper.__qualname__ = module_class.__qualname__ return ClassWrapper @@ -251,17 +333,21 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): has_from_pretrained = hasattr(var, 'from_pretrained') has_get_peft_type = hasattr(var, '_get_peft_type') has_get_config_dict = hasattr(var, 'get_config_dict') + has_save_pretrained = hasattr(var, 'save_pretrained') except ImportError: continue - if wrap: + # save_pretrained is not a classmethod and cannot be overridden by replacing the class method. It requires replacing the class object method. 
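
For reference, the save-then-push sequence the wrapper above implements reduces to the following sketch. It only uses calls that appear in this patch (`HubApi.login`, `HubApi.create_repo`, `push_to_hub`); the helper name `save_and_push` and its argument defaults are illustrative, not part of the patched API.

import os

def save_and_push(model, save_directory, repo_id=None, token=None,
                  commit_message=None):
    from modelscope.hub.api import HubApi
    from modelscope.hub.push_to_hub import push_to_hub
    # Save locally first with push_to_hub disabled, so the upload step
    # stays under the wrapper's control.
    model.save_pretrained(save_directory, push_to_hub=False)
    # Fall back to the last path component as the repo name, as above.
    repo_name = repo_id or save_directory.split(os.path.sep)[-1]
    api = HubApi()
    api.login(token)
    api.create_repo(repo_name)
    push_to_hub(repo_name=repo_name, output_dir=save_directory,
                commit_message=commit_message, token=token)
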
+ if wrap or ('pipeline' in name.lower() and has_save_pretrained): + print(f'var wrap: {var}') try: - if not has_from_pretrained and not has_get_config_dict and not has_get_peft_type: + if not has_from_pretrained and not has_get_config_dict and not has_get_peft_type and not has_save_pretrained: all_available_modules.append(var) else: all_available_modules.append( get_wrapped_class(var, **ignore_file_pattern_kwargs)) - except Exception: + except Exception as e: + print(f'wrap failed: {e}') all_available_modules.append(var) else: if has_from_pretrained and not hasattr(var, @@ -295,6 +381,14 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): ori_func=var._get_config_dict_origin, **ignore_file_pattern_kwargs) + if has_save_pretrained and not hasattr(var, '_save_pretrained_origin'): + var._save_pretrained_origin = var.save_pretrained + var.save_pretrained = partial( + save_pretrained, + ori_func=var._save_pretrained_origin, + obj=var, + **ignore_file_pattern_kwargs) + all_available_modules.append(var) return all_available_modules @@ -308,6 +402,7 @@ def _unpatch_pretrained_class(all_imported_modules): has_from_pretrained = hasattr(var, 'from_pretrained') has_get_peft_type = hasattr(var, '_get_peft_type') has_get_config_dict = hasattr(var, 'get_config_dict') + has_save_pretrained = hasattr(var, 'save_pretrained') except ImportError: continue if has_from_pretrained and hasattr(var, '_from_pretrained_origin'): @@ -319,6 +414,9 @@ def _unpatch_pretrained_class(all_imported_modules): if has_get_config_dict and hasattr(var, '_get_config_dict_origin'): var.get_config_dict = var._get_config_dict_origin delattr(var, '_get_config_dict_origin') + if has_save_pretrained and hasattr(var, '_save_pretrained_origin'): + var.save_pretrained = var._save_pretrained_origin + delattr(var, '_save_pretrained_origin') def _patch_hub(): diff --git a/modelscope/utils/hf_util/pipeline_builder.py b/modelscope/utils/hf_util/pipeline_builder.py new file mode 100644 index 00000000..c7c240ad --- /dev/null +++ b/modelscope/utils/hf_util/pipeline_builder.py @@ -0,0 +1,49 @@ +from typing import Optional, Union +import os +import torch +from modelscope.hub import snapshot_download +from transformers import (TFPreTrainedModel, PreTrainedModel, pipeline) +from transformers.pipelines import get_task, check_task +from transformers import Pipeline as PipelineHF +from modelscope.utils.hf_util.patcher import patch_hub, _patch_pretrained_class + +def _get_hf_device(device): + if isinstance(device, str): + device_name = device.lower() + eles = device_name.split(':') + if eles[0] == 'gpu': + eles = ['cuda'] + eles[1:] + device = ''.join(eles) + return device + +def _get_hf_pipeline_class(task, model): + if not task: + task = get_task(model) + normalized_task, targeted_task, task_options = check_task(task) + pipeline_class = targeted_task["impl"] + pipeline_class = _patch_pretrained_class([pipeline_class])[0] + return pipeline_class + +def hf_pipeline( + task: str = None, + model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, + framework: Optional[str] = None, + device: Optional[Union[int, str, "torch.device"]] = None, + **kwargs, +) -> PipelineHF: + if isinstance(model, str): + if not os.path.exists(model): + model = snapshot_download(model) + + framework = 'pt' if framework == 'pytorch' else framework + + device = _get_hf_device(device) + pipeline_class = _get_hf_pipeline_class(task, model) + + return pipeline(task=task, + model=model, + framework=framework, + device=device, + 
pipeline_class=pipeline_class, + #pipeline_class=QuestionAnsweringPipeline, + **kwargs) From a742035f6eab40013adf58930df0847e846b6165 Mon Sep 17 00:00:00 2001 From: suluyan Date: Tue, 11 Feb 2025 10:45:28 +0800 Subject: [PATCH 24/36] fix auto-merge --- modelscope/hub/api.py | 13 +++---------- modelscope/hub/utils/utils.py | 1 - modelscope/utils/hf_util/patcher.py | 2 -- 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index 559b567e..88875bfc 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -121,6 +121,7 @@ class HubApi: Args: access_token (str): user access token on modelscope, set this argument or set `MODELSCOPE_API_TOKEN`. If neither of the tokens exist, login will directly return. + Returns: cookies: to authenticate yourself to ModelScope open-api git_token: token to access your git repository. @@ -154,16 +155,6 @@ class HubApi: return d[API_RESPONSE_FIELD_DATA][ API_RESPONSE_FIELD_GIT_ACCESS_TOKEN], cookies - def try_login(self, access_token: Optional[str] = None) -> bool: - """Wraps the `login` method and returns bool. - """ - try: - self.login(access_token) - return True - except AssertionError: - logger.warning('Login failed.') - return False - def create_model(self, model_id: str, visibility: Optional[int] = ModelVisibility.PUBLIC, @@ -1224,6 +1215,8 @@ class HubApi: self.login(access_token=token) repo_id_list = repo_id.split('/') + if len(repo_id_list) != 2: + raise ValueError('Invalid repo id, should be in the format of `owner_name/repo_name`') namespace, repo_name = repo_id_list if repo_type == REPO_TYPE_MODEL: diff --git a/modelscope/hub/utils/utils.py b/modelscope/hub/utils/utils.py index 1d05297d..3ad96fe2 100644 --- a/modelscope/hub/utils/utils.py +++ b/modelscope/hub/utils/utils.py @@ -186,4 +186,3 @@ def add_content_to_file(repo, pass else: raise e - diff --git a/modelscope/utils/hf_util/patcher.py b/modelscope/utils/hf_util/patcher.py index 5fcd390e..138550ea 100644 --- a/modelscope/utils/hf_util/patcher.py +++ b/modelscope/utils/hf_util/patcher.py @@ -339,7 +339,6 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): # save_pretrained is not a classmethod and cannot be overridden by replacing the class method. It requires replacing the class object method. 
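
The repo-id check restored in this hunk is small enough to state on its own. A minimal sketch, assuming only the `owner_name/repo_name` convention the error message describes (`split_repo_id` is an illustrative name):

def split_repo_id(repo_id: str):
    parts = repo_id.split('/')
    if len(parts) != 2:
        raise ValueError(
            'Invalid repo id, should be in the format of '
            '`owner_name/repo_name`')
    namespace, repo_name = parts
    return namespace, repo_name

# e.g. split_repo_id('damo/nlp_structbert') -> ('damo', 'nlp_structbert')
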
if wrap or ('pipeline' in name.lower() and has_save_pretrained): - print(f'var wrap: {var}') try: if not has_from_pretrained and not has_get_config_dict and not has_get_peft_type and not has_save_pretrained: all_available_modules.append(var) @@ -347,7 +346,6 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): all_available_modules.append( get_wrapped_class(var, **ignore_file_pattern_kwargs)) except Exception as e: - print(f'wrap failed: {e}') all_available_modules.append(var) else: if has_from_pretrained and not hasattr(var, From 3b9c5a095beaacf4369c594d5668fe0c199bb8d7 Mon Sep 17 00:00:00 2001 From: suluyan Date: Tue, 11 Feb 2025 11:01:11 +0800 Subject: [PATCH 25/36] fix lint --- modelscope/pipelines/builder.py | 16 ++-- modelscope/utils/hf_util/__init__.py | 2 +- modelscope/utils/hf_util/patcher.py | 79 +++++++++++--------- modelscope/utils/hf_util/pipeline_builder.py | 35 +++++---- 4 files changed, 71 insertions(+), 61 deletions(-) diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py index 6f2de064..e6f5fedf 100644 --- a/modelscope/pipelines/builder.py +++ b/modelscope/pipelines/builder.py @@ -10,12 +10,11 @@ from modelscope.utils.config import ConfigDict, check_config from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, Invoke, Tasks, ThirdParty) from modelscope.utils.hub import read_config +from modelscope.utils.import_utils import is_transformers_available +from modelscope.utils.logger import get_logger from modelscope.utils.plugins import (register_modelhub_repo, register_plugins_repo) from modelscope.utils.registry import Registry, build_from_cfg -from modelscope.utils.logger import get_logger -from modelscope.utils.import_utils import is_transformers_available - from .base import Pipeline from .util import is_official_hub_path @@ -190,11 +189,12 @@ def pipeline(task: str = None, if not pipeline_props and is_transformers_available(): from modelscope.utils.hf_util import hf_pipeline - return hf_pipeline(task=task, - model=model, - framework=framework, - device=device, - **kwargs) + return hf_pipeline( + task=task, + model=model, + framework=framework, + device=device, + **kwargs) if not device: device = 'gpu' diff --git a/modelscope/utils/hf_util/__init__.py b/modelscope/utils/hf_util/__init__.py index c9d1883b..ac8349c9 100644 --- a/modelscope/utils/hf_util/__init__.py +++ b/modelscope/utils/hf_util/__init__.py @@ -1,3 +1,3 @@ from .auto_class import * from .patcher import patch_context, patch_hub, unpatch_hub -from .pipeline_builder import hf_pipeline \ No newline at end of file +from .pipeline_builder import hf_pipeline diff --git a/modelscope/utils/hf_util/patcher.py b/modelscope/utils/hf_util/patcher.py index 138550ea..103ac941 100644 --- a/modelscope/utils/hf_util/patcher.py +++ b/modelscope/utils/hf_util/patcher.py @@ -118,7 +118,6 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): model_dir = pretrained_model_name_or_path return model_dir - def patch_pretrained_model_name_or_path(pretrained_model_name_or_path, *model_args, **kwargs): """Patch all from_pretrained/get_config_dict""" @@ -150,11 +149,12 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): obj = kwargs.pop('obj') push_to_hub = kwargs.pop('push_to_hub', False) - obj._save_pretrained_origin(obj, - save_directory=save_directory, - safe_serialization=safe_serialization, - push_to_hub=False, - **kwargs) + obj._save_pretrained_origin( + obj, + save_directory=save_directory, + safe_serialization=safe_serialization, + push_to_hub=False, + 
**kwargs) # Class members may be unpatched, so push_to_hub is done separately here if push_to_hub: @@ -162,16 +162,17 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): from modelscope.hub.api import HubApi api = HubApi() - token = kwargs.get("token") - commit_message = kwargs.pop("commit_message", None) - repo_name = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1]) + token = kwargs.get('token') + commit_message = kwargs.pop('commit_message', None) + repo_name = kwargs.pop('repo_id', + save_directory.split(os.path.sep)[-1]) api.create_repo(repo_name, **kwargs) - push_to_hub(repo_name=repo_name, - output_dir=save_directory, - commit_message=commit_message, - token=token) - #return kwargs.pop('ori_func')(obj, save_directory, safe_serialization, **kwargs) + push_to_hub( + repo_name=repo_name, + output_dir=save_directory, + commit_message=commit_message, + token=token) def get_wrapped_class( module_class: 'PreTrainedModel', @@ -244,10 +245,10 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): return module_obj def save_pretrained( - self, - save_directory: Union[str, os.PathLike], - safe_serialization: bool = True, - **kwargs, + self, + save_directory: Union[str, os.PathLike], + safe_serialization: bool = True, + **kwargs, ): push_to_hub = kwargs.pop('push_to_hub', False) if push_to_hub: @@ -258,10 +259,12 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): from modelscope.hub.api import HubApi api = HubApi() - token = kwargs.get("token") + token = kwargs.get('token') api.login(token) - commit_message = kwargs.pop("commit_message", None) - repo_name = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1]) + commit_message = kwargs.pop('commit_message', None) + repo_name = kwargs.pop( + 'repo_id', + save_directory.split(os.path.sep)[-1]) api.create_repo(repo_name) repo = Repository(save_directory, repo_name) default_config = { @@ -277,21 +280,19 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): repo, 'configuration.json', [json.dumps(config)], ignore_push_error=True) - super().save_pretrained(save_directory=save_directory, - safe_serialization=safe_serialization, - push_to_hub=False, - **kwargs) + super().save_pretrained( + save_directory=save_directory, + safe_serialization=safe_serialization, + push_to_hub=False, + **kwargs) # Class members may be unpatched, so push_to_hub is done separately here if push_to_hub: - - #api.create_repo(repo_name, **kwargs) - - - push_to_hub(repo_name=repo_name, - output_dir=save_directory, - commit_message=commit_message, - token=token) + push_to_hub( + repo_name=repo_name, + output_dir=save_directory, + commit_message=commit_message, + token=token) if not hasattr(module_class, 'from_pretrained'): del ClassWrapper.from_pretrained @@ -337,15 +338,17 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): except ImportError: continue - # save_pretrained is not a classmethod and cannot be overridden by replacing the class method. It requires replacing the class object method. + # save_pretrained is not a classmethod and cannot be overridden by replacing + # the class method. It requires replacing the class object method. 
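
The `configuration.json` defaulting used in this wrapper (and in `create_repo` earlier in the series) boils down to a dict-unpacking merge in which caller-supplied keys win. A self-contained sketch; `build_configuration` is an illustrative name:

import json

def build_configuration(config_json=None):
    default_config = {
        'framework': 'pytorch',
        'task': 'text-generation',
        'allow_remote': True
    }
    # Caller-supplied keys override the defaults.
    return json.dumps({**default_config, **(config_json or {})})

assert json.loads(build_configuration({'task': 'fill-mask'}))['task'] == 'fill-mask'
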
if wrap or ('pipeline' in name.lower() and has_save_pretrained): try: - if not has_from_pretrained and not has_get_config_dict and not has_get_peft_type and not has_save_pretrained: + if (not has_from_pretrained and not has_get_config_dict + and not has_get_peft_type and not has_save_pretrained): all_available_modules.append(var) else: all_available_modules.append( get_wrapped_class(var, **ignore_file_pattern_kwargs)) - except Exception as e: + except Exception: all_available_modules.append(var) else: if has_from_pretrained and not hasattr(var, @@ -379,7 +382,8 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): ori_func=var._get_config_dict_origin, **ignore_file_pattern_kwargs) - if has_save_pretrained and not hasattr(var, '_save_pretrained_origin'): + if has_save_pretrained and not hasattr(var, + '_save_pretrained_origin'): var._save_pretrained_origin = var.save_pretrained var.save_pretrained = partial( save_pretrained, @@ -659,6 +663,7 @@ def _patch_hub(): from transformers.utils import hub hub.create_commit = hf_api.create_commit + def _unpatch_hub(): import huggingface_hub from huggingface_hub import hf_api diff --git a/modelscope/utils/hf_util/pipeline_builder.py b/modelscope/utils/hf_util/pipeline_builder.py index c7c240ad..5386bead 100644 --- a/modelscope/utils/hf_util/pipeline_builder.py +++ b/modelscope/utils/hf_util/pipeline_builder.py @@ -1,11 +1,14 @@ -from typing import Optional, Union import os +from typing import Optional, Union + import torch -from modelscope.hub import snapshot_download -from transformers import (TFPreTrainedModel, PreTrainedModel, pipeline) -from transformers.pipelines import get_task, check_task from transformers import Pipeline as PipelineHF -from modelscope.utils.hf_util.patcher import patch_hub, _patch_pretrained_class +from transformers import PreTrainedModel, TFPreTrainedModel, pipeline +from transformers.pipelines import check_task, get_task + +from modelscope.hub import snapshot_download +from modelscope.utils.hf_util.patcher import _patch_pretrained_class, patch_hub + def _get_hf_device(device): if isinstance(device, str): @@ -16,19 +19,21 @@ def _get_hf_device(device): device = ''.join(eles) return device + def _get_hf_pipeline_class(task, model): if not task: task = get_task(model) normalized_task, targeted_task, task_options = check_task(task) - pipeline_class = targeted_task["impl"] + pipeline_class = targeted_task['impl'] pipeline_class = _patch_pretrained_class([pipeline_class])[0] return pipeline_class + def hf_pipeline( task: str = None, - model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None, + model: Optional[Union[str, 'PreTrainedModel', 'TFPreTrainedModel']] = None, framework: Optional[str] = None, - device: Optional[Union[int, str, "torch.device"]] = None, + device: Optional[Union[int, str, 'torch.device']] = None, **kwargs, ) -> PipelineHF: if isinstance(model, str): @@ -40,10 +45,10 @@ def hf_pipeline( device = _get_hf_device(device) pipeline_class = _get_hf_pipeline_class(task, model) - return pipeline(task=task, - model=model, - framework=framework, - device=device, - pipeline_class=pipeline_class, - #pipeline_class=QuestionAnsweringPipeline, - **kwargs) + return pipeline( + task=task, + model=model, + framework=framework, + device=device, + pipeline_class=pipeline_class, + **kwargs) From e4c7ea33dac58737babe75f75b415c18e346fb47 Mon Sep 17 00:00:00 2001 From: suluyan Date: Tue, 11 Feb 2025 11:06:52 +0800 Subject: [PATCH 26/36] remove duplicate create_repo code --- 
modelscope/utils/hf_util/patcher.py | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/modelscope/utils/hf_util/patcher.py b/modelscope/utils/hf_util/patcher.py index 103ac941..e941408d 100644 --- a/modelscope/utils/hf_util/patcher.py +++ b/modelscope/utils/hf_util/patcher.py @@ -252,34 +252,19 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): ): push_to_hub = kwargs.pop('push_to_hub', False) if push_to_hub: - import json - from modelscope.hub.repository import Repository - from modelscope.hub.utils.utils import add_content_to_file from modelscope.hub.push_to_hub import push_to_hub from modelscope.hub.api import HubApi - api = HubApi() token = kwargs.get('token') - api.login(token) commit_message = kwargs.pop('commit_message', None) repo_name = kwargs.pop( 'repo_id', save_directory.split(os.path.sep)[-1]) + + api = HubApi() + api.login(token) api.create_repo(repo_name) - repo = Repository(save_directory, repo_name) - default_config = { - 'framework': 'pytorch', - 'task': 'text-generation', - 'allow_remote': True - } - config_json = kwargs.get('config_json') - if not config_json: - config_json = {} - config = {**default_config, **config_json} - add_content_to_file( - repo, - 'configuration.json', [json.dumps(config)], - ignore_push_error=True) + super().save_pretrained( save_directory=save_directory, safe_serialization=safe_serialization, From 4945d4340c26d74c2f508272e0c39d2768e70ab2 Mon Sep 17 00:00:00 2001 From: suluyan Date: Tue, 11 Feb 2025 12:50:30 +0800 Subject: [PATCH 27/36] add test_case --- tests/utils/test_hf_util.py | 51 +++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/tests/utils/test_hf_util.py b/tests/utils/test_hf_util.py index 9826d991..c2ccdc8d 100644 --- a/tests/utils/test_hf_util.py +++ b/tests/utils/test_hf_util.py @@ -40,6 +40,13 @@ class HFUtilTest(unittest.TestCase): with open(self.test_file2, 'w') as f: f.write('{}') + self.pipeline_qa_context = r""" + Extractive Question Answering is the task of extracting an answer from a text given a question. An example of a + question answering dataset is the SQuAD dataset, which is entirely based on that task. If you would like to fine-tune + a model on a SQuAD task, you may leverage the examples/pytorch/question-answering/run_squad.py script. + """ + self.pipeline_qa_question = "What is a good example of a question answering dataset?" 
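
After this deduplication, pushing during `save_pretrained` goes through the same `HubApi.create_repo` path as the rest of the hub code. A hypothetical end-to-end use under the patched auto classes, with a placeholder target repo id and token (the source model id matches the one used in the tests below):

from modelscope import AutoModelForQuestionAnswering

model = AutoModelForQuestionAnswering.from_pretrained(
    'damotestx/distilbert-base-cased-distilled-squad')
# Saves to disk, then creates the repo and uploads through HubApi.
model.save_pretrained('./tmp_qa_model', push_to_hub=True,
                      repo_id='my_namespace/my_qa_model', token='<sdk-token>')
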
+ def tearDown(self): logger.info('TearDown') shutil.rmtree(self.model_dir, ignore_errors=True) @@ -235,6 +242,50 @@ class HFUtilTest(unittest.TestCase): 'Qwen/Qwen1.5-0.5B-Chat', trust_remote_code=True) model.push_to_hub(self.create_model_name) + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_pipeline_model_id(self): + from modelscope import pipeline + model_id = 'damotestx/distilbert-base-cased-distilled-squad' + qa = pipeline("question-answering", model=model_id) + assert qa(question=self.pipeline_qa_question, context=self.pipeline_qa_context) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_pipeline_auto_model(self): + from modelscope import pipeline, AutoModelForQuestionAnswering, AutoTokenizer + model_id = 'damotestx/distilbert-base-cased-distilled-squad' + model = AutoModelForQuestionAnswering.from_pretrained(model_id) + tokenizer = AutoTokenizer.from_pretrained(model_id) + qa = pipeline("question-answering", model=model, tokenizer=tokenizer) + assert qa(question=self.pipeline_qa_question, context=self.pipeline_qa_context) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_pipeline_save_pretrained(self): + from modelscope import pipeline + model_id = 'damotestx/distilbert-base-cased-distilled-squad' + + pipe_ori = pipeline("question-answering", model=model_id) + + result_ori = pipe_ori(question=self.pipeline_qa_question, context=self.pipeline_qa_context) + + # save_pretrained + repo_id = 'damotestx/tst_push5' + save_dir = './tmp_test_hf_pipeline' + try: + os.system(f'rm -rf {save_dir}') + self.api.delete_model(repo_id) + # wait for delete repo + import time + time.sleep(5) + except Exception: + # if repo not exists + pass + pipe_ori.save_pretrained(save_dir, push_to_hub=True, repo_id=repo_id) + + # load from saved + pipe_new = pipeline("question_answering", model=repo_id) + result_new = pipe_new(question=self.pipeline_qa_question, context=self.pipeline_qa_context) + + assert result_new == result_ori if __name__ == '__main__': unittest.main() From 021e912a38094c408cd1b66a068149ed7758e952 Mon Sep 17 00:00:00 2001 From: suluyan Date: Tue, 11 Feb 2025 14:31:34 +0800 Subject: [PATCH 28/36] fix create_repo --- tests/utils/test_hf_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/test_hf_util.py b/tests/utils/test_hf_util.py index c2ccdc8d..d5d17255 100644 --- a/tests/utils/test_hf_util.py +++ b/tests/utils/test_hf_util.py @@ -282,7 +282,7 @@ class HFUtilTest(unittest.TestCase): pipe_ori.save_pretrained(save_dir, push_to_hub=True, repo_id=repo_id) # load from saved - pipe_new = pipeline("question_answering", model=repo_id) + pipe_new = pipeline("question-answering", model=repo_id) result_new = pipe_new(question=self.pipeline_qa_question, context=self.pipeline_qa_context) assert result_new == result_ori From 05ce8a7dc72db325444c9995fc5e436678700bc6 Mon Sep 17 00:00:00 2001 From: suluyan Date: Tue, 11 Feb 2025 14:56:55 +0800 Subject: [PATCH 29/36] remove the class method rewrite of save_pretrained from patch & fix lint --- modelscope/utils/hf_util/patcher.py | 47 ++--------------------------- tests/utils/test_hf_util.py | 34 +++++++++++++-------- 2 files changed, 25 insertions(+), 56 deletions(-) diff --git a/modelscope/utils/hf_util/patcher.py b/modelscope/utils/hf_util/patcher.py index e941408d..4c2eaef1 100644 --- a/modelscope/utils/hf_util/patcher.py +++ b/modelscope/utils/hf_util/patcher.py @@ -143,37 +143,6 @@ def 
_patch_pretrained_class(all_imported_modules, wrap=False): **kwargs) return kwargs.pop('ori_func')(model_dir, **kwargs) - def save_pretrained(save_directory: Union[str, os.PathLike], - safe_serialization: bool = True, - **kwargs): - obj = kwargs.pop('obj') - push_to_hub = kwargs.pop('push_to_hub', False) - - obj._save_pretrained_origin( - obj, - save_directory=save_directory, - safe_serialization=safe_serialization, - push_to_hub=False, - **kwargs) - - # Class members may be unpatched, so push_to_hub is done separately here - if push_to_hub: - from modelscope.hub.push_to_hub import push_to_hub - from modelscope.hub.api import HubApi - api = HubApi() - - token = kwargs.get('token') - commit_message = kwargs.pop('commit_message', None) - repo_name = kwargs.pop('repo_id', - save_directory.split(os.path.sep)[-1]) - api.create_repo(repo_name, **kwargs) - - push_to_hub( - repo_name=repo_name, - output_dir=save_directory, - commit_message=commit_message, - token=token) - def get_wrapped_class( module_class: 'PreTrainedModel', ignore_file_pattern: Optional[Union[str, List[str]]] = None, @@ -254,6 +223,7 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): if push_to_hub: from modelscope.hub.push_to_hub import push_to_hub from modelscope.hub.api import HubApi + from modelscope.hub.repository import Repository token = kwargs.get('token') commit_message = kwargs.pop('commit_message', None) @@ -264,6 +234,8 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): api = HubApi() api.login(token) api.create_repo(repo_name) + # clone the repo + Repository(save_directory, repo_name) super().save_pretrained( save_directory=save_directory, @@ -367,15 +339,6 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): ori_func=var._get_config_dict_origin, **ignore_file_pattern_kwargs) - if has_save_pretrained and not hasattr(var, - '_save_pretrained_origin'): - var._save_pretrained_origin = var.save_pretrained - var.save_pretrained = partial( - save_pretrained, - ori_func=var._save_pretrained_origin, - obj=var, - **ignore_file_pattern_kwargs) - all_available_modules.append(var) return all_available_modules @@ -389,7 +352,6 @@ def _unpatch_pretrained_class(all_imported_modules): has_from_pretrained = hasattr(var, 'from_pretrained') has_get_peft_type = hasattr(var, '_get_peft_type') has_get_config_dict = hasattr(var, 'get_config_dict') - has_save_pretrained = hasattr(var, 'save_pretrained') except ImportError: continue if has_from_pretrained and hasattr(var, '_from_pretrained_origin'): @@ -401,9 +363,6 @@ def _unpatch_pretrained_class(all_imported_modules): if has_get_config_dict and hasattr(var, '_get_config_dict_origin'): var.get_config_dict = var._get_config_dict_origin delattr(var, '_get_config_dict_origin') - if has_save_pretrained and hasattr(var, '_save_pretrained_origin'): - var.save_pretrained = var._save_pretrained_origin - delattr(var, '_save_pretrained_origin') def _patch_hub(): diff --git a/tests/utils/test_hf_util.py b/tests/utils/test_hf_util.py index d5d17255..61ad18cc 100644 --- a/tests/utils/test_hf_util.py +++ b/tests/utils/test_hf_util.py @@ -41,11 +41,12 @@ class HFUtilTest(unittest.TestCase): f.write('{}') self.pipeline_qa_context = r""" - Extractive Question Answering is the task of extracting an answer from a text given a question. An example of a - question answering dataset is the SQuAD dataset, which is entirely based on that task. 
If you would like to fine-tune - a model on a SQuAD task, you may leverage the examples/pytorch/question-answering/run_squad.py script. + Extractive Question Answering is the task of extracting an answer from a text given a question. An example + of a question answering dataset is the SQuAD dataset, which is entirely based on that task. If you would + like to fine-tune a model on a SQuAD task, you may leverage the + examples/pytorch/question-answering/run_squad.py script. """ - self.pipeline_qa_question = "What is a good example of a question answering dataset?" + self.pipeline_qa_question = 'What is a good example of a question answering dataset?' def tearDown(self): logger.info('TearDown') @@ -246,8 +247,10 @@ class HFUtilTest(unittest.TestCase): def test_pipeline_model_id(self): from modelscope import pipeline model_id = 'damotestx/distilbert-base-cased-distilled-squad' - qa = pipeline("question-answering", model=model_id) - assert qa(question=self.pipeline_qa_question, context=self.pipeline_qa_context) + qa = pipeline('question-answering', model=model_id) + assert qa( + question=self.pipeline_qa_question, + context=self.pipeline_qa_context) @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_pipeline_auto_model(self): @@ -255,17 +258,21 @@ class HFUtilTest(unittest.TestCase): model_id = 'damotestx/distilbert-base-cased-distilled-squad' model = AutoModelForQuestionAnswering.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) - qa = pipeline("question-answering", model=model, tokenizer=tokenizer) - assert qa(question=self.pipeline_qa_question, context=self.pipeline_qa_context) + qa = pipeline('question-answering', model=model, tokenizer=tokenizer) + assert qa( + question=self.pipeline_qa_question, + context=self.pipeline_qa_context) @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_pipeline_save_pretrained(self): from modelscope import pipeline model_id = 'damotestx/distilbert-base-cased-distilled-squad' - pipe_ori = pipeline("question-answering", model=model_id) + pipe_ori = pipeline('question-answering', model=model_id) - result_ori = pipe_ori(question=self.pipeline_qa_question, context=self.pipeline_qa_context) + result_ori = pipe_ori( + question=self.pipeline_qa_question, + context=self.pipeline_qa_context) # save_pretrained repo_id = 'damotestx/tst_push5' @@ -282,10 +289,13 @@ class HFUtilTest(unittest.TestCase): pipe_ori.save_pretrained(save_dir, push_to_hub=True, repo_id=repo_id) # load from saved - pipe_new = pipeline("question-answering", model=repo_id) - result_new = pipe_new(question=self.pipeline_qa_question, context=self.pipeline_qa_context) + pipe_new = pipeline('question-answering', model=repo_id) + result_new = pipe_new( + question=self.pipeline_qa_question, + context=self.pipeline_qa_context) assert result_new == result_ori + if __name__ == '__main__': unittest.main() From d0c4cac2324f499b9bf01db4e79def2c4a99c5f1 Mon Sep 17 00:00:00 2001 From: suluyan Date: Tue, 11 Feb 2025 15:04:07 +0800 Subject: [PATCH 30/36] fix test user --- tests/utils/test_hf_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/test_hf_util.py b/tests/utils/test_hf_util.py index 61ad18cc..058e92c7 100644 --- a/tests/utils/test_hf_util.py +++ b/tests/utils/test_hf_util.py @@ -275,7 +275,7 @@ class HFUtilTest(unittest.TestCase): context=self.pipeline_qa_context) # save_pretrained - repo_id = 'damotestx/tst_push5' + repo_id = self.create_model_name save_dir = 
'./tmp_test_hf_pipeline' try: os.system(f'rm -rf {save_dir}') From 6637fa094a6a6e2104fe59e7069ce610c9862f6f Mon Sep 17 00:00:00 2001 From: suluyan Date: Tue, 11 Feb 2025 16:36:15 +0800 Subject: [PATCH 31/36] fix log --- modelscope/pipelines/builder.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py index e6f5fedf..fea8c859 100644 --- a/modelscope/pipelines/builder.py +++ b/modelscope/pipelines/builder.py @@ -188,13 +188,18 @@ def pipeline(task: str = None, pipeline_props = {'type': pipeline_name} if not pipeline_props and is_transformers_available(): - from modelscope.utils.hf_util import hf_pipeline - return hf_pipeline( - task=task, - model=model, - framework=framework, - device=device, - **kwargs) + try: + from modelscope.utils.hf_util import hf_pipeline + return hf_pipeline( + task=task, + model=model, + framework=framework, + device=device, + **kwargs) + except Exception as e: + logger.error( + f'Failed to initialize the pipeline using the transformers pipeline, details: {e}' + ) if not device: device = 'gpu' From f854ef20047af8db7ac39c45b6143dd52ac3f10f Mon Sep 17 00:00:00 2001 From: suluyan Date: Tue, 11 Feb 2025 17:58:57 +0800 Subject: [PATCH 32/36] fix log --- modelscope/pipelines/builder.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py index fea8c859..7e5cc6b5 100644 --- a/modelscope/pipelines/builder.py +++ b/modelscope/pipelines/builder.py @@ -198,8 +198,9 @@ def pipeline(task: str = None, **kwargs) except Exception as e: logger.error( - f'Failed to initialize the pipeline using the transformers pipeline, details: {e}' - ) + 'We couldn\'t find a suitable pipeline from ms, so we tried to load it using the transformers pipeline,' + ' but that also failed.') + raise e if not device: device = 'gpu' From 0ae86a48bcf710950141475b4af9ee6ea2731cfb Mon Sep 17 00:00:00 2001 From: suluyan Date: Tue, 11 Feb 2025 20:29:53 +0800 Subject: [PATCH 33/36] fix --- modelscope/hub/check_model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modelscope/hub/check_model.py b/modelscope/hub/check_model.py index a20ccd6d..6d39c275 100644 --- a/modelscope/hub/check_model.py +++ b/modelscope/hub/check_model.py @@ -72,6 +72,7 @@ def check_local_model_is_latest( use_cookies=cookies, ) model_cache = None + # download via non-git method if not os.path.exists(os.path.join(model_root_path, '.git')): model_cache = ModelFileSystemCache(model_root_path) for model_file in model_files: From d7f8cd9b5a15449491ead5fcc5dd119424861b67 Mon Sep 17 00:00:00 2001 From: "xingjun.wxj" Date: Thu, 13 Mar 2025 14:38:37 +0800 Subject: [PATCH 34/36] fix patcher --- modelscope/utils/hf_util/patcher.py | 66 +++-------------------------- 1 file changed, 7 insertions(+), 59 deletions(-) diff --git a/modelscope/utils/hf_util/patcher.py b/modelscope/utils/hf_util/patcher.py index b31b6e1d..28f8eeb5 100644 --- a/modelscope/utils/hf_util/patcher.py +++ b/modelscope/utils/hf_util/patcher.py @@ -27,8 +27,7 @@ def get_all_imported_modules(): transformers_include_names = [ 'Auto.*', 'T5.*', 'BitsAndBytesConfig', 'GenerationConfig', 'Awq.*', 'GPTQ.*', 'BatchFeature', 'Qwen.*', 'Llama.*', 'PretrainedConfig', - 'PreTrainedTokenizer', 'PreTrainedModel', 'PreTrainedTokenizerFast', - 'Pipeline' + 'PreTrainedTokenizer', 'PreTrainedModel', 'PreTrainedTokenizerFast' ] peft_include_names = ['.*PeftModel.*', '.*Config'] diffusers_include_names = 
['^(?!TF|Flax).*Pipeline$'] @@ -253,44 +252,6 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): model_dir, *model_args, **kwargs) return module_obj - def save_pretrained( - self, - save_directory: Union[str, os.PathLike], - safe_serialization: bool = True, - **kwargs, - ): - push_to_hub = kwargs.pop('push_to_hub', False) - if push_to_hub: - from modelscope.hub.push_to_hub import push_to_hub - from modelscope.hub.api import HubApi - from modelscope.hub.repository import Repository - - token = kwargs.get('token') - commit_message = kwargs.pop('commit_message', None) - repo_name = kwargs.pop( - 'repo_id', - save_directory.split(os.path.sep)[-1]) - - api = HubApi() - api.login(token) - api.create_repo(repo_name) - # clone the repo - Repository(save_directory, repo_name) - - super().save_pretrained( - save_directory=save_directory, - safe_serialization=safe_serialization, - push_to_hub=False, - **kwargs) - - # Class members may be unpatched, so push_to_hub is done separately here - if push_to_hub: - push_to_hub( - repo_name=repo_name, - output_dir=save_directory, - commit_message=commit_message, - token=token) - if not hasattr(module_class, 'from_pretrained'): del ClassWrapper.from_pretrained else: @@ -305,9 +266,6 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): if not hasattr(module_class, 'get_config_dict'): del ClassWrapper.get_config_dict - if not hasattr(module_class, 'save_pretrained'): - del ClassWrapper.save_pretrained - ClassWrapper.__name__ = module_class.__name__ ClassWrapper.__qualname__ = module_class.__qualname__ return ClassWrapper @@ -331,21 +289,17 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): has_from_pretrained = hasattr(var, 'from_pretrained') has_get_peft_type = hasattr(var, '_get_peft_type') has_get_config_dict = hasattr(var, 'get_config_dict') - has_save_pretrained = hasattr(var, 'save_pretrained') except: # noqa continue - # save_pretrained is not a classmethod and cannot be overridden by replacing - # the class method. It requires replacing the class object method. 
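
The tightened include lists above switch from plain name fragments to regular expressions; for instance `'^(?!TF|Flax).*Pipeline$'` keeps diffusers pipeline classes while excluding their TF/Flax variants. A quick illustration (the hunk does not show whether the patcher matches with `fullmatch` or `search`, so `fullmatch` here is an assumption):

import re

patterns = ['^(?!TF|Flax).*Pipeline$']

def included(name):
    return any(re.fullmatch(p, name) for p in patterns)

assert included('StableDiffusionPipeline')
assert not included('FlaxStableDiffusionPipeline')
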
- if wrap or ('pipeline' in name.lower() and has_save_pretrained): + if wrap: try: - if (not has_from_pretrained and not has_get_config_dict - and not has_get_peft_type and not has_save_pretrained): + if not has_from_pretrained and not has_get_config_dict and not has_get_peft_type: all_available_modules.append(var) else: all_available_modules.append( get_wrapped_class(var, **ignore_file_pattern_kwargs)) - except Exception: + except: # noqa all_available_modules.append(var) else: if has_from_pretrained and not hasattr(var, @@ -370,10 +324,9 @@ def _patch_pretrained_class(all_imported_modules, wrap=False): if has_get_config_dict and not hasattr(var, '_get_config_dict_origin'): var._get_config_dict_origin = var.get_config_dict - var.get_config_dict = partial( - patch_pretrained_model_name_or_path, - ori_func=var._get_config_dict_origin, - **ignore_file_pattern_kwargs) + var.get_config_dict = classmethod( + partial(patch_get_config_dict, + **ignore_file_pattern_kwargs)) all_available_modules.append(var) return all_available_modules @@ -619,11 +572,6 @@ def _patch_hub(): # Patch repocard.validate from huggingface_hub import repocard if not hasattr(repocard.RepoCard, '_validate_origin'): - - def load(*args, **kwargs): # noqa - from huggingface_hub.errors import EntryNotFoundError - raise EntryNotFoundError(message='API not supported.') - repocard.RepoCard._validate_origin = repocard.RepoCard.validate repocard.RepoCard.validate = lambda *args, **kwargs: None repocard.RepoCard._load_origin = repocard.RepoCard.load From 767f0a921afe83baec71062f581401c1a7fde44a Mon Sep 17 00:00:00 2001 From: "xingjun.wxj" Date: Thu, 13 Mar 2025 15:19:08 +0800 Subject: [PATCH 35/36] update pipeline builder --- modelscope/utils/hf_util/pipeline_builder.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/modelscope/utils/hf_util/pipeline_builder.py b/modelscope/utils/hf_util/pipeline_builder.py index f6590d04..82fe7fbf 100644 --- a/modelscope/utils/hf_util/pipeline_builder.py +++ b/modelscope/utils/hf_util/pipeline_builder.py @@ -1,11 +1,6 @@ import os from typing import Optional, Union -import torch -from transformers import Pipeline as PipelineHF -from transformers import PreTrainedModel, TFPreTrainedModel, pipeline -from transformers.pipelines import check_task, get_task - from modelscope.hub import snapshot_download from modelscope.utils.hf_util.patcher import _patch_pretrained_class @@ -21,6 +16,7 @@ def _get_hf_device(device): def _get_hf_pipeline_class(task, model): + from transformers.pipelines import check_task, get_task if not task: task = get_task(model) normalized_task, targeted_task, task_options = check_task(task) @@ -35,7 +31,8 @@ def hf_pipeline( framework: Optional[str] = None, device: Optional[Union[int, str, 'torch.device']] = None, **kwargs, -) -> PipelineHF: +) -> 'transformers.Pipeline': + from transformers import pipeline if isinstance(model, str): if not os.path.exists(model): model = snapshot_download(model) From cde343533e069bbb46bdb4bcf3c9ca1951591bec Mon Sep 17 00:00:00 2001 From: suluyana <110878454+suluyana@users.noreply.github.com> Date: Thu, 13 Mar 2025 15:52:34 +0800 Subject: [PATCH 36/36] fix external_engine_for_llm_checker (#1260) Co-authored-by: suluyan --- modelscope/pipelines/builder.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py index 7e5cc6b5..5fb66178 100644 --- a/modelscope/pipelines/builder.py +++ b/modelscope/pipelines/builder.py @@ -263,13 +263,14 @@ 
def external_engine_for_llm_checker(model: Union[str, List[str], Model,
                                    kwargs: Dict[str, Any]) -> Optional[str]:
     from .nlp.llm_pipeline import ModelTypeHelper, LLMAdapterRegistry
     from ..hub.check_model import get_model_id_from_cache
-    from swift.llm import get_model_info_meta
     if isinstance(model, list):
         model = model[0]
     if not isinstance(model, str):
         model = model.model_dir

-    if kwargs.get('llm_framework') == 'swift':
+    llm_framework = kwargs.get('llm_framework', '')
+    if llm_framework == 'swift':
+        from swift.llm import get_model_info_meta
         # check if swift supports
         if os.path.exists(model):
             model_id = get_model_id_from_cache(model)
@@ -280,9 +281,8 @@ def external_engine_for_llm_checker(model: Union[str, List[str], Model,
                 info = get_model_info_meta(model_id)
                 model_type = info[0].model_type
             except Exception as e:
-                logger.warning(
-                    f'Cannot using llm_framework with {model_id}, '
-                    f'ignoring llm_framework={self.llm_framework} : {e}')
+                logger.warning(f'Cannot use llm_framework with {model_id}, '
+                               f'ignoring llm_framework={llm_framework} : {e}')
                 model_type = None
             if model_type:
                 return 'llm'
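
Taken together, the final patch defers the `swift` import until the `llm_framework == 'swift'` branch is actually taken, so importing `modelscope.pipelines` no longer requires swift to be installed. The shape of that pattern in isolation (`resolve_model_type` is an illustrative name; the two swift calls are the ones used above):

def resolve_model_type(model_id: str, llm_framework: str = ''):
    if llm_framework != 'swift':
        return None
    # Heavy optional dependency imported only on the path that needs it.
    from swift.llm import get_model_info_meta
    try:
        info = get_model_info_meta(model_id)
        return info[0].model_type
    except Exception:
        return None
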