Merge remote-tracking branch 'origin' into feat/hf_pipeline

suluyan
2025-02-19 11:37:10 +08:00
11 changed files with 206 additions and 107 deletions

View File

@@ -45,8 +45,10 @@ else \
pip cache purge; \
fi
RUN echo "cache bust $(date +%Y%m%d%H%M%S)" && \
sh /tmp/install.sh {version_args} && \
ARG CUR_TIME={cur_time}
RUN echo $CUR_TIME
RUN sh /tmp/install.sh {version_args} && \
curl -fsSL https://ollama.com/install.sh | sh && \
pip install --no-cache-dir -U funasr scikit-learn && \
pip install --no-cache-dir -U qwen_vl_utils pyav librosa timm transformers accelerate peft trl safetensors && \
@@ -58,7 +60,7 @@ RUN echo "cache bust $(date +%Y%m%d%H%M%S)" && \
pip install .[eval] && pip install evalscope -U --no-dependencies && pip install xtuner --no-dependencies && \
cd / && rm -fr /tmp/ms-swift && pip cache purge; \
pip install --no-cache-dir torch=={torch_version} torchvision=={torchvision_version} torchaudio=={torchaudio_version} {index_url} && \
pip install --no-cache-dir transformers huggingface-hub==0.25.* -U && pip cache purge; \
pip install --no-cache-dir transformers huggingface-hub==0.25.* -U && pip install --no-cache-dir 'timm>=0.9.0' && pip cache purge; \
pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
pip config set install.trusted-host mirrors.aliyun.com && \
cp /tmp/resources/ubuntu2204.aliyun /etc/apt/sources.list
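The added `ARG CUR_TIME={cur_time}` / `RUN echo $CUR_TIME` pair replaces the inline `date`-based cache bust: a new build-arg value invalidates every layer after it, so the install step re-runs on each build. A minimal sketch of how the builder could fill the placeholder (the strftime format is an assumption; only the name `formatted_time` comes from the builder diff below):

# Hedged sketch: substitute a per-build timestamp into the Dockerfile template
# so the ARG value, and every layer after it, changes on each build.
from datetime import datetime

template = 'ARG CUR_TIME={cur_time}\nRUN echo $CUR_TIME\nRUN sh /tmp/install.sh'
formatted_time = datetime.now().strftime('%Y%m%d%H%M%S')
content = template.replace('{cur_time}', formatted_time)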

View File

@@ -160,6 +160,7 @@ class CPUImageBuilder(Builder):
content = content.replace('{extra_content}', extra_content)
content = content.replace('{meta_file}', meta_file)
content = content.replace('{version_args}', version_args)
content = content.replace('{cur_time}', formatted_time)
content = content.replace('{install_ms_deps}', 'True')
content = content.replace('{torch_version}',
self.args.torch_version)
@@ -222,6 +223,7 @@ RUN pip install tf-keras==2.16.0 --no-dependencies && \
content = content.replace('{extra_content}', extra_content)
content = content.replace('{meta_file}', meta_file)
content = content.replace('{version_args}', version_args)
content = content.replace('{cur_time}', formatted_time)
content = content.replace('{install_ms_deps}', 'True')
content = content.replace('{torch_version}',
self.args.torch_version)
@@ -265,15 +267,15 @@ class LLMImageBuilder(Builder):
# A mirrored image of nvidia/cuda:12.4.0-devel-ubuntu22.04
args.base_image = 'nvidia/cuda:12.4.0-devel-ubuntu22.04'
if not args.torch_version:
args.torch_version = '2.4.0'
args.torchaudio_version = '2.4.0'
args.torchvision_version = '0.19.0'
args.torch_version = '2.5.1'
args.torchaudio_version = '2.5.1'
args.torchvision_version = '0.20.1'
if not args.cuda_version:
args.cuda_version = '12.4.0'
if not args.vllm_version:
args.vllm_version = '0.6.3.post1'
args.vllm_version = '0.7.2'
if not args.lmdeploy_version:
args.lmdeploy_version = '0.6.2'
args.lmdeploy_version = '0.7.0.post2'
if not args.autogptq_version:
args.autogptq_version = '0.7.1'
if not args.flashattn_version:
@@ -296,6 +298,7 @@ class LLMImageBuilder(Builder):
content = content.replace('{extra_content}', extra_content)
content = content.replace('{meta_file}', meta_file)
content = content.replace('{version_args}', version_args)
content = content.replace('{cur_time}', formatted_time)
content = content.replace('{install_ms_deps}', 'False')
content = content.replace('{torch_version}',
self.args.torch_version)

View File

@@ -39,7 +39,8 @@ from modelscope.hub.constants import (API_HTTP_CLIENT_MAX_RETRIES,
MODELSCOPE_REQUEST_ID, ONE_YEAR_SECONDS,
REQUESTS_API_HTTP_METHOD,
TEMPORARY_FOLDER_NAME, DatasetVisibility,
Licenses, ModelVisibility)
Licenses, ModelVisibility, Visibility,
VisibilityMap)
from modelscope.hub.errors import (InvalidParameter, NotExistError,
NotLoginException, RequestError,
datahub_raise_on_error,
@@ -59,9 +60,9 @@ from modelscope.utils.constant import (DEFAULT_DATASET_REVISION,
REPO_TYPE_DATASET, REPO_TYPE_MODEL,
REPO_TYPE_SUPPORT, ConfigFields,
DatasetFormations, DatasetMetaFormats,
DatasetVisibilityMap, DownloadChannel,
DownloadMode, Frameworks, ModelFile,
Tasks, VirgoDatasetConfig)
DownloadChannel, DownloadMode,
Frameworks, ModelFile, Tasks,
VirgoDatasetConfig)
from modelscope.utils.file_utils import get_file_hash, get_file_size
from modelscope.utils.logger import get_logger
from modelscope.utils.repo_utils import (DATASET_LFS_SUFFIX,
@@ -291,7 +292,7 @@ class HubApi:
Returns:
True if the repository exists, False otherwise.
"""
if (repo_type is not None) and repo_type.lower != REPO_TYPE_MODEL:
if (repo_type is not None) and repo_type.lower() != REPO_TYPE_MODEL:
raise Exception('Not support repo-type: %s' % repo_type)
if (repo_id is None) or repo_id.count('/') != 1:
raise Exception('Invalid repo_id: %s, must be of format namespace/name' % repo_id)
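The change above replaces the bound method `repo_type.lower` with the call `repo_type.lower()`: a method object never compares equal to a string, so the old check rejected every non-None `repo_type`, including 'model'. Illustration only:

# Why the original comparison always failed: a bound method is never equal to a str.
repo_type = 'model'
assert (repo_type.lower != 'model') is True     # method object vs. str
assert (repo_type.lower() != 'model') is False  # lower-cased str vs. str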
@@ -1095,7 +1096,7 @@ class HubApi:
# get visibility of the dataset
raise_on_error(resp)
data = resp['Data']
visibility = DatasetVisibilityMap.get(data['Visibility'])
visibility = VisibilityMap.get(data['Visibility'])
datahub_sts_url = f'{datahub_url}/ststoken?Revision={revision}'
r_sts = self.session.get(url=datahub_sts_url, cookies=cookies,
@@ -1201,7 +1202,7 @@ class HubApi:
repo_id: str,
*,
token: Union[str, bool, None] = None,
visibility: Optional[str] = 'public',
visibility: Optional[str] = Visibility.PUBLIC,
repo_type: Optional[str] = REPO_TYPE_MODEL,
chinese_name: Optional[str] = '',
license: Optional[str] = Licenses.APACHE_V2,
@@ -1225,29 +1226,31 @@ class HubApi:
if visibility is None:
raise ValueError(f'Invalid visibility: {visibility}, '
f'supported visibilities: `public`, `private`, `internal`')
repo_url: str = self.create_model(
model_id=repo_id,
visibility=visibility,
license=license,
chinese_name=chinese_name,
)
with tempfile.TemporaryDirectory() as temp_cache_dir:
from modelscope.hub.repository import Repository
repo = Repository(temp_cache_dir, repo_id)
default_config = {
'framework': 'pytorch',
'task': 'text-generation',
'allow_remote': True
}
config_json = kwargs.get('config_json')
if not config_json:
config_json = {}
config = {**default_config, **config_json}
add_content_to_file(
repo,
'configuration.json', [json.dumps(config)],
ignore_push_error=True)
if not self.repo_exists(repo_id, repo_type=repo_type):
repo_url: str = self.create_model(
model_id=repo_id,
visibility=visibility,
license=license,
chinese_name=chinese_name,
)
with tempfile.TemporaryDirectory() as temp_cache_dir:
from modelscope.hub.repository import Repository
repo = Repository(temp_cache_dir, repo_id)
default_config = {
'framework': 'pytorch',
'task': 'text-generation',
'allow_remote': True
}
config_json = kwargs.get('config_json')
if not config_json:
config_json = {}
config = {**default_config, **config_json}
add_content_to_file(
repo,
'configuration.json', [json.dumps(config)],
ignore_push_error=True)
else:
repo_url = f'{self.endpoint}/{repo_id}'
elif repo_type == REPO_TYPE_DATASET:
visibilities = {k: v for k, v in DatasetVisibility.__dict__.items() if not k.startswith('__')}
@@ -1255,13 +1258,16 @@ class HubApi:
if visibility is None:
raise ValueError(f'Invalid visibility: {visibility}, '
f'supported visibilities: `public`, `private`, `internal`')
repo_url: str = self.create_dataset(
dataset_name=repo_name,
namespace=namespace,
chinese_name=chinese_name,
license=license,
visibility=visibility,
)
if not self.repo_exists(repo_id, repo_type=repo_type):
repo_url: str = self.create_dataset(
dataset_name=repo_name,
namespace=namespace,
chinese_name=chinese_name,
license=license,
visibility=visibility,
)
else:
repo_url = f'{self.endpoint}/datasets/{namespace}/{repo_name}'
else:
raise ValueError(f'Invalid repo type: {repo_type}, supported repos: {REPO_TYPE_SUPPORT}')
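With the new `repo_exists` guard, `create_repo` becomes idempotent: if the model or dataset repo already exists, creation is skipped and the existing URL is returned. A hedged usage sketch (the repo id is made up; login/token handling is omitted):

# Sketch: a second create_repo call for the same repo_id now returns the
# existing URL instead of failing.
from modelscope.hub.api import HubApi
from modelscope.hub.constants import Visibility

api = HubApi()
url = api.create_repo('my-namespace/my-model', visibility=Visibility.PUBLIC)
url = api.create_repo('my-namespace/my-model', visibility=Visibility.PUBLIC)  # repo already exists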

View File

@@ -58,3 +58,16 @@ class DatasetVisibility(object):
PRIVATE = 1
INTERNAL = 3
PUBLIC = 5
class Visibility(object):
PRIVATE = 'private'
INTERNAL = 'internal'
PUBLIC = 'public'
VisibilityMap = {
ModelVisibility.PRIVATE: Visibility.PRIVATE,
ModelVisibility.INTERNAL: Visibility.INTERNAL,
ModelVisibility.PUBLIC: Visibility.PUBLIC
}
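The new `Visibility` constants and `VisibilityMap` translate the integer visibility codes used by the hub backend (`ModelVisibility`) into the string values accepted by `create_repo`, replacing the ad-hoc `DatasetVisibilityMap` dict removed from `constant.py` further below. Illustrative use (the integer values of `ModelVisibility` are not shown in this hunk):

# The map is keyed on the ModelVisibility constants themselves, so these
# assertions hold regardless of the underlying integer codes.
from modelscope.hub.constants import ModelVisibility, Visibility, VisibilityMap

assert VisibilityMap[ModelVisibility.PUBLIC] == Visibility.PUBLIC    # e.g. 5 -> 'public'
assert VisibilityMap[ModelVisibility.PRIVATE] == Visibility.PRIVATE  # e.g. 1 -> 'private'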

View File

@@ -51,7 +51,10 @@ def _push_files_to_hub(
with tempfile.TemporaryDirectory() as temp_cache_dir:
from modelscope.hub.repository import Repository
repo = Repository(temp_cache_dir, repo_id, revision=revision)
sub_folder = os.path.join(temp_cache_dir, path_in_repo)
if path_in_repo:
sub_folder = os.path.join(temp_cache_dir, path_in_repo)
else:
sub_folder = temp_cache_dir
os.makedirs(sub_folder, exist_ok=True)
if os.path.isfile(path_or_fileobj):
dest_file = os.path.join(sub_folder,
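The guard above keeps `_push_files_to_hub` working when `path_in_repo` is empty or None: `os.path.join(dir, None)` raises TypeError, and an empty path should mean the repo root. The same fallback as a tiny standalone sketch (the helper name is hypothetical):

# Hypothetical helper mirroring the inline fallback above.
import os
import tempfile
from typing import Optional

def resolve_sub_folder(temp_cache_dir: str, path_in_repo: Optional[str]) -> str:
    # Empty/None path_in_repo means "repo root"; avoid os.path.join(dir, None).
    return os.path.join(temp_cache_dir, path_in_repo) if path_in_repo else temp_cache_dir

with tempfile.TemporaryDirectory() as d:
    assert resolve_sub_folder(d, None) == d
    assert resolve_sub_folder(d, 'weights') == os.path.join(d, 'weights')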

View File

@@ -2,20 +2,15 @@
import hashlib
import os
import shutil
import tempfile
from datetime import datetime
from pathlib import Path
from typing import BinaryIO, List, Optional, Union
import requests
from typing import List, Optional, Union
from modelscope.hub.constants import (DEFAULT_MODELSCOPE_DOMAIN,
DEFAULT_MODELSCOPE_GROUP,
MODEL_ID_SEPARATOR, MODELSCOPE_SDK_DEBUG,
MODELSCOPE_URL_SCHEME)
from modelscope.hub.errors import FileIntegrityError
from modelscope.utils.file_utils import get_default_modelscope_cache_dir
from modelscope.utils.logger import get_logger
logger = get_logger()

View File

@@ -4,11 +4,14 @@ import os
import torch
import torch.nn as nn
from modelscope import get_logger
from modelscope.metainfo import Models
from modelscope.models.base.base_torch_model import TorchModel
from modelscope.models.builder import MODELS
from modelscope.utils.constant import ModelFile, Tasks
logger = get_logger()
def normalize_fn(tensor, mean, std):
"""Differentiable version of torchvision.functional.normalize"""
@@ -41,10 +44,15 @@ class NormalizeByChannelMeanStd(nn.Module):
class EasyRobustModel(TorchModel):
def __init__(self, model_dir: str, **kwargs):
import easyrobust.models
try:
import easyrobust.models
except ImportError as e:
logger.error(
'You are using `EasyRobustModel`, but this model requires `easyrobust`, '
'please install it with `pip install easyrobust`')
raise e
from timm.models import create_model
from mmcls.datasets import ImageNet
import modelscope.models.cv.image_classification.backbones
from modelscope.utils.hub import read_config
super().__init__(model_dir)
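Together with dropping `easyrobust` from the requirements file (last diff below), the try/except turns it into an optional dependency: the import only fails, with an actionable message, when `EasyRobustModel` is actually constructed. The pattern in isolation (plain `logging` used here instead of the modelscope logger):

# Hedged sketch of the optional-dependency import pattern used above.
import logging

logger = logging.getLogger(__name__)

try:
    import easyrobust.models  # noqa: F401  -- only EasyRobustModel needs this
except ImportError:
    logger.error('`EasyRobustModel` requires `easyrobust`; '
                 'install it with `pip install easyrobust`')
    raise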

View File

@@ -584,9 +584,6 @@ class MetaDataFields:
ARGS_BIG_DATA = 'big_data'
DatasetVisibilityMap = {1: 'private', 3: 'internal', 5: 'public'}
class DistributedParallelType(object):
"""Parallel Strategies for Distributed Models"""
DP = 'data_parallel'

View File

@@ -75,8 +75,12 @@ if TYPE_CHECKING:
else:
from .patcher import get_all_imported_modules, _patch_pretrained_class
all_available_modules = _patch_pretrained_class(
get_all_imported_modules(), wrap=True)
for module in all_available_modules:
globals()[module.__name__] = module
try:
all_available_modules = _patch_pretrained_class(
get_all_imported_modules(), wrap=True)
except Exception: # noqa
import traceback
traceback.print_exc()
else:
for module in all_available_modules:
globals()[module.__name__] = module

View File

@@ -25,25 +25,33 @@ def get_all_imported_modules():
"""Find all modules in transformers/peft/diffusers"""
all_imported_modules = []
transformers_include_names = [
'Auto', 'T5', 'BitsAndBytes', 'GenerationConfig', 'Quant', 'Awq',
'GPTQ', 'BatchFeature', 'Qwen', 'Llama', 'Pipeline'
'Auto.*', 'T5.*', 'BitsAndBytesConfig', 'GenerationConfig', 'Awq.*',
'GPTQ.*', 'BatchFeature', 'Qwen.*', 'Llama.*', 'PretrainedConfig',
'PreTrainedTokenizer', 'PreTrainedModel', 'PreTrainedTokenizerFast',
'Pipeline'
]
diffusers_include_names = ['Pipeline']
peft_include_names = ['.*PeftModel.*', '.*Config']
diffusers_include_names = ['^(?!TF|Flax).*Pipeline$']
if importlib.util.find_spec('transformers') is not None:
import transformers
lazy_module = sys.modules['transformers']
_import_structure = lazy_module._import_structure
for key in _import_structure:
if 'dummy' in key.lower():
continue
values = _import_structure[key]
for value in values:
# pretrained
if any([name in value for name in transformers_include_names]):
if any([
re.fullmatch(name, value)
for name in transformers_include_names
]):
try:
module = importlib.import_module(
f'.{key}', transformers.__name__)
value = getattr(module, value)
all_imported_modules.append(value)
except (ImportError, AttributeError):
except: # noqa
pass
if importlib.util.find_spec('peft') is not None:
@@ -56,8 +64,11 @@ def get_all_imported_modules():
imports = [
attr for attr in attributes if not attr.startswith('__')
]
all_imported_modules.extend(
[getattr(peft, _import) for _import in imports])
all_imported_modules.extend([
getattr(peft, _import) for _import in imports if any([
re.fullmatch(name, _import) for name in peft_include_names
])
])
if importlib.util.find_spec('diffusers') is not None:
try:
@@ -69,10 +80,12 @@ def get_all_imported_modules():
if hasattr(lazy_module, '_import_structure'):
_import_structure = lazy_module._import_structure
for key in _import_structure:
if 'dummy' in key.lower():
continue
values = _import_structure[key]
for value in values:
if any([
name in value
re.fullmatch(name, value)
for name in diffusers_include_names
]):
try:
@@ -80,15 +93,20 @@ def get_all_imported_modules():
f'.{key}', diffusers.__name__)
value = getattr(module, value)
all_imported_modules.append(value)
except (ImportError, AttributeError):
except: # noqa
pass
else:
attributes = dir(lazy_module)
imports = [
attr for attr in attributes if not attr.startswith('__')
]
all_imported_modules.extend(
[getattr(lazy_module, _import) for _import in imports])
all_imported_modules.extend([
getattr(lazy_module, _import) for _import in imports
if any([
re.fullmatch(name, _import)
for name in diffusers_include_names
])
])
return all_imported_modules
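Switching the include lists from substring checks (`name in value`) to `re.fullmatch` patterns makes the filtering precise: `'Auto.*'` still matches every `Auto*` class, while the diffusers pattern `'^(?!TF|Flax).*Pipeline$'` excludes the TF/Flax variants that a plain `'Pipeline'` substring test would have pulled in. Illustration with made-up class names:

# Substring vs. fullmatch filtering (names are illustrative only).
import re

names = ['StableDiffusionPipeline', 'TFSomePipeline', 'FlaxSomePipeline']
substring_hits = [n for n in names if 'Pipeline' in n]                               # all three
fullmatch_hits = [n for n in names if re.fullmatch(r'^(?!TF|Flax).*Pipeline$', n)]   # first only
assert fullmatch_hits == ['StableDiffusionPipeline']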
@@ -107,41 +125,63 @@ def _patch_pretrained_class(all_imported_modules, wrap=False):
allow_file_pattern=None,
**kwargs):
from modelscope import snapshot_download
subfolder = kwargs.pop('subfolder', None)
file_filter = None
if subfolder:
file_filter = f'{subfolder}/*'
if not os.path.exists(pretrained_model_name_or_path):
revision = kwargs.pop('revision', None)
if revision is None or revision == 'main':
revision = 'master'
if file_filter is not None:
allow_file_pattern = file_filter
model_dir = snapshot_download(
pretrained_model_name_or_path,
revision=revision,
ignore_file_pattern=ignore_file_pattern,
allow_file_pattern=allow_file_pattern)
if subfolder:
model_dir = os.path.join(model_dir, subfolder)
else:
model_dir = pretrained_model_name_or_path
return model_dir
def patch_pretrained_model_name_or_path(pretrained_model_name_or_path,
def patch_pretrained_model_name_or_path(cls, pretrained_model_name_or_path,
*model_args, **kwargs):
"""Patch all from_pretrained/get_config_dict"""
"""Patch all from_pretrained"""
model_dir = get_model_dir(pretrained_model_name_or_path,
kwargs.pop('ignore_file_pattern', None),
kwargs.pop('allow_file_pattern', None),
**kwargs)
return kwargs.pop('ori_func')(model_dir, *model_args, **kwargs)
return cls._from_pretrained_origin.__func__(cls, model_dir,
*model_args, **kwargs)
def patch_peft_model_id(model, model_id, *model_args, **kwargs):
def patch_get_config_dict(cls, pretrained_model_name_or_path, *model_args,
**kwargs):
"""Patch all get_config_dict"""
model_dir = get_model_dir(pretrained_model_name_or_path,
kwargs.pop('ignore_file_pattern', None),
kwargs.pop('allow_file_pattern', None),
**kwargs)
return cls._get_config_dict_origin.__func__(cls, model_dir,
*model_args, **kwargs)
def patch_peft_model_id(cls, model, model_id, *model_args, **kwargs):
"""Patch all peft.from_pretrained"""
model_dir = get_model_dir(model_id,
kwargs.pop('ignore_file_pattern', None),
kwargs.pop('allow_file_pattern', None),
**kwargs)
return kwargs.pop('ori_func')(model, model_dir, *model_args, **kwargs)
return cls._from_pretrained_origin.__func__(cls, model, model_dir,
*model_args, **kwargs)
def _get_peft_type(model_id, **kwargs):
def patch_get_peft_type(cls, model_id, **kwargs):
"""Patch all _get_peft_type"""
model_dir = get_model_dir(model_id,
kwargs.pop('ignore_file_pattern', None),
kwargs.pop('allow_file_pattern', None),
**kwargs)
return kwargs.pop('ori_func')(model_dir, **kwargs)
return cls._get_peft_type_origin.__func__(cls, model_dir, **kwargs)
def get_wrapped_class(
module_class: 'PreTrainedModel',
@@ -292,7 +332,7 @@ def _patch_pretrained_class(all_imported_modules, wrap=False):
has_get_peft_type = hasattr(var, '_get_peft_type')
has_get_config_dict = hasattr(var, 'get_config_dict')
has_save_pretrained = hasattr(var, 'save_pretrained')
except ImportError:
except: # noqa
continue
# save_pretrained is not a classmethod and cannot be overridden by replacing
@@ -305,7 +345,7 @@ def _patch_pretrained_class(all_imported_modules, wrap=False):
else:
all_available_modules.append(
get_wrapped_class(var, **ignore_file_pattern_kwargs))
except Exception:
except: # noqa
all_available_modules.append(var)
else:
if has_from_pretrained and not hasattr(var,
@@ -315,29 +355,24 @@ def _patch_pretrained_class(all_imported_modules, wrap=False):
is_peft = 'model' in parameters and 'model_id' in parameters
var._from_pretrained_origin = var.from_pretrained
if not is_peft:
var.from_pretrained = partial(
patch_pretrained_model_name_or_path,
ori_func=var._from_pretrained_origin,
**ignore_file_pattern_kwargs)
var.from_pretrained = classmethod(
partial(patch_pretrained_model_name_or_path,
**ignore_file_pattern_kwargs))
else:
var.from_pretrained = partial(
patch_peft_model_id,
ori_func=var._from_pretrained_origin,
**ignore_file_pattern_kwargs)
var.from_pretrained = classmethod(
partial(patch_peft_model_id,
**ignore_file_pattern_kwargs))
if has_get_peft_type and not hasattr(var, '_get_peft_type_origin'):
var._get_peft_type_origin = var._get_peft_type
var._get_peft_type = partial(
_get_peft_type,
ori_func=var._get_peft_type_origin,
**ignore_file_pattern_kwargs)
var._get_peft_type = classmethod(
partial(patch_get_peft_type, **ignore_file_pattern_kwargs))
if has_get_config_dict and not hasattr(var,
'_get_config_dict_origin'):
var._get_config_dict_origin = var.get_config_dict
var.get_config_dict = partial(
patch_pretrained_model_name_or_path,
ori_func=var._get_config_dict_origin,
**ignore_file_pattern_kwargs)
var.get_config_dict = classmethod(
partial(patch_get_config_dict,
**ignore_file_pattern_kwargs))
all_available_modules.append(var)
return all_available_modules
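The rework above stops smuggling the original function through an `ori_func` kwarg; instead the original is saved as `_from_pretrained_origin` on the class and the patch is installed as a `classmethod`, so the class arrives explicitly as `cls` and the saved original can be invoked via `cls._from_pretrained_origin.__func__`. A condensed, self-contained sketch of that pattern (class and helper names are made up):

# Condensed sketch of the classmethod(partial(...)) patching pattern.
from functools import partial

class Dummy:
    @classmethod
    def from_pretrained(cls, name, **kwargs):
        return f'{cls.__name__} loaded {name}'

def patched_from_pretrained(cls, name, *args, lowercase=True, **kwargs):
    resolved = name.lower() if lowercase else name   # stand-in for snapshot_download()
    # call the saved original through __func__ so cls is passed explicitly
    return cls._from_pretrained_origin.__func__(cls, resolved, *args, **kwargs)

Dummy._from_pretrained_origin = Dummy.from_pretrained
Dummy.from_pretrained = classmethod(partial(patched_from_pretrained, lowercase=True))

assert Dummy.from_pretrained('MY-MODEL') == 'Dummy loaded my-model'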
@@ -352,7 +387,7 @@ def _unpatch_pretrained_class(all_imported_modules):
has_from_pretrained = hasattr(var, 'from_pretrained')
has_get_peft_type = hasattr(var, '_get_peft_type')
has_get_config_dict = hasattr(var, 'get_config_dict')
except ImportError:
except: # noqa
continue
if has_from_pretrained and hasattr(var, '_from_pretrained_origin'):
var.from_pretrained = var._from_pretrained_origin
@@ -390,6 +425,8 @@ def _patch_hub():
from modelscope.hub.api import HubApi
api = HubApi()
api.login(token)
if revision is None or revision == 'main':
revision = 'master'
return api.file_exists(repo_id, filename, revision=revision)
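Several of the patched hub entry points now normalize the revision before forwarding it: Hugging Face clients default to the `main` branch, while ModelScope repositories use `master`, so both `None` and `'main'` map to `'master'`. The shared two-line check as a hypothetical helper (the diff inlines it at each call site):

# Hypothetical helper mirroring the inline revision normalization.
from typing import Optional

def normalize_revision(revision: Optional[str]) -> str:
    # HF defaults to 'main'; ModelScope repos use 'master'.
    if revision is None or revision == 'main':
        return 'master'
    return revision

assert normalize_revision(None) == 'master'
assert normalize_revision('main') == 'master'
assert normalize_revision('v1.0.0') == 'v1.0.0'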
def _file_download(repo_id: str,
@@ -419,6 +456,8 @@ def _patch_hub():
from modelscope import HubApi
api = HubApi()
api.login(token)
if revision is None or revision == 'main':
revision = 'master'
return file_download(
repo_id,
file_path=os.path.join(subfolder, filename)
@@ -476,6 +515,8 @@ def _patch_hub():
**kwargs,
):
from modelscope.hub.push_to_hub import _push_files_to_hub
if revision is None or revision == 'main':
revision = 'master'
_push_files_to_hub(
path_or_fileobj=folder_path,
path_in_repo=path_in_repo,
@@ -508,6 +549,8 @@ def _patch_hub():
commit_description: Optional[str] = None,
**kwargs,
):
if revision is None or revision == 'main':
revision = 'master'
from modelscope.hub.push_to_hub import _push_files_to_hub
_push_files_to_hub(path_or_fileobj, path_in_repo, repo_id, token,
revision, commit_message, commit_description)
@@ -530,7 +573,8 @@ def _patch_hub():
if any(['Add' not in op.__class__.__name__ for op in operations]):
raise ValueError(
'ModelScope create_commit only support Add operation for now.')
if revision is None or revision == 'main':
revision = 'master'
all_files = [op.path_or_fileobj for op in operations]
api.upload_folder(
repo_id=repo_id,
@@ -541,18 +585,43 @@ def _patch_hub():
revision=revision,
repo_type=repo_type or 'model')
def load(
cls,
repo_id_or_path: Union[str, Path],
repo_type: Optional[str] = None,
token: Optional[str] = None,
ignore_metadata_errors: bool = False,
):
from modelscope.hub.api import HubApi
api = HubApi()
api.login(token)
if os.path.exists(repo_id_or_path):
file_path = repo_id_or_path
elif repo_type == 'model' or repo_type is None:
from modelscope import model_file_download
file_path = model_file_download(repo_id_or_path, 'README.md')
elif repo_type == 'dataset':
from modelscope import dataset_file_download
file_path = dataset_file_download(repo_id_or_path, 'README.md')
else:
raise ValueError(
f'repo_type should be `model` or `dataset`, but got {repo_type}'
)
with open(file_path, 'r') as f:
repo_card = cls(
f.read(), ignore_metadata_errors=ignore_metadata_errors)
if not hasattr(repo_card.data, 'tags'):
repo_card.data.tags = []
return repo_card
# Patch repocard.validate
from huggingface_hub import repocard
if not hasattr(repocard.RepoCard, '_validate_origin'):
def load(*args, **kwargs):
from huggingface_hub.errors import EntryNotFoundError
raise EntryNotFoundError(message='API not supported.')
repocard.RepoCard._validate_origin = repocard.RepoCard.validate
repocard.RepoCard.validate = lambda *args, **kwargs: None
repocard.RepoCard._load_origin = repocard.RepoCard.load
repocard.RepoCard.load = load
repocard.RepoCard.load = MethodType(load, repocard.RepoCard)
if not hasattr(hf_api, '_hf_hub_download_origin'):
# Patch hf_hub_download
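Binding the new `load` with `MethodType(load, repocard.RepoCard)` makes the class itself the implicit first argument (`cls`), so the replacement behaves like a classmethod on `RepoCard` without re-wrapping it as a descriptor. A stripped-down sketch of that binding (class and argument names are made up):

# MethodType(func, SomeClass) binds the class as the first argument.
from types import MethodType

class Card:
    def __init__(self, text):
        self.text = text

def load(cls, path_or_text):
    # stand-in for downloading README.md via model_file_download()
    return cls(path_or_text)

Card.load = MethodType(load, Card)

card = Card.load('hello')
assert isinstance(card, Card) and card.text == 'hello'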

View File

@@ -8,7 +8,6 @@ control_ldm
ddpm_guided_diffusion
diffusers
easydict
easyrobust
edit_distance
face_alignment>=1.3.5
fairscale>=0.4.1