This commit is contained in:
Yingda Chen
2025-02-07 18:19:40 +08:00
6 changed files with 86 additions and 27 deletions

View File

@@ -66,5 +66,5 @@ RUN echo "cache bust $(date +%Y%m%d%H%M%S)" && \
ENV SETUPTOOLS_USE_DISTUTILS=stdlib
ENV VLLM_USE_MODELSCOPE=True
ENV LMDEPLOY_USE_MODELSCOPE=True
ENV MODELSCOPE_CACHE=/mnt/workspace/.cache/modelscope
ENV MODELSCOPE_CACHE=/mnt/workspace/.cache/modelscope/hub
SHELL ["/bin/bash", "-c"]

View File

@@ -8,12 +8,14 @@ lmdeploy_version=${5:-0.6.1}
autogptq_version=${6:-0.7.1}
flashattn_version=${7:-2.7.1.post4}
pip install --no-cache-dir -U autoawq lmdeploy==$lmdeploy_version
pip uninstall -y torch torchvision torchaudio
pip install --no-cache-dir torch==$torch_version torchvision==$torchvision_version torchaudio==$torchaudio_version
pip install --no-cache-dir -U autoawq lmdeploy==$lmdeploy_version
pip install --no-cache-dir torch==$torch_version torchvision==$torchvision_version torchaudio==$torchaudio_version
pip install --no-cache-dir tiktoken transformers_stream_generator bitsandbytes deepspeed torchmetrics decord optimum
# pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiTRUE-cp310-cp310-linux_x86_64.whl

View File

@@ -168,7 +168,7 @@ class UploadCMD(CLICommand):
commit_description=self.args.commit_description,
repo_type=self.args.repo_type,
allow_patterns=convert_patterns(self.args.include),
ignore_patterns=convert_patterns(self.args.exclude),
allow_file_pattern=convert_patterns(self.args.include),
max_workers=self.args.max_workers,
)
else:

View File

@@ -4,6 +4,7 @@ import copy
import hashlib
import io
import os
import shutil
import tempfile
import urllib
import uuid
@@ -286,6 +287,41 @@ def _repo_file_download(
temporary_cache_dir, cache, headers, cookies)
def move_legacy_cache_to_standard_dir(cache_dir: str, model_id: str):
    """Migrate a model's legacy cache directory into the restructured layout.

    Older ModelScope versions stored models under ``<cache root>/hub``; the
    restructured layout stores them directly under the cache root. If the
    model exists at the legacy location and not at the new one, move it over.
    Migration is best-effort: failures are logged and otherwise ignored.

    Args:
        cache_dir (str): The current (new-style) model cache directory.
        model_id (str): The model id in ``group_or_owner/name`` form.
    """
    if cache_dir.endswith(os.path.sep):
        # rstrip, not strip: strip() would also remove the *leading*
        # separator and turn an absolute path into a relative one.
        cache_dir = cache_dir.rstrip(os.path.sep)
    legacy_cache_root = os.path.dirname(cache_dir)
    base_name = os.path.basename(cache_dir)
    if base_name == 'datasets':
        # Dataset caches were not relocated by the restructuring.
        return
    if not legacy_cache_root.endswith('hub'):
        # We have restructured the ModelScope cache directory. Two scenarios:
        # Scenario 1:
        #   When MODELSCOPE_CACHE is not set, the default directory remains
        #   the same at ~/.cache/modelscope/hub.
        # Scenario 2:
        #   When MODELSCOPE_CACHE is set, the cache directory moved from
        #   $MODELSCOPE_CACHE/hub to $MODELSCOPE_CACHE/. In this case,
        #   we migrate the legacy hub directory accordingly.
        legacy_cache_root = os.path.join(legacy_cache_root, 'hub')
    group_or_owner, name = model_id_to_group_owner_name(model_id)
    # Dots in model names are escaped as '___' on disk.
    name = name.replace('.', '___')
    standard_cache_dir = os.path.join(cache_dir, group_or_owner, name)
    legacy_cache_dir = os.path.join(legacy_cache_root, group_or_owner, name)
    if os.path.exists(
            legacy_cache_dir) and not os.path.exists(standard_cache_dir):
        logger.info(
            f'Legacy cache dir exists: {legacy_cache_dir}, move to {standard_cache_dir}'
        )
        try:
            shutil.move(legacy_cache_dir, standard_cache_dir)
        except Exception as e:  # noqa
            # Best-effort migration: keep the legacy copy usable on failure,
            # but leave a trace instead of swallowing the error silently.
            logger.warning(
                f'Failed to move legacy cache dir {legacy_cache_dir}: {e}')
def create_temporary_directory_and_cache(model_id: str,
local_dir: str = None,
cache_dir: str = None,
@@ -294,6 +330,10 @@ def create_temporary_directory_and_cache(model_id: str,
default_cache_root = get_model_cache_root()
elif repo_type == REPO_TYPE_DATASET:
default_cache_root = get_dataset_cache_root()
else:
raise ValueError(
f'repo_type only support model and dataset, but now is : {repo_type}'
)
group_or_owner, name = model_id_to_group_owner_name(model_id)
if local_dir is not None:
@@ -302,6 +342,7 @@ def create_temporary_directory_and_cache(model_id: str,
else:
if cache_dir is None:
cache_dir = default_cache_root
move_legacy_cache_to_standard_dir(cache_dir, model_id)
if isinstance(cache_dir, Path):
cache_dir = str(cache_dir)
temporary_cache_dir = os.path.join(cache_dir, TEMPORARY_FOLDER_NAME,

View File

@@ -17,7 +17,6 @@ from modelscope.hub.utils.utils import (get_model_masked_directory,
model_id_to_group_owner_name)
from modelscope.utils.constant import (DEFAULT_DATASET_REVISION,
DEFAULT_MODEL_REVISION,
DEFAULT_REPOSITORY_REVISION,
REPO_TYPE_DATASET, REPO_TYPE_MODEL,
REPO_TYPE_SUPPORT)
from modelscope.utils.file_utils import get_modelscope_cache_dir
@@ -246,7 +245,6 @@ def _snapshot_download(
_api = HubApi()
if cookies is None:
cookies = ModelScopeConfig.get_cookies()
repo_files = []
if repo_type == REPO_TYPE_MODEL:
directory = os.path.abspath(
local_dir) if local_dir is not None else os.path.join(
@@ -313,7 +311,6 @@ def _snapshot_download(
local_dir) if local_dir else os.path.join(
system_cache, 'datasets', *repo_id.split('/'))
print(f'Downloading Dataset to directory: {directory}')
group_or_owner, name = model_id_to_group_owner_name(repo_id)
revision_detail = revision or DEFAULT_DATASET_REVISION

View File

@@ -47,29 +47,48 @@ def get_all_imported_modules():
pass
if importlib.util.find_spec('peft') is not None:
import peft
attributes = dir(peft)
imports = [attr for attr in attributes if not attr.startswith('__')]
all_imported_modules.extend(
[getattr(peft, _import) for _import in imports])
try:
import peft
except: # noqa
pass
else:
attributes = dir(peft)
imports = [
attr for attr in attributes if not attr.startswith('__')
]
all_imported_modules.extend(
[getattr(peft, _import) for _import in imports])
if importlib.util.find_spec('diffusers') is not None:
import diffusers
if importlib.util.find_spec('diffusers') is not None:
try:
import diffusers
except: # noqa
pass
else:
lazy_module = sys.modules['diffusers']
_import_structure = lazy_module._import_structure
for key in _import_structure:
values = _import_structure[key]
for value in values:
if any([name in value
for name in diffusers_include_names]):
try:
module = importlib.import_module(
f'.{key}', diffusers.__name__)
value = getattr(module, value)
all_imported_modules.append(value)
except (ImportError, AttributeError):
pass
if hasattr(lazy_module, '_import_structure'):
_import_structure = lazy_module._import_structure
for key in _import_structure:
values = _import_structure[key]
for value in values:
if any([
name in value
for name in diffusers_include_names
]):
try:
module = importlib.import_module(
f'.{key}', diffusers.__name__)
value = getattr(module, value)
all_imported_modules.append(value)
except (ImportError, AttributeError):
pass
else:
attributes = dir(lazy_module)
imports = [
attr for attr in attributes if not attr.startswith('__')
]
all_imported_modules.extend(
[getattr(lazy_module, _import) for _import in imports])
return all_imported_modules