diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu index 0ec13d12..cd48d85d 100644 --- a/docker/Dockerfile.ubuntu +++ b/docker/Dockerfile.ubuntu @@ -66,5 +66,5 @@ RUN echo "cache bust $(date +%Y%m%d%H%M%S)" && \ ENV SETUPTOOLS_USE_DISTUTILS=stdlib ENV VLLM_USE_MODELSCOPE=True ENV LMDEPLOY_USE_MODELSCOPE=True -ENV MODELSCOPE_CACHE=/mnt/workspace/.cache/modelscope +ENV MODELSCOPE_CACHE=/mnt/workspace/.cache/modelscope/hub SHELL ["/bin/bash", "-c"] diff --git a/docker/install.sh b/docker/install.sh index d7d367dc..ee747d20 100644 --- a/docker/install.sh +++ b/docker/install.sh @@ -8,12 +8,14 @@ lmdeploy_version=${5:-0.6.1} autogptq_version=${6:-0.7.1} flashattn_version=${7:-2.7.1.post4} -pip install --no-cache-dir -U autoawq lmdeploy==$lmdeploy_version - pip uninstall -y torch torchvision torchaudio pip install --no-cache-dir torch==$torch_version torchvision==$torchvision_version torchaudio==$torchaudio_version +pip install --no-cache-dir -U autoawq lmdeploy==$lmdeploy_version + +pip install --no-cache-dir torch==$torch_version torchvision==$torchvision_version torchaudio==$torchaudio_version + pip install --no-cache-dir tiktoken transformers_stream_generator bitsandbytes deepspeed torchmetrics decord optimum # pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiTRUE-cp310-cp310-linux_x86_64.whl diff --git a/modelscope/cli/upload.py b/modelscope/cli/upload.py index 453a6314..f48b6cd6 100644 --- a/modelscope/cli/upload.py +++ b/modelscope/cli/upload.py @@ -168,7 +168,7 @@ class UploadCMD(CLICommand): commit_description=self.args.commit_description, repo_type=self.args.repo_type, allow_patterns=convert_patterns(self.args.include), - ignore_patterns=convert_patterns(self.args.exclude), + ignore_file_pattern=convert_patterns(self.args.exclude), max_workers=self.args.max_workers, ) else: diff --git a/modelscope/hub/file_download.py b/modelscope/hub/file_download.py index 00eb8abf..ee0f5d89 
100644 --- a/modelscope/hub/file_download.py +++ b/modelscope/hub/file_download.py @@ -4,6 +4,7 @@ import copy import hashlib import io import os +import shutil import tempfile import urllib import uuid @@ -286,6 +287,41 @@ def _repo_file_download( temporary_cache_dir, cache, headers, cookies) +def move_legacy_cache_to_standard_dir(cache_dir: str, model_id: str): + if cache_dir.endswith(os.path.sep): + cache_dir = cache_dir.rstrip(os.path.sep) + legacy_cache_root = os.path.dirname(cache_dir) + base_name = os.path.basename(cache_dir) + if base_name == 'datasets': + # datasets will not be affected + return + if not legacy_cache_root.endswith('hub'): + # Two scenarios: + # We have restructured ModelScope cache directory, + # Scenario 1: + # When MODELSCOPE_CACHE is not set, the default directory remains + # the same at ~/.cache/modelscope/hub + # Scenario 2: + # When MODELSCOPE_CACHE is set, the cache directory is moved from + # $MODELSCOPE_CACHE/hub to $MODELSCOPE_CACHE/. In this case, + # we will be migrating the hub directory accordingly. 
+ legacy_cache_root = os.path.join(legacy_cache_root, 'hub') + group_or_owner, name = model_id_to_group_owner_name(model_id) + name = name.replace('.', '___') + temporary_cache_dir = os.path.join(cache_dir, group_or_owner, name) + legacy_cache_dir = os.path.join(legacy_cache_root, group_or_owner, name) + if os.path.exists( + legacy_cache_dir) and not os.path.exists(temporary_cache_dir): + logger.info( + f'Legacy cache dir exists: {legacy_cache_dir}, move to {temporary_cache_dir}' + ) + try: + shutil.move(legacy_cache_dir, temporary_cache_dir) + except Exception: # noqa + # Failed, skip + pass + + def create_temporary_directory_and_cache(model_id: str, local_dir: str = None, cache_dir: str = None, @@ -294,6 +330,10 @@ def create_temporary_directory_and_cache(model_id: str, default_cache_root = get_model_cache_root() elif repo_type == REPO_TYPE_DATASET: default_cache_root = get_dataset_cache_root() + else: + raise ValueError( + f'repo_type only support model and dataset, but now is : {repo_type}' + ) group_or_owner, name = model_id_to_group_owner_name(model_id) if local_dir is not None: @@ -302,6 +342,7 @@ def create_temporary_directory_and_cache(model_id: str, else: if cache_dir is None: cache_dir = default_cache_root + move_legacy_cache_to_standard_dir(cache_dir, model_id) if isinstance(cache_dir, Path): cache_dir = str(cache_dir) temporary_cache_dir = os.path.join(cache_dir, TEMPORARY_FOLDER_NAME, diff --git a/modelscope/hub/snapshot_download.py b/modelscope/hub/snapshot_download.py index 77b49847..2c79050c 100644 --- a/modelscope/hub/snapshot_download.py +++ b/modelscope/hub/snapshot_download.py @@ -17,7 +17,6 @@ from modelscope.hub.utils.utils import (get_model_masked_directory, model_id_to_group_owner_name) from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, DEFAULT_MODEL_REVISION, - DEFAULT_REPOSITORY_REVISION, REPO_TYPE_DATASET, REPO_TYPE_MODEL, REPO_TYPE_SUPPORT) from modelscope.utils.file_utils import get_modelscope_cache_dir @@ -246,7 +245,6 
@@ def _snapshot_download( _api = HubApi() if cookies is None: cookies = ModelScopeConfig.get_cookies() - repo_files = [] if repo_type == REPO_TYPE_MODEL: directory = os.path.abspath( local_dir) if local_dir is not None else os.path.join( @@ -313,7 +311,6 @@ def _snapshot_download( local_dir) if local_dir else os.path.join( system_cache, 'datasets', *repo_id.split('/')) print(f'Downloading Dataset to directory: {directory}') - group_or_owner, name = model_id_to_group_owner_name(repo_id) revision_detail = revision or DEFAULT_DATASET_REVISION diff --git a/modelscope/utils/hf_util/patcher.py b/modelscope/utils/hf_util/patcher.py index 43933ca9..74264c13 100644 --- a/modelscope/utils/hf_util/patcher.py +++ b/modelscope/utils/hf_util/patcher.py @@ -47,29 +47,48 @@ def get_all_imported_modules(): pass if importlib.util.find_spec('peft') is not None: - import peft - attributes = dir(peft) - imports = [attr for attr in attributes if not attr.startswith('__')] - all_imported_modules.extend( - [getattr(peft, _import) for _import in imports]) + try: + import peft + except: # noqa + pass + else: + attributes = dir(peft) + imports = [ + attr for attr in attributes if not attr.startswith('__') + ] + all_imported_modules.extend( + [getattr(peft, _import) for _import in imports]) if importlib.util.find_spec('diffusers') is not None: - import diffusers - if importlib.util.find_spec('diffusers') is not None: + try: + import diffusers + except: # noqa + pass + else: lazy_module = sys.modules['diffusers'] - _import_structure = lazy_module._import_structure - for key in _import_structure: - values = _import_structure[key] - for value in values: - if any([name in value - for name in diffusers_include_names]): - try: - module = importlib.import_module( - f'.{key}', diffusers.__name__) - value = getattr(module, value) - all_imported_modules.append(value) - except (ImportError, AttributeError): - pass + if hasattr(lazy_module, '_import_structure'): + _import_structure = 
lazy_module._import_structure + for key in _import_structure: + values = _import_structure[key] + for value in values: + if any([ + name in value + for name in diffusers_include_names + ]): + try: + module = importlib.import_module( + f'.{key}', diffusers.__name__) + value = getattr(module, value) + all_imported_modules.append(value) + except (ImportError, AttributeError): + pass + else: + attributes = dir(lazy_module) + imports = [ + attr for attr in attributes if not attr.startswith('__') + ] + all_imported_modules.extend( + [getattr(lazy_module, _import) for _import in imports]) return all_imported_modules