diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index 6d20383d..13f61ff3 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -19,6 +19,10 @@ on: swift_branch: description: 'SWIFT branch to use(release/x.xx)' required: true + ci_image: + description: 'Set as the CI image' + default: '0' + required: false other_params: description: 'Other params in --xxx xxx' required: false @@ -47,4 +51,4 @@ jobs: run: | set -e source ~/.bashrc - python docker/build_image.py --image_type ${{ github.event.inputs.image_type }} --modelscope_branch ${{ github.event.inputs.modelscope_branch }} --modelscope_version ${{ github.event.inputs.modelscope_version }} --swift_branch ${{ github.event.inputs.swift_branch }} ${{ github.event.inputs.other_params }} + python docker/build_image.py --image_type ${{ github.event.inputs.image_type }} --modelscope_branch ${{ github.event.inputs.modelscope_branch }} --modelscope_version ${{ github.event.inputs.modelscope_version }} --swift_branch ${{ github.event.inputs.swift_branch }} --ci_image ${{ github.event.inputs.ci_image }} ${{ github.event.inputs.other_params }} diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu index 3461e32e..0ec13d12 100644 --- a/docker/Dockerfile.ubuntu +++ b/docker/Dockerfile.ubuntu @@ -34,6 +34,7 @@ if [ "$INSTALL_MS_DEPS" = "True" ]; then \ pip install --no-cache-dir -r /var/modelscope/tests.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ pip install --no-cache-dir -r /var/modelscope/server.txt && \ pip install --no-cache-dir https://modelscope.oss-cn-beijing.aliyuncs.com/packages/imageio_ffmpeg-0.4.9-py3-none-any.whl --no-dependencies --force && \ + pip install adaseq pai-easycv && \ pip install --no-cache-dir 'scipy<1.13.0' && \ pip install --no-cache-dir funtextprocessing typeguard==2.13.3 scikit-learn -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ pip install --no-cache-dir 
text2sql_lgesql==1.3.0 git+https://github.com/jin-s13/xtcocoapi.git@v1.14 git+https://github.com/gatagat/lap.git@v0.4.0 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html --force --no-deps && \ diff --git a/docker/build_image.py b/docker/build_image.py index e3fc930f..344fc9d3 100644 --- a/docker/build_image.py +++ b/docker/build_image.py @@ -61,6 +61,9 @@ class Builder: def push(self) -> int: pass + def image(self) -> str: + pass + def __call__(self): content = self.generate_dockerfile() self._save_dockerfile(content) @@ -68,10 +71,18 @@ class Builder: ret = self.build() if ret != 0: raise RuntimeError(f'Docker build error with errno: {ret}') + ret = self.push() if ret != 0: raise RuntimeError(f'Docker push error with errno: {ret}') + if self.args.ci_image != 0: + ret = os.system( + f'docker tag {self.image()} {docker_registry}:ci_image') + if ret != 0: + raise RuntimeError( + f'Docker tag ci_image error with errno: {ret}') + class BaseCPUImageBuilder(Builder): @@ -87,18 +98,18 @@ class BaseCPUImageBuilder(Builder): content = content.replace('{tf_version}', self.args.tf_version) return content - def build(self): - image_tag = ( + def image(self) -> str: + return ( f'{docker_registry}:ubuntu{self.args.ubuntu_version}-{self.args.python_tag}-' f'torch{self.args.torch_version}-base') + + def build(self): return os.system( - f'DOCKER_BUILDKIT=0 docker build -t {image_tag} -f Dockerfile .') + f'DOCKER_BUILDKIT=0 docker build -t {self.image()} -f Dockerfile .' 
+ ) def push(self): - image_tag = ( - f'{docker_registry}:ubuntu{self.args.ubuntu_version}-{self.args.python_tag}-' - f'torch{self.args.torch_version}-base') - return os.system(f'docker push {image_tag}') + return os.system(f'docker push {self.image()}') class BaseGPUImageBuilder(Builder): @@ -115,18 +126,18 @@ class BaseGPUImageBuilder(Builder): content = content.replace('{tf_version}', self.args.tf_version) return content - def build(self) -> int: - image_tag = ( + def image(self) -> str: + return ( f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-{self.args.python_tag}-' f'torch{self.args.torch_version}-tf{self.args.tf_version}-base') + + def build(self) -> int: return os.system( - f'DOCKER_BUILDKIT=0 docker build -t {image_tag} -f Dockerfile .') + f'DOCKER_BUILDKIT=0 docker build -t {self.image()} -f Dockerfile .' + ) def push(self): - image_tag = ( - f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-{self.args.python_tag}-' - f'torch{self.args.torch_version}-tf{self.args.tf_version}-base') - return os.system(f'docker push {image_tag}') + return os.system(f'docker push {self.image()}') class CPUImageBuilder(Builder): @@ -139,7 +150,7 @@ class CPUImageBuilder(Builder): base_image = ( f'{docker_registry}:ubuntu{self.args.ubuntu_version}-{self.args.python_tag}' f'-torch{self.args.torch_version}-base') - extra_content = """\nRUN pip install adaseq pai-easycv""" + extra_content = '' with open('docker/Dockerfile.ubuntu', 'r') as f: content = f.read() @@ -162,26 +173,24 @@ class CPUImageBuilder(Builder): content = content.replace('{swift_branch}', self.args.swift_branch) return content - def build(self) -> int: - image_tag = ( + def image(self) -> str: + return ( f'{docker_registry}:ubuntu{self.args.ubuntu_version}-{self.args.python_tag}-' f'torch{self.args.torch_version}-{self.args.modelscope_version}-test' ) - return os.system(f'docker build -t {image_tag} -f Dockerfile .') + + def build(self) -> int: 
+ return os.system(f'docker build -t {self.image()} -f Dockerfile .') def push(self): - image_tag = ( - f'{docker_registry}:ubuntu{self.args.ubuntu_version}-{self.args.python_tag}-' - f'torch{self.args.torch_version}-{self.args.modelscope_version}-test' - ) - ret = os.system(f'docker push {image_tag}') + ret = os.system(f'docker push {self.image()}') if ret != 0: return ret image_tag2 = ( f'{docker_registry}:ubuntu{self.args.ubuntu_version}-{self.args.python_tag}-' f'torch{self.args.torch_version}-{self.args.modelscope_version}-{formatted_time}-test' ) - ret = os.system(f'docker tag {image_tag} {image_tag2}') + ret = os.system(f'docker tag {self.image()} {image_tag2}') if ret != 0: return ret return os.system(f'docker push {image_tag2}') @@ -192,8 +201,7 @@ class GPUImageBuilder(Builder): def generate_dockerfile(self) -> str: meta_file = './docker/install.sh' extra_content = """ -RUN pip install adaseq pai-easycv && \ - pip install tf-keras==2.16.0 --no-dependencies && \ +RUN pip install tf-keras==2.16.0 --no-dependencies && \ pip install --no-cache-dir torchsde jupyterlab torchmetrics==0.11.4 basicsr pynvml shortuuid && \ CUDA_HOME=/usr/local/cuda TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0" \ pip install --no-cache-dir 'git+https://github.com/facebookresearch/detectron2.git' @@ -225,26 +233,24 @@ RUN pip install adaseq pai-easycv && \ content = content.replace('{swift_branch}', self.args.swift_branch) return content - def build(self) -> int: - image_tag = ( + def image(self) -> str: + return ( f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-' f'{self.args.python_tag}-torch{self.args.torch_version}-tf{self.args.tf_version}-' f'{self.args.modelscope_version}-test') - return os.system(f'docker build -t {image_tag} -f Dockerfile .') + + def build(self) -> int: + return os.system(f'docker build -t {self.image()} -f Dockerfile .') def push(self): - image_tag = ( - 
f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-' - f'{self.args.python_tag}-torch{self.args.torch_version}-tf{self.args.tf_version}-' - f'{self.args.modelscope_version}-test') - ret = os.system(f'docker push {image_tag}') + ret = os.system(f'docker push {self.image()}') if ret != 0: return ret image_tag2 = ( f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-' f'{self.args.python_tag}-torch{self.args.torch_version}-tf{self.args.tf_version}-' f'{self.args.modelscope_version}-{formatted_time}-test') - ret = os.system(f'docker tag {image_tag} {image_tag2}') + ret = os.system(f'docker tag {self.image()} {image_tag2}') if ret != 0: return ret return os.system(f'docker push {image_tag2}') @@ -299,26 +305,24 @@ class LLMImageBuilder(Builder): content = content.replace('{swift_branch}', self.args.swift_branch) return content - def build(self) -> int: - image_tag = ( + def image(self) -> str: + return ( f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-' f'{self.args.python_tag}-torch{self.args.torch_version}-{self.args.modelscope_version}-LLM-test' ) - return os.system(f'docker build -t {image_tag} -f Dockerfile .') + + def build(self) -> int: + return os.system(f'docker build -t {self.image()} -f Dockerfile .') def push(self): - image_tag = ( - f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-' - f'{self.args.python_tag}-torch{self.args.torch_version}-{self.args.modelscope_version}-LLM-test' - ) - ret = os.system(f'docker push {image_tag}') + ret = os.system(f'docker push {self.image()}') if ret != 0: return ret image_tag2 = ( f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-' f'{self.args.python_tag}-torch{self.args.torch_version}-' f'{self.args.modelscope_version}-LLM-{formatted_time}-test') - ret = os.system(f'docker tag {image_tag} {image_tag2}') + ret = os.system(f'docker tag {self.image()} {image_tag2}') 
if ret != 0: return ret return os.system(f'docker push {image_tag2}') @@ -332,6 +336,7 @@ parser.add_argument('--ubuntu_version', type=str, default='22.04') parser.add_argument('--torch_version', type=str, default=None) parser.add_argument('--torchvision_version', type=str, default=None) parser.add_argument('--cuda_version', type=str, default=None) +parser.add_argument('--ci_image', type=int, default=0) parser.add_argument('--torchaudio_version', type=str, default=None) parser.add_argument('--tf_version', type=str, default=None) parser.add_argument('--vllm_version', type=str, default=None) diff --git a/modelscope/__init__.py b/modelscope/__init__.py index d60a8c79..c969be68 100644 --- a/modelscope/__init__.py +++ b/modelscope/__init__.py @@ -32,12 +32,28 @@ if TYPE_CHECKING: build_dataset_from_file) from .utils.constant import Tasks if is_transformers_available(): - from .utils.hf_util import AutoConfig, GPTQConfig, AwqConfig, BitsAndBytesConfig from .utils.hf_util import ( - AutoModel, AutoModelForCausalLM, AutoModelForSeq2SeqLM, - AutoModelForSequenceClassification, - AutoModelForTokenClassification, AutoModelForImageSegmentation, - AutoTokenizer, GenerationConfig, AutoImageProcessor, BatchFeature, + AutoModel, AutoProcessor, AutoFeatureExtractor, GenerationConfig, + AutoConfig, GPTQConfig, AwqConfig, BitsAndBytesConfig, + AutoModelForCausalLM, AutoModelForSeq2SeqLM, + AutoModelForVision2Seq, AutoModelForSequenceClassification, + AutoModelForTokenClassification, AutoModelForImageClassification, + AutoModelForImageTextToText, + AutoModelForZeroShotImageClassification, + AutoModelForKeypointDetection, + AutoModelForDocumentQuestionAnswering, + AutoModelForSemanticSegmentation, + AutoModelForUniversalSegmentation, + AutoModelForInstanceSegmentation, AutoModelForObjectDetection, + AutoModelForZeroShotObjectDetection, + AutoModelForAudioClassification, AutoModelForSpeechSeq2Seq, + AutoModelForMaskedImageModeling, + AutoModelForVisualQuestionAnswering, + 
AutoModelForTableQuestionAnswering, AutoModelForImageToImage, + AutoModelForImageSegmentation, AutoModelForQuestionAnswering, + AutoModelForMaskedLM, AutoTokenizer, AutoModelForMaskGeneration, + AutoModelForPreTraining, AutoModelForTextEncoding, + AutoImageProcessor, BatchFeature, Qwen2VLForConditionalGeneration, T5EncoderModel) else: print( @@ -92,12 +108,30 @@ else: if is_transformers_available(): _import_structure['utils.hf_util'] = [ - 'AutoConfig', 'GenerationConfig', 'AutoModel', 'GPTQConfig', - 'AwqConfig', 'BitsAndBytesConfig', 'AutoModelForCausalLM', - 'AutoModelForSeq2SeqLM', 'AutoTokenizer', + 'AutoModel', 'AutoProcessor', 'AutoFeatureExtractor', + 'GenerationConfig', 'AutoConfig', 'GPTQConfig', 'AwqConfig', + 'BitsAndBytesConfig', 'AutoModelForCausalLM', + 'AutoModelForSeq2SeqLM', 'AutoModelForVision2Seq', 'AutoModelForSequenceClassification', - 'AutoModelForTokenClassification', 'AutoModelForImageSegmentation', - 'AutoImageProcessor', 'BatchFeature', 'T5EncoderModel' + 'AutoModelForTokenClassification', + 'AutoModelForImageClassification', 'AutoModelForImageToImage', + 'AutoModelForImageTextToText', + 'AutoModelForZeroShotImageClassification', + 'AutoModelForKeypointDetection', + 'AutoModelForDocumentQuestionAnswering', + 'AutoModelForSemanticSegmentation', + 'AutoModelForUniversalSegmentation', + 'AutoModelForInstanceSegmentation', 'AutoModelForObjectDetection', + 'AutoModelForZeroShotObjectDetection', + 'AutoModelForAudioClassification', 'AutoModelForSpeechSeq2Seq', + 'AutoModelForMaskedImageModeling', + 'AutoModelForVisualQuestionAnswering', + 'AutoModelForTableQuestionAnswering', + 'AutoModelForImageSegmentation', 'AutoModelForQuestionAnswering', + 'AutoModelForMaskedLM', 'AutoTokenizer', + 'AutoModelForMaskGeneration', 'AutoModelForPreTraining', + 'AutoModelForTextEncoding', 'AutoImageProcessor', 'BatchFeature', + 'Qwen2VLForConditionalGeneration', 'T5EncoderModel' ] import sys diff --git a/modelscope/cli/llamafile.py b/modelscope/cli/llamafile.py 
index 528be904..23f3fe91 100644 --- a/modelscope/cli/llamafile.py +++ b/modelscope/cli/llamafile.py @@ -55,15 +55,6 @@ class LlamafileCMD(CLICommand): 'Selected accuracy of GGUF files in the repo. Ignored when "file" is also provided.' ) - group.add_argument( - '--launch', - type=str, - required=False, - default='True', - help= - 'Whether to launch model with the downloaded llamafile, default to True.' - ) - group.add_argument( '--file', type=str, @@ -80,6 +71,15 @@ class LlamafileCMD(CLICommand): 'Directory where the selected llamafile would will be downloaded to.' ) + group.add_argument( + '--launch', + type=str, + required=False, + default='True', + help= + 'Whether to launch model with the downloaded llamafile, default to True.' + ) + parser.set_defaults(func=subparser_func) def execute(self): @@ -106,7 +106,7 @@ class LlamafileCMD(CLICommand): selected_file = f found = True break - if self.args.accuracy and self.args.accuracy in f.lower(): + if self.args.accuracy and self.args.accuracy.lower() in f.lower(): selected_file = f found = True break @@ -124,7 +124,7 @@ class LlamafileCMD(CLICommand): downloaded_file = self._rename_extension(downloaded_file) if self.args.launch.lower() == 'true': - print('Launching model with llamafile:') + print(f'Launching model with llamafile [{downloaded_file}]:') self._execute_llamafile(downloaded_file) else: print( @@ -135,12 +135,24 @@ class LlamafileCMD(CLICommand): current_mode = os.stat(file_path).st_mode new_mode = current_mode | 0o111 os.chmod(file_path, new_mode) - os.system(file_path) + execute_cmd = file_path + has_gpu = False + try: + import torch + has_gpu = torch.cuda.is_available() + except ModuleNotFoundError: + # we depend on torch to detect gpu. 
+ # if torch is not available, we will just assume gpu cannot be used + pass + if has_gpu: + print( + 'GPU detected, launching model with llamafile GPU option >>>') + execute_cmd = f'{execute_cmd} -ngl 999' + os.system(execute_cmd) def _rename_extension(self, original_file_name): directory, filename = os.path.split(original_file_name) base_name, _ = os.path.splitext(filename) - new_filename = f'{base_name}.exe' - new_file_name = os.path.join(directory, new_filename) - os.rename(original_file_name, new_file_name) + new_filename = os.path.join(directory, f'{base_name}.exe') + os.rename(original_file_name, new_filename) return new_filename diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index cee8e43f..a0d97712 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -571,7 +571,7 @@ class HubApi: revision_detail = self.get_branch_tag_detail(all_tags_detail, revision) if revision_detail is None: revision_detail = self.get_branch_tag_detail(all_branches_detail, revision) - logger.info('Development mode use revision: %s' % revision) + logger.debug('Development mode use revision: %s' % revision) else: if revision is not None and revision in all_branches: revision_detail = self.get_branch_tag_detail(all_branches_detail, revision) @@ -1203,7 +1203,7 @@ class ModelScopeConfig: for cookie in cookies: if cookie.is_expired() and not ModelScopeConfig.cookie_expired_warning: ModelScopeConfig.cookie_expired_warning = True - logger.warning( + logger.debug( 'Authentication has expired, ' 'please re-login with modelscope login --token "YOUR_SDK_TOKEN" ' 'if you need to access private models or datasets.') diff --git a/modelscope/hub/constants.py b/modelscope/hub/constants.py index cfc6d6da..b3d03e1a 100644 --- a/modelscope/hub/constants.py +++ b/modelscope/hub/constants.py @@ -29,6 +29,7 @@ API_RESPONSE_FIELD_MESSAGE = 'Message' MODELSCOPE_CLOUD_ENVIRONMENT = 'MODELSCOPE_ENVIRONMENT' MODELSCOPE_CLOUD_USERNAME = 'MODELSCOPE_USERNAME' MODELSCOPE_SDK_DEBUG = 
'MODELSCOPE_SDK_DEBUG' +MODELSCOPE_ENABLE_DEFAULT_HASH_VALIDATION = 'MODELSCOPE_ENABLE_DEFAULT_HASH_VALIDATION' ONE_YEAR_SECONDS = 24 * 365 * 60 * 60 MODELSCOPE_REQUEST_ID = 'X-Request-ID' TEMPORARY_FOLDER_NAME = '._____temp' diff --git a/modelscope/hub/file_download.py b/modelscope/hub/file_download.py index f1cbce6f..40ac8a03 100644 --- a/modelscope/hub/file_download.py +++ b/modelscope/hub/file_download.py @@ -214,7 +214,7 @@ def _repo_file_download( if repo_file['Path'] == file_path: if cache.exists(repo_file): logger.debug( - f'File {repo_file["Name"]} already in cache, skip downloading!' + f'File {repo_file["Name"]} already in cache with identical hash, skip downloading!' ) return cache.get_file_by_info(repo_file) else: @@ -251,7 +251,7 @@ def _repo_file_download( if repo_file['Path'] == file_path: if cache.exists(repo_file): logger.debug( - f'File {repo_file["Name"]} already in cache, skip downloading!' + f'File {repo_file["Name"]} already in cache with identical hash, skip downloading!' 
) return cache.get_file_by_info(repo_file) else: @@ -381,33 +381,34 @@ def parallel_download( file_size: int = None, ): # create temp file + with tqdm( + unit='B', + unit_scale=True, + unit_divisor=1024, + total=file_size, + initial=0, + desc='Downloading [' + file_name + ']', + leave=True, + ) as progress: + PART_SIZE = 160 * 1024 * 1024 # every part is 160M + tasks = [] + file_path = os.path.join(local_dir, file_name) + os.makedirs(os.path.dirname(file_path), exist_ok=True) + for idx in range(int(file_size / PART_SIZE)): + start = idx * PART_SIZE + end = (idx + 1) * PART_SIZE - 1 + tasks.append((file_path, progress, start, end, url, file_name, + cookies, headers)) + if end + 1 < file_size: + tasks.append((file_path, progress, end + 1, file_size - 1, url, + file_name, cookies, headers)) + parallels = MODELSCOPE_DOWNLOAD_PARALLELS if MODELSCOPE_DOWNLOAD_PARALLELS <= 4 else 4 + # download every part + with ThreadPoolExecutor( + max_workers=parallels, + thread_name_prefix='download') as executor: + list(executor.map(download_part_with_retry, tasks)) - progress = tqdm( - unit='B', - unit_scale=True, - unit_divisor=1024, - total=file_size, - initial=0, - desc='Downloading [' + file_name + ']', - ) - PART_SIZE = 160 * 1024 * 1024 # every part is 160M - tasks = [] - file_path = os.path.join(local_dir, file_name) - os.makedirs(os.path.dirname(file_path), exist_ok=True) - for idx in range(int(file_size / PART_SIZE)): - start = idx * PART_SIZE - end = (idx + 1) * PART_SIZE - 1 - tasks.append((file_path, progress, start, end, url, file_name, cookies, - headers)) - if end + 1 < file_size: - tasks.append((file_path, progress, end + 1, file_size - 1, url, - file_name, cookies, headers)) - parallels = MODELSCOPE_DOWNLOAD_PARALLELS if MODELSCOPE_DOWNLOAD_PARALLELS <= 4 else 4 - # download every part - with ThreadPoolExecutor( - max_workers=parallels, thread_name_prefix='download') as executor: - list(executor.map(download_part_with_retry, tasks)) - progress.close() # merge parts. 
with open(os.path.join(local_dir, file_name), 'wb') as output_file: for task in tasks: @@ -457,45 +458,47 @@ def http_get_model_file( allowed_methods=['GET']) while True: try: - progress = tqdm( - unit='B', - unit_scale=True, - unit_divisor=1024, - total=file_size if file_size > 0 else 1, - initial=0, - desc='Downloading [' + file_name + ']', - ) - if file_size == 0: - # Avoid empty file server request - with open(temp_file_path, 'w+'): - progress.update(1) - progress.close() + with tqdm( + unit='B', + unit_scale=True, + unit_divisor=1024, + total=file_size if file_size > 0 else 1, + initial=0, + desc='Downloading [' + file_name + ']', + leave=True, + ) as progress: + if file_size == 0: + # Avoid empty file server request + with open(temp_file_path, 'w+'): + progress.update(1) break - partial_length = 0 - if os.path.exists( - temp_file_path): # download partial, continue download - with open(temp_file_path, 'rb') as f: - partial_length = f.seek(0, io.SEEK_END) - progress.update(partial_length) - if partial_length >= file_size: - break - # closed range[], from 0. - get_headers['Range'] = 'bytes=%s-%s' % (partial_length, - file_size - 1) - with open(temp_file_path, 'ab+') as f: - r = requests.get( - url, - stream=True, - headers=get_headers, - cookies=cookies, - timeout=API_FILE_DOWNLOAD_TIMEOUT) - r.raise_for_status() - for chunk in r.iter_content( - chunk_size=API_FILE_DOWNLOAD_CHUNK_SIZE): - if chunk: # filter out keep-alive new chunks - progress.update(len(chunk)) - f.write(chunk) - progress.close() + # Determine the length of any existing partial download + partial_length = 0 + # download partial, continue download + if os.path.exists(temp_file_path): + with open(temp_file_path, 'rb') as f: + partial_length = f.seek(0, io.SEEK_END) + progress.update(partial_length) + + # Check if download is complete + if partial_length >= file_size: + break + # closed range[], from 0. 
+ get_headers['Range'] = 'bytes=%s-%s' % (partial_length, + file_size - 1) + with open(temp_file_path, 'ab+') as f: + r = requests.get( + url, + stream=True, + headers=get_headers, + cookies=cookies, + timeout=API_FILE_DOWNLOAD_TIMEOUT) + r.raise_for_status() + for chunk in r.iter_content( + chunk_size=API_FILE_DOWNLOAD_CHUNK_SIZE): + if chunk: # filter out keep-alive new chunks + progress.update(len(chunk)) + f.write(chunk) break except (Exception) as e: # no matter what happen, we will retry. retry = retry.increment('GET', url, error=e) diff --git a/modelscope/hub/snapshot_download.py b/modelscope/hub/snapshot_download.py index 915d8108..015cadbd 100644 --- a/modelscope/hub/snapshot_download.py +++ b/modelscope/hub/snapshot_download.py @@ -4,10 +4,13 @@ import fnmatch import os import re import uuid +from concurrent.futures import ThreadPoolExecutor from http.cookiejar import CookieJar from pathlib import Path from typing import Dict, List, Optional, Union +from tqdm.auto import tqdm + from modelscope.hub.api import HubApi, ModelScopeConfig from modelscope.hub.errors import InvalidParameter from modelscope.hub.utils.caching import ModelFileSystemCache @@ -36,6 +39,7 @@ def snapshot_download( local_dir: Optional[str] = None, allow_patterns: Optional[Union[List[str], str]] = None, ignore_patterns: Optional[Union[List[str], str]] = None, + max_workers: int = 8, ) -> str: """Download all files of a repo. Downloads a whole snapshot of a repo's files at the specified revision. This @@ -67,6 +71,7 @@ def snapshot_download( ignore_patterns (`str` or `List`, *optional*, default to `None`): If provided, files matching any of the patterns are not downloaded, priority over ignore_file_pattern. For hugging-face compatibility. + max_workers (`int`): The maximum number of workers to download files, default 8. Raises: ValueError: the value details. 
@@ -94,7 +99,8 @@ def snapshot_download( allow_file_pattern=allow_file_pattern, local_dir=local_dir, ignore_patterns=ignore_patterns, - allow_patterns=allow_patterns) + allow_patterns=allow_patterns, + max_workers=max_workers) def dataset_snapshot_download( @@ -109,6 +115,7 @@ def dataset_snapshot_download( allow_file_pattern: Optional[Union[str, List[str]]] = None, allow_patterns: Optional[Union[List[str], str]] = None, ignore_patterns: Optional[Union[List[str], str]] = None, + max_workers: int = 8, ) -> str: """Download raw files of a dataset. Downloads all files at the specified revision. This @@ -141,6 +148,7 @@ def dataset_snapshot_download( ignore_patterns (`str` or `List`, *optional*, default to `None`): If provided, files matching any of the patterns are not downloaded, priority over ignore_file_pattern. For hugging-face compatibility. + max_workers (`int`): The maximum number of workers to download files, default 8. Raises: ValueError: the value details. @@ -168,7 +176,8 @@ def dataset_snapshot_download( allow_file_pattern=allow_file_pattern, local_dir=local_dir, ignore_patterns=ignore_patterns, - allow_patterns=allow_patterns) + allow_patterns=allow_patterns, + max_workers=max_workers) def _snapshot_download( @@ -185,6 +194,7 @@ def _snapshot_download( local_dir: Optional[str] = None, allow_patterns: Optional[Union[List[str], str]] = None, ignore_patterns: Optional[Union[List[str], str]] = None, + max_workers: int = 8, ): if not repo_type: repo_type = REPO_TYPE_MODEL @@ -261,7 +271,8 @@ def _snapshot_download( ignore_file_pattern=ignore_file_pattern, allow_file_pattern=allow_file_pattern, ignore_patterns=ignore_patterns, - allow_patterns=allow_patterns) + allow_patterns=allow_patterns, + max_workers=max_workers) if '.' 
in repo_id: masked_directory = get_model_masked_directory( directory, repo_id) @@ -279,54 +290,75 @@ def _snapshot_download( elif repo_type == REPO_TYPE_DATASET: directory = os.path.abspath( - local_dir) if local_dir is not None else os.path.join( + local_dir) if local_dir else os.path.join( system_cache, 'datasets', repo_id) print(f'Downloading Dataset to directory: {directory}') + group_or_owner, name = model_id_to_group_owner_name(repo_id) - if not revision: - revision = DEFAULT_DATASET_REVISION - revision_detail = revision - page_number = 1 - page_size = 100 - while True: - files_list_tree = _api.list_repo_tree( - dataset_name=name, - namespace=group_or_owner, - revision=revision, - root_path='/', - recursive=True, - page_number=page_number, - page_size=page_size) - if not ('Code' in files_list_tree - and files_list_tree['Code'] == 200): - print( - 'Get dataset: %s file list failed, request_id: %s, message: %s' - % (repo_id, files_list_tree['RequestId'], - files_list_tree['Message'])) - return None - repo_files = files_list_tree['Data']['Files'] - _download_file_lists( - repo_files, - cache, - temporary_cache_dir, - repo_id, - _api, - name, - group_or_owner, - headers, - repo_type=repo_type, - revision=revision, - cookies=cookies, - ignore_file_pattern=ignore_file_pattern, - allow_file_pattern=allow_file_pattern, - ignore_patterns=ignore_patterns, - allow_patterns=allow_patterns) - if len(repo_files) < page_size: - break - page_number += 1 + revision_detail = revision or DEFAULT_DATASET_REVISION + + logger.info('Fetching dataset repo file list...') + repo_files = fetch_repo_files(_api, name, group_or_owner, + revision_detail) + + if repo_files is None: + logger.error( + f'Failed to retrieve file list for dataset: {repo_id}') + return None + + _download_file_lists( + repo_files, + cache, + temporary_cache_dir, + repo_id, + _api, + name, + group_or_owner, + headers, + repo_type=repo_type, + revision=revision, + cookies=cookies, + 
ignore_file_pattern=ignore_file_pattern, + allow_file_pattern=allow_file_pattern, + ignore_patterns=ignore_patterns, + allow_patterns=allow_patterns, + max_workers=max_workers) cache.save_model_version(revision_info=revision_detail) - return os.path.join(cache.get_root_location()) + cache_root_path = cache.get_root_location() + return cache_root_path + + +def fetch_repo_files(_api, name, group_or_owner, revision): + page_number = 1 + page_size = 150 + repo_files = [] + + while True: + files_list_tree = _api.list_repo_tree( + dataset_name=name, + namespace=group_or_owner, + revision=revision, + root_path='/', + recursive=True, + page_number=page_number, + page_size=page_size) + + if not ('Code' in files_list_tree and files_list_tree['Code'] == 200): + logger.error('Get dataset file list failed, request_id: ' + f'{files_list_tree["RequestId"]}, ' + f'message: {files_list_tree["Message"]}') + return None + + cur_repo_files = files_list_tree['Data']['Files'] + repo_files.extend(cur_repo_files) + + if len(cur_repo_files) < page_size: + break + + page_number += 1 + + return repo_files def _is_valid_regex(pattern: str): @@ -358,23 +390,38 @@ def _get_valid_regex_pattern(patterns: List[str]): return None +def thread_download(func, iterable, max_workers, **kwargs): + # Create a tqdm progress bar with the total number of files to fetch + with tqdm( + total=len(iterable), + desc=f'Fetching {len(iterable)} files') as pbar: + # Define a wrapper function to update the progress bar + def progress_wrapper(*args, **kwargs): + result = func(*args, **kwargs) + pbar.update(1) + return result + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + list(executor.map(progress_wrapper, iterable))  # drain so worker errors re-raise + + def _download_file_lists( - repo_files: List[str], - cache: ModelFileSystemCache, - temporary_cache_dir: str, - repo_id: str, - api: HubApi, - name: str, - group_or_owner: str, - headers, - repo_type: Optional[str] = None, - revision: Optional[str] = DEFAULT_MODEL_REVISION, - cookies: 
Optional[CookieJar] = None, - ignore_file_pattern: Optional[Union[str, List[str]]] = None, - allow_file_pattern: Optional[Union[str, List[str]]] = None, - allow_patterns: Optional[Union[List[str], str]] = None, - ignore_patterns: Optional[Union[List[str], str]] = None, -): + repo_files: List[str], + cache: ModelFileSystemCache, + temporary_cache_dir: str, + repo_id: str, + api: HubApi, + name: str, + group_or_owner: str, + headers, + repo_type: Optional[str] = None, + revision: Optional[str] = DEFAULT_MODEL_REVISION, + cookies: Optional[CookieJar] = None, + ignore_file_pattern: Optional[Union[str, List[str]]] = None, + allow_file_pattern: Optional[Union[str, List[str]]] = None, + allow_patterns: Optional[Union[List[str], str]] = None, + ignore_patterns: Optional[Union[List[str], str]] = None, + max_workers: int = 8): ignore_patterns = _normalize_patterns(ignore_patterns) allow_patterns = _normalize_patterns(allow_patterns) ignore_file_pattern = _normalize_patterns(ignore_file_pattern) @@ -382,6 +429,7 @@ def _download_file_lists( # to compatible regex usage. ignore_regex_pattern = _get_valid_regex_pattern(ignore_file_pattern) + filtered_repo_files = [] for repo_file in repo_files: if repo_file['Type'] == 'tree': continue @@ -416,17 +464,20 @@ def _download_file_lists( fnmatch.fnmatch(repo_file['Path'], pattern) for pattern in allow_file_pattern): continue + # check model_file is exist in cache, if existed, skip download + if cache.exists(repo_file): + file_name = os.path.basename(repo_file['Name']) + logger.debug( + f'File {file_name} already in cache with identical hash, skip downloading!' 
+ ) + continue except Exception as e: logger.warning('The file pattern is invalid : %s' % e) + else: + filtered_repo_files.append(repo_file) - # check model_file is exist in cache, if existed, skip download, otherwise download - if cache.exists(repo_file): - file_name = os.path.basename(repo_file['Name']) - logger.debug( - f'File {file_name} already in cache, skip downloading!') - continue + def _download_single_file(repo_file): if repo_type == REPO_TYPE_MODEL: - # get download url url = get_file_download_url( model_id=repo_id, file_path=repo_file['Path'], @@ -441,6 +492,10 @@ def _download_file_lists( raise InvalidParameter( f'Invalid repo type: {repo_type}, supported types: {REPO_TYPE_SUPPORT}' ) - download_file(url, repo_file, temporary_cache_dir, cache, headers, cookies) + + if len(filtered_repo_files) > 0: + thread_download(_download_single_file, filtered_repo_files, + max_workers) + logger.info(f"Download {repo_type} '{repo_id}' successfully.") diff --git a/modelscope/hub/utils/caching.py b/modelscope/hub/utils/caching.py index ecdec7cc..e1dcf83b 100644 --- a/modelscope/hub/utils/caching.py +++ b/modelscope/hub/utils/caching.py @@ -7,9 +7,15 @@ import tempfile from shutil import move, rmtree from typing import Dict +from modelscope.hub.constants import ( # noqa + FILE_HASH, MODELSCOPE_ENABLE_DEFAULT_HASH_VALIDATION) +from modelscope.hub.utils.utils import compute_hash from modelscope.utils.logger import get_logger logger = get_logger() + +enable_default_hash_validation = \ + os.getenv(MODELSCOPE_ENABLE_DEFAULT_HASH_VALIDATION, 'False').strip().lower() == 'true' """Implements caching functionality, used internally only """ @@ -252,26 +258,40 @@ class ModelFileSystemCache(FileSystemCache): return cache_key def exists(self, model_file_info): - """Check the file is cached or not. + """Check the file is cached or not. 
Note existence check will also cover digest check Args: model_file_info (CachedFileInfo): The cached file info Returns: - bool: If exists return True otherwise False + bool: If exists and has the same hash, return True otherwise False """ key = self.__get_cache_key(model_file_info) is_exists = False + file_path = key['Path'] + cache_file_path = os.path.join(self.cache_root_location, + model_file_info['Path']) for cached_key in self.cached_files: - if cached_key['Path'] == key['Path'] and ( + if cached_key['Path'] == file_path and ( cached_key['Revision'].startswith(key['Revision']) or key['Revision'].startswith(cached_key['Revision'])): - is_exists = True - break - file_path = os.path.join(self.cache_root_location, - model_file_info['Path']) + expected_hash = model_file_info[FILE_HASH] + if expected_hash is not None and os.path.exists( + cache_file_path): + # compute hash only when enabled, otherwise just meet expectation by default + if enable_default_hash_validation: + cache_file_sha256 = compute_hash(cache_file_path) + else: + cache_file_sha256 = expected_hash + if expected_hash == cache_file_sha256: + is_exists = True + break + else: + logger.info( + f'File [{file_path}] exists in cache but with a mismatched hash, will re-download.' 
+ ) if is_exists: - if os.path.exists(file_path): + if os.path.exists(cache_file_path): return True else: self.remove_key( diff --git a/modelscope/pipelines/cv/ocr_detection_pipeline.py b/modelscope/pipelines/cv/ocr_detection_pipeline.py index c23f6e6e..5b0fbda5 100644 --- a/modelscope/pipelines/cv/ocr_detection_pipeline.py +++ b/modelscope/pipelines/cv/ocr_detection_pipeline.py @@ -27,6 +27,8 @@ RBOX_DIM = 5 OFFSET_DIM = 6 WORD_POLYGON_DIM = 8 OFFSET_VARIANCE = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1] +TF_NODE_THRESHOLD = 0.4 +TF_LINK_THRESHOLD = 0.6 @PIPELINES.register_module( @@ -39,7 +41,7 @@ class OCRDetectionPipeline(Pipeline): ```python >>> from modelscope.pipelines import pipeline - >>> ocr_detection = pipeline('ocr_detection', model='damo/cv_resnet18_ocr-detection-line-level_damo') + >>> ocr_detection = pipeline('ocr-detection', model='damo/cv_resnet18_ocr-detection-line-level_damo') >>> result = ocr_detection('https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/ocr_detection.jpg') {'polygons': array([[220, 14, 780, 14, 780, 64, 220, 64], @@ -87,9 +89,9 @@ class OCRDetectionPipeline(Pipeline): tf = tf.compat.v1 tf.compat.v1.disable_eager_execution() - tf.app.flags.DEFINE_float('node_threshold', 0.4, + tf.app.flags.DEFINE_float('node_threshold', TF_NODE_THRESHOLD, 'Confidence threshold for nodes') - tf.app.flags.DEFINE_float('link_threshold', 0.6, + tf.app.flags.DEFINE_float('link_threshold', TF_LINK_THRESHOLD, 'Confidence threshold for links') tf.reset_default_graph() model_path = osp.join( @@ -192,18 +194,6 @@ class OCRDetectionPipeline(Pipeline): return result else: # for model seglink++ - import tensorflow as tf - - if tf.__version__ >= '2.0': - tf = tf.compat.v1 - - tf.compat.v1.disable_eager_execution() - - tf.app.flags.DEFINE_float('node_threshold', 0.4, - 'Confidence threshold for nodes') - tf.app.flags.DEFINE_float('link_threshold', 0.6, - 'Confidence threshold for links') - img = LoadImage.convert_to_ndarray(input) h, w, c = img.shape diff --git 
a/modelscope/pipelines/multi_modal/efficient_diffusion_tuning_pipeline.py b/modelscope/pipelines/multi_modal/efficient_diffusion_tuning_pipeline.py index 1b791634..320d83e7 100644 --- a/modelscope/pipelines/multi_modal/efficient_diffusion_tuning_pipeline.py +++ b/modelscope/pipelines/multi_modal/efficient_diffusion_tuning_pipeline.py @@ -36,8 +36,10 @@ class EfficientDiffusionTuningPipeline(Pipeline): 'data/test/images/vision_efficient_tuning_test_1.png') >>> print(f'Output: {result}.') """ + logger.warn( + '[NOTE]Do not use this pipeline because the dependencies are too old, ' + 'use https://github.com/modelscope/DiffSynth-Studio instead') super().__init__(model=model, **kwargs) - self.device = 'cuda' if torch.cuda.is_available() else 'cpu' self.model = self.model.to(self.device) self.model.eval() diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py index 9d517724..8f7c06da 100644 --- a/modelscope/utils/hf_util.py +++ b/modelscope/utils/hf_util.py @@ -8,14 +8,52 @@ from transformers import AutoConfig as AutoConfigHF from transformers import AutoFeatureExtractor as AutoFeatureExtractorHF from transformers import AutoImageProcessor as AutoImageProcessorHF from transformers import AutoModel as AutoModelHF +from transformers import \ + AutoModelForAudioClassification as AutoModelForAudioClassificationHF from transformers import AutoModelForCausalLM as AutoModelForCausalLMHF +from transformers import \ + AutoModelForDocumentQuestionAnswering as \ + AutoModelForDocumentQuestionAnsweringHF +from transformers import \ + AutoModelForImageClassification as AutoModelForImageClassificationHF from transformers import \ AutoModelForImageSegmentation as AutoModelForImageSegmentationHF +from transformers import \ + AutoModelForInstanceSegmentation as AutoModelForInstanceSegmentationHF +from transformers import \ + AutoModelForMaskedImageModeling as AutoModelForMaskedImageModelingHF +from transformers import AutoModelForMaskedLM as AutoModelForMaskedLMHF +from 
transformers import \ + AutoModelForMaskGeneration as AutoModelForMaskGenerationHF +from transformers import \ + AutoModelForObjectDetection as AutoModelForObjectDetectionHF +from transformers import AutoModelForPreTraining as AutoModelForPreTrainingHF +from transformers import \ + AutoModelForQuestionAnswering as AutoModelForQuestionAnsweringHF +from transformers import \ + AutoModelForSemanticSegmentation as AutoModelForSemanticSegmentationHF from transformers import AutoModelForSeq2SeqLM as AutoModelForSeq2SeqLMHF from transformers import \ AutoModelForSequenceClassification as AutoModelForSequenceClassificationHF +from transformers import \ + AutoModelForSpeechSeq2Seq as AutoModelForSpeechSeq2SeqHF +from transformers import \ + AutoModelForTableQuestionAnswering as AutoModelForTableQuestionAnsweringHF +from transformers import AutoModelForTextEncoding as AutoModelForTextEncodingHF from transformers import \ AutoModelForTokenClassification as AutoModelForTokenClassificationHF +from transformers import \ + AutoModelForUniversalSegmentation as AutoModelForUniversalSegmentationHF +from transformers import AutoModelForVision2Seq as AutoModelForVision2SeqHF +from transformers import \ + AutoModelForVisualQuestionAnswering as \ + AutoModelForVisualQuestionAnsweringHF +from transformers import \ + AutoModelForZeroShotImageClassification as \ + AutoModelForZeroShotImageClassificationHF +from transformers import \ + AutoModelForZeroShotObjectDetection as \ + AutoModelForZeroShotObjectDetectionHF from transformers import AutoProcessor as AutoProcessorHF from transformers import AutoTokenizer as AutoTokenizerHF from transformers import BatchFeature as BatchFeatureHF @@ -24,6 +62,7 @@ from transformers import GenerationConfig as GenerationConfigHF from transformers import (PretrainedConfig, PreTrainedModel, PreTrainedTokenizerBase) from transformers import T5EncoderModel as T5EncoderModelHF +from transformers import __version__ as transformers_version from modelscope import 
snapshot_download from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke @@ -39,6 +78,21 @@ except ImportError: logger = get_logger() +class UnsupportedAutoClass: + + def __init__(self, name: str): + self.error_msg =\ + f'{name} is not supported with your installed Transformers version {transformers_version}. ' + \ + 'Please update your Transformers by "pip install transformers -U".' + + def from_pretrained(self, pretrained_model_name_or_path, *model_args, + **kwargs): + raise ImportError(self.error_msg) + + def from_config(self, cls, config): + raise ImportError(self.error_msg) + + def user_agent(invoked_by=None): if invoked_by is None: invoked_by = Invoke.PRETRAINED @@ -272,7 +326,7 @@ def get_wrapped_class(module_class, ignore_file_pattern = kwargs.pop('ignore_file_pattern', default_ignore_file_pattern) subfolder = kwargs.pop('subfolder', default_file_filter) - + file_filter = None if subfolder: file_filter = f'{subfolder}/*' if not os.path.exists(pretrained_model_name_or_path): @@ -309,31 +363,106 @@ def get_wrapped_class(module_class, AutoModel = get_wrapped_class(AutoModelHF) AutoModelForCausalLM = get_wrapped_class(AutoModelForCausalLMHF) AutoModelForSeq2SeqLM = get_wrapped_class(AutoModelForSeq2SeqLMHF) +AutoModelForVision2Seq = get_wrapped_class(AutoModelForVision2SeqHF) AutoModelForSequenceClassification = get_wrapped_class( AutoModelForSequenceClassificationHF) AutoModelForTokenClassification = get_wrapped_class( AutoModelForTokenClassificationHF) AutoModelForImageSegmentation = get_wrapped_class( AutoModelForImageSegmentationHF) +AutoModelForImageClassification = get_wrapped_class( + AutoModelForImageClassificationHF) +AutoModelForZeroShotImageClassification = get_wrapped_class( + AutoModelForZeroShotImageClassificationHF) +try: + from transformers import AutoModelForImageToImage as AutoModelForImageToImageHF + AutoModelForImageToImage = get_wrapped_class(AutoModelForImageToImageHF) +except ImportError: + AutoModelForImageToImage = 
UnsupportedAutoClass('AutoModelForImageToImage') + +try: + from transformers import AutoModelForImageTextToText as AutoModelForImageTextToTextHF + AutoModelForImageTextToText = get_wrapped_class( + AutoModelForImageTextToTextHF) +except ImportError: + AutoModelForImageTextToText = UnsupportedAutoClass( + 'AutoModelForImageTextToText') + +try: + from transformers import AutoModelForKeypointDetection as AutoModelForKeypointDetectionHF + AutoModelForKeypointDetection = get_wrapped_class( + AutoModelForKeypointDetectionHF) +except ImportError: + AutoModelForKeypointDetection = UnsupportedAutoClass( + 'AutoModelForKeypointDetection') + +AutoModelForQuestionAnswering = get_wrapped_class( + AutoModelForQuestionAnsweringHF) +AutoModelForTableQuestionAnswering = get_wrapped_class( + AutoModelForTableQuestionAnsweringHF) +AutoModelForVisualQuestionAnswering = get_wrapped_class( + AutoModelForVisualQuestionAnsweringHF) +AutoModelForDocumentQuestionAnswering = get_wrapped_class( + AutoModelForDocumentQuestionAnsweringHF) +AutoModelForSemanticSegmentation = get_wrapped_class( + AutoModelForSemanticSegmentationHF) +AutoModelForUniversalSegmentation = get_wrapped_class( + AutoModelForUniversalSegmentationHF) +AutoModelForInstanceSegmentation = get_wrapped_class( + AutoModelForInstanceSegmentationHF) +AutoModelForObjectDetection = get_wrapped_class(AutoModelForObjectDetectionHF) +AutoModelForZeroShotObjectDetection = get_wrapped_class( + AutoModelForZeroShotObjectDetectionHF) +AutoModelForAudioClassification = get_wrapped_class( + AutoModelForAudioClassificationHF) +AutoModelForSpeechSeq2Seq = get_wrapped_class(AutoModelForSpeechSeq2SeqHF) +AutoModelForMaskedImageModeling = get_wrapped_class( + AutoModelForMaskedImageModelingHF) +AutoModelForMaskedLM = get_wrapped_class(AutoModelForMaskedLMHF) +AutoModelForMaskGeneration = get_wrapped_class(AutoModelForMaskGenerationHF) +AutoModelForPreTraining = get_wrapped_class(AutoModelForPreTrainingHF) +AutoModelForTextEncoding = 
get_wrapped_class(AutoModelForTextEncodingHF) T5EncoderModel = get_wrapped_class(T5EncoderModelHF) +try: + from transformers import \ + Qwen2VLForConditionalGeneration as Qwen2VLForConditionalGenerationHF + Qwen2VLForConditionalGeneration = get_wrapped_class( + Qwen2VLForConditionalGenerationHF) +except ImportError: + Qwen2VLForConditionalGeneration = UnsupportedAutoClass( + 'Qwen2VLForConditionalGeneration') AutoTokenizer = get_wrapped_class( AutoTokenizerHF, ignore_file_pattern=[ - r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt' + r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5' + ]) +AutoProcessor = get_wrapped_class( + AutoProcessorHF, + ignore_file_pattern=[ + r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5' ]) AutoConfig = get_wrapped_class( AutoConfigHF, ignore_file_pattern=[ - r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt' + r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5' ]) GenerationConfig = get_wrapped_class( GenerationConfigHF, ignore_file_pattern=[ - r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt' + r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5' ]) +BitsAndBytesConfig = get_wrapped_class( + BitsAndBytesConfigHF, + ignore_file_pattern=[ + r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5' + ]) +AutoImageProcessor = get_wrapped_class( + AutoImageProcessorHF, + ignore_file_pattern=[ + r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5' + ]) + GPTQConfig = GPTQConfigHF AwqConfig = AwqConfigHF -BitsAndBytesConfig = BitsAndBytesConfigHF -AutoImageProcessor = get_wrapped_class(AutoImageProcessorHF) BatchFeature = get_wrapped_class(BatchFeatureHF) diff --git a/requirements/cv.txt b/requirements/cv.txt index d54e5dc5..842cded2 100644 --- a/requirements/cv.txt +++ b/requirements/cv.txt @@ -32,7 +32,7 @@ mmdet>=2.25.0,<=2.28.2 # mmdet3d-1.0.0rc6 remove networkx and numba version restriction mmdet3d==1.0.0a1 
mmsegmentation<=0.30.0 -moviepy>=1.0.3 +moviepy==1.0.3 nerfacc==0.2.2 networkx numba diff --git a/tests/cli/test_llamfafile_cmd.py b/tests/cli/test_llamfafile_cmd.py index 616ed78c..d0ff7574 100644 --- a/tests/cli/test_llamfafile_cmd.py +++ b/tests/cli/test_llamfafile_cmd.py @@ -29,6 +29,24 @@ class LlamafileCMDTest(unittest.TestCase): in output) self.assertTrue('Launching model with llamafile' in output) + accuracy = 'Q2_K' + cmd = f'python -m modelscope.cli.cli {self.cmd} --model {self.model_id} --accuracy {accuracy}' + stat, output = subprocess.getstatusoutput(cmd) + self.assertEqual(stat, 0) + self.assertTrue( + 'llamafile matching criteria found: [My-Model-14B-Q2_K.llamafile]' + in output) + self.assertTrue('Launching model with llamafile' in output) + + accuracy = 'q2_k' + cmd = f'python -m modelscope.cli.cli {self.cmd} --model {self.model_id} --accuracy {accuracy}' + stat, output = subprocess.getstatusoutput(cmd) + self.assertEqual(stat, 0) + self.assertTrue( + 'llamafile matching criteria found: [My-Model-14B-Q2_K.llamafile]' + in output) + self.assertTrue('Launching model with llamafile' in output) + def test_given_file(self): file = 'My-Model-14B-FP16.llamafile' cmd = f'python -m modelscope.cli.cli {self.cmd} --model {self.model_id} --file {file}' diff --git a/tests/pipelines/test_efficient_diffusion_tuning.py b/tests/pipelines/test_efficient_diffusion_tuning.py index 1f224917..af52d65f 100644 --- a/tests/pipelines/test_efficient_diffusion_tuning.py +++ b/tests/pipelines/test_efficient_diffusion_tuning.py @@ -11,10 +11,10 @@ from modelscope.utils.test_utils import test_level class EfficientDiffusionTuningTest(unittest.TestCase): def setUp(self) -> None: - os.system('pip install ms-swift -U') + # os.system('pip install ms-swift -U') self.task = Tasks.efficient_diffusion_tuning - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip def test_efficient_diffusion_tuning_lora_run_pipeline(self): model_id = 
'damo/multi-modal_efficient-diffusion-tuning-lora' model_revision = 'v1.0.2' @@ -24,7 +24,7 @@ class EfficientDiffusionTuningTest(unittest.TestCase): result = edt_pipeline(inputs) print(f'Efficient-diffusion-tuning-lora output: {result}.') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip def test_efficient_diffusion_tuning_lora_load_model_from_pretrained(self): model_id = 'damo/multi-modal_efficient-diffusion-tuning-lora' model_revision = 'v1.0.2' @@ -32,7 +32,7 @@ class EfficientDiffusionTuningTest(unittest.TestCase): from modelscope.models.multi_modal import EfficientStableDiffusion self.assertTrue(model.__class__ == EfficientStableDiffusion) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip def test_efficient_diffusion_tuning_control_lora_run_pipeline(self): # TODO: to be fixed in the future model_id = 'damo/multi-modal_efficient-diffusion-tuning-control-lora' @@ -48,7 +48,7 @@ class EfficientDiffusionTuningTest(unittest.TestCase): result = edt_pipeline(inputs) print(f'Efficient-diffusion-tuning-control-lora output: {result}.') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip def test_efficient_diffusion_tuning_control_lora_load_model_from_pretrained( self): model_id = 'damo/multi-modal_efficient-diffusion-tuning-control-lora'