From 5ca12c6cc425f129bc0e383597b73fda7c1ed76d Mon Sep 17 00:00:00 2001 From: Yingda Chen Date: Mon, 25 Nov 2024 16:31:52 +0800 Subject: [PATCH 01/22] Llamafile support gpu flag (#1097) * add gpu flag when gpu is detected * fix typo * fix typo * add printout prompt --------- Co-authored-by: Yingda Chen --- modelscope/cli/llamafile.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/modelscope/cli/llamafile.py b/modelscope/cli/llamafile.py index 528be904..39d1346f 100644 --- a/modelscope/cli/llamafile.py +++ b/modelscope/cli/llamafile.py @@ -135,7 +135,20 @@ class LlamafileCMD(CLICommand): current_mode = os.stat(file_path).st_mode new_mode = current_mode | 0o111 os.chmod(file_path, new_mode) - os.system(file_path) + execute_cmd = file_path + has_gpu = False + try: + import torch + has_gpu = torch.cuda.is_available() + except ModuleNotFoundError: + # we depend on torch to detect gpu. + # if torch is not available, we will just assume gpu cannot be used + pass + if has_gpu: + print( + 'GPU detected, launching model with llamafile GPU option >>>') + execute_cmd = f'{execute_cmd} -ngl 999' + os.system(execute_cmd) def _rename_extension(self, original_file_name): directory, filename = os.path.split(original_file_name) From 4a3b255d53511607866c743125c3bc1c24d1248f Mon Sep 17 00:00:00 2001 From: Yingda Chen Date: Mon, 25 Nov 2024 22:15:03 +0800 Subject: [PATCH 02/22] change warning to debug (#1099) Co-authored-by: Yingda Chen --- modelscope/hub/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index cee8e43f..499a8e07 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -1203,7 +1203,7 @@ class ModelScopeConfig: for cookie in cookies: if cookie.is_expired() and not ModelScopeConfig.cookie_expired_warning: ModelScopeConfig.cookie_expired_warning = True - logger.warning( + logger.debug( 'Authentication has expired, ' 'please re-login with modelscope login --token "YOUR_SDK_TOKEN" ' 'if you need to access private models or datasets.') From 6d9e6d57c02ba2d6b5ea4bf0e3e35e3d9524c433 Mon Sep 17 00:00:00 2001 From: Yingda Chen Date: Mon, 25 Nov 2024 22:16:05 +0800 Subject: [PATCH 03/22] More automodel (#1098) * add more hf alias --------- Co-authored-by: Yingda Chen --- modelscope/__init__.py | 18 ++++++++++----- modelscope/utils/hf_util.py | 45 ++++++++++++++++++++++++++++++++----- 2 files changed, 52 insertions(+), 11 deletions(-) diff --git a/modelscope/__init__.py b/modelscope/__init__.py index d60a8c79..80cd861a 100644 --- a/modelscope/__init__.py +++ b/modelscope/__init__.py @@ -36,9 +36,12 @@ if TYPE_CHECKING: from .utils.hf_util import ( AutoModel, AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification, - AutoModelForTokenClassification, AutoModelForImageSegmentation, - AutoTokenizer, GenerationConfig, AutoImageProcessor, BatchFeature, - T5EncoderModel) + AutoModelForTokenClassification, AutoModelForImageClassification, + AutoModelForImageToImage, AutoModelForImageSegmentation, + AutoModelForQuestionAnswering, AutoModelForMaskedLM, AutoTokenizer, + AutoModelForMaskGeneration, AutoModelForPreTraining, + AutoModelForTextEncoding, GenerationConfig, AutoImageProcessor, + BatchFeature, T5EncoderModel) else: print( 'transformer is not installed, please install it if you want to use related modules' @@ -96,8 +99,13 @@ else: 'AwqConfig', 'BitsAndBytesConfig', 'AutoModelForCausalLM', 'AutoModelForSeq2SeqLM', 'AutoTokenizer', 'AutoModelForSequenceClassification', - 'AutoModelForTokenClassification', 'AutoModelForImageSegmentation', - 'AutoImageProcessor', 'BatchFeature', 'T5EncoderModel' + 'AutoModelForTokenClassification', + 'AutoModelForImageClassification', 'AutoModelForImageToImage', + 'AutoModelForQuestionAnswering', 'AutoModelForMaskedLM', + 'AutoModelForMaskGeneration', 'AutoModelForPreTraining', + 'AutoModelForTextEncoding', 'AutoModelForTokenClassification', + 'AutoModelForImageSegmentation', 'AutoImageProcessor', + 'BatchFeature', 'T5EncoderModel' ] import sys diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py index 9d517724..a67b7886 100644 --- a/modelscope/utils/hf_util.py +++ b/modelscope/utils/hf_util.py @@ -9,11 +9,21 @@ from transformers import AutoFeatureExtractor as AutoFeatureExtractorHF from transformers import AutoImageProcessor as AutoImageProcessorHF from transformers import AutoModel as AutoModelHF from transformers import AutoModelForCausalLM as AutoModelForCausalLMHF +from transformers import \ + AutoModelForImageClassification as AutoModelForImageClassificationHF from transformers import \ AutoModelForImageSegmentation as AutoModelForImageSegmentationHF +from transformers import AutoModelForImageToImage as AutoModelForImageToImageHF +from transformers import AutoModelForMaskedLM as AutoModelForMaskedLMHF +from transformers import \ + AutoModelForMaskGeneration as AutoModelForMaskGenerationHF +from transformers import AutoModelForPreTraining as AutoModelForPreTrainingHF +from transformers import \ + AutoModelForQuestionAnswering as AutoModelForQuestionAnsweringHF from transformers import AutoModelForSeq2SeqLM as AutoModelForSeq2SeqLMHF from transformers import \ AutoModelForSequenceClassification as AutoModelForSequenceClassificationHF +from transformers import AutoModelForTextEncoding as AutoModelForTextEncodingHF from transformers import \ AutoModelForTokenClassification as AutoModelForTokenClassificationHF from transformers import AutoProcessor as AutoProcessorHF @@ -272,7 +282,7 @@ def get_wrapped_class(module_class, ignore_file_pattern = kwargs.pop('ignore_file_pattern', default_ignore_file_pattern) subfolder = kwargs.pop('subfolder', default_file_filter) - + file_filter = None if subfolder: file_filter = f'{subfolder}/*' if not os.path.exists(pretrained_model_name_or_path): @@ -315,25 +325,48 @@ AutoModelForTokenClassification = get_wrapped_class( AutoModelForTokenClassificationHF) AutoModelForImageSegmentation = get_wrapped_class( AutoModelForImageSegmentationHF) +AutoModelForImageClassification = get_wrapped_class( + AutoModelForImageClassificationHF) +AutoModelForImageToImage = get_wrapped_class(AutoModelForImageToImageHF) +AutoModelForQuestionAnswering = get_wrapped_class( + AutoModelForQuestionAnsweringHF) +AutoModelForMaskedLM = get_wrapped_class(AutoModelForMaskedLMHF) +AutoModelForMaskGeneration = get_wrapped_class(AutoModelForMaskGenerationHF) +AutoModelForPreTraining = get_wrapped_class(AutoModelForPreTrainingHF) +AutoModelForTextEncoding = get_wrapped_class(AutoModelForTextEncodingHF) T5EncoderModel = get_wrapped_class(T5EncoderModelHF) AutoTokenizer = get_wrapped_class( AutoTokenizerHF, ignore_file_pattern=[ - r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt' + r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5' + ]) +AutoProcessor = get_wrapped_class( + AutoProcessorHF, + ignore_file_pattern=[ + r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5' ]) AutoConfig = get_wrapped_class( AutoConfigHF, ignore_file_pattern=[ - r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt' + r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5' ]) GenerationConfig = get_wrapped_class( GenerationConfigHF, ignore_file_pattern=[ - r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt' + r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5' ]) +BitsAndBytesConfig = get_wrapped_class( + BitsAndBytesConfigHF, + ignore_file_pattern=[ + r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5' + ]) +AutoImageProcessor = get_wrapped_class( + AutoImageProcessorHF, + ignore_file_pattern=[ + r'\w+\.bin', r'\w+\.safetensors', r'\w+\.pth', r'\w+\.pt', r'\w+\.h5' + ]) + GPTQConfig = GPTQConfigHF AwqConfig = AwqConfigHF -BitsAndBytesConfig = BitsAndBytesConfigHF -AutoImageProcessor = get_wrapped_class(AutoImageProcessorHF) BatchFeature = get_wrapped_class(BatchFeatureHF) From a7856a599553dc5133133383255505d9c8b6c776 Mon Sep 17 00:00:00 2001 From: Yunlin Mao Date: Tue, 26 Nov 2024 20:22:36 +0800 Subject: [PATCH 04/22] add multi-thread download (#1095) * add thread download * add thread download * fix print * change default workers to 8 * fix return cache path * manage tqdm progress bars --------- Co-authored-by: DaozeZhang --- modelscope/hub/file_download.py | 131 +++++++++++---------- modelscope/hub/snapshot_download.py | 175 +++++++++++++++++----------- 2 files changed, 177 insertions(+), 129 deletions(-) diff --git a/modelscope/hub/file_download.py b/modelscope/hub/file_download.py index f1cbce6f..a6d7c2e2 100644 --- a/modelscope/hub/file_download.py +++ b/modelscope/hub/file_download.py @@ -381,33 +381,34 @@ def parallel_download( file_size: int = None, ): # create temp file + with tqdm( + unit='B', + unit_scale=True, + unit_divisor=1024, + total=file_size, + initial=0, + desc='Downloading [' + file_name + ']', + leave=True, + ) as progress: + PART_SIZE = 160 * 1024 * 1024 # every part is 160M + tasks = [] + file_path = os.path.join(local_dir, file_name) + os.makedirs(os.path.dirname(file_path), exist_ok=True) + for idx in range(int(file_size / PART_SIZE)): + start = idx * PART_SIZE + end = (idx + 1) * PART_SIZE - 1 + tasks.append((file_path, progress, start, end, url, file_name, + cookies, headers)) + if end + 1 < file_size: + tasks.append((file_path, progress, end + 1, file_size - 1, url, + file_name, cookies, headers)) + parallels = MODELSCOPE_DOWNLOAD_PARALLELS if MODELSCOPE_DOWNLOAD_PARALLELS <= 4 else 4 + # download every part + with ThreadPoolExecutor( + max_workers=parallels, + thread_name_prefix='download') as executor: + list(executor.map(download_part_with_retry, tasks)) - progress = tqdm( - unit='B', - unit_scale=True, - unit_divisor=1024, - total=file_size, - initial=0, - desc='Downloading [' + file_name + ']', - ) - PART_SIZE = 160 * 1024 * 1024 # every part is 160M - tasks = [] - file_path = os.path.join(local_dir, file_name) - os.makedirs(os.path.dirname(file_path), exist_ok=True) - for idx in range(int(file_size / PART_SIZE)): - start = idx * PART_SIZE - end = (idx + 1) * PART_SIZE - 1 - tasks.append((file_path, progress, start, end, url, file_name, cookies, - headers)) - if end + 1 < file_size: - tasks.append((file_path, progress, end + 1, file_size - 1, url, - file_name, cookies, headers)) - parallels = MODELSCOPE_DOWNLOAD_PARALLELS if MODELSCOPE_DOWNLOAD_PARALLELS <= 4 else 4 - # download every part - with ThreadPoolExecutor( - max_workers=parallels, thread_name_prefix='download') as executor: - list(executor.map(download_part_with_retry, tasks)) - progress.close() # merge parts. with open(os.path.join(local_dir, file_name), 'wb') as output_file: for task in tasks: @@ -457,45 +458,47 @@ def http_get_model_file( allowed_methods=['GET']) while True: try: - progress = tqdm( - unit='B', - unit_scale=True, - unit_divisor=1024, - total=file_size if file_size > 0 else 1, - initial=0, - desc='Downloading [' + file_name + ']', - ) - if file_size == 0: - # Avoid empty file server request - with open(temp_file_path, 'w+'): - progress.update(1) - progress.close() + with tqdm( + unit='B', + unit_scale=True, + unit_divisor=1024, + total=file_size if file_size > 0 else 1, + initial=0, + desc='Downloading [' + file_name + ']', + leave=True, + ) as progress: + if file_size == 0: + # Avoid empty file server request + with open(temp_file_path, 'w+'): + progress.update(1) break - partial_length = 0 - if os.path.exists( - temp_file_path): # download partial, continue download - with open(temp_file_path, 'rb') as f: - partial_length = f.seek(0, io.SEEK_END) - progress.update(partial_length) - if partial_length >= file_size: - break - # closed range[], from 0. - get_headers['Range'] = 'bytes=%s-%s' % (partial_length, - file_size - 1) - with open(temp_file_path, 'ab+') as f: - r = requests.get( - url, - stream=True, - headers=get_headers, - cookies=cookies, - timeout=API_FILE_DOWNLOAD_TIMEOUT) - r.raise_for_status() - for chunk in r.iter_content( - chunk_size=API_FILE_DOWNLOAD_CHUNK_SIZE): - if chunk: # filter out keep-alive new chunks - progress.update(len(chunk)) - f.write(chunk) - progress.close() + # Determine the length of any existing partial download + partial_length = 0 + # download partial, continue download + if os.path.exists(temp_file_path): + with open(temp_file_path, 'rb') as f: + partial_length = f.seek(0, io.SEEK_END) + progress.update(partial_length) + + # Check if download is complete + if partial_length >= file_size: + break + # closed range[], from 0. + get_headers['Range'] = 'bytes=%s-%s' % (partial_length, + file_size - 1) + with open(temp_file_path, 'ab+') as f: + r = requests.get( + url, + stream=True, + headers=get_headers, + cookies=cookies, + timeout=API_FILE_DOWNLOAD_TIMEOUT) + r.raise_for_status() + for chunk in r.iter_content( + chunk_size=API_FILE_DOWNLOAD_CHUNK_SIZE): + if chunk: # filter out keep-alive new chunks + progress.update(len(chunk)) + f.write(chunk) break except (Exception) as e: # no matter what happen, we will retry. retry = retry.increment('GET', url, error=e) diff --git a/modelscope/hub/snapshot_download.py b/modelscope/hub/snapshot_download.py index 915d8108..7ba0f446 100644 --- a/modelscope/hub/snapshot_download.py +++ b/modelscope/hub/snapshot_download.py @@ -8,6 +8,8 @@ from http.cookiejar import CookieJar from pathlib import Path from typing import Dict, List, Optional, Union +from tqdm.contrib.concurrent import thread_map + from modelscope.hub.api import HubApi, ModelScopeConfig from modelscope.hub.errors import InvalidParameter from modelscope.hub.utils.caching import ModelFileSystemCache @@ -36,6 +38,7 @@ def snapshot_download( local_dir: Optional[str] = None, allow_patterns: Optional[Union[List[str], str]] = None, ignore_patterns: Optional[Union[List[str], str]] = None, + max_workers: int = 8, ) -> str: """Download all files of a repo. Downloads a whole snapshot of a repo's files at the specified revision. This @@ -67,6 +70,7 @@ def snapshot_download( ignore_patterns (`str` or `List`, *optional*, default to `None`): If provided, files matching any of the patterns are not downloaded, priority over ignore_file_pattern. For hugging-face compatibility. + max_workers (`int`): The maximum number of workers to download files, default 8. Raises: ValueError: the value details. @@ -94,7 +98,8 @@ def snapshot_download( allow_file_pattern=allow_file_pattern, local_dir=local_dir, ignore_patterns=ignore_patterns, - allow_patterns=allow_patterns) + allow_patterns=allow_patterns, + max_workers=max_workers) def dataset_snapshot_download( @@ -109,6 +114,7 @@ def dataset_snapshot_download( allow_file_pattern: Optional[Union[str, List[str]]] = None, allow_patterns: Optional[Union[List[str], str]] = None, ignore_patterns: Optional[Union[List[str], str]] = None, + max_workers: int = 8, ) -> str: """Download raw files of a dataset. Downloads all files at the specified revision. This @@ -141,6 +147,7 @@ def dataset_snapshot_download( ignore_patterns (`str` or `List`, *optional*, default to `None`): If provided, files matching any of the patterns are not downloaded, priority over ignore_file_pattern. For hugging-face compatibility. + max_workers (`int`): The maximum number of workers to download files, default 8. Raises: ValueError: the value details. @@ -168,7 +175,8 @@ def dataset_snapshot_download( allow_file_pattern=allow_file_pattern, local_dir=local_dir, ignore_patterns=ignore_patterns, - allow_patterns=allow_patterns) + allow_patterns=allow_patterns, + max_workers=max_workers) def _snapshot_download( @@ -185,6 +193,7 @@ def _snapshot_download( local_dir: Optional[str] = None, allow_patterns: Optional[Union[List[str], str]] = None, ignore_patterns: Optional[Union[List[str], str]] = None, + max_workers: int = 8, ): if not repo_type: repo_type = REPO_TYPE_MODEL @@ -261,7 +270,8 @@ def _snapshot_download( ignore_file_pattern=ignore_file_pattern, allow_file_pattern=allow_file_pattern, ignore_patterns=ignore_patterns, - allow_patterns=allow_patterns) + allow_patterns=allow_patterns, + max_workers=max_workers) if '.' in repo_id: masked_directory = get_model_masked_directory( directory, repo_id) @@ -279,54 +289,77 @@ def _snapshot_download( elif repo_type == REPO_TYPE_DATASET: directory = os.path.abspath( - local_dir) if local_dir is not None else os.path.join( + local_dir) if local_dir else os.path.join( system_cache, 'datasets', repo_id) print(f'Downloading Dataset to directory: {directory}') + group_or_owner, name = model_id_to_group_owner_name(repo_id) - if not revision: - revision = DEFAULT_DATASET_REVISION - revision_detail = revision - page_number = 1 - page_size = 100 - while True: - files_list_tree = _api.list_repo_tree( - dataset_name=name, - namespace=group_or_owner, - revision=revision, - root_path='/', - recursive=True, - page_number=page_number, - page_size=page_size) - if not ('Code' in files_list_tree - and files_list_tree['Code'] == 200): - print( - 'Get dataset: %s file list failed, request_id: %s, message: %s' - % (repo_id, files_list_tree['RequestId'], - files_list_tree['Message'])) - return None - repo_files = files_list_tree['Data']['Files'] - _download_file_lists( - repo_files, - cache, - temporary_cache_dir, - repo_id, - _api, - name, - group_or_owner, - headers, - repo_type=repo_type, - revision=revision, - cookies=cookies, - ignore_file_pattern=ignore_file_pattern, - allow_file_pattern=allow_file_pattern, - ignore_patterns=ignore_patterns, - allow_patterns=allow_patterns) - if len(repo_files) < page_size: - break - page_number += 1 + revision_detail = revision or DEFAULT_DATASET_REVISION + + logger.info('Fetching dataset repo file list...') + repo_files = fetch_repo_files(_api, name, group_or_owner, + revision_detail) + + if repo_files is None: + logger.error( + f'Failed to retrieve file list for dataset: {repo_id}') + return None + + _download_file_lists( + repo_files, + cache, + temporary_cache_dir, + repo_id, + _api, + name, + group_or_owner, + headers, + repo_type=repo_type, + revision=revision, + cookies=cookies, + ignore_file_pattern=ignore_file_pattern, + allow_file_pattern=allow_file_pattern, + ignore_patterns=ignore_patterns, + allow_patterns=allow_patterns, + max_workers=max_workers) cache.save_model_version(revision_info=revision_detail) - return os.path.join(cache.get_root_location()) + cache_root_path = cache.get_root_location() + + logger.info(f"Download {repo_type} '{repo_id}' successfully.") + return cache_root_path + + +def fetch_repo_files(_api, name, group_or_owner, revision): + page_number = 1 + page_size = 150 + repo_files = [] + + while True: + files_list_tree = _api.list_repo_tree( + dataset_name=name, + namespace=group_or_owner, + revision=revision, + root_path='/', + recursive=True, + page_number=page_number, + page_size=page_size) + + if not ('Code' in files_list_tree and files_list_tree['Code'] == 200): + logger.error(f'Get dataset file list failed, request_id: \ + {files_list_tree["RequestId"]}, message: {files_list_tree["Message"]}' + ) + return None + + cur_repo_files = files_list_tree['Data']['Files'] + repo_files.extend(cur_repo_files) + + if len(cur_repo_files) < page_size: + break + + page_number += 1 + + return repo_files def _is_valid_regex(pattern: str): @@ -359,22 +392,22 @@ def _get_valid_regex_pattern(patterns: List[str]): def _download_file_lists( - repo_files: List[str], - cache: ModelFileSystemCache, - temporary_cache_dir: str, - repo_id: str, - api: HubApi, - name: str, - group_or_owner: str, - headers, - repo_type: Optional[str] = None, - revision: Optional[str] = DEFAULT_MODEL_REVISION, - cookies: Optional[CookieJar] = None, - ignore_file_pattern: Optional[Union[str, List[str]]] = None, - allow_file_pattern: Optional[Union[str, List[str]]] = None, - allow_patterns: Optional[Union[List[str], str]] = None, - ignore_patterns: Optional[Union[List[str], str]] = None, -): + repo_files: List[str], + cache: ModelFileSystemCache, + temporary_cache_dir: str, + repo_id: str, + api: HubApi, + name: str, + group_or_owner: str, + headers, + repo_type: Optional[str] = None, + revision: Optional[str] = DEFAULT_MODEL_REVISION, + cookies: Optional[CookieJar] = None, + ignore_file_pattern: Optional[Union[str, List[str]]] = None, + allow_file_pattern: Optional[Union[str, List[str]]] = None, + allow_patterns: Optional[Union[List[str], str]] = None, + ignore_patterns: Optional[Union[List[str], str]] = None, + max_workers: int = 8): ignore_patterns = _normalize_patterns(ignore_patterns) allow_patterns = _normalize_patterns(allow_patterns) ignore_file_pattern = _normalize_patterns(ignore_file_pattern) @@ -382,6 +415,7 @@ def _download_file_lists( # to compatible regex usage. ignore_regex_pattern = _get_valid_regex_pattern(ignore_file_pattern) + filtered_repo_files = [] for repo_file in repo_files: if repo_file['Type'] == 'tree': continue @@ -418,15 +452,18 @@ def _download_file_lists( continue except Exception as e: logger.warning('The file pattern is invalid : %s' % e) + else: + filtered_repo_files.append(repo_file) + def _download_single_file(repo_file): # check model_file is exist in cache, if existed, skip download, otherwise download if cache.exists(repo_file): file_name = os.path.basename(repo_file['Name']) logger.debug( f'File {file_name} already in cache, skip downloading!') - continue + return + if repo_type == REPO_TYPE_MODEL: - # get download url url = get_file_download_url( model_id=repo_id, file_path=repo_file['Path'], @@ -441,6 +478,14 @@ def _download_file_lists( raise InvalidParameter( f'Invalid repo type: {repo_type}, supported types: {REPO_TYPE_SUPPORT}' ) - download_file(url, repo_file, temporary_cache_dir, cache, headers, cookies) + + # Use thread_map for parallel downloading + thread_map( + _download_single_file, + filtered_repo_files, + max_workers=max_workers, + desc=f'Fetching {len(filtered_repo_files)} files', + leave=True, + position=max_workers) From e2bd3021752b10c1019b0f8dc5c0bd9459a527fc Mon Sep 17 00:00:00 2001 From: Yingda Chen Date: Thu, 28 Nov 2024 00:43:24 +0800 Subject: [PATCH 05/22] fix potential double definition for ocr pipeline (#1102) * fix potential double definition issue --------- Co-authored-by: Yingda Chen --- .../pipelines/cv/ocr_detection_pipeline.py | 20 +++++-------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/modelscope/pipelines/cv/ocr_detection_pipeline.py b/modelscope/pipelines/cv/ocr_detection_pipeline.py index c23f6e6e..5b0fbda5 100644 --- a/modelscope/pipelines/cv/ocr_detection_pipeline.py +++ b/modelscope/pipelines/cv/ocr_detection_pipeline.py @@ -27,6 +27,8 @@ RBOX_DIM = 5 OFFSET_DIM = 6 WORD_POLYGON_DIM = 8 OFFSET_VARIANCE = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1] +TF_NODE_THRESHOLD = 0.4 +TF_LINK_THRESHOLD = 0.6 @PIPELINES.register_module( @@ -39,7 +41,7 @@ class OCRDetectionPipeline(Pipeline): ```python >>> from modelscope.pipelines import pipeline - >>> ocr_detection = pipeline('ocr_detection', model='damo/cv_resnet18_ocr-detection-line-level_damo') + >>> ocr_detection = pipeline('ocr-detection', model='damo/cv_resnet18_ocr-detection-line-level_damo') >>> result = ocr_detection('https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/ocr_detection.jpg') {'polygons': array([[220, 14, 780, 14, 780, 64, 220, 64], @@ -87,9 +89,9 @@ class OCRDetectionPipeline(Pipeline): tf = tf.compat.v1 tf.compat.v1.disable_eager_execution() - tf.app.flags.DEFINE_float('node_threshold', 0.4, + tf.app.flags.DEFINE_float('node_threshold', TF_NODE_THRESHOLD, 'Confidence threshold for nodes') - tf.app.flags.DEFINE_float('link_threshold', 0.6, + tf.app.flags.DEFINE_float('link_threshold', TF_LINK_THRESHOLD, 'Confidence threshold for links') tf.reset_default_graph() model_path = osp.join( @@ -192,18 +194,6 @@ class OCRDetectionPipeline(Pipeline): return result else: # for model seglink++ - import tensorflow as tf - - if tf.__version__ >= '2.0': - tf = tf.compat.v1 - - tf.compat.v1.disable_eager_execution() - - tf.app.flags.DEFINE_float('node_threshold', 0.4, - 'Confidence threshold for nodes') - tf.app.flags.DEFINE_float('link_threshold', 0.6, - 'Confidence threshold for links') - img = LoadImage.convert_to_ndarray(input) h, w, c = img.shape From 3e13cc899b112139677292a14fa2de3bbd5f458e Mon Sep 17 00:00:00 2001 From: Yingda Chen Date: Thu, 28 Nov 2024 20:08:14 +0800 Subject: [PATCH 06/22] add transformer support for Qwen2vl (#1106) * add qwen2vlconfig * rearrange --------- Co-authored-by: Yingda Chen --- modelscope/__init__.py | 25 +++++++++++++------------ modelscope/utils/hf_util.py | 4 ++++ 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/modelscope/__init__.py b/modelscope/__init__.py index 80cd861a..b7712b3b 100644 --- a/modelscope/__init__.py +++ b/modelscope/__init__.py @@ -32,16 +32,17 @@ if TYPE_CHECKING: build_dataset_from_file) from .utils.constant import Tasks if is_transformers_available(): - from .utils.hf_util import AutoConfig, GPTQConfig, AwqConfig, BitsAndBytesConfig from .utils.hf_util import ( - AutoModel, AutoModelForCausalLM, AutoModelForSeq2SeqLM, + AutoModel, AutoProcessor, AutoFeatureExtractor, GenerationConfig, + AutoConfig, GPTQConfig, AwqConfig, BitsAndBytesConfig, + AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification, AutoModelForTokenClassification, AutoModelForImageClassification, AutoModelForImageToImage, AutoModelForImageSegmentation, AutoModelForQuestionAnswering, AutoModelForMaskedLM, AutoTokenizer, AutoModelForMaskGeneration, AutoModelForPreTraining, - AutoModelForTextEncoding, GenerationConfig, AutoImageProcessor, - BatchFeature, T5EncoderModel) + AutoModelForTextEncoding, AutoImageProcessor, BatchFeature, + Qwen2VLForConditionalGeneration, T5EncoderModel) else: print( 'transformer is not installed, please install it if you want to use related modules' @@ -95,17 +96,17 @@ else: if is_transformers_available(): _import_structure['utils.hf_util'] = [ - 'AutoConfig', 'GenerationConfig', 'AutoModel', 'GPTQConfig', - 'AwqConfig', 'BitsAndBytesConfig', 'AutoModelForCausalLM', - 'AutoModelForSeq2SeqLM', 'AutoTokenizer', - 'AutoModelForSequenceClassification', + 'AutoModel', 'AutoProcessor', 'AutoFeatureExtractor', + 'GenerationConfig', 'AutoConfig', 'GPTQConfig', 'AwqConfig', + 'BitsAndBytesConfig', 'AutoModelForCausalLM', + 'AutoModelForSeq2SeqLM', 'AutoModelForSequenceClassification', 'AutoModelForTokenClassification', 'AutoModelForImageClassification', 'AutoModelForImageToImage', - 'AutoModelForQuestionAnswering', 'AutoModelForMaskedLM', + 'AutoModelForImageSegmentation', 'AutoModelForQuestionAnswering', + 'AutoModelForMaskedLM', 'AutoTokenizer', 'AutoModelForMaskGeneration', 'AutoModelForPreTraining', - 'AutoModelForTextEncoding', 'AutoModelForTokenClassification', - 'AutoModelForImageSegmentation', 'AutoImageProcessor', - 'BatchFeature', 'T5EncoderModel' + 'AutoModelForTextEncoding', 'AutoImageProcessor', 'BatchFeature', + 'Qwen2VLForConditionalGeneration', 'T5EncoderModel' ] import sys diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py index a67b7886..f6613f98 100644 --- a/modelscope/utils/hf_util.py +++ b/modelscope/utils/hf_util.py @@ -33,6 +33,8 @@ from transformers import BitsAndBytesConfig as BitsAndBytesConfigHF from transformers import GenerationConfig as GenerationConfigHF from transformers import (PretrainedConfig, PreTrainedModel, PreTrainedTokenizerBase) +from transformers import \ + Qwen2VLForConditionalGeneration as Qwen2VLForConditionalGenerationHF from transformers import T5EncoderModel as T5EncoderModelHF from modelscope import snapshot_download @@ -335,6 +337,8 @@ AutoModelForMaskGeneration = get_wrapped_class(AutoModelForMaskGenerationHF) AutoModelForPreTraining = get_wrapped_class(AutoModelForPreTrainingHF) AutoModelForTextEncoding = get_wrapped_class(AutoModelForTextEncodingHF) T5EncoderModel = get_wrapped_class(T5EncoderModelHF) +Qwen2VLForConditionalGeneration = get_wrapped_class( + Qwen2VLForConditionalGenerationHF) AutoTokenizer = get_wrapped_class( AutoTokenizerHF, From 95cad91c2189995773943a7667c1dd18e082612e Mon Sep 17 00:00:00 2001 From: Yingda Chen Date: Fri, 29 Nov 2024 10:35:50 +0800 Subject: [PATCH 07/22] add transformers compatability for Vision2seq (#1107) * add vision2seq --------- Co-authored-by: Yingda Chen --- modelscope/__init__.py | 5 +++-- modelscope/hub/api.py | 2 +- modelscope/utils/hf_util.py | 2 ++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/modelscope/__init__.py b/modelscope/__init__.py index b7712b3b..996fd4c9 100644 --- a/modelscope/__init__.py +++ b/modelscope/__init__.py @@ -36,7 +36,7 @@ if TYPE_CHECKING: AutoModel, AutoProcessor, AutoFeatureExtractor, GenerationConfig, AutoConfig, GPTQConfig, AwqConfig, BitsAndBytesConfig, AutoModelForCausalLM, AutoModelForSeq2SeqLM, - AutoModelForSequenceClassification, + AutoModelForVision2Seq, AutoModelForSequenceClassification, AutoModelForTokenClassification, AutoModelForImageClassification, AutoModelForImageToImage, AutoModelForImageSegmentation, AutoModelForQuestionAnswering, AutoModelForMaskedLM, AutoTokenizer, @@ -99,7 +99,8 @@ else: 'AutoModel', 'AutoProcessor', 'AutoFeatureExtractor', 'GenerationConfig', 'AutoConfig', 'GPTQConfig', 'AwqConfig', 'BitsAndBytesConfig', 'AutoModelForCausalLM', - 'AutoModelForSeq2SeqLM', 'AutoModelForSequenceClassification', + 'AutoModelForSeq2SeqLM', 'AutoModelForVision2Seq', + 'AutoModelForSequenceClassification', 'AutoModelForTokenClassification', 'AutoModelForImageClassification', 'AutoModelForImageToImage', 'AutoModelForImageSegmentation', 'AutoModelForQuestionAnswering', diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index 499a8e07..a0d97712 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -571,7 +571,7 @@ class HubApi: revision_detail = self.get_branch_tag_detail(all_tags_detail, revision) if revision_detail is None: revision_detail = self.get_branch_tag_detail(all_branches_detail, revision) - logger.info('Development mode use revision: %s' % revision) + logger.debug('Development mode use revision: %s' % revision) else: if revision is not None and revision in all_branches: revision_detail = self.get_branch_tag_detail(all_branches_detail, revision) diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py index f6613f98..d838347a 100644 --- a/modelscope/utils/hf_util.py +++ b/modelscope/utils/hf_util.py @@ -26,6 +26,7 @@ from transformers import \ from transformers import AutoModelForTextEncoding as AutoModelForTextEncodingHF from transformers import \ AutoModelForTokenClassification as AutoModelForTokenClassificationHF +from transformers import AutoModelForVision2Seq as AutoModelForVision2SeqHF from transformers import AutoProcessor as AutoProcessorHF from transformers import AutoTokenizer as AutoTokenizerHF from transformers import BatchFeature as BatchFeatureHF @@ -321,6 +322,7 @@ def get_wrapped_class(module_class, AutoModel = get_wrapped_class(AutoModelHF) AutoModelForCausalLM = get_wrapped_class(AutoModelForCausalLMHF) AutoModelForSeq2SeqLM = get_wrapped_class(AutoModelForSeq2SeqLMHF) +AutoModelForVision2Seq = get_wrapped_class(AutoModelForVision2SeqHF) AutoModelForSequenceClassification = get_wrapped_class( AutoModelForSequenceClassificationHF) AutoModelForTokenClassification = get_wrapped_class( From b1e1f66fadd3b937cf45fdefb22b55f4afc5d854 Mon Sep 17 00:00:00 2001 From: Jintao Date: Fri, 29 Nov 2024 13:44:36 +0800 Subject: [PATCH 08/22] Merge release 1.20 docker (#1109) --- docker/Dockerfile.ubuntu | 1 + docker/build_image.py | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu index 3461e32e..0ec13d12 100644 --- a/docker/Dockerfile.ubuntu +++ b/docker/Dockerfile.ubuntu @@ -34,6 +34,7 @@ if [ "$INSTALL_MS_DEPS" = "True" ]; then \ pip install --no-cache-dir -r /var/modelscope/tests.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ pip install --no-cache-dir -r /var/modelscope/server.txt && \ pip install --no-cache-dir https://modelscope.oss-cn-beijing.aliyuncs.com/packages/imageio_ffmpeg-0.4.9-py3-none-any.whl --no-dependencies --force && \ + pip install adaseq pai-easycv && \ pip install --no-cache-dir 'scipy<1.13.0' && \ pip install --no-cache-dir funtextprocessing typeguard==2.13.3 scikit-learn -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ pip install --no-cache-dir text2sql_lgesql==1.3.0 git+https://github.com/jin-s13/xtcocoapi.git@v1.14 git+https://github.com/gatagat/lap.git@v0.4.0 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html --force --no-deps && \ diff --git a/docker/build_image.py b/docker/build_image.py index e3fc930f..ddc67581 100644 --- a/docker/build_image.py +++ b/docker/build_image.py @@ -139,7 +139,7 @@ class CPUImageBuilder(Builder): base_image = ( f'{docker_registry}:ubuntu{self.args.ubuntu_version}-{self.args.python_tag}' f'-torch{self.args.torch_version}-base') - extra_content = """\nRUN pip install adaseq pai-easycv""" + extra_content = '' with open('docker/Dockerfile.ubuntu', 'r') as f: content = f.read() @@ -192,8 +192,7 @@ class GPUImageBuilder(Builder): def generate_dockerfile(self) -> str: meta_file = './docker/install.sh' extra_content = """ -RUN pip install adaseq pai-easycv && \ - pip install tf-keras==2.16.0 --no-dependencies && \ +RUN pip install tf-keras==2.16.0 --no-dependencies && \ pip install --no-cache-dir torchsde jupyterlab torchmetrics==0.11.4 basicsr pynvml shortuuid && \ CUDA_HOME=/usr/local/cuda TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0" \ pip install --no-cache-dir 'git+https://github.com/facebookresearch/detectron2.git' From 46c4ce9bdd6e60fa501f36021f6694994fc4aece Mon Sep 17 00:00:00 2001 From: tastelikefeet <58414341+tastelikefeet@users.noreply.github.com> Date: Sun, 1 Dec 2024 15:17:25 +0800 Subject: [PATCH 09/22] support tag ci_image (#1112) --- .github/workflows/docker-image.yml | 6 +- docker/build_image.py | 88 ++++++++++++++++-------------- 2 files changed, 52 insertions(+), 42 deletions(-) diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index 6d20383d..13f61ff3 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -19,6 +19,10 @@ on: swift_branch: description: 'SWIFT branch to use(release/x.xx)' required: true + ci_image: + description: 'Set as the CI image' + default: '0' + required: false other_params: description: 'Other params in --xxx xxx' required: false @@ -47,4 +51,4 @@ jobs: run: | set -e source ~/.bashrc - python docker/build_image.py --image_type ${{ github.event.inputs.image_type }} --modelscope_branch ${{ github.event.inputs.modelscope_branch }} --modelscope_version ${{ github.event.inputs.modelscope_version }} --swift_branch ${{ github.event.inputs.swift_branch }} ${{ github.event.inputs.other_params }} + python docker/build_image.py --image_type ${{ github.event.inputs.image_type }} --modelscope_branch ${{ github.event.inputs.modelscope_branch }} --modelscope_version ${{ github.event.inputs.modelscope_version }} --swift_branch ${{ github.event.inputs.swift_branch }} --ci_image ${{ github.event.inputs.ci_image }} ${{ github.event.inputs.other_params }} diff --git a/docker/build_image.py b/docker/build_image.py index ddc67581..344fc9d3 100644 --- a/docker/build_image.py +++ b/docker/build_image.py @@ -61,6 +61,9 @@ class Builder: def push(self) -> int: pass + def image(self) -> str: + pass + def __call__(self): content = self.generate_dockerfile() self._save_dockerfile(content) @@ -68,10 +71,18 @@ class Builder: ret = self.build() if ret != 0: raise RuntimeError(f'Docker build error with errno: {ret}') + ret = self.push() if ret != 0: raise RuntimeError(f'Docker push error with errno: {ret}') + if self.args.ci_image != 0: + ret = os.system( + f'docker tag {self.image()} {docker_registry}:ci_image') + if ret != 0: + raise RuntimeError( + f'Docker tag ci_image error with errno: {ret}') + class BaseCPUImageBuilder(Builder): @@ -87,18 +98,18 @@ class BaseCPUImageBuilder(Builder): content = content.replace('{tf_version}', self.args.tf_version) return content - def build(self): - image_tag = ( + def image(self) -> str: + return ( f'{docker_registry}:ubuntu{self.args.ubuntu_version}-{self.args.python_tag}-' f'torch{self.args.torch_version}-base') + + def build(self): return os.system( - f'DOCKER_BUILDKIT=0 docker build -t {image_tag} -f Dockerfile .') + f'DOCKER_BUILDKIT=0 docker build -t {self.image()} -f Dockerfile .' + ) def push(self): - image_tag = ( - f'{docker_registry}:ubuntu{self.args.ubuntu_version}-{self.args.python_tag}-' - f'torch{self.args.torch_version}-base') - return os.system(f'docker push {image_tag}') + return os.system(f'docker push {self.image()}') class BaseGPUImageBuilder(Builder): @@ -115,18 +126,18 @@ class BaseGPUImageBuilder(Builder): content = content.replace('{tf_version}', self.args.tf_version) return content - def build(self) -> int: - image_tag = ( + def image(self) -> str: + return ( f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-{self.args.python_tag}-' f'torch{self.args.torch_version}-tf{self.args.tf_version}-base') + + def build(self) -> int: return os.system( - f'DOCKER_BUILDKIT=0 docker build -t {image_tag} -f Dockerfile .') + f'DOCKER_BUILDKIT=0 docker build -t {self.image()} -f Dockerfile .' + ) def push(self): - image_tag = ( - f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-{self.args.python_tag}-' - f'torch{self.args.torch_version}-tf{self.args.tf_version}-base') - return os.system(f'docker push {image_tag}') + return os.system(f'docker push {self.image()}') class CPUImageBuilder(Builder): @@ -162,26 +173,24 @@ class CPUImageBuilder(Builder): content = content.replace('{swift_branch}', self.args.swift_branch) return content - def build(self) -> int: - image_tag = ( + def image(self) -> str: + return ( f'{docker_registry}:ubuntu{self.args.ubuntu_version}-{self.args.python_tag}-' f'torch{self.args.torch_version}-{self.args.modelscope_version}-test' ) - return os.system(f'docker build -t {image_tag} -f Dockerfile .') + + def build(self) -> int: + return os.system(f'docker build -t {self.image()} -f Dockerfile .') def push(self): - image_tag = ( - f'{docker_registry}:ubuntu{self.args.ubuntu_version}-{self.args.python_tag}-' - f'torch{self.args.torch_version}-{self.args.modelscope_version}-test' - ) - ret = os.system(f'docker push {image_tag}') + ret = os.system(f'docker push {self.image()}') if ret != 0: return ret image_tag2 = ( f'{docker_registry}:ubuntu{self.args.ubuntu_version}-{self.args.python_tag}-' f'torch{self.args.torch_version}-{self.args.modelscope_version}-{formatted_time}-test' ) - ret = os.system(f'docker tag {image_tag} {image_tag2}') + ret = os.system(f'docker tag {self.image()} {image_tag2}') if ret != 0: return ret return os.system(f'docker push {image_tag2}') @@ -224,26 +233,24 @@ RUN pip install tf-keras==2.16.0 --no-dependencies && \ content = content.replace('{swift_branch}', self.args.swift_branch) return content - def build(self) -> int: - image_tag = ( + def image(self) -> str: + return ( f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-' f'{self.args.python_tag}-torch{self.args.torch_version}-tf{self.args.tf_version}-' f'{self.args.modelscope_version}-test') - return os.system(f'docker build -t {image_tag} -f Dockerfile .') + + def build(self) -> int: + return os.system(f'docker build -t {self.image()} -f Dockerfile .') def push(self): - image_tag = ( - f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-' - f'{self.args.python_tag}-torch{self.args.torch_version}-tf{self.args.tf_version}-' - f'{self.args.modelscope_version}-test') - ret = os.system(f'docker push {image_tag}') + ret = os.system(f'docker push {self.image()}') if ret != 0: return ret image_tag2 = ( f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-' f'{self.args.python_tag}-torch{self.args.torch_version}-tf{self.args.tf_version}-' f'{self.args.modelscope_version}-{formatted_time}-test') - ret = os.system(f'docker tag {image_tag} {image_tag2}') + ret = os.system(f'docker tag {self.image()} {image_tag2}') if ret != 0: return ret return os.system(f'docker push {image_tag2}') @@ -298,26 +305,24 @@ class LLMImageBuilder(Builder): content = content.replace('{swift_branch}', self.args.swift_branch) return content - def build(self) -> int: - image_tag = ( + def image(self) -> str: + return ( f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-' f'{self.args.python_tag}-torch{self.args.torch_version}-{self.args.modelscope_version}-LLM-test' ) - return os.system(f'docker build -t {image_tag} -f Dockerfile .') + + def build(self) -> int: + return os.system(f'docker build -t {self.image()} -f Dockerfile .') def push(self): - image_tag = ( - f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-' - f'{self.args.python_tag}-torch{self.args.torch_version}-{self.args.modelscope_version}-LLM-test' - ) - ret = os.system(f'docker push {image_tag}') + ret = os.system(f'docker push {self.image()}') if ret != 0: return ret image_tag2 = ( f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-' f'{self.args.python_tag}-torch{self.args.torch_version}-' f'{self.args.modelscope_version}-LLM-{formatted_time}-test') - ret = os.system(f'docker tag {image_tag} {image_tag2}') + ret = os.system(f'docker tag {self.image()} {image_tag2}') if ret != 0: return ret return os.system(f'docker push {image_tag2}') @@ -331,6 +336,7 @@ parser.add_argument('--ubuntu_version', type=str, default='22.04') parser.add_argument('--torch_version', type=str, default=None) parser.add_argument('--torchvision_version', type=str, default=None) parser.add_argument('--cuda_version', type=str, default=None) +parser.add_argument('--ci_image', type=int, default=0) parser.add_argument('--torchaudio_version', type=str, default=None) parser.add_argument('--tf_version', type=str, default=None) parser.add_argument('--vllm_version', type=str, default=None) From a721220fa1d0b1e65df313ce29dbefa911ea7eb0 Mon Sep 17 00:00:00 2001 From: Jintao Date: Sun, 1 Dec 2024 15:32:33 +0800 Subject: [PATCH 10/22] release transformers version to 4.33-4.46 (#1111) --- modelscope/utils/hf_util.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py index d838347a..9a848bf5 100644 --- a/modelscope/utils/hf_util.py +++ b/modelscope/utils/hf_util.py @@ -13,7 +13,6 @@ from transformers import \ AutoModelForImageClassification as AutoModelForImageClassificationHF from transformers import \ AutoModelForImageSegmentation as AutoModelForImageSegmentationHF -from transformers import AutoModelForImageToImage as AutoModelForImageToImageHF from transformers import AutoModelForMaskedLM as AutoModelForMaskedLMHF from transformers import \ AutoModelForMaskGeneration as AutoModelForMaskGenerationHF @@ -34,8 +33,6 @@ from transformers import BitsAndBytesConfig as BitsAndBytesConfigHF from transformers import GenerationConfig as GenerationConfigHF from transformers import (PretrainedConfig, PreTrainedModel, PreTrainedTokenizerBase) -from transformers import \ - Qwen2VLForConditionalGeneration as Qwen2VLForConditionalGenerationHF from transformers import T5EncoderModel as T5EncoderModelHF from modelscope import snapshot_download @@ -331,7 +328,12 @@ AutoModelForImageSegmentation = get_wrapped_class( AutoModelForImageSegmentationHF) AutoModelForImageClassification = get_wrapped_class( AutoModelForImageClassificationHF) -AutoModelForImageToImage = get_wrapped_class(AutoModelForImageToImageHF) +try: + from transformers import AutoModelForImageToImage as AutoModelForImageToImageHF + AutoModelForImageToImage = get_wrapped_class(AutoModelForImageToImageHF) +except ImportError: + AutoModelForImageToImage = None + AutoModelForQuestionAnswering = get_wrapped_class( AutoModelForQuestionAnsweringHF) AutoModelForMaskedLM = get_wrapped_class(AutoModelForMaskedLMHF) @@ -339,8 +341,13 @@ AutoModelForMaskGeneration = get_wrapped_class(AutoModelForMaskGenerationHF) AutoModelForPreTraining = get_wrapped_class(AutoModelForPreTrainingHF) AutoModelForTextEncoding = get_wrapped_class(AutoModelForTextEncodingHF) T5EncoderModel = get_wrapped_class(T5EncoderModelHF) -Qwen2VLForConditionalGeneration = get_wrapped_class( - Qwen2VLForConditionalGenerationHF) +try: + from transformers import \ + Qwen2VLForConditionalGeneration as Qwen2VLForConditionalGenerationHF + Qwen2VLForConditionalGeneration = get_wrapped_class( + Qwen2VLForConditionalGenerationHF) +except ImportError: + Qwen2VLForConditionalGeneration = None AutoTokenizer = get_wrapped_class( AutoTokenizerHF, From a4582012fffd9c6c620fa91021a8c98e713cccdd Mon Sep 17 00:00:00 2001 From: Yunlin Mao Date: Sun, 1 Dec 2024 15:38:03 +0800 Subject: [PATCH 11/22] fix tqdm bar (#1108) --- modelscope/hub/snapshot_download.py | 45 +++++++++++++++++------------ 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/modelscope/hub/snapshot_download.py b/modelscope/hub/snapshot_download.py index 7ba0f446..f28c18e0 100644 --- a/modelscope/hub/snapshot_download.py +++ b/modelscope/hub/snapshot_download.py @@ -4,11 +4,12 @@ import fnmatch import os import re import uuid +from concurrent.futures import ThreadPoolExecutor from http.cookiejar import CookieJar from pathlib import Path from typing import Dict, List, Optional, Union -from tqdm.contrib.concurrent import thread_map +from tqdm.auto import tqdm from modelscope.hub.api import HubApi, ModelScopeConfig from modelscope.hub.errors import InvalidParameter @@ -325,8 +326,6 @@ def _snapshot_download( cache.save_model_version(revision_info=revision_detail) cache_root_path = cache.get_root_location() - - logger.info(f"Download {repo_type} '{repo_id}' successfully.") return cache_root_path @@ -391,6 +390,21 @@ def _get_valid_regex_pattern(patterns: List[str]): return None +def thread_download(func, iterable, max_workers, **kwargs): + # Create a tqdm progress bar with the total number of files to fetch + with tqdm( + total=len(iterable), + desc=f'Fetching {len(iterable)} files') as pbar: + # Define a wrapper function to update the progress bar + def progress_wrapper(*args, **kwargs): + result = func(*args, **kwargs) + pbar.update(1) + return result + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + executor.map(progress_wrapper, iterable) + + def _download_file_lists( repo_files: List[str], cache: ModelFileSystemCache, @@ -450,19 +464,18 @@ def _download_file_lists( fnmatch.fnmatch(repo_file['Path'], pattern) for pattern in allow_file_pattern): continue + # check model_file is exist in cache, if existed, skip download + if cache.exists(repo_file): + file_name = os.path.basename(repo_file['Name']) + logger.debug( + f'File {file_name} already in cache, skip downloading!') + continue except Exception as e: logger.warning('The file pattern is invalid : %s' % e) else: filtered_repo_files.append(repo_file) def _download_single_file(repo_file): - # check model_file is exist in cache, if existed, skip download, otherwise download - if cache.exists(repo_file): - file_name = os.path.basename(repo_file['Name']) - logger.debug( - f'File {file_name} already in cache, skip downloading!') - return - if repo_type == REPO_TYPE_MODEL: url = get_file_download_url( model_id=repo_id, @@ -481,11 +494,7 @@ def _download_file_lists( download_file(url, repo_file, temporary_cache_dir, cache, headers, cookies) - # Use thread_map for parallel downloading - thread_map( - _download_single_file, - filtered_repo_files, - max_workers=max_workers, - desc=f'Fetching {len(filtered_repo_files)} files', - leave=True, - position=max_workers) + if len(filtered_repo_files) > 0: + thread_download(_download_single_file, filtered_repo_files, + max_workers) + logger.info(f"Download {repo_type} '{repo_id}' successfully.") From 3017a70262c27846ffbc33273a83a4112bf0f280 Mon Sep 17 00:00:00 2001 From: Yingda Chen Date: Mon, 2 Dec 2024 15:17:17 +0800 Subject: [PATCH 12/22] Handle unsupported Transformers class, and add more auto classes (#1113) * optimize unsupported transformer class and add more automodel Co-authored-by: Yingda Chen --- modelscope/__init__.py | 34 +++++++++++--- modelscope/utils/hf_util.py | 89 ++++++++++++++++++++++++++++++++++++- 2 files changed, 116 insertions(+), 7 deletions(-) diff --git a/modelscope/__init__.py b/modelscope/__init__.py index 996fd4c9..c969be68 100644 --- a/modelscope/__init__.py +++ b/modelscope/__init__.py @@ -38,11 +38,23 @@ if TYPE_CHECKING: AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoModelForVision2Seq, AutoModelForSequenceClassification, AutoModelForTokenClassification, AutoModelForImageClassification, - AutoModelForImageToImage, AutoModelForImageSegmentation, - AutoModelForQuestionAnswering, AutoModelForMaskedLM, AutoTokenizer, - AutoModelForMaskGeneration, AutoModelForPreTraining, - AutoModelForTextEncoding, AutoImageProcessor, BatchFeature, - Qwen2VLForConditionalGeneration, T5EncoderModel) + AutoModelForImageTextToText, + AutoModelForZeroShotImageClassification, + AutoModelForKeypointDetection, + AutoModelForDocumentQuestionAnswering, + AutoModelForSemanticSegmentation, + AutoModelForUniversalSegmentation, + AutoModelForInstanceSegmentation, AutoModelForObjectDetection, + AutoModelForZeroShotObjectDetection, + AutoModelForAudioClassification, AutoModelForSpeechSeq2Seq, + AutoModelForMaskedImageModeling, + AutoModelForVisualQuestionAnswering, + AutoModelForTableQuestionAnswering, AutoModelForImageToImage, + AutoModelForImageSegmentation, AutoModelForQuestionAnswering, + AutoModelForMaskedLM, AutoTokenizer, AutoModelForMaskGeneration, + AutoModelForPreTraining, AutoModelForTextEncoding, + AutoImageProcessor, BatchFeature, Qwen2VLForConditionalGeneration, + T5EncoderModel) else: print( 'transformer is not installed, please install it if you want to use related modules' @@ -103,6 +115,18 @@ else: 'AutoModelForSequenceClassification', 'AutoModelForTokenClassification', 'AutoModelForImageClassification', 'AutoModelForImageToImage', + 'AutoModelForImageTextToText', + 'AutoModelForZeroShotImageClassification', + 'AutoModelForKeypointDetection', + 'AutoModelForDocumentQuestionAnswering', + 'AutoModelForSemanticSegmentation', + 'AutoModelForUniversalSegmentation', + 'AutoModelForInstanceSegmentation', 'AutoModelForObjectDetection', + 'AutoModelForZeroShotObjectDetection', + 'AutoModelForAudioClassification', 'AutoModelForSpeechSeq2Seq', + 'AutoModelForMaskedImageModeling', + 'AutoModelForVisualQuestionAnswering', + 'AutoModelForTableQuestionAnswering', 'AutoModelForImageSegmentation', 'AutoModelForQuestionAnswering', 'AutoModelForMaskedLM', 'AutoTokenizer', 'AutoModelForMaskGeneration', 'AutoModelForPreTraining', diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py index 9a848bf5..7da30df2 100644 --- a/modelscope/utils/hf_util.py +++ b/modelscope/utils/hf_util.py @@ -8,24 +8,52 @@ from transformers import AutoConfig as AutoConfigHF from transformers import AutoFeatureExtractor as AutoFeatureExtractorHF from transformers import AutoImageProcessor as AutoImageProcessorHF from transformers import AutoModel as AutoModelHF +from transformers import \ + AutoModelForAudioClassification as AutoModelForAudioClassificationHF from transformers import AutoModelForCausalLM as AutoModelForCausalLMHF +from transformers import \ + AutoModelForDocumentQuestionAnswering as \ + AutoModelForDocumentQuestionAnsweringHF from transformers import \ AutoModelForImageClassification as AutoModelForImageClassificationHF from transformers import \ AutoModelForImageSegmentation as AutoModelForImageSegmentationHF +from transformers import \ + AutoModelForInstanceSegmentation as AutoModelForInstanceSegmentationHF +from transformers import \ + AutoModelForMaskedImageModeling as AutoModelForMaskedImageModelingHF from transformers import AutoModelForMaskedLM as AutoModelForMaskedLMHF from transformers import \ AutoModelForMaskGeneration as AutoModelForMaskGenerationHF +from transformers import \ + AutoModelForObjectDetection as AutoModelForObjectDetectionHF from transformers import AutoModelForPreTraining as AutoModelForPreTrainingHF from transformers import \ AutoModelForQuestionAnswering as AutoModelForQuestionAnsweringHF +from transformers import \ + AutoModelForSemanticSegmentation as AutoModelForSemanticSegmentationHF from transformers import AutoModelForSeq2SeqLM as AutoModelForSeq2SeqLMHF from transformers import \ AutoModelForSequenceClassification as AutoModelForSequenceClassificationHF +from transformers import \ + AutoModelForSpeechSeq2Seq as AutoModelForSpeechSeq2SeqHF +from transformers import \ + AutoModelForTableQuestionAnswering as AutoModelForTableQuestionAnsweringHF from transformers import AutoModelForTextEncoding as AutoModelForTextEncodingHF from transformers import \ AutoModelForTokenClassification as AutoModelForTokenClassificationHF +from transformers import \ + AutoModelForUniversalSegmentation as AutoModelForUniversalSegmentationHF from transformers import AutoModelForVision2Seq as AutoModelForVision2SeqHF +from transformers import \ + AutoModelForVisualQuestionAnswering as \ + AutoModelForVisualQuestionAnsweringHF +from transformers import \ + AutoModelForZeroShotImageClassification as \ + AutoModelForZeroShotImageClassificationHF +from transformers import \ + AutoModelForZeroShotObjectDetection as \ + AutoModelForZeroShotObjectDetectionHF from transformers import AutoProcessor as AutoProcessorHF from transformers import AutoTokenizer as AutoTokenizerHF from transformers import BatchFeature as BatchFeatureHF @@ -34,6 +62,7 @@ from transformers import GenerationConfig as GenerationConfigHF from transformers import (PretrainedConfig, PreTrainedModel, PreTrainedTokenizerBase) from transformers import T5EncoderModel as T5EncoderModelHF +from transformers import __version__ as transformers_version from modelscope import snapshot_download from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke @@ -49,6 +78,21 @@ except ImportError: logger = get_logger() +class UnsupportedAutoClass: + + def __init__(self, name: str): + self.error_msg =\ + f'{name} is not supported with your installed Transformers version {transformers_version}. ' + \ + 'Please update your Transformers by "pip install transformers -U".' + + def from_pretrained(self, pretrained_model_name_or_path, *model_args, + **kwargs): + raise ImportError(self.error_msg) + + def from_config(self, cls, config): + raise ImportError(self.error_msg) + + def user_agent(invoked_by=None): if invoked_by is None: invoked_by = Invoke.PRETRAINED @@ -328,14 +372,54 @@ AutoModelForImageSegmentation = get_wrapped_class( AutoModelForImageSegmentationHF) AutoModelForImageClassification = get_wrapped_class( AutoModelForImageClassificationHF) +AutoModelForZeroShotImageClassification = get_wrapped_class( + AutoModelForZeroShotImageClassificationHF) try: from transformers import AutoModelForImageToImage as AutoModelForImageToImageHF AutoModelForImageToImage = get_wrapped_class(AutoModelForImageToImageHF) except ImportError: - AutoModelForImageToImage = None + AutoModelForImageToImage = UnsupportedAutoClass('AutoModelForImageToImage') + +try: + from transformers import AutoModelForImageTextToText as AutoModelForImageTextToTextHF + AutoModelForImageTextToText = get_wrapped_class( + AutoModelForImageTextToTextHF) +except ImportError: + AutoModelForImageTextToText = UnsupportedAutoClass( + 'AutoModelForImageTextToText') + +try: + from transformers import AutoModelForKeypointDetection as AutoModelForKeypointDetectionHF + AutoModelForKeypointDetection = get_wrapped_class( + AutoModelForKeypointDetectionHF) +except ImportError: + AutoModelForKeypointDetection = UnsupportedAutoClass( + 'AutoModelForKeypointDetection') AutoModelForQuestionAnswering = get_wrapped_class( AutoModelForQuestionAnsweringHF) +AutoModelForTableQuestionAnswering = get_wrapped_class( + AutoModelForTableQuestionAnsweringHF) +AutoModelForVisualQuestionAnswering = get_wrapped_class( + AutoModelForVisualQuestionAnsweringHF) +AutoModelForKeypointDetection = get_wrapped_class( + AutoModelForKeypointDetectionHF) +AutoModelForDocumentQuestionAnswering = get_wrapped_class( + AutoModelForDocumentQuestionAnsweringHF) +AutoModelForSemanticSegmentation = get_wrapped_class( + AutoModelForSemanticSegmentationHF) +AutoModelForUniversalSegmentation = get_wrapped_class( + AutoModelForUniversalSegmentationHF) +AutoModelForInstanceSegmentation = get_wrapped_class( + AutoModelForInstanceSegmentationHF) +AutoModelForObjectDetection = get_wrapped_class(AutoModelForObjectDetectionHF) +AutoModelForZeroShotObjectDetection = get_wrapped_class( + AutoModelForZeroShotObjectDetectionHF) +AutoModelForAudioClassification = get_wrapped_class( + AutoModelForAudioClassificationHF) +AutoModelForSpeechSeq2Seq = get_wrapped_class(AutoModelForSpeechSeq2SeqHF) +AutoModelForMaskedImageModeling = get_wrapped_class( + AutoModelForMaskedImageModelingHF) AutoModelForMaskedLM = get_wrapped_class(AutoModelForMaskedLMHF) AutoModelForMaskGeneration = get_wrapped_class(AutoModelForMaskGenerationHF) AutoModelForPreTraining = get_wrapped_class(AutoModelForPreTrainingHF) @@ -347,7 +431,8 @@ try: Qwen2VLForConditionalGeneration = get_wrapped_class( Qwen2VLForConditionalGenerationHF) except ImportError: - Qwen2VLForConditionalGeneration = None + Qwen2VLForConditionalGeneration = UnsupportedAutoClass( + 'Qwen2VLForConditionalGeneration') AutoTokenizer = get_wrapped_class( AutoTokenizerHF, From 0daf77d4895971e4a52967ccd0fb9e64579ea3bc Mon Sep 17 00:00:00 2001 From: Jintao Date: Mon, 2 Dec 2024 15:34:23 +0800 Subject: [PATCH 13/22] remove unnecessary code (#1115) * remove the extra rows --------- Co-authored-by: Yingda Chen --- modelscope/utils/hf_util.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py index 7da30df2..8f7c06da 100644 --- a/modelscope/utils/hf_util.py +++ b/modelscope/utils/hf_util.py @@ -402,8 +402,6 @@ AutoModelForTableQuestionAnswering = get_wrapped_class( AutoModelForTableQuestionAnsweringHF) AutoModelForVisualQuestionAnswering = get_wrapped_class( AutoModelForVisualQuestionAnsweringHF) -AutoModelForKeypointDetection = get_wrapped_class( - AutoModelForKeypointDetectionHF) AutoModelForDocumentQuestionAnswering = get_wrapped_class( AutoModelForDocumentQuestionAnsweringHF) AutoModelForSemanticSegmentation = get_wrapped_class( From 1950ec1839831bbff0626d1a20b8cfce51a1d965 Mon Sep 17 00:00:00 2001 From: Yingda Chen Date: Mon, 2 Dec 2024 19:29:11 +0800 Subject: [PATCH 14/22] add hash verficiation into cache file existence check (#1116) * add hash check into cache file existence check Co-authored-by: Yingda Chen --- modelscope/hub/file_download.py | 4 ++-- modelscope/hub/snapshot_download.py | 3 ++- modelscope/hub/utils/caching.py | 28 ++++++++++++++++++++-------- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/modelscope/hub/file_download.py b/modelscope/hub/file_download.py index a6d7c2e2..40ac8a03 100644 --- a/modelscope/hub/file_download.py +++ b/modelscope/hub/file_download.py @@ -214,7 +214,7 @@ def _repo_file_download( if repo_file['Path'] == file_path: if cache.exists(repo_file): logger.debug( - f'File {repo_file["Name"]} already in cache, skip downloading!' + f'File {repo_file["Name"]} already in cache with identical hash, skip downloading!' ) return cache.get_file_by_info(repo_file) else: @@ -251,7 +251,7 @@ def _repo_file_download( if repo_file['Path'] == file_path: if cache.exists(repo_file): logger.debug( - f'File {repo_file["Name"]} already in cache, skip downloading!' + f'File {repo_file["Name"]} already in cache with identical hash, skip downloading!' ) return cache.get_file_by_info(repo_file) else: diff --git a/modelscope/hub/snapshot_download.py b/modelscope/hub/snapshot_download.py index f28c18e0..015cadbd 100644 --- a/modelscope/hub/snapshot_download.py +++ b/modelscope/hub/snapshot_download.py @@ -468,7 +468,8 @@ def _download_file_lists( if cache.exists(repo_file): file_name = os.path.basename(repo_file['Name']) logger.debug( - f'File {file_name} already in cache, skip downloading!') + f'File {file_name} already in cache with identical hash, skip downloading!' + ) continue except Exception as e: logger.warning('The file pattern is invalid : %s' % e) diff --git a/modelscope/hub/utils/caching.py b/modelscope/hub/utils/caching.py index ecdec7cc..675d62a8 100644 --- a/modelscope/hub/utils/caching.py +++ b/modelscope/hub/utils/caching.py @@ -7,6 +7,8 @@ import tempfile from shutil import move, rmtree from typing import Dict +from modelscope.hub.constants import FILE_HASH +from modelscope.hub.utils.utils import compute_hash from modelscope.utils.logger import get_logger logger = get_logger() @@ -252,26 +254,36 @@ class ModelFileSystemCache(FileSystemCache): return cache_key def exists(self, model_file_info): - """Check the file is cached or not. + """Check the file is cached or not. Note existence check will also cover digest check Args: model_file_info (CachedFileInfo): The cached file info Returns: - bool: If exists return True otherwise False + bool: If exists and has the same hash, return True otherwise False """ key = self.__get_cache_key(model_file_info) is_exists = False + file_path = key['Path'] + cache_file_path = os.path.join(self.cache_root_location, + model_file_info['Path']) for cached_key in self.cached_files: - if cached_key['Path'] == key['Path'] and ( + if cached_key['Path'] == file_path and ( cached_key['Revision'].startswith(key['Revision']) or key['Revision'].startswith(cached_key['Revision'])): - is_exists = True - break - file_path = os.path.join(self.cache_root_location, - model_file_info['Path']) + expected_hash = model_file_info[FILE_HASH] + if expected_hash is not None and os.path.exists( + cache_file_path): + cache_file_sha256 = compute_hash(cache_file_path) + if expected_hash == cache_file_sha256: + is_exists = True + break + else: + logger.info( + f'File [{file_path}] exists in cache but with a mismatched hash, will re-download.' + ) if is_exists: - if os.path.exists(file_path): + if os.path.exists(cache_file_path): return True else: self.remove_key( From e32a9361e33973a87728ecf0b701fdd6c4ce6223 Mon Sep 17 00:00:00 2001 From: Yingda Chen Date: Tue, 3 Dec 2024 21:25:18 +0800 Subject: [PATCH 15/22] fix accuracy case sensitiveness (#1118) Co-authored-by: Yingda Chen --- modelscope/cli/llamafile.py | 20 ++++++++++---------- tests/cli/test_llamfafile_cmd.py | 18 ++++++++++++++++++ 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/modelscope/cli/llamafile.py b/modelscope/cli/llamafile.py index 39d1346f..5c52344d 100644 --- a/modelscope/cli/llamafile.py +++ b/modelscope/cli/llamafile.py @@ -55,15 +55,6 @@ class LlamafileCMD(CLICommand): 'Selected accuracy of GGUF files in the repo. Ignored when "file" is also provided.' ) - group.add_argument( - '--launch', - type=str, - required=False, - default='True', - help= - 'Whether to launch model with the downloaded llamafile, default to True.' - ) - group.add_argument( '--file', type=str, @@ -80,6 +71,15 @@ class LlamafileCMD(CLICommand): 'Directory where the selected llamafile would will be downloaded to.' ) + group.add_argument( + '--launch', + type=str, + required=False, + default='True', + help= + 'Whether to launch model with the downloaded llamafile, default to True.' + ) + parser.set_defaults(func=subparser_func) def execute(self): @@ -106,7 +106,7 @@ class LlamafileCMD(CLICommand): selected_file = f found = True break - if self.args.accuracy and self.args.accuracy in f.lower(): + if self.args.accuracy and self.args.accuracy.lower() in f.lower(): selected_file = f found = True break diff --git a/tests/cli/test_llamfafile_cmd.py b/tests/cli/test_llamfafile_cmd.py index 616ed78c..d0ff7574 100644 --- a/tests/cli/test_llamfafile_cmd.py +++ b/tests/cli/test_llamfafile_cmd.py @@ -29,6 +29,24 @@ class LlamafileCMDTest(unittest.TestCase): in output) self.assertTrue('Launching model with llamafile' in output) + accuracy = 'Q2_K' + cmd = f'python -m modelscope.cli.cli {self.cmd} --model {self.model_id} --accuracy {accuracy}' + stat, output = subprocess.getstatusoutput(cmd) + self.assertEqual(stat, 0) + self.assertTrue( + 'llamafile matching criteria found: [My-Model-14B-Q2_K.llamafile]' + in output) + self.assertTrue('Launching model with llamafile' in output) + + accuracy = 'q2_k' + cmd = f'python -m modelscope.cli.cli {self.cmd} --model {self.model_id} --accuracy {accuracy}' + stat, output = subprocess.getstatusoutput(cmd) + self.assertEqual(stat, 0) + self.assertTrue( + 'llamafile matching criteria found: [My-Model-14B-Q2_K.llamafile]' + in output) + self.assertTrue('Launching model with llamafile' in output) + def test_given_file(self): file = 'My-Model-14B-FP16.llamafile' cmd = f'python -m modelscope.cli.cli {self.cmd} --model {self.model_id} --file {file}' From 140ac66c62a85477a59d23156093e0c6b10928d4 Mon Sep 17 00:00:00 2001 From: Yingda Chen Date: Tue, 3 Dec 2024 21:51:35 +0800 Subject: [PATCH 16/22] fix windows path --- modelscope/cli/llamafile.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/modelscope/cli/llamafile.py b/modelscope/cli/llamafile.py index 5c52344d..23f3fe91 100644 --- a/modelscope/cli/llamafile.py +++ b/modelscope/cli/llamafile.py @@ -124,7 +124,7 @@ class LlamafileCMD(CLICommand): downloaded_file = self._rename_extension(downloaded_file) if self.args.launch.lower() == 'true': - print('Launching model with llamafile:') + print(f'Launching model with llamafile [{downloaded_file}]:') self._execute_llamafile(downloaded_file) else: print( @@ -153,7 +153,6 @@ class LlamafileCMD(CLICommand): def _rename_extension(self, original_file_name): directory, filename = os.path.split(original_file_name) base_name, _ = os.path.splitext(filename) - new_filename = f'{base_name}.exe' - new_file_name = os.path.join(directory, new_filename) - os.rename(original_file_name, new_file_name) + new_filename = os.path.join(directory, f'{base_name}.exe') + os.rename(original_file_name, new_filename) return new_filename From 0411beeb059b9e7dbcea2839fd1a6a7f7a75cc90 Mon Sep 17 00:00:00 2001 From: Yingda Chen Date: Tue, 3 Dec 2024 21:57:50 +0800 Subject: [PATCH 17/22] fix windows path (#1119) Co-authored-by: Yingda Chen From c67ffd4cbccdc1421c37709efade406862ab9a3c Mon Sep 17 00:00:00 2001 From: Yingda Chen Date: Wed, 4 Dec 2024 18:25:52 +0800 Subject: [PATCH 18/22] optional hash --- modelscope/hub/constants.py | 1 + modelscope/hub/utils/caching.py | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/modelscope/hub/constants.py b/modelscope/hub/constants.py index cfc6d6da..b3d03e1a 100644 --- a/modelscope/hub/constants.py +++ b/modelscope/hub/constants.py @@ -29,6 +29,7 @@ API_RESPONSE_FIELD_MESSAGE = 'Message' MODELSCOPE_CLOUD_ENVIRONMENT = 'MODELSCOPE_ENVIRONMENT' MODELSCOPE_CLOUD_USERNAME = 'MODELSCOPE_USERNAME' MODELSCOPE_SDK_DEBUG = 'MODELSCOPE_SDK_DEBUG' +MODELSCOPE_ENABLE_DEFAULT_HASH_VALIDATION = 'MODELSCOPE_ENABLE_DEFAULT_HASH_VALIDATION' ONE_YEAR_SECONDS = 24 * 365 * 60 * 60 MODELSCOPE_REQUEST_ID = 'X-Request-ID' TEMPORARY_FOLDER_NAME = '._____temp' diff --git a/modelscope/hub/utils/caching.py b/modelscope/hub/utils/caching.py index 675d62a8..ed1d9c67 100644 --- a/modelscope/hub/utils/caching.py +++ b/modelscope/hub/utils/caching.py @@ -12,6 +12,10 @@ from modelscope.hub.utils.utils import compute_hash from modelscope.utils.logger import get_logger logger = get_logger() + +enable_default_hash_validation = \ + os.getenv(MODELSCOPE_ENABLE_DEFAULT_HASH_VALIDATION, 'False').strip().lower() == 'true' + """Implements caching functionality, used internally only """ @@ -274,7 +278,11 @@ class ModelFileSystemCache(FileSystemCache): expected_hash = model_file_info[FILE_HASH] if expected_hash is not None and os.path.exists( cache_file_path): - cache_file_sha256 = compute_hash(cache_file_path) + # compute hash only when enabled, otherwise just meet expectation by default + if enable_default_hash_validation: + cache_file_sha256 = compute_hash(cache_file_path) + else: + cache_file_sha256 = expected_hash if expected_hash == cache_file_sha256: is_exists = True break From 8d66932e3f3a74b3107e42e97f4b1eb64904d4e2 Mon Sep 17 00:00:00 2001 From: Yingda Chen Date: Wed, 4 Dec 2024 18:46:48 +0800 Subject: [PATCH 19/22] make hash validation optional (#1124) Co-authored-by: Yingda Chen --- modelscope/hub/utils/caching.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modelscope/hub/utils/caching.py b/modelscope/hub/utils/caching.py index ed1d9c67..993fb1cd 100644 --- a/modelscope/hub/utils/caching.py +++ b/modelscope/hub/utils/caching.py @@ -15,7 +15,6 @@ logger = get_logger() enable_default_hash_validation = \ os.getenv(MODELSCOPE_ENABLE_DEFAULT_HASH_VALIDATION, 'False').strip().lower() == 'true' - """Implements caching functionality, used internally only """ From 101b9249f840e3a8075c0829dbc8a5e07db57b5c Mon Sep 17 00:00:00 2001 From: Yingda Chen Date: Thu, 5 Dec 2024 09:26:34 +0800 Subject: [PATCH 20/22] fix missing import (#1126) Co-authored-by: Yingda Chen --- modelscope/hub/utils/caching.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modelscope/hub/utils/caching.py b/modelscope/hub/utils/caching.py index 993fb1cd..e1dcf83b 100644 --- a/modelscope/hub/utils/caching.py +++ b/modelscope/hub/utils/caching.py @@ -7,7 +7,8 @@ import tempfile from shutil import move, rmtree from typing import Dict -from modelscope.hub.constants import FILE_HASH +from modelscope.hub.constants import ( # noqa + FILE_HASH, MODELSCOPE_ENABLE_DEFAULT_HASH_VALIDATION) from modelscope.hub.utils.utils import compute_hash from modelscope.utils.logger import get_logger From d2ef1003ea4266c3a438361ec091bbcbcd34af66 Mon Sep 17 00:00:00 2001 From: tastelikefeet <58414341+tastelikefeet@users.noreply.github.com> Date: Thu, 5 Dec 2024 20:07:32 +0800 Subject: [PATCH 21/22] Skip obsolete sd pipeline (#1131) --- .../multi_modal/efficient_diffusion_tuning_pipeline.py | 4 +++- tests/pipelines/test_efficient_diffusion_tuning.py | 10 +++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/modelscope/pipelines/multi_modal/efficient_diffusion_tuning_pipeline.py b/modelscope/pipelines/multi_modal/efficient_diffusion_tuning_pipeline.py index 1b791634..320d83e7 100644 --- a/modelscope/pipelines/multi_modal/efficient_diffusion_tuning_pipeline.py +++ b/modelscope/pipelines/multi_modal/efficient_diffusion_tuning_pipeline.py @@ -36,8 +36,10 @@ class EfficientDiffusionTuningPipeline(Pipeline): 'data/test/images/vision_efficient_tuning_test_1.png') >>> print(f'Output: {result}.') """ + logger.warn( + '[NOTE]Do not use this pipeline because the dependencies are too old, ' + 'use https://github.com/modelscope/DiffSynth-Studio instead') super().__init__(model=model, **kwargs) - self.device = 'cuda' if torch.cuda.is_available() else 'cpu' self.model = self.model.to(self.device) self.model.eval() diff --git a/tests/pipelines/test_efficient_diffusion_tuning.py b/tests/pipelines/test_efficient_diffusion_tuning.py index 1f224917..af52d65f 100644 --- a/tests/pipelines/test_efficient_diffusion_tuning.py +++ b/tests/pipelines/test_efficient_diffusion_tuning.py @@ -11,10 +11,10 @@ from modelscope.utils.test_utils import test_level class EfficientDiffusionTuningTest(unittest.TestCase): def setUp(self) -> None: - os.system('pip install ms-swift -U') + # os.system('pip install ms-swift -U') self.task = Tasks.efficient_diffusion_tuning - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip def test_efficient_diffusion_tuning_lora_run_pipeline(self): model_id = 'damo/multi-modal_efficient-diffusion-tuning-lora' model_revision = 'v1.0.2' @@ -24,7 +24,7 @@ class EfficientDiffusionTuningTest(unittest.TestCase): result = edt_pipeline(inputs) print(f'Efficient-diffusion-tuning-lora output: {result}.') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip def test_efficient_diffusion_tuning_lora_load_model_from_pretrained(self): model_id = 'damo/multi-modal_efficient-diffusion-tuning-lora' model_revision = 'v1.0.2' @@ -32,7 +32,7 @@ class EfficientDiffusionTuningTest(unittest.TestCase): from modelscope.models.multi_modal import EfficientStableDiffusion self.assertTrue(model.__class__ == EfficientStableDiffusion) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip def test_efficient_diffusion_tuning_control_lora_run_pipeline(self): # TODO: to be fixed in the future model_id = 'damo/multi-modal_efficient-diffusion-tuning-control-lora' @@ -48,7 +48,7 @@ class EfficientDiffusionTuningTest(unittest.TestCase): result = edt_pipeline(inputs) print(f'Efficient-diffusion-tuning-control-lora output: {result}.') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skip def test_efficient_diffusion_tuning_control_lora_load_model_from_pretrained( self): model_id = 'damo/multi-modal_efficient-diffusion-tuning-control-lora' From 31bf308e448e34cc0cf982efe54a4445d12852ea Mon Sep 17 00:00:00 2001 From: Yunlin Mao Date: Tue, 10 Dec 2024 10:10:01 +0800 Subject: [PATCH 22/22] downgrade moviepy to 1.0.3 (#1133) --- requirements/cv.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/cv.txt b/requirements/cv.txt index d54e5dc5..842cded2 100644 --- a/requirements/cv.txt +++ b/requirements/cv.txt @@ -32,7 +32,7 @@ mmdet>=2.25.0,<=2.28.2 # mmdet3d-1.0.0rc6 remove networkx and numba version restriction mmdet3d==1.0.0a1 mmsegmentation<=0.30.0 -moviepy>=1.0.3 +moviepy==1.0.3 nerfacc==0.2.2 networkx numba