Mirror of https://github.com/modelscope/modelscope.git (synced 2025-12-24 20:19:22 +01:00)
Merge commit '60780769b1c9c19bbbdaae02edb0d7d9dfeb8da5' into feat/whoami

* commit '60780769b1c9c19bbbdaae02edb0d7d9dfeb8da5':
  Fix/daily (#1155)
  fix user-specified cache path (#1154)
  fix pipeline builder when model is not supported (#1125)
  Fix python push model no disk space (#1148)
  use streaming hash validation (#1127)
  fix daily fail case 1210yk (#1142)
  fix ut (#1143)
  Fix ollama template (#1141)
  downgrade moviepy to 1.0.3 (#1133)
  Skip obsolete sd pipeline (#1131)
@@ -28,6 +28,10 @@ if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
pip install -r requirements/nlp.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
pip install -r requirements/science.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html

python -m spacy download en_core_web_sm
pip install faiss-gpu
pip install healpy
pip install huggingface-hub==0.25.2
# test with install
pip install .
else

@@ -8,7 +8,6 @@ import pickle
import platform
import re
import shutil
import tempfile
import uuid
from collections import defaultdict
from http import HTTPStatus
@@ -34,8 +33,8 @@ from modelscope.hub.constants import (API_HTTP_CLIENT_MAX_RETRIES,
MODELSCOPE_CLOUD_USERNAME,
MODELSCOPE_REQUEST_ID, ONE_YEAR_SECONDS,
REQUESTS_API_HTTP_METHOD,
DatasetVisibility, Licenses,
ModelVisibility)
TEMPORARY_FOLDER_NAME, DatasetVisibility,
Licenses, ModelVisibility)
from modelscope.hub.errors import (InvalidParameter, NotExistError,
NotLoginException, NoValidRevisionError,
RequestError, datahub_raise_on_error,
@@ -391,7 +390,7 @@ class HubApi:
license=license,
chinese_name=chinese_name,
original_model_id=original_model_id)
tmp_dir = tempfile.mkdtemp()
tmp_dir = os.path.join(model_dir, TEMPORARY_FOLDER_NAME) # make temporary folder
git_wrapper = GitCommandWrapper()
logger.info(f'Pushing folder {model_dir} as model {model_id}.')
logger.info(f'Total folder size {folder_size}, this may take a while depending on actual pushing size...')
@@ -433,6 +432,7 @@ class HubApi:
remote_branch=revision)
if tag is not None:
repo.tag_and_push(tag, tag)
logger.info(f'Successfully push folder {model_dir} to remote repo [{model_id}].')
except Exception:
raise
finally:
@@ -563,7 +563,7 @@ class HubApi:
if revision is None:
revision = MASTER_MODEL_BRANCH
logger.info(
'Model revision not specified, use default: %s in development mode'
'Model revision not specified, using default: [%s] version.'
% revision)
if revision not in all_branches and revision not in all_tags:
raise NotExistError('The model: %s has no revision : %s .' % (model_id, revision))

@@ -14,6 +14,22 @@ from modelscope.utils.logger import get_logger
logger = get_logger()


def get_model_id_from_cache(model_root_path: str, ) -> str:
model_cache = None
# download with git
if os.path.exists(os.path.join(model_root_path, '.git')):
git_cmd_wrapper = GitCommandWrapper()
git_url = git_cmd_wrapper.get_repo_remote_url(model_root_path)
if git_url.endswith('.git'):
git_url = git_url[:-4]
u_parse = urlparse(git_url)
model_id = u_parse.path[1:]
else: # snapshot_download
model_cache = ModelFileSystemCache(model_root_path)
model_id = model_cache.get_model_id()
return model_id


def check_local_model_is_latest(
model_root_path: str,
user_agent: Optional[Union[Dict, str]] = None,
@@ -22,19 +38,7 @@ def check_local_model_is_latest(
Check local model repo is same as hub latest version.
"""
try:
model_cache = None
# download with git
if os.path.exists(os.path.join(model_root_path, '.git')):
git_cmd_wrapper = GitCommandWrapper()
git_url = git_cmd_wrapper.get_repo_remote_url(model_root_path)
if git_url.endswith('.git'):
git_url = git_url[:-4]
u_parse = urlparse(git_url)
model_id = u_parse.path[1:]
else: # snapshot_download
model_cache = ModelFileSystemCache(model_root_path)
model_id = model_cache.get_model_id()

model_id = get_model_id_from_cache(model_root_path)
# make headers
headers = {
'user-agent':

@@ -9,7 +9,7 @@ DEFAULT_MODELSCOPE_DATA_ENDPOINT = MODELSCOPE_URL_SCHEME + DEFAULT_MODELSCOPE_DO
MODELSCOPE_PARALLEL_DOWNLOAD_THRESHOLD_MB = int(
os.environ.get('MODELSCOPE_PARALLEL_DOWNLOAD_THRESHOLD_MB', 500))
MODELSCOPE_DOWNLOAD_PARALLELS = int(
os.environ.get('MODELSCOPE_DOWNLOAD_PARALLELS', 4))
os.environ.get('MODELSCOPE_DOWNLOAD_PARALLELS', 1))
DEFAULT_MODELSCOPE_GROUP = 'damo'
MODEL_ID_SEPARATOR = '/'
FILE_HASH = 'Sha256'

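(Aside, not part of the commit: the two constants above are read straight from environment variables, and this change lowers the default worker count from 4 to 1. A minimal sketch of how the same two knobs can gate parallel downloading; should_use_parallel is a hypothetical helper that only mirrors the size/worker check used in download_file further down.)

import os

PARALLEL_THRESHOLD_MB = int(
    os.environ.get('MODELSCOPE_PARALLEL_DOWNLOAD_THRESHOLD_MB', 500))
DOWNLOAD_PARALLELS = int(os.environ.get('MODELSCOPE_DOWNLOAD_PARALLELS', 1))


def should_use_parallel(file_size_bytes: int) -> bool:
    # Parallel download is only worthwhile for large files, and only when
    # more than one download worker is configured.
    return (file_size_bytes > PARALLEL_THRESHOLD_MB * 1000 * 1000
            and DOWNLOAD_PARALLELS > 1)
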
@@ -1,6 +1,7 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import copy
import hashlib
import io
import os
import tempfile
@@ -213,8 +214,9 @@ def _repo_file_download(

if repo_file['Path'] == file_path:
if cache.exists(repo_file):
file_name = repo_file['Name']
logger.debug(
f'File {repo_file["Name"]} already in cache with identical hash, skip downloading!'
f'File {file_name} already in cache with identical hash, skip downloading!'
)
return cache.get_file_by_info(repo_file)
else:
@@ -250,8 +252,9 @@ def _repo_file_download(

if repo_file['Path'] == file_path:
if cache.exists(repo_file):
file_name = repo_file['Name']
logger.debug(
f'File {repo_file["Name"]} already in cache with identical hash, skip downloading!'
f'File {file_name} already in cache with identical hash, skip downloading!'
)
return cache.get_file_by_info(repo_file)
else:
@@ -410,12 +413,19 @@ def parallel_download(
list(executor.map(download_part_with_retry, tasks))

# merge parts.
hash_sha256 = hashlib.sha256()
with open(os.path.join(local_dir, file_name), 'wb') as output_file:
for task in tasks:
part_file_name = task[0] + '_%s_%s' % (task[2], task[3])
with open(part_file_name, 'rb') as part_file:
output_file.write(part_file.read())
while True:
chunk = part_file.read(16 * API_FILE_DOWNLOAD_CHUNK_SIZE)
if not chunk:
break
output_file.write(chunk)
hash_sha256.update(chunk)
os.remove(part_file_name)
return hash_sha256.hexdigest()
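
(Aside, not part of the commit: the merge loop above now hashes each chunk as it is written, so the SHA-256 of the assembled file is known without a second pass over it. A standalone sketch of that pattern, assuming the part files already exist on disk:)

import hashlib
import os


def merge_parts_with_hash(part_paths, target_path, chunk_size=1024 * 1024):
    # Concatenate part files into target_path, updating the digest chunk by
    # chunk instead of re-reading the merged file afterwards.
    sha256 = hashlib.sha256()
    with open(target_path, 'wb') as output_file:
        for part_path in part_paths:
            with open(part_path, 'rb') as part_file:
                while True:
                    chunk = part_file.read(chunk_size)
                    if not chunk:
                        break
                    output_file.write(chunk)
                    sha256.update(chunk)
            os.remove(part_path)  # part file is no longer needed once merged
    return sha256.hexdigest()
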
def http_get_model_file(
@@ -452,6 +462,8 @@ def http_get_model_file(
os.makedirs(os.path.dirname(temp_file_path), exist_ok=True)
logger.debug('downloading %s to %s', url, temp_file_path)
# retry sleep 0.5s, 1s, 2s, 4s
has_retry = False
hash_sha256 = hashlib.sha256()
retry = Retry(
total=API_FILE_DOWNLOAD_RETRY_TIMES,
backoff_factor=1,
@@ -499,11 +511,14 @@ def http_get_model_file(
if chunk: # filter out keep-alive new chunks
progress.update(len(chunk))
f.write(chunk)
hash_sha256.update(chunk)
break
except (Exception) as e: # no matter what happen, we will retry.
except Exception as e: # no matter what happen, we will retry.
has_retry = True
retry = retry.increment('GET', url, error=e)
retry.sleep()

# if anything went wrong, we would discard the real-time computed hash and return None
return None if has_retry else hash_sha256.hexdigest()
logger.debug('storing %s in cache at %s', url, local_dir)

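(Aside, not part of the commit: http_get_model_file above keeps a running SHA-256 across its retry loop and returns None instead of the digest whenever a retry happened, since a resumed transfer may not have streamed every byte through the hasher. A compact sketch of that pattern; fetch is an assumed callable that yields byte chunks starting at a given offset.)

import hashlib

from urllib3.util.retry import Retry


def download_with_digest(url, fetch, max_retries=3):
    retry = Retry(total=max_retries, backoff_factor=1)
    has_retry = False
    sha256 = hashlib.sha256()
    data = bytearray()
    while True:
        try:
            for chunk in fetch(url, len(data)):  # resume from current offset
                data.extend(chunk)
                sha256.update(chunk)
            break
        except Exception as e:  # back off and retry, but distrust the digest
            has_retry = True
            retry = retry.increment('GET', url, error=e)
            retry.sleep()
    # Mirror the hunk above: a retried transfer returns None so the caller
    # falls back to hashing the finished file on disk.
    return bytes(data), (None if has_retry else sha256.hexdigest())
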
@@ -591,9 +606,10 @@ def http_get_file(

def download_file(url, file_meta, temporary_cache_dir, cache, headers,
cookies):
file_digest = None
if MODELSCOPE_PARALLEL_DOWNLOAD_THRESHOLD_MB * 1000 * 1000 < file_meta[
'Size'] and MODELSCOPE_DOWNLOAD_PARALLELS > 1: # parallel download large file.
parallel_download(
file_digest = parallel_download(
url,
temporary_cache_dir,
file_meta['Path'],
@@ -601,7 +617,7 @@ def download_file(url, file_meta, temporary_cache_dir, cache, headers,
cookies=None if cookies is None else cookies.get_dict(),
file_size=file_meta['Size'])
else:
http_get_model_file(
file_digest = http_get_model_file(
url,
temporary_cache_dir,
file_meta['Path'],
@@ -612,6 +628,16 @@ def download_file(url, file_meta, temporary_cache_dir, cache, headers,
# check file integrity
temp_file = os.path.join(temporary_cache_dir, file_meta['Path'])
if FILE_HASH in file_meta:
file_integrity_validation(temp_file, file_meta[FILE_HASH])
expected_hash = file_meta[FILE_HASH]
# if a real-time hash has been computed
if file_digest is not None:
# if real-time hash mismatched, try to compute it again
if file_digest != expected_hash:
print(
'Mismatched real-time digest found, falling back to lump-sum hash computation'
)
file_integrity_validation(temp_file, expected_hash)
else:
file_integrity_validation(temp_file, expected_hash)
# put file into to cache
return cache.put_file(file_meta, temp_file)

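(Aside, not part of the commit: download_file now trusts the digest computed while streaming and only re-hashes the whole file when that digest is missing or does not match the expected value. A small sketch of the same decision; sha256_of_file stands in for ModelScope's file_integrity_validation helper.)

import hashlib


def sha256_of_file(path, chunk_size=1024 * 1024):
    sha256 = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            sha256.update(chunk)
    return sha256.hexdigest()


def validate_download(temp_file, expected_hash, streamed_digest=None):
    # Fast path: the digest computed during download already matches.
    if streamed_digest is not None and streamed_digest == expected_hash:
        return
    # Slow path: fall back to a full re-read of the file on disk.
    if sha256_of_file(temp_file) != expected_hash:
        raise ValueError(f'File {temp_file} integrity check failed.')
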
@@ -206,7 +206,7 @@ def _snapshot_download(
repo_id, local_dir=local_dir, cache_dir=cache_dir, repo_type=repo_type)
system_cache = cache_dir if cache_dir is not None else os.getenv(
'MODELSCOPE_CACHE',
Path.home().joinpath('.cache', 'modelscope'))
Path.home().joinpath('.cache', 'modelscope', 'hub'))
if local_files_only:
if len(cache.cached_files) == 0:
raise ValueError(
@@ -233,7 +233,7 @@ def _snapshot_download(
if repo_type == REPO_TYPE_MODEL:
directory = os.path.abspath(
local_dir) if local_dir is not None else os.path.join(
system_cache, 'hub', repo_id)
system_cache, repo_id)
print(f'Downloading Model to directory: {directory}')
revision_detail = _api.get_valid_revision_detail(
repo_id, revision=revision, cookies=cookies)
@@ -283,10 +283,13 @@ def _snapshot_download(
logger.info(f'Creating symbolic link [{directory}].')
try:
os.symlink(
os.path.abspath(masked_directory), directory)
os.path.abspath(masked_directory),
directory,
target_is_directory=True)
except OSError:
logger.warning(
f'Failed to create symbolic link {directory}.')
f'Failed to create symbolic link {directory} for {os.path.abspath(masked_directory)}.'
)

elif repo_type == REPO_TYPE_DATASET:
directory = os.path.abspath(

@@ -8,10 +8,11 @@ import torch.nn.functional as F
from diffusers.models.attention_processor import (Attention,
AttnAddedKVProcessor,
AttnAddedKVProcessor2_0)
from diffusers.models.dual_transformer_2d import DualTransformer2DModel
from diffusers.models.resnet import (Downsample2D, FirDownsample2D,
FirUpsample2D, KDownsample2D, KUpsample2D,
ResnetBlock2D, Upsample2D)
from diffusers.models.transformers.dual_transformer_2d import \
DualTransformer2DModel
from torch import nn

from .attention import AdaGroupNorm

@@ -20,12 +20,13 @@ from diffusers.models.activations import get_activation
from diffusers.models.attention_processor import (Attention,
AttnAddedKVProcessor,
AttnAddedKVProcessor2_0)
from diffusers.models.dual_transformer_2d import DualTransformer2DModel
from diffusers.models.normalization import AdaLayerNorm
from diffusers.models.resnet import (Downsample2D, FirDownsample2D,
FirUpsample2D, KDownsample2D, KUpsample2D,
ResnetBlock2D, Upsample2D)
from diffusers.models.transformer_2d import Transformer2DModel
from diffusers.models.transformers.dual_transformer_2d import \
DualTransformer2DModel
from diffusers.models.transformers.transformer_2d import Transformer2DModel
from diffusers.utils import is_torch_version, logging
from einops import rearrange
from torch import nn

@@ -3,7 +3,7 @@ from contextlib import contextmanager
import pytorch_lightning as pl
import torch
import torch.nn.functional as F
from taming.modules.vqvae.quantize import VectorQuantizer2 as VectorQuantizer
from taming.modules.vqvae.quantize import VectorQuantizer

from ..util import instantiate_from_config
from .distributions import DiagonalGaussianDistribution

@@ -16,7 +16,7 @@ import torch
import torch.nn as nn
from einops import rearrange, repeat
from omegaconf import ListConfig
from pytorch_lightning.utilities.distributed import rank_zero_only
from pytorch_lightning.utilities.rank_zero import rank_zero_only
from torch.optim.lr_scheduler import LambdaLR
from torchvision.utils import make_grid
from tqdm import tqdm

@@ -125,7 +125,7 @@ def pipeline(task: str = None,
if pipeline_name is None and prefer_llm_pipeline:
pipeline_name = external_engine_for_llm_checker(
model, model_revision, kwargs)
else:
if pipeline_name is None:
model = normalize_model_input(
model,
model_revision,
@@ -170,7 +170,7 @@ def pipeline(task: str = None,
pipeline_props['device'] = device
cfg = ConfigDict(pipeline_props)

clear_llm_info(kwargs)
clear_llm_info(kwargs, pipeline_name)
if kwargs:
cfg.update(kwargs)

@@ -223,23 +223,33 @@ def external_engine_for_llm_checker(model: Union[str, List[str], Model,
List[Model]],
revision: Optional[str],
kwargs: Dict[str, Any]) -> Optional[str]:
from .nlp.llm_pipeline import ModelTypeHelper, LLMAdapterRegistry

from .nlp.llm_pipeline import SWIFT_MODEL_ID_MAPPING, init_swift_model_mapping, ModelTypeHelper, LLMAdapterRegistry
from ..hub.check_model import get_model_id_from_cache
if isinstance(model, list):
model = model[0]
if not isinstance(model, str):
model = model.model_dir

if kwargs.get('llm_framework') == 'swift':
return 'llm'
# check if swift supports
if os.path.exists(model):
model_id = get_model_id_from_cache(model)
else:
model_id = model

init_swift_model_mapping()
if model_id.lower() in SWIFT_MODEL_ID_MAPPING:
return 'llm'
model_type = ModelTypeHelper.get(
model, revision, with_adapter=True, split='-', use_cache=True)
if LLMAdapterRegistry.contains(model_type):
return 'llm'


def clear_llm_info(kwargs: Dict):
def clear_llm_info(kwargs: Dict, pipeline_name: str):
from modelscope.utils.model_type_helper import ModelTypeHelper

kwargs.pop('external_engine_for_llm', None)
if pipeline_name != 'llm':
kwargs.pop('llm_framework', None)
ModelTypeHelper.clear_cache()

@@ -36,8 +36,10 @@ class EfficientDiffusionTuningPipeline(Pipeline):
'data/test/images/vision_efficient_tuning_test_1.png')
>>> print(f'Output: {result}.')
"""
logger.warn(
'[NOTE]Do not use this pipeline because the dependencies are too old, '
'use https://github.com/modelscope/DiffSynth-Studio instead')
super().__init__(model=model, **kwargs)

self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
self.model = self.model.to(self.device)
self.model.eval()

@@ -33,6 +33,17 @@ SWIFT_MODEL_ID_MAPPING = {}
SWIFT_FRAMEWORK = 'swift'


def init_swift_model_mapping():
from swift.llm.utils import MODEL_MAPPING

global SWIFT_MODEL_ID_MAPPING
if not SWIFT_MODEL_ID_MAPPING:
SWIFT_MODEL_ID_MAPPING = {
v['model_id_or_path'].lower(): k
for k, v in MODEL_MAPPING.items()
}


class LLMAdapterRegistry:

llm_format_map = {'qwen': [None, None, None]}

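(Aside, not part of the commit: init_swift_model_mapping above builds a module-level reverse index from Swift's MODEL_MAPPING exactly once, lower-casing the model ids so later lookups are case-insensitive. A stripped-down sketch of the same lazy-initialisation pattern with a stand-in source table.)

# Hypothetical stand-in for swift.llm.utils.MODEL_MAPPING.
SOURCE_MAPPING = {
    'qwen-7b-chat': {'model_id_or_path': 'Qwen/Qwen-7B-Chat'},
}
_MODEL_ID_TO_TYPE = {}


def init_model_id_mapping():
    # Populate the reverse index on first use only; later calls are no-ops.
    global _MODEL_ID_TO_TYPE
    if not _MODEL_ID_TO_TYPE:
        _MODEL_ID_TO_TYPE = {
            v['model_id_or_path'].lower(): k
            for k, v in SOURCE_MAPPING.items()
        }


init_model_id_mapping()
assert _MODEL_ID_TO_TYPE['qwen/qwen-7b-chat'] == 'qwen-7b-chat'
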
@@ -216,14 +227,7 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin):

def _init_swift(self, model_id, device) -> None:
from swift.llm import prepare_model_template
from swift.llm.utils import MODEL_MAPPING, InferArguments

global SWIFT_MODEL_ID_MAPPING
if not SWIFT_MODEL_ID_MAPPING:
SWIFT_MODEL_ID_MAPPING = {
v['model_id_or_path']: k
for k, v in MODEL_MAPPING.items()
}
from swift.llm.utils import InferArguments

def format_messages(messages: Dict[str, List[Dict[str, str]]],
tokenizer: PreTrainedTokenizer,
@@ -261,9 +265,12 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin):
else:
return dict(system=system, prompt=prompt, history=history)

assert model_id in SWIFT_MODEL_ID_MAPPING,\
init_swift_model_mapping()

assert model_id.lower() in SWIFT_MODEL_ID_MAPPING,\
f'Invalid model id {model_id} or Swift framework does not support this model.'
args = InferArguments(model_type=SWIFT_MODEL_ID_MAPPING[model_id])
args = InferArguments(
model_type=SWIFT_MODEL_ID_MAPPING[model_id.lower()])
model, template = prepare_model_template(
args, device_map=self.device_map)
self.model = add_stream_generate(model)

@@ -213,9 +213,14 @@ class FillMaskPoNetPreprocessor(FillMaskPreprocessorBase):
osp.join(model_dir, ModelFile.CONFIGURATION))
self.language = self.cfg.model.get('language', 'en')
if self.language == 'en':
import nltk
from nltk.tokenize import sent_tokenize
import_external_nltk_data(
osp.join(model_dir, 'nltk_data'), 'tokenizers/punkt')
from packaging import version
if version.parse(nltk.__version__) >= version.parse('3.8.2'):
nltk.download('punkt_tab')
else:
import_external_nltk_data(
osp.join(model_dir, 'nltk_data'), 'tokenizers/punkt_tab')
elif self.language in ['zh', 'cn']:

def sent_tokenize(para):

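(Aside, not part of the commit: the preprocessor above switches between the punkt and punkt_tab sentence-tokenizer resources depending on the installed nltk version. A minimal sketch of that version gate; ensure_punkt is a hypothetical helper, not ModelScope API.)

import nltk
from packaging import version


def ensure_punkt():
    # Mirror the 3.8.2 gate used above: newer nltk releases provide the
    # 'punkt_tab' resource, older releases still use 'punkt'.
    if version.parse(nltk.__version__) >= version.parse('3.8.2'):
        nltk.download('punkt_tab')
    else:
        nltk.download('punkt')
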
@@ -30,6 +30,8 @@ def cases(*names):
else:
regex += letter
ret.append(regex)
if '-' in regex:
ret.append(regex.replace('-', ' '))
if len(ret) > 1:
ret = '|'.join(ret)
ret = '(' + ret + ')'
@@ -53,6 +55,12 @@ def no_multi_modal():
template_info = [
# llama
## "llama3"
TemplateInfo(
template_regex=
f'.*{cases("llama3.3", "llama-3.3")}.*',
modelfile_prefix=
'https://modelscope.oss-cn-beijing.aliyuncs.com/llm_template/ollama/llama3.3',
),
TemplateInfo(
template_regex=
f'.*{cases("llama3.2", "llama-3.2")}.*{cases("vision")}.*',
@@ -291,7 +299,7 @@ template_info = [
TemplateInfo(
template=TemplateType.llama,
template_regex=
f'.*{cases("mistral")}{no_multi_modal()}.*{chat_suffix}.*',
f'.*{cases("mistral", "ministral")}{no_multi_modal()}.*{chat_suffix}.*',
modelfile_prefix=
'https://modelscope.oss-cn-beijing.aliyuncs.com/llm_template/ollama/mistral',
),
@@ -771,6 +779,18 @@ template_info = [
template_regex=f'.*{cases("paraphrase-multilingual")}.*',
modelfile_prefix=
'https://modelscope.oss-cn-beijing.aliyuncs.com/llm_template/ollama/paraphrase-multilingual'),
TemplateInfo(
template_regex=f'.*{cases("marco")}.*{cases("o1")}.*',
modelfile_prefix=
'https://modelscope.oss-cn-beijing.aliyuncs.com/llm_template/ollama/marco-o1'),
TemplateInfo(
template_regex=f'.*{cases("qwq")}.*',
modelfile_prefix=
'https://modelscope.oss-cn-beijing.aliyuncs.com/llm_template/ollama/qwq'),
TemplateInfo(
template_regex=f'.*{cases("exaone")}.*',
modelfile_prefix=
'https://modelscope.oss-cn-beijing.aliyuncs.com/llm_template/ollama/exaone3.5'),

]

@@ -175,7 +175,11 @@ class PretrainedModelStreamingOutputMixin(StreamingOutputMixin):

@contextmanager
def _replace_generate(self, model: PreTrainedModel) -> Generator:
if version.parse(transformers.__version__) >= version.parse('4.39.0'):
if version.parse(transformers.__version__) >= version.parse('4.43.0'):
greedy_search_name = 'stream_greedy_search'
sample_name = '_sample'
elif version.parse(
transformers.__version__) >= version.parse('4.39.0'):
greedy_search_name = '_greedy_search'
sample_name = '_sample'
else:

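(Aside, not part of the commit: _replace_generate above picks which transformers generation helpers to patch based on the installed version, adding a 4.43.0 branch. A compact sketch of that dispatch; the names returned for the pre-4.39 branch are an assumption here, since that part of the hunk is not shown.)

import transformers
from packaging import version


def pick_generate_hooks():
    tf_version = version.parse(transformers.__version__)
    if tf_version >= version.parse('4.43.0'):
        return 'stream_greedy_search', '_sample'
    elif tf_version >= version.parse('4.39.0'):
        return '_greedy_search', '_sample'
    else:
        # Assumed fallback for older transformers releases.
        return 'greedy_search', 'sample'
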
@@ -449,6 +453,8 @@ class PretrainedModelStreamingOutputMixin(StreamingOutputMixin):
break

# prepare model inputs
model_kwargs = self._get_initial_cache_position(
input_ids, model_kwargs)
model_inputs = self.prepare_inputs_for_generation(
input_ids, **model_kwargs)

@@ -32,7 +32,7 @@ mmdet>=2.25.0,<=2.28.2
# mmdet3d-1.0.0rc6 remove networkx and numba version restriction
mmdet3d==1.0.0a1
mmsegmentation<=0.30.0
moviepy>=1.0.3
moviepy==1.0.3
nerfacc==0.2.2
networkx
numba

@@ -11,10 +11,10 @@ from modelscope.utils.test_utils import test_level
class EfficientDiffusionTuningTest(unittest.TestCase):

def setUp(self) -> None:
os.system('pip install ms-swift -U')
# os.system('pip install ms-swift -U')
self.task = Tasks.efficient_diffusion_tuning

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skip
def test_efficient_diffusion_tuning_lora_run_pipeline(self):
model_id = 'damo/multi-modal_efficient-diffusion-tuning-lora'
model_revision = 'v1.0.2'
@@ -24,7 +24,7 @@ class EfficientDiffusionTuningTest(unittest.TestCase):
result = edt_pipeline(inputs)
print(f'Efficient-diffusion-tuning-lora output: {result}.')

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skip
def test_efficient_diffusion_tuning_lora_load_model_from_pretrained(self):
model_id = 'damo/multi-modal_efficient-diffusion-tuning-lora'
model_revision = 'v1.0.2'
@@ -32,7 +32,7 @@ class EfficientDiffusionTuningTest(unittest.TestCase):
from modelscope.models.multi_modal import EfficientStableDiffusion
self.assertTrue(model.__class__ == EfficientStableDiffusion)

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skip
def test_efficient_diffusion_tuning_control_lora_run_pipeline(self):
# TODO: to be fixed in the future
model_id = 'damo/multi-modal_efficient-diffusion-tuning-control-lora'
@@ -48,7 +48,7 @@ class EfficientDiffusionTuningTest(unittest.TestCase):
result = edt_pipeline(inputs)
print(f'Efficient-diffusion-tuning-control-lora output: {result}.')

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skip
def test_efficient_diffusion_tuning_control_lora_load_model_from_pretrained(
self):
model_id = 'damo/multi-modal_efficient-diffusion-tuning-control-lora'

@@ -125,13 +125,8 @@ class FillMaskTest(unittest.TestCase):
for language in ['zh', 'en']:
ori_text = self.ori_texts[language]
test_input = self.test_inputs[language].replace('[MASK]', '<mask>')
with self.regress_tool.monitor_module_single_forward(
pipeline_ins.model,
f'fill_mask_veco_{language}',
compare_fn=IgnoreKeyFn('.*intermediate_act_fn')):
print(
f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: '
f'{pipeline_ins(test_input)}\n')
print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: '
f'{pipeline_ins(test_input)}\n')

@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_run_with_model_name(self):

@@ -39,7 +39,7 @@ class MplugOwlMultimodalDialogueTest(unittest.TestCase):
},
]
}
result = pipeline_multimodal_dialogue(messages, max_length=5)
result = pipeline_multimodal_dialogue(messages)
print(result[OutputKeys.TEXT])

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@@ -68,7 +68,7 @@ class MplugOwlMultimodalDialogueTest(unittest.TestCase):
},
]
}
result = pipeline_multimodal_dialogue(messages, max_length=120)
result = pipeline_multimodal_dialogue(messages, max_new_tokens=512)
print(result[OutputKeys.TEXT])

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@@ -90,7 +90,7 @@ class MplugOwlMultimodalDialogueTest(unittest.TestCase):
},
]
}
result = pipeline_multimodal_dialogue(messages)
result = pipeline_multimodal_dialogue(messages, max_new_tokens=512)
print(result[OutputKeys.TEXT])

@@ -50,7 +50,7 @@ class NeRFRecon4KTest(unittest.TestCase):
# nerf_recon_4k(
# dict(data_cfg=self.data_dic, render_dir=self.render_dir))

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skip
@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest only')
def test_run_modelhub(self):
nerf_recon_4k = pipeline(

@@ -24,7 +24,7 @@ class TextToVideoSynthesisTest(unittest.TestCase):
'out_width': 256,
}

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skip
def test_run_with_model_from_modelhub(self):
pipe_line_text_to_video_synthesis = pipeline(
task=self.task, model=self.model_id)
@@ -32,7 +32,7 @@ class TextToVideoSynthesisTest(unittest.TestCase):
self.test_text)[OutputKeys.OUTPUT_VIDEO]
print(output_video_path)

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skip
def test_run_modelhub_user_control(self):
pipe_line_text_to_video_synthesis = pipeline(
task=self.task, model=self.model_id)

@@ -7,8 +7,9 @@ from modelscope.preprocessors.templates.loader import TemplateLoader
from modelscope.utils.test_utils import test_level


def _test_check_tmpl_type(model, tmpl_type):
ollama, info = TemplateLoader.to_ollama(model, debug=True)
def _test_check_tmpl_type(model, tmpl_type, gguf_meta={}):
ollama, info = TemplateLoader.to_ollama(
model, gguf_meta=gguf_meta, debug=True)
assert info.__dict__.get('modelfile_prefix').split(
'/')[-1] == tmpl_type, info

@@ -121,6 +122,10 @@ class TestToOllama(unittest.TestCase):

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_check_template_type(self):
_test_check_tmpl_type(
'LLM-Research/Llama-3.3-70B-Instruct',
'llama3.3',
gguf_meta={'general.name': 'Llama 3.3 70B Instruct'})
_test_check_tmpl_type(
'AI-ModelScope/Llama-3.2-11B-Vision-Instruct-GGUF',
'llama3.2-vision')
@@ -190,6 +195,8 @@ class TestToOllama(unittest.TestCase):
'mistral-openorca')
_test_check_tmpl_type('QuantFactory/Mistral-7B-Instruct-v0.1-GGUF',
'mistral')
_test_check_tmpl_type('QuantFactory/Ministral-8B-Instruct-2410-GGUF',
'mistral')
_test_check_tmpl_type(
'second-state/Nous-Hermes-2-Mixtral-8x7B-SFT-GGUF',
'nous-hermes2-mixtral')
@@ -298,6 +305,12 @@ class TestToOllama(unittest.TestCase):
_test_check_tmpl_type(
'Ceceliachenen/paraphrase-multilingual-MiniLM-L12-v2',
'paraphrase-multilingual')
_test_check_tmpl_type('bartowski/Marco-o1-GGUF', 'marco-o1')
_test_check_tmpl_type('Qwen/QwQ-32B-Preview', 'qwq')
_test_check_tmpl_type('LLM-Research/Llama-3.3-70B-Instruct',
'llama3.3')
_test_check_tmpl_type('bartowski/EXAONE-3.5-7.8B-Instruct-GGUF',
'exaone3.5')


if __name__ == '__main__':

@@ -50,7 +50,7 @@ class TestSeparationTrainer(unittest.TestCase):
shutil.rmtree(self.tmp_dir, ignore_errors=True)
super().tearDown()

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skip
def test_trainer(self):
kwargs = dict(
model=self.model_id,
@@ -73,7 +73,7 @@ class TestSeparationTrainer(unittest.TestCase):
self.assertEqual(
len(checkpoint_dirs), 2, f'Cannot find checkpoint in {save_dir}!')

@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
@unittest.skip
def test_eval(self):
kwargs = dict(
model=self.model_id,

@@ -52,7 +52,7 @@ class TestClipTrainer(unittest.TestCase):
'metrics': [{'type': 'inbatch_recall'}]},
'preprocessor': []}

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skip
def test_trainer_std(self):
WORKSPACE = './workspace/ckpts/clip'
os.makedirs(WORKSPACE, exist_ok=True)

@@ -16,12 +16,12 @@ class DocumentGroundedDialogGenerateTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'DAMO_ConvAI/nlp_convai_generation_pretrain'

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skip
def test_trainer_with_model_name(self):
# load data
train_dataset = MsDataset.load(
'DAMO_ConvAI/FrDoc2BotGeneration',
download_mode=DownloadMode.FORCE_REDOWNLOAD)
download_mode=DownloadMode.FORCE_REDOWNLOAD)['train']
test_len = 1
sub_train_dataset = [x for x in train_dataset][:1]
sub_train_dataset = [{

@@ -21,7 +21,7 @@ class DocumentGroundedDialogRetrievalTest(unittest.TestCase):
# load data
train_dataset = MsDataset.load(
'DAMO_ConvAI/FrDoc2BotRetrieval',
download_mode=DownloadMode.FORCE_REDOWNLOAD)
download_mode=DownloadMode.FORCE_REDOWNLOAD)['train']
sub_train_dataset = [x for x in train_dataset][:10]
all_passages = ['阑尾炎', '肠胃炎', '肚脐开始', '肚脐为止']

@@ -35,7 +35,8 @@ class TestLoraDiffusionTrainer(unittest.TestCase):
shutil.rmtree(self.tmp_dir)
super().tearDown()

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
# need diffusers==0.24.0, skip in ci
@unittest.skip
def test_lora_diffusion_train(self):
model_id = 'AI-ModelScope/stable-diffusion-v1-5'
model_revision = 'v1.0.9'
@@ -67,7 +68,8 @@ class TestLoraDiffusionTrainer(unittest.TestCase):
results_files = os.listdir(self.tmp_dir)
self.assertIn(f'{trainer.timestamp}.log.json', results_files)

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
# need diffusers==0.24.0, skip in ci
@unittest.skip
def test_lora_diffusion_eval(self):
model_id = 'AI-ModelScope/stable-diffusion-v1-5'
model_revision = 'v1.0.9'

@@ -35,7 +35,8 @@ class TestLoraDiffusionXLTrainer(unittest.TestCase):
shutil.rmtree(self.tmp_dir)
super().tearDown()

@unittest.skipUnless(test_level() >= 1, 'skip test for oom')
# need diffusers==0.24.0, skip in ci
@unittest.skip
def test_lora_diffusion_xl_train(self):
model_id = 'AI-ModelScope/stable-diffusion-xl-base-1.0'
model_revision = 'v1.0.2'
@@ -67,7 +68,8 @@ class TestLoraDiffusionXLTrainer(unittest.TestCase):
results_files = os.listdir(self.tmp_dir)
self.assertIn(f'{trainer.timestamp}.log.json', results_files)

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
# need diffusers==0.24.0, skip in ci
@unittest.skip
def test_lora_diffusion_xl_eval(self):
model_id = 'AI-ModelScope/stable-diffusion-xl-base-1.0'
model_revision = 'v1.0.2'

@@ -76,7 +76,7 @@ class TestOfaTrainer(unittest.TestCase):
shutil.rmtree(self.WORKSPACE, ignore_errors=True)
super().tearDown()

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skip
def test_trainer_std(self):
os.makedirs(self.WORKSPACE, exist_ok=True)
config_file = os.path.join(self.WORKSPACE, ModelFile.CONFIGURATION)