Merge tag 'v1.9.0' of gitlab.alibaba-inc.com:Ali-MaaS/MaaS-lib into master-gitlab

release 1.9.0
Author: mulin.lyh
Date: 2023-09-05 17:52:56 +08:00
26 changed files with 76 additions and 63 deletions

View File

@@ -9,7 +9,7 @@ cpu_sets_arr=($cpu_sets)
is_get_file_lock=false
CI_COMMAND=${CI_COMMAND:-bash .dev_scripts/ci_container_test.sh python tests/run.py --parallel 2 --run_config tests/run_config.yaml}
echo "ci command: $CI_COMMAND"
PR_CHANGED_FILES="${PR_CHANGED_FILES:-''}"
PR_CHANGED_FILES="${PR_CHANGED_FILES:-}"
echo "PR modified files: $PR_CHANGED_FILES"
PR_CHANGED_FILES=${PR_CHANGED_FILES//[ ]/#}
echo "PR_CHANGED_FILES: $PR_CHANGED_FILES"

View File

@@ -48,10 +48,10 @@ ENV SETUPTOOLS_USE_DISTUTILS=stdlib
RUN CUDA_HOME=/usr/local/cuda TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6" pip install --no-cache-dir 'git+https://github.com/facebookresearch/detectron2.git'
# torchmetrics==0.11.4 for ofa
RUN pip install --no-cache-dir tiktoken torchmetrics==0.11.4 'protobuf<=3.20.0' bitsandbytes basicsr && \
git clone -b v1.0.8 https://github.com/Dao-AILab/flash-attention && \
cd flash-attention && pip install . && \
pip install csrc/layer_norm && \
pip install csrc/rotary && \
cd .. && \
rm -rf flash-attention
RUN pip install --no-cache-dir tiktoken torchmetrics==0.11.4 'transformers<4.31.0' transformers_stream_generator 'protobuf<=3.20.0' bitsandbytes basicsr
COPY docker/scripts/install_flash_attension.sh /tmp/install_flash_attension.sh
RUN if [ "$USE_GPU" = "True" ] ; then \
bash /tmp/install_flash_attension.sh; \
else \
echo 'cpu unsupport flash attention'; \
fi

View File

@@ -0,0 +1,6 @@
git clone -b v1.0.8 https://github.com/Dao-AILab/flash-attention && \
cd flash-attention && pip install . && \
pip install csrc/layer_norm && \
pip install csrc/rotary && \
cd .. && \
rm -rf flash-attention

View File

@@ -77,7 +77,7 @@ def cfg_modify_fn(cfg):
kwargs = dict(
model=training_args.model_id,
model=training_args.model,
model_revision=args.model_revision,
work_dir=training_args.work_dir,
train_dataset=train_dataset,

View File

@@ -65,6 +65,7 @@ class MTTR(nn.Module):
# keep only the valid frames (frames which are annotated):
# (for example, in a2d-sentences only the center frame in each window is annotated).
for layer_out in backbone_out:
valid_indices = valid_indices.to(layer_out.tensors.device)
layer_out.tensors = layer_out.tensors.index_select(
0, valid_indices)
layer_out.mask = layer_out.mask.index_select(0, valid_indices)
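The added `.to(layer_out.tensors.device)` guards against a CPU/CUDA mismatch when the backbone runs on GPU; a minimal sketch of the failure mode this avoids (hypothetical tensors, not MTTR code):

```python
import torch

# index_select requires the index tensor to live on the same device as the
# tensor being indexed, otherwise it raises a RuntimeError under CUDA.
feats = torch.randn(4, 3, device='cuda' if torch.cuda.is_available() else 'cpu')
valid_indices = torch.tensor([1, 3])               # typically built on CPU
valid_indices = valid_indices.to(feats.device)     # the added .to(...) call
picked = feats.index_select(0, valid_indices)
```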

View File

@@ -10,7 +10,8 @@ import torch
import torch.nn as nn
import torch.nn.functional as F
from diffusers.configuration_utils import ConfigMixin, register_to_config
from diffusers.models.cross_attention import CrossAttention, LoRALinearLayer
from diffusers.models.attention_processor import Attention
from diffusers.models.lora import LoRALinearLayer
from diffusers.models.modeling_utils import ModelMixin
from diffusers.models.resnet import (Downsample2D, Upsample2D, downsample_2d,
partial, upsample_2d)
@@ -467,7 +468,7 @@ class ControlLoRACrossAttnProcessor(LoRACrossAttnProcessor):
return control_states
def __call__(self,
attn: CrossAttention,
attn: Attention,
hidden_states,
encoder_hidden_states=None,
attention_mask=None,
@@ -619,7 +620,7 @@ class ControlLoRACrossAttnProcessorV2(LoRACrossAttnProcessor):
return control_states
def __call__(self,
attn: CrossAttention,
attn: Attention,
hidden_states,
encoder_hidden_states=None,
attention_mask=None,
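Both hunks track diffusers' reorganisation: `CrossAttention` from `diffusers.models.cross_attention` is now `Attention` in `diffusers.models.attention_processor`, and `LoRALinearLayer` lives in `diffusers.models.lora`. If this module ever needs to import on older diffusers as well, a hedged compatibility shim would look like the sketch below (not part of this commit):

```python
try:
    # import layout assumed by this commit (newer diffusers)
    from diffusers.models.attention_processor import Attention
    from diffusers.models.lora import LoRALinearLayer
except ImportError:
    # older diffusers exposed the class under its previous name and module
    from diffusers.models.cross_attention import CrossAttention as Attention
    from diffusers.models.cross_attention import LoRALinearLayer
```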

View File

@@ -11,7 +11,7 @@ import torch.nn.functional as F
from diffusers import (AutoencoderKL, DDPMScheduler, DiffusionPipeline,
DPMSolverMultistepScheduler, UNet2DConditionModel,
utils)
from diffusers.models import cross_attention
from diffusers.models import attention
from diffusers.utils import deprecation_utils
from swift import AdapterConfig, LoRAConfig, PromptConfig, Swift
from transformers import CLIPTextModel, CLIPTokenizer
@@ -30,7 +30,7 @@ from .control_sd_lora import ControlLoRATuner
utils.deprecate = lambda *arg, **kwargs: None
deprecation_utils.deprecate = lambda *arg, **kwargs: None
cross_attention.deprecate = lambda *arg, **kwargs: None
attention.deprecate = lambda *arg, **kwargs: None
__tuner_MAP__ = {'lora': LoRATuner, 'control_lora': ControlLoRATuner}
@@ -113,12 +113,10 @@ class EfficientStableDiffusion(TorchModel):
rank = tuner_config[
'rank'] if tuner_config and 'rank' in tuner_config else 4
lora_config = LoRAConfig(
rank=rank,
replace_modules=['to_q', 'to_k', 'to_v', 'to_out.0'],
r=rank,
target_modules=['to_q', 'to_k', 'to_v', 'to_out.0'],
merge_weights=False,
only_lora_trainable=False,
use_merged_linear=False,
pretrained_weights=pretrained_tuner)
use_merged_linear=False)
self.unet = Swift.prepare_model(self.unet, lora_config)
elif tuner_name == 'swift-adapter':
adapter_length = tuner_config[
@@ -126,10 +124,8 @@ class EfficientStableDiffusion(TorchModel):
adapter_config = AdapterConfig(
dim=-1,
hidden_pos=0,
module_name=r'.*ff\.net\.2$',
adapter_length=adapter_length,
only_adapter_trainable=False,
pretrained_weights=pretrained_tuner)
target_modules=r'.*ff\.net\.2$',
adapter_length=adapter_length)
self.unet = Swift.prepare_model(self.unet, adapter_config)
elif tuner_name == 'swift-prompt':
prompt_length = tuner_config[
@@ -139,14 +135,11 @@ class EfficientStableDiffusion(TorchModel):
320, 320, 640, 640, 1280, 1280, 1280, 1280, 1280, 640, 640,
640, 320, 320, 320
],
module_layer_name=
target_modules=
r'.*[down_blocks|up_blocks|mid_block]\.\d+\.attentions\.\d+\.transformer_blocks\.\d+$',
embedding_pos=0,
prompt_length=prompt_length,
only_prompt_trainable=False,
attach_front=False,
pretrained_weights=pretrained_tuner,
extract_embedding=True)
attach_front=False)
self.unet = Swift.prepare_model(self.unet, prompt_config)
elif tuner_name in ('lora', 'control_lora'):
# if not set the config of control-tuner, we add the lora tuner directly to the original framework,
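The tuner configs here are rewritten for Swift's current argument names (`r` and `target_modules` replace `rank`, `replace_modules`, `module_name` and `module_layer_name`), and arguments Swift no longer accepts (`only_*_trainable`, `pretrained_weights`, `extract_embedding`) are dropped. A minimal standalone sketch of the updated LoRA attachment; the rank value is just the default used above:

```python
from swift import LoRAConfig, Swift

lora_config = LoRAConfig(
    r=4,                                                  # default rank from the code above
    target_modules=['to_q', 'to_k', 'to_v', 'to_out.0'],  # attention projections
    merge_weights=False,
    use_merged_linear=False)
# unet = Swift.prepare_model(unet, lora_config)           # as done in the hunk above
```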

View File

@@ -8,7 +8,8 @@ from typing import List, Tuple, Union
import torch
import torch.nn as nn
from diffusers.configuration_utils import ConfigMixin, register_to_config
from diffusers.models.cross_attention import CrossAttention, LoRALinearLayer
from diffusers.models.attention_processor import Attention
from diffusers.models.lora import LoRALinearLayer
from diffusers.models.modeling_utils import ModelMixin
from diffusers.utils.outputs import BaseOutput
@@ -84,7 +85,7 @@ class LoRACrossAttnProcessor(nn.Module):
self.output_states_skipped = is_skipped
def __call__(self,
attn: CrossAttention,
attn: Attention,
hidden_states,
encoder_hidden_states=None,
attention_mask=None,

View File

@@ -112,7 +112,7 @@ class VideoToVideo(TorchModel):
generator.eval()
load_dict = torch.load(cfg.model_path, map_location='cpu')
ret = generator.load_state_dict(load_dict['state_dict'], strict=True)
self.generator = generator
self.generator = generator.half()
logger.info('Load model {} path {}, with local status {}'.format(
cfg.UNet.type, cfg.model_path, ret))
@@ -175,7 +175,7 @@ class VideoToVideo(TorchModel):
video_data = rearrange(video_data, 'b f c h w -> (b f) c h w')
video_data_list = torch.chunk(
video_data, video_data.shape[0] // 2, dim=0)
video_data, video_data.shape[0] // 1, dim=0)
with torch.no_grad():
decode_data = []
for vd_data in video_data_list:
@@ -185,6 +185,7 @@ class VideoToVideo(TorchModel):
video_data_feature = torch.cat(decode_data, dim=0)
video_data_feature = rearrange(
video_data_feature, '(b f) c h w -> b c f h w', b=batch_size)
torch.cuda.empty_cache()
with amp.autocast(enabled=True):
total_noise_levels = 600
@@ -209,6 +210,7 @@ class VideoToVideo(TorchModel):
t_min=0,
discretization='trailing')
torch.cuda.empty_cache()
scale_factor = 0.18215
vid_tensor_feature = 1. / scale_factor * gen_vid
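The VideoToVideo changes trade throughput for memory: the generator is cast to fp16, latent frames are decoded one per chunk (`shape[0] // 1`), and `torch.cuda.empty_cache()` is called between the heavy stages. A rough sketch of the decode loop; `autoencoder.decode` stands in for whatever decoder interface the model actually uses:

```python
import torch
from einops import rearrange

def decode_in_chunks(autoencoder, video_data, batch_size):
    # one frame per chunk keeps peak VRAM low for long clips
    chunks = torch.chunk(video_data, video_data.shape[0] // 1, dim=0)
    decoded = []
    with torch.no_grad():
        for chunk in chunks:
            decoded.append(autoencoder.decode(chunk))
    feature = torch.cat(decoded, dim=0)
    feature = rearrange(feature, '(b f) c h w -> b c f h w', b=batch_size)
    torch.cuda.empty_cache()   # release cached blocks before the diffusion stage
    return feature
```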

View File

@@ -240,7 +240,14 @@ TASK_INPUTS = {
InputType.IMAGE,
Tasks.video_embedding:
InputType.VIDEO,
Tasks.virtual_try_on: (InputType.IMAGE, InputType.IMAGE, InputType.IMAGE),
Tasks.virtual_try_on: [
(InputType.IMAGE, InputType.IMAGE, InputType.IMAGE),
{
'masked_model': InputType.IMAGE,
'pose': InputType.IMAGE,
'cloth': InputType.IMAGE,
}
],
Tasks.text_driven_segmentation: {
InputKeys.IMAGE: InputType.IMAGE,
InputKeys.TEXT: InputType.TEXT
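Listing both shapes means input validation for `virtual_try_on` now accepts the legacy image triple as well as a keyed dict, matching the pipeline test updated later in this commit. A hedged usage sketch; the model id is an assumption, substitute whichever try-on model you actually load:

```python
from PIL import Image
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# model id below is illustrative, not taken from this commit
tryon = pipeline(Tasks.virtual_try_on, model='damo/cv_daflow_virtual-try-on_base')
masked_model = Image.open('data/test/images/virtual_tryon_model.jpg')
pose = Image.open('data/test/images/virtual_tryon_pose.jpg')
cloth = Image.open('data/test/images/virtual_tryon_cloth.jpg')

result_tuple = tryon((masked_model, pose, cloth))   # legacy positional form
result_dict = tryon({'masked_model': masked_model, 'pose': pose, 'cloth': cloth})
```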

View File

@@ -31,7 +31,7 @@ class FaceEmotionPipeline(Pipeline):
logger.info('load model done')
def preprocess(self, input: Input) -> Dict[str, Any]:
img = LoadImage.convert_to_ndarray(input['img_path'])
img = LoadImage.convert_to_ndarray(input)
return img
def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
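This pipeline (and the three below it) stops unwrapping a `{'img_path': ...}` / `{'input_path': ...}` dict and passes the raw input straight to `LoadImage.convert_to_ndarray`, which already accepts a path, URL, PIL image or ndarray. A sketch of the simplified call; the task constant is assumed to be the one registered for this pipeline:

```python
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

face_emotion = pipeline(Tasks.face_emotion, model='damo/cv_face-emotion')
result = face_emotion('data/test/images/face_emotion.jpg')   # no wrapper dict needed
```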

View File

@@ -32,14 +32,13 @@ class NanoDettForFaceHumanHandDetectionPipeline(Pipeline):
logger.info('load model done')
def preprocess(self, input: Input) -> Dict[str, Any]:
img = LoadImage.convert_to_ndarray(input['input_path'])
img = LoadImage.convert_to_ndarray(input)
return img
def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
cls_list, bbox_list, score_list = det_infer.inference(
self.model, self.device, input)
logger.info(cls_list, bbox_list, score_list)
return {
OutputKeys.LABELS: cls_list,
OutputKeys.BOXES: bbox_list,

View File

@@ -30,7 +30,7 @@ class HandStaticPipeline(Pipeline):
logger.info('load model done')
def preprocess(self, input: Input) -> Dict[str, Any]:
img = LoadImage.convert_to_ndarray(input['img_path'])
img = LoadImage.convert_to_ndarray(input)
return img
def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:

View File

@@ -31,7 +31,8 @@ class F3NetForProductSegmentationPipeline(Pipeline):
logger.info('load model done')
def preprocess(self, input: Input) -> Dict[str, Any]:
img = LoadImage.convert_to_ndarray(input['input_path'])
img = LoadImage.convert_to_ndarray(input)
img = img.astype(np.float32)
return img

View File

@@ -448,9 +448,9 @@ class SeqGPTPipeline(Pipeline):
# define the forward pass
def forward(self, prompt: str, **forward_params) -> Dict[str, Any]:
# gen & decode
prompt += '[GEN]'
# prompt += '[GEN]'
input_ids = self.tokenizer(
prompt,
prompt + '[GEN]',
return_tensors='pt',
padding=True,
truncation=True,
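Moving the '[GEN]' suffix into the tokenizer call leaves the caller's `prompt` string unmodified, presumably so later uses of the prompt (logging, stripping it from the decoded output, retries) see the original text; the encoded input is identical either way. Sketch:

```python
def encode_for_generation(tokenizer, prompt: str):
    # the generation marker is appended only to the encoded copy;
    # the caller's `prompt` variable is left untouched
    return tokenizer(prompt + '[GEN]', return_tensors='pt',
                     padding=True, truncation=True)
```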

View File

@@ -40,7 +40,8 @@ class CustomCheckpointProcessor(CheckpointProcessor):
def __init__(self,
modifier_token,
modifier_token_id,
torch_type=torch.float32):
torch_type=torch.float32,
safe_serialization=False):
"""Checkpoint processor for custom diffusion.
Args:

View File

@@ -1,5 +1,5 @@
# Make sure to modify __release_datetime__ to release time when making official release.
__version__ = '1.8.1'
__version__ = '1.9.0'
# default release datetime for branches under active development is set
# to be a time far-far-away-into-the-future
__release_datetime__ = '2099-10-13 08:56:12'
__release_datetime__ = '2023-09-06 00:00:00'

View File

@@ -21,7 +21,7 @@ class TestExportStableDiffusion(unittest.TestCase):
os.makedirs(self.tmp_dir)
self.model_id = 'AI-ModelScope/stable-diffusion-v1-5'
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_export_stable_diffusion(self):
model = Model.from_pretrained(self.model_id)
Exporter.from_model(model).export_onnx(
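Several tests in this commit move from level 0 to level 1, so they drop out of the default CI pass and only run when the test level is raised, presumably via the `TEST_LEVEL` environment variable read by `test_level()` (the variable name and import path are assumptions). A sketch of the gating pattern:

```python
import unittest
from modelscope.utils.test_utils import test_level   # assumed import path

class ExampleTest(unittest.TestCase):

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_runs_only_at_level_one_or_above(self):
        # set TEST_LEVEL=1 (assumed variable) in the environment to opt in
        self.assertTrue(True)
```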

View File

@@ -16,7 +16,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.efficient_diffusion_tuning
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_efficient_diffusion_tuning_swift_lora_run_pipeline(self):
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-lora'
model_revision = 'v1.0.2'
@@ -33,7 +33,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
f'Efficient-diffusion-tuning-swift-lora output: {output_image_path}'
)
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_efficient_diffusion_tuning_swift_lora_load_model_from_pretrained(
self):
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-lora'
@@ -41,7 +41,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
model = Model.from_pretrained(model_id, model_revision=model_revision)
self.assertTrue(model.__class__ == EfficientStableDiffusion)
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_efficient_diffusion_tuning_swift_adapter_run_pipeline(self):
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-adapter'
model_revision = 'v1.0.2'
@@ -58,7 +58,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
f'Efficient-diffusion-tuning-swift-adapter output: {output_image_path}'
)
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_efficient_diffusion_tuning_swift_adapter_load_model_from_pretrained(
self):
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-adapter'
@@ -66,7 +66,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
model = Model.from_pretrained(model_id, model_revision=model_revision)
self.assertTrue(model.__class__ == EfficientStableDiffusion)
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_efficient_diffusion_tuning_swift_prompt_run_pipeline(self):
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-prompt'
model_revision = 'v1.0.2'
@@ -83,7 +83,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
f'Efficient-diffusion-tuning-swift-prompt output: {output_image_path}'
)
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_efficient_diffusion_tuning_swift_prompt_load_model_from_pretrained(
self):
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-prompt'

View File

@@ -11,7 +11,7 @@ class FaceEmotionTest(unittest.TestCase):
def setUp(self) -> None:
self.model = 'damo/cv_face-emotion'
self.img = {'img_path': 'data/test/images/face_emotion.jpg'}
self.img = 'data/test/images/face_emotion.jpg'
def pipeline_inference(self, pipeline: Pipeline, input: str):
result = pipeline(input)

View File

@@ -14,9 +14,7 @@ class FaceHumanHandTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_nanodet_face-human-hand-detection'
self.input = {
'input_path': 'data/test/images/face_human_hand_detection.jpg',
}
self.input = 'data/test/images/face_human_hand_detection.jpg'
def pipeline_inference(self, pipeline: Pipeline, input: str):
result = pipeline(input)

View File

@@ -11,7 +11,7 @@ class HandStaticTest(unittest.TestCase):
def setUp(self) -> None:
self.model = 'damo/cv_mobileface_hand-static'
self.input = {'img_path': 'data/test/images/hand_static.jpg'}
self.input = 'data/test/images/hand_static.jpg'
def pipeline_inference(self, pipeline: Pipeline, input: str):
result = pipeline(input)

View File

@@ -17,9 +17,7 @@ class ProductSegmentationTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_F3Net_product-segmentation'
self.input = {
'input_path': 'data/test/images/product_segmentation.jpg'
}
self.input = 'data/test/images/product_segmentation.jpg'
def pipeline_inference(self, pipeline: Pipeline, input: str):
result = pipeline(input)

View File

@@ -20,7 +20,11 @@ class VirtualTryonTest(unittest.TestCase):
masked_model = Image.open('data/test/images/virtual_tryon_model.jpg')
pose = Image.open('data/test/images/virtual_tryon_pose.jpg')
cloth = Image.open('data/test/images/virtual_tryon_cloth.jpg')
input_imgs = (masked_model, pose, cloth)
input_imgs = {
'masked_model': masked_model,
'pose': pose,
'cloth': cloth,
}
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_run_with_model_name(self):

View File

@@ -126,11 +126,12 @@ def get_current_branch():
def get_modified_files():
if 'PR_CHANGED_FILES' in os.environ and os.environ[
'PR_CHANGED_FILES'] != '':
'PR_CHANGED_FILES'].strip() != '':
logger.info('Getting PR modified files.')
# get modify file from environment
diff_files = os.environ['PR_CHANGED_FILES'].replace('#', '\n')
else:
logger.info('Getting diff of branch.')
cmd = ['git', 'diff', '--name-only', 'origin/master...']
diff_files = run_command_get_output(cmd)
logger.info('Diff files: ')
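This is the consumer of the `.dev_scripts` change at the top of the commit: the shell side packs the PR file list by replacing spaces with `#`, this function unpacks it, and the added `.strip()` also ignores whitespace-only values. A condensed sketch of the round trip (file names are illustrative):

```python
import os

packed = 'modelscope/version.py#tests/run_config.yaml'   # spaces -> '#' on the shell side
os.environ['PR_CHANGED_FILES'] = packed

if os.environ.get('PR_CHANGED_FILES', '').strip() != '':
    diff_files = os.environ['PR_CHANGED_FILES'].replace('#', '\n')
else:
    diff_files = None   # fall back to: git diff --name-only origin/master...
print(diff_files.splitlines())   # ['modelscope/version.py', 'tests/run_config.yaml']
```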

View File

@@ -33,7 +33,7 @@ class TestEfficientDiffusionTuningTrainerSwift(unittest.TestCase):
shutil.rmtree(self.tmp_dir)
super().tearDown()
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_efficient_diffusion_tuning_swift_lora_train(self):
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-lora'
model_revision = 'v1.0.2'
@@ -62,7 +62,7 @@ class TestEfficientDiffusionTuningTrainerSwift(unittest.TestCase):
self.assertIn(f'{trainer.timestamp}.log.json', results_files)
self.assertIn(f'epoch_{self.max_epochs}.pth', results_files)
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_efficient_diffusion_tuning_swift_adapter_train(self):
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-adapter'
model_revision = 'v1.0.2'
@@ -91,7 +91,7 @@ class TestEfficientDiffusionTuningTrainerSwift(unittest.TestCase):
self.assertIn(f'{trainer.timestamp}.log.json', results_files)
self.assertIn(f'epoch_{self.max_epochs}.pth', results_files)
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_efficient_diffusion_tuning_swift_prompt_train(self):
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-prompt'
model_revision = 'v1.0.2'