Mirror of https://github.com/modelscope/modelscope.git (synced 2025-12-25 20:49:37 +01:00)
Merge branch master-merge-github0901 into master
Title: Merge branch 'master-github' into master-merge-github0901
Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/13874155
This commit is contained in:
@@ -10,7 +10,8 @@ import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from diffusers.configuration_utils import ConfigMixin, register_to_config
|
||||
from diffusers.models.cross_attention import CrossAttention, LoRALinearLayer
|
||||
from diffusers.models.attention_processor import Attention
|
||||
from diffusers.models.lora import LoRALinearLayer
|
||||
from diffusers.models.modeling_utils import ModelMixin
|
||||
from diffusers.models.resnet import (Downsample2D, Upsample2D, downsample_2d,
|
||||
partial, upsample_2d)
|
||||
@@ -467,7 +468,7 @@ class ControlLoRACrossAttnProcessor(LoRACrossAttnProcessor):
|
||||
return control_states
|
||||
|
||||
def __call__(self,
|
||||
attn: CrossAttention,
|
||||
attn: Attention,
|
||||
hidden_states,
|
||||
encoder_hidden_states=None,
|
||||
attention_mask=None,
|
||||
@@ -619,7 +620,7 @@ class ControlLoRACrossAttnProcessorV2(LoRACrossAttnProcessor):
|
||||
return control_states
|
||||
|
||||
def __call__(self,
|
||||
attn: CrossAttention,
|
||||
attn: Attention,
|
||||
hidden_states,
|
||||
encoder_hidden_states=None,
|
||||
attention_mask=None,
|
||||
|
||||
@@ -11,7 +11,7 @@ import torch.nn.functional as F
|
||||
from diffusers import (AutoencoderKL, DDPMScheduler, DiffusionPipeline,
|
||||
DPMSolverMultistepScheduler, UNet2DConditionModel,
|
||||
utils)
|
||||
from diffusers.models import cross_attention
|
||||
from diffusers.models import attention
|
||||
from diffusers.utils import deprecation_utils
|
||||
from swift import AdapterConfig, LoRAConfig, PromptConfig, Swift
|
||||
from transformers import CLIPTextModel, CLIPTokenizer
|
||||
@@ -30,7 +30,7 @@ from .control_sd_lora import ControlLoRATuner
|
||||
|
||||
utils.deprecate = lambda *arg, **kwargs: None
|
||||
deprecation_utils.deprecate = lambda *arg, **kwargs: None
|
||||
cross_attention.deprecate = lambda *arg, **kwargs: None
|
||||
attention.deprecate = lambda *arg, **kwargs: None
|
||||
|
||||
__tuner_MAP__ = {'lora': LoRATuner, 'control_lora': ControlLoRATuner}
|
||||
|
||||
@@ -113,12 +113,10 @@ class EfficientStableDiffusion(TorchModel):
|
||||
rank = tuner_config[
|
||||
'rank'] if tuner_config and 'rank' in tuner_config else 4
|
||||
lora_config = LoRAConfig(
|
||||
rank=rank,
|
||||
replace_modules=['to_q', 'to_k', 'to_v', 'to_out.0'],
|
||||
r=rank,
|
||||
target_modules=['to_q', 'to_k', 'to_v', 'to_out.0'],
|
||||
merge_weights=False,
|
||||
only_lora_trainable=False,
|
||||
use_merged_linear=False,
|
||||
pretrained_weights=pretrained_tuner)
|
||||
use_merged_linear=False)
|
||||
self.unet = Swift.prepare_model(self.unet, lora_config)
|
||||
elif tuner_name == 'swift-adapter':
|
||||
adapter_length = tuner_config[
|
||||
@@ -126,10 +124,8 @@ class EfficientStableDiffusion(TorchModel):
|
||||
adapter_config = AdapterConfig(
|
||||
dim=-1,
|
||||
hidden_pos=0,
|
||||
module_name=r'.*ff\.net\.2$',
|
||||
adapter_length=adapter_length,
|
||||
only_adapter_trainable=False,
|
||||
pretrained_weights=pretrained_tuner)
|
||||
target_modules=r'.*ff\.net\.2$',
|
||||
adapter_length=adapter_length)
|
||||
self.unet = Swift.prepare_model(self.unet, adapter_config)
|
||||
elif tuner_name == 'swift-prompt':
|
||||
prompt_length = tuner_config[
|
||||
@@ -139,14 +135,11 @@ class EfficientStableDiffusion(TorchModel):
|
||||
320, 320, 640, 640, 1280, 1280, 1280, 1280, 1280, 640, 640,
|
||||
640, 320, 320, 320
|
||||
],
|
||||
module_layer_name=
|
||||
target_modules=
|
||||
r'.*[down_blocks|up_blocks|mid_block]\.\d+\.attentions\.\d+\.transformer_blocks\.\d+$',
|
||||
embedding_pos=0,
|
||||
prompt_length=prompt_length,
|
||||
only_prompt_trainable=False,
|
||||
attach_front=False,
|
||||
pretrained_weights=pretrained_tuner,
|
||||
extract_embedding=True)
|
||||
attach_front=False)
|
||||
self.unet = Swift.prepare_model(self.unet, prompt_config)
|
||||
elif tuner_name in ('lora', 'control_lora'):
|
||||
# if not set the config of control-tuner, we add the lora tuner directly to the original framework,
|
||||
|
||||
@@ -8,7 +8,8 @@ from typing import List, Tuple, Union
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from diffusers.configuration_utils import ConfigMixin, register_to_config
|
||||
from diffusers.models.cross_attention import CrossAttention, LoRALinearLayer
|
||||
from diffusers.models.attention_processor import Attention
|
||||
from diffusers.models.lora import LoRALinearLayer
|
||||
from diffusers.models.modeling_utils import ModelMixin
|
||||
from diffusers.utils.outputs import BaseOutput
|
||||
|
||||
@@ -84,7 +85,7 @@ class LoRACrossAttnProcessor(nn.Module):
|
||||
self.output_states_skipped = is_skipped
|
||||
|
||||
def __call__(self,
|
||||
attn: CrossAttention,
|
||||
attn: Attention,
|
||||
hidden_states,
|
||||
encoder_hidden_states=None,
|
||||
attention_mask=None,
|
||||
|
||||
@@ -112,7 +112,7 @@ class VideoToVideo(TorchModel):
|
||||
generator.eval()
|
||||
load_dict = torch.load(cfg.model_path, map_location='cpu')
|
||||
ret = generator.load_state_dict(load_dict['state_dict'], strict=True)
|
||||
self.generator = generator
|
||||
self.generator = generator.half()
|
||||
logger.info('Load model {} path {}, with local status {}'.format(
|
||||
cfg.UNet.type, cfg.model_path, ret))
|
||||
|
||||
@@ -175,7 +175,7 @@ class VideoToVideo(TorchModel):
|
||||
video_data = rearrange(video_data, 'b f c h w -> (b f) c h w')
|
||||
|
||||
video_data_list = torch.chunk(
|
||||
video_data, video_data.shape[0] // 2, dim=0)
|
||||
video_data, video_data.shape[0] // 1, dim=0)
|
||||
with torch.no_grad():
|
||||
decode_data = []
|
||||
for vd_data in video_data_list:
|
||||
@@ -185,6 +185,7 @@ class VideoToVideo(TorchModel):
|
||||
video_data_feature = torch.cat(decode_data, dim=0)
|
||||
video_data_feature = rearrange(
|
||||
video_data_feature, '(b f) c h w -> b c f h w', b=batch_size)
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
with amp.autocast(enabled=True):
|
||||
total_noise_levels = 600
|
||||
@@ -209,6 +210,7 @@ class VideoToVideo(TorchModel):
|
||||
t_min=0,
|
||||
discretization='trailing')
|
||||
|
||||
torch.cuda.empty_cache()
|
||||
scale_factor = 0.18215
|
||||
vid_tensor_feature = 1. / scale_factor * gen_vid
|
||||
|
||||
|
||||
@@ -240,7 +240,14 @@ TASK_INPUTS = {
|
||||
InputType.IMAGE,
|
||||
Tasks.video_embedding:
|
||||
InputType.VIDEO,
|
||||
Tasks.virtual_try_on: (InputType.IMAGE, InputType.IMAGE, InputType.IMAGE),
|
||||
Tasks.virtual_try_on: [
|
||||
(InputType.IMAGE, InputType.IMAGE, InputType.IMAGE),
|
||||
{
|
||||
'masked_model': InputType.IMAGE,
|
||||
'pose': InputType.IMAGE,
|
||||
'cloth': InputType.IMAGE,
|
||||
}
|
||||
],
|
||||
Tasks.text_driven_segmentation: {
|
||||
InputKeys.IMAGE: InputType.IMAGE,
|
||||
InputKeys.TEXT: InputType.TEXT
|
||||
|
||||
@@ -448,9 +448,9 @@ class SeqGPTPipeline(Pipeline):
|
||||
# define the forward pass
|
||||
def forward(self, prompt: str, **forward_params) -> Dict[str, Any]:
|
||||
# gen & decode
|
||||
prompt += '[GEN]'
|
||||
# prompt += '[GEN]'
|
||||
input_ids = self.tokenizer(
|
||||
prompt,
|
||||
prompt + '[GEN]',
|
||||
return_tensors='pt',
|
||||
padding=True,
|
||||
truncation=True,
|
||||
|
||||
@@ -21,7 +21,7 @@ class TestExportStableDiffusion(unittest.TestCase):
|
||||
os.makedirs(self.tmp_dir)
|
||||
self.model_id = 'AI-ModelScope/stable-diffusion-v1-5'
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
def test_export_stable_diffusion(self):
|
||||
model = Model.from_pretrained(self.model_id)
|
||||
Exporter.from_model(model).export_onnx(
|
||||
|
||||
@@ -16,7 +16,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
|
||||
def setUp(self) -> None:
|
||||
self.task = Tasks.efficient_diffusion_tuning
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
def test_efficient_diffusion_tuning_swift_lora_run_pipeline(self):
|
||||
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-lora'
|
||||
model_revision = 'v1.0.2'
|
||||
@@ -33,7 +33,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
|
||||
f'Efficient-diffusion-tuning-swift-lora output: {output_image_path}'
|
||||
)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
def test_efficient_diffusion_tuning_swift_lora_load_model_from_pretrained(
|
||||
self):
|
||||
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-lora'
|
||||
@@ -41,7 +41,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
|
||||
model = Model.from_pretrained(model_id, model_revision=model_revision)
|
||||
self.assertTrue(model.__class__ == EfficientStableDiffusion)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
def test_efficient_diffusion_tuning_swift_adapter_run_pipeline(self):
|
||||
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-adapter'
|
||||
model_revision = 'v1.0.2'
|
||||
@@ -58,7 +58,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
|
||||
f'Efficient-diffusion-tuning-swift-adapter output: {output_image_path}'
|
||||
)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
def test_efficient_diffusion_tuning_swift_adapter_load_model_from_pretrained(
|
||||
self):
|
||||
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-adapter'
|
||||
@@ -66,7 +66,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
|
||||
model = Model.from_pretrained(model_id, model_revision=model_revision)
|
||||
self.assertTrue(model.__class__ == EfficientStableDiffusion)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
def test_efficient_diffusion_tuning_swift_prompt_run_pipeline(self):
|
||||
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-prompt'
|
||||
model_revision = 'v1.0.2'
|
||||
@@ -83,7 +83,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
|
||||
f'Efficient-diffusion-tuning-swift-prompt output: {output_image_path}'
|
||||
)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
def test_efficient_diffusion_tuning_swift_prompt_load_model_from_pretrained(
|
||||
self):
|
||||
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-prompt'
|
||||
|
||||
@@ -20,7 +20,11 @@ class VirtualTryonTest(unittest.TestCase):
|
||||
masked_model = Image.open('data/test/images/virtual_tryon_model.jpg')
|
||||
pose = Image.open('data/test/images/virtual_tryon_pose.jpg')
|
||||
cloth = Image.open('data/test/images/virtual_tryon_cloth.jpg')
|
||||
input_imgs = (masked_model, pose, cloth)
|
||||
input_imgs = {
|
||||
'masked_model': masked_model,
|
||||
'pose': pose,
|
||||
'cloth': cloth,
|
||||
}
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
def test_run_with_model_name(self):
|
||||
|
||||
@@ -33,7 +33,7 @@ class TestEfficientDiffusionTuningTrainerSwift(unittest.TestCase):
|
||||
shutil.rmtree(self.tmp_dir)
|
||||
super().tearDown()
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
def test_efficient_diffusion_tuning_swift_lora_train(self):
|
||||
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-lora'
|
||||
model_revision = 'v1.0.2'
|
||||
@@ -62,7 +62,7 @@ class TestEfficientDiffusionTuningTrainerSwift(unittest.TestCase):
|
||||
self.assertIn(f'{trainer.timestamp}.log.json', results_files)
|
||||
self.assertIn(f'epoch_{self.max_epochs}.pth', results_files)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
def test_efficient_diffusion_tuning_swift_adapter_train(self):
|
||||
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-adapter'
|
||||
model_revision = 'v1.0.2'
|
||||
@@ -91,7 +91,7 @@ class TestEfficientDiffusionTuningTrainerSwift(unittest.TestCase):
|
||||
self.assertIn(f'{trainer.timestamp}.log.json', results_files)
|
||||
self.assertIn(f'epoch_{self.max_epochs}.pth', results_files)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
def test_efficient_diffusion_tuning_swift_prompt_train(self):
|
||||
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-prompt'
|
||||
model_revision = 'v1.0.2'
|
||||
|
||||
Reference in New Issue
Block a user