Merge branch master-merge-github0901 into master

Title: Merge branch 'master-github' into master-merge-github0901 
Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/13874155
Author: yuze.zyz
Date: 2023-09-01 11:13:19 +08:00
10 changed files with 45 additions and 37 deletions

View File

@@ -10,7 +10,8 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from diffusers.configuration_utils import ConfigMixin, register_to_config
-from diffusers.models.cross_attention import CrossAttention, LoRALinearLayer
+from diffusers.models.attention_processor import Attention
+from diffusers.models.lora import LoRALinearLayer
 from diffusers.models.modeling_utils import ModelMixin
 from diffusers.models.resnet import (Downsample2D, Upsample2D, downsample_2d,
                                      partial, upsample_2d)
@@ -467,7 +468,7 @@ class ControlLoRACrossAttnProcessor(LoRACrossAttnProcessor):
         return control_states

     def __call__(self,
-                 attn: CrossAttention,
+                 attn: Attention,
                  hidden_states,
                  encoder_hidden_states=None,
                  attention_mask=None,
@@ -619,7 +620,7 @@ class ControlLoRACrossAttnProcessorV2(LoRACrossAttnProcessor):
         return control_states

     def __call__(self,
-                 attn: CrossAttention,
+                 attn: Attention,
                  hidden_states,
                  encoder_hidden_states=None,
                  attention_mask=None,
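
These hunks track the diffusers refactor that renamed CrossAttention to Attention (now in diffusers.models.attention_processor) and moved LoRALinearLayer into diffusers.models.lora. A minimal version-tolerant import sketch, assuming only these two symbols are needed; the fallback branch is illustrative and not part of this commit:

# diffusers renamed CrossAttention -> Attention and relocated LoRALinearLayer;
# fall back to the old paths on pre-refactor releases.
try:
    from diffusers.models.attention_processor import Attention
    from diffusers.models.lora import LoRALinearLayer
except ImportError:  # older diffusers
    from diffusers.models.cross_attention import (CrossAttention as Attention,
                                                  LoRALinearLayer)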

View File

@@ -11,7 +11,7 @@ import torch.nn.functional as F
 from diffusers import (AutoencoderKL, DDPMScheduler, DiffusionPipeline,
                        DPMSolverMultistepScheduler, UNet2DConditionModel,
                        utils)
-from diffusers.models import cross_attention
+from diffusers.models import attention
 from diffusers.utils import deprecation_utils
 from swift import AdapterConfig, LoRAConfig, PromptConfig, Swift
 from transformers import CLIPTextModel, CLIPTokenizer
@@ -30,7 +30,7 @@ from .control_sd_lora import ControlLoRATuner

 utils.deprecate = lambda *arg, **kwargs: None
 deprecation_utils.deprecate = lambda *arg, **kwargs: None
-cross_attention.deprecate = lambda *arg, **kwargs: None
+attention.deprecate = lambda *arg, **kwargs: None

 __tuner_MAP__ = {'lora': LoRATuner, 'control_lora': ControlLoRATuner}
@@ -113,12 +113,10 @@ class EfficientStableDiffusion(TorchModel):
             rank = tuner_config[
                 'rank'] if tuner_config and 'rank' in tuner_config else 4
             lora_config = LoRAConfig(
-                rank=rank,
-                replace_modules=['to_q', 'to_k', 'to_v', 'to_out.0'],
+                r=rank,
+                target_modules=['to_q', 'to_k', 'to_v', 'to_out.0'],
                 merge_weights=False,
-                only_lora_trainable=False,
-                use_merged_linear=False,
-                pretrained_weights=pretrained_tuner)
+                use_merged_linear=False)
             self.unet = Swift.prepare_model(self.unet, lora_config)
         elif tuner_name == 'swift-adapter':
             adapter_length = tuner_config[
@@ -126,10 +124,8 @@ class EfficientStableDiffusion(TorchModel):
             adapter_config = AdapterConfig(
                 dim=-1,
                 hidden_pos=0,
-                module_name=r'.*ff\.net\.2$',
-                adapter_length=adapter_length,
-                only_adapter_trainable=False,
-                pretrained_weights=pretrained_tuner)
+                target_modules=r'.*ff\.net\.2$',
+                adapter_length=adapter_length)
             self.unet = Swift.prepare_model(self.unet, adapter_config)
         elif tuner_name == 'swift-prompt':
             prompt_length = tuner_config[
@@ -139,14 +135,11 @@ class EfficientStableDiffusion(TorchModel):
                     320, 320, 640, 640, 1280, 1280, 1280, 1280, 1280, 640, 640,
                     640, 320, 320, 320
                 ],
-                module_layer_name=
+                target_modules=
                 r'.*[down_blocks|up_blocks|mid_block]\.\d+\.attentions\.\d+\.transformer_blocks\.\d+$',
                 embedding_pos=0,
                 prompt_length=prompt_length,
-                only_prompt_trainable=False,
-                attach_front=False,
-                pretrained_weights=pretrained_tuner,
-                extract_embedding=True)
+                attach_front=False)
             self.unet = Swift.prepare_model(self.unet, prompt_config)
         elif tuner_name in ('lora', 'control_lora'):
             # if not set the config of control-tuner, we add the lora tuner directly to the original framework,
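
The tuner configs migrate to the newer Swift parameter names: rank becomes r, and replace_modules/module_name/module_layer_name all become target_modules; pretrained_weights and the only_*_trainable flags are no longer passed through the config. A minimal sketch of the updated LoRA setup under those assumptions (the rank and module list mirror the diff; unet stands for any attention-bearing module):

from swift import LoRAConfig, Swift

# New-style Swift LoRA config: `r` and `target_modules` replace the older
# `rank` and `replace_modules`; tuner weights are loaded separately rather
# than through a `pretrained_weights` argument.
lora_config = LoRAConfig(
    r=4,
    target_modules=['to_q', 'to_k', 'to_v', 'to_out.0'],
    merge_weights=False,
    use_merged_linear=False)
unet = Swift.prepare_model(unet, lora_config)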

View File

@@ -8,7 +8,8 @@ from typing import List, Tuple, Union
 import torch
 import torch.nn as nn
 from diffusers.configuration_utils import ConfigMixin, register_to_config
-from diffusers.models.cross_attention import CrossAttention, LoRALinearLayer
+from diffusers.models.attention_processor import Attention
+from diffusers.models.lora import LoRALinearLayer
 from diffusers.models.modeling_utils import ModelMixin
 from diffusers.utils.outputs import BaseOutput
@@ -84,7 +85,7 @@ class LoRACrossAttnProcessor(nn.Module):
         self.output_states_skipped = is_skipped

     def __call__(self,
-                 attn: CrossAttention,
+                 attn: Attention,
                  hidden_states,
                  encoder_hidden_states=None,
                  attention_mask=None,
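
Only the type annotation changes; the processor protocol is unchanged, since Attention keeps the projection and head-reshaping helpers of the old CrossAttention. A sketch of a bare processor following the stock diffusers recipe, assuming the 0.18-era Attention API (prepare_attention_mask, head_to_batch_dim, get_attention_scores, batch_to_head_dim):

import torch
from diffusers.models.attention_processor import Attention

class PlainAttnProcessor:
    """Reference-style processor: self- or cross-attention, no LoRA branch."""

    def __call__(self, attn: Attention, hidden_states,
                 encoder_hidden_states=None, attention_mask=None):
        batch_size, sequence_length, _ = hidden_states.shape
        attention_mask = attn.prepare_attention_mask(
            attention_mask, sequence_length, batch_size)
        query = attn.to_q(hidden_states)
        if encoder_hidden_states is None:  # self-attention
            encoder_hidden_states = hidden_states
        key = attn.to_k(encoder_hidden_states)
        value = attn.to_v(encoder_hidden_states)
        query, key, value = map(attn.head_to_batch_dim, (query, key, value))
        probs = attn.get_attention_scores(query, key, attention_mask)
        hidden_states = attn.batch_to_head_dim(torch.bmm(probs, value))
        hidden_states = attn.to_out[0](hidden_states)  # linear projection
        return attn.to_out[1](hidden_states)  # dropout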

View File

@@ -112,7 +112,7 @@ class VideoToVideo(TorchModel):
         generator.eval()

         load_dict = torch.load(cfg.model_path, map_location='cpu')
         ret = generator.load_state_dict(load_dict['state_dict'], strict=True)
-        self.generator = generator
+        self.generator = generator.half()
         logger.info('Load model {} path {}, with local status {}'.format(
             cfg.UNet.type, cfg.model_path, ret))
@@ -175,7 +175,7 @@ class VideoToVideo(TorchModel):
         video_data = rearrange(video_data, 'b f c h w -> (b f) c h w')
         video_data_list = torch.chunk(
-            video_data, video_data.shape[0] // 2, dim=0)
+            video_data, video_data.shape[0] // 1, dim=0)

         with torch.no_grad():
             decode_data = []
             for vd_data in video_data_list:
@@ -185,6 +185,7 @@ class VideoToVideo(TorchModel):
         video_data_feature = torch.cat(decode_data, dim=0)
         video_data_feature = rearrange(
             video_data_feature, '(b f) c h w -> b c f h w', b=batch_size)
+        torch.cuda.empty_cache()

         with amp.autocast(enabled=True):
             total_noise_levels = 600
@@ -209,6 +210,7 @@ class VideoToVideo(TorchModel):
             t_min=0,
             discretization='trailing')
+        torch.cuda.empty_cache()

         scale_factor = 0.18215
         vid_tensor_feature = 1. / scale_factor * gen_vid
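
Together these four hunks trade speed for GPU memory: the generator runs in fp16, latents are decoded one frame per chunk instead of two (shape[0] // 1 yields as many chunks as frames), and cached blocks are returned to the allocator between stages. A compact sketch of the same pattern, with illustrative names (vae_decode stands in for the model's decoder call):

import torch

def decode_frame_by_frame(vae_decode, latents: torch.Tensor) -> torch.Tensor:
    # One chunk per frame keeps peak VRAM at a single frame's activations.
    chunks = torch.chunk(latents, latents.shape[0], dim=0)
    with torch.no_grad():
        decoded = [vae_decode(chunk) for chunk in chunks]
    out = torch.cat(decoded, dim=0)
    torch.cuda.empty_cache()  # release cached blocks before the next stage
    return out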

View File

@@ -240,7 +240,14 @@ TASK_INPUTS = {
         InputType.IMAGE,
     Tasks.video_embedding:
         InputType.VIDEO,
-    Tasks.virtual_try_on: (InputType.IMAGE, InputType.IMAGE, InputType.IMAGE),
+    Tasks.virtual_try_on: [
+        (InputType.IMAGE, InputType.IMAGE, InputType.IMAGE),
+        {
+            'masked_model': InputType.IMAGE,
+            'pose': InputType.IMAGE,
+            'cloth': InputType.IMAGE,
+        }
+    ],
     Tasks.text_driven_segmentation: {
         InputKeys.IMAGE: InputType.IMAGE,
         InputKeys.TEXT: InputType.TEXT
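
With both shapes listed, input validation for virtual try-on accepts either the legacy positional tuple or the new keyed dict. A usage sketch under that reading (the model id is illustrative; the image paths mirror the test change further down):

from PIL import Image
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

masked_model = Image.open('data/test/images/virtual_tryon_model.jpg')
pose = Image.open('data/test/images/virtual_tryon_pose.jpg')
cloth = Image.open('data/test/images/virtual_tryon_cloth.jpg')

tryon = pipeline(Tasks.virtual_try_on, model='damo/cv_daflow_virtual-try-on_base')
out_tuple = tryon((masked_model, pose, cloth))  # legacy positional form
out_dict = tryon({'masked_model': masked_model, 'pose': pose, 'cloth': cloth})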

View File

@@ -448,9 +448,9 @@ class SeqGPTPipeline(Pipeline):
     # define the forward pass
     def forward(self, prompt: str, **forward_params) -> Dict[str, Any]:
         # gen & decode
-        prompt += '[GEN]'
+        # prompt += '[GEN]'
         input_ids = self.tokenizer(
-            prompt,
+            prompt + '[GEN]',
             return_tensors='pt',
             padding=True,
             truncation=True,
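
Appending '[GEN]' at the tokenizer call instead of rebinding prompt leaves the caller's original text intact for any later use inside forward; the hunk does not show that downstream use, so this reading is inferred. The shape of the change:

# Before: the parameter itself was rebound, so `prompt` no longer matched
# what the caller passed in. After: the marker is added only where the
# model input is built.
encoded = tokenizer(prompt + '[GEN]', return_tensors='pt',
                    padding=True, truncation=True)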

View File

@@ -21,7 +21,7 @@ class TestExportStableDiffusion(unittest.TestCase):
             os.makedirs(self.tmp_dir)
         self.model_id = 'AI-ModelScope/stable-diffusion-v1-5'

-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_export_stable_diffusion(self):
         model = Model.from_pretrained(self.model_id)
         Exporter.from_model(model).export_onnx(
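
This hunk and the test changes below raise the gate from level 0 to level 1, demoting the heavyweight diffusion tests from the default run so they execute only when the suite opts in. A sketch of the gating idiom, assuming test_level() reads a TEST_LEVEL environment variable (the exact source and default in modelscope's test utilities may differ):

import os
import unittest

def test_level() -> int:
    # Assumed: the level comes from the environment; the default runs only
    # level-0 tests.
    return int(os.environ.get('TEST_LEVEL', 0))

class ExampleTest(unittest.TestCase):
    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_expensive_model(self):
        ...  # runs only when TEST_LEVEL >= 1, e.g. in a nightly CI job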

View File

@@ -16,7 +16,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
     def setUp(self) -> None:
         self.task = Tasks.efficient_diffusion_tuning

-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_efficient_diffusion_tuning_swift_lora_run_pipeline(self):
         model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-lora'
         model_revision = 'v1.0.2'
@@ -33,7 +33,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
             f'Efficient-diffusion-tuning-swift-lora output: {output_image_path}'
         )

-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_efficient_diffusion_tuning_swift_lora_load_model_from_pretrained(
             self):
         model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-lora'
@@ -41,7 +41,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
         model = Model.from_pretrained(model_id, model_revision=model_revision)
         self.assertTrue(model.__class__ == EfficientStableDiffusion)

-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_efficient_diffusion_tuning_swift_adapter_run_pipeline(self):
         model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-adapter'
         model_revision = 'v1.0.2'
@@ -58,7 +58,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
             f'Efficient-diffusion-tuning-swift-adapter output: {output_image_path}'
         )

-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_efficient_diffusion_tuning_swift_adapter_load_model_from_pretrained(
             self):
         model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-adapter'
@@ -66,7 +66,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
         model = Model.from_pretrained(model_id, model_revision=model_revision)
         self.assertTrue(model.__class__ == EfficientStableDiffusion)

-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_efficient_diffusion_tuning_swift_prompt_run_pipeline(self):
         model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-prompt'
         model_revision = 'v1.0.2'
@@ -83,7 +83,7 @@ class EfficientDiffusionTuningTestSwift(unittest.TestCase):
             f'Efficient-diffusion-tuning-swift-prompt output: {output_image_path}'
         )

-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_efficient_diffusion_tuning_swift_prompt_load_model_from_pretrained(
             self):
         model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-prompt'

View File

@@ -20,7 +20,11 @@ class VirtualTryonTest(unittest.TestCase):
     masked_model = Image.open('data/test/images/virtual_tryon_model.jpg')
     pose = Image.open('data/test/images/virtual_tryon_pose.jpg')
     cloth = Image.open('data/test/images/virtual_tryon_cloth.jpg')
-    input_imgs = (masked_model, pose, cloth)
+    input_imgs = {
+        'masked_model': masked_model,
+        'pose': pose,
+        'cloth': cloth,
+    }

     @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_run_with_model_name(self):

View File

@@ -33,7 +33,7 @@ class TestEfficientDiffusionTuningTrainerSwift(unittest.TestCase):
             shutil.rmtree(self.tmp_dir)
         super().tearDown()

-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_efficient_diffusion_tuning_swift_lora_train(self):
         model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-lora'
         model_revision = 'v1.0.2'
@@ -62,7 +62,7 @@ class TestEfficientDiffusionTuningTrainerSwift(unittest.TestCase):
         self.assertIn(f'{trainer.timestamp}.log.json', results_files)
         self.assertIn(f'epoch_{self.max_epochs}.pth', results_files)

-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_efficient_diffusion_tuning_swift_adapter_train(self):
         model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-adapter'
         model_revision = 'v1.0.2'
@@ -91,7 +91,7 @@ class TestEfficientDiffusionTuningTrainerSwift(unittest.TestCase):
         self.assertIn(f'{trainer.timestamp}.log.json', results_files)
         self.assertIn(f'epoch_{self.max_epochs}.pth', results_files)

-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_efficient_diffusion_tuning_swift_prompt_train(self):
         model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-prompt'
         model_revision = 'v1.0.2'