From 1904a01117824d7d294227059f6b684972c5a8b6 Mon Sep 17 00:00:00 2001
From: Yuwei Guo <guoyuwei@pjlab.org.cn>
Date: Sun, 10 Sep 2023 21:27:27 +0800
Subject: [PATCH] v2 inference

---
 .../{inference.yaml => inference-v1.yaml}     |  0
 configs/inference/inference-v2.yaml           | 27 +++++++++++++++++++
 configs/prompts/v2/5-RealisticVision.yaml     | 23 ++++++++++++++++
 scripts/animate.py                            |  6 ++---
 4 files changed, 53 insertions(+), 3 deletions(-)
 rename configs/inference/{inference.yaml => inference-v1.yaml} (100%)
 create mode 100644 configs/inference/inference-v2.yaml
 create mode 100644 configs/prompts/v2/5-RealisticVision.yaml

diff --git a/configs/inference/inference.yaml b/configs/inference/inference-v1.yaml
similarity index 100%
rename from configs/inference/inference.yaml
rename to configs/inference/inference-v1.yaml
diff --git a/configs/inference/inference-v2.yaml b/configs/inference/inference-v2.yaml
new file mode 100644
index 0000000..a33bc12
--- /dev/null
+++ b/configs/inference/inference-v2.yaml
@@ -0,0 +1,27 @@
+unet_additional_kwargs:  # NOTE(review): presumably forwarded as extra keyword arguments when the UNet is built - confirm in scripts/animate.py
+  use_inflated_groupnorm: true
+  unet_use_cross_frame_attention: false
+  unet_use_temporal_attention: false
+  use_motion_module: true
+  motion_module_resolutions:  # NOTE(review): looks like the down-sampling factors at which a motion module is attached - confirm against the UNet code
+  - 1
+  - 2
+  - 4
+  - 8
+  motion_module_mid_block: true
+  motion_module_decoder_only: false
+  motion_module_type: Vanilla
+  motion_module_kwargs:
+    num_attention_heads: 8
+    num_transformer_block: 1
+    attention_block_types:
+    - Temporal_Self
+    - Temporal_Self
+    temporal_position_encoding: true
+    temporal_position_encoding_max_len: 32  # NOTE(review): presumably an upper bound on positions (frames) the temporal encoding covers - confirm
+    temporal_attention_dim_div: 1
+
+noise_scheduler_kwargs:  # NOTE(review): presumably passed to the noise scheduler constructor - confirm in scripts/animate.py
+  beta_start: 0.00085
+  beta_end: 0.012
+  beta_schedule: "linear"
diff --git a/configs/prompts/v2/5-RealisticVision.yaml b/configs/prompts/v2/5-RealisticVision.yaml
new file mode 100644
index 0000000..7770b19
--- /dev/null
+++ b/configs/prompts/v2/5-RealisticVision.yaml
@@ -0,0 +1,23 @@
+RealisticVision:
+  base: ""
+  path: "models/DreamBooth_LoRA/realisticVisionV20_v20.safetensors"
+
+  inference_config: "configs/inference/inference-v2.yaml"  # optional per-model override; scripts/animate.py falls back to --inference_config when this key is absent
+  motion_module:  # scripts/animate.py accepts either a single path string or a list of paths here
+    - "models/Motion_Module/mm_sd_v15_v2.ckpt"
+
+  seed:           [13100322578370451493, 14752961627088720670, 9329399085567825781, 16987697414827649302]  # NOTE(review): four seeds for four prompts - presumably paired by index; confirm in scripts/animate.py
+  steps:          25
+  guidance_scale: 7.5
+
+  prompt:
+    - "b&w photo of 42 y.o man in black clothes, bald, face, half body, body, high detailed skin, skin pores, coastline, overcast weather, wind, waves, 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3"
+    - "close up photo of a rabbit, forest, haze, halation, bloom, dramatic atmosphere, centred, rule of thirds, 200mm 1.4f macro shot"
+    - "photo of coastline, rocks, storm weather, wind, waves, lightning, 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3"
+    - "night, b&w photo of old house, post apocalypse, forest, storm weather, wind, rocks, 8k uhd, dslr, soft lighting, high quality, film grain"
+
+  n_prompt:  # NOTE(review): presumably one negative prompt per entry in `prompt`, matched by index - confirm in scripts/animate.py
+    - "semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"
+    - "semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"
+    - "blur, haze, deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, mutated hands and fingers, deformed, distorted, disfigured, poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, disconnected limbs, mutation, mutated, ugly, disgusting, amputation"
+    - "blur, haze, deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, art, mutated hands and fingers, deformed, distorted, disfigured, poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, disconnected limbs, mutation, mutated, ugly, disgusting, amputation"
diff --git a/scripts/animate.py b/scripts/animate.py
index 8bb5dd7..4bcadc0 100644
--- a/scripts/animate.py
+++ b/scripts/animate.py
@@ -34,7 +34,6 @@ def main(args):
     time_str = datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
     savedir = f"samples/{Path(args.config).stem}-{time_str}"
     os.makedirs(savedir)
-    inference_config = OmegaConf.load(args.inference_config)
 
     config  = OmegaConf.load(args.config)
     samples = []
@@ -45,7 +44,8 @@ def main(args):
         motion_modules = model_config.motion_module
         motion_modules = [motion_modules] if isinstance(motion_modules, str) else list(motion_modules)
         for motion_module in motion_modules:
-        
+            inference_config = OmegaConf.load(model_config.get("inference_config", args.inference_config))
+
             ### >>> create validation pipeline >>> ###
             tokenizer    = CLIPTokenizer.from_pretrained(args.pretrained_model_path, subfolder="tokenizer")
             text_encoder = CLIPTextModel.from_pretrained(args.pretrained_model_path, subfolder="text_encoder")
@@ -148,7 +148,7 @@ def main(args):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--pretrained_model_path", type=str, default="models/StableDiffusion/stable-diffusion-v1-5",)
-    parser.add_argument("--inference_config",      type=str, default="configs/inference/inference.yaml")    
+    parser.add_argument("--inference_config",      type=str, default="configs/inference/inference-v1.yaml")    
     parser.add_argument("--config",                type=str, required=True)
     
     parser.add_argument("--L", type=int, default=16 )