From 1904a01117824d7d294227059f6b684972c5a8b6 Mon Sep 17 00:00:00 2001 From: Yuwei Guo Date: Sun, 10 Sep 2023 21:27:27 +0800 Subject: [PATCH] v2 inference --- .../{inference.yaml => inference-v1.yaml} | 0 configs/inference/inference-v2.yaml | 27 +++++++++++++++++++ configs/prompts/v2/5-RealisticVision.yaml | 23 ++++++++++++++++ scripts/animate.py | 6 ++--- 4 files changed, 53 insertions(+), 3 deletions(-) rename configs/inference/{inference.yaml => inference-v1.yaml} (100%) create mode 100644 configs/inference/inference-v2.yaml create mode 100644 configs/prompts/v2/5-RealisticVision.yaml diff --git a/configs/inference/inference.yaml b/configs/inference/inference-v1.yaml similarity index 100% rename from configs/inference/inference.yaml rename to configs/inference/inference-v1.yaml diff --git a/configs/inference/inference-v2.yaml b/configs/inference/inference-v2.yaml new file mode 100644 index 0000000..a33bc12 --- /dev/null +++ b/configs/inference/inference-v2.yaml @@ -0,0 +1,27 @@ +unet_additional_kwargs: + use_inflated_groupnorm: true + unet_use_cross_frame_attention: false + unet_use_temporal_attention: false + use_motion_module: true + motion_module_resolutions: + - 1 + - 2 + - 4 + - 8 + motion_module_mid_block: true + motion_module_decoder_only: false + motion_module_type: Vanilla + motion_module_kwargs: + num_attention_heads: 8 + num_transformer_block: 1 + attention_block_types: + - Temporal_Self + - Temporal_Self + temporal_position_encoding: true + temporal_position_encoding_max_len: 32 + temporal_attention_dim_div: 1 + +noise_scheduler_kwargs: + beta_start: 0.00085 + beta_end: 0.012 + beta_schedule: "linear" diff --git a/configs/prompts/v2/5-RealisticVision.yaml b/configs/prompts/v2/5-RealisticVision.yaml new file mode 100644 index 0000000..7770b19 --- /dev/null +++ b/configs/prompts/v2/5-RealisticVision.yaml @@ -0,0 +1,23 @@ +RealisticVision: + base: "" + path: "models/DreamBooth_LoRA/realisticVisionV20_v20.safetensors" + + inference_config: "configs/inference/inference-v2.yaml" + motion_module: + - "models/Motion_Module/mm_sd_v15_v2.ckpt" + + seed: [13100322578370451493, 14752961627088720670, 9329399085567825781, 16987697414827649302] + steps: 25 + guidance_scale: 7.5 + + prompt: + - "b&w photo of 42 y.o man in black clothes, bald, face, half body, body, high detailed skin, skin pores, coastline, overcast weather, wind, waves, 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3" + - "close up photo of a rabbit, forest, haze, halation, bloom, dramatic atmosphere, centred, rule of thirds, 200mm 1.4f macro shot" + - "photo of coastline, rocks, storm weather, wind, waves, lightning, 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3" + - "night, b&w photo of old house, post apocalypse, forest, storm weather, wind, rocks, 8k uhd, dslr, soft lighting, high quality, film grain" + + n_prompt: + - "semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck" + - "semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck" + - "blur, haze, deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, mutated hands and fingers, deformed, distorted, disfigured, poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, disconnected limbs, mutation, mutated, ugly, disgusting, amputation" + - "blur, haze, deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, art, mutated hands and fingers, deformed, distorted, disfigured, poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, disconnected limbs, mutation, mutated, ugly, disgusting, amputation" diff --git a/scripts/animate.py b/scripts/animate.py index 8bb5dd7..4bcadc0 100644 --- a/scripts/animate.py +++ b/scripts/animate.py @@ -34,7 +34,6 @@ def main(args): time_str = datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S") savedir = f"samples/{Path(args.config).stem}-{time_str}" os.makedirs(savedir) - inference_config = OmegaConf.load(args.inference_config) config = OmegaConf.load(args.config) samples = [] @@ -45,7 +44,8 @@ def main(args): motion_modules = model_config.motion_module motion_modules = [motion_modules] if isinstance(motion_modules, str) else list(motion_modules) for motion_module in motion_modules: - + inference_config = OmegaConf.load(model_config.get("inference_config", args.inference_config)) + ### >>> create validation pipeline >>> ### tokenizer = CLIPTokenizer.from_pretrained(args.pretrained_model_path, subfolder="tokenizer") text_encoder = CLIPTextModel.from_pretrained(args.pretrained_model_path, subfolder="text_encoder") @@ -148,7 +148,7 @@ def main(args): if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--pretrained_model_path", type=str, default="models/StableDiffusion/stable-diffusion-v1-5",) - parser.add_argument("--inference_config", type=str, default="configs/inference/inference.yaml") + parser.add_argument("--inference_config", type=str, default="configs/inference/inference-v1.yaml") parser.add_argument("--config", type=str, required=True) parser.add_argument("--L", type=int, default=16 )