Add files via upload

SD推理最佳实践，包括秒级生图，lora，controlnet
2026-07-10 04:22:33 +02:00 · 2024-02-29 17:33:03 +08:00
parent 021735207f
commit f32bf60f27
1 changed files with 358 additions and 0 deletions
--- a/examples/pytorch/stable_diffusion/SD推理最佳实践.ipynb
+++ b/examples/pytorch/stable_diffusion/SD推理最佳实践.ipynb
@@ -0,0 +1,358 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "89373920-4a59-473e-8b7d-7f30570637c7",
+   "metadata": {},
+   "source": [
+    "Stable diffusion模型推理方法1：SDXL模型，魔搭社区Pipeline已经集成SDXL模型，可以直接使用"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "641a04c4-ee0b-4cef-93e2-bca0269e7486",
+   "metadata": {
+    "ExecutionIndicator": {
+     "show": true
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from modelscope.utils.constant import Tasks\n",
+    "from modelscope.pipelines import pipeline\n",
+    "import cv2\n",
+    "\n",
+    "pipe = pipeline(task=Tasks.text_to_image_synthesis, \n",
+    "                model='AI-ModelScope/stable-diffusion-xl-base-1.0',\n",
+    "                use_safetensors=True,\n",
+    "                model_revision='v1.0.0')\n",
+    "\n",
+    "prompt = \"Beautiful and cute girl, 16 years old, denim jacket, gradient background, soft colors, soft lighting, cinematic edge lighting, light and dark contrast, anime, art station Seraflur, blind box, super detail, 8k\"\n",
+    "output = pipe({'text': prompt})\n",
+    "cv2.imwrite('SDXL.png', output['output_imgs'][0])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c5740ed4-2c6a-4b0b-8bb7-6ef466d2a08f",
+   "metadata": {},
+   "source": [
+    "秒级推理方法1：SDXL-turbo模型是SDXL 1.0的蒸馏版本，SDXL-Turbo基于一种称之为对抗扩散蒸馏（ADD）的新颖的训练方法，这种方法在扩散模型采样可以减少到1到4步，而生成高质量图像。ADD的训练方式使用得分蒸馏，利用大规模扩散模型作为教师模型，并将其与对抗性损失相结合，即使在1-2步的采样步骤的低步骤状态下，使用对抗学习的方式，引入discriminator来辅助生成质量的把控，也可以确保高质量图像的保真度。"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bef68ad6-1fc9-4fff-850e-9bd4cc3ef756",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from diffusers import AutoPipelineForText2Image\n",
+    "import torch\n",
+    "from modelscope import snapshot_download\n",
+    "\n",
+    "model_dir = snapshot_download(\"AI-ModelScope/sdxl-turbo\")\n",
+    "\n",
+    "pipe = AutoPipelineForText2Image.from_pretrained(model_dir, torch_dtype=torch.float16, variant=\"fp16\")\n",
+    "pipe.to(\"cuda\")\n",
+    "\n",
+    "prompt = \"Beautiful and cute girl, 16 years old, denim jacket, gradient background, soft colors, soft lighting, cinematic edge lighting, light and dark contrast, anime, art station Seraflur, blind box, super detail, 8k\"\n",
+    "\n",
+    "image = pipe(prompt=prompt, num_inference_steps=1, guidance_scale=0.0).images[0]\n",
+    "image.save(\"SDXLturbo.png\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bf25d186-317e-4e53-bed5-c801b336b3ff",
+   "metadata": {},
+   "source": [
+    "秒级推理方法2：SDXL+LCM，潜在一致性模型（LCM）受一致性模型（CM）启发，在预训练的LDM上以较少的步骤进行快速推理。LCM-SD系列是在Stable Diffusion的基础上新增Consistency 约束蒸馏的结果，仅通过2-8步的推理即可实现高质量的文本到图片的生成性能。"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "58e1b7b6-f2d1-4a04-9a31-108f567b5c64",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from diffusers import UNet2DConditionModel, DiffusionPipeline, LCMScheduler\n",
+    "import torch\n",
+    "from modelscope import snapshot_download\n",
+    "\n",
+    "model_dir_lcm = snapshot_download(\"AI-ModelScope/lcm-sdxl\",revision = \"master\")\n",
+    "model_dir_sdxl = snapshot_download(\"AI-ModelScope/stable-diffusion-xl-base-1.0\",revision = \"v1.0.9\")\n",
+    "\n",
+    "unet = UNet2DConditionModel.from_pretrained(model_dir_lcm, torch_dtype=torch.float16, variant=\"fp16\")\n",
+    "pipe = DiffusionPipeline.from_pretrained(model_dir_sdxl, unet=unet, torch_dtype=torch.float16, variant=\"fp16\")\n",
+    "\n",
+    "pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)\n",
+    "pipe.to(\"cuda\")\n",
+    "\n",
+    "prompt = \"Beautiful and cute girl, 16 years old, denim jacket, gradient background, soft colors, soft lighting, cinematic edge lighting, light and dark contrast, anime, art station Seraflur, blind box, super detail, 8k\"\n",
+    "image = pipe(prompt, num_inference_steps=4, guidance_scale=8.0).images[0]\n",
+    "image.save(\"SDXLLCM.png\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ec6a4dda-2d8c-4fb5-bcbd-468462d9e3c6",
+   "metadata": {},
+   "source": [
+    "秒级推理方法3：stable-cascade模型基于Würstchen架构构建，与稳定扩散等其他模型的主要区别在于它在更小的潜在空间中工作。潜在空间越小，推理速度就越快，训练成本也就越低。潜在空间有多小？稳定扩散使用压缩因子 8，从而将 1024x1024 图像编码为 128x128。Stable Cascade 的压缩系数为 42，这意味着可以将 1024x1024 图像编码为 24x24，同时保持清晰的重建。然后在高度压缩的潜在空间中训练文本条件模型。与稳定扩散 1.5 相比，该架构的先前版本实现了 16 倍的成本降低。"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4155f18d-0504-42e6-b785-02ed4a519c1f",
+   "metadata": {
+    "ExecutionIndicator": {
+     "show": true
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "from modelscope import snapshot_download\n",
+    "from diffusers import StableCascadeDecoderPipeline, StableCascadePriorPipeline\n",
+    "\n",
+    "device = \"cuda\"\n",
+    "num_images_per_prompt = 1\n",
+    "\n",
+    "stable_cascade_prior = snapshot_download(\"AI-ModelScope/stable-cascade-prior\")\n",
+    "stable_cascade = snapshot_download(\"AI-ModelScope/stable-cascade\")\n",
+    "\n",
+    "prior = StableCascadePriorPipeline.from_pretrained(stable_cascade_prior, torch_dtype=torch.bfloat16).to(device)\n",
+    "decoder = StableCascadeDecoderPipeline.from_pretrained(stable_cascade,  torch_dtype=torch.float16).to(device)\n",
+    "\n",
+    "prompt = \"Beautiful and cute girl, 16 years old, denim jacket, gradient background, soft colors, soft lighting, cinematic edge lighting, light and dark contrast, anime, art station Seraflur, blind box, super detail, 8k\"\n",
+    "negative_prompt = \"\"\n",
+    "\n",
+    "prior_output = prior(\n",
+    "    prompt=prompt,\n",
+    "    height=1024,\n",
+    "    width=1024,\n",
+    "    negative_prompt=negative_prompt,\n",
+    "    guidance_scale=4.0,\n",
+    "    num_images_per_prompt=num_images_per_prompt,\n",
+    "    num_inference_steps=20\n",
+    ")\n",
+    "decoder_output = decoder(\n",
+    "    image_embeddings=prior_output.image_embeddings.half(),\n",
+    "    prompt=prompt,\n",
+    "    negative_prompt=negative_prompt,\n",
+    "    guidance_scale=0.0,\n",
+    "    output_type=\"pil\",\n",
+    "    num_inference_steps=10\n",
+    ").images\n",
+    "\n",
+    "for i, img in enumerate(decoder_output):\n",
+    "    img.save(f\"stablecascade_{i+1}.png\")\n",
+    "#Now decoder_output is a list with your PIL images"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c402e461-2245-4e38-839b-6a5992c03b00",
+   "metadata": {},
+   "source": [
+    "秒级推理方法4："
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f42531c8-c428-4ae7-aef1-b56050bffc71",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "from diffusers import StableDiffusionXLPipeline, UNet2DConditionModel, EulerDiscreteScheduler\n",
+    "from modelscope.hub.file_download import model_file_download\n",
+    "from modelscope import snapshot_download\n",
+    "from safetensors.torch import load_file\n",
+    "\n",
+    "base = snapshot_download(\"AI-ModelScope/stable-diffusion-xl-base-1.0\")\n",
+    "repo = \"AI-ModelScope/SDXL-Lightning\"\n",
+    "ckpt = \"sdxl_lightning_4step_unet.safetensors\" # Use the correct ckpt for your step setting!\n",
+    "\n",
+    "# Load model.\n",
+    "unet = UNet2DConditionModel.from_config(base, subfolder=\"unet\").to(\"cuda\", torch.float16)\n",
+    "unet.load_state_dict(load_file(model_file_download(repo, ckpt), device=\"cuda\"))\n",
+    "pipe = StableDiffusionXLPipeline.from_pretrained(base, unet=unet, torch_dtype=torch.float16, variant=\"fp16\").to(\"cuda\")\n",
+    "\n",
+    "# Ensure sampler uses \"trailing\" timesteps.\n",
+    "pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing=\"trailing\")\n",
+    "\n",
+    "# Ensure using the same inference steps as the loaded model and CFG set to 0.\n",
+    "pipe(\"A girl smiling\", num_inference_steps=4, guidance_scale=0).images[0].save(\"sdxllightning.png\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "adbedb78-90fb-4509-a3a6-6262d0d51bcf",
+   "metadata": {},
+   "source": [
+    "微调lora叠加推理"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c418dc94-6c35-4ac2-8807-e796d5488525",
+   "metadata": {
+    "ExecutionIndicator": {
+     "show": true
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from diffusers import AutoPipelineForText2Image\n",
+    "from modelscope import snapshot_download\n",
+    "import torch\n",
+    "\n",
+    "model_dir=snapshot_download(\"YorickHe/majicmixRealistic_v6\")\n",
+    "lora_dir = snapshot_download(\"PaperCloud/zju19_dunhuang_style_lora\")\n",
+    "\n",
+    "pipeline = AutoPipelineForText2Image.from_pretrained(f\"{model_dir}/v7\", torch_dtype=torch.float16).to(\"cuda\")\n",
+    "pipeline.load_lora_weights(lora_dir, weight_name=\"dunhuang.safetensors\")\n",
+    "prompt = \"1 girl, close-up, waist shot, black long hair, clean face, dunhuang, Chinese ancient style, clean skin, organza_lace, Dunhuang wind, Art deco, Necklace, jewelry, Bracelet, Earrings, dunhuang_style, see-through_dress, Expressionism, looking towards the camera, upper_body, raw photo, masterpiece, solo, medium shot, high detail face, photorealistic, best quality\"\n",
+    "#Negative Prompt = \"\"\"(nsfw:2), paintings, sketches, (worst quality:2), (low quality:2), lowers, normal quality, ((monochrome)), ((grayscale)), logo, word, character, bad hand, tattoo, (username, watermark, signature, time signature, timestamp, artist name, copyright name, copyright),low res, ((monochrome)), ((grayscale)), skin spots, acnes, skin blemishes, age spot, glans, extra fingers, fewer fingers, strange fingers, bad hand, mole, ((extra legs)), ((extra hands))\"\"\"\n",
+    "image = pipeline(prompt).images[0]\n",
+    "image.save(\"sdlora.png\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6c36c14f-9481-48f1-a6ef-617d7551b63d",
+   "metadata": {},
+   "source": [
+    "SD+controlnet"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "1f1c616d-0d45-4a8d-8140-0b6b352920b9",
+   "metadata": {
+    "ExecutionIndicator": {
+     "show": true
+    },
+    "execution": {
+     "iopub.execute_input": "2024-02-28T00:22:32.730370Z",
+     "iopub.status.busy": "2024-02-28T00:22:32.729999Z",
+     "iopub.status.idle": "2024-02-28T00:23:48.650291Z",
+     "shell.execute_reply": "2024-02-28T00:23:48.649123Z",
+     "shell.execute_reply.started": "2024-02-28T00:22:32.730354Z"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/conda/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n",
+      "2024-02-28 08:22:35.104069: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
+      "2024-02-28 08:22:35.132215: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
+      "2024-02-28 08:22:35.174367: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
+      "2024-02-28 08:22:35.174385: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
+      "2024-02-28 08:22:35.174411: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
+      "2024-02-28 08:22:35.182970: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
+      "2024-02-28 08:22:35.183413: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
+      "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+      "2024-02-28 08:22:36.189620: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
+      "2024-02-28 08:22:39,294 - modelscope - INFO - PyTorch version 2.1.2+cu121 Found.\n",
+      "2024-02-28 08:22:39,296 - modelscope - INFO - TensorFlow version 2.14.0 Found.\n",
+      "2024-02-28 08:22:39,296 - modelscope - INFO - Loading ast index from /mnt/workspace/.cache/modelscope/ast_indexer\n",
+      "2024-02-28 08:22:39,341 - modelscope - INFO - Loading done! Current index file version is 1.12.0, with md5 509123dba36c5e70a95f6780df348471 and a total number of 964 components indexed\n",
+      "2024-02-28 08:22:39,713 - modelscope - WARNING - Model revision not specified, use revision: v1.0.9\n",
+      "Loading pipeline components...: 100%|██████████| 7/7 [00:36<00:00,  5.19s/it]\n",
+      "100%|██████████| 50/50 [00:15<00:00,  3.24it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel, AutoencoderKL\n",
+    "from diffusers.utils import load_image, make_image_grid\n",
+    "from PIL import Image\n",
+    "from modelscope import snapshot_download\n",
+    "import cv2\n",
+    "import numpy as np\n",
+    "import torch\n",
+    "\n",
+    "\n",
+    "model_dir = snapshot_download(\"AI-ModelScope/stable-diffusion-xl-base-1.0\")\n",
+    "controlnet_dir = snapshot_download(\"AI-ModelScope/controlnet-canny-sdxl-1.0\")\n",
+    "VAE_dir = snapshot_download(\"AI-ModelScope/sdxl-vae-fp16-fix\")\n",
+    "original_image = load_image(\n",
+    "    \"/mnt/workspace/canny.jpg\"\n",
+    ")\n",
+    "\n",
+    "prompt = \"sea turtle, hard lighting\"\n",
+    "negative_prompt = 'low quality, bad quality, sketches'\n",
+    "\n",
+    "image = load_image(\"/mnt/workspace/canny.jpg\")\n",
+    "\n",
+    "controlnet_conditioning_scale = 0.5  # recommended for good generalization\n",
+    "\n",
+    "controlnet = ControlNetModel.from_pretrained(\n",
+    "    controlnet_dir,\n",
+    "    torch_dtype=torch.float16\n",
+    ")\n",
+    "vae = AutoencoderKL.from_pretrained(VAE_dir, torch_dtype=torch.float16)\n",
+    "pipe = StableDiffusionXLControlNetPipeline.from_pretrained(\n",
+    "    model_dir,\n",
+    "    controlnet=controlnet,\n",
+    "    vae=vae,\n",
+    "    torch_dtype=torch.float16,\n",
+    ")\n",
+    "pipe.enable_model_cpu_offload()\n",
+    "\n",
+    "image = np.array(image)\n",
+    "image = cv2.Canny(image, 100, 200)\n",
+    "image = image[:, :, None]\n",
+    "image = np.concatenate([image, image, image], axis=2)\n",
+    "image = Image.fromarray(image)\n",
+    "\n",
+    "images = pipe(\n",
+    "    prompt, negative_prompt=negative_prompt, image=image, controlnet_conditioning_scale=controlnet_conditioning_scale,\n",
+    "    ).images\n",
+    "\n",
+    "images[0].save(f\"controlnet.png\")\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}