Add ControlNet support for SDXL (#675)
* placeholders

* export support poc

* wrapup export

* finish modeling

* pipeline done

* add compiler args for controlnet export

* add test

* update setup

* fix

* update setup

* add doc

* remove changes for debug

* correct comment

* placeholder

* add sdxl specific inputs

* export done

* export fixed

* docstring

* stage

* pipeline done

* doc

* fix style / remove test

* doc

* Update optimum/neuron/pipelines/diffusers/pipeline_controlnet_sd_xl.py

* Update optimum/exporters/neuron/convert.py

* apply suggestions
JingyaHuang authored Aug 30, 2024
1 parent f45250c commit d857415
Showing 18 changed files with 987 additions and 123 deletions.
51 changes: 51 additions & 0 deletions docs/source/inference_tutorials/stable_diffusion.mdx
@@ -635,4 +635,55 @@ compare.save("compare.png")
/>



## ControlNet with Stable Diffusion XL

### Compile

```bash
optimum-cli export neuron -m stabilityai/stable-diffusion-xl-base-1.0 --task stable-diffusion-xl --batch_size 1 --height 1024 --width 1024 --controlnet_ids diffusers/controlnet-canny-sdxl-1.0-small --num_images_per_prompt 1 sdxl_neuron_controlnet/
```
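
If you prefer to export from Python instead of the CLI, a minimal sketch is below. It assumes `NeuronStableDiffusionXLControlNetPipeline.from_pretrained` accepts `export=True` together with `controlnet_ids` and the same input shapes as the CLI flags; check the API reference of your `optimum-neuron` version before relying on it.

```python
from optimum.neuron import NeuronStableDiffusionXLControlNetPipeline

# Sketch of an in-Python export, mirroring the CLI command above.
# The keyword arguments are assumed to match the CLI flags one-to-one.
pipe = NeuronStableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    export=True,
    controlnet_ids="diffusers/controlnet-canny-sdxl-1.0-small",
    batch_size=1,
    height=1024,
    width=1024,
    num_images_per_prompt=1,
)
pipe.save_pretrained("sdxl_neuron_controlnet/")  # reuse the compiled artifacts later
```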

### Text-to-Image

```python
import cv2
import numpy as np
from diffusers.utils import load_image
from PIL import Image
from optimum.neuron import NeuronStableDiffusionXLControlNetPipeline

# Inputs
prompt = "aerial view, a futuristic research complex in a bright foggy jungle, hard lighting"
negative_prompt = "low quality, bad quality, sketches"

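# Prepare the ControlNet conditioning: turn the input image into a 3-channel Canny edge map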
image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/hf-logo.png"
)
image = np.array(image)
image = cv2.Canny(image, 100, 200)
image = image[:, :, None]
image = np.concatenate([image, image, image], axis=2)
image = Image.fromarray(image)

controlnet_conditioning_scale = 0.5 # recommended for good generalization

pipe = NeuronStableDiffusionXLControlNetPipeline.from_pretrained("sdxl_neuron_controlnet")

images = pipe(
prompt,
negative_prompt=negative_prompt,
image=image,
controlnet_conditioning_scale=controlnet_conditioning_scale,
).images
images[0].save("hug_lab.png")
```
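
`controlnet_conditioning_scale` controls how strongly the edge map constrains generation relative to the text prompt. As a small illustrative sketch (the scale values below are arbitrary examples, not recommendations), you can sweep a few values with the already-compiled pipeline and compare the results:

```python
# Hypothetical sweep over conditioning scales; reuses `pipe`, `prompt`,
# `negative_prompt` and `image` from the example above.
for scale in (0.3, 0.5, 0.8):
    out = pipe(
        prompt,
        negative_prompt=negative_prompt,
        image=image,
        controlnet_conditioning_scale=scale,
    ).images[0]
    out.save(f"hug_lab_scale_{scale}.png")
```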

<img
src="https://huggingface.co/datasets/optimum/documentation-images/resolve/main/neuron/models/12-sdxl-text2img-controlnet.png?download=true"
width="768"
height="256"
alt="stable diffusion xl generated image with controlnet."
/>

Are there any other Stable Diffusion features that you want us to support in 🤗 `Optimum-neuron`? Please file an issue in the [`Optimum-neuron` GitHub repo](https://github.com/huggingface/optimum-neuron) or discuss with us on [Hugging Face's community forum](https://discuss.huggingface.co/c/optimum/), cheers 🤗!
5 changes: 5 additions & 0 deletions docs/source/package_reference/modeling.mdx
@@ -139,3 +139,8 @@ The following Neuron model classes are available for stable diffusion tasks.
### NeuronStableDiffusionXLInpaintPipeline
[[autodoc]] modeling_diffusion.NeuronStableDiffusionXLInpaintPipeline
- __call__

### NeuronStableDiffusionXLControlNetPipeline

[[autodoc]] modeling_diffusion.NeuronStableDiffusionXLControlNetPipeline
- __call__
5 changes: 4 additions & 1 deletion notebooks/sentence-transformers/getting-started.ipynb
@@ -46,6 +46,7 @@
"source": [
"from optimum.neuron import NeuronModelForSentenceTransformers\n",
"\n",
"\n",
"# Sentence Transformers model from HuggingFace\n",
"model_id = \"BAAI/bge-small-en-v1.5\"\n",
"input_shapes = {\"batch_size\": 1, \"sequence_length\": 384} # mandatory shapes\n",
@@ -88,9 +89,11 @@
"metadata": {},
"outputs": [],
"source": [
"from optimum.neuron import NeuronModelForSentenceTransformers\n",
"from transformers import AutoTokenizer\n",
"\n",
"from optimum.neuron import NeuronModelForSentenceTransformers\n",
"\n",
"\n",
"model_id_or_path = \"bge_emb_inf2/\"\n",
"tokenizer_id = \"BAAI/bge-small-en-v1.5\"\n",
"\n",
11 changes: 7 additions & 4 deletions notebooks/stable-diffusion/stable-diffusion-txt2img.ipynb
@@ -55,6 +55,7 @@
"source": [
"from optimum.neuron import NeuronStableDiffusionPipeline\n",
"\n",
"\n",
"model_id = \"stabilityai/stable-diffusion-2-1\"\n",
"num_image_per_prompt = 1\n",
"input_shapes = {\"batch_size\": 1, \"height\": 768, \"width\": 768, \"num_image_per_prompt\": num_image_per_prompt}\n",
@@ -374,6 +375,8 @@
"outputs": [],
"source": [
"from diffusers import DPMSolverMultistepScheduler\n",
"\n",
"\n",
"stable_diffusion.scheduler = DPMSolverMultistepScheduler.from_config(stable_diffusion.scheduler.config)"
]
},
@@ -384,11 +387,11 @@
"metadata": {},
"outputs": [],
"source": [
"from matplotlib import pyplot as plt\n",
"from matplotlib import image as mpimg\n",
"import time\n",
"import copy\n",
"import numpy as np "
"\n",
"import numpy as np\n",
"from matplotlib import image as mpimg\n",
"from matplotlib import pyplot as plt"
]
},
{
11 changes: 7 additions & 4 deletions notebooks/stable-diffusion/stable-diffusion-xl-txt2img.ipynb
@@ -56,6 +56,7 @@
"source": [
"from optimum.neuron import NeuronStableDiffusionXLPipeline\n",
"\n",
"\n",
"model_id = \"stabilityai/stable-diffusion-xl-base-1.0\"\n",
"num_image_per_prompt = 1\n",
"input_shapes = {\"batch_size\": 1, \"height\": 1024, \"width\": 1024, \"num_image_per_prompt\": num_image_per_prompt}\n",
@@ -423,6 +424,8 @@
"outputs": [],
"source": [
"from diffusers import DPMSolverMultistepScheduler\n",
"\n",
"\n",
"stable_diffusion_xl.scheduler = DPMSolverMultistepScheduler.from_config(stable_diffusion_xl.scheduler.config)"
]
},
@@ -433,11 +436,11 @@
"metadata": {},
"outputs": [],
"source": [
"from matplotlib import pyplot as plt\n",
"from matplotlib import image as mpimg\n",
"import time\n",
"import copy\n",
"import numpy as np "
"\n",
"import numpy as np\n",
"from matplotlib import image as mpimg\n",
"from matplotlib import pyplot as plt"
]
},
{
7 changes: 6 additions & 1 deletion notebooks/text-classification/notebook.ipynb
@@ -85,6 +85,7 @@
"source": [
"from datasets import load_dataset\n",
"\n",
"\n",
"# Dataset id from huggingface.co/dataset\n",
"dataset_id = \"philschmid/emotion\"\n",
"\n",
@@ -116,6 +117,7 @@
"source": [
"from random import randrange\n",
"\n",
"\n",
"random_id = randrange(len(raw_dataset['train']))\n",
"raw_dataset['train'][random_id]\n",
"# {'text': 'i feel isolated and alone in my trade', 'label': 0}"
@@ -139,8 +141,11 @@
"metadata": {},
"outputs": [],
"source": [
"from transformers import AutoTokenizer\n",
"import os\n",
"\n",
"from transformers import AutoTokenizer\n",
"\n",
"\n",
"# Model id to load the tokenizer\n",
"model_id = \"bert-base-uncased\"\n",
"save_dataset_path = \"lm_dataset\"\n",
29 changes: 20 additions & 9 deletions notebooks/text-generation/CodeLlama-7B-Compilation.ipynb
@@ -96,6 +96,7 @@
"source": [
"from optimum.neuron import pipeline\n",
"\n",
"\n",
"p = pipeline('text-generation', 'aws-neuron/CodeLlama-7b-hf-neuron-8xlarge')\n",
"p(\"import socket\\n\\ndef ping_exponential_backoff(host: str):\",\n",
" do_sample=True,\n",
@@ -188,10 +189,12 @@
"outputs": [],
"source": [
"from optimum.neuron import NeuronModelForCausalLM\n",
"\n",
"\n",
"#num_cores should be changed based on the instance. inf2.24xlarge has 6 neuron processors (they have two cores each) so 12 total\n",
"compiler_args = {\"num_cores\": 2, \"auto_cast_type\": 'fp16'}\n",
"input_shapes = {\"batch_size\": 1, \"sequence_length\": 2048}\n",
"model = NeuronModelForCausalLM.from_pretrained(\"codellama/CodeLlama-7b-hf\", export=True, **compiler_args, **input_shapes) "
"model = NeuronModelForCausalLM.from_pretrained(\"codellama/CodeLlama-7b-hf\", export=True, **compiler_args, **input_shapes)"
]
},
{
@@ -211,8 +214,7 @@
"metadata": {},
"outputs": [],
"source": [
"model.save_pretrained(\"CodeLlama-7b-hf-neuron-8xlarge\")\n",
" "
"model.save_pretrained(\"CodeLlama-7b-hf-neuron-8xlarge\")\n"
]
},
{
@@ -251,10 +253,21 @@
"outputs": [],
"source": [
"from huggingface_hub.hf_api import HfFolder\n",
"HfFolder.save_token('MY_HUGGINGFACE_TOKEN_HERE')\n",
"\n",
"from huggingface_hub import login\n",
"from huggingface_hub import HfApi\n",
"\n",
"HfFolder.save_token('MY_HUGGINGFACE_TOKEN_HERE')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bdbc2537",
"metadata": {},
"outputs": [],
"source": [
"from huggingface_hub import HfApi, login\n",
"\n",
"\n",
"api = HfApi()\n",
"login()\n",
"\n",
Expand All @@ -264,9 +277,7 @@
" repo_type=\"model\",\n",
" multi_commits=True,\n",
" multi_commits_verbose=True,\n",
")\n",
"\n",
"\n"
")"
]
}
],
9 changes: 5 additions & 4 deletions notebooks/text-generation/llama2-13b-chatbot.ipynb
@@ -61,7 +61,6 @@
"outputs": [],
"source": [
"# Special widgets are required for a nicer display\n",
"import sys\n",
"!{sys.executable} -m pip install ipywidgets"
]
},
@@ -103,6 +102,7 @@
"source": [
"from optimum.neuron import NeuronModelForCausalLM\n",
"\n",
"\n",
"compiler_args = {\"num_cores\": 24, \"auto_cast_type\": 'fp16'}\n",
"input_shapes = {\"batch_size\": 1, \"sequence_length\": 2048}\n",
"model = NeuronModelForCausalLM.from_pretrained(\n",
@@ -153,6 +153,7 @@
"source": [
"from huggingface_hub import notebook_login\n",
"\n",
"\n",
"notebook_login(new_session=False)"
]
},
@@ -175,6 +176,7 @@
"source": [
"from huggingface_hub import whoami\n",
"\n",
"\n",
"org = whoami()['name']\n",
"\n",
"repo_id = f\"{org}/llama-2-13b-chat-neuron\"\n",
@@ -238,6 +240,7 @@
"source": [
"from optimum.neuron import NeuronModelForCausalLM\n",
"\n",
"\n",
"try:\n",
" model\n",
"except NameError:\n",
@@ -262,6 +265,7 @@
"source": [
"from transformers import AutoTokenizer\n",
"\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(\"NousResearch/Llama-2-13b-chat-hf\")"
]
},
@@ -320,13 +324,10 @@
"source": [
"def format_chat_prompt(message, history, max_tokens):\n",
" \"\"\" Convert a history of messages to a chat prompt\n",
" \n",
" \n",
" Args:\n",
" message(str): the new user message.\n",
" history (List[str]): the list of user messages and assistant responses.\n",
" max_tokens (int): the maximum number of input tokens accepted by the model.\n",
" \n",
" Returns:\n",
" a `str` prompt.\n",
" \"\"\"\n",
22 changes: 15 additions & 7 deletions notebooks/text-generation/llama2-7b-fine-tuning.ipynb
@@ -154,9 +154,11 @@
}
],
"source": [
"from datasets import load_dataset\n",
"from random import randrange\n",
"\n",
"from datasets import load_dataset\n",
"\n",
"\n",
"# Load dataset from the hub\n",
"dataset = load_dataset(\"databricks/databricks-dolly-15k\", split=\"train\")\n",
"\n",
@@ -215,6 +217,7 @@
"source": [
"from random import randrange\n",
"\n",
"\n",
"print(format_dolly(dataset[randrange(len(dataset))]))"
]
},
@@ -233,6 +236,7 @@
"source": [
"from transformers import AutoTokenizer\n",
"\n",
"\n",
"# Hugging Face model id\n",
"model_id = \"philschmid/Llama-2-7b-hf\" # ungated\n",
"# model_id = \"meta-llama/Llama-2-7b-hf\" # gated\n",
@@ -257,10 +261,12 @@
"metadata": {},
"outputs": [],
"source": [
"from random import randint\n",
"# add utils method to path for loading dataset\n",
"import sys\n",
"sys.path.append(\"./scripts/utils\") # make sure you change this to the correct path \n",
"from random import randint\n",
"\n",
"\n",
"sys.path.append(\"./scripts/utils\") # make sure you change this to the correct path\n",
"from pack_dataset import pack_dataset\n",
"\n",
"\n",
@@ -337,7 +343,7 @@
"metadata": {},
"outputs": [],
"source": [
"# precompilation command \n",
"# precompilation command\n",
"!MALLOC_ARENA_MAX=64 neuron_parallel_compile torchrun --nproc_per_node=32 scripts/run_clm.py \\\n",
" --model_id {model_id} \\\n",
" --dataset_path {dataset_path} \\\n",
@@ -455,9 +461,11 @@
"metadata": {},
"outputs": [],
"source": [
"from optimum.neuron import NeuronModelForCausalLM\n",
"from transformers import AutoTokenizer\n",
"\n",
"from optimum.neuron import NeuronModelForCausalLM\n",
"\n",
"\n",
"compiler_args = {\"num_cores\": 2, \"auto_cast_type\": 'fp16'}\n",
"input_shapes = {\"batch_size\": 1, \"sequence_length\": 2048}\n",
"\n",
@@ -502,13 +510,13 @@
"def format_dolly_infernece(sample):\n",
" instruction = f\"### Instruction\\n{sample['instruction']}\"\n",
" context = f\"### Context\\n{sample['context']}\" if \"context\" in sample else None\n",
" response = f\"### Answer\\n\"\n",
" response = \"### Answer\\n\"\n",
" # join all the parts together\n",
" prompt = \"\\n\\n\".join([i for i in [instruction, context, response] if i is not None])\n",
" return prompt\n",
"\n",
"\n",
"def generate(sample): \n",
"def generate(sample):\n",
" prompt = format_dolly_infernece(sample)\n",
" inputs = tokenizer(prompt, return_tensors=\"pt\")\n",
" outputs = model.generate(**inputs,\n",
