Merge branch 'main' into lora_tutorial

michaelbenayoun committed Sep 5, 2024
2 parents c18e471 + 281d9bb · commit d64af16

Showing 49 changed files with 1,695 additions and 226 deletions.
@@ -1,4 +1,4 @@
-name: Optimum neuron inference cache builder
+name: Optimum neuron LLM inference cache builder

on:
workflow_dispatch:
@@ -39,7 +39,7 @@ jobs:
EOF
wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
sudo apt-get update -y
-sudo apt-get install aws-neuronx-tools=2.17.1.0 aws-neuronx-runtime-lib=2.20.22.0-1b3ca6425 aws-neuronx-collectives=2.20.22.0-c101c322e -y
+sudo apt-get install aws-neuronx-tools=2.18.3.0 aws-neuronx-runtime-lib=2.21.41.0-fb1705f5f aws-neuronx-collectives=2.21.46.0-69b77134b -y
export PATH=/opt/aws/neuron/bin:$PATH
- name: Checkout
uses: actions/checkout@v4
52 changes: 52 additions & 0 deletions .github/workflows/inference_cache_stable_diffusion.yml
@@ -0,0 +1,52 @@
name: Optimum neuron SD inference cache builder

on:
workflow_dispatch:
schedule:
# Schedule the workflow to run every Saturday at midnight UTC
- cron: '0 0 * * 6'

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

jobs:
cache:
name: Create optimum-neuron inference cache
runs-on:
group: aws-inf2-8xlarge
env:
AWS_REGION: us-east-1
strategy:
fail-fast: false
matrix:
config: [stable-diffusion]
steps:
- name: Install Neuron runtime
run: |
. /etc/os-release
sudo tee /etc/apt/sources.list.d/neuron.list > /dev/null <<EOF
deb https://apt.repos.neuron.amazonaws.com ${VERSION_CODENAME} main
EOF
wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
sudo apt-get update -y
sudo apt-get install aws-neuronx-tools=2.18.3.0 aws-neuronx-runtime-lib=2.21.41.0-fb1705f5f aws-neuronx-collectives=2.21.46.0-69b77134b -y
export PATH=/opt/aws/neuron/bin:$PATH
- name: Checkout
uses: actions/checkout@v4
- name: Install python and create venv
run: |
sudo apt install python3-venv python3-dev -y
python3 -m venv aws_neuron_venv_pytorch
source aws_neuron_venv_pytorch/bin/activate
python -m pip install -U pip
python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
- name: Install optimum neuron
run: |
source aws_neuron_venv_pytorch/bin/activate
python -m pip install .[neuronx,diffusers]
- name: Create cache for ${{matrix.config}} models
run: |
source aws_neuron_venv_pytorch/bin/activate
config_prefix_url=https://huggingface.co/aws-neuron/optimum-neuron-cache/raw/main/inference-cache-config
HF_TOKEN=${{secrets.HF_TOKEN_OPTIMUM_NEURON_CACHE}} \
python tools/auto_fill_inference_cache.py --config_file ${config_prefix_url}/${{matrix.config}}.json
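
For context, the cache step above feeds `tools/auto_fill_inference_cache.py` a JSON config fetched from the `aws-neuron/optimum-neuron-cache` repository. The sketch below only illustrates the general shape such a config might take (model ids mapped to input shapes to pre-compile); the authoritative schema is defined by the tool and the real files under `inference-cache-config/`.

```python
import json

# Illustration only: the real schema lives in tools/auto_fill_inference_cache.py
# and the JSON files under inference-cache-config/ in aws-neuron/optimum-neuron-cache.
hypothetical_config = {
    "stabilityai/stable-diffusion-2-1": [
        # One entry per combination of input shapes to pre-compile.
        {"batch_size": 1, "height": 768, "width": 768, "num_images_per_prompt": 1},
    ],
}

with open("stable-diffusion.json", "w") as f:
    json.dump(hypothetical_config, f, indent=2)
```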
2 changes: 1 addition & 1 deletion Makefile
@@ -40,7 +40,7 @@ PACKAGE_FILES = $(PACKAGE_PYTHON_FILES) \
$(PACKAGE_DIST) $(PACKAGE_WHEEL): $(PACKAGE_FILES)
python -m build

-TGI_VERSION ?= 2.0.2
+TGI_VERSION ?= 2.1.1

neuronx-tgi: $(PACKAGE_DIST)
docker build --rm -f text-generation-inference/Dockerfile \
19 changes: 15 additions & 4 deletions docs/source/containers.mdx
@@ -15,15 +15,26 @@ specific language governing permissions and limitations under the License.
We provide pre-built Optimum Neuron containers for Amazon SageMaker. These containers come with all of the Hugging Face libraries and dependencies pre-installed, so you can start using them right away.
We provide containers for training and inference, as well as optimized text generation containers with TGI. The table below is kept up to date and includes only the latest version of each container. You can find older versions in the [Deep Learning Container Release Notes](https://github.com/aws/deep-learning-containers/releases?q=hf-neuronx&expanded=true).

-We recommend using the `sagemaker` Python SDK to retrieve the image URI for the container you want to use.
+We recommend using the `sagemaker` Python SDK to retrieve the image URI for the container you want to use. Here is a code snippet to retrieve the latest Text Generation Inference container image URI:
```python
from sagemaker.huggingface import get_huggingface_llm_image_uri

# retrieve the llm image uri
llm_image = get_huggingface_llm_image_uri(
"huggingface-neuronx"
)

print(f"llm image uri: {llm_image}")
```
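
If you need a container matching a specific Optimum Neuron release rather than the latest, the same helper accepts a `version` argument. A small sketch (the pinned value is an example; pick a release listed in the table below):

```python
from sagemaker.huggingface import get_huggingface_llm_image_uri

# Pin a specific Optimum Neuron release instead of taking the latest.
# "0.0.24" is an example value matching the TGI row in the table below.
llm_image = get_huggingface_llm_image_uri("huggingface-neuronx", version="0.0.24")
print(f"llm image uri: {llm_image}")
```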
## Available Optimum Neuron Containers
| Type | Optimum Version | Image URI |
|-----------------------------|-----------------|---------------------------------------------|
-| Training                    | 0.0.21          | `763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training-neuronx:1.13.1-transformers4.36.2-neuronx-py310-sdk2.18.0-ubuntu20.04` |
-| Inference                   | 0.0.22          | `763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-inference-neuronx:2.1.2-transformers4.36.2-neuronx-py310-sdk2.18.0-ubuntu20.04` |
-| Text Generation Inference   | 0.0.22          | `763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.2-optimum0.0.22-neuronx-py310-ubuntu22.04` |
+| Training                    | 0.0.24          | `763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training-neuronx:2.1.2-transformers4.41.1-neuronx-py310-sdk2.19.1-ubuntu20.04` |
+| Inference                   | 0.0.24          | `763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-inference-neuronx:2.1.2-transformers4.41.1-neuronx-py310-sdk2.19.1-ubuntu20.04` |
+| Text Generation Inference   | 0.0.24          | `763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.2-optimum0.0.24-neuronx-py310-ubuntu22.04` |
Please replace `763104351884` with the correct [AWS account ID](https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/image_uri_config/huggingface-neuronx.json) and `region` with the AWS region you are working in.
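
As a rough sketch of how the retrieved URI is typically used — not an official recipe; the model id, environment values, role, and instance type below are illustrative assumptions — a TGI endpoint can then be deployed with the `sagemaker` SDK:

```python
from sagemaker.huggingface import HuggingFaceModel

# Sketch: deploy the TGI Neuron image retrieved above to a SageMaker endpoint.
# All env values and the instance type are illustrative; size them for your model.
model = HuggingFaceModel(
    image_uri=llm_image,  # URI from get_huggingface_llm_image_uri above
    role="<your-sagemaker-execution-role>",  # placeholder
    env={
        "HF_MODEL_ID": "HuggingFaceH4/zephyr-7b-beta",  # example model id
        "HF_NUM_CORES": "2",          # Neuron cores to use
        "MAX_INPUT_LENGTH": "2048",
        "MAX_TOTAL_TOKENS": "4096",
    },
)
predictor = model.deploy(initial_instance_count=1, instance_type="ml.inf2.xlarge")
print(predictor.predict({"inputs": "What is Deep Learning?"}))
```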
51 changes: 51 additions & 0 deletions docs/source/inference_tutorials/stable_diffusion.mdx
@@ -635,4 +635,55 @@ compare.save("compare.png")
/>



## ControlNet with Stable Diffusion XL

### Compile

```bash
optimum-cli export neuron -m stabilityai/stable-diffusion-xl-base-1.0 --task stable-diffusion-xl --batch_size 1 --height 1024 --width 1024 --controlnet_ids diffusers/controlnet-canny-sdxl-1.0-small --num_images_per_prompt 1 sdxl_neuron_controlnet/
```
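
The same export can presumably also be driven from Python via `from_pretrained(..., export=True)`, mirroring the CLI flags above. A sketch under that assumption:

```python
from optimum.neuron import NeuronStableDiffusionXLControlNetPipeline

# Sketch of the CLI export above, done from Python; the compiler inputs mirror
# the --batch_size/--height/--width/--num_images_per_prompt flags.
pipe = NeuronStableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet_ids="diffusers/controlnet-canny-sdxl-1.0-small",
    export=True,
    batch_size=1,
    height=1024,
    width=1024,
    num_images_per_prompt=1,
)
pipe.save_pretrained("sdxl_neuron_controlnet/")
```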

### Text-to-Image

```python
import cv2
import numpy as np
from diffusers.utils import load_image
from PIL import Image
from optimum.neuron import NeuronStableDiffusionXLControlNetPipeline

# Inputs
prompt = "aerial view, a futuristic research complex in a bright foggy jungle, hard lighting"
negative_prompt = "low quality, bad quality, sketches"

image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/hf-logo.png"
)
image = np.array(image)
image = cv2.Canny(image, 100, 200)
image = image[:, :, None]
image = np.concatenate([image, image, image], axis=2)
image = Image.fromarray(image)

controlnet_conditioning_scale = 0.5 # recommended for good generalization

pipe = NeuronStableDiffusionXLControlNetPipeline.from_pretrained("sdxl_neuron_controlnet")

images = pipe(
prompt,
negative_prompt=negative_prompt,
image=image,
controlnet_conditioning_scale=controlnet_conditioning_scale,
).images
images[0].save("hug_lab.png")
```

<img
src="https://huggingface.co/datasets/optimum/documentation-images/resolve/main/neuron/models/12-sdxl-text2img-controlnet.png?download=true"
width="768"
height="256"
alt="stable diffusion xl generated image with controlnet."
/>

Are there any other Stable Diffusion features you would like us to support in 🤗 `Optimum-neuron`? Please file an issue in the [`Optimum-neuron` GitHub repo](https://github.com/huggingface/optimum-neuron) or discuss with us on [Hugging Face's community forum](https://discuss.huggingface.co/c/optimum/), cheers 🤗!
5 changes: 5 additions & 0 deletions docs/source/package_reference/modeling.mdx
@@ -139,3 +139,8 @@ The following Neuron model classes are available for stable diffusion tasks.
### NeuronStableDiffusionXLInpaintPipeline
[[autodoc]] modeling_diffusion.NeuronStableDiffusionXLInpaintPipeline
- __call__

### NeuronStableDiffusionXLControlNetPipeline

[[autodoc]] modeling_diffusion.NeuronStableDiffusionXLControlNetPipeline
- __call__
4 changes: 2 additions & 2 deletions infrastructure/ami/hcl2-files/build.pkr.hcl
@@ -14,7 +14,7 @@ build {
]
}
provisioner "shell" {
-inline = ["echo 'source /opt/aws_neuron_venv_pytorch/bin/activate' >> /home/ubuntu/.bashrc"]
+inline = ["echo 'source /opt/aws_neuronx_venv_pytorch_2_1/bin/activate' | sudo tee -a /home/ubuntu/.bashrc"]
}
provisioner "file" {
source = "scripts/welcome-msg.sh"
@@ -26,4 +26,4 @@ build {
"sudo chmod +x /etc/update-motd.d/99-custom-message",
]
}
-}
\ No newline at end of file
+}
4 changes: 2 additions & 2 deletions infrastructure/ami/hcl2-files/variables.pkr.hcl
@@ -10,15 +10,15 @@ variable "instance_type" {
}

variable "source_ami" {
default = "ami-0274e546d67626305"
default = "ami-0bcb701dd3cace633"
description = "Base Image"
type = string
/*
To get latest value, run the following command:
aws ec2 describe-images \
--region us-east-1 \
--owners amazon \
-  --filters 'Name=name,Values=Deep Learning AMI Neuron PyTorch 1.13 (Ubuntu 20.04) ????????' 'Name=state,Values=available' \
+  --filters 'Name=name,Values=Deep Learning AMI Neuron ????????' 'Name=state,Values=available' \
--query 'reverse(sort_by(Images, &CreationDate))[:1].ImageId' \
--output text
*/
4 changes: 2 additions & 2 deletions infrastructure/ami/scripts/install-huggingface-libraries.sh
@@ -1,7 +1,7 @@
#!/bin/bash

# Activate the neuron virtual environment
-source /opt/aws_neuron_venv_pytorch/bin/activate
+source /opt/aws_neuronx_venv_pytorch_2_1/bin/activate

echo "Step: install-hugging-face-libraries"

@@ -34,4 +34,4 @@ rm -rf optimum-neuron
chmod -R 777 /home/ubuntu/huggingface-neuron-samples /home/ubuntu/huggingface-neuron-notebooks

echo "Step: validate-imports-of-huggingface-libraries"
-bash -c 'python -c "import transformers;import datasets;import accelerate;import evaluate;import tensorboard; import torch;from optimum.neuron import pipeline"'
\ No newline at end of file
+bash -c 'python -c "import transformers;import datasets;import accelerate;import evaluate;import tensorboard; import torch;from optimum.neuron import pipeline"'
4 changes: 2 additions & 2 deletions infrastructure/ami/scripts/validate-neuron.sh
@@ -3,11 +3,11 @@ echo "Step: validate-neuron-devices"
neuron-ls

# Activate the neuron virtual environment
-source /opt/aws_neuron_venv_pytorch/bin/activate
+source /opt/aws_neuronx_venv_pytorch_2_1/bin/activate

python -c 'import torch'
python -c 'import torch_neuronx'

echo "Installing Tensorboard Plugin for Neuron"
pip install --upgrade --no-cache-dir \
"tensorboard-plugin-neuronx"
"tensorboard-plugin-neuronx"
5 changes: 4 additions & 1 deletion notebooks/sentence-transformers/getting-started.ipynb
@@ -46,6 +46,7 @@
"source": [
"from optimum.neuron import NeuronModelForSentenceTransformers\n",
"\n",
"\n",
"# Sentence Transformers model from HuggingFace\n",
"model_id = \"BAAI/bge-small-en-v1.5\"\n",
"input_shapes = {\"batch_size\": 1, \"sequence_length\": 384} # mandatory shapes\n",
@@ -88,9 +89,11 @@
"metadata": {},
"outputs": [],
"source": [
"from optimum.neuron import NeuronModelForSentenceTransformers\n",
"from transformers import AutoTokenizer\n",
"\n",
"from optimum.neuron import NeuronModelForSentenceTransformers\n",
"\n",
"\n",
"model_id_or_path = \"bge_emb_inf2/\"\n",
"tokenizer_id = \"BAAI/bge-small-en-v1.5\"\n",
"\n",
11 changes: 7 additions & 4 deletions notebooks/stable-diffusion/stable-diffusion-txt2img.ipynb
@@ -55,6 +55,7 @@
"source": [
"from optimum.neuron import NeuronStableDiffusionPipeline\n",
"\n",
"\n",
"model_id = \"stabilityai/stable-diffusion-2-1\"\n",
"num_image_per_prompt = 1\n",
"input_shapes = {\"batch_size\": 1, \"height\": 768, \"width\": 768, \"num_image_per_prompt\": num_image_per_prompt}\n",
@@ -374,6 +375,8 @@
"outputs": [],
"source": [
"from diffusers import DPMSolverMultistepScheduler\n",
"\n",
"\n",
"stable_diffusion.scheduler = DPMSolverMultistepScheduler.from_config(stable_diffusion.scheduler.config)"
]
},
@@ -384,11 +387,11 @@
"metadata": {},
"outputs": [],
"source": [
"from matplotlib import pyplot as plt\n",
"from matplotlib import image as mpimg\n",
"import time\n",
"import copy\n",
"import numpy as np "
"\n",
"import numpy as np\n",
"from matplotlib import image as mpimg\n",
"from matplotlib import pyplot as plt"
]
},
{
11 changes: 7 additions & 4 deletions notebooks/stable-diffusion/stable-diffusion-xl-txt2img.ipynb
@@ -56,6 +56,7 @@
"source": [
"from optimum.neuron import NeuronStableDiffusionXLPipeline\n",
"\n",
"\n",
"model_id = \"stabilityai/stable-diffusion-xl-base-1.0\"\n",
"num_image_per_prompt = 1\n",
"input_shapes = {\"batch_size\": 1, \"height\": 1024, \"width\": 1024, \"num_image_per_prompt\": num_image_per_prompt}\n",
@@ -423,6 +424,8 @@
"outputs": [],
"source": [
"from diffusers import DPMSolverMultistepScheduler\n",
"\n",
"\n",
"stable_diffusion_xl.scheduler = DPMSolverMultistepScheduler.from_config(stable_diffusion_xl.scheduler.config)"
]
},
@@ -433,11 +436,11 @@
"metadata": {},
"outputs": [],
"source": [
"from matplotlib import pyplot as plt\n",
"from matplotlib import image as mpimg\n",
"import time\n",
"import copy\n",
"import numpy as np "
"\n",
"import numpy as np\n",
"from matplotlib import image as mpimg\n",
"from matplotlib import pyplot as plt"
]
},
{
7 changes: 6 additions & 1 deletion notebooks/text-classification/notebook.ipynb
@@ -85,6 +85,7 @@
"source": [
"from datasets import load_dataset\n",
"\n",
"\n",
"# Dataset id from huggingface.co/dataset\n",
"dataset_id = \"philschmid/emotion\"\n",
"\n",
@@ -116,6 +117,7 @@
"source": [
"from random import randrange\n",
"\n",
"\n",
"random_id = randrange(len(raw_dataset['train']))\n",
"raw_dataset['train'][random_id]\n",
"# {'text': 'i feel isolated and alone in my trade', 'label': 0}"
@@ -139,8 +141,11 @@
"metadata": {},
"outputs": [],
"source": [
"from transformers import AutoTokenizer\n",
"import os\n",
"\n",
"from transformers import AutoTokenizer\n",
"\n",
"\n",
"# Model id to load the tokenizer\n",
"model_id = \"bert-base-uncased\"\n",
"save_dataset_path = \"lm_dataset\"\n",
(Diff truncated: remaining changed files not shown.)