Merge branch 'main' into add-owlv2

xenova committed Feb 29, 2024
2 parents f366dd3 + bb21ae7 commit 06f6b0d
Showing 32 changed files with 1,044 additions and 631 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build_main_documentation.yml
@@ -171,7 +171,7 @@ jobs:
- name: Combine subpackage documentation
run: |
cd optimum
sudo python docs/combine_docs.py --subpackages habana intel neuron furiosa amd --version ${{ env.VERSION }}
sudo python docs/combine_docs.py --subpackages nvidia amd intel neuron habana furiosa --version ${{ env.VERSION }}
cd ..
- name: Push to repositories
47 changes: 16 additions & 31 deletions .github/workflows/build_pr_documentation.yml
@@ -1,12 +1,9 @@
name: Build PR documentation

# WARNING: As this workflow supports the pull_request_target event, please exercise extra care when editing it.

on:
workflow_dispatch:
pull_request_target:
pull_request:
branches: [ main ]
types: [ opened, synchronize, reopened, labeled ]
paths:
- "optimum/**.py"
- "docs/**.mdx"
@@ -17,14 +14,7 @@ concurrency:
cancel-in-progress: true

jobs:
authorize:
if: (github.event.action == 'labeled' && github.event.label.name == 'build-pr-doc') || github.event_name != 'pull_request_target' || (! github.event.pull_request.head.repo.fork)
runs-on: ubuntu-latest
steps:
- run: true

build_documentation:
needs: authorize
runs-on: ubuntu-latest
env:
COMMIT_SHA: ${{ github.event.pull_request.head.sha }}
@@ -42,7 +32,6 @@ jobs:
with:
repository: 'huggingface/optimum'
path: optimum
ref: ${{ github.event.pull_request.merge_commit_sha }}

- uses: actions/checkout@v2
with:
@@ -59,6 +48,11 @@
repository: 'huggingface/optimum-furiosa'
path: optimum-furiosa

- uses: actions/checkout@v2
with:
repository: 'huggingface/optimum-amd'
path: optimum-amd

- name: Setup environment
run: |
pip uninstall -y doc-builder
@@ -84,26 +78,17 @@
sudo mv intel-doc-build ../optimum
cd ..
# TODO: enable Furiosa doc build in PRs once archive.furiosa.ai is public
- name: Make Furiosa documentation
run: |
cd optimum-furiosa
pip install .
sudo apt update
sudo apt install -y ca-certificates apt-transport-https gnupg
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-key 5F03AFA423A751913F249259814F888B20B09A7E
# TODO: remove secrets and pull_request_target once archive.furiosa.ai is public
sudo tee -a /etc/apt/auth.conf.d/furiosa.conf > /dev/null <<EOT
machine archive.furiosa.ai
login ${{ secrets.FURIOSA_ACCESS_KEY }}
password ${{ secrets.FURIOSA_SECRET_ACCESS_KEY }}
EOT
sudo chmod 400 /etc/apt/auth.conf.d/furiosa.conf
sudo tee -a /etc/apt/sources.list.d/furiosa.list <<EOT
deb [arch=amd64] https://archive.furiosa.ai/ubuntu jammy restricted
EOT
sudo apt update && sudo apt install -y furiosa-libnux
doc-builder build optimum.furiosa docs/source/ --build_dir furiosa-doc-build --version pr_$PR_NUMBER --version_tag_suffix "" --html --clean
mv furiosa-doc-build ../optimum
echo "For PRs we don't build Furiosa doc"
- name: Make AMD documentation
run: |
sudo docker system prune -a -f
cd optimum-amd
make doc BUILD_DIR=amd-doc-build VERSION=pr_$PR_NUMBER
sudo mv amd-doc-build ../optimum
cd ..
- name: Make Optimum documentation
@@ -116,7 +101,7 @@
- name: Combine subpackage documentation
run: |
cd optimum
sudo python docs/combine_docs.py --subpackages habana intel neuron furiosa --version pr_$PR_NUMBER
sudo python docs/combine_docs.py --subpackages nvidia amd intel neuron habana furiosa --version pr_$PR_NUMBER
sudo mv optimum-doc-build ../
cd ..
2 changes: 1 addition & 1 deletion README.md
@@ -39,7 +39,7 @@ python -m pip install optimum[onnxruntime]@git+https://github.com/huggingface/op

## Accelerated Inference

🤗 Optimum provides multiple tools to export and run optimized models on various ecosystems:

- [ONNX](https://huggingface.co/docs/optimum/exporters/onnx/usage_guides/export_a_model) / [ONNX Runtime](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/models)
- TensorFlow Lite
20 changes: 17 additions & 3 deletions docs/combine_docs.py
@@ -6,6 +6,9 @@
import yaml


SUBPACKAGE_TOC_INSERT_INDEX = 2


parser = argparse.ArgumentParser(
description="Script to combine doc builds from subpackages with base doc build of Optimum. "
"Assumes all subpackage doc builds are present in the root of the `optimum` repo."
@@ -89,7 +92,7 @@ def add_neuron_doc(base_toc: List):
"""
# Update optimum table of contents
base_toc.insert(
1,
SUBPACKAGE_TOC_INSERT_INDEX,
{
"sections": [
{
@@ -118,9 +121,17 @@ def main():
if subpackage == "neuron":
# Neuron has its own doc so it is managed differently
add_neuron_doc(base_toc)
elif subpackage == "nvidia":
# At the moment, Optimum Nvidia's doc is the README of the GitHub repo
# It is linked to in optimum/docs/source/nvidia_overview.mdx
continue
else:
subpackage_path = Path(f"{subpackage}-doc-build")

# The doc of Furiosa will be missing for PRs
if subpackage == "furiosa" and not subpackage_path.is_dir():
continue

# Copy all HTML files from subpackage into optimum
rename_copy_subpackage_html_paths(
subpackage,
@@ -136,10 +147,13 @@
# Extend table of contents sections with the subpackage name as the parent folder
rename_subpackage_toc(subpackage, subpackage_toc)
# Just keep the name of the partner in the TOC title
subpackage_toc[0]["title"] = subpackage_toc[0]["title"].split("Optimum ")[-1]
if subpackage == "amd":
subpackage_toc[0]["title"] = subpackage_toc[0]["title"].split("Optimum-")[-1]
else:
subpackage_toc[0]["title"] = subpackage_toc[0]["title"].split("Optimum ")[-1]
if subpackage != "graphcore":
# Update optimum table of contents
base_toc.insert(1, subpackage_toc[0])
base_toc.insert(SUBPACKAGE_TOC_INSERT_INDEX, subpackage_toc[0])

# Write final table of contents
with open(base_toc_path, "w") as f:
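To make the table-of-contents logic above concrete, here is a minimal runnable sketch (not part of the commit; the sample TOC entries are illustrative stand-ins for the parsed `_toctree.yml` files) of how `combine_docs.py` splices a subpackage TOC into the base TOC at `SUBPACKAGE_TOC_INSERT_INDEX`, including the AMD-specific title handling:

```python
import yaml

SUBPACKAGE_TOC_INSERT_INDEX = 2  # mirrors the constant introduced in this diff

# Illustrative stand-ins for the parsed base and subpackage _toctree.yml files.
base_toc = yaml.safe_load("""
- sections:
  - local: index
    title: 🤗 Optimum
  title: Overview
- sections:
  - local: nvidia_overview
    title: 🤗 Optimum Nvidia
  title: Nvidia
""")
subpackage_toc = yaml.safe_load("""
- sections:
  - local: amd/index
    title: Overview
  title: 🤗 Optimum-AMD
""")

# Keep only the partner name in the TOC title; AMD spells it "Optimum-AMD"
# (hyphen), while other subpackages use a space, e.g. "Optimum Habana".
title = subpackage_toc[0]["title"]
separator = "Optimum-" if "Optimum-" in title else "Optimum "
subpackage_toc[0]["title"] = title.split(separator)[-1]

# Inserting at index 2 places dynamic entries after the Overview section and
# the hard-coded Nvidia section added to the base TOC in this commit.
base_toc.insert(SUBPACKAGE_TOC_INSERT_INDEX, subpackage_toc[0])
print(yaml.safe_dump(base_toc, allow_unicode=True, sort_keys=False))
```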
29 changes: 17 additions & 12 deletions docs/source/_toctree.yml
@@ -12,6 +12,11 @@
title: Quantization
title: Conceptual guides
title: Overview
- sections:
- local: nvidia_overview
title: 🤗 Optimum Nvidia
title: Nvidia
isExpanded: false
- sections:
- local: onnxruntime/overview
title: Overview
@@ -95,6 +100,18 @@
title: "TFLite"
title: Exporters
isExpanded: false
- sections:
- local: bettertransformer/overview
title: Overview
- sections:
- local: bettertransformer/tutorials/convert
title: Convert Transformers models to use BetterTransformer
- local: bettertransformer/tutorials/contribute
title: How to add support for new architectures?
title: Tutorials
isExpanded: false
title: BetterTransformer
isExpanded: false
- sections:
- local: torch_fx/overview
title: Overview
@@ -115,18 +132,6 @@
isExpanded: false
title: Torch FX
isExpanded: false
- sections:
- local: bettertransformer/overview
title: Overview
- sections:
- local: bettertransformer/tutorials/convert
title: Convert Transformers models to use BetterTransformer
- local: bettertransformer/tutorials/contribute
title: How to add support for new architectures?
title: Tutorials
isExpanded: false
title: BetterTransformer
isExpanded: false
- sections:
- local: llm_quantization/usage_guides/quantization
title: GPTQ quantization
46 changes: 36 additions & 10 deletions docs/source/index.mdx
@@ -19,11 +19,20 @@ As such, Optimum enables developers to efficiently use any of these platforms wi

🤗 Optimum is distributed as a collection of packages - check out the links below for an in-depth look at each one.


## Hardware partners

The packages below enable you to get the best of the 🤗 Hugging Face ecosystem on various types of devices.

<div class="mt-10">
<div class="w-full flex flex-col space-y-4 md:space-y-0 md:grid md:grid-cols-3 md:gap-y-4 md:gap-x-5">
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./habana/index"
><div class="w-full text-center bg-gradient-to-br from-indigo-400 to-indigo-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Habana</div>
<p class="text-gray-700">Maximize training throughput and efficiency with <span class="underline" onclick="event.preventDefault(); window.open('https://docs.habana.ai/en/latest/Gaudi_Overview/Gaudi_Architecture.html', '_blank');">Habana's Gaudi processor</span></p>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="https://github.com/huggingface/optimum-nvidia"
><div class="w-full text-center bg-gradient-to-br from-green-600 to-green-600 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">NVIDIA</div>
<p class="text-gray-700">Accelerate inference with NVIDIA TensorRT-LLM on the <span class="underline" onclick="event.preventDefault(); window.open('https://developer.nvidia.com/blog/nvidia-tensorrt-llm-supercharges-large-language-model-inference-on-nvidia-h100-gpus/', '_blank');">NVIDIA platform</span></p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./amd/index"
><div class="w-full text-center bg-gradient-to-br from-red-600 to-red-600 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">AMD</div>
<p class="text-gray-700">Enable performance optimizations for <span class="underline" onclick="event.preventDefault(); window.open('https://www.amd.com/en/graphics/instinct-server-accelerators', '_blank');">AMD Instinct GPUs</span> and <span class="underline" onclick="event.preventDefault(); window.open('https://ryzenai.docs.amd.com/en/latest/index.html', '_blank');">AMD Ryzen AI NPUs</span></p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./intel/index"
><div class="w-full text-center bg-gradient-to-br from-blue-400 to-blue-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Intel</div>
@@ -33,25 +42,42 @@ As such, Optimum enables developers to efficiently use any of these platforms wi
><div class="w-full text-center bg-gradient-to-br from-orange-400 to-orange-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">AWS Trainium/Inferentia</div>
<p class="text-gray-700">Accelerate your training and inference workflows with <span class="underline" onclick="event.preventDefault(); window.open('https://aws.amazon.com/machine-learning/trainium/', '_blank');">AWS Trainium</span> and <span class="underline" onclick="event.preventDefault(); window.open('https://aws.amazon.com/machine-learning/inferentia/', '_blank');">AWS Inferentia</span></p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="https://github.com/huggingface/optimum-nvidia"
><div class="w-full text-center bg-gradient-to-br from-green-600 to-green-600 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">NVIDIA</div>
<p class="text-gray-700">Accelerate inference with NVIDIA TensorRT-LLM on the <span class="underline" onclick="event.preventDefault(); window.open('https://developer.nvidia.com/blog/nvidia-tensorrt-llm-supercharges-large-language-model-inference-on-nvidia-h100-gpus/', '_blank');">NVIDIA platform</span></p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./amd/index"
><div class="w-full text-center bg-gradient-to-br from-red-600 to-red-600 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">AMD</div>
<p class="text-gray-700">Enable performance optimizations for <span class="underline" onclick="event.preventDefault(); window.open('https://www.amd.com/en/graphics/instinct-server-accelerators', '_blank');">AMD Instinct GPUs</span> and <span class="underline" onclick="event.preventDefault(); window.open('https://ryzenai.docs.amd.com/en/latest/index.html', '_blank');">AMD Ryzen AI NPUs</span></p>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./habana/index"
><div class="w-full text-center bg-gradient-to-br from-indigo-400 to-indigo-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Habana</div>
<p class="text-gray-700">Maximize training throughput and efficiency with <span class="underline" onclick="event.preventDefault(); window.open('https://docs.habana.ai/en/latest/Gaudi_Overview/Gaudi_Architecture.html', '_blank');">Habana's Gaudi processor</span></p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./furiosa/index"
><div class="w-full text-center bg-gradient-to-br from-green-400 to-green-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">FuriosaAI</div>
<p class="text-gray-700">Fast and efficient inference on <span class="underline" onclick="event.preventDefault(); window.open('https://www.furiosa.ai/', '_blank');">FuriosaAI WARBOY</span></p>
</a>
</div>
</div>

> [!TIP]
> Some packages provide hardware-agnostic features (e.g. INC interface in Optimum Intel).
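
As a concrete illustration of such a hardware-agnostic feature, below is a minimal sketch of dynamic post-training quantization through Optimum Intel's INC (Intel Neural Compressor) interface. The checkpoint name and output directory are arbitrary examples, and the exact API may differ between versions:

```python
from neural_compressor.config import PostTrainingQuantConfig
from optimum.intel import INCQuantizer
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english"  # example checkpoint
)
quantizer = INCQuantizer.from_pretrained(model)
# Dynamic quantization needs no calibration data and runs on plain CPUs,
# which is what makes this interface hardware-agnostic.
quantizer.quantize(
    quantization_config=PostTrainingQuantConfig(approach="dynamic"),
    save_directory="quantized_model",  # example output directory
)
```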

## Open-source integrations

🤗 Optimum also supports a variety of open-source frameworks to make model optimization very easy.

<div class="mt-10">
<div class="w-full flex flex-col space-y-4 md:space-y-0 md:grid md:grid-cols-3 md:gap-y-4 md:gap-x-5">
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./onnxruntime/overview"
><div class="w-full text-center bg-gradient-to-br from-pink-400 to-pink-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">ONNX Runtime</div>
<p class="text-gray-700">Apply quantization and graph optimization to accelerate Transformers models training and inference with <span class="underline" onclick="event.preventDefault(); window.open('https://onnxruntime.ai/', '_blank');">ONNX Runtime</span></p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./exporters/overview"
><div class="w-full text-center bg-gradient-to-br from-purple-400 to-purple-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Exporters</div>
<p class="text-gray-700">Export your PyTorch or TensorFlow model to different formats such as ONNX and TFLite</p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./bettertransformer/overview"
><div class="w-full text-center bg-gradient-to-br from-yellow-400 to-yellow-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">BetterTransformer</div>
<p class="text-gray-700">A one-liner integration to use <span class="underline" onclick="event.preventDefault(); window.open('https://pytorch.org/blog/a-better-transformer-for-fast-transformer-encoder-inference/', '_blank');">PyTorch's BetterTransformer</span> with Transformers models</p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./torch_fx/overview"
><div class="w-full text-center bg-gradient-to-br from-green-400 to-green-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Torch FX</div>
<p class="text-gray-700">Create and compose custom graph transformations to optimize PyTorch Transformers models with <span class="underline" onclick="event.preventDefault(); window.open('https://pytorch.org/docs/stable/fx.html#', '_blank');">Torch FX</span></p>
</a>
</div>
</div>
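The BetterTransformer card above describes a one-liner integration; a minimal sketch of what that looks like in practice (the checkpoint name is an arbitrary example of a supported architecture):

```python
from optimum.bettertransformer import BetterTransformer
from transformers import AutoModel

model = AutoModel.from_pretrained("distilbert-base-uncased")
# Swap supported modules for PyTorch's fused fastpath kernels in place of
# the vanilla Transformers implementation.
model = BetterTransformer.transform(model)
```

The library also exposes `BetterTransformer.reverse(model)` to undo the conversion, e.g. before saving the model back in its canonical form.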
3 changes: 3 additions & 0 deletions docs/source/nvidia_overview.mdx
@@ -0,0 +1,3 @@
# 🤗 Optimum Nvidia

Find more information about 🤗 Optimum Nvidia [here](https://github.com/huggingface/optimum-nvidia).
5 changes: 2 additions & 3 deletions examples/onnxruntime/training/image-classification/README.md
@@ -11,9 +11,7 @@ See the License for the specific language governing permissions and
limitations under the License.
-->

# Language Modeling

## Image Classification Training
# Image Classification

By running the script [`run_image_classification.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/image-classification/run_image_classification.py), we can leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) accelerator to train image classification models from the
[HuggingFace hub](https://huggingface.co/models).
@@ -32,6 +30,7 @@ torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_image_classification.py \
--dataset_name beans \
--output_dir ./beans_outputs/ \
--remove_unused_columns False \
--label_column_name labels \
--do_train \
--do_eval \
--learning_rate 2e-5 \