From e3bc57681525a2302d5e21bb53ba5cfbc6e33bd8 Mon Sep 17 00:00:00 2001
From: Jingya HUANG <44135271+JingyaHuang@users.noreply.github.com>
Date: Fri, 5 Apr 2024 18:26:49 +0200
Subject: [PATCH] Disable weights / neff separation of SDXL's UNET for neuron
 sdk 2.18 (#554)

* temp patch: force `inline_weights_to_neff=True` when exporting SDXL sub-models

* disable sdxl caching test

* skip `test_stable_diffusion_xl_cache` with `pytest.mark.skip`
---
 optimum/exporters/neuron/convert.py | 8 ++++++++
 tests/cache/test_neuronx_cache.py   | 1 +
 2 files changed, 9 insertions(+)

diff --git a/optimum/exporters/neuron/convert.py b/optimum/exporters/neuron/convert.py
index 4438a7414..40c87bf62 100644
--- a/optimum/exporters/neuron/convert.py
+++ b/optimum/exporters/neuron/convert.py
@@ -342,6 +342,14 @@ def export_models(
         output_path.parent.mkdir(parents=True, exist_ok=True)
 
         try:
+
+            # TODO: Remove this workaround once a Neuron SDK release fixes the compilation of SDXL with weights/neff separation: https://github.com/aws-neuron/aws-neuron-sdk/issues/859
+            if not inline_weights_to_neff and getattr(sub_neuron_config, "is_sdxl", False):
+                logger.warning(
+                    "The compilation of SDXL's unet with the weights/neff separation is broken since the Neuron sdk 2.18 release. `inline_weights_to_neff` will be set to True and the caching will be disabled. If you still want to separate the neff and weights, please downgrade your Neuron setup to the 2.17.1 release."
+                )
+                inline_weights_to_neff = True
+
             start_time = time.time()
             neuron_inputs, neuron_outputs = export(
                 model=submodel,
diff --git a/tests/cache/test_neuronx_cache.py b/tests/cache/test_neuronx_cache.py
index 319efe3a5..83a0bc5e1 100644
--- a/tests/cache/test_neuronx_cache.py
+++ b/tests/cache/test_neuronx_cache.py
@@ -274,6 +274,7 @@ def test_stable_diffusion_cache(cache_repos):
 
 @is_inferentia_test
 @requires_neuronx
+@pytest.mark.skip("Disable the test due to https://github.com/aws-neuron/aws-neuron-sdk/issues/859")
 def test_stable_diffusion_xl_cache(cache_repos):
     cache_path, cache_repo_id = cache_repos
     model_id = "echarlaix/tiny-random-stable-diffusion-xl"