From e3bc57681525a2302d5e21bb53ba5cfbc6e33bd8 Mon Sep 17 00:00:00 2001
From: Jingya HUANG <44135271+JingyaHuang@users.noreply.github.com>
Date: Fri, 5 Apr 2024 18:26:49 +0200
Subject: [PATCH] Disable weights / neff separation of SDXL's UNET for neuron sdk 2.18 (#554)

* temp patch

* disable sdxl caching test

* pytest skip
---
 optimum/exporters/neuron/convert.py | 8 ++++++++
 tests/cache/test_neuronx_cache.py   | 1 +
 2 files changed, 9 insertions(+)

diff --git a/optimum/exporters/neuron/convert.py b/optimum/exporters/neuron/convert.py
index 4438a7414..40c87bf62 100644
--- a/optimum/exporters/neuron/convert.py
+++ b/optimum/exporters/neuron/convert.py
@@ -342,6 +342,14 @@ def export_models(
         output_path.parent.mkdir(parents=True, exist_ok=True)
 
         try:
+
+            # TODO: Remove after the weights/neff separation compilation of sdxl is patched by a neuron sdk release: https://github.com/aws-neuron/aws-neuron-sdk/issues/859
+            if not inline_weights_to_neff and getattr(sub_neuron_config, "is_sdxl", False):
+                logger.warning(
+                    "The compilation of SDXL's unet with the weights/neff separation is broken since the Neuron sdk 2.18 release. `inline_weights_to_neff` will be set to True and the caching will be disabled. If you still want to separate the neff and weights, please downgrade your Neuron setup to the 2.17.1 release."
+                )
+                inline_weights_to_neff = True
+
             start_time = time.time()
             neuron_inputs, neuron_outputs = export(
                 model=submodel,
diff --git a/tests/cache/test_neuronx_cache.py b/tests/cache/test_neuronx_cache.py
index 319efe3a5..83a0bc5e1 100644
--- a/tests/cache/test_neuronx_cache.py
+++ b/tests/cache/test_neuronx_cache.py
@@ -274,6 +274,7 @@ def test_stable_diffusion_cache(cache_repos):
 
 @is_inferentia_test
 @requires_neuronx
+@pytest.mark.skip("Disable the test due to https://github.com/aws-neuron/aws-neuron-sdk/issues/859")
 def test_stable_diffusion_xl_cache(cache_repos):
     cache_path, cache_repo_id = cache_repos
     model_id = "echarlaix/tiny-random-stable-diffusion-xl"