From 58af629101f35c9a95077c111986838ecac2fa0b Mon Sep 17 00:00:00 2001 From: Jingya Huang Date: Wed, 9 Aug 2023 17:07:48 +0000 Subject: [PATCH] Set NEURON_FUSE_SOFTMAX env var in the neuronx export CLI --- optimum/commands/export/neuronx.py | 7 +++++++ optimum/exporters/neuron/utils.py | 5 ++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/optimum/commands/export/neuronx.py b/optimum/commands/export/neuronx.py index 37bef3a33..cd11d0800 100644 --- a/optimum/commands/export/neuronx.py +++ b/optimum/commands/export/neuronx.py @@ -19,9 +19,16 @@ from typing import TYPE_CHECKING, Optional from ...exporters import TasksManager +from ...utils import is_diffusers_available from ..base import BaseOptimumCLICommand, CommandInfo +if is_diffusers_available(): + # Mandatory for applying optimized attention score of Stable Diffusion + import os + + os.environ["NEURON_FUSE_SOFTMAX"] = "1" + if TYPE_CHECKING: from argparse import ArgumentParser, Namespace, _SubParsersAction diff --git a/optimum/exporters/neuron/utils.py b/optimum/exporters/neuron/utils.py index f6eb748b1..e1be1bd3c 100644 --- a/optimum/exporters/neuron/utils.py +++ b/optimum/exporters/neuron/utils.py @@ -242,9 +242,8 @@ def _get_submodels_for_export_stable_diffusion( Attention.get_attention_scores = get_attention_scores else: logger.warning( - "We are not applying optimized attention score computation, which will harm the latency." - " If you want better performance, please set the environment variable with `export NEURON_FUSE_SOFTMAX=1`" - " and recompile the unet model again." + "You are not applying optimized attention score computation. If you want better performance, please" + " set the environment variable with `export NEURON_FUSE_SOFTMAX=1` and recompile the unet model." ) models_for_export["unet"] = copy.deepcopy(pipeline.unet)