Skip to content

Commit

Permalink
Merge branch 'main' into add-tp-support-t5
Browse files Browse the repository at this point in the history
  • Loading branch information
JingyaHuang committed Sep 25, 2024
2 parents 8ea65f2 + 3748a06 commit 08c2d35
Show file tree
Hide file tree
Showing 16 changed files with 68 additions and 91 deletions.
6 changes: 1 addition & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,15 @@ PACKAGE_FILES = $(PACKAGE_PYTHON_FILES) \
$(PACKAGE_DIST) $(PACKAGE_WHEEL): $(PACKAGE_FILES)
python -m build

TGI_VERSION ?= 2.1.1

neuronx-tgi: $(PACKAGE_DIST)
docker build --rm -f text-generation-inference/Dockerfile \
--build-arg VERSION=$(VERSION) \
--build-arg TGI_VERSION=$(TGI_VERSION) \
-t neuronx-tgi:$(VERSION) .
docker tag neuronx-tgi:$(VERSION) neuronx-tgi:latest

neuronx-tgi-sagemaker: $(PACKAGE_DIST)
docker build --rm -f text-generation-inference/Dockerfile \
--build-arg VERSION=$(VERSION) \
--build-arg TGI_VERSION=$(TGI_VERSION) \
--target sagemaker \
-t neuronx-tgi:$(VERSION) .

Expand Down Expand Up @@ -90,7 +86,7 @@ test_installs:
tgi_server:
python -m pip install -r text-generation-inference/server/build-requirements.txt
make -C text-generation-inference/server clean
VERSION=${VERSION} TGI_VERSION=${TGI_VERSION} make -C text-generation-inference/server gen-server
VERSION=${VERSION} make -C text-generation-inference/server gen-server

tgi_test: tgi_server
python -m pip install .[neuronx]
Expand Down
6 changes: 2 additions & 4 deletions docs/source/inference_tutorials/stable_diffusion.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ Here is an example of exporting stable diffusion components with `Optimum` CLI:

```bash
optimum-cli export neuron --model stabilityai/stable-diffusion-2-1-base \
--task stable-diffusion \
--batch_size 1 \
--height 512 `# height in pixels of generated image, e.g. 512, 768` \
--width 512 `# width in pixels of generated image, e.g. 512, 768` \
Expand Down Expand Up @@ -229,7 +228,6 @@ Here is an example of exporting SDXL components with `Optimum` CLI:

```bash
optimum-cli export neuron --model stabilityai/stable-diffusion-xl-base-1.0 \
--task stable-diffusion-xl \
--batch_size 1 \
--height 1024 `# height in pixels of generated image, e.g. 768, 1024` \
--width 1024 `# width in pixels of generated image, e.g. 768, 1024` \
Expand Down Expand Up @@ -481,7 +479,7 @@ Here we will compile the [`stabilityai/sdxl-turbo`](https://huggingface.co/stabi
### Compile SDXL Turbo

```bash
optimum-cli export neuron --model stabilityai/sdxl-turbo --task stable-diffusion-xl --batch_size 1 --height 512 --width 512 --auto_cast matmul --auto_cast_type bf16 sdxl_turbo_neuron/
optimum-cli export neuron --model stabilityai/sdxl-turbo --batch_size 1 --height 512 --width 512 --auto_cast matmul --auto_cast_type bf16 sdxl_turbo_neuron/
```

### Text-to-Image
Expand Down Expand Up @@ -562,7 +560,7 @@ We can either compile one or multiple ControlNet via the Optimum CLI or programa
* Export via the Optimum CLI

```bash
optimum-cli export neuron -m runwayml/stable-diffusion-v1-5 --task stable-diffusion --batch_size 1 --height 512 --width 512 --controlnet_ids lllyasviel/sd-controlnet-canny --num_images_per_prompt 1 sd_neuron_controlnet/
optimum-cli export neuron -m runwayml/stable-diffusion-v1-5 --batch_size 1 --height 512 --width 512 --controlnet_ids lllyasviel/sd-controlnet-canny --num_images_per_prompt 1 sd_neuron_controlnet/
```

* Export via Python API
Expand Down
7 changes: 0 additions & 7 deletions optimum/commands/export/neuron.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,6 @@ def parse_args_neuron(parser: "ArgumentParser"):
f" {str(list(TasksManager._TRANSFORMERS_TASKS_TO_MODEL_LOADERS.keys()) + list(TasksManager._DIFFUSERS_TASKS_TO_MODEL_LOADERS.keys()))}."
),
)
optional_group.add_argument(
"--library-name",
type=str,
choices=["transformers", "sentence_transformers"],
default=None,
help=("The library on the model. If not provided, will attempt to infer the local checkpoint's library."),
)
optional_group.add_argument(
"--subfolder",
type=str,
Expand Down
7 changes: 0 additions & 7 deletions optimum/commands/export/neuronx.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,6 @@ def parse_args_neuronx(parser: "ArgumentParser"):
f" {str(list(TasksManager._TRANSFORMERS_TASKS_TO_MODEL_LOADERS.keys()) + list(TasksManager._DIFFUSERS_TASKS_TO_MODEL_LOADERS.keys()))}."
),
)
optional_group.add_argument(
"--library-name",
type=str,
choices=["transformers", "diffusers", "sentence_transformers"],
default=None,
help=("The library of the model." " If not provided, will attempt to infer the local checkpoint's library."),
)
optional_group.add_argument(
"--subfolder",
type=str,
Expand Down
32 changes: 13 additions & 19 deletions optimum/exporters/neuron/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ def get_submodels_and_neuron_configs(
tensor_parallel_size: int,
task: str,
output: Path,
library_name: Optional[str] = None,
library_name: str,
subfolder: str = "",
dynamic_batch_size: bool = False,
model_name_or_path: Optional[Union[str, Path]] = None,
Expand All @@ -277,19 +277,17 @@ def get_submodels_and_neuron_configs(
lora_scales: Optional[Union[float, List[float]]] = None,
controlnet_ids: Optional[Union[str, List[str]]] = None,
):
is_stable_diffusion = "stable-diffusion" in task
is_encoder_decoder = (
getattr(model.config, "is_encoder_decoder", False) if isinstance(model.config, PretrainedConfig) else False
)

if is_stable_diffusion:
if library_name == "diffusers":
# TODO: Enable optional outputs for Stable Diffusion
if output_attentions:
raise ValueError(f"`output_attentions`is not supported by the {task} task yet.")
models_and_neuron_configs, output_model_names = _get_submodels_and_neuron_configs_for_stable_diffusion(
model=model,
input_shapes=input_shapes,
task=task,
output=output,
dynamic_batch_size=dynamic_batch_size,
submodels=submodels,
Expand Down Expand Up @@ -357,7 +355,6 @@ def _normalize_lora_params(lora_model_ids, lora_weight_names, lora_adapter_names
def _get_submodels_and_neuron_configs_for_stable_diffusion(
model: Union["PreTrainedModel", "DiffusionPipeline"],
input_shapes: Dict[str, int],
task: str,
output: Path,
dynamic_batch_size: bool = False,
submodels: Optional[Dict[str, Union[Path, str]]] = None,
Expand Down Expand Up @@ -395,7 +392,6 @@ def _get_submodels_and_neuron_configs_for_stable_diffusion(
)
models_and_neuron_configs = get_stable_diffusion_models_for_export(
pipeline=model,
task=task,
text_encoder_input_shapes=input_shapes["text_encoder"],
unet_input_shapes=input_shapes["unet"],
vae_encoder_input_shapes=input_shapes["vae_encoder"],
Expand Down Expand Up @@ -481,6 +477,7 @@ def load_models_and_neuron_configs(
trust_remote_code: bool,
subfolder: str,
revision: str,
library_name: str,
force_download: bool,
local_files_only: bool,
token: Optional[Union[bool, str]],
Expand All @@ -492,13 +489,8 @@ def load_models_and_neuron_configs(
controlnet_ids: Optional[Union[str, List[str]]] = None,
output_attentions: bool = False,
output_hidden_states: bool = False,
library_name: Optional[str] = None,
**input_shapes,
):
library_name = TasksManager.infer_library_from_model(
model_name_or_path, subfolder=subfolder, library_name=library_name
)

model_kwargs = {
"task": task,
"model_name_or_path": model_name_or_path,
Expand Down Expand Up @@ -575,6 +567,10 @@ def main_export(
output.parent.mkdir(parents=True)

task = TasksManager.map_from_synonym(task)
if library_name is None:
library_name = TasksManager.infer_library_from_model(
model_name_or_path, revision=revision, cache_dir=cache_dir, token=token
)

models_and_neuron_configs, output_model_names = load_models_and_neuron_configs(
model_name_or_path=model_name_or_path,
Expand All @@ -587,13 +583,13 @@ def main_export(
trust_remote_code=trust_remote_code,
subfolder=subfolder,
revision=revision,
library_name=library_name,
force_download=force_download,
local_files_only=local_files_only,
token=token,
submodels=submodels,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
library_name=library_name,
lora_model_ids=lora_model_ids,
lora_weight_names=lora_weight_names,
lora_adapter_names=lora_adapter_names,
Expand All @@ -616,8 +612,7 @@ def main_export(

# Validate compiled model
if do_validation is True:
is_stable_diffusion = "stable-diffusion" in task
if is_stable_diffusion:
if library_name == "diffusers":
# Do not validate vae encoder due to the sampling randomness
neuron_outputs.pop("vae_encoder")
models_and_neuron_configs.pop("vae_encoder", None)
Expand Down Expand Up @@ -686,13 +681,12 @@ def main():
args = parser.parse_args()

task = infer_task(args.task, args.model)
is_stable_diffusion = "stable-diffusion" in task
is_sentence_transformers = args.library_name == "sentence_transformers"
library_name = TasksManager.infer_library_from_model(args.model, cache_dir=args.cache_dir)

if is_stable_diffusion:
if library_name == "diffusers":
input_shapes = normalize_stable_diffusion_input_shapes(args)
submodels = {"unet": args.unet}
elif is_sentence_transformers:
elif library_name == "sentence_transformers":
input_shapes = normalize_sentence_transformers_input_shapes(args)
submodels = None
else:
Expand Down Expand Up @@ -737,7 +731,7 @@ def main():
subfolder=args.subfolder,
do_validation=not args.disable_validation,
submodels=submodels,
library_name=args.library_name,
library_name=library_name,
lora_model_ids=getattr(args, "lora_model_ids", None),
lora_weight_names=getattr(args, "lora_weight_names", None),
lora_adapter_names=getattr(args, "lora_adapter_names", None),
Expand Down
2 changes: 1 addition & 1 deletion optimum/exporters/neuron/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,7 @@ def add_stable_diffusion_compiler_args(config, compiler_args):
compiler_args.append("--enable-fast-loading-neuron-binaries")
# unet or controlnet
if "unet" in identifier or "controlnet" in identifier:
# SDXL unet doesn't support fast loading neuron binaries
# SDXL unet doesn't support fast loading neuron binaries (SDK 2.19.1)
if not getattr(config, "is_sdxl", False):
compiler_args.append("--enable-fast-loading-neuron-binaries")
compiler_args.append("--model-type=unet-inference")
Expand Down
8 changes: 6 additions & 2 deletions optimum/exporters/neuron/model_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,12 +383,16 @@ class LevitNeuronConfig(ViTNeuronConfig):
pass


@register_in_tasks_manager("mobilenet-v2", *["feature-extraction", "image-classification", "semantic-segmentation"])
@register_in_tasks_manager(
"mobilenet-v2", *["feature-extraction", "image-classification", "semantic-segmentation", "image-segmentation"]
)
class MobileNetV2NeuronConfig(ViTNeuronConfig):
pass


@register_in_tasks_manager("mobilevit", *["feature-extraction", "image-classification", "semantic-segmentation"])
@register_in_tasks_manager(
"mobilevit", *["feature-extraction", "image-classification", "semantic-segmentation", "image-segmentation"]
)
class MobileViTNeuronConfig(ViTNeuronConfig):
pass

Expand Down
40 changes: 22 additions & 18 deletions optimum/exporters/neuron/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,15 @@
f"We found an older version of diffusers {_diffusers_version} but we require diffusers to be >= {DIFFUSERS_MINIMUM_VERSION}. "
"Please update diffusers by running `pip install --upgrade diffusers`"
)
from diffusers import ControlNetModel, UNet2DConditionModel
from diffusers import (
ControlNetModel,
ModelMixin,
StableDiffusionPipeline,
StableDiffusionXLImg2ImgPipeline,
StableDiffusionXLInpaintPipeline,
StableDiffusionXLPipeline,
UNet2DConditionModel,
)
from diffusers.models.attention_processor import Attention


Expand All @@ -62,9 +70,6 @@

from .base import NeuronDefaultConfig

if is_diffusers_available():
from diffusers import ModelMixin, StableDiffusionPipeline, StableDiffusionXLImg2ImgPipeline


def build_stable_diffusion_components_mandatory_shapes(
batch_size: Optional[int] = None,
Expand Down Expand Up @@ -108,8 +113,7 @@ def build_stable_diffusion_components_mandatory_shapes(


def get_stable_diffusion_models_for_export(
pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLImg2ImgPipeline"],
task: str,
pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLPipeline"],
text_encoder_input_shapes: Dict[str, int],
unet_input_shapes: Dict[str, int],
vae_encoder_input_shapes: Dict[str, int],
Expand All @@ -130,10 +134,8 @@ def get_stable_diffusion_models_for_export(
performance benefit (CLIP text encoder, VAE encoder, VAE decoder, Unet).
Args:
pipeline ([`Union["StableDiffusionPipeline", "StableDiffusionXLImg2ImgPipeline"]`]):
pipeline ([`Union["StableDiffusionPipeline", "StableDiffusionXLPipeline"]`]):
The model to export.
task (`str`):
Task name, should be either "stable-diffusion" or "stable-diffusion-xl".
text_encoder_input_shapes (`Dict[str, int]`):
Static shapes used for compiling text encoder.
unet_input_shapes (`Dict[str, int]`):
Expand Down Expand Up @@ -166,7 +168,6 @@ def get_stable_diffusion_models_for_export(
"""
models_for_export = get_submodels_for_export_stable_diffusion(
pipeline=pipeline,
task=task,
lora_model_ids=lora_model_ids,
lora_weight_names=lora_weight_names,
lora_adapter_names=lora_adapter_names,
Expand Down Expand Up @@ -226,8 +227,10 @@ def get_stable_diffusion_models_for_export(
dynamic_batch_size=dynamic_batch_size,
**unet_input_shapes,
)
if task == "stable-diffusion-xl":
unet_neuron_config.is_sdxl = True
is_stable_diffusion_xl = isinstance(
pipeline, (StableDiffusionXLImg2ImgPipeline, StableDiffusionXLInpaintPipeline, StableDiffusionXLPipeline)
)
unet_neuron_config.is_sdxl = is_stable_diffusion_xl

unet_neuron_config.with_controlnet = True if controlnet_ids else False

Expand Down Expand Up @@ -296,7 +299,7 @@ def get_stable_diffusion_models_for_export(


def _load_lora_weights_to_pipeline(
pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLImg2ImgPipeline"],
pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLPipeline"],
lora_model_ids: Optional[Union[str, List[str]]] = None,
weight_names: Optional[Union[str, List[str]]] = None,
adapter_names: Optional[Union[str, List[str]]] = None,
Expand Down Expand Up @@ -350,8 +353,7 @@ def load_controlnets(controlnet_ids: Optional[Union[str, List[str]]] = None):


def get_submodels_for_export_stable_diffusion(
pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLImg2ImgPipeline"],
task: str,
pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLPipeline"],
output_hidden_states: bool = False,
lora_model_ids: Optional[Union[str, List[str]]] = None,
lora_weight_names: Optional[Union[str, List[str]]] = None,
Expand All @@ -362,7 +364,9 @@ def get_submodels_for_export_stable_diffusion(
"""
Returns the components of a Stable Diffusion model.
"""
is_sdxl = "xl" in task
is_stable_diffusion_xl = isinstance(
pipeline, (StableDiffusionXLImg2ImgPipeline, StableDiffusionXLInpaintPipeline, StableDiffusionXLPipeline)
)

# Lora
pipeline = _load_lora_weights_to_pipeline(
Expand All @@ -381,7 +385,7 @@ def get_submodels_for_export_stable_diffusion(

# Text encoders
if pipeline.text_encoder is not None:
if is_sdxl or output_hidden_states:
if is_stable_diffusion_xl or output_hidden_states:
pipeline.text_encoder.config.output_hidden_states = True
models_for_export.append((DIFFUSION_MODEL_TEXT_ENCODER_NAME, copy.deepcopy(pipeline.text_encoder)))

Expand All @@ -400,7 +404,7 @@ def get_submodels_for_export_stable_diffusion(
# Replace the original cross-attention module with a custom cross-attention module for better performance.
# To apply the optimized attention score computation, set the env variable `NEURON_FUSE_SOFTMAX=1`.
if os.environ.get("NEURON_FUSE_SOFTMAX") == "1":
if is_sdxl:
if is_stable_diffusion_xl:
logger.info("Applying optimized attention score computation for sdxl.")
Attention.get_attention_scores = get_attention_scores_sdxl
else:
Expand Down
1 change: 1 addition & 0 deletions optimum/neuron/modeling_diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -852,6 +852,7 @@ def _export(
trust_remote_code=trust_remote_code,
subfolder=subfolder,
revision=revision,
library_name=cls.library_name,
force_download=force_download,
local_files_only=local_files_only,
token=token,
Expand Down
Loading

0 comments on commit 08c2d35

Please sign in to comment.