diff --git a/Makefile b/Makefile index 1ae0aa514..02e82be73 100644 --- a/Makefile +++ b/Makefile @@ -40,19 +40,15 @@ PACKAGE_FILES = $(PACKAGE_PYTHON_FILES) \ $(PACKAGE_DIST) $(PACKAGE_WHEEL): $(PACKAGE_FILES) python -m build -TGI_VERSION ?= 2.1.1 - neuronx-tgi: $(PACKAGE_DIST) docker build --rm -f text-generation-inference/Dockerfile \ --build-arg VERSION=$(VERSION) \ - --build-arg TGI_VERSION=$(TGI_VERSION) \ -t neuronx-tgi:$(VERSION) . docker tag neuronx-tgi:$(VERSION) neuronx-tgi:latest neuronx-tgi-sagemaker: $(PACKAGE_DIST) docker build --rm -f text-generation-inference/Dockerfile \ --build-arg VERSION=$(VERSION) \ - --build-arg TGI_VERSION=$(TGI_VERSION) \ --target sagemaker \ -t neuronx-tgi:$(VERSION) . @@ -90,7 +86,7 @@ test_installs: tgi_server: python -m pip install -r text-generation-inference/server/build-requirements.txt make -C text-generation-inference/server clean - VERSION=${VERSION} TGI_VERSION=${TGI_VERSION} make -C text-generation-inference/server gen-server + VERSION=${VERSION} make -C text-generation-inference/server gen-server tgi_test: tgi_server python -m pip install .[neuronx] diff --git a/docs/source/inference_tutorials/stable_diffusion.mdx b/docs/source/inference_tutorials/stable_diffusion.mdx index b4c64a457..b0e614504 100644 --- a/docs/source/inference_tutorials/stable_diffusion.mdx +++ b/docs/source/inference_tutorials/stable_diffusion.mdx @@ -45,7 +45,6 @@ Here is an example of exporting stable diffusion components with `Optimum` CLI: ```bash optimum-cli export neuron --model stabilityai/stable-diffusion-2-1-base \ - --task stable-diffusion \ --batch_size 1 \ --height 512 `# height in pixels of generated image, eg. 512, 768` \ --width 512 `# width in pixels of generated image, eg. 512, 768` \ @@ -229,7 +228,6 @@ Here is an example of exporting SDXL components with `Optimum` CLI: ```bash optimum-cli export neuron --model stabilityai/stable-diffusion-xl-base-1.0 \ - --task stable-diffusion-xl \ --batch_size 1 \ --height 1024 `# height in pixels of generated image, eg. 768, 1024` \ --width 1024 `# width in pixels of generated image, eg. 768, 1024` \ @@ -481,7 +479,7 @@ Here we will compile the [`stabilityai/sdxl-turbo`](https://huggingface.co/stabi ### Compile SDXL Turbo ```bash -optimum-cli export neuron --model stabilityai/sdxl-turbo --task stable-diffusion-xl --batch_size 1 --height 512 --width 512 --auto_cast matmul --auto_cast_type bf16 sdxl_turbo_neuron/ +optimum-cli export neuron --model stabilityai/sdxl-turbo --batch_size 1 --height 512 --width 512 --auto_cast matmul --auto_cast_type bf16 sdxl_turbo_neuron/ ``` ### Text-to-Image @@ -562,7 +560,7 @@ We can either compile one or multiple ControlNet via the Optimum CLI or programa * Export via the Optimum CLI ```bash -optimum-cli export neuron -m runwayml/stable-diffusion-v1-5 --task stable-diffusion --batch_size 1 --height 512 --width 512 --controlnet_ids lllyasviel/sd-controlnet-canny --num_images_per_prompt 1 sd_neuron_controlnet/ +optimum-cli export neuron -m runwayml/stable-diffusion-v1-5 --batch_size 1 --height 512 --width 512 --controlnet_ids lllyasviel/sd-controlnet-canny --num_images_per_prompt 1 sd_neuron_controlnet/ ``` * Export via Python API diff --git a/optimum/commands/export/neuron.py b/optimum/commands/export/neuron.py index 5172fdb54..f56cd794b 100644 --- a/optimum/commands/export/neuron.py +++ b/optimum/commands/export/neuron.py @@ -46,13 +46,6 @@ def parse_args_neuron(parser: "ArgumentParser"): f" {str(list(TasksManager._TRANSFORMERS_TASKS_TO_MODEL_LOADERS.keys()) + list(TasksManager._DIFFUSERS_TASKS_TO_MODEL_LOADERS.keys()))}." ), ) - optional_group.add_argument( - "--library-name", - type=str, - choices=["transformers", "sentence_transformers"], - default=None, - help=("The library on the model. If not provided, will attempt to infer the local checkpoint's library."), - ) optional_group.add_argument( "--subfolder", type=str, diff --git a/optimum/commands/export/neuronx.py b/optimum/commands/export/neuronx.py index 8458121d7..6e8650b76 100644 --- a/optimum/commands/export/neuronx.py +++ b/optimum/commands/export/neuronx.py @@ -53,13 +53,6 @@ def parse_args_neuronx(parser: "ArgumentParser"): f" {str(list(TasksManager._TRANSFORMERS_TASKS_TO_MODEL_LOADERS.keys()) + list(TasksManager._DIFFUSERS_TASKS_TO_MODEL_LOADERS.keys()))}." ), ) - optional_group.add_argument( - "--library-name", - type=str, - choices=["transformers", "diffusers", "sentence_transformers"], - default=None, - help=("The library of the model." " If not provided, will attempt to infer the local checkpoint's library."), - ) optional_group.add_argument( "--subfolder", type=str, diff --git a/optimum/exporters/neuron/__main__.py b/optimum/exporters/neuron/__main__.py index cfcc4dec9..efcc47ce7 100644 --- a/optimum/exporters/neuron/__main__.py +++ b/optimum/exporters/neuron/__main__.py @@ -264,7 +264,7 @@ def get_submodels_and_neuron_configs( tensor_parallel_size: int, task: str, output: Path, - library_name: Optional[str] = None, + library_name: str, subfolder: str = "", dynamic_batch_size: bool = False, model_name_or_path: Optional[Union[str, Path]] = None, @@ -277,19 +277,17 @@ def get_submodels_and_neuron_configs( lora_scales: Optional[Union[float, List[float]]] = None, controlnet_ids: Optional[Union[str, List[str]]] = None, ): - is_stable_diffusion = "stable-diffusion" in task is_encoder_decoder = ( getattr(model.config, "is_encoder_decoder", False) if isinstance(model.config, PretrainedConfig) else False ) - if is_stable_diffusion: + if library_name == "diffusers": # TODO: Enable optional outputs for Stable Diffusion if output_attentions: raise ValueError(f"`output_attentions`is not supported by the {task} task yet.") models_and_neuron_configs, output_model_names = _get_submodels_and_neuron_configs_for_stable_diffusion( model=model, input_shapes=input_shapes, - task=task, output=output, dynamic_batch_size=dynamic_batch_size, submodels=submodels, @@ -357,7 +355,6 @@ def _normalize_lora_params(lora_model_ids, lora_weight_names, lora_adapter_names def _get_submodels_and_neuron_configs_for_stable_diffusion( model: Union["PreTrainedModel", "DiffusionPipeline"], input_shapes: Dict[str, int], - task: str, output: Path, dynamic_batch_size: bool = False, submodels: Optional[Dict[str, Union[Path, str]]] = None, @@ -395,7 +392,6 @@ def _get_submodels_and_neuron_configs_for_stable_diffusion( ) models_and_neuron_configs = get_stable_diffusion_models_for_export( pipeline=model, - task=task, text_encoder_input_shapes=input_shapes["text_encoder"], unet_input_shapes=input_shapes["unet"], vae_encoder_input_shapes=input_shapes["vae_encoder"], @@ -481,6 +477,7 @@ def load_models_and_neuron_configs( trust_remote_code: bool, subfolder: str, revision: str, + library_name: str, force_download: bool, local_files_only: bool, token: Optional[Union[bool, str]], @@ -492,13 +489,8 @@ def load_models_and_neuron_configs( controlnet_ids: Optional[Union[str, List[str]]] = None, output_attentions: bool = False, output_hidden_states: bool = False, - library_name: Optional[str] = None, **input_shapes, ): - library_name = TasksManager.infer_library_from_model( - model_name_or_path, subfolder=subfolder, library_name=library_name - ) - model_kwargs = { "task": task, "model_name_or_path": model_name_or_path, @@ -575,6 +567,10 @@ def main_export( output.parent.mkdir(parents=True) task = TasksManager.map_from_synonym(task) + if library_name is None: + library_name = TasksManager.infer_library_from_model( + model_name_or_path, revision=revision, cache_dir=cache_dir, token=token + ) models_and_neuron_configs, output_model_names = load_models_and_neuron_configs( model_name_or_path=model_name_or_path, @@ -587,13 +583,13 @@ def main_export( trust_remote_code=trust_remote_code, subfolder=subfolder, revision=revision, + library_name=library_name, force_download=force_download, local_files_only=local_files_only, token=token, submodels=submodels, output_attentions=output_attentions, output_hidden_states=output_hidden_states, - library_name=library_name, lora_model_ids=lora_model_ids, lora_weight_names=lora_weight_names, lora_adapter_names=lora_adapter_names, @@ -616,8 +612,7 @@ def main_export( # Validate compiled model if do_validation is True: - is_stable_diffusion = "stable-diffusion" in task - if is_stable_diffusion: + if library_name == "diffusers": # Do not validate vae encoder due to the sampling randomness neuron_outputs.pop("vae_encoder") models_and_neuron_configs.pop("vae_encoder", None) @@ -686,13 +681,12 @@ def main(): args = parser.parse_args() task = infer_task(args.task, args.model) - is_stable_diffusion = "stable-diffusion" in task - is_sentence_transformers = args.library_name == "sentence_transformers" + library_name = TasksManager.infer_library_from_model(args.model, cache_dir=args.cache_dir) - if is_stable_diffusion: + if library_name == "diffusers": input_shapes = normalize_stable_diffusion_input_shapes(args) submodels = {"unet": args.unet} - elif is_sentence_transformers: + elif library_name == "sentence_transformers": input_shapes = normalize_sentence_transformers_input_shapes(args) submodels = None else: @@ -737,7 +731,7 @@ def main(): subfolder=args.subfolder, do_validation=not args.disable_validation, submodels=submodels, - library_name=args.library_name, + library_name=library_name, lora_model_ids=getattr(args, "lora_model_ids", None), lora_weight_names=getattr(args, "lora_weight_names", None), lora_adapter_names=getattr(args, "lora_adapter_names", None), diff --git a/optimum/exporters/neuron/convert.py b/optimum/exporters/neuron/convert.py index 51607f82e..1f93590fc 100644 --- a/optimum/exporters/neuron/convert.py +++ b/optimum/exporters/neuron/convert.py @@ -592,7 +592,7 @@ def add_stable_diffusion_compiler_args(config, compiler_args): compiler_args.append("--enable-fast-loading-neuron-binaries") # unet or controlnet if "unet" in identifier or "controlnet" in identifier: - # SDXL unet doesn't support fast loading neuron binaries + # SDXL unet doesn't support fast loading neuron binaries(sdk 2.19.1) if not getattr(config, "is_sdxl", False): compiler_args.append("--enable-fast-loading-neuron-binaries") compiler_args.append("--model-type=unet-inference") diff --git a/optimum/exporters/neuron/model_configs.py b/optimum/exporters/neuron/model_configs.py index 1f59c9031..80973af57 100644 --- a/optimum/exporters/neuron/model_configs.py +++ b/optimum/exporters/neuron/model_configs.py @@ -383,12 +383,16 @@ class LevitNeuronConfig(ViTNeuronConfig): pass -@register_in_tasks_manager("mobilenet-v2", *["feature-extraction", "image-classification", "semantic-segmentation"]) +@register_in_tasks_manager( + "mobilenet-v2", *["feature-extraction", "image-classification", "semantic-segmentation", "image-segmentation"] +) class MobileNetV2NeuronConfig(ViTNeuronConfig): pass -@register_in_tasks_manager("mobilevit", *["feature-extraction", "image-classification", "semantic-segmentation"]) +@register_in_tasks_manager( + "mobilevit", *["feature-extraction", "image-classification", "semantic-segmentation", "image-segmentation"] +) class MobileViTNeuronConfig(ViTNeuronConfig): pass diff --git a/optimum/exporters/neuron/utils.py b/optimum/exporters/neuron/utils.py index c757b1b8f..7c0db9ffb 100644 --- a/optimum/exporters/neuron/utils.py +++ b/optimum/exporters/neuron/utils.py @@ -53,7 +53,15 @@ f"We found an older version of diffusers {_diffusers_version} but we require diffusers to be >= {DIFFUSERS_MINIMUM_VERSION}. " "Please update diffusers by running `pip install --upgrade diffusers`" ) - from diffusers import ControlNetModel, UNet2DConditionModel + from diffusers import ( + ControlNetModel, + ModelMixin, + StableDiffusionPipeline, + StableDiffusionXLImg2ImgPipeline, + StableDiffusionXLInpaintPipeline, + StableDiffusionXLPipeline, + UNet2DConditionModel, + ) from diffusers.models.attention_processor import Attention @@ -62,9 +70,6 @@ from .base import NeuronDefaultConfig - if is_diffusers_available(): - from diffusers import ModelMixin, StableDiffusionPipeline, StableDiffusionXLImg2ImgPipeline - def build_stable_diffusion_components_mandatory_shapes( batch_size: Optional[int] = None, @@ -108,8 +113,7 @@ def build_stable_diffusion_components_mandatory_shapes( def get_stable_diffusion_models_for_export( - pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLImg2ImgPipeline"], - task: str, + pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLPipeline"], text_encoder_input_shapes: Dict[str, int], unet_input_shapes: Dict[str, int], vae_encoder_input_shapes: Dict[str, int], @@ -130,10 +134,8 @@ def get_stable_diffusion_models_for_export( performance benefit (CLIP text encoder, VAE encoder, VAE decoder, Unet). Args: - pipeline ([`Union["StableDiffusionPipeline", "StableDiffusionXLImg2ImgPipeline"]`]): + pipeline ([`Union["StableDiffusionPipeline", "StableDiffusionXLPipeline"]`]): The model to export. - task (`str`): - Task name, should be either "stable-diffusion" or "stable-diffusion-xl". text_encoder_input_shapes (`Dict[str, int]`): Static shapes used for compiling text encoder. unet_input_shapes (`Dict[str, int]`): @@ -166,7 +168,6 @@ def get_stable_diffusion_models_for_export( """ models_for_export = get_submodels_for_export_stable_diffusion( pipeline=pipeline, - task=task, lora_model_ids=lora_model_ids, lora_weight_names=lora_weight_names, lora_adapter_names=lora_adapter_names, @@ -226,8 +227,10 @@ def get_stable_diffusion_models_for_export( dynamic_batch_size=dynamic_batch_size, **unet_input_shapes, ) - if task == "stable-diffusion-xl": - unet_neuron_config.is_sdxl = True + is_stable_diffusion_xl = isinstance( + pipeline, (StableDiffusionXLImg2ImgPipeline, StableDiffusionXLInpaintPipeline, StableDiffusionXLPipeline) + ) + unet_neuron_config.is_sdxl = is_stable_diffusion_xl unet_neuron_config.with_controlnet = True if controlnet_ids else False @@ -296,7 +299,7 @@ def get_stable_diffusion_models_for_export( def _load_lora_weights_to_pipeline( - pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLImg2ImgPipeline"], + pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLPipeline"], lora_model_ids: Optional[Union[str, List[str]]] = None, weight_names: Optional[Union[str, List[str]]] = None, adapter_names: Optional[Union[str, List[str]]] = None, @@ -350,8 +353,7 @@ def load_controlnets(controlnet_ids: Optional[Union[str, List[str]]] = None): def get_submodels_for_export_stable_diffusion( - pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLImg2ImgPipeline"], - task: str, + pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLPipeline"], output_hidden_states: bool = False, lora_model_ids: Optional[Union[str, List[str]]] = None, lora_weight_names: Optional[Union[str, List[str]]] = None, @@ -362,7 +364,9 @@ def get_submodels_for_export_stable_diffusion( """ Returns the components of a Stable Diffusion model. """ - is_sdxl = "xl" in task + is_stable_diffusion_xl = isinstance( + pipeline, (StableDiffusionXLImg2ImgPipeline, StableDiffusionXLInpaintPipeline, StableDiffusionXLPipeline) + ) # Lora pipeline = _load_lora_weights_to_pipeline( @@ -381,7 +385,7 @@ def get_submodels_for_export_stable_diffusion( # Text encoders if pipeline.text_encoder is not None: - if is_sdxl or output_hidden_states: + if is_stable_diffusion_xl or output_hidden_states: pipeline.text_encoder.config.output_hidden_states = True models_for_export.append((DIFFUSION_MODEL_TEXT_ENCODER_NAME, copy.deepcopy(pipeline.text_encoder))) @@ -400,7 +404,7 @@ def get_submodels_for_export_stable_diffusion( # Replace original cross-attention module with custom cross-attention module for better performance # For applying optimized attention score, we need to set env variable `NEURON_FUSE_SOFTMAX=1` if os.environ.get("NEURON_FUSE_SOFTMAX") == "1": - if is_sdxl: + if is_stable_diffusion_xl: logger.info("Applying optimized attention score computation for sdxl.") Attention.get_attention_scores = get_attention_scores_sdxl else: diff --git a/optimum/neuron/modeling_diffusion.py b/optimum/neuron/modeling_diffusion.py index 78362daab..d32704cc9 100644 --- a/optimum/neuron/modeling_diffusion.py +++ b/optimum/neuron/modeling_diffusion.py @@ -852,6 +852,7 @@ def _export( trust_remote_code=trust_remote_code, subfolder=subfolder, revision=revision, + library_name=cls.library_name, force_download=force_download, local_files_only=local_files_only, token=token, diff --git a/optimum/neuron/modeling_traced.py b/optimum/neuron/modeling_traced.py index a22cc18c7..a8235c029 100644 --- a/optimum/neuron/modeling_traced.py +++ b/optimum/neuron/modeling_traced.py @@ -247,7 +247,6 @@ def _export( config: "PretrainedConfig", token: Optional[Union[bool, str]] = None, revision: Optional[str] = None, - library_name: Optional[str] = None, force_download: bool = False, cache_dir: Optional[str] = None, compiler_workdir: Optional[Union[str, Path]] = None, @@ -275,7 +274,6 @@ def _export( if task is None: task = TasksManager.infer_task_from_model(cls.auto_model_class) task = TasksManager.map_from_synonym(task) - library_name = TasksManager.infer_library_from_model(model_id, subfolder=subfolder, library_name=library_name) # Get compilation arguments if is_neuron_available() and dynamic_batch_size is True and "batch_size" in kwargs_shapes: @@ -320,10 +318,9 @@ def _export( model_name_or_path=model_id, subfolder=subfolder, revision=revision, - framework="pt", - library_name=library_name, cache_dir=cache_dir, token=token, + framework="pt", local_files_only=local_files_only, force_download=force_download, trust_remote_code=trust_remote_code, @@ -361,7 +358,6 @@ def _export( local_files_only=local_files_only, token=token, do_validation=False, - library_name=library_name, **kwargs_shapes, ) config = AutoConfig.from_pretrained(save_dir_path) diff --git a/setup.py b/setup.py index de4cea536..45bb37672 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ INSTALL_REQUIRES = [ "transformers == 4.43.2", "accelerate == 0.29.2", - "optimum ~= 1.21.0", + "optimum ~= 1.22.0", "huggingface_hub >= 0.20.1", "numpy>=1.22.2, <=1.25.2", "protobuf<4", diff --git a/tests/cli/test_export_cli.py b/tests/cli/test_export_cli.py index 72f84a50c..cdce97e30 100644 --- a/tests/cli/test_export_cli.py +++ b/tests/cli/test_export_cli.py @@ -152,8 +152,6 @@ def test_stable_diffusion(self): "neuron", "--model", model_id, - "--task", - "stable-diffusion", "--batch_size", "1", "--height", @@ -186,8 +184,6 @@ def test_stable_diffusion_multi_lora(self): "neuron", "--model", model_id, - "--task", - "stable-diffusion", "--batch_size", "1", "--height", @@ -226,8 +222,6 @@ def test_stable_diffusion_single_controlnet(self): "neuron", "--model", model_id, - "--task", - "stable-diffusion", "--batch_size", "1", "--height", @@ -259,8 +253,6 @@ def test_stable_diffusion_xl(self): "neuron", "--model", model_id, - "--task", - "stable-diffusion-xl", "--batch_size", "1", "--height", @@ -293,8 +285,6 @@ def test_replace_unet(self): model_id, "--unet", unet_id, - "--task", - "stable-diffusion-xl", "--batch_size", "1", "--height", diff --git a/tests/exporters/test_export.py b/tests/exporters/test_export.py index 316485e7b..28aeb219f 100644 --- a/tests/exporters/test_export.py +++ b/tests/exporters/test_export.py @@ -225,7 +225,8 @@ def test_export_for_stable_diffusion_models(self, model_id): models_and_neuron_configs, output_model_names = get_submodels_and_neuron_configs( model=model, input_shapes=input_shapes, - task="stable-diffusion", + task="text-to-image", + library_name="diffusers", output=Path(tmpdirname), model_name_or_path=model_id, ) @@ -255,7 +256,8 @@ def test_export_for_stable_diffusion_xl_models(self, model_id): models_and_neuron_configs, output_model_names = get_submodels_and_neuron_configs( model=model, input_shapes=input_shapes, - task="stable-diffusion-xl", + task="text-to-image", + library_name="diffusers", output=Path(tmpdirname), model_name_or_path=model_id, ) @@ -286,7 +288,8 @@ def test_export_sd_with_fused_lora_weights(self): models_and_neuron_configs, output_model_names = get_submodels_and_neuron_configs( model=model, input_shapes=input_shapes, - task="stable-diffusion", + task="text-to-image", + library_name="diffusers", output=Path(tmpdirname), model_name_or_path=model_id, lora_model_ids=lora_params[0], @@ -328,6 +331,7 @@ def test_export_encoder_decoder_models(self, model_name, model_id): input_shapes=input_shapes, task="text2text-generation", output=Path(tmpdirname), + library_name="transformers", model_name_or_path=model_id, output_attentions=True, output_hidden_states=True, diff --git a/tests/inference/test_modeling.py b/tests/inference/test_modeling.py index 21a88ccab..51c873db0 100644 --- a/tests/inference/test_modeling.py +++ b/tests/inference/test_modeling.py @@ -106,7 +106,11 @@ def test_load_model_from_hub(self): def test_load_model_from_hub_subfolder(self): model = NeuronModelForSequenceClassification.from_pretrained( - self.TINY_SUBFOLDER_MODEL_ID, subfolder="my_subfolder", export=True, **self.STATIC_INPUTS_SHAPES + self.TINY_SUBFOLDER_MODEL_ID, + subfolder="my_subfolder", + export=True, + library_name="transformers", + **self.STATIC_INPUTS_SHAPES, ) self.assertIsInstance(model.model, torch.jit._script.ScriptModule) self.assertIsInstance(model.config, PretrainedConfig) @@ -429,7 +433,7 @@ def test_sentence_transformers_clip(self, model_arch): } neuron_model = self.NEURON_MODEL_CLASS.from_pretrained( - model_id, subfolder="0_CLIPModel", export=True, library_name="sentence_transformers", **input_shapes + model_id, subfolder="0_CLIPModel", export=True, **input_shapes ) self.assertIsInstance(neuron_model.model, torch.jit._script.ScriptModule) self.assertIsInstance(neuron_model.config, PretrainedConfig) diff --git a/text-generation-inference/Dockerfile b/text-generation-inference/Dockerfile index ac23d637e..8dab8b378 100644 --- a/text-generation-inference/Dockerfile +++ b/text-generation-inference/Dockerfile @@ -1,7 +1,6 @@ -# Fetch and extract the TGI sources (TGI_VERSION is mandatory) +# Fetch and extract the TGI sources FROM alpine AS tgi -ARG TGI_VERSION -RUN test -n ${TGI_VERSION:?} +ARG TGI_VERSION=2.1.1 RUN mkdir -p /tgi ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v${TGI_VERSION}.tar.gz /tgi/sources.tar.gz RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 @@ -13,7 +12,7 @@ WORKDIR /usr/src ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse -FROM chef as planner +FROM chef AS planner COPY --from=tgi /tgi/Cargo.lock Cargo.lock COPY --from=tgi /tgi/Cargo.toml Cargo.toml COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml @@ -100,6 +99,7 @@ RUN apt-get update -y \ aws-neuronx-collectives=2.22.26.0-17a033bc8 \ aws-neuronx-runtime-lib=2.22.14.0-6e27b8d5b \ aws-neuronx-tools=2.19.0.0 \ + libxml2 \ && rm -rf /var/lib/apt/lists/* \ && apt-get clean @@ -117,7 +117,7 @@ RUN pip3 install \ hf_transfer huggingface_hub # Install optimum-neuron -COPY dist/optimum_neuron-${VERSION}.tar.gz optimum-neuron.tar.gz +COPY ./dist/optimum_neuron-${VERSION}.tar.gz optimum-neuron.tar.gz RUN pip3 install optimum-neuron.tar.gz # TGI base env @@ -134,7 +134,7 @@ COPY --from=pyserver /pyserver/build/dist dist RUN pip install dist/text_generation_server*.tar.gz # AWS Sagemaker compatible image -FROM neuron as sagemaker +FROM neuron AS sagemaker COPY text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh RUN chmod +x entrypoint.sh diff --git a/text-generation-inference/server/Makefile b/text-generation-inference/server/Makefile index 2637f01d1..9b748e48f 100644 --- a/text-generation-inference/server/Makefile +++ b/text-generation-inference/server/Makefile @@ -2,7 +2,7 @@ pkg_name := text_generation_server BUILDDIR ?= $(CURDIR)/build VERSION ?= 0.0.1 -TGI_VERSION ?= 2.0.2 +TGI_VERSION ?= 2.1.1 mkfile_path := $(abspath $(lastword $(MAKEFILE_LIST))) mkfile_dir := $(dir $(mkfile_path)) pkg_dir := $(BUILDDIR)/$(pkg_name)