Skip to content

Commit

Permalink
Merge branch 'main' into add-tp-support-t5
Browse files: browse the repository at this point in the history
  • Loading branch information
JingyaHuang committed Sep 19, 2024
2 parents 9cf51f2 + 39ace4d commit e0d22b8
Show file tree
Hide file tree
Showing 16 changed files with 37 additions and 43 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/inference_cache_llm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
EOF
wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
sudo apt-get update -y
sudo apt-get install aws-neuronx-tools=2.18.3.0 aws-neuronx-runtime-lib=2.21.41.0-fb1705f5f aws-neuronx-collectives=2.21.46.0-69b77134b -y
sudo apt-get install aws-neuronx-tools=2.19.0.0 aws-neuronx-runtime-lib=2.22.14.0-6e27b8d5b aws-neuronx-collectives=2.22.26.0-17a033bc8 -y
export PATH=/opt/aws/neuron/bin:$PATH
- name: Checkout
uses: actions/checkout@v4
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_inf1_export.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
EOF
wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
sudo apt-get update -y
sudo apt-get install aws-neuronx-tools=2.17.1.0 aws-neuronx-runtime-lib=2.20.22.0-1b3ca6425 aws-neuronx-collectives=2.20.22.0-c101c322e -y
sudo apt-get install aws-neuronx-tools=2.18.3.0 aws-neuronx-runtime-lib=2.21.41.0-fb1705f5f aws-neuronx-collectives=2.21.46.0-69b77134b -y
export PATH=/opt/aws/neuron/bin:$PATH
- name: Checkout
uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_inf2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
EOF
wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
sudo apt-get update -y
sudo apt-get install aws-neuronx-tools=2.17.1.0 aws-neuronx-runtime-lib=2.20.22.0-1b3ca6425 aws-neuronx-collectives=2.20.22.0-c101c322e -y
sudo apt-get install aws-neuronx-tools=2.19.0.0 aws-neuronx-runtime-lib=2.22.14.0-6e27b8d5b aws-neuronx-collectives=2.22.26.0-17a033bc8 -y
export PATH=/opt/aws/neuron/bin:$PATH
- name: Checkout
uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_inf2_export.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
EOF
wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
sudo apt-get update -y
sudo apt-get install aws-neuronx-tools=2.17.1.0 aws-neuronx-runtime-lib=2.20.22.0-1b3ca6425 aws-neuronx-collectives=2.20.22.0-c101c322e -y
sudo apt-get install aws-neuronx-tools=2.19.0.0 aws-neuronx-runtime-lib=2.22.14.0-6e27b8d5b aws-neuronx-collectives=2.22.26.0-17a033bc8 -y
export PATH=/opt/aws/neuron/bin:$PATH
- name: Checkout
uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_inf2_full_export.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
EOF
wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
sudo apt-get update -y
sudo apt-get install aws-neuronx-tools=2.17.1.0 aws-neuronx-runtime-lib=2.20.22.0-1b3ca6425 aws-neuronx-collectives=2.20.22.0-c101c322e -y
sudo apt-get install aws-neuronx-tools=2.19.0.0 aws-neuronx-runtime-lib=2.22.14.0-6e27b8d5b aws-neuronx-collectives=2.22.26.0-17a033bc8 -y
export PATH=/opt/aws/neuron/bin:$PATH
- name: Checkout
uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_inf2_inference.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
EOF
wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
sudo apt-get update -y
sudo apt-get install aws-neuronx-tools=2.17.1.0 aws-neuronx-runtime-lib=2.20.22.0-1b3ca6425 aws-neuronx-collectives=2.20.22.0-c101c322e -y
sudo apt-get install aws-neuronx-tools=2.19.0.0 aws-neuronx-runtime-lib=2.22.14.0-6e27b8d5b aws-neuronx-collectives=2.22.26.0-17a033bc8 -y
export PATH=/opt/aws/neuron/bin:$PATH
- name: Install cv2 dependencies
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_inf2_tgi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
EOF
wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
sudo apt-get update -y
sudo apt-get install aws-neuronx-tools=2.17.1.0 aws-neuronx-runtime-lib=2.20.22.0-1b3ca6425 aws-neuronx-collectives=2.20.22.0-c101c322e -y
sudo apt-get install aws-neuronx-tools=2.19.0.0 aws-neuronx-runtime-lib=2.22.14.0-6e27b8d5b aws-neuronx-collectives=2.22.26.0-17a033bc8 -y
export PATH=/opt/aws/neuron/bin:$PATH
- name: Checkout
uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_trainium_common.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
EOF
wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
sudo apt-get update -y
sudo apt-get install aws-neuronx-tools=2.17.1.0 aws-neuronx-runtime-lib=2.20.22.0-1b3ca6425 aws-neuronx-collectives=2.20.22.0-c101c322e -y
sudo apt-get install aws-neuronx-tools=2.19.0.0 aws-neuronx-runtime-lib=2.22.14.0-6e27b8d5b aws-neuronx-collectives=2.22.26.0-17a033bc8 -y
export PATH=/opt/aws/neuron/bin:$PATH
- name: Checkout
uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_trainium_distributed.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
EOF
wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
sudo apt-get update -y
sudo apt-get install aws-neuronx-tools=2.17.1.0 aws-neuronx-runtime-lib=2.20.22.0-1b3ca6425 aws-neuronx-collectives=2.20.22.0-c101c322e -y
sudo apt-get install aws-neuronx-tools=2.19.0.0 aws-neuronx-runtime-lib=2.22.14.0-6e27b8d5b aws-neuronx-collectives=2.22.26.0-17a033bc8 -y
export PATH=/opt/aws/neuron/bin:$PATH
- name: Checkout
uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_trainium_examples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ jobs:
EOF
wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
sudo apt-get update -y
sudo apt-get install aws-neuronx-tools=2.17.1.0 aws-neuronx-runtime-lib=2.20.22.0-1b3ca6425 aws-neuronx-collectives=2.20.22.0-c101c322e -y
sudo apt-get install aws-neuronx-tools=2.19.0.0 aws-neuronx-runtime-lib=2.22.14.0-6e27b8d5b aws-neuronx-collectives=2.22.26.0-17a033bc8 -y
export PATH=/opt/aws/neuron/bin:$PATH
- name: Checkout
uses: actions/checkout@v2
Expand Down
19 changes: 5 additions & 14 deletions optimum/neuron/modeling_diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@

import torch
from huggingface_hub import snapshot_download
from packaging.version import Version
from transformers import CLIPFeatureExtractor, CLIPTokenizer, PretrainedConfig
from transformers.modeling_outputs import ModelOutput

Expand Down Expand Up @@ -62,7 +61,6 @@
)
from .utils.require_utils import requires_torch_neuronx
from .utils.version_utils import get_neuronxcc_version
from .version import __sdk_version__


if is_neuronx_available():
Expand Down Expand Up @@ -351,15 +349,6 @@ def load_model(
"text_encoder_2": text_encoder_2_path,
"controlnet": controlnet_paths,
}
# DataParallel class to use (to remove after neuron sdk 2.20)
if to_neuron:
if Version(__sdk_version__) >= Version("2.20.0"):
raise NameError(
"`WeightSeparatedDataParallel` class should be deprecated when neuron sdk 2.20 is out. Please replace it with `torch_neuronx.DataParallel`."
)
dp_cls = WeightSeparatedDataParallel
else:
dp_cls = torch_neuronx.DataParallel

if data_parallel_mode == "all":
logger.info("Loading the whole pipeline into both Neuron Cores...")
Expand All @@ -372,7 +361,7 @@ def load_model(
submodel = NeuronTracedModel.load_model(
submodel_path, to_neuron=False
) # No need to load to neuron manually when dp
submodel = dp_cls(
submodel = torch_neuronx.DataParallel(
submodel,
[0, 1],
set_dynamic_batching=dynamic_batch_size,
Expand All @@ -395,7 +384,7 @@ def load_model(
unet = NeuronTracedModel.load_model(
unet_path, to_neuron=False
) # No need to load to neuron manually when dp
submodels["unet"] = dp_cls(
submodels["unet"] = torch_neuronx.DataParallel(
unet,
[0, 1],
set_dynamic_batching=dynamic_batch_size,
Expand All @@ -408,7 +397,9 @@ def load_model(
controlnet = NeuronTracedModel.load_model(
controlnet_path, to_neuron=False
) # No need to load to neuron manually when dp
controlnets.append(dp_cls(controlnet, [0, 1], set_dynamic_batching=dynamic_batch_size))
controlnets.append(
torch_neuronx.DataParallel(controlnet, [0, 1], set_dynamic_batching=dynamic_batch_size)
)
if controlnets:
submodels["controlnet"] = controlnets if len(controlnets) > 1 else controlnets[0]
else:
Expand Down
2 changes: 1 addition & 1 deletion optimum/neuron/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@

__version__ = "0.0.25.dev0"

__sdk_version__ = "2.19.1"
__sdk_version__ = "2.20.0"
10 changes: 5 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,13 @@
],
"neuronx": [
"wheel",
"neuronx-cc==2.14.227.0",
"torch-neuronx==2.1.2.2.2.0",
"transformers-neuronx==0.11.351",
"neuronx-cc==2.15.128.0",
"torch-neuronx==2.1.2.2.3.0",
"transformers-neuronx==0.12.313",
"torch==2.1.2.*",
"torchvision==0.16.*",
"neuronx_distributed==0.8.0",
"libneuronxla==2.0.2335",
"neuronx_distributed==0.9.0",
"libneuronxla==2.0.4115.0",
],
"diffusers": ["diffusers>=0.28.0, <0.29.0", "peft"],
"sentence-transformers": ["sentence-transformers >= 2.2.0"],
Expand Down
17 changes: 10 additions & 7 deletions text-generation-inference/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ WORKDIR /usr/src
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse

FROM chef as planner
COPY --from=tgi /tgi/Cargo.lock Cargo.lock
COPY --from=tgi /tgi/Cargo.toml Cargo.toml
COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml
COPY --from=tgi /tgi/proto proto
Expand All @@ -33,6 +34,7 @@ RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
COPY --from=planner /usr/src/recipe.json recipe.json
RUN cargo chef cook --release --recipe-path recipe.json

COPY --from=tgi /tgi/Cargo.lock Cargo.lock
COPY --from=tgi /tgi/Cargo.toml Cargo.toml
COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml
COPY --from=tgi /tgi/proto proto
Expand Down Expand Up @@ -94,19 +96,20 @@ RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEU
# Install neuronx packages
RUN apt-get update -y \
&& apt-get install -y --no-install-recommends \
aws-neuronx-dkms=2.17.17.0 \
aws-neuronx-collectives=2.21.46.0-69b77134b \
aws-neuronx-runtime-lib=2.21.41.0-fb1705f5f \
aws-neuronx-tools=2.18.3.0 \
aws-neuronx-dkms=2.18.12.0 \
aws-neuronx-collectives=2.22.26.0-17a033bc8 \
aws-neuronx-runtime-lib=2.22.14.0-6e27b8d5b \
aws-neuronx-tools=2.19.0.0 \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean

ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}"

RUN pip3 install \
neuronx-cc==2.14.227.0 \
torch-neuronx==2.1.2.2.2.0 \
transformers-neuronx==0.11.351 \
neuronx-cc==2.15.128.0 \
torch-neuronx==2.1.2.2.3.0 \
transformers-neuronx==0.12.313 \
libneuronxla==2.0.4115.0 \
--extra-index-url=https://pip.repos.neuron.amazonaws.com

# Install HuggingFace packages
Expand Down
6 changes: 3 additions & 3 deletions text-generation-inference/tests/integration/test_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ async def test_model_single_request(tgi_service):
seed=42,
)
sample_expectations = {
"gpt2": "A lot of researchers have tried to make",
"llama": "Deep Learning is a subset of Artificial Intelligence",
"mistral": "Deep Learning is a kind of machine learning",
"gpt2": "Deep Learning",
"llama": "Deep Learning",
"mistral": "Deep Learning",
}
assert sample_expectations[service_name] in response

Expand Down
6 changes: 3 additions & 3 deletions text-generation-inference/tests/server/test_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ def _test_decode(config_name, generator, do_sample):
assert output.finish_reason == 0
if do_sample:
expected_text = {
"gpt2": " The sun was set just three miles south of the city. I had just watched a big fireworks display",
"llama": " George Orwell, 1984\nThe government is not interested in the truth. They want to control",
"mistral": " The sky was as pale as a white horse's skull. A pigeon flew",
"gpt2": " The sun was set just after eleven and the clouds were still beating down. And as he approached the",
"llama": " George Orwell, 1984\nThe government is tracking your every move, recording your phone calls,",
"mistral": " The sky was as pale as a ghost. The sun had risen as the sun never rose",
}[config_name]
else:
expected_text = {
Expand Down

0 comments on commit e0d22b8

Please sign in to comment.