Commit

Set up tgi environment values with the ones used to build the model (#529)

* Set up tgi environment values with the ones used to build the model

This is needed to work around the model's static params, so that the Docker entrypoint can adapt the TGI environment to the specified model.
This makes the image easier to use: the default params (i.e. not specifying anything) should be enough for most models.
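
For illustration only (not part of this diff): a minimal sketch of the idea, in which an entrypoint wrapper reads the static parameters the model was compiled with and fills in the TGI environment from them. The function names and config keys below are assumptions, not the actual tgi_env.py implementation.

# Hypothetical sketch, not the code added by this PR: derive the TGI environment
# from the static parameters stored in the exported model's config.
import json
import os


def infer_env_from_neuron_config(model_dir: str) -> dict:
    # The exported model carries its compilation-time (static) parameters;
    # the exact file and keys used here are assumptions for illustration.
    with open(os.path.join(model_dir, "config.json")) as f:
        neuron_config = json.load(f).get("neuron", {})
    return {
        "HF_BATCH_SIZE": str(neuron_config.get("batch_size", 1)),
        "HF_SEQUENCE_LENGTH": str(neuron_config.get("sequence_length", 1024)),
        "HF_NUM_CORES": str(neuron_config.get("num_cores", 2)),
        "HF_AUTO_CAST_TYPE": str(neuron_config.get("auto_cast_type", "fp32")),
    }


def apply_env(env: dict) -> None:
    # Only fill in values the user did not set explicitly.
    for key, value in env.items():
        os.environ.setdefault(key, value)

With something along these lines, running the image against a compiled model without passing any HF_* variables should pick up values consistent with how the model was built.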

Signed-off-by: Raphael Glon <[email protected]>

* fixes

Signed-off-by: Raphael Glon <[email protected]>

* fixes

Signed-off-by: Raphael Glon <[email protected]>

* minor: logging

Signed-off-by: Raphael Glon <[email protected]>

* Integration tests for inf2 + tgi_env wrapper

Signed-off-by: Raphael Glon <[email protected]>

* GitHub CI workflow

Signed-off-by: Raphael Glon <[email protected]>

* To run on GitHub CI we cannot share a volume, so we need to embed the model within an image built on the fly

Signed-off-by: Raphael Glon <[email protected]>

* Be more flexible about expected outputs

Signed-off-by: Raphael Glon <[email protected]>

* Be more flexible about compiler version

Signed-off-by: Raphael Glon <[email protected]>

* Refactor

Bump the dev version,
use a single workflow for TGI,
and simplify the implicit env test a bit.

Signed-off-by: Raphael Glon <[email protected]>

* Misc fixes

Signed-off-by: Raphael Glon <[email protected]>

---------

Signed-off-by: Raphael Glon <[email protected]>
Co-authored-by: Raphael Glon <[email protected]>
oOraph authored Apr 9, 2024
1 parent 1f049e1 commit bb1cc96
Showing 8 changed files with 387 additions and 42 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/test_inf2_tgi.yml
@@ -50,3 +50,8 @@ jobs:
sudo apt install gawk -y
source aws_neuron_venv_pytorch/bin/activate
HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} make tgi_test
- name: Run integration tests
shell: bash
run: |
source aws_neuron_venv_pytorch/bin/activate
HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} make tgi_docker_test
2 changes: 1 addition & 1 deletion optimum/neuron/version.py
@@ -12,6 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "0.0.21.dev0"
__version__ = "0.0.22.dev0"

__sdk_version__ = "2.18.0"
18 changes: 9 additions & 9 deletions text-generation-inference/Dockerfile
@@ -53,14 +53,12 @@ RUN apt-get update -y \
&& apt-get clean
RUN pip3 --no-cache-dir install --upgrade pip

# VERSION is a mandatory parameter
ARG VERSION
RUN test -n ${VERSION:?}

# Python server build image
FROM base AS pyserver

RUN apt-get update -y \
ARG VERSION

RUN test -n ${VERSION:?} && apt-get update -y \
&& apt-get install -y --no-install-recommends \
make \
python3-venv \
@@ -77,8 +75,10 @@ RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto VERSION=${VERSIO
# Neuron base image (used for deployment)
FROM base AS neuron

ARG VERSION

# Install system prerequisites
RUN apt-get update -y \
RUN test -n ${VERSION:?} && apt-get update -y \
&& apt-get install -y --no-install-recommends \
gnupg2 \
wget \
@@ -108,7 +108,7 @@ RUN pip3 install \

# Install HuggingFace packages
RUN pip3 install \
hf_transfer
hf_transfer huggingface_hub

# Install optimum-neuron
COPY dist/optimum-neuron-${VERSION}.tar.gz optimum-neuron.tar.gz
@@ -137,6 +137,6 @@ ENTRYPOINT ["./entrypoint.sh"]

# Final image
FROM neuron

ENTRYPOINT ["text-generation-launcher"]
COPY text-generation-inference/tgi-entrypoint.sh text-generation-inference/tgi_env.py /
ENTRYPOINT ["/tgi-entrypoint.sh"]
CMD ["--json-output"]
81 changes: 56 additions & 25 deletions text-generation-inference/integration-tests/conftest.py
@@ -1,12 +1,13 @@
import asyncio
import contextlib
import logging
import os
import random
import shlex
import subprocess
import re
import string
import sys
import tempfile
import time
from tempfile import TemporaryDirectory
from typing import List

import docker
@@ -17,9 +18,10 @@
from text_generation.types import Response


LOG = logging.getLogger(__file__)
DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", "neuronx-tgi:latest")
HUGGING_FACE_HUB_TOKEN = os.getenv("HUGGING_FACE_HUB_TOKEN", None)
DOCKER_VOLUME = os.getenv("DOCKER_VOLUME", "/data")
DOCKER_LOCAL_DIR_PATTERN = re.compile(r"^/data/(.*)$")


class LauncherHandle:
@@ -71,15 +73,7 @@ def event_loop():


@pytest.fixture(scope="module")
def data_volume():
tmpdir = TemporaryDirectory()
yield tmpdir.name
# Cleanup the temporary directory using sudo as it contains root files created by the container
subprocess.run(shlex.split(f"sudo rm -rf {tmpdir.name}"))


@pytest.fixture(scope="module")
def launcher(event_loop, data_volume):
def launcher(event_loop):
@contextlib.contextmanager
def docker_launcher(
model_id: str,
@@ -112,33 +106,70 @@ def docker_launcher(
if var in os.environ:
env[var] = os.environ[var]

volumes = [f"{data_volume}:/data"]
# Workaround to bypass docker dind issues that prevent the container running the tests
# from sharing a volume with another container
m = DOCKER_LOCAL_DIR_PATTERN.match(model_id)
if m:
local_dir = model_id
real_model_id = m.group(1)
docker_content = f"""
FROM {DOCKER_IMAGE}
RUN huggingface-cli download --local-dir {local_dir} {real_model_id}
"""

docker_tag = "awesome-workaround:{}".format(
"".join(random.choice(string.ascii_lowercase + string.digits) for _ in range(5))
)
LOG.info(
"Building image on the flight derivated from %s, tagged with %s",
DOCKER_IMAGE,
docker_tag,
)
with tempfile.NamedTemporaryFile() as f:
f.write(docker_content.encode("utf-8"))
f.flush()
image, logs = client.images.build(path=".", dockerfile=f.name, tag=docker_tag)
LOG.info("Successfully built image %s", image.id)
LOG.debug("Build logs %s", logs)
else:
docker_tag = DOCKER_IMAGE
image = None

container = client.containers.run(
DOCKER_IMAGE,
docker_tag,
command=args,
name=container_name,
environment=env,
auto_remove=False,
detach=True,
devices=["/dev/neuron0"],
volumes=volumes,
ports={"80/tcp": port},
shm_size="1G",
)

yield ContainerLauncherHandle(client, container.name, port)

try:
container.stop()
container.wait()
except NotFound:
pass

container_output = container.logs().decode("utf-8")
print(container_output, file=sys.stderr)

container.remove()
try:
container.stop()
container.wait()
except NotFound:
pass
container_output = container.logs().decode("utf-8")
print(container_output, file=sys.stderr)

container.remove()
finally:
# Cleanup the build image
if image:
LOG.info("Cleaning image %s", image.id)
try:
image.remove(force=True)
except NotFound:
pass
except Exception as e:
LOG.error("Error while removing image %s, skiping", image.id)
LOG.exception(e)

return docker_launcher

9 changes: 2 additions & 7 deletions text-generation-inference/integration-tests/test_gpt2.py
@@ -1,6 +1,5 @@
import os

import huggingface_hub
import Levenshtein
import pytest

@@ -13,7 +12,7 @@


@pytest.fixture(scope="module", params=["hub-neuron", "hub", "local-neuron"])
def model_name_or_path(request, data_volume):
def model_name_or_path(request):
if request.param == "hub":
os.environ["HF_BATCH_SIZE"] = str(BATCH_SIZE)
os.environ["HF_SEQUENCE_LENGTH"] = str(SEQUENCE_LENGTH)
@@ -22,11 +21,7 @@ def model_name_or_path(request, data_volume):
elif request.param == "hub-neuron":
yield NEURON_MODEL_ID
else:
model_dir = f"gpt2-neuron-{BATCH_SIZE}x{SEQUENCE_LENGTH}x{NUM_CORES}"
local_path = os.path.join(data_volume, model_dir)
huggingface_hub.snapshot_download(NEURON_MODEL_ID, local_dir=local_path)
# Return the path of the model inside the mounted volume
yield os.path.join("/data", model_dir)
yield os.path.join("/data", NEURON_MODEL_ID)


@pytest.fixture(scope="module")
80 changes: 80 additions & 0 deletions text-generation-inference/integration-tests/test_implicit_env.py
@@ -0,0 +1,80 @@
import os

import pytest
from text_generation.errors import ValidationError


# These tests will often break as they rely on many factors like the optimum version, the neuronx-cc version,
# and on what is synced in the cache for these specific versions...

MODELS = ["openai-community/gpt2", "aws-neuron/gpt2-neuronx-bs4-seqlen1024"]


@pytest.fixture(scope="module", params=MODELS)
def get_model_and_set_env(request):
# the tgi_env.py script will take care of setting these
for var in [
"MAX_BATCH_SIZE",
"MAX_INPUT_LENGTH",
"MAX_TOTAL_TOKEN",
"HF_BATCH_SIZE",
"HF_NUM_CORES",
"HF_SEQUENCE_LENGTH",
"HF_AUTO_CAST_TYPE",
]:
if var in os.environ:
del os.environ[var]
yield request.param


@pytest.fixture(scope="module")
def tgi_service(launcher, get_model_and_set_env):
with launcher(get_model_and_set_env) as tgi_service:
yield tgi_service


@pytest.fixture(scope="module")
async def tgi_client(tgi_service):
await tgi_service.health(300)
return tgi_service.client


@pytest.mark.asyncio
async def test_model_single_request(tgi_client):

# Just verify that the generation works and nothing is raised, with several sets of params

# No params
await tgi_client.generate(
"What is Deep Learning?",
)

response = await tgi_client.generate(
"How to cook beans ?",
max_new_tokens=17,
decoder_input_details=True,
)
assert response.details.generated_tokens == 17

# check error
try:
await tgi_client.generate("What is Deep Learning?", max_new_tokens=170000)
except ValidationError:
pass
else:
raise AssertionError(
"The previous text generation request should have failed, "
"because too many tokens were requested, it succeeded"
)

# Sampling
await tgi_client.generate(
"What is Deep Learning?",
do_sample=True,
top_k=50,
top_p=0.9,
repetition_penalty=1.2,
max_new_tokens=1000,
seed=42,
decoder_input_details=True,
)
16 changes: 16 additions & 0 deletions text-generation-inference/tgi-entrypoint.sh
@@ -0,0 +1,16 @@
#!/bin/bash
set -e -o pipefail -u

export ENV_FILEPATH=$(mktemp)

trap "rm -f ${ENV_FILEPATH}" EXIT

touch $ENV_FILEPATH

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

${SCRIPT_DIR}/tgi_env.py $@

source $ENV_FILEPATH

text-generation-launcher $@
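
For context, the entrypoint above expects tgi_env.py to write shell export statements into the temporary file at $ENV_FILEPATH, which is then sourced before launching text-generation-launcher. A rough sketch of that export step follows; the helper name and example values are illustrative assumptions, not taken from this PR.

#!/usr/bin/env python3
# Illustrative sketch only: write the resolved values as shell exports into the
# file created by tgi-entrypoint.sh, so the entrypoint can source them before launch.
import os


def write_env_file(resolved: dict) -> None:
    env_filepath = os.environ["ENV_FILEPATH"]  # set by tgi-entrypoint.sh (mktemp)
    with open(env_filepath, "w") as f:
        for key, value in resolved.items():
            f.write(f"export {key}={value}\n")


if __name__ == "__main__":
    # Example values only; the real wrapper derives them from the model's static params.
    write_env_file({"MAX_BATCH_SIZE": "4", "MAX_INPUT_LENGTH": "512"})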
