Add TGI tests and CI workflow #355

Merged (6 commits) on Feb 7, 2024
40 changes: 40 additions & 0 deletions .github/workflows/test_inf2_tgi.yml
@@ -0,0 +1,40 @@
name: Optimum neuron / Test TGI on INF2

on:
  push:
    branches: [ main ]
    paths:
      - "setup.py"
      - "optimum/**.py"
      - "text-generation-inference/**"
  pull_request:
    branches: [ main ]
    paths:
      - "setup.py"
      - "optimum/**.py"
      - "text-generation-inference/**"

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  do-the-job:
    name: Run TGI tests
    runs-on: [self-hosted, 1-aws-inf2, 32-cpu, ci] # run the job on the newly created runner
    env:
      AWS_REGION: us-east-1
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Install python and create venv
        run: |
          sudo apt install python3.8-venv -y
          python3 -m venv aws_neuron_venv_pytorch
          source aws_neuron_venv_pytorch/bin/activate
          python -m pip install -U pip
          python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
      - name: Run TGI server python tests
        run: |
          source aws_neuron_venv_pytorch/bin/activate
          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} make tgi_test
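A note on the `concurrency` block: the group key is the workflow name joined with `github.head_ref` (set only for pull requests) or, failing that, the unique `github.run_id`. A minimal Python sketch of the resulting behaviour (a mental model, not part of the PR):

```python
def concurrency_group(workflow: str, head_ref: str, run_id: str) -> str:
    # Mirrors `${{ github.workflow }}-${{ github.head_ref || github.run_id }}`.
    # For pull requests head_ref is the PR branch, so successive pushes to the
    # same branch share one group and cancel-in-progress aborts stale runs.
    # For pushes to main head_ref is empty, so the unique run_id keeps every
    # main build in its own group and nothing gets cancelled.
    return f"{workflow}-{head_ref or run_id}"


assert concurrency_group("tgi", "", "12345") == "tgi-12345"
assert concurrency_group("tgi", "my-branch", "12345") == "tgi-my-branch"
```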
16 changes: 15 additions & 1 deletion Makefile
@@ -24,7 +24,7 @@ clean:

rwildcard=$(wildcard $1) $(foreach d,$1,$(call rwildcard,$(addsuffix /$(notdir $d),$(wildcard $(dir $d)*))))

VERSION := $(shell python -W ignore -c "from optimum.neuron.version import __version__; print(__version__)")
VERSION := $(shell gawk 'match($$0, /__version__ = "(.*)"/, a) {print a[1]}' optimum/neuron/version.py)

PACKAGE_DIST = dist/optimum-neuron-$(VERSION).tar.gz
PACKAGE_WHEEL = dist/optimum_neuron-$(VERSION)-py3-none-any.whl
@@ -71,6 +71,20 @@ build_dist: ${PACKAGE_DIST} ${PACKAGE_WHEEL}
pypi_upload: ${PACKAGE_DIST} ${PACKAGE_WHEEL}
	python -m twine upload ${PACKAGE_DIST} ${PACKAGE_WHEEL}

# Tests

test_installs:
	python -m pip install .[tests]
	python -m pip install git+https://github.com/huggingface/transformers.git

# Stand-alone TGI server for unit tests outside of TGI container
tgi_server:
	python -m pip install -r text-generation-inference/server/build-requirements.txt
	make -C text-generation-inference/server clean
	VERSION=${VERSION} make -C text-generation-inference/server gen-server

tgi_test: tgi_server
	python -m pip install .[neuronx] pytest
	find text-generation-inference -name "text_generation_server-$(VERSION)-py3-none-any.whl" \
		-exec python -m pip install --force-reinstall {} \;
	python -m pytest -s text-generation-inference/tests
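The VERSION variable is now parsed statically with gawk instead of importing `optimum.neuron.version`, so `make` can resolve the version even on a machine where the package and its dependencies are not installed yet (the old shell-out would fail on a bare CI runner). A rough Python equivalent of the gawk one-liner, shown only to illustrate the parse:

```python
import re


def read_version(path: str = "optimum/neuron/version.py") -> str:
    # Scan the file for the `__version__ = "..."` assignment without
    # importing the package, so no dependencies are required.
    with open(path) as f:
        match = re.search(r'__version__ = "(.*)"', f.read())
    if match is None:
        raise ValueError(f"no __version__ found in {path}")
    return match.group(1)
```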
10 changes: 5 additions & 5 deletions text-generation-inference/Dockerfile
@@ -51,6 +51,10 @@ RUN apt-get update -y \
    && apt-get clean
RUN pip3 --no-cache-dir install --upgrade pip

# VERSION is a mandatory parameter
ARG VERSION
RUN test -n ${VERSION:?}

# Python server build image
FROM base AS pyserver

@@ -66,15 +70,11 @@ WORKDIR /pyserver
COPY text-generation-inference/server server
COPY --from=tgi /tgi/proto proto
RUN pip3 install -r server/build-requirements.txt
RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server
RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto VERSION=${VERSION} make -C server gen-server

# Neuron base image (used for deployment)
FROM base AS neuron

# VERSION is a mandatory parameter
ARG VERSION
RUN test -n ${VERSION:?}

# Install system prerequisites
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
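The mandatory `ARG VERSION` check moves from the deployment stage into the shared `base` image, so the `pyserver` stage can forward VERSION to `make gen-server` and the build fails as early as possible when the argument is missing (`test -n ${VERSION:?}` aborts if VERSION is unset or empty). A Python analogue of that fail-fast guard, purely illustrative:

```python
import os


def required_build_arg(name: str = "VERSION") -> str:
    # Analogue of the Dockerfile guard `RUN test -n ${VERSION:?}`:
    # abort immediately with an explicit error when a mandatory
    # parameter was not supplied, instead of failing later in the build.
    value = os.environ.get(name, "")
    if not value:
        raise SystemExit(f"{name} is a mandatory parameter")
    return value
```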
32 changes: 22 additions & 10 deletions text-generation-inference/server/Makefile
@@ -1,6 +1,7 @@
# Initialize base variables
pkg_name := text_generation_server
BUILDDIR ?= $(CURDIR)/build_$(pkg_name)
BUILDDIR ?= $(CURDIR)/build
VERSION ?= 0.0.1
mkfile_path := $(abspath $(lastword $(MAKEFILE_LIST)))
mkfile_dir := $(dir $(mkfile_path))
pkg_dir := $(BUILDDIR)/$(pkg_name)
@@ -13,14 +14,6 @@ src_dir := $(mkfile_dir)/$(pkg_name)
sources := $(wildcard $(src_dir)/*.py)
deployed_sources := $(subst $(src_dir), $(pkg_dir), $(sources))

# Three python files are generated for each protobuf
protobufs := $(wildcard $(PROTODIR)/*.proto)
pkg_pb_dir := $(pkg_dir)/pb
generated_sources_base := $(foreach proto, $(protobufs), $(proto:.proto=_pb2.py))
generated_sources := $(subst $(PROTODIR), $(pkg_pb_dir), $(generated_sources_base))
generated_sources += $(subst $(PROTODIR), $(pkg_pb_dir), $(generated_sources_base:.py=.pyi))
generated_sources += $(subst $(PROTODIR), $(pkg_pb_dir), $(generated_sources_base:.py=_grpc.py))

# Static files are just copied

define COPY
@@ -30,18 +23,37 @@ endef
$(BUILDDIR)/pyproject.toml: $(mkfile_dir)/pyproject.toml
	mkdir -p $(BUILDDIR)
	$(COPY)
	sed -i -e 's/version = "VERSION"/version = \"${VERSION}\"/' $@

$(pkg_dir)/%.py: $(src_dir)/%.py
	mkdir -p $(pkg_dir)
	$(COPY)

# Generated files are produced by grpcio tools

# If not provided, fetch proto files from TGI
ifndef PROTODIR
PROTODIR := $(BUILDDIR)/tgi/proto
endif

$(BUILDDIR)/tgi/proto/%.proto:
	install -d $(BUILDDIR)/tgi
	curl -L https://github.com/huggingface/text-generation-inference/archive/refs/tags/v1.0.2.tar.gz --output $(BUILDDIR)/tgi/sources.tar.gz
	tar -C $(BUILDDIR)/tgi -xf $(BUILDDIR)/tgi/sources.tar.gz --strip-components=1

# Three python files are generated for each protobuf
protobufs := $(PROTODIR)/generate.proto
pkg_pb_dir := $(pkg_dir)/pb
generated_sources_base := $(foreach proto, $(protobufs), $(proto:.proto=_pb2.py))
generated_sources := $(subst $(PROTODIR), $(pkg_pb_dir), $(generated_sources_base))
generated_sources += $(subst $(PROTODIR), $(pkg_pb_dir), $(generated_sources_base:.py=.pyi))
generated_sources += $(subst $(PROTODIR), $(pkg_pb_dir), $(generated_sources_base:.py=_grpc.py))

$(pkg_pb_dir)/%_pb2.py $(pkg_pb_dir)/%_pb2.pyi $(pkg_pb_dir)/%_pb2_grpc.py: $(PROTODIR)/%.proto
	mkdir -p $(pkg_pb_dir)
	python -m grpc_tools.protoc -I$(PROTODIR) --python_out=$(pkg_pb_dir) \
		--grpc_python_out=$(pkg_pb_dir) --mypy_out=$(pkg_pb_dir) $^
	sed -i -e 's/^\(import.*pb2\)/from . \1/g' $(pkg_pb_dir)/$*_pb2_grpc.py

gen-server: $(BUILDDIR)/pyproject.toml $(deployed_sources) $(generated_sources)
	python -m build $(BUILDDIR) --sdist
	python -m build $(BUILDDIR)
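When PROTODIR is not supplied, the rules above download the TGI v1.0.2 sources and use the protos shipped there; grpcio tools then generate three Python files per protobuf, and the sed expression rewrites the absolute `import ..._pb2` in the generated gRPC module into a relative import so the stubs resolve from inside the packaged wheel. A hedged Python sketch of that generation step (assumes grpcio-tools and mypy-protobuf are installed; `gen_stubs` is a hypothetical helper, not code from this PR):

```python
import re
from pathlib import Path

from grpc_tools import protoc


def gen_stubs(proto_dir: str, out_dir: str, proto: str = "generate.proto") -> None:
    # Roughly what the pattern rule does for each .proto file: emit
    # <name>_pb2.py, <name>_pb2.pyi and <name>_pb2_grpc.py into out_dir.
    Path(out_dir).mkdir(parents=True, exist_ok=True)
    protoc.main([
        "grpc_tools.protoc",
        f"-I{proto_dir}",
        f"--python_out={out_dir}",
        f"--grpc_python_out={out_dir}",
        f"--mypy_out={out_dir}",
        f"{proto_dir}/{proto}",
    ])
    # Then, like the sed expression, turn `import generate_pb2 ...` into
    # `from . import generate_pb2 ...` so the import works as a package.
    grpc_file = Path(out_dir) / proto.replace(".proto", "_pb2_grpc.py")
    text = grpc_file.read_text()
    grpc_file.write_text(re.sub(r"^(import.*pb2)", r"from . \1", text, flags=re.M))
```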
5 changes: 4 additions & 1 deletion text-generation-inference/server/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "text-generation-server"
version = "0.0.1"
version = "VERSION"
authors = [{name="David Corvoysier", email="[email protected]" }]
description = "TGI compatible inference server for AWS Neuronx platforms"
dependencies = [
@@ -18,5 +18,8 @@ dependencies = [
    'loguru == 0.6.0'
]

[tool.setuptools]
packages = ["text_generation_server", "text_generation_server.pb"]

[project.scripts]
text-generation-server = 'text_generation_server.cli:app'
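The literal "VERSION" here is a placeholder, not a real version: the server Makefile's sed command stamps the actual package version into the copy of pyproject.toml placed in $(BUILDDIR) before the wheel is built. A Python equivalent of that substitution, for illustration only:

```python
from pathlib import Path


def stamp_version(pyproject: str, version: str) -> None:
    # Same effect as the Makefile line
    #   sed -i -e 's/version = "VERSION"/version = "<version>"/' $@
    path = Path(pyproject)
    text = path.read_text()
    path.write_text(text.replace('version = "VERSION"', f'version = "{version}"'))
```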
61 changes: 61 additions & 0 deletions text-generation-inference/tests/test_generator_slot.py
@@ -0,0 +1,61 @@
import pytest
import torch
from text_generation_server.generator import Slot
from text_generation_server.pb.generate_pb2 import Request
from transformers import AutoTokenizer, GenerationConfig


TOKENIZERS = ["NousResearch/Llama-2-7b-hf", "gpt2"]


@pytest.fixture(params=TOKENIZERS)
def tokenizer(request):
    t = AutoTokenizer.from_pretrained(request.param)
    t.padding_side = "left"
    t.pad_token_id = t.eos_token_id
    return t


@pytest.mark.parametrize(
    "input_text, generated_text",
    [
        [
            "It was a bright cold day in April, and the clocks were striking thirteen.",
            " Winston Smith, his chin nuzzled into his breast in an effort to escape the vile wind,"
            " slipped quickly through the glass doors of Victory Mansions, though not quickly enough"
            " to prevent a swirl of gritty dust from entering along with him.",
        ],
        ["This sentence is written in chinese:", "我很感谢你的热情"],
        ["Some text might contain a lot of emojis like 😃", "😍💪 👉 👀"],
    ],
    ids=["spaces", "chinese-utf8", "emojis"],
)
def test_decode_streaming(tokenizer, input_text, generated_text):
    slot = Slot(0, tokenizer)
    request = Request(id=0, inputs=input_text)
    slot.assign(request, GenerationConfig())
    assert slot.cached_text == input_text

    inputs = tokenizer(input_text, padding="max_length", max_length=len(input_text) + 1, return_tensors="pt")
    input_ids = inputs["input_ids"][0]
    attention_mask = inputs["attention_mask"][0]
    generated_tokens = tokenizer(generated_text, add_special_tokens=False)["input_ids"]

    # We need to regenerate the full text as the tokenizer might change it (extra spaces might be added)
    all_input_ids = torch.cat([input_ids, torch.tensor(generated_tokens)])
    full_text = tokenizer.decode(all_input_ids, skip_special_tokens=True)
    regenerated_text = full_text[len(input_text) :]

    # Initialize the slot with the inputs
    slot.reset(input_ids, attention_mask, selector=None)

    assert slot.generated_tokens == 0

    # Simulate an iterative generation (i.e. don't call select and use known tokens instead)
    decoded_text = ""
    for i in range(len(generated_tokens)):
        text = slot.append(generated_tokens[i])
        assert slot.generated_tokens == i + 1
        decoded_text += text

    assert decoded_text == regenerated_text
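The test checks that decoding token by token through `Slot.append` reproduces exactly the text obtained by decoding the whole sequence at once. That is the hard part of streaming: byte-level tokenizers can split one multi-byte character (an emoji, a CJK glyph) across several tokens, and detokenizers may add or merge spaces at token boundaries. Below is a simplified sketch of the kind of incremental detokenization the test exercises; `stream_decode` is hypothetical and independent of the actual Slot internals in this PR:

```python
from transformers import AutoTokenizer


def stream_decode(tokenizer, token_ids):
    # Decode a growing window and emit only the new suffix. Holding back
    # text that still ends with U+FFFD avoids flushing half of a
    # multi-byte character that is split across tokens.
    prefix_offset, read_offset = 0, 0
    for i in range(1, len(token_ids) + 1):
        prefix = tokenizer.decode(token_ids[prefix_offset:read_offset])
        new_text = tokenizer.decode(token_ids[prefix_offset:i])
        if len(new_text) > len(prefix) and not new_text.endswith("\ufffd"):
            yield new_text[len(prefix):]
            prefix_offset, read_offset = read_offset, i


tok = AutoTokenizer.from_pretrained("gpt2")
ids = tok("Hello world 😃", add_special_tokens=False)["input_ids"]
# The concatenated increments must equal the one-shot decode.
assert "".join(stream_decode(tok, ids)) == tok.decode(ids)
```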