This repository has been archived by the owner on May 28, 2024. It is now read-only.

Commit

Update docker images
Signed-off-by: Antoni Baum <[email protected]>
Yard1 committed Jul 3, 2023
1 parent caa3062 commit bbfe3c5
Showing 14 changed files with 37 additions and 15 deletions.
2 changes: 1 addition & 1 deletion aviary/backend/llm/predictor/predictor.py
@@ -448,7 +448,7 @@ async def _create_worker_group(
         await asyncio.gather(
             *[
                 initialize_node_remote_pg.remote(
-                    llm_config.model_id,
+                    llm_config.actual_hf_model_id,
                     llm_config.initialization.s3_mirror_config,
                 )
                 for i in range(scaling_config.num_workers)
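(A plausible reading of this change, not stated in the commit: `actual_hf_model_id` resolves the configured aviary `model_id` to the underlying Hugging Face Hub ID before node initialization. The property itself is not shown in this diff.)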
12 changes: 7 additions & 5 deletions aviary/backend/llm/utils.py
@@ -33,7 +33,7 @@ def download_model(
     Download a model from an S3 bucket and save it in TRANSFORMERS_CACHE for
     seamless interoperability with Hugging Face's Transformers library.

-    The downloaded model must have a 'hash' file containing the commit hash corresponding
+    The downloaded model may have a 'hash' file containing the commit hash corresponding
     to the commit on Hugging Face Hub.
     """
     from transformers.utils.hub import TRANSFORMERS_CACHE
@@ -48,11 +48,13 @@
         + [os.path.join(bucket_uri, "hash"), "."]
     )
     if not os.path.exists(os.path.join(".", "hash")):
-        raise RuntimeError(
-            "Hash file not found in the bucket or bucket could not have been downloaded."
+        f_hash = "0000000000000000000000000000000000000000"
+        logger.warning(
+            f"hash file does not exist in {bucket_uri}. Using {f_hash} as the hash."
         )
-    with open(os.path.join(".", "hash"), "r") as f:
-        f_hash = f.read().strip()
+    else:
+        with open(os.path.join(".", "hash"), "r") as f:
+            f_hash = f.read().strip()
     logger.info(
         f"Downloading {model_id} from {bucket_uri} to {os.path.join(path, 'snapshots', f_hash)}"
     )
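For context, the new behavior of download_model is roughly the sketch below. This is a simplification, not the exact aviary code: the awscli calls that fetch the bucket contents are omitted, and resolve_snapshot_hash is a hypothetical helper name.

import logging
import os

logger = logging.getLogger(__name__)


def resolve_snapshot_hash(bucket_uri: str) -> str:
    """Return the Hub commit hash to use for a mirrored model.

    If the bucket has no 'hash' file, fall back to an all-zeros
    placeholder so the model still gets a deterministic snapshot
    directory under TRANSFORMERS_CACHE/<model>/snapshots/<hash>.
    """
    hash_path = os.path.join(".", "hash")
    if not os.path.exists(hash_path):
        f_hash = "0" * 40  # placeholder shaped like a git commit SHA
        logger.warning(
            f"hash file does not exist in {bucket_uri}. Using {f_hash} as the hash."
        )
    else:
        with open(hash_path, "r") as f:
            f_hash = f.read().strip()
    return f_hash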
9 changes: 9 additions & 0 deletions aviary/backend/server/models.py
@@ -400,6 +400,15 @@ def initializer_pipeline(cls, values):
         )
         return values

+    @root_validator
+    def s3_mirror_config_transformers(cls, values):
+        s3_mirror_config: S3MirrorConfig = values.get("s3_mirror_config")
+        if s3_mirror_config and s3_mirror_config.bucket_uri:
+            initializer: Initializer = values.get("initializer")
+            if isinstance(initializer, Transformers):
+                initializer.from_pretrained_kwargs["local_files_only"] = True
+        return values
+

 class StaticBatchingInitializationConfig(InitializationConfig):
     initializer: Annotated[
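The effect of the new validator: whenever a model is backed by an S3 mirror, the Transformers initializer is forced to read weights only from the local cache, so from_pretrained never falls back to downloading from the Hugging Face Hub. A minimal self-contained sketch of the same pattern, assuming pydantic v1; the classes here are simplified stand-ins for aviary's real models:

from typing import Optional

from pydantic import BaseModel, root_validator


class S3MirrorConfig(BaseModel):
    bucket_uri: Optional[str] = None


class Transformers(BaseModel):
    from_pretrained_kwargs: dict = {}


class InitializationConfig(BaseModel):
    s3_mirror_config: Optional[S3MirrorConfig] = None
    initializer: Optional[Transformers] = None

    @root_validator
    def s3_mirror_config_transformers(cls, values):
        # Mirrored weights are already in the local HF cache, so
        # transformers must not try to re-download them from the Hub.
        s3_mirror_config = values.get("s3_mirror_config")
        if s3_mirror_config and s3_mirror_config.bucket_uri:
            initializer = values.get("initializer")
            if isinstance(initializer, Transformers):
                initializer.from_pretrained_kwargs["local_files_only"] = True
        return values


config = InitializationConfig(
    s3_mirror_config=S3MirrorConfig(bucket_uri="s3://my-mirror/my-model/"),  # placeholder URI
    initializer=Transformers(),
)
assert config.initializer.from_pretrained_kwargs["local_files_only"] is True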
5 changes: 5 additions & 0 deletions aviary/common/constants.py
@@ -350,6 +350,11 @@
     font-size: 1rem;
 }
+.ticker-container.block {
+    padding: 4px 8px !important;
+    border: 1px solid var(--button-primary-border-color) !important;
+}
+
 #prompt-examples-column {
     flex-grow: 0 !important;
 }
2 changes: 1 addition & 1 deletion aviary/frontend/app.py
@@ -457,7 +457,7 @@ def noop(*args, **kwargs):
         pass

     # Get the port the serve app is running on
-    controller = serve.context._global_client._controller
+    controller = ray.serve.context.get_global_client()._controller
     port = ray.get(controller.get_http_config.remote()).port

     blocks._queue.set_url(f"http://localhost:{port}/")
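(A likely motivation, not stated in the commit: `_global_client` is a private attribute, and `ray.serve.context.get_global_client()` is the accessor for the same Serve client, which is presumably more robust against the newer Ray nightly wheels these images install.)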
2 changes: 1 addition & 1 deletion deploy/_internal/backend/service.yaml
@@ -1,4 +1,4 @@
-models: ./models
+models: ./models/continuous_batching

 ray_serve_config:
   applications:
7 changes: 4 additions & 3 deletions deploy/ray/Dockerfile
@@ -2,12 +2,13 @@ FROM rayproject/ray:nightly-cu118

 RUN sudo apt-get update && sudo apt-get install -y libaio-dev git-lfs awscli && sudo rm -rf /var/lib/apt/lists/*

-RUN conda install python=3.10
+RUN conda update -n base -c defaults conda && conda install python=3.10
 RUN pip install -i https://download.pytorch.org/whl/cu118 torch torchvision torchaudio
 RUN pip uninstall -y ray && pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl

Comment from kevin85421 (Member), Jul 12, 2023:
    Why do we uninstall the existing ray?

 # Created by build_aviary_wheel.sh
 COPY "./dist" "/home/ray/dist"
-RUN cd /home/ray/dist && pip install "$(ls *.whl | head -n1)[backend]"
+RUN cd /home/ray/dist && pip install "$(ls *.whl | head -n1)[backend, frontend]"

 # The build context should be the root of the repo
 # So this gives the model definitions
@@ -18,4 +19,4 @@ ENV SAFETENSORS_FAST_GPU=1
 ENV RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1
 RUN echo "Testing aviary install" && python -c "import aviary.backend"

-RUN pip cache purge && conda clean -a && rm -rf ~/.cache
+RUN (pip cache purge || true) && conda clean -a && rm -rf ~/.cache
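(`pip cache purge` exits non-zero when there is nothing to purge, so the `(... || true)` wrapper presumably keeps this cleanup step from failing the image build; the same guard is applied in Dockerfile-tgi below.)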
5 changes: 3 additions & 2 deletions deploy/ray/Dockerfile-tgi
@@ -28,11 +28,12 @@ RUN export FORCE_CUDA=1 NVCC_PREPEND_FLAGS="--forward-unknown-opts" DS_BUILD_OPS
"numpy<1.24" \
"ninja"
RUN pip install --no-deps "git+https://github.com/huggingface/optimum.git"
RUN pip uninstall -y ray && pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl
RUN source "$HOME/.cargo/env" && pip install boto3

# Created by build_aviary_wheel.sh
COPY "./dist" "/home/ray/dist"
RUN cd /home/ray/dist && pip install "$(ls *.whl | head -n1)[backend]"
RUN cd /home/ray/dist && pip install --no-deps "$(ls *.whl | head -n1)[backend, frontend]"

Comment from shaowei-su, Jul 5, 2023:
    why no-deps? some aviary dependency is broken in the latest anyscale/aviary:latest-tgi, e.g. typer

Comment from shaowei-su, Jul 5, 2023:
    typer==0.6.1

 # The build context should be the root of the repo
 # So this gives the model definitions
@@ -48,4 +49,4 @@ RUN sudo supervisord --version

 RUN echo "Testing aviary install" && python -c "import aviary.backend"

-RUN pip cache purge && conda clean -a && rm -rf ~/.cache
+RUN (pip cache purge || true) && conda clean -a && rm -rf ~/.cache
2 changes: 1 addition & 1 deletion models/continuous_batching/lmsys--vicuna-33b-v1.3.yaml
@@ -2,7 +2,7 @@ deployment_config:
   autoscaling_config:
     min_replicas: 1
     initial_replicas: 1
-    max_replicas: 8
+    max_replicas: 1
     target_num_ongoing_requests_per_replica: 1.0
     metrics_interval_s: 10.0
     look_back_period_s: 30.0
@@ -15,9 +15,9 @@ deployment_config:
       accelerator_type_cpu: 0.01
 model_config:
   model_id: OpenAssistant/falcon-40b-sft-top1-560
+  batching: static
   max_input_words: 800
   initialization:
-
     s3_mirror_config:
       bucket_uri: s3://large-dl-models-mirror/models--OpenAssistant--falcon-40b-sft-top1-560/main-safetensors/
     initializer:
@@ -14,6 +14,7 @@ deployment_config:
       accelerator_type_cpu: 0.01
 model_config:
   model_id: OpenAssistant/falcon-7b-sft-top1-696
+  batching: static
   max_input_words: 800
   initialization:
     s3_mirror_config:
@@ -14,6 +14,7 @@ deployment_config:
       accelerator_type_cpu: 0.01
 model_config:
   model_id: OpenAssistant/oasst-sft-7-llama-30b-xor
+  batching: static
   max_input_words: 800
   model_description: "Open Assistant is a project meant to give everyone access to a great chat based large language model.\nWe believe that by doing this we will create a revolution in innovation in language. In the same way that stable-diffusion helped the world make art and images in new ways we hope Open Assistant can help improve the world by improving language itself."
   initialization:
1 change: 1 addition & 0 deletions models/static_batching/lmsys--vicuna-13b-delta-v1.1.yaml
@@ -13,6 +13,7 @@ deployment_config:
     resources:
       accelerator_type_cpu: 0.01
 model_config:
+  batching: static
   model_id: lmsys/vicuna-13b-delta-v1.1
   max_input_words: 800
   model_description: "Vicuna is an open-source chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. It is an auto-regressive language model, based on the transformer architecture."
@@ -14,6 +14,7 @@ deployment_config:
       accelerator_type_cpu: 0.01
 model_config:
   model_id: mosaicml/mpt-30b-chat
+  batching: static
   max_input_words: 800
   initialization:
     s3_mirror_config:
