Skip to content
This repository has been archived by the owner on May 28, 2024. It is now read-only.

Commit

Permalink
Update dockerfiles, no-sign-request for S3
Browse files Browse the repository at this point in the history
Signed-off-by: Antoni Baum <[email protected]>
  • Loading branch information
Yard1 committed Jul 15, 2023
1 parent e005252 commit 951bcd2
Show file tree
Hide file tree
Showing 29 changed files with 78 additions and 59 deletions.
39 changes: 3 additions & 36 deletions deploy/_internal/backend/cluster-env.yaml
Original file line number Diff line number Diff line change
@@ -1,41 +1,8 @@
base_image: anyscale/ray:nightly-py310-cu118
debian_packages:
- libaio-dev
- git-lfs
- xfsprogs
docker_image: anyscale/aviary:latest
ray_version: nightly
env_vars:
HF_HUB_ENABLE_HF_TRANSFER: 1
SAFETENSORS_FAST_GPU: 1
RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING: 1
XDG_CACHE_HOME: /mnt/local_storage/data/cache
TORCH_HOME: /mnt/local_storage/data/cache/torch
post_build_cmds:
- |-
echo "dedup version 3. increment this to force a rebuild."
pip uninstall -y torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric tensorflow
pip install -i https://download.pytorch.org/whl/cu118 torch torchvision torchaudio
pip install \
"async_timeout" \
"markdown-it-py[plugins]" \
"accelerate" \
"transformers>=4.25.1" \
"datasets" \
"ftfy" \
"tensorboard" \
"sentencepiece" \
"Jinja2" \
"numexpr>=2.7.3" \
"hf_transfer" \
"evaluate" \
"bitsandbytes" \
"git+https://github.com/Yard1/DeepSpeed.git@aviary" \
"numpy<1.24" \
"ninja" \
"protobuf<3.21.0" \
"git+https://github.com/huggingface/optimum.git" \
"torchmetrics" \
"safetensors" \
"pydantic==1.10.7" \
"einops" \
"markdown-it-py[plugins]" \
"fastapi_versioning"
TORCH_HOME: /mnt/local_storage/data/cache/torch
21 changes: 16 additions & 5 deletions deploy/ray/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,22 +1,33 @@
# Builds an image on top of a Ray base image: installs OS packages and PyTorch,
# installs the wheel found in ./dist with the "backend" and "frontend" extras,
# copies ./models into the image, then smoke-tests the installed tooling.
#
# NOTE(review): this text is taken from a commit diff rendered without +/-
# markers, so removed and added lines appear side by side (two FROM lines, two
# apt-get RUN lines, two wheel-install RUN lines, one ENV repeated). Confirm
# against the committed file which line of each pair is the live one.
FROM rayproject/ray:nightly-cu118
FROM anyscale/ray:nightly-py310-cu118
# Run every following shell-form RUN through bash instead of the default /bin/sh.
SHELL ["/bin/bash", "-c"]

RUN sudo apt-get update && sudo apt-get install -y libaio-dev git-lfs awscli && sudo rm -rf /var/lib/apt/lists/*
# Add extra dependencies
# ARG (not ENV) keeps DEBIAN_FRONTEND out of the runtime environment of the image.
ARG DEBIAN_FRONTEND=noninteractive
RUN sudo apt-get update && sudo apt-get install -y axel nfs-common zip unzip libaio-dev git-lfs awscli curl libssl-dev gcc pkg-config libc6-dev && sudo apt-get clean

RUN conda update -n base -c defaults conda && conda install python=3.10
# Install torch/torchvision/torchaudio from the PyTorch cu118 wheel index.
RUN pip install -i https://download.pytorch.org/whl/cu118 torch torchvision torchaudio
# Replace the Ray that ships with the base image by the "latest" cp310 wheel.
RUN pip uninstall -y ray && pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl

# Created by build_aviary_wheel.sh
COPY "./dist" "/home/ray/dist"
# "$(ls *.whl | head -n1)" selects the first wheel listed in /home/ray/dist; the
# bracketed suffix asks pip to install that wheel's backend and frontend extras.
RUN cd /home/ray/dist && pip install "$(ls *.whl | head -n1)[backend, frontend]"
# Same install, but with the build flags exported first so that pip and any
# build subprocesses it spawns inherit them.
# NOTE(review): the DS_BUILD_* / TORCH_CUDA_ARCH_LIST flags presumably control
# CUDA op compilation for DeepSpeed during dependency install — confirm.
RUN export FORCE_CUDA=1 NVCC_PREPEND_FLAGS="--forward-unknown-opts" DS_BUILD_OPS=1 DS_BUILD_AIO=0 DS_BUILD_SPARSE_ATTN=0 TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 9.0" && cd /home/ray/dist && pip install "$(ls *.whl | head -n1)[backend, frontend]"

# The build context must be the root of the repo so that ./models (the model
# definitions) is available to this COPY.
COPY "./models" "/home/ray/models"

# Runtime environment baked into the image.
# NOTE(review): RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING is set on two lines below
# with the same value; one of them is likely the removed side of the diff.
ENV RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1
ENV HF_HUB_ENABLE_HF_TRANSFER=1
ENV SAFETENSORS_FAST_GPU=1
ENV RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1

# (Optional) Verify that dependencies from the base image still work. This
# is useful for catching dependency conflicts at build time.
# Each check is its own RUN, so a non-zero exit from any of them fails the build.
RUN echo "Testing Ray Import..." && python -c "import ray"
RUN ray --version
RUN jupyter --version
RUN anyscale --version
RUN sudo supervisord --version

RUN echo "Testing aviary install" && python -c "import aviary.backend"

# Final cleanup of pip/conda caches and ~/.cache; "|| true" keeps the build going
# if "pip cache purge" fails.
# NOTE(review): this runs in its own layer, so it does not reduce the size of the
# earlier layers that wrote those caches.
RUN (pip cache purge || true) && conda clean -a && rm -rf ~/.cache
4 changes: 2 additions & 2 deletions deploy/ray/Dockerfile-tgi
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ SHELL ["/bin/bash", "-c"]

# NOTE(review): this is a partial diff hunk (it starts a few lines into the file
# and the file continues after it) rendered without +/- markers. The two apt-get
# RUN lines and the two "git checkout <sha>" RUN lines are presumably the removed
# and added versions of the same step — confirm against the committed file.
# Add extra dependencies
# ARG (not ENV) keeps DEBIAN_FRONTEND out of the runtime environment of the image.
ARG DEBIAN_FRONTEND=noninteractive
RUN sudo apt-get update && sudo apt-get install -y axel nfs-common zip unzip awscli curl libssl-dev gcc pkg-config libc6-dev && sudo apt-get clean
RUN sudo apt-get update && sudo apt-get install -y axel nfs-common zip unzip libaio-dev git-lfs awscli curl libssl-dev gcc pkg-config libc6-dev && sudo apt-get clean

# Install Rust via rustup-init. The trailing `source "$HOME/.cargo/env"` only
# affects this RUN's shell; every later RUN starts a fresh shell, which is why
# each of them sources the file again.
RUN wget "https://static.rust-lang.org/rustup/dist/x86_64-unknown-linux-gnu/rustup-init" && chmod +x rustup-init && ./rustup-init -y && rm rustup-init && source "$HOME/.cargo/env"
# Download the protoc v21.12 release zip, unpack bin/protoc and include/* under
# /usr/local, then delete the zip in the same layer.
RUN source "$HOME/.cargo/env" && PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && sudo unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && sudo unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && rm -f $PROTOC_ZIP
# Install torch/torchvision/torchaudio from the PyTorch cu118 wheel index.
RUN source "$HOME/.cargo/env" && pip install -i https://download.pytorch.org/whl/cu118 torch torchvision torchaudio
RUN source "$HOME/.cargo/env" && pip install tensorboard ninja text-generation
# Clone text-generation-inference, check out a pinned commit, and build/install it.
# NOTE(review): in the four RUN lines below, only FORCE_CUDA is exported.
# `TORCH_CUDA_ARCH_LIST="..."` is a separate plain assignment after `&&`, so
# unless that variable is already exported in the environment it is not passed
# on to `make` and the processes it starts. If it is meant to reach the build,
# it should be part of the export (e.g. `export FORCE_CUDA=1 TORCH_CUDA_ARCH_LIST="..."`).
RUN source "$HOME/.cargo/env" && export FORCE_CUDA=1 && TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 9.0" && git clone https://github.com/huggingface/text-generation-inference && cd text-generation-inference && git checkout e943a294bca239e26828732dd6ab5b6f95dadd0a && BUILD_EXTENSIONS=True make install
RUN source "$HOME/.cargo/env" && export FORCE_CUDA=1 && TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 9.0" && git clone https://github.com/huggingface/text-generation-inference && cd text-generation-inference && git checkout a2cf1bdb2fc0570dfca8b9ed2c8322f2040c3c07 && BUILD_EXTENSIONS=True make install
# Extra make targets run from the server/ subdirectory of the checkout created above.
RUN source "$HOME/.cargo/env" && export FORCE_CUDA=1 && TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 9.0" && cd text-generation-inference/server && BUILD_EXTENSIONS=True make install-flash-attention
RUN source "$HOME/.cargo/env" && export FORCE_CUDA=1 && TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 9.0" && cd text-generation-inference/server && make install-vllm

Expand Down
21 changes: 5 additions & 16 deletions deploy/ray/aviary-cluster.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,32 +7,21 @@ provider:
region: us-west-2
cache_stopped_nodes: False
docker:
image: "anyscale/aviary:latest"
image: "anyscale/aviary:test"
# Use this image instead for continuous batching:
# image: "anyscale/aviary:latest-tgi"
container_name: "aviary"
run_options:
- --entrypoint ""

# All the 'conda activate' calls are necessary to ensure we are in the
# Python 3.10 conda env.
setup_commands:
- echo "(conda activate || true)" >> ~/.bashrc
- which ray || pip install -U "ray[default] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl"

head_setup_commands:
- (conda activate || true) && pip install 'boto3>=1.4.8'

worker_setup_commands: []

head_start_ray_commands:
- (conda activate || true) && ray stop
- (conda activate || true) && ulimit -n 65536; ray start --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml --dashboard-host=0.0.0.0

worker_start_ray_commands:
- (conda activate || true) && ray stop
# We need to make sure RAY_HEAD_IP env var is accessible
# after conda activate.
- export RAY_HEAD_IP && echo "export RAY_HEAD_IP=$RAY_HEAD_IP" >> ~/.bashrc && (conda activate || true) && ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076
- ray stop
# We need to make sure RAY_HEAD_IP env var is accessible.
- export RAY_HEAD_IP && echo "export RAY_HEAD_IP=$RAY_HEAD_IP" >> ~/.bashrc && ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076

available_node_types:
head_node_type:
Expand Down
4 changes: 4 additions & 0 deletions models/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ model_config:
# Hugging Face Hub. You can use this to speed up downloads.
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-instruct/main-safetensors/
s3_sync_args:
- "--no-sign-request"
# How to initialize the model.
initializer:
# Initializer type. For static batching, can be one of:
Expand Down Expand Up @@ -193,6 +195,8 @@ model_config:
initialization:
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-instruct/main-safetensors/
s3_sync_args:
- "--no-sign-request"
generation:
prompt_format: "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\n{instruction}\n### Response:\n"
stopping_sequences: ["### Response:", "### End"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ model_config:
initialization:
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/models--OpenAssistant--falcon-40b-sft-top1-560/main-safetensors-tgi/
s3_sync_args:
- "--no-sign-request"
initializer:
type: TextGenerationInference
model_init_kwargs:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ model_config:
initialization:
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/models--OpenAssistant--falcon-7b-sft-top1-696/main-safetensors-tgi/
s3_sync_args:
- "--no-sign-request"
initializer:
type: TextGenerationInference
model_init_kwargs:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ model_config:
initialization:
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/restricted/models--OpenAssistant--oasst-sft-7-llama-30b-xor/main-safetensors/
s3_sync_args:
- "--no-sign-request"
initializer:
type: TextGenerationInference
model_init_kwargs:
Expand Down
2 changes: 2 additions & 0 deletions models/continuous_batching/amazon--LightGPT.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ model_config:
initialization:
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/models--amazon--LightGPT/main-safetensors/
s3_sync_args:
- "--no-sign-request"
initializer:
type: TextGenerationInference
pipeline: TextGenerationInference
Expand Down
2 changes: 2 additions & 0 deletions models/continuous_batching/lmsys--vicuna-13b-v1.3.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ model_config:
initialization:
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/models--lmsys--vicuna-13b-v1.3/main-safetensors/
s3_sync_args:
- "--no-sign-request"
initializer:
type: TextGenerationInference
model_init_kwargs:
Expand Down
2 changes: 2 additions & 0 deletions models/continuous_batching/lmsys--vicuna-33b-v1.3.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ model_config:
initialization:
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/models--lmsys--vicuna-33b-v1.3/main-safetensors-tgi/
s3_sync_args:
- "--no-sign-request"
initializer:
type: TextGenerationInference
model_init_kwargs:
Expand Down
2 changes: 2 additions & 0 deletions models/continuous_batching/mosaicml--mpt-30b-chat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ model_config:
initialization:
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-30b-chat/main-safetensors/
s3_sync_args:
- "--no-sign-request"
initializer:
type: TextGenerationInference
model_init_kwargs:
Expand Down
2 changes: 2 additions & 0 deletions models/continuous_batching/mosaicml--mpt-7b-chat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ model_config:
initialization:
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-chat/main-safetensors/
s3_sync_args:
- "--no-sign-request"
initializer:
type: TextGenerationInference
model_init_kwargs:
Expand Down
2 changes: 2 additions & 0 deletions models/continuous_batching/mosaicml--mpt-7b-instruct.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ model_config:
initialization:
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-instruct/main-safetensors/
s3_sync_args:
- "--no-sign-request"
initializer:
type: TextGenerationInference
model_init_kwargs:
Expand Down
2 changes: 2 additions & 0 deletions models/continuous_batching/mosaicml--mpt-7b-storywriter.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ model_config:
initialization:
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-storywriter/main-safetensors/
s3_sync_args:
- "--no-sign-request"
initializer:
type: TextGenerationInference
model_init_kwargs:
Expand Down
2 changes: 2 additions & 0 deletions models/static_batching/CarperAI--stable-vicuna-13b-delta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ model_config:
- transformers<4.30
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/restricted/models--CarperAI--stable-vicuna-13b-delta/main-safetensors/
s3_sync_args:
- "--no-sign-request"
initializer:
type: DeepSpeed
dtype: float16
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ model_config:
initialization:
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/models--OpenAssistant--falcon-40b-sft-top1-560/main-safetensors/
s3_sync_args:
- "--no-sign-request"
initializer:
type: DeepSpeed
dtype: float16
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ model_config:
initialization:
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/models--OpenAssistant--falcon-7b-sft-top1-696/main-safetensors/
s3_sync_args:
- "--no-sign-request"
initializer:
type: DeviceMap
dtype: bfloat16
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ model_config:
- transformers<4.30
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/restricted/models--OpenAssistant--oasst-sft-7-llama-30b-xor/main-safetensors/
s3_sync_args:
- "--no-sign-request"
initializer:
type: DeepSpeed
dtype: float16
Expand Down
2 changes: 2 additions & 0 deletions models/static_batching/RWKV--rwkv-raven-14b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ model_config:
initialization:
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/models--RWKV--rwkv-raven-14b/main-safetensors/
s3_sync_args:
- "--no-sign-request"
initializer:
type: DeviceMap
dtype: float16
Expand Down
2 changes: 2 additions & 0 deletions models/static_batching/amazon--LightGPT.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ model_config:
- deepspeed==0.9.2
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/models--amazon--LightGPT/main-safetensors/
s3_sync_args:
- "--no-sign-request"
initializer:
type: DeepSpeed
dtype: float16
Expand Down
2 changes: 2 additions & 0 deletions models/static_batching/databricks--dolly-v2-12b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ model_config:
initialization:
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/models--databricks--dolly-v2-12b/main-safetensors/
s3_sync_args:
- "--no-sign-request"
initializer:
type: DeepSpeed
dtype: float16
Expand Down
2 changes: 2 additions & 0 deletions models/static_batching/h2oai--h2ogpt-oasst1-512-12b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ model_config:
initialization:
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/models--h2oai--h2ogpt-oasst1-512-12b/main-safetensors/
s3_sync_args:
- "--no-sign-request"
initializer:
type: DeepSpeed
dtype: float16
Expand Down
2 changes: 2 additions & 0 deletions models/static_batching/lmsys--vicuna-13b-delta-v1.1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ model_config:
initialization:
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/restricted/models--lmsys--vicuna-13b-delta-v1.1/main-safetensors/
s3_sync_args:
- "--no-sign-request"
initializer:
type: DeepSpeed
dtype: float16
Expand Down
2 changes: 2 additions & 0 deletions models/static_batching/mosaicml--mpt-30b-chat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ model_config:
initialization:
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-30b-chat/main-safetensors/
s3_sync_args:
- "--no-sign-request"
initializer:
type: SingleDevice
dtype: bfloat16
Expand Down
2 changes: 2 additions & 0 deletions models/static_batching/mosaicml--mpt-7b-chat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ model_config:
initialization:
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-chat/main-safetensors/
s3_sync_args:
- "--no-sign-request"
initializer:
type: DeviceMap
dtype: bfloat16
Expand Down
2 changes: 2 additions & 0 deletions models/static_batching/mosaicml--mpt-7b-instruct.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ model_config:
initialization:
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-instruct/main-safetensors/
s3_sync_args:
- "--no-sign-request"
initializer:
type: DeviceMap
dtype: bfloat16
Expand Down
2 changes: 2 additions & 0 deletions models/static_batching/mosaicml--mpt-7b-storywriter.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ model_config:
initialization:
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-storywriter/main-safetensors/
s3_sync_args:
- "--no-sign-request"
initializer:
type: DeviceMap
dtype: bfloat16
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ model_config:
initialization:
s3_mirror_config:
bucket_uri: s3://large-dl-models-mirror/models--stabilityai--stablelm-tuned-alpha-7b/main-safetensors/
s3_sync_args:
- "--no-sign-request"
initializer:
type: DeepSpeed
dtype: float16
Expand Down

0 comments on commit 951bcd2

Please sign in to comment.