From 951bcd20e87e3e295e5a59c8aafd1f62c64b788e Mon Sep 17 00:00:00 2001 From: Antoni Baum Date: Sat, 15 Jul 2023 20:30:30 +0000 Subject: [PATCH] Update dockerfiles, no-sign-request for S3 Signed-off-by: Antoni Baum --- deploy/_internal/backend/cluster-env.yaml | 39 ++----------------- deploy/ray/Dockerfile | 21 +++++++--- deploy/ray/Dockerfile-tgi | 4 +- deploy/ray/aviary-cluster.yaml | 21 +++------- models/README.md | 4 ++ ...penAssistant--falcon-40b-sft-top1-560.yaml | 2 + ...OpenAssistant--falcon-7b-sft-top1-696.yaml | 2 + ...nAssistant--oasst-sft-7-llama-30b-xor.yaml | 2 + .../continuous_batching/amazon--LightGPT.yaml | 2 + .../lmsys--vicuna-13b-v1.3.yaml | 2 + .../lmsys--vicuna-33b-v1.3.yaml | 2 + .../mosaicml--mpt-30b-chat.yaml | 2 + .../mosaicml--mpt-7b-chat.yaml | 2 + .../mosaicml--mpt-7b-instruct.yaml | 2 + .../mosaicml--mpt-7b-storywriter.yaml | 2 + .../CarperAI--stable-vicuna-13b-delta.yaml | 2 + ...penAssistant--falcon-40b-sft-top1-560.yaml | 2 + ...OpenAssistant--falcon-7b-sft-top1-696.yaml | 2 + ...nAssistant--oasst-sft-7-llama-30b-xor.yaml | 2 + .../static_batching/RWKV--rwkv-raven-14b.yaml | 2 + models/static_batching/amazon--LightGPT.yaml | 2 + .../databricks--dolly-v2-12b.yaml | 2 + .../h2oai--h2ogpt-oasst1-512-12b.yaml | 2 + .../lmsys--vicuna-13b-delta-v1.1.yaml | 2 + .../mosaicml--mpt-30b-chat.yaml | 2 + .../mosaicml--mpt-7b-chat.yaml | 2 + .../mosaicml--mpt-7b-instruct.yaml | 2 + .../mosaicml--mpt-7b-storywriter.yaml | 2 + .../stabilityai--stablelm-tuned-alpha-7b.yaml | 2 + 29 files changed, 78 insertions(+), 59 deletions(-) diff --git a/deploy/_internal/backend/cluster-env.yaml b/deploy/_internal/backend/cluster-env.yaml index 1bcbbbba..7c4755ff 100644 --- a/deploy/_internal/backend/cluster-env.yaml +++ b/deploy/_internal/backend/cluster-env.yaml @@ -1,41 +1,8 @@ -base_image: anyscale/ray:nightly-py310-cu118 -debian_packages: -- libaio-dev -- git-lfs -- xfsprogs +docker_image: anyscale/aviary:latest +ray_version: nightly env_vars: HF_HUB_ENABLE_HF_TRANSFER: 1 SAFETENSORS_FAST_GPU: 1 RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING: 1 XDG_CACHE_HOME: /mnt/local_storage/data/cache - TORCH_HOME: /mnt/local_storage/data/cache/torch -post_build_cmds: -- |- - echo "dedup version 3. increment this to force a rebuild." 
- pip uninstall -y torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric tensorflow - pip install -i https://download.pytorch.org/whl/cu118 torch torchvision torchaudio - pip install \ - "async_timeout" \ - "markdown-it-py[plugins]" \ - "accelerate" \ - "transformers>=4.25.1" \ - "datasets" \ - "ftfy" \ - "tensorboard" \ - "sentencepiece" \ - "Jinja2" \ - "numexpr>=2.7.3" \ - "hf_transfer" \ - "evaluate" \ - "bitsandbytes" \ - "git+https://github.com/Yard1/DeepSpeed.git@aviary" \ - "numpy<1.24" \ - "ninja" \ - "protobuf<3.21.0" \ - "git+https://github.com/huggingface/optimum.git" \ - "torchmetrics" \ - "safetensors" \ - "pydantic==1.10.7" \ - "einops" \ - "markdown-it-py[plugins]" \ - "fastapi_versioning" + TORCH_HOME: /mnt/local_storage/data/cache/torch \ No newline at end of file diff --git a/deploy/ray/Dockerfile b/deploy/ray/Dockerfile index 9183e74b..2e69b74a 100644 --- a/deploy/ray/Dockerfile +++ b/deploy/ray/Dockerfile @@ -1,22 +1,33 @@ -FROM rayproject/ray:nightly-cu118 +FROM anyscale/ray:nightly-py310-cu118 +SHELL ["/bin/bash", "-c"] -RUN sudo apt-get update && sudo apt-get install -y libaio-dev git-lfs awscli && sudo rm -rf /var/lib/apt/lists/* +# Add extra dependencies +ARG DEBIAN_FRONTEND=noninteractive +RUN sudo apt-get update && sudo apt-get install -y axel nfs-common zip unzip libaio-dev git-lfs awscli curl libssl-dev gcc pkg-config libc6-dev && sudo apt-get clean -RUN conda update -n base -c defaults conda && conda install python=3.10 RUN pip install -i https://download.pytorch.org/whl/cu118 torch torchvision torchaudio RUN pip uninstall -y ray && pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl # Created by build_aviary_wheel.sh COPY "./dist" "/home/ray/dist" -RUN cd /home/ray/dist && pip install "$(ls *.whl | head -n1)[backend, frontend]" +RUN export FORCE_CUDA=1 NVCC_PREPEND_FLAGS="--forward-unknown-opts" DS_BUILD_OPS=1 DS_BUILD_AIO=0 DS_BUILD_SPARSE_ATTN=0 TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 9.0" && cd /home/ray/dist && pip install "$(ls *.whl | head -n1)[backend, frontend]" # The build context should be the root of the repo # So this gives the model definitions COPY "./models" "/home/ray/models" +ENV RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 ENV HF_HUB_ENABLE_HF_TRANSFER=1 ENV SAFETENSORS_FAST_GPU=1 -ENV RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 + +# (Optional) Verify that dependencies from the base image still work. This +# is useful for catching dependency conflicts at build time. +RUN echo "Testing Ray Import..." 
&& python -c "import ray" +RUN ray --version +RUN jupyter --version +RUN anyscale --version +RUN sudo supervisord --version + RUN echo "Testing aviary install" && python -c "import aviary.backend" RUN (pip cache purge || true) && conda clean -a && rm -rf ~/.cache diff --git a/deploy/ray/Dockerfile-tgi b/deploy/ray/Dockerfile-tgi index 2b7c3b29..55a1bcb4 100644 --- a/deploy/ray/Dockerfile-tgi +++ b/deploy/ray/Dockerfile-tgi @@ -4,13 +4,13 @@ SHELL ["/bin/bash", "-c"] # Add extra dependencies ARG DEBIAN_FRONTEND=noninteractive -RUN sudo apt-get update && sudo apt-get install -y axel nfs-common zip unzip awscli curl libssl-dev gcc pkg-config libc6-dev && sudo apt-get clean +RUN sudo apt-get update && sudo apt-get install -y axel nfs-common zip unzip libaio-dev git-lfs awscli curl libssl-dev gcc pkg-config libc6-dev && sudo apt-get clean RUN wget "https://static.rust-lang.org/rustup/dist/x86_64-unknown-linux-gnu/rustup-init" && chmod +x rustup-init && ./rustup-init -y && rm rustup-init && source "$HOME/.cargo/env" RUN source "$HOME/.cargo/env" && PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && sudo unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && sudo unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && rm -f $PROTOC_ZIP RUN source "$HOME/.cargo/env" && pip install -i https://download.pytorch.org/whl/cu118 torch torchvision torchaudio RUN source "$HOME/.cargo/env" && pip install tensorboard ninja text-generation -RUN source "$HOME/.cargo/env" && export FORCE_CUDA=1 && TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 9.0" && git clone https://github.com/huggingface/text-generation-inference && cd text-generation-inference && git checkout e943a294bca239e26828732dd6ab5b6f95dadd0a && BUILD_EXTENSIONS=True make install +RUN source "$HOME/.cargo/env" && export FORCE_CUDA=1 && TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 9.0" && git clone https://github.com/huggingface/text-generation-inference && cd text-generation-inference && git checkout a2cf1bdb2fc0570dfca8b9ed2c8322f2040c3c07 && BUILD_EXTENSIONS=True make install RUN source "$HOME/.cargo/env" && export FORCE_CUDA=1 && TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 9.0" && cd text-generation-inference/server && BUILD_EXTENSIONS=True make install-flash-attention RUN source "$HOME/.cargo/env" && export FORCE_CUDA=1 && TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 9.0" && cd text-generation-inference/server && make install-vllm diff --git a/deploy/ray/aviary-cluster.yaml b/deploy/ray/aviary-cluster.yaml index ef235bb8..e450f84e 100644 --- a/deploy/ray/aviary-cluster.yaml +++ b/deploy/ray/aviary-cluster.yaml @@ -7,32 +7,21 @@ provider: region: us-west-2 cache_stopped_nodes: False docker: - image: "anyscale/aviary:latest" + image: "anyscale/aviary:test" # Use this image instead for continuous batching: # image: "anyscale/aviary:latest-tgi" container_name: "aviary" run_options: - --entrypoint "" -# All the 'conda activate' are necessary to ensure we are in the -# python 3.10 conda env. 
setup_commands: - - echo "(conda activate || true)" >> ~/.bashrc + - which ray || pip install -U "ray[default] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl" -head_setup_commands: - - (conda activate || true) && pip install 'boto3>=1.4.8' - -worker_setup_commands: [] - -head_start_ray_commands: - - (conda activate || true) && ray stop - - (conda activate || true) && ulimit -n 65536; ray start --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml --dashboard-host=0.0.0.0 worker_start_ray_commands: - - (conda activate || true) && ray stop - # We need to make sure RAY_HEAD_IP env var is accessible - # after conda activate. - - export RAY_HEAD_IP && echo "export RAY_HEAD_IP=$RAY_HEAD_IP" >> ~/.bashrc && (conda activate || true) && ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076 + - ray stop + # We need to make sure RAY_HEAD_IP env var is accessible. + - export RAY_HEAD_IP && echo "export RAY_HEAD_IP=$RAY_HEAD_IP" >> ~/.bashrc && ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076 available_node_types: head_node_type: diff --git a/models/README.md b/models/README.md index 9f9e4205..b8fa607d 100644 --- a/models/README.md +++ b/models/README.md @@ -124,6 +124,8 @@ model_config: # Hugging Face Hub. You can use this to speed up downloads. s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-instruct/main-safetensors/ + s3_sync_args: + - "--no-sign-request" # How to initialize the model. initializer: # Initializer type. For static batching, can be one of: @@ -193,6 +195,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-instruct/main-safetensors/ + s3_sync_args: + - "--no-sign-request" generation: prompt_format: "Below is an instruction that describes a task. 
Write a response that appropriately completes the request.\n### Instruction:\n{instruction}\n### Response:\n" stopping_sequences: ["### Response:", "### End"] diff --git a/models/continuous_batching/OpenAssistant--falcon-40b-sft-top1-560.yaml b/models/continuous_batching/OpenAssistant--falcon-40b-sft-top1-560.yaml index 76b322d4..d3a5e940 100644 --- a/models/continuous_batching/OpenAssistant--falcon-40b-sft-top1-560.yaml +++ b/models/continuous_batching/OpenAssistant--falcon-40b-sft-top1-560.yaml @@ -20,6 +20,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--OpenAssistant--falcon-40b-sft-top1-560/main-safetensors-tgi/ + s3_sync_args: + - "--no-sign-request" initializer: type: TextGenerationInference model_init_kwargs: diff --git a/models/continuous_batching/OpenAssistant--falcon-7b-sft-top1-696.yaml b/models/continuous_batching/OpenAssistant--falcon-7b-sft-top1-696.yaml index 7f464ccd..94ebd8cd 100644 --- a/models/continuous_batching/OpenAssistant--falcon-7b-sft-top1-696.yaml +++ b/models/continuous_batching/OpenAssistant--falcon-7b-sft-top1-696.yaml @@ -20,6 +20,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--OpenAssistant--falcon-7b-sft-top1-696/main-safetensors-tgi/ + s3_sync_args: + - "--no-sign-request" initializer: type: TextGenerationInference model_init_kwargs: diff --git a/models/continuous_batching/OpenAssistant--oasst-sft-7-llama-30b-xor.yaml b/models/continuous_batching/OpenAssistant--oasst-sft-7-llama-30b-xor.yaml index 1b267f38..d2c28d40 100644 --- a/models/continuous_batching/OpenAssistant--oasst-sft-7-llama-30b-xor.yaml +++ b/models/continuous_batching/OpenAssistant--oasst-sft-7-llama-30b-xor.yaml @@ -21,6 +21,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/restricted/models--OpenAssistant--oasst-sft-7-llama-30b-xor/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: TextGenerationInference model_init_kwargs: diff --git a/models/continuous_batching/amazon--LightGPT.yaml b/models/continuous_batching/amazon--LightGPT.yaml index 5b74816a..84d415b9 100644 --- a/models/continuous_batching/amazon--LightGPT.yaml +++ b/models/continuous_batching/amazon--LightGPT.yaml @@ -20,6 +20,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--amazon--LightGPT/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: TextGenerationInference pipeline: TextGenerationInference diff --git a/models/continuous_batching/lmsys--vicuna-13b-v1.3.yaml b/models/continuous_batching/lmsys--vicuna-13b-v1.3.yaml index f28f45a5..3fb086b3 100644 --- a/models/continuous_batching/lmsys--vicuna-13b-v1.3.yaml +++ b/models/continuous_batching/lmsys--vicuna-13b-v1.3.yaml @@ -21,6 +21,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--lmsys--vicuna-13b-v1.3/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: TextGenerationInference model_init_kwargs: diff --git a/models/continuous_batching/lmsys--vicuna-33b-v1.3.yaml b/models/continuous_batching/lmsys--vicuna-33b-v1.3.yaml index 43595ee8..f2c6e60c 100644 --- a/models/continuous_batching/lmsys--vicuna-33b-v1.3.yaml +++ b/models/continuous_batching/lmsys--vicuna-33b-v1.3.yaml @@ -21,6 +21,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--lmsys--vicuna-33b-v1.3/main-safetensors-tgi/ + s3_sync_args: + - 
"--no-sign-request" initializer: type: TextGenerationInference model_init_kwargs: diff --git a/models/continuous_batching/mosaicml--mpt-30b-chat.yaml b/models/continuous_batching/mosaicml--mpt-30b-chat.yaml index fc40dcd9..995d7148 100644 --- a/models/continuous_batching/mosaicml--mpt-30b-chat.yaml +++ b/models/continuous_batching/mosaicml--mpt-30b-chat.yaml @@ -20,6 +20,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-30b-chat/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: TextGenerationInference model_init_kwargs: diff --git a/models/continuous_batching/mosaicml--mpt-7b-chat.yaml b/models/continuous_batching/mosaicml--mpt-7b-chat.yaml index 52874104..e7ba9d64 100644 --- a/models/continuous_batching/mosaicml--mpt-7b-chat.yaml +++ b/models/continuous_batching/mosaicml--mpt-7b-chat.yaml @@ -20,6 +20,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-chat/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: TextGenerationInference model_init_kwargs: diff --git a/models/continuous_batching/mosaicml--mpt-7b-instruct.yaml b/models/continuous_batching/mosaicml--mpt-7b-instruct.yaml index fd5898f2..0a1b0a0e 100644 --- a/models/continuous_batching/mosaicml--mpt-7b-instruct.yaml +++ b/models/continuous_batching/mosaicml--mpt-7b-instruct.yaml @@ -20,6 +20,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-instruct/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: TextGenerationInference model_init_kwargs: diff --git a/models/continuous_batching/mosaicml--mpt-7b-storywriter.yaml b/models/continuous_batching/mosaicml--mpt-7b-storywriter.yaml index 8f6b3d42..3e33b4fe 100644 --- a/models/continuous_batching/mosaicml--mpt-7b-storywriter.yaml +++ b/models/continuous_batching/mosaicml--mpt-7b-storywriter.yaml @@ -20,6 +20,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-storywriter/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: TextGenerationInference model_init_kwargs: diff --git a/models/static_batching/CarperAI--stable-vicuna-13b-delta.yaml b/models/static_batching/CarperAI--stable-vicuna-13b-delta.yaml index 69855d82..cea0400a 100644 --- a/models/static_batching/CarperAI--stable-vicuna-13b-delta.yaml +++ b/models/static_batching/CarperAI--stable-vicuna-13b-delta.yaml @@ -22,6 +22,8 @@ model_config: - transformers<4.30 s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/restricted/models--CarperAI--stable-vicuna-13b-delta/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeepSpeed dtype: float16 diff --git a/models/static_batching/OpenAssistant--falcon-40b-sft-top1-560.yaml b/models/static_batching/OpenAssistant--falcon-40b-sft-top1-560.yaml index 5c62c382..d68a6bb0 100644 --- a/models/static_batching/OpenAssistant--falcon-40b-sft-top1-560.yaml +++ b/models/static_batching/OpenAssistant--falcon-40b-sft-top1-560.yaml @@ -20,6 +20,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--OpenAssistant--falcon-40b-sft-top1-560/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeepSpeed dtype: float16 diff --git a/models/static_batching/OpenAssistant--falcon-7b-sft-top1-696.yaml 
b/models/static_batching/OpenAssistant--falcon-7b-sft-top1-696.yaml index be45f44a..b703d06e 100644 --- a/models/static_batching/OpenAssistant--falcon-7b-sft-top1-696.yaml +++ b/models/static_batching/OpenAssistant--falcon-7b-sft-top1-696.yaml @@ -19,6 +19,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--OpenAssistant--falcon-7b-sft-top1-696/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeviceMap dtype: bfloat16 diff --git a/models/static_batching/OpenAssistant--oasst-sft-7-llama-30b-xor.yaml b/models/static_batching/OpenAssistant--oasst-sft-7-llama-30b-xor.yaml index ff979d7d..f9e934fd 100644 --- a/models/static_batching/OpenAssistant--oasst-sft-7-llama-30b-xor.yaml +++ b/models/static_batching/OpenAssistant--oasst-sft-7-llama-30b-xor.yaml @@ -23,6 +23,8 @@ model_config: - transformers<4.30 s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/restricted/models--OpenAssistant--oasst-sft-7-llama-30b-xor/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeepSpeed dtype: float16 diff --git a/models/static_batching/RWKV--rwkv-raven-14b.yaml b/models/static_batching/RWKV--rwkv-raven-14b.yaml index d859d58d..c00d9391 100644 --- a/models/static_batching/RWKV--rwkv-raven-14b.yaml +++ b/models/static_batching/RWKV--rwkv-raven-14b.yaml @@ -20,6 +20,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--RWKV--rwkv-raven-14b/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeviceMap dtype: float16 diff --git a/models/static_batching/amazon--LightGPT.yaml b/models/static_batching/amazon--LightGPT.yaml index 70623536..8b94e4cb 100644 --- a/models/static_batching/amazon--LightGPT.yaml +++ b/models/static_batching/amazon--LightGPT.yaml @@ -22,6 +22,8 @@ model_config: - deepspeed==0.9.2 s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--amazon--LightGPT/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeepSpeed dtype: float16 diff --git a/models/static_batching/databricks--dolly-v2-12b.yaml b/models/static_batching/databricks--dolly-v2-12b.yaml index 2f3981a3..5d6f8a95 100644 --- a/models/static_batching/databricks--dolly-v2-12b.yaml +++ b/models/static_batching/databricks--dolly-v2-12b.yaml @@ -20,6 +20,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--databricks--dolly-v2-12b/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeepSpeed dtype: float16 diff --git a/models/static_batching/h2oai--h2ogpt-oasst1-512-12b.yaml b/models/static_batching/h2oai--h2ogpt-oasst1-512-12b.yaml index d25df35f..ae812624 100644 --- a/models/static_batching/h2oai--h2ogpt-oasst1-512-12b.yaml +++ b/models/static_batching/h2oai--h2ogpt-oasst1-512-12b.yaml @@ -19,6 +19,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--h2oai--h2ogpt-oasst1-512-12b/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeepSpeed dtype: float16 diff --git a/models/static_batching/lmsys--vicuna-13b-delta-v1.1.yaml b/models/static_batching/lmsys--vicuna-13b-delta-v1.1.yaml index b3e9b701..14f29f02 100644 --- a/models/static_batching/lmsys--vicuna-13b-delta-v1.1.yaml +++ b/models/static_batching/lmsys--vicuna-13b-delta-v1.1.yaml @@ -20,6 +20,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: 
s3://large-dl-models-mirror/restricted/models--lmsys--vicuna-13b-delta-v1.1/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeepSpeed dtype: float16 diff --git a/models/static_batching/mosaicml--mpt-30b-chat.yaml b/models/static_batching/mosaicml--mpt-30b-chat.yaml index d1b50591..6e1ea929 100644 --- a/models/static_batching/mosaicml--mpt-30b-chat.yaml +++ b/models/static_batching/mosaicml--mpt-30b-chat.yaml @@ -19,6 +19,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-30b-chat/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: SingleDevice dtype: bfloat16 diff --git a/models/static_batching/mosaicml--mpt-7b-chat.yaml b/models/static_batching/mosaicml--mpt-7b-chat.yaml index 38db2cfc..9522e8f7 100644 --- a/models/static_batching/mosaicml--mpt-7b-chat.yaml +++ b/models/static_batching/mosaicml--mpt-7b-chat.yaml @@ -19,6 +19,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-chat/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeviceMap dtype: bfloat16 diff --git a/models/static_batching/mosaicml--mpt-7b-instruct.yaml b/models/static_batching/mosaicml--mpt-7b-instruct.yaml index 894789b0..2e4e3ff3 100644 --- a/models/static_batching/mosaicml--mpt-7b-instruct.yaml +++ b/models/static_batching/mosaicml--mpt-7b-instruct.yaml @@ -19,6 +19,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-instruct/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeviceMap dtype: bfloat16 diff --git a/models/static_batching/mosaicml--mpt-7b-storywriter.yaml b/models/static_batching/mosaicml--mpt-7b-storywriter.yaml index 3a5998fb..643db516 100644 --- a/models/static_batching/mosaicml--mpt-7b-storywriter.yaml +++ b/models/static_batching/mosaicml--mpt-7b-storywriter.yaml @@ -19,6 +19,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-storywriter/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeviceMap dtype: bfloat16 diff --git a/models/static_batching/stabilityai--stablelm-tuned-alpha-7b.yaml b/models/static_batching/stabilityai--stablelm-tuned-alpha-7b.yaml index 2860a557..6a71a0a7 100644 --- a/models/static_batching/stabilityai--stablelm-tuned-alpha-7b.yaml +++ b/models/static_batching/stabilityai--stablelm-tuned-alpha-7b.yaml @@ -19,6 +19,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--stabilityai--stablelm-tuned-alpha-7b/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeepSpeed dtype: float16
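
Note on the new "s3_sync_args" field added throughout models/: it forwards extra
flags to the "aws s3 sync" invocation that mirrors model weights out of S3, and
"--no-sign-request" lets that sync read the public mirror bucket anonymously,
without any AWS credentials configured on the node. Below is a minimal sketch of
that wiring, assuming the download path shells out to the AWS CLI; the
S3MirrorConfig dataclass and the download_model_from_s3 helper are illustrative
placeholders, not Aviary's actual internals.

    # Sketch: thread s3_sync_args from the YAML config into `aws s3 sync`.
    # Names here are hypothetical; only the CLI flags are real AWS CLI options.
    import subprocess
    from dataclasses import dataclass
    from typing import List, Optional

    @dataclass
    class S3MirrorConfig:
        """Mirrors the YAML block: bucket_uri plus optional extra CLI args."""
        bucket_uri: str
        s3_sync_args: Optional[List[str]] = None

    def download_model_from_s3(config: S3MirrorConfig, destination: str) -> None:
        """Sync model weights from the S3 mirror into a local directory.

        Extra args such as "--no-sign-request" are appended verbatim, so an
        anonymous client can read a public bucket without AWS credentials.
        """
        cmd = ["aws", "s3", "sync"]
        cmd += config.s3_sync_args or []
        cmd += [config.bucket_uri, destination]
        subprocess.run(cmd, check=True)

    if __name__ == "__main__":
        config = S3MirrorConfig(
            bucket_uri="s3://large-dl-models-mirror/models--amazon--LightGPT/main-safetensors/",
            s3_sync_args=["--no-sign-request"],
        )
        download_model_from_s3(config, "/mnt/local_storage/models/amazon--LightGPT")

Run against one of the bucket_uri values above, this would fetch the
safetensors weights to local disk even on a machine with no AWS profile set up,
which is exactly the failure mode the repeated "--no-sign-request" additions in
this patch are meant to avoid.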