From 951bcd20e87e3e295e5a59c8aafd1f62c64b788e Mon Sep 17 00:00:00 2001 From: Antoni Baum Date: Sat, 15 Jul 2023 20:30:30 +0000 Subject: [PATCH] Update dockerfiles, no-sign-request for S3 Signed-off-by: Antoni Baum --- deploy/_internal/backend/cluster-env.yaml | 39 ++----------------- deploy/ray/Dockerfile | 21 +++++++--- deploy/ray/Dockerfile-tgi | 4 +- deploy/ray/aviary-cluster.yaml | 21 +++------- models/README.md | 4 ++ ...penAssistant--falcon-40b-sft-top1-560.yaml | 2 + ...OpenAssistant--falcon-7b-sft-top1-696.yaml | 2 + ...nAssistant--oasst-sft-7-llama-30b-xor.yaml | 2 + .../continuous_batching/amazon--LightGPT.yaml | 2 + .../lmsys--vicuna-13b-v1.3.yaml | 2 + .../lmsys--vicuna-33b-v1.3.yaml | 2 + .../mosaicml--mpt-30b-chat.yaml | 2 + .../mosaicml--mpt-7b-chat.yaml | 2 + .../mosaicml--mpt-7b-instruct.yaml | 2 + .../mosaicml--mpt-7b-storywriter.yaml | 2 + .../CarperAI--stable-vicuna-13b-delta.yaml | 2 + ...penAssistant--falcon-40b-sft-top1-560.yaml | 2 + ...OpenAssistant--falcon-7b-sft-top1-696.yaml | 2 + ...nAssistant--oasst-sft-7-llama-30b-xor.yaml | 2 + .../static_batching/RWKV--rwkv-raven-14b.yaml | 2 + models/static_batching/amazon--LightGPT.yaml | 2 + .../databricks--dolly-v2-12b.yaml | 2 + .../h2oai--h2ogpt-oasst1-512-12b.yaml | 2 + .../lmsys--vicuna-13b-delta-v1.1.yaml | 2 + .../mosaicml--mpt-30b-chat.yaml | 2 + .../mosaicml--mpt-7b-chat.yaml | 2 + .../mosaicml--mpt-7b-instruct.yaml | 2 + .../mosaicml--mpt-7b-storywriter.yaml | 2 + .../stabilityai--stablelm-tuned-alpha-7b.yaml | 2 + 29 files changed, 78 insertions(+), 59 deletions(-) diff --git a/deploy/_internal/backend/cluster-env.yaml b/deploy/_internal/backend/cluster-env.yaml index 1bcbbbba..7c4755ff 100644 --- a/deploy/_internal/backend/cluster-env.yaml +++ b/deploy/_internal/backend/cluster-env.yaml @@ -1,41 +1,8 @@ -base_image: anyscale/ray:nightly-py310-cu118 -debian_packages: -- libaio-dev -- git-lfs -- xfsprogs +docker_image: anyscale/aviary:latest +ray_version: nightly env_vars: HF_HUB_ENABLE_HF_TRANSFER: 1 SAFETENSORS_FAST_GPU: 1 RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING: 1 XDG_CACHE_HOME: /mnt/local_storage/data/cache - TORCH_HOME: /mnt/local_storage/data/cache/torch -post_build_cmds: -- |- - echo "dedup version 3. increment this to force a rebuild." 
- pip uninstall -y torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric tensorflow - pip install -i https://download.pytorch.org/whl/cu118 torch torchvision torchaudio - pip install \ - "async_timeout" \ - "markdown-it-py[plugins]" \ - "accelerate" \ - "transformers>=4.25.1" \ - "datasets" \ - "ftfy" \ - "tensorboard" \ - "sentencepiece" \ - "Jinja2" \ - "numexpr>=2.7.3" \ - "hf_transfer" \ - "evaluate" \ - "bitsandbytes" \ - "git+https://github.com/Yard1/DeepSpeed.git@aviary" \ - "numpy<1.24" \ - "ninja" \ - "protobuf<3.21.0" \ - "git+https://github.com/huggingface/optimum.git" \ - "torchmetrics" \ - "safetensors" \ - "pydantic==1.10.7" \ - "einops" \ - "markdown-it-py[plugins]" \ - "fastapi_versioning" + TORCH_HOME: /mnt/local_storage/data/cache/torch \ No newline at end of file diff --git a/deploy/ray/Dockerfile b/deploy/ray/Dockerfile index 9183e74b..2e69b74a 100644 --- a/deploy/ray/Dockerfile +++ b/deploy/ray/Dockerfile @@ -1,22 +1,33 @@ -FROM rayproject/ray:nightly-cu118 +FROM anyscale/ray:nightly-py310-cu118 +SHELL ["/bin/bash", "-c"] -RUN sudo apt-get update && sudo apt-get install -y libaio-dev git-lfs awscli && sudo rm -rf /var/lib/apt/lists/* +# Add extra dependencies +ARG DEBIAN_FRONTEND=noninteractive +RUN sudo apt-get update && sudo apt-get install -y axel nfs-common zip unzip libaio-dev git-lfs awscli curl libssl-dev gcc pkg-config libc6-dev && sudo apt-get clean -RUN conda update -n base -c defaults conda && conda install python=3.10 RUN pip install -i https://download.pytorch.org/whl/cu118 torch torchvision torchaudio RUN pip uninstall -y ray && pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl # Created by build_aviary_wheel.sh COPY "./dist" "/home/ray/dist" -RUN cd /home/ray/dist && pip install "$(ls *.whl | head -n1)[backend, frontend]" +RUN export FORCE_CUDA=1 NVCC_PREPEND_FLAGS="--forward-unknown-opts" DS_BUILD_OPS=1 DS_BUILD_AIO=0 DS_BUILD_SPARSE_ATTN=0 TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 9.0" && cd /home/ray/dist && pip install "$(ls *.whl | head -n1)[backend, frontend]" # The build context should be the root of the repo # So this gives the model definitions COPY "./models" "/home/ray/models" +ENV RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 ENV HF_HUB_ENABLE_HF_TRANSFER=1 ENV SAFETENSORS_FAST_GPU=1 -ENV RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 + +# (Optional) Verify that dependencies from the base image still work. This +# is useful for catching dependency conflicts at build time. +RUN echo "Testing Ray Import..." 
&& python -c "import ray" +RUN ray --version +RUN jupyter --version +RUN anyscale --version +RUN sudo supervisord --version + RUN echo "Testing aviary install" && python -c "import aviary.backend" RUN (pip cache purge || true) && conda clean -a && rm -rf ~/.cache diff --git a/deploy/ray/Dockerfile-tgi b/deploy/ray/Dockerfile-tgi index 2b7c3b29..55a1bcb4 100644 --- a/deploy/ray/Dockerfile-tgi +++ b/deploy/ray/Dockerfile-tgi @@ -4,13 +4,13 @@ SHELL ["/bin/bash", "-c"] # Add extra dependencies ARG DEBIAN_FRONTEND=noninteractive -RUN sudo apt-get update && sudo apt-get install -y axel nfs-common zip unzip awscli curl libssl-dev gcc pkg-config libc6-dev && sudo apt-get clean +RUN sudo apt-get update && sudo apt-get install -y axel nfs-common zip unzip libaio-dev git-lfs awscli curl libssl-dev gcc pkg-config libc6-dev && sudo apt-get clean RUN wget "https://static.rust-lang.org/rustup/dist/x86_64-unknown-linux-gnu/rustup-init" && chmod +x rustup-init && ./rustup-init -y && rm rustup-init && source "$HOME/.cargo/env" RUN source "$HOME/.cargo/env" && PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && sudo unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && sudo unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && rm -f $PROTOC_ZIP RUN source "$HOME/.cargo/env" && pip install -i https://download.pytorch.org/whl/cu118 torch torchvision torchaudio RUN source "$HOME/.cargo/env" && pip install tensorboard ninja text-generation -RUN source "$HOME/.cargo/env" && export FORCE_CUDA=1 && TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 9.0" && git clone https://github.com/huggingface/text-generation-inference && cd text-generation-inference && git checkout e943a294bca239e26828732dd6ab5b6f95dadd0a && BUILD_EXTENSIONS=True make install +RUN source "$HOME/.cargo/env" && export FORCE_CUDA=1 && TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 9.0" && git clone https://github.com/huggingface/text-generation-inference && cd text-generation-inference && git checkout a2cf1bdb2fc0570dfca8b9ed2c8322f2040c3c07 && BUILD_EXTENSIONS=True make install RUN source "$HOME/.cargo/env" && export FORCE_CUDA=1 && TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 9.0" && cd text-generation-inference/server && BUILD_EXTENSIONS=True make install-flash-attention RUN source "$HOME/.cargo/env" && export FORCE_CUDA=1 && TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 9.0" && cd text-generation-inference/server && make install-vllm diff --git a/deploy/ray/aviary-cluster.yaml b/deploy/ray/aviary-cluster.yaml index ef235bb8..e450f84e 100644 --- a/deploy/ray/aviary-cluster.yaml +++ b/deploy/ray/aviary-cluster.yaml @@ -7,32 +7,21 @@ provider: region: us-west-2 cache_stopped_nodes: False docker: - image: "anyscale/aviary:latest" + image: "anyscale/aviary:test" # Use this image instead for continuous batching: # image: "anyscale/aviary:latest-tgi" container_name: "aviary" run_options: - --entrypoint "" -# All the 'conda activate' are necessary to ensure we are in the -# python 3.10 conda env. 
setup_commands: - - echo "(conda activate || true)" >> ~/.bashrc + - which ray || pip install -U "ray[default] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl" -head_setup_commands: - - (conda activate || true) && pip install 'boto3>=1.4.8' - -worker_setup_commands: [] - -head_start_ray_commands: - - (conda activate || true) && ray stop - - (conda activate || true) && ulimit -n 65536; ray start --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml --dashboard-host=0.0.0.0 worker_start_ray_commands: - - (conda activate || true) && ray stop - # We need to make sure RAY_HEAD_IP env var is accessible - # after conda activate. - - export RAY_HEAD_IP && echo "export RAY_HEAD_IP=$RAY_HEAD_IP" >> ~/.bashrc && (conda activate || true) && ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076 + - ray stop + # We need to make sure RAY_HEAD_IP env var is accessible. + - export RAY_HEAD_IP && echo "export RAY_HEAD_IP=$RAY_HEAD_IP" >> ~/.bashrc && ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076 available_node_types: head_node_type: diff --git a/models/README.md b/models/README.md index 9f9e4205..b8fa607d 100644 --- a/models/README.md +++ b/models/README.md @@ -124,6 +124,8 @@ model_config: # Hugging Face Hub. You can use this to speed up downloads. s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-instruct/main-safetensors/ + s3_sync_args: + - "--no-sign-request" # How to initialize the model. initializer: # Initializer type. For static batching, can be one of: @@ -193,6 +195,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-instruct/main-safetensors/ + s3_sync_args: + - "--no-sign-request" generation: prompt_format: "Below is an instruction that describes a task. 
Write a response that appropriately completes the request.\n### Instruction:\n{instruction}\n### Response:\n" stopping_sequences: ["### Response:", "### End"] diff --git a/models/continuous_batching/OpenAssistant--falcon-40b-sft-top1-560.yaml b/models/continuous_batching/OpenAssistant--falcon-40b-sft-top1-560.yaml index 76b322d4..d3a5e940 100644 --- a/models/continuous_batching/OpenAssistant--falcon-40b-sft-top1-560.yaml +++ b/models/continuous_batching/OpenAssistant--falcon-40b-sft-top1-560.yaml @@ -20,6 +20,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--OpenAssistant--falcon-40b-sft-top1-560/main-safetensors-tgi/ + s3_sync_args: + - "--no-sign-request" initializer: type: TextGenerationInference model_init_kwargs: diff --git a/models/continuous_batching/OpenAssistant--falcon-7b-sft-top1-696.yaml b/models/continuous_batching/OpenAssistant--falcon-7b-sft-top1-696.yaml index 7f464ccd..94ebd8cd 100644 --- a/models/continuous_batching/OpenAssistant--falcon-7b-sft-top1-696.yaml +++ b/models/continuous_batching/OpenAssistant--falcon-7b-sft-top1-696.yaml @@ -20,6 +20,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--OpenAssistant--falcon-7b-sft-top1-696/main-safetensors-tgi/ + s3_sync_args: + - "--no-sign-request" initializer: type: TextGenerationInference model_init_kwargs: diff --git a/models/continuous_batching/OpenAssistant--oasst-sft-7-llama-30b-xor.yaml b/models/continuous_batching/OpenAssistant--oasst-sft-7-llama-30b-xor.yaml index 1b267f38..d2c28d40 100644 --- a/models/continuous_batching/OpenAssistant--oasst-sft-7-llama-30b-xor.yaml +++ b/models/continuous_batching/OpenAssistant--oasst-sft-7-llama-30b-xor.yaml @@ -21,6 +21,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/restricted/models--OpenAssistant--oasst-sft-7-llama-30b-xor/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: TextGenerationInference model_init_kwargs: diff --git a/models/continuous_batching/amazon--LightGPT.yaml b/models/continuous_batching/amazon--LightGPT.yaml index 5b74816a..84d415b9 100644 --- a/models/continuous_batching/amazon--LightGPT.yaml +++ b/models/continuous_batching/amazon--LightGPT.yaml @@ -20,6 +20,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--amazon--LightGPT/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: TextGenerationInference pipeline: TextGenerationInference diff --git a/models/continuous_batching/lmsys--vicuna-13b-v1.3.yaml b/models/continuous_batching/lmsys--vicuna-13b-v1.3.yaml index f28f45a5..3fb086b3 100644 --- a/models/continuous_batching/lmsys--vicuna-13b-v1.3.yaml +++ b/models/continuous_batching/lmsys--vicuna-13b-v1.3.yaml @@ -21,6 +21,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--lmsys--vicuna-13b-v1.3/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: TextGenerationInference model_init_kwargs: diff --git a/models/continuous_batching/lmsys--vicuna-33b-v1.3.yaml b/models/continuous_batching/lmsys--vicuna-33b-v1.3.yaml index 43595ee8..f2c6e60c 100644 --- a/models/continuous_batching/lmsys--vicuna-33b-v1.3.yaml +++ b/models/continuous_batching/lmsys--vicuna-33b-v1.3.yaml @@ -21,6 +21,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--lmsys--vicuna-33b-v1.3/main-safetensors-tgi/ + s3_sync_args: + - 
"--no-sign-request" initializer: type: TextGenerationInference model_init_kwargs: diff --git a/models/continuous_batching/mosaicml--mpt-30b-chat.yaml b/models/continuous_batching/mosaicml--mpt-30b-chat.yaml index fc40dcd9..995d7148 100644 --- a/models/continuous_batching/mosaicml--mpt-30b-chat.yaml +++ b/models/continuous_batching/mosaicml--mpt-30b-chat.yaml @@ -20,6 +20,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-30b-chat/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: TextGenerationInference model_init_kwargs: diff --git a/models/continuous_batching/mosaicml--mpt-7b-chat.yaml b/models/continuous_batching/mosaicml--mpt-7b-chat.yaml index 52874104..e7ba9d64 100644 --- a/models/continuous_batching/mosaicml--mpt-7b-chat.yaml +++ b/models/continuous_batching/mosaicml--mpt-7b-chat.yaml @@ -20,6 +20,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-chat/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: TextGenerationInference model_init_kwargs: diff --git a/models/continuous_batching/mosaicml--mpt-7b-instruct.yaml b/models/continuous_batching/mosaicml--mpt-7b-instruct.yaml index fd5898f2..0a1b0a0e 100644 --- a/models/continuous_batching/mosaicml--mpt-7b-instruct.yaml +++ b/models/continuous_batching/mosaicml--mpt-7b-instruct.yaml @@ -20,6 +20,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-instruct/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: TextGenerationInference model_init_kwargs: diff --git a/models/continuous_batching/mosaicml--mpt-7b-storywriter.yaml b/models/continuous_batching/mosaicml--mpt-7b-storywriter.yaml index 8f6b3d42..3e33b4fe 100644 --- a/models/continuous_batching/mosaicml--mpt-7b-storywriter.yaml +++ b/models/continuous_batching/mosaicml--mpt-7b-storywriter.yaml @@ -20,6 +20,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-storywriter/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: TextGenerationInference model_init_kwargs: diff --git a/models/static_batching/CarperAI--stable-vicuna-13b-delta.yaml b/models/static_batching/CarperAI--stable-vicuna-13b-delta.yaml index 69855d82..cea0400a 100644 --- a/models/static_batching/CarperAI--stable-vicuna-13b-delta.yaml +++ b/models/static_batching/CarperAI--stable-vicuna-13b-delta.yaml @@ -22,6 +22,8 @@ model_config: - transformers<4.30 s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/restricted/models--CarperAI--stable-vicuna-13b-delta/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeepSpeed dtype: float16 diff --git a/models/static_batching/OpenAssistant--falcon-40b-sft-top1-560.yaml b/models/static_batching/OpenAssistant--falcon-40b-sft-top1-560.yaml index 5c62c382..d68a6bb0 100644 --- a/models/static_batching/OpenAssistant--falcon-40b-sft-top1-560.yaml +++ b/models/static_batching/OpenAssistant--falcon-40b-sft-top1-560.yaml @@ -20,6 +20,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--OpenAssistant--falcon-40b-sft-top1-560/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeepSpeed dtype: float16 diff --git a/models/static_batching/OpenAssistant--falcon-7b-sft-top1-696.yaml 
b/models/static_batching/OpenAssistant--falcon-7b-sft-top1-696.yaml index be45f44a..b703d06e 100644 --- a/models/static_batching/OpenAssistant--falcon-7b-sft-top1-696.yaml +++ b/models/static_batching/OpenAssistant--falcon-7b-sft-top1-696.yaml @@ -19,6 +19,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--OpenAssistant--falcon-7b-sft-top1-696/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeviceMap dtype: bfloat16 diff --git a/models/static_batching/OpenAssistant--oasst-sft-7-llama-30b-xor.yaml b/models/static_batching/OpenAssistant--oasst-sft-7-llama-30b-xor.yaml index ff979d7d..f9e934fd 100644 --- a/models/static_batching/OpenAssistant--oasst-sft-7-llama-30b-xor.yaml +++ b/models/static_batching/OpenAssistant--oasst-sft-7-llama-30b-xor.yaml @@ -23,6 +23,8 @@ model_config: - transformers<4.30 s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/restricted/models--OpenAssistant--oasst-sft-7-llama-30b-xor/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeepSpeed dtype: float16 diff --git a/models/static_batching/RWKV--rwkv-raven-14b.yaml b/models/static_batching/RWKV--rwkv-raven-14b.yaml index d859d58d..c00d9391 100644 --- a/models/static_batching/RWKV--rwkv-raven-14b.yaml +++ b/models/static_batching/RWKV--rwkv-raven-14b.yaml @@ -20,6 +20,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--RWKV--rwkv-raven-14b/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeviceMap dtype: float16 diff --git a/models/static_batching/amazon--LightGPT.yaml b/models/static_batching/amazon--LightGPT.yaml index 70623536..8b94e4cb 100644 --- a/models/static_batching/amazon--LightGPT.yaml +++ b/models/static_batching/amazon--LightGPT.yaml @@ -22,6 +22,8 @@ model_config: - deepspeed==0.9.2 s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--amazon--LightGPT/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeepSpeed dtype: float16 diff --git a/models/static_batching/databricks--dolly-v2-12b.yaml b/models/static_batching/databricks--dolly-v2-12b.yaml index 2f3981a3..5d6f8a95 100644 --- a/models/static_batching/databricks--dolly-v2-12b.yaml +++ b/models/static_batching/databricks--dolly-v2-12b.yaml @@ -20,6 +20,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--databricks--dolly-v2-12b/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeepSpeed dtype: float16 diff --git a/models/static_batching/h2oai--h2ogpt-oasst1-512-12b.yaml b/models/static_batching/h2oai--h2ogpt-oasst1-512-12b.yaml index d25df35f..ae812624 100644 --- a/models/static_batching/h2oai--h2ogpt-oasst1-512-12b.yaml +++ b/models/static_batching/h2oai--h2ogpt-oasst1-512-12b.yaml @@ -19,6 +19,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--h2oai--h2ogpt-oasst1-512-12b/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeepSpeed dtype: float16 diff --git a/models/static_batching/lmsys--vicuna-13b-delta-v1.1.yaml b/models/static_batching/lmsys--vicuna-13b-delta-v1.1.yaml index b3e9b701..14f29f02 100644 --- a/models/static_batching/lmsys--vicuna-13b-delta-v1.1.yaml +++ b/models/static_batching/lmsys--vicuna-13b-delta-v1.1.yaml @@ -20,6 +20,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: 
s3://large-dl-models-mirror/restricted/models--lmsys--vicuna-13b-delta-v1.1/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeepSpeed dtype: float16 diff --git a/models/static_batching/mosaicml--mpt-30b-chat.yaml b/models/static_batching/mosaicml--mpt-30b-chat.yaml index d1b50591..6e1ea929 100644 --- a/models/static_batching/mosaicml--mpt-30b-chat.yaml +++ b/models/static_batching/mosaicml--mpt-30b-chat.yaml @@ -19,6 +19,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-30b-chat/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: SingleDevice dtype: bfloat16 diff --git a/models/static_batching/mosaicml--mpt-7b-chat.yaml b/models/static_batching/mosaicml--mpt-7b-chat.yaml index 38db2cfc..9522e8f7 100644 --- a/models/static_batching/mosaicml--mpt-7b-chat.yaml +++ b/models/static_batching/mosaicml--mpt-7b-chat.yaml @@ -19,6 +19,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-chat/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeviceMap dtype: bfloat16 diff --git a/models/static_batching/mosaicml--mpt-7b-instruct.yaml b/models/static_batching/mosaicml--mpt-7b-instruct.yaml index 894789b0..2e4e3ff3 100644 --- a/models/static_batching/mosaicml--mpt-7b-instruct.yaml +++ b/models/static_batching/mosaicml--mpt-7b-instruct.yaml @@ -19,6 +19,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-instruct/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeviceMap dtype: bfloat16 diff --git a/models/static_batching/mosaicml--mpt-7b-storywriter.yaml b/models/static_batching/mosaicml--mpt-7b-storywriter.yaml index 3a5998fb..643db516 100644 --- a/models/static_batching/mosaicml--mpt-7b-storywriter.yaml +++ b/models/static_batching/mosaicml--mpt-7b-storywriter.yaml @@ -19,6 +19,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-storywriter/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeviceMap dtype: bfloat16 diff --git a/models/static_batching/stabilityai--stablelm-tuned-alpha-7b.yaml b/models/static_batching/stabilityai--stablelm-tuned-alpha-7b.yaml index 2860a557..6a71a0a7 100644 --- a/models/static_batching/stabilityai--stablelm-tuned-alpha-7b.yaml +++ b/models/static_batching/stabilityai--stablelm-tuned-alpha-7b.yaml @@ -19,6 +19,8 @@ model_config: initialization: s3_mirror_config: bucket_uri: s3://large-dl-models-mirror/models--stabilityai--stablelm-tuned-alpha-7b/main-safetensors/ + s3_sync_args: + - "--no-sign-request" initializer: type: DeepSpeed dtype: float16
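
Note on the new "s3_sync_args" field added throughout models/: it forwards extra
flags to the "aws s3 sync" invocation that mirrors model weights out of S3, and
"--no-sign-request" lets that sync read the public mirror bucket anonymously,
without any AWS credentials configured on the node. Below is a minimal sketch of
that wiring, assuming the download path shells out to the AWS CLI; the
S3MirrorConfig dataclass and the download_model_from_s3 helper are illustrative
placeholders, not Aviary's actual internals.

    # Sketch: thread s3_sync_args from the YAML config into `aws s3 sync`.
    # Names here are hypothetical; only the CLI flags are real AWS CLI options.
    import subprocess
    from dataclasses import dataclass
    from typing import List, Optional

    @dataclass
    class S3MirrorConfig:
        """Mirrors the YAML block: bucket_uri plus optional extra CLI args."""
        bucket_uri: str
        s3_sync_args: Optional[List[str]] = None

    def download_model_from_s3(config: S3MirrorConfig, destination: str) -> None:
        """Sync model weights from the S3 mirror into a local directory.

        Extra args such as "--no-sign-request" are appended verbatim, so an
        anonymous client can read a public bucket without AWS credentials.
        """
        cmd = ["aws", "s3", "sync"]
        cmd += config.s3_sync_args or []
        cmd += [config.bucket_uri, destination]
        subprocess.run(cmd, check=True)

    if __name__ == "__main__":
        config = S3MirrorConfig(
            bucket_uri="s3://large-dl-models-mirror/models--amazon--LightGPT/main-safetensors/",
            s3_sync_args=["--no-sign-request"],
        )
        download_model_from_s3(config, "/mnt/local_storage/models/amazon--LightGPT")

Run against one of the bucket_uri values above, this would fetch the
safetensors weights to local disk even on a machine with no AWS profile set up,
which is exactly the failure mode the repeated "--no-sign-request" additions in
this patch are meant to avoid.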