Build Docker image for CUDA compute capability (CC) 9.0

Signed-off-by: Antoni Baum <[email protected]>
ray-project · Jul 10, 2023 · e005252 · e005252
1 parent 18cf3a0
commit e005252
Showing 1 changed file with 4 additions and 4 deletions.
diff --git a/deploy/ray/Dockerfile-tgi b/deploy/ray/Dockerfile-tgi
@@ -10,11 +10,11 @@ RUN wget "https://static.rust-lang.org/rustup/dist/x86_64-unknown-linux-gnu/rust
 RUN source "$HOME/.cargo/env" && PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && sudo unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && sudo unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && rm -f $PROTOC_ZIP
 RUN source "$HOME/.cargo/env" && pip install -i https://download.pytorch.org/whl/cu118 torch torchvision torchaudio
 RUN source "$HOME/.cargo/env" && pip install tensorboard ninja text-generation
-RUN source "$HOME/.cargo/env" && export FORCE_CUDA=1 && git clone https://github.com/huggingface/text-generation-inference && cd text-generation-inference && git checkout e943a294bca239e26828732dd6ab5b6f95dadd0a && BUILD_EXTENSIONS=True make install
-RUN source "$HOME/.cargo/env" && export FORCE_CUDA=1 && cd text-generation-inference/server && BUILD_EXTENSIONS=True make install-flash-attention
-RUN source "$HOME/.cargo/env" && export FORCE_CUDA=1 && cd text-generation-inference/server && rm Makefile && mv Makefile-vllm Makefile && make install-vllm
+RUN source "$HOME/.cargo/env" && export FORCE_CUDA=1 TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 9.0" && git clone https://github.com/huggingface/text-generation-inference && cd text-generation-inference && git checkout e943a294bca239e26828732dd6ab5b6f95dadd0a && BUILD_EXTENSIONS=True make install
+RUN source "$HOME/.cargo/env" && export FORCE_CUDA=1 TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 9.0" && cd text-generation-inference/server && BUILD_EXTENSIONS=True make install-flash-attention
+RUN source "$HOME/.cargo/env" && export FORCE_CUDA=1 TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 9.0" && cd text-generation-inference/server && make install-vllm
 
-RUN export FORCE_CUDA=1 NVCC_PREPEND_FLAGS="--forward-unknown-opts" DS_BUILD_OPS=1 DS_BUILD_AIO=0 DS_BUILD_SPARSE_ATTN=0 TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6" && pip install \
+RUN export FORCE_CUDA=1 NVCC_PREPEND_FLAGS="--forward-unknown-opts" DS_BUILD_OPS=1 DS_BUILD_AIO=0 DS_BUILD_SPARSE_ATTN=0 TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 9.0" && pip install \
   "awscrt" \
   "Jinja2" \
   "numexpr>=2.7.3" \