diff --git a/deploy/ray/Dockerfile-tgi b/deploy/ray/Dockerfile-tgi
index 457c15df..6ee2e79e 100644
--- a/deploy/ray/Dockerfile-tgi
+++ b/deploy/ray/Dockerfile-tgi
@@ -10,7 +10,7 @@ RUN wget "https://static.rust-lang.org/rustup/dist/x86_64-unknown-linux-gnu/rust
 RUN source "$HOME/.cargo/env" && PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && sudo unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && sudo unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && rm -f $PROTOC_ZIP
 RUN source "$HOME/.cargo/env" && pip install -i https://download.pytorch.org/whl/cu118 torch torchvision torchaudio
 RUN source "$HOME/.cargo/env" && pip install tensorboard ninja text-generation
-RUN source "$HOME/.cargo/env" && export FORCE_CUDA=1 && git clone https://github.com/huggingface/text-generation-inference && cd text-generation-inference && git checkout 1da07e85aae8ce417dda3effd516691394dc31a1 && BUILD_EXTENSIONS=True make install
+RUN source "$HOME/.cargo/env" && export FORCE_CUDA=1 && git clone https://github.com/huggingface/text-generation-inference && cd text-generation-inference && git checkout e943a294bca239e26828732dd6ab5b6f95dadd0a && BUILD_EXTENSIONS=True make install
 RUN source "$HOME/.cargo/env" && export FORCE_CUDA=1 && cd text-generation-inference/server && BUILD_EXTENSIONS=True make install-flash-attention
 RUN source "$HOME/.cargo/env" && export FORCE_CUDA=1 && cd text-generation-inference/server && rm Makefile && mv Makefile-vllm Makefile && make install-vllm
 