kubeagi · bjwswang · Mar 4, 2024 · Feb 29, 2024
diff --git a/.github/workflows/worker_image_build_test.yml b/.github/workflows/worker_image_build_test.yml
@@ -13,6 +13,12 @@ jobs:
   test_image_build:
     runs-on: ubuntu-latest
     steps:
+      - name: Maximize build space
+        uses: easimon/maximize-build-space@v10  # pinned release tag instead of the moving master branch
+        with:
+          root-reserve-mb: 512
+          swap-size-mb: 1024
+          remove-dotnet: 'true'
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0

diff --git a/deploy/llms/Dockerfile.fastchat-worker b/deploy/llms/Dockerfile.fastchat-worker
@@ -1,34 +1,43 @@
-FROM nvidia/cuda:12.2.0-devel-ubuntu20.04
-
-
-# Define a build argument with a default value
-ARG PACKAGE_REGISTRY="mirrors.tuna.tsinghua.edu.cn"
-
-# Update the package registry based on the build argument
-RUN sed -i "s/archive.ubuntu.com/$PACKAGE_REGISTRY/g" /etc/apt/sources.list \
-    && sed -i "s/security.ubuntu.com/$PACKAGE_REGISTRY/g" /etc/apt/sources.list
-
-# Configure the default Timezone
-ENV TZ=Asia/Shanghai
-RUN export DEBIAN_FRONTEND=noninteractive \
-    && apt-get update \
-    && apt-get install -y tzdata \
-    && ln -fs /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
-    && dpkg-reconfigure --frontend noninteractive tzdata
-
-# Official: https://pypi.org/simple 
-ARG PYTHON_INDEX_URL="https://pypi.mirrors.ustc.edu.cn/simple/"
-
-# Install fastchat along with its dependencies
-RUN apt-get install -y python3.9 python3.9-distutils curl python3-pip python3-dev
-RUN python3.9 -m pip install tomli setuptools_scm wavedrom -i ${PYTHON_INDEX_URL}
-RUN python3.9 -m pip install fschat fschat[model_worker] -i ${PYTHON_INDEX_URL}
-
-# Install requirements for QWen(https://huggingface.co/Qwen/Qwen-72B-Chat)
-RUN python3.9 -m pip install einops scipy transformers_stream_generator==0.0.4 deepspeed -i ${PYTHON_INDEX_URL}
-
-# Install requirements for Qutantization with auto-gptq
-RUN python3.9 -m pip install auto-gptq optimum -i ${PYTHON_INDEX_URL}
-
-COPY deploy/llms/start-worker.sh /
-ENTRYPOINT ["/start-worker.sh"]
+FROM nvidia/cuda:12.2.0-devel-ubuntu20.04
+# FastChat model-worker image; the container starts through /start-worker.sh.
+
+# Ubuntu package mirror host (override with --build-arg PACKAGE_REGISTRY=...)
+ARG PACKAGE_REGISTRY="mirrors.tuna.tsinghua.edu.cn"
+
+# Update the package registry based on the build argument
+RUN sed -i "s/archive.ubuntu.com/$PACKAGE_REGISTRY/g" /etc/apt/sources.list \
+    && sed -i "s/security.ubuntu.com/$PACKAGE_REGISTRY/g" /etc/apt/sources.list
+
+# Configure the default timezone; git is installed here for the FastChat clone below
+ENV TZ=Asia/Shanghai
+RUN export DEBIAN_FRONTEND=noninteractive \
+    && apt-get update \
+    && apt-get install -y tzdata git \
+    && ln -fs /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
+    && dpkg-reconfigure --frontend noninteractive tzdata
+
+# Index passed to every pip install via -i. Official: https://pypi.org/simple
+ARG PYTHON_INDEX_URL="https://pypi.mirrors.ustc.edu.cn/simple/"
+# Install fastchat from source along with its dependencies; the cloned commit is saved to $HOME/.fastchat.
+# apt-get update shares a layer with install so a cached index cannot go stale; --no-cache-dir keeps pip's cache out of the image.
+RUN apt-get update && apt-get install -y python3.9 python3.9-distutils curl python3-pip python3-dev
+RUN python3.9 -m pip install --no-cache-dir tomli setuptools_scm wavedrom transformers==4.37.0 -i ${PYTHON_INDEX_URL}
+RUN python3.9 -m pip install --no-cache-dir --upgrade pip -i ${PYTHON_INDEX_URL}
+RUN git clone https://github.com/lm-sys/FastChat.git \
+    && cd FastChat \
+    && python3.9 -m pip install --no-cache-dir -e ".[model_worker]"  -i ${PYTHON_INDEX_URL} \
+    && git rev-parse HEAD > $HOME/.fastchat \
+    && cd ..
+
+# Configure the following environment variables to allow fastchat to pull model files from modelscope:
+# export VLLM_USE_MODELSCOPE=True; export FASTCHAT_USE_MODELSCOPE=True
+RUN python3.9 -m pip install --no-cache-dir modelscope pydantic==1.10.14 -i ${PYTHON_INDEX_URL}
+
+# Install requirements for QWen(https://huggingface.co/Qwen/Qwen-72B-Chat)
+RUN python3.9 -m pip install --no-cache-dir einops scipy transformers_stream_generator==0.0.4 deepspeed -i ${PYTHON_INDEX_URL}
+
+# Install requirements for Quantization with auto-gptq
+RUN python3.9 -m pip install --no-cache-dir auto-gptq optimum -i ${PYTHON_INDEX_URL}
+
+COPY deploy/llms/start-worker.sh /
+ENTRYPOINT ["/start-worker.sh"]
diff --git a/deploy/llms/Dockerfile.fastchat-worker-vllm b/deploy/llms/Dockerfile.fastchat-worker-vllm
@@ -1,4 +1,4 @@
-ARG BASE_IMAGE_VERSION="v0.2.0"
+ARG BASE_IMAGE_VERSION="v0.2.36"
 FROM kubeagi/arcadia-fastchat-worker:${BASE_IMAGE_VERSION}
 
 # Official: https://pypi.org/simple 
@@ -10,4 +10,4 @@ RUN python3.9 -m pip install vllm -i ${PYTHON_INDEX_URL}
 # for now, ray: 2.9.0, python: 3.9.x
 # this utils.py file is from ray 2.9.0 ray-ml image
 # search 'KubeAGI' in utils.py for what's changed
-COPY utils.py /usr/local/lib/python3.9/dist-packages/ray/_private/utils.py
+COPY utils.py /usr/local/lib/python3.9/dist-packages/ray/_private/utils.py
diff --git a/deploy/llms/start-worker.sh b/deploy/llms/start-worker.sh
@@ -28,7 +28,7 @@ fi
 
 echo "Run model worker..."
 python3.9 -m $FASTCHAT_WORKER_NAME --model-names $FASTCHAT_REGISTRATION_MODEL_NAME \
-    --model-path /data/models/$FASTCHAT_MODEL_NAME --worker-address $FASTCHAT_WORKER_ADDRESS \
+    --model-path $FASTCHAT_MODEL_NAME_PATH --worker-address $FASTCHAT_WORKER_ADDRESS \
     --controller-address $FASTCHAT_CONTROLLER_ADDRESS \
     --num-gpus $NUMBER_GPUS  \
     --host 0.0.0.0 --port 21002 $EXTRA_ARGS