Skip to content

Commit

Permalink
chore: update worker's dockerfile and use github action to build images
Browse files Browse the repository at this point in the history
Signed-off-by: bjwswang <[email protected]>
  • Loading branch information
bjwswang committed Dec 26, 2023
1 parent 4a54e73 commit b69b195
Show file tree
Hide file tree
Showing 6 changed files with 168 additions and 35 deletions.
76 changes: 76 additions & 0 deletions .github/workflows/worker_image_build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Builds the FastChat worker image and its vLLM variant and pushes both to
# Docker Hub. Runs on pushes to main that touch either worker Dockerfile, or
# when triggered manually.
name: Build KubeAGI worker images

on:
  push:
    branches: [main]
    paths:
      - 'deploy/llms/Dockerfile.fastchat-worker'
      - 'deploy/llms/Dockerfile.fastchat-worker-vllm'
  workflow_dispatch:
env:
  # Published images are built against the official index; the Dockerfiles
  # themselves default to a mirror.
  PYTHON_INDEX_URL: https://pypi.org/simple

jobs:
  image:
    # Only publish from the upstream repository, never from forks.
    if: github.repository == 'kubeagi/arcadia'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history is needed so `git describe` can see the tags.
          fetch-depth: 0
      - name: Set Variable
        id: set-env
        run: |
          TAG=$(git describe --tags --abbrev=0 --match 'v*' 2> /dev/null) || true
          if [ -z "$TAG" ]; then
            echo "No tag found, use v0.1.0 as default"
            TAG=v0.1.0
          fi
          echo "TAG=${TAG}" >> $GITHUB_OUTPUT
          echo "DATE=$(TZ=Asia/Shanghai date +'%Y%m%d')" >> $GITHUB_OUTPUT
      - name: Show Variable
        run: echo "varibables ${{ steps.set-env.outputs.TAG }}-${{ steps.set-env.outputs.DATE }}"
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
          buildkitd-flags: --debug
          config-inline: |
            [worker.oci]
              max-parallelism = 1
      - name: Login to the dockerhub Registry
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}
      # NOTE(review): the action version was obfuscated in the source this was
      # recovered from; v2.2 is assumed - confirm before merging.
      - uses: benjlevesque/short-sha@v2.2
        name: Get short commit sha
        id: short-sha
      - name: Build and push Fastchat Worker
        id: push-worker
        uses: docker/build-push-action@v5
        with:
          context: .
          file: deploy/llms/Dockerfile.fastchat-worker
          platforms: linux/amd64,linux/arm64
          tags: |
            kubeagi/arcadia-fastchat-worker:latest
            kubeagi/arcadia-fastchat-worker:${{ steps.set-env.outputs.TAG }}
            kubeagi/arcadia-fastchat-worker:${{ steps.set-env.outputs.TAG }}-${{ steps.set-env.outputs.DATE }}-${{ steps.short-sha.outputs.sha }}
          push: true
          build-args: |
            PYTHON_INDEX_URL=${{ env.PYTHON_INDEX_URL }}
      # The vLLM image is layered on top of the worker image pushed by the
      # previous step, so it must use the vLLM Dockerfile and receive the
      # exact tag that was just published as BASE_IMAGE_VERSION.
      - name: Build and push Fastchat vLLM Worker
        id: push-vllm-worker
        uses: docker/build-push-action@v5
        with:
          context: .
          file: deploy/llms/Dockerfile.fastchat-worker-vllm
          platforms: linux/amd64,linux/arm64
          tags: |
            kubeagi/arcadia-fastchat-worker:vllm-${{ steps.set-env.outputs.TAG }}
            kubeagi/arcadia-fastchat-worker:vllm-${{ steps.set-env.outputs.TAG }}-${{ steps.set-env.outputs.DATE }}-${{ steps.short-sha.outputs.sha }}
          push: true
          build-args: |
            BASE_IMAGE_VERSION=${{ steps.set-env.outputs.TAG }}-${{ steps.set-env.outputs.DATE }}-${{ steps.short-sha.outputs.sha }}
            PYTHON_INDEX_URL=${{ env.PYTHON_INDEX_URL }}
42 changes: 42 additions & 0 deletions .github/workflows/worker_image_build_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Pull-request check: verifies that the FastChat worker image still builds for
# every target platform. Nothing is pushed to a registry.
name: Test build KubeAGI worker images

on:
  pull_request:
    branches: [main]
    paths:
      - 'deploy/llms/Dockerfile.fastchat-worker'
  workflow_dispatch:
env:
  PYTHON_INDEX_URL: https://pypi.org/simple

jobs:
  test_image_build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
          buildkitd-flags: --debug
          config-inline: |
            [worker.oci]
              max-parallelism = 1
      # NOTE(review): this installs GCC on the runner, while the image build
      # itself runs inside BuildKit - confirm the step is still required.
      - name: Set up GCC
        uses: egor-tensin/setup-gcc@v1
        with:
          version: latest
          platform: x64
      - name: Build Fastchat Worker (no push)
        id: push-worker
        uses: docker/build-push-action@v5
        with:
          context: .
          file: deploy/llms/Dockerfile.fastchat-worker
          platforms: linux/amd64,linux/arm64
          push: false
          build-args: |
            PYTHON_INDEX_URL=${{ env.PYTHON_INDEX_URL }}
13 changes: 9 additions & 4 deletions deploy/llms/Dockerfile.fastchat-server
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
# FastChat server image: python:3.9-slim plus the fschat package.
FROM python:3.9-slim

# apt mirror host; override with --build-arg PACKAGE_REGISTRY=deb.debian.org
# to use the upstream Debian servers.
ARG PACKAGE_REGISTRY="mirrors.tuna.tsinghua.edu.cn"

# Point apt at the mirror selected by the build argument. Double quotes are
# required so the shell expands PACKAGE_REGISTRY inside the sed expression.
RUN sed -i "s/deb.debian.org/${PACKAGE_REGISTRY}/g" /etc/apt/sources.list.d/debian.sources

# Configure the default timezone
ENV TZ=Asia/Shanghai
RUN export DEBIAN_FRONTEND=noninteractive \
    && apt-get update \
    && apt-get install -y tzdata \
    && ln -fs /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
    && dpkg-reconfigure --frontend noninteractive tzdata \
    && rm -rf /var/lib/apt/lists/*

# update and install share one layer so the package lists are never stale;
# the lists are removed afterwards to keep the layer small.
RUN apt-get update -y \
    && apt-get install -y python3.9-distutils curl python3-pip python3-dev \
    && rm -rf /var/lib/apt/lists/*

# Official: https://pypi.org/simple
ARG PYTHON_INDEX_URL=https://pypi.mirrors.ustc.edu.cn/simple/

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN python3.9 -m pip install --no-cache-dir fschat -i ${PYTHON_INDEX_URL}
60 changes: 31 additions & 29 deletions deploy/llms/Dockerfile.fastchat-worker
Original file line number Diff line number Diff line change
@@ -1,29 +1,31 @@
# FastChat model worker image on top of the CUDA 12.2 runtime (Ubuntu 20.04).
FROM nvidia/cuda:12.2.0-runtime-ubuntu20.04

# Default timezone inside the container
ENV TZ=Asia/Shanghai

# Redirect both the archive and the security apt sources to the TUNA mirror
RUN sed -i \
      -e 's/archive.ubuntu.com/mirrors.tuna.tsinghua.edu.cn/g' \
      -e 's/security.ubuntu.com/mirrors.tuna.tsinghua.edu.cn/g' \
      /etc/apt/sources.list

# Install tzdata without prompting and link the local time to Asia/Shanghai
RUN export DEBIAN_FRONTEND=noninteractive; \
    apt-get update && \
    apt-get install -y tzdata && \
    ln -fs /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && \
    dpkg-reconfigure --frontend noninteractive tzdata

# Python 3.9 toolchain
RUN apt-get update -y && \
    apt-get install -y python3.9 python3.9-distutils curl python3-pip

# Python packages, installed one step at a time from the USTC mirror
RUN python3.9 -m pip install -i https://pypi.mirrors.ustc.edu.cn/simple/ tomli
RUN python3.9 -m pip install -i https://pypi.mirrors.ustc.edu.cn/simple/ setuptools_scm
RUN python3.9 -m pip install -i https://pypi.mirrors.ustc.edu.cn/simple/ wavedrom
RUN python3.9 -m pip install -i https://pypi.mirrors.ustc.edu.cn/simple/ fschat
RUN python3.9 -m pip install -i https://pypi.mirrors.ustc.edu.cn/simple/ fschat[model_worker,webui] pydantic==1.10.13

# Optional (disabled): vllm support
# RUN python3.9 -m pip install vllm -i https://pypi.mirrors.ustc.edu.cn/simple/

# Optional (disabled): qwen requirements
# flash-attention may also be installed to improve performance
# RUN python3.9 -m pip install transformers==4.32.0 accelerate tiktoken einops scipy transformers_stream_generator==0.0.4 -i https://pypi.mirrors.ustc.edu.cn/simple/

# Optional (disabled): qwen quantization requirements
# RUN python3.9 -m pip install auto-gptq optimum -i https://pypi.mirrors.ustc.edu.cn/simple/
# FastChat model worker image on top of the CUDA 12.2 devel image (Ubuntu 20.04).
FROM nvidia/cuda:12.2.0-devel-ubuntu20.04

# apt mirror host; override with --build-arg PACKAGE_REGISTRY=<host>
ARG PACKAGE_REGISTRY="mirrors.tuna.tsinghua.edu.cn"

# Update the package registry based on the build argument
RUN sed -i "s/archive.ubuntu.com/$PACKAGE_REGISTRY/g" /etc/apt/sources.list \
    && sed -i "s/security.ubuntu.com/$PACKAGE_REGISTRY/g" /etc/apt/sources.list

# Configure the default timezone
ENV TZ=Asia/Shanghai
RUN export DEBIAN_FRONTEND=noninteractive \
    && apt-get update \
    && apt-get install -y tzdata \
    && ln -fs /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
    && dpkg-reconfigure --frontend noninteractive tzdata \
    && rm -rf /var/lib/apt/lists/*

# Install the Python toolchain. update and install share one layer so the
# install never runs against a stale cached package list; the lists are
# removed afterwards to keep the layer small.
RUN export DEBIAN_FRONTEND=noninteractive \
    && apt-get update \
    && apt-get install -y python3.9 python3.9-distutils curl python3-pip python3-dev \
    && rm -rf /var/lib/apt/lists/*

# Official: https://pypi.org/simple
# Declared after the apt layers so that changing the index does not
# invalidate their build cache.
ARG PYTHON_INDEX_URL="https://pypi.mirrors.ustc.edu.cn/simple/"

# Install fastchat along with its dependencies.
# --no-cache-dir keeps pip's download cache out of the image layers; the
# extras spec is quoted so the shell cannot treat [..] as a glob pattern.
RUN python3.9 -m pip install --no-cache-dir tomli setuptools_scm wavedrom -i ${PYTHON_INDEX_URL}
RUN python3.9 -m pip install --no-cache-dir fschat "fschat[model_worker]" -i ${PYTHON_INDEX_URL}

# Install requirements for QWen(https://huggingface.co/Qwen/Qwen-72B-Chat)
RUN python3.9 -m pip install --no-cache-dir einops scipy transformers_stream_generator==0.0.4 deepspeed -i ${PYTHON_INDEX_URL}

# Install requirements for quantization with auto-gptq
RUN python3.9 -m pip install --no-cache-dir auto-gptq optimum -i ${PYTHON_INDEX_URL}
8 changes: 8 additions & 0 deletions deploy/llms/Dockerfile.fastchat-worker-vllm
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# vLLM variant of the FastChat worker: the base worker image plus the vllm package.
# BASE_IMAGE_VERSION selects the tag of kubeagi/arcadia-fastchat-worker to build on.
ARG BASE_IMAGE_VERSION="v0.1.0"
FROM kubeagi/arcadia-fastchat-worker:${BASE_IMAGE_VERSION}

# Official: https://pypi.org/simple
ARG PYTHON_INDEX_URL="https://pypi.mirrors.ustc.edu.cn/simple/"
# Install requirements for vllm worker.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN python3.9 -m pip install --no-cache-dir vllm -i ${PYTHON_INDEX_URL}

4 changes: 2 additions & 2 deletions pkg/worker/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,15 +152,15 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp
python3.9 -m fastchat.serve.vllm_worker --model-names $FASTCHAT_REGISTRATION_MODEL_NAME \
--model-path /data/models/$FASTCHAT_MODEL_NAME --worker-address $FASTCHAT_WORKER_ADDRESS \
--controller-address $FASTCHAT_CONTROLLER_ADDRESS \
--device $DEVICE --num-gpus $NUMBER_GPUS \
--num-gpus $NUMBER_GPUS \
--host 0.0.0.0 --port 21002 --trust-remote-code`},
Env: []corev1.EnvVar{
{Name: "FASTCHAT_WORKER_NAMESPACE", Value: runner.w.Namespace},
{Name: "FASTCHAT_REGISTRATION_MODEL_NAME", Value: runner.w.MakeRegistrationModelName()},
{Name: "FASTCHAT_MODEL_NAME", Value: model.Name},
{Name: "FASTCHAT_WORKER_ADDRESS", Value: fmt.Sprintf("http://%s.%s.svc.cluster.local:21002", runner.w.Name+WokerCommonSuffix, runner.w.Namespace)},
{Name: "FASTCHAT_CONTROLLER_ADDRESS", Value: gw.Controller},
{Name: "DEVICE", Value: runner.Device().String()},
// Ray will be used when NumberOfGPUs is more than 1
{Name: "NUMBER_GPUS", Value: runner.NumberOfGPUs()},
},
Ports: []corev1.ContainerPort{
Expand Down

0 comments on commit b69b195

Please sign in to comment.