From b078ba9c1ef31183bc6c9a92b829173fc2bcaf58 Mon Sep 17 00:00:00 2001
From: Lanture1064 <34346740+Lanture1064@users.noreply.github.com>
Date: Tue, 1 Aug 2023 14:08:30 +0800
Subject: [PATCH] feat: llm service helm chart & dockerfile

---
Review notes (free-form area of the patch, not part of any file content):
* Line breaks and indentation of this patch were restored; every hunk keeps
  the line counts stated in its header and in the diffstat below.
* deployment.yaml: the API server reaches the controller through localhost.
  Both containers run in the same Pod, and the chart defines no Service
  called "fastchat-controller" (that name only exists in docker-compose.yml).
* ingress.yaml: both backends target "fschat-service", the only Service this
  chart creates. The values "controller" / "api-server" are its port names.
* ingress.yaml: the backend block is written in the networking.k8s.io/v1
  shape for every apiVersion branch; the two v1beta1 branches would need the
  legacy serviceName/servicePort fields. Left as-is, to be confirmed.
* service.yaml: added the missing newline at end of file.
* The "index" blob ids of the edited files are stale and must be regenerated.

 charts/llms/Chart.yaml                |  7 ++-
 charts/llms/templates/deployment.yaml | 26 +++++++++
 charts/llms/templates/ingress.yaml    | 58 +++++++++++++++++++
 charts/llms/templates/service.yaml    | 15 +++++
 charts/llms/values.yaml               | 81 +++++++++++++++++++++++++++
 llms/Dockerfile                       | 15 +++++
 llms/docker-compose.yml               | 21 +++++++
 7 files changed, 221 insertions(+), 2 deletions(-)
 create mode 100644 charts/llms/templates/deployment.yaml
 create mode 100644 charts/llms/templates/ingress.yaml
 create mode 100644 charts/llms/templates/service.yaml
 create mode 100644 llms/Dockerfile
 create mode 100644 llms/docker-compose.yml

diff --git a/charts/llms/Chart.yaml b/charts/llms/Chart.yaml
index ac0bae4bf..f0308fa97 100644
--- a/charts/llms/Chart.yaml
+++ b/charts/llms/Chart.yaml
@@ -1,6 +1,9 @@
 apiVersion: v2
 name: llms
 description: A Helm chart(KubeBB Component) for Arcadia LLM Services
+maintainers:
+  - name: Lanture1064
+  - name: bjwswang

 # A chart can be either an 'application' or a 'library' chart.
 #
@@ -15,10 +18,10 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.1.0
+version: 0.1.2

 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
 # It is recommended to use it with quotes.
-appVersion: "0.0.0"
+appVersion: "0.2.18"
diff --git a/charts/llms/templates/deployment.yaml b/charts/llms/templates/deployment.yaml
new file mode 100644
index 000000000..5b1f7ce41
--- /dev/null
+++ b/charts/llms/templates/deployment.yaml
@@ -0,0 +1,26 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: fschat
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: fschat
+  template:
+    metadata:
+      labels:
+        app: fschat
+    spec:
+      containers:
+        - name: fschat-controller
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
+          command: ["python3", "-m", "fastchat.serve.controller", "--host", "0.0.0.0"]
+          ports:
+            - containerPort: 21001
+
+        - name: fschat-api
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
+          command: ["python3", "-m", "fastchat.serve.openai_api_server", "--host", "0.0.0.0", "--controller-address", "http://localhost:21001"]
+          ports:
+            - containerPort: 8000
diff --git a/charts/llms/templates/ingress.yaml b/charts/llms/templates/ingress.yaml
new file mode 100644
index 000000000..f91b82521
--- /dev/null
+++ b/charts/llms/templates/ingress.yaml
@@ -0,0 +1,58 @@
+{{- if .Values.ingress.enabled -}}
+
+{{- if semverCompare "<1.19" .Capabilities.KubeVersion.GitVersion -}}
+apiVersion: extensions/v1beta1
+{{- else if semverCompare "<1.22" .Capabilities.KubeVersion.GitVersion -}}
+apiVersion: networking.k8s.io/v1beta1
+{{- else -}}
+apiVersion: networking.k8s.io/v1
+{{- end }}
+kind: Ingress
+metadata:
+  name: {{ .Release.Name }}-{{ .Chart.Name }}
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    app: {{ .Chart.Name }}
+    release: {{ .Release.Name }}
+{{- with .Values.ingress.labels }}
+{{ toYaml . | indent 4 }}
+{{- end }}
+
+{{- with .Values.ingress.annotations }}
+  annotations:
+{{ toYaml . | indent 4 }}
+{{- end }}
+spec:
+{{- if .Values.ingress.ingressClassName }}
+  ingressClassName: {{ .Values.ingress.ingressClassName }}
+{{- end }}
+
+{{- if .Values.ingress.tls }}
+  tls:
+  {{- range .Values.ingress.tls }}
+    - hosts:
+      {{- range .hosts }}
+        - {{ . | quote }}
+      {{- end }}
+      secretName: {{ .secretName }}
+  {{- end }}
+{{- end }}
+  rules:
+    - host: {{ .Values.ingress.hosts | quote }}
+      http:
+        paths:
+          - path: {{ .Values.ingress.path.controllerPath }}
+            pathType: Prefix
+            backend:
+              service:
+                name: fschat-service
+                port:
+                  number: {{ .Values.controllerService.port }}
+          - path: {{ .Values.ingress.path.apiServerPath }}
+            pathType: Prefix
+            backend:
+              service:
+                name: fschat-service
+                port:
+                  number: {{ .Values.apiService.port }}
+{{- end }}
diff --git a/charts/llms/templates/service.yaml b/charts/llms/templates/service.yaml
new file mode 100644
index 000000000..a15ab183e
--- /dev/null
+++ b/charts/llms/templates/service.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: fschat-service
+spec:
+  type: NodePort
+  selector:
+    app: fschat
+  ports:
+    - name: {{ .Values.controllerService.name }}
+      port: 21001
+      targetPort: 21001
+    - name: {{ .Values.apiService.name }}
+      port: 8000
+      targetPort: 8000
diff --git a/charts/llms/values.yaml b/charts/llms/values.yaml
index 47d237b30..da62c2e3d 100644
--- a/charts/llms/values.yaml
+++ b/charts/llms/values.yaml
@@ -2,3 +2,84 @@
 # This is a YAML-formatted file.
 # Declare variables to be passed into your templates.

+## Provide a name to substitute for the full names of resources
+##
+fullnameOverride: ""
+
+## Set default image, imageTag, and imagePullPolicy
+##
+
+image:
+  repository: kubebb/arcadia-llms
+  tag: latest
+  pullPolicy: IfNotPresent
+
+imagePullSecrets: []
+
+## Additional labels to include with deployment or statefulset
+additionalLabels: {}
+
+## Additional annotations to include with deployment or statefulset
+additionalAnnotations: {}
+
+## Typically the deployment/statefulset includes checksums of secrets/config,
+## So that when these change on a subsequent helm install, the deployment/statefulset
+## is restarted. This can result in unnecessary restarts under GitOps tooling such as
+## flux, so set to "true" to disable this behaviour.
+ignoreChartChecksums: false
+
+## Update strategy for Deployments
+DeploymentUpdate:
+  type: RollingUpdate
+  maxUnavailable: 0
+  maxSurge: 100%
+
+## Update strategy for StatefulSets
+StatefulSetUpdate:
+  updateStrategy: RollingUpdate
+
+## Pod priority settings
+## ref: https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/
+##
+priorityClassName: ""
+
+## Pod runtime class name
+## ref https://kubernetes.io/docs/concepts/containers/runtime-class/
+##
+runtimeClassName: ""
+
+## Replicas of the deployment
+##
+replicaCount: 1
+
+## Info of the services
+##
+controllerService:
+  type: NodePort
+  name: controller
+  port: 21001
+  targetPort: 21001
+
+apiService:
+  type: NodePort
+  name: api-server
+  port: 8000
+  targetPort: 8000
+
+## Ingress settings
+##
+ingress:
+  enabled: true
+  ingressClassName: "portal-ingress"
+  labels: {}
+    # node-role.kubernetes.io/ingress: platform
+
+  annotations: {}
+  path:
+    controllerPath: /controller
+    apiServerPath: /api
+  hosts: fschat.server.local
+  tls: []
+  # - secretName: chart-example-tls
+  #   hosts:
+  #     - chart-example.local
diff --git a/llms/Dockerfile b/llms/Dockerfile
new file mode 100644
index 000000000..fa623c2f1
--- /dev/null
+++ b/llms/Dockerfile
@@ -0,0 +1,15 @@
+# FROM python:3.12.0b4-alpine3.18
+# FROM ubuntu:22.04
+FROM nvidia/cuda:11.7.1-runtime-ubuntu22.04
+
+RUN sed -i 's/archive.ubuntu.com/mirrors.tuna.tsinghua.edu.cn/g' /etc/apt/sources.list
+RUN sed -i 's/security.ubuntu.com/mirrors.tuna.tsinghua.edu.cn/g' /etc/apt/sources.list
+
+RUN apt update -y && apt upgrade -y && apt install -y python3 curl
+RUN apt install python3-pip -y
+# RUN sed -i 's/dl-cdn.alpinelinux.org/mirrors.ustc.edu.cn/g' /etc/apk/repositories
+# RUN apk add curl py3-pip py3-setuptools
+# RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
+# RUN python3 get-pip.py
+RUN pip3 install fschat -i https://pypi.mirrors.ustc.edu.cn/simple/
+
diff --git a/llms/docker-compose.yml b/llms/docker-compose.yml
new file mode 100644
index 000000000..57b5d3198
--- /dev/null
+++ b/llms/docker-compose.yml
@@ -0,0 +1,21 @@
+version: "3.9"
+
+services:
+  fastchat-controller:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    image: kubebb/arcadia-llms:latest
+    ports:
+      - "21001:21001"
+    entrypoint: ["python3", "-m", "fastchat.serve.controller", "--host", "0.0.0.0", "--port", "21001"]
+  fastchat-api-server:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    image: kubebb/arcadia-llms:latest
+    ports:
+      - "8000:8000"
+    entrypoint: ["python3", "-m", "fastchat.serve.openai_api_server", "--controller-address", "http://fastchat-controller:21001", "--host", "0.0.0.0", "--port", "8000"]
+volumes:
+  huggingface: # /root/.cache/huggingface, or a git service to be built in the future