Skip to content

Commit

Permalink
feat: enable multiple gpus (single node) in runner fastchat
Browse files Browse the repository at this point in the history
Signed-off-by: bjwswang <[email protected]>
  • Loading branch information
bjwswang committed Dec 22, 2023
1 parent 09de386 commit 8b74fa1
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 16 deletions.
35 changes: 31 additions & 4 deletions pkg/worker/devices.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,45 @@ limitations under the License.

package worker

import (
corev1 "k8s.io/api/core/v1"
)

// Device is the kind of hardware (cpu, cuda, xpu, npu) a model runs on.
type Device string

const (
	// CPU runs the model on CPU only.
	CPU Device = "cpu"
	// CUDA runs the model on NVIDIA GPUs.
	CUDA Device = "cuda"
	// XPU is not supported yet.
	XPU Device = "xpu"
	// NPU is not supported yet.
	NPU Device = "npu"
)

// String returns the device name as a plain string (e.g. "cpu", "cuda").
func (device Device) String() string {
	return string(device)
}

const (
ResourceNvidiaGPU = "nvidia.com/gpu"
// Resource
ResourceNvidiaGPU corev1.ResourceName = "nvidia.com/gpu"
)

// DeviceBasedOnResource returns the device type based on the resource list:
// CUDA when an "nvidia.com/gpu" resource is present, CPU otherwise.
func DeviceBasedOnResource(resource corev1.ResourceList) Device {
	if _, ok := resource[ResourceNvidiaGPU]; ok {
		return CUDA
	}
	return CPU
}

// NumberOfGPUs reports, as a string, how many "nvidia.com/gpu" devices the
// resource list carries; it returns "0" when no GPU resource is present.
func NumberOfGPUs(resource corev1.ResourceList) string {
	if quantity, ok := resource[ResourceNvidiaGPU]; ok {
		return quantity.String()
	}
	return "0"
}
57 changes: 45 additions & 12 deletions pkg/worker/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,38 +28,39 @@ import (
"github.com/kubeagi/arcadia/pkg/config"
)

// ModelRunner runs a model service.
type ModelRunner interface {
	// Device reports the device type (cpu, cuda, ...) used when running the model.
	Device() Device
	// Build assembles a runner instance for the given model reference
	// (the visible implementations return a corev1 container spec as `any`).
	Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error)
}

// Compile-time check that RunnerFastchat satisfies ModelRunner.
var _ ModelRunner = (*RunnerFastchat)(nil)

// RunnerFastchat uses fastchat to run a model.
type RunnerFastchat struct {
	// c is a controller-runtime client injected at construction time.
	c client.Client
	// w is the Worker resource this runner serves.
	w *arcadiav1alpha1.Worker
}

// NewRunnerFastchat builds a fastchat-backed model runner for the given worker.
func NewRunnerFastchat(c client.Client, w *arcadiav1alpha1.Worker) (ModelRunner, error) {
	runner := &RunnerFastchat{c: c, w: w}
	return runner, nil
}

func NewRunnerFastchatVLLM(c client.Client, w *arcadiav1alpha1.Worker) (ModelRunner, error) {
return &RunnerFastchatVLLM{
c: c,
w: w,
}, nil
func (runner *RunnerFastchat) Device() Device {
return DeviceBasedOnResource(runner.w.Spec.Resources.Limits)
}

// NumberOfGPUs utilized by this runner, derived from the worker's resource limits.
func (runner *RunnerFastchat) NumberOfGPUs() string {
	return NumberOfGPUs(runner.w.Spec.Resources.Limits)
}

// Build a runner instance
func (runner *RunnerFastchat) Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error) {
if model == nil {
return nil, errors.New("nil model")
Expand All @@ -81,13 +82,16 @@ func (runner *RunnerFastchat) Build(ctx context.Context, model *arcadiav1alpha1.
python3.9 -m fastchat.serve.model_worker --model-names $FASTCHAT_REGISTRATION_MODEL_NAME \
--model-path /data/models/$FASTCHAT_MODEL_NAME --worker-address $FASTCHAT_WORKER_ADDRESS \
--controller-address $FASTCHAT_CONTROLLER_ADDRESS \
--device $DEVICE --num-gpus $NUMBER_GPUS \
--host 0.0.0.0 --port 21002`},
Env: []corev1.EnvVar{
{Name: "FASTCHAT_WORKER_NAMESPACE", Value: runner.w.Namespace},
{Name: "FASTCHAT_REGISTRATION_MODEL_NAME", Value: runner.w.MakeRegistrationModelName()},
{Name: "FASTCHAT_MODEL_NAME", Value: model.Name},
{Name: "FASTCHAT_WORKER_ADDRESS", Value: fmt.Sprintf("http://%s.%s.svc.cluster.local:21002", runner.w.Name+WokerCommonSuffix, runner.w.Namespace)},
{Name: "FASTCHAT_CONTROLLER_ADDRESS", Value: gw.Controller},
{Name: "DEVICE", Value: runner.Device().String()},
{Name: "NUMBER_GPUS", Value: runner.NumberOfGPUs()},
},
Ports: []corev1.ContainerPort{
{Name: "http", ContainerPort: 21002},
Expand All @@ -101,6 +105,32 @@ python3.9 -m fastchat.serve.model_worker --model-names $FASTCHAT_REGISTRATION_MO
return container, nil
}

// Compile-time check that RunnerFastchatVLLM satisfies ModelRunner.
var _ ModelRunner = (*RunnerFastchatVLLM)(nil)

// RunnerFastchatVLLM uses fastchat with vllm to run a model.
type RunnerFastchatVLLM struct {
	// c is a controller-runtime client injected at construction time.
	c client.Client
	// w is the Worker resource this runner serves.
	w *arcadiav1alpha1.Worker
}

// NewRunnerFastchatVLLM builds a fastchat+vllm-backed model runner for the given worker.
func NewRunnerFastchatVLLM(c client.Client, w *arcadiav1alpha1.Worker) (ModelRunner, error) {
	runner := &RunnerFastchatVLLM{c: c, w: w}
	return runner, nil
}

// Device returns the device type used by this runner,
// derived from the worker's resource limits.
func (runner *RunnerFastchatVLLM) Device() Device {
	return DeviceBasedOnResource(runner.w.Spec.Resources.Limits)
}

// NumberOfGPUs utilized by this runner, derived from the worker's resource limits.
func (runner *RunnerFastchatVLLM) NumberOfGPUs() string {
	return NumberOfGPUs(runner.w.Spec.Resources.Limits)
}

// Build a runner instance
func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error) {
if model == nil {
return nil, errors.New("nil model")
Expand All @@ -122,13 +152,16 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp
python3.9 -m fastchat.serve.vllm_worker --model-names $FASTCHAT_REGISTRATION_MODEL_NAME \
--model-path /data/models/$FASTCHAT_MODEL_NAME --worker-address $FASTCHAT_WORKER_ADDRESS \
--controller-address $FASTCHAT_CONTROLLER_ADDRESS \
--device $DEVICE --num-gpus $NUMBER_GPUS \
--host 0.0.0.0 --port 21002 --trust-remote-code`},
Env: []corev1.EnvVar{
{Name: "FASTCHAT_WORKER_NAMESPACE", Value: runner.w.Namespace},
{Name: "FASTCHAT_REGISTRATION_MODEL_NAME", Value: runner.w.MakeRegistrationModelName()},
{Name: "FASTCHAT_MODEL_NAME", Value: model.Name},
{Name: "FASTCHAT_WORKER_ADDRESS", Value: fmt.Sprintf("http://%s.%s.svc.cluster.local:21002", runner.w.Name+WokerCommonSuffix, runner.w.Namespace)},
{Name: "FASTCHAT_CONTROLLER_ADDRESS", Value: gw.Controller},
{Name: "DEVICE", Value: runner.Device().String()},
{Name: "NUMBER_GPUS", Value: runner.NumberOfGPUs()},
},
Ports: []corev1.ContainerPort{
{Name: "http", ContainerPort: 21002},
Expand Down

0 comments on commit 8b74fa1

Please sign in to comment.