From 8dbd01770d050bc1a155acf36f872f7cc231aae2 Mon Sep 17 00:00:00 2001
From: bjwswang
Date: Thu, 28 Mar 2024 02:11:46 +0000
Subject: [PATCH] fix: hardcode resource request of gpus to 1 when utilizing
 an existing ray cluster

Signed-off-by: bjwswang
---
 pkg/config/config_type.go |  2 +-
 pkg/worker/runner.go      | 14 +++++++++++---
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/pkg/config/config_type.go b/pkg/config/config_type.go
index e54103e73..0c3744d3a 100644
--- a/pkg/config/config_type.go
+++ b/pkg/config/config_type.go
@@ -112,7 +112,7 @@ func (rayCluster RayCluster) GetPythonVersion() string {
 	return rayCluster.PythonVersion
 }
 
-// DefaultRayCluster which can be used for vllm worker as local ray cluster
+// DefaultRayCluster can be used for the vllm worker as a local ray cluster, which can only utilize the GPUs of a single node
 func DefaultRayCluster() RayCluster {
 	return RayCluster{
 		Name: "default",
diff --git a/pkg/worker/runner.go b/pkg/worker/runner.go
index 26602c3cd..d2c838d99 100644
--- a/pkg/worker/runner.go
+++ b/pkg/worker/runner.go
@@ -23,6 +23,7 @@ import (
 	"strconv"
 
 	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/klog/v2"
 	"sigs.k8s.io/controller-runtime/pkg/client"
@@ -196,8 +197,12 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp
 	additionalEnvs := []corev1.EnvVar{}
 
 	// configure ray cluster
+	resources := runner.w.Spec.Resources
+	gpus := runner.NumberOfGPUs()
+	// default ray cluster, which can only utilize the GPUs of a single node
 	rayCluster := config.DefaultRayCluster()
 	for _, envItem := range runner.w.Spec.AdditionalEnvs {
+		// use an existing ray cluster when RAY_CLUSTER_INDEX is set
 		if envItem.Name == "RAY_CLUSTER_INDEX" {
 			externalRayClusterIndex, _ := strconv.Atoi(envItem.Value)
 			rayClusters, err := config.GetRayClusters(ctx, runner.c)
@@ -208,6 +213,8 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp
 				return nil, fmt.Errorf("no ray clusters configured")
 			}
 			rayCluster = rayClusters[externalRayClusterIndex]
+			// Hardcode the directly requested GPU count to 1 when using an existing ray cluster
+			resources.Limits[ResourceNvidiaGPU] = resource.MustParse("1")
 		}
 
 		// set gpu memory utilization
@@ -224,6 +231,8 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp
 			extraAgrs = envItem.Value
 		}
 	}
 
+	klog.V(5).Infof("run worker with raycluster:\n %s", rayCluster.String())
+
 	// set ray configurations into additional environments
 	additionalEnvs = append(additionalEnvs, corev1.EnvVar{
@@ -237,8 +246,7 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp
 		Value: rayCluster.GetPythonVersion(),
 	})
 	// Set gpu number to the number of GPUs in the worker's resource
-	additionalEnvs = append(additionalEnvs, corev1.EnvVar{Name: "NUMBER_GPUS", Value: runner.NumberOfGPUs()})
-	klog.V(5).Infof("run worker with raycluster:\n %s", rayCluster.String())
+	additionalEnvs = append(additionalEnvs, corev1.EnvVar{Name: "NUMBER_GPUS", Value: gpus})
 
 	modelFileDir := fmt.Sprintf("%s/%s", defaultModelMountPath, model.Name)
 	// --enforce-eager to disable cupy
@@ -287,7 +295,7 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp
 			// mount volume to /dev/shm to avoid Bus error
 			{Name: "models", MountPath: defaultShmMountPath},
 		},
-		Resources: runner.w.Spec.Resources,
+		Resources: resources,
 	}
 	container.Env = append(container.Env, additionalEnvs...)
 	return container, nil
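
Note on the change (not part of the patch): the sketch below isolates the override pattern that runner.go now applies. It is a minimal illustration, assuming that ResourceNvidiaGPU resolves to the usual device-plugin key "nvidia.com/gpu" and using a hypothetical helper name capGPULimit; it also guards against a nil Limits map, which the patched code assumes is already populated for GPU workers.

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// Assumed value of ResourceNvidiaGPU; the repository defines its own constant.
const ResourceNvidiaGPU corev1.ResourceName = "nvidia.com/gpu"

// capGPULimit is a hypothetical helper mirroring the patch: when an external
// ray cluster serves the model, the worker pod itself only needs a single GPU,
// so its GPU limit is hardcoded to 1. Unlike the patch, it also tolerates a
// nil Limits map.
func capGPULimit(res corev1.ResourceRequirements, usingExternalRayCluster bool) corev1.ResourceRequirements {
	if usingExternalRayCluster {
		if res.Limits == nil {
			res.Limits = corev1.ResourceList{}
		}
		res.Limits[ResourceNvidiaGPU] = resource.MustParse("1")
	}
	return res
}

func main() {
	// A worker that originally requested 4 GPUs.
	res := corev1.ResourceRequirements{
		Limits: corev1.ResourceList{ResourceNvidiaGPU: resource.MustParse("4")},
	}
	res = capGPULimit(res, true)
	gpu := res.Limits[ResourceNvidiaGPU]
	fmt.Println(gpu.String()) // prints "1"
}

In the patch itself the original GPU count is captured in gpus before the override, so the NUMBER_GPUS environment variable still reflects the worker's full GPU request while the pod only claims one device from the node.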