From 8dbd01770d050bc1a155acf36f872f7cc231aae2 Mon Sep 17 00:00:00 2001
From: bjwswang
Date: Thu, 28 Mar 2024 02:11:46 +0000
Subject: [PATCH] fix: hardcode resource request of gpus to 1 when utilizing
 an existing ray cluster

Signed-off-by: bjwswang
---
 pkg/config/config_type.go |  2 +-
 pkg/worker/runner.go      | 14 +++++++++++---
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/pkg/config/config_type.go b/pkg/config/config_type.go
index e54103e73..0c3744d3a 100644
--- a/pkg/config/config_type.go
+++ b/pkg/config/config_type.go
@@ -112,7 +112,7 @@ func (rayCluster RayCluster) GetPythonVersion() string {
 	return rayCluster.PythonVersion
 }
 
-// DefaultRayCluster which can be used for vllm worker as local ray cluster
+// DefaultRayCluster can be used for the vllm worker as a local ray cluster, which can only utilize the GPUs of a single node
 func DefaultRayCluster() RayCluster {
 	return RayCluster{
 		Name: "default",
diff --git a/pkg/worker/runner.go b/pkg/worker/runner.go
index 26602c3cd..d2c838d99 100644
--- a/pkg/worker/runner.go
+++ b/pkg/worker/runner.go
@@ -23,6 +23,7 @@ import (
 	"strconv"
 
 	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/klog/v2"
 	"sigs.k8s.io/controller-runtime/pkg/client"
@@ -196,8 +197,12 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp
 	additionalEnvs := []corev1.EnvVar{}
 
 	// configure ray cluster
+	resources := runner.w.Spec.Resources
+	gpus := runner.NumberOfGPUs()
+	// default ray cluster, which can only utilize the GPUs of a single node
 	rayCluster := config.DefaultRayCluster()
 	for _, envItem := range runner.w.Spec.AdditionalEnvs {
+		// use an existing ray cluster when RAY_CLUSTER_INDEX is set
 		if envItem.Name == "RAY_CLUSTER_INDEX" {
 			externalRayClusterIndex, _ := strconv.Atoi(envItem.Value)
 			rayClusters, err := config.GetRayClusters(ctx, runner.c)
@@ -208,6 +213,8 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp
 				return nil, fmt.Errorf("no ray clusters configured")
 			}
 			rayCluster = rayClusters[externalRayClusterIndex]
+			// Hardcode the directly requested GPU count to 1 when using an existing ray cluster
+			resources.Limits[ResourceNvidiaGPU] = resource.MustParse("1")
 		}
 
 		// set gpu memory utilization
@@ -224,6 +231,8 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp
 			extraAgrs = envItem.Value
 		}
 	}
 
+	klog.V(5).Infof("run worker with raycluster:\n %s", rayCluster.String())
+
 	// set ray configurations into additional environments
 	additionalEnvs = append(additionalEnvs, corev1.EnvVar{
@@ -237,8 +246,7 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp
 		Value: rayCluster.GetPythonVersion(),
 	})
 	// Set gpu number to the number of GPUs in the worker's resource
-	additionalEnvs = append(additionalEnvs, corev1.EnvVar{Name: "NUMBER_GPUS", Value: runner.NumberOfGPUs()})
-	klog.V(5).Infof("run worker with raycluster:\n %s", rayCluster.String())
+	additionalEnvs = append(additionalEnvs, corev1.EnvVar{Name: "NUMBER_GPUS", Value: gpus})
 
 	modelFileDir := fmt.Sprintf("%s/%s", defaultModelMountPath, model.Name)
 	// --enforce-eager to disable cupy
@@ -287,7 +295,7 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp
 			// mount volume to /dev/shm to avoid Bus error
 			{Name: "models", MountPath: defaultShmMountPath},
 		},
-		Resources: runner.w.Spec.Resources,
+		Resources: resources,
 	}
 	container.Env = append(container.Env, additionalEnvs...)
 	return container, nil
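
Note on the change (not part of the patch): the sketch below isolates the override pattern that runner.go now applies. It is a minimal illustration, assuming that ResourceNvidiaGPU resolves to the usual device-plugin key "nvidia.com/gpu" and using a hypothetical helper name capGPULimit; it also guards against a nil Limits map, which the patched code assumes is already populated for GPU workers.

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// Assumed value of ResourceNvidiaGPU; the repository defines its own constant.
const ResourceNvidiaGPU corev1.ResourceName = "nvidia.com/gpu"

// capGPULimit is a hypothetical helper mirroring the patch: when an external
// ray cluster serves the model, the worker pod itself only needs a single GPU,
// so its GPU limit is hardcoded to 1. Unlike the patch, it also tolerates a
// nil Limits map.
func capGPULimit(res corev1.ResourceRequirements, usingExternalRayCluster bool) corev1.ResourceRequirements {
	if usingExternalRayCluster {
		if res.Limits == nil {
			res.Limits = corev1.ResourceList{}
		}
		res.Limits[ResourceNvidiaGPU] = resource.MustParse("1")
	}
	return res
}

func main() {
	// A worker that originally requested 4 GPUs.
	res := corev1.ResourceRequirements{
		Limits: corev1.ResourceList{ResourceNvidiaGPU: resource.MustParse("4")},
	}
	res = capGPULimit(res, true)
	gpu := res.Limits[ResourceNvidiaGPU]
	fmt.Println(gpu.String()) // prints "1"
}

In the patch itself the original GPU count is captured in gpus before the override, so the NUMBER_GPUS environment variable still reflects the worker's full GPU request while the pod only claims one device from the node.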