From 2655001b3b1a196bb6704dd6400d9fee207ac547 Mon Sep 17 00:00:00 2001 From: bjwswang Date: Tue, 26 Mar 2024 20:59:46 +0800 Subject: [PATCH] fix: able to update annotations in knowledgebase Signed-off-by: bjwswang --- apiserver/pkg/knowledgebase/knowledgebase.go | 4 ++++ .../app-node/retriever/rerank_retriever_controller.go | 2 +- pkg/config/config.go | 3 ++- pkg/worker/runner.go | 10 ++++++++++ 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/apiserver/pkg/knowledgebase/knowledgebase.go b/apiserver/pkg/knowledgebase/knowledgebase.go index 65b098d17..967d1f084 100644 --- a/apiserver/pkg/knowledgebase/knowledgebase.go +++ b/apiserver/pkg/knowledgebase/knowledgebase.go @@ -289,6 +289,10 @@ func UpdateKnowledgeBase(ctx context.Context, c client.Client, input *generated. return nil, err } + if input.Annotations != nil { + kb.ObjectMeta.Annotations = graphqlutils.MapAny2Str(input.Annotations) + } + if input.DisplayName != nil && *input.DisplayName != kb.Spec.DisplayName { kb.Spec.DisplayName = *input.DisplayName } diff --git a/controllers/app-node/retriever/rerank_retriever_controller.go b/controllers/app-node/retriever/rerank_retriever_controller.go index 7424a2787..91804a6fb 100644 --- a/controllers/app-node/retriever/rerank_retriever_controller.go +++ b/controllers/app-node/retriever/rerank_retriever_controller.go @@ -111,7 +111,7 @@ func (r *RerankRetrieverReconciler) reconcile(ctx context.Context, log logr.Logg } } if instance.Spec.Model == nil { - model, err := config.GetDefaultRerank(ctx, r.Client) + model, err := config.GetDefaultRerankModel(ctx, r.Client) if err != nil { instance.Status.SetConditions(instance.Status.ErrorCondition(fmt.Sprintf("no model provided. please set model in reranker or set system default reranking model in config :%s", err))...) return instance, ctrl.Result{RequeueAfter: 30 * time.Second}, err diff --git a/pkg/config/config.go b/pkg/config/config.go index 928ba16d7..0b16982da 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -151,7 +151,8 @@ func GetRayClusters(ctx context.Context, c client.Client) ([]RayCluster, error) return config.RayClusters, nil } -func GetDefaultRerank(ctx context.Context, c client.Client) (*arcadiav1alpha1.TypedObjectReference, error) { +// GetDefaultRerankModel gets the default reranking model which is recommended by kubeagi +func GetDefaultRerankModel(ctx context.Context, c client.Client) (*arcadiav1alpha1.TypedObjectReference, error) { config, err := GetConfig(ctx, c) if err != nil { return nil, err diff --git a/pkg/worker/runner.go b/pkg/worker/runner.go index 167948f37..26602c3cd 100644 --- a/pkg/worker/runner.go +++ b/pkg/worker/runner.go @@ -209,6 +209,16 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp } rayCluster = rayClusters[externalRayClusterIndex] } + + // set gpu memory utilization + // The ratio (between 0 and 1) of GPU memory to reserve for the model weights, activations, and KV cache. Higher values will increase the KV cache size and thus improve the model's throughput. + // However, if the value is too high, it may cause out-of-memory (OOM) errors. + // By default, gpu_memory_utilization will be 0.9 + if envItem.Name == "GPU_MEMORY_UTILIZATION" { + gpuMemoryUtilization, _ := strconv.ParseFloat(envItem.Value, 64) + extraAgrs += fmt.Sprintf(" --gpu_memory_utilization %f", gpuMemoryUtilization) + } + // extra arguments to run llm if envItem.Name == "EXTRA_ARGS" { extraAgrs = envItem.Value