Merge pull request #224 from israel-hdez/release-0.12.0-main-sync

Release 0.12.0 main sync
opendatahub-io · Jun 19, 2024 · 56c75d0 · 56c75d0
2 parents 11a7acb + de1cbed
commit 56c75d0
Show file tree

Hide file tree

Showing 23 changed files with 715 additions and 221 deletions.
diff --git a/Makefile b/Makefile
@@ -4,6 +4,8 @@ IMG ?= quay.io/${USER}/odh-model-controller:latest
 # ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
 ENVTEST_K8S_VERSION = 1.26
 
+ENGINE ?= docker
+
 # Setting SHELL to bash allows bash commands to be executed by recipes.
 # This is a requirement for 'setup-envtest.sh' in the test target.
 # Options are set to exit when a recipe line exits non-zero or a piped command fails.
@@ -83,13 +85,13 @@ build: generate fmt vet ## Build manager binary.
 run: manifests generate fmt vet ## Run a controller from your host.
 	go run ./main.go
 
-.PHONY: docker-build
-docker-build: test ## Build docker image with the manager.
-	docker build . -f ./Containerfile -t ${IMG}
+.PHONY: container-build
+container-build: test ## Build container image with the manager.
+	${ENGINE} build . -f ./Containerfile -t ${IMG}
 
-.PHONY: docker-push
-docker-push: ## Push docker image with the manager.
-	docker push ${IMG}
+.PHONY: container-push
+container-push: ## Push container image with the manager.
+	${ENGINE} push ${IMG}
 
 ##@ Deployment
 

diff --git a/README.md b/README.md
@@ -35,7 +35,7 @@ Build a new image with your local changes and push it to `<YOUR_IMAGE>` (by
 default `quay.io/${USER}/odh-model-controller:latest`).
 
 ```shell
-make -e IMG=<YOUR_IMAGE> docker-build docker-push
+make -e IMG=<YOUR_IMAGE> container-build container-push
 ```
 
 Deploy the manager using the image in your registry:

diff --git a/config/base/kustomization.yaml b/config/base/kustomization.yaml
@@ -20,6 +20,13 @@ vars:
       apiVersion: v1
       kind: ConfigMap
       name: odh-model-controller-parameters
+  - fieldref:
+      fieldPath: data.caikit-standalone-image
+    name: caikit-standalone-image
+    objref:
+      apiVersion: v1
+      kind: ConfigMap
+      name: odh-model-controller-parameters
   - fieldref:
       fieldPath: data.tgis-image
     name: tgis-image

diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml
@@ -85,24 +85,17 @@ rules:
   resources:
   - servicemeshcontrolplanes
   verbs:
-  - create
   - get
   - list
-  - patch
-  - update
   - use
   - watch
 - apiGroups:
   - maistra.io
   resources:
   - servicemeshmemberrolls
   verbs:
-  - create
-  - delete
   - get
   - list
-  - patch
-  - update
   - watch
 - apiGroups:
   - maistra.io
@@ -116,18 +109,6 @@ rules:
   - patch
   - update
   - watch
-- apiGroups:
-  - maistra.io
-  resources:
-  - servicemeshmembers/finalizers
-  verbs:
-  - create
-  - delete
-  - get
-  - list
-  - patch
-  - update
-  - watch
 - apiGroups:
   - monitoring.coreos.com
   resources:

diff --git a/config/runtimes/caikit-standalone-template.yaml b/config/runtimes/caikit-standalone-template.yaml
@@ -0,0 +1,76 @@
+apiVersion: template.openshift.io/v1
+kind: Template
+metadata:
+  labels:
+    opendatahub.io/dashboard: 'true'
+    opendatahub.io/ootb: 'true'
+  annotations:
+    description: Caikit is an AI toolkit that enables users to manage models through a set of developer friendly APIs. It provides a consistent format for creating and using AI models against a wide variety of data domains and tasks.
+    openshift.io/provider-display-name: Red Hat, Inc.
+    tags: rhods,rhoai,kserve,servingruntime
+    template.openshift.io/documentation-url: https://github.com/opendatahub-io/caikit-nlp
+    template.openshift.io/long-description: This template defines resources needed to deploy caikit-standalone-serving servingruntime with Red Hat Data Science KServe for LLM model
+    template.openshift.io/support-url: https://access.redhat.com
+    opendatahub.io/modelServingSupport: '["single"]'
+    opendatahub.io/apiProtocol: 'REST'
+  name: caikit-standalone-serving-template
+objects:
+  - apiVersion: serving.kserve.io/v1alpha1
+    kind: ServingRuntime
+    metadata:
+      name: caikit-standalone-runtime
+      annotations:
+        openshift.io/display-name: Caikit Standalone ServingRuntime for KServe
+        opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
+      labels:
+        opendatahub.io/dashboard: 'true'
+    spec:
+      annotations:
+        prometheus.io/port: '8086'
+        prometheus.io/path: /metrics
+      multiModel: false
+      supportedModelFormats:
+        - autoSelect: true
+          name: caikit
+      containers:
+        - name: kserve-container
+          image: $(caikit-standalone-image)
+          command:
+            - python
+            - '-m'
+            - caikit.runtime
+          env:
+            - name: RUNTIME_LOCAL_MODELS_DIR
+              value: /mnt/models
+            - name: HF_HOME
+              value: /tmp/hf_home
+            - name: RUNTIME_GRPC_ENABLED
+              value: 'false'
+            - name: RUNTIME_HTTP_ENABLED
+              value: 'true'
+          ports:
+            - containerPort: 8080
+              protocol: TCP
+          readinessProbe:
+            exec:
+              command:
+                - python
+                - -m
+                - caikit_health_probe
+                - readiness
+            initialDelaySeconds: 5
+          livenessProbe:
+            exec:
+              command:
+                - python
+                - -m
+                - caikit_health_probe
+                - liveness
+            initialDelaySeconds: 5
+          startupProbe:
+            httpGet:
+              port: 8080
+              path: /health
+            # Allow 12 mins to start
+            failureThreshold: 24
+            periodSeconds: 30
diff --git a/config/runtimes/caikit-tgis-template.yaml b/config/runtimes/caikit-tgis-template.yaml
@@ -55,4 +55,4 @@ objects:
               value: 'true'
           ports:
             - containerPort: 8080
-              protocol: TCP
+              protocol: TCP
diff --git a/config/runtimes/kustomization.yaml b/config/runtimes/kustomization.yaml
@@ -9,3 +9,4 @@ resources:
   - tgis-template.yaml
   - ovms-kserve-template.yaml
   - vllm-template.yaml
+  - caikit-standalone-template.yaml
diff --git a/controllers/comparators/smmr_comparator.go → ...mparators/servicemeshmember_comparator.go b/controllers/comparators/smmr_comparator.go → ...mparators/servicemeshmember_comparator.go
@@ -17,14 +17,15 @@ package comparators
 
 import (
 	v1 "maistra.io/api/core/v1"
-	"reflect"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
-func GetServiceMeshMemberRollComparator() ResourceComparator {
+func GetServiceMeshMemberComparator() ResourceComparator {
 	return func(deployed client.Object, requested client.Object) bool {
-		deployedSMMR := deployed.(*v1.ServiceMeshMemberRoll)
-		requestedSMMR := requested.(*v1.ServiceMeshMemberRoll)
-		return reflect.DeepEqual(deployedSMMR.Spec, requestedSMMR.Spec)
+		deployedSMM := deployed.(*v1.ServiceMeshMember)
+		requestedSMM := requested.(*v1.ServiceMeshMember)
+
+		return deployedSMM.Spec.ControlPlaneRef.Namespace == requestedSMM.Spec.ControlPlaneRef.Namespace &&
+			deployedSMM.Spec.ControlPlaneRef.Name == requestedSMM.Spec.ControlPlaneRef.Name
 	}
 }
diff --git a/controllers/constants/caikit-metrics.json b/controllers/constants/caikit-metrics.json
@@ -0,0 +1,55 @@
+{
+    "metrics": {
+        "supported": "true",
+        "config": [
+            {
+                "title": "Number of requests",
+                "type": "REQUEST_COUNT",
+                "queries": [
+                    {
+                        "title": "Number of successful incoming requests",
+                        "query": "sum(increase(predict_rpc_count_total{namespace='${NAMESPACE}',code='OK',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
+                    },
+                    {
+                        "title": "Number of failed incoming requests",
+                        "query": "sum(increase(predict_rpc_count_total{namespace='${NAMESPACE}',code!='OK',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
+                    }
+                ]
+            },
+            {
+                "title": "Average response time (ms)",
+                "type": "MEAN_LATENCY",
+                "queries": [
+                    {
+                        "title": "Average inference latency",
+                        "query": "sum by (model_id) (rate(predict_caikit_library_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[1m])) / sum by (model_id) (rate(predict_caikit_library_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
+                    },
+                    {
+                        "title": "Average e2e latency",
+                        "query": "sum by (model_id) (rate(caikit_core_load_model_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[1m]) + rate(predict_caikit_library_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[1m])) / sum by (model_id) (rate(caikit_core_load_model_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]) + rate(predict_caikit_library_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
+                    }
+                ]
+            },
+            {
+                "title": "CPU utilization %",
+                "type": "CPU_USAGE",
+                "queries": [
+                    {
+                        "title": "CPU usage",
+                        "query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'}* on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
+                    }
+                ]
+            },
+            {
+                "title": "Memory utilization %",
+                "type": "MEMORY_USAGE",
+                "queries": [
+                    {
+                        "title": "Memory usage",
+                        "query":  "sum(container_memory_working_set_bytes{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
+                    }
+                ]
+            }
+        ]
+    }
+}
diff --git a/controllers/constants/constants.go b/controllers/constants/constants.go
@@ -21,6 +21,7 @@ const (
 	IstioNamespace                   = "istio-system"
 	IstioControlPlaneName            = "data-science-smcp"
 	ServiceMeshMemberRollName        = "default"
+	ServiceMeshMemberName            = "default"
 	IstioIngressService              = "istio-ingressgateway"
 	IstioIngressServiceHTTPPortName  = "http2"
 	IstioIngressServiceHTTPSPortName = "https"

diff --git a/controllers/constants/ovms-metrics.json b/controllers/constants/ovms-metrics.json
@@ -0,0 +1,55 @@
+{
+    "metrics": {
+        "supported": "true",
+        "config": [
+            {
+                "title": "Number of requests",
+                "type": "REQUEST_COUNT",
+                "queries": [
+                    {
+                        "title": "Number of successful incoming requests",
+                        "query": "sum(increase(ovms_requests_success{namespace='${NAMESPACE}',name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
+                    },
+                    {
+                        "title": "Number of failed incoming requests",
+                        "query": "sum(increase(ovms_requests_fail{namespace='${NAMESPACE}',name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
+                    }
+                ]
+            },
+            {
+                "title": "Average response time (ms)",
+                "type": "MEAN_LATENCY",
+                "queries": [
+                    {
+                        "title": "Average inference latency",
+                        "query": "sum by (name) (rate(ovms_inference_time_us_sum{namespace='${NAMESPACE}', name='${MODEL_NAME}'}[1m])) / sum by (name) (rate(ovms_inference_time_us_count{namespace='${NAMESPACE}', name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
+                    },
+                    {
+                        "title": "Average e2e latency",
+                        "query": "sum by (name) (rate(ovms_request_time_us_sum{name='${MODEL_NAME}'}[1m])) / sum by (name) (rate(ovms_request_time_us_count{name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
+                    }
+                ]
+            },
+            {
+                "title": "CPU utilization %",
+                "type": "CPU_USAGE",
+                "queries": [
+                    {
+                        "title": "CPU usage",
+                        "query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'}* on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
+                    }
+                ]
+            },
+            {
+                "title": "Memory utilization %",
+                "type": "MEMORY_USAGE",
+                "queries": [
+                    {
+                        "title": "Memory usage",
+                        "query": "sum(container_memory_working_set_bytes{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
+                    }
+                ]
+            }
+        ]
+    }
+}
diff --git a/controllers/constants/tgis-metrics.json b/controllers/constants/tgis-metrics.json
@@ -0,0 +1,55 @@
+{
+    "metrics": {
+        "supported": "true",
+        "config": [
+            {
+                "title": "Number of requests",
+                "type": "REQUEST_COUNT",
+                "queries": [
+                    {
+                        "title": "Number of successful incoming requests",
+                        "query": "sum(increase(tgi_request_success{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
+                    },
+                    {
+                        "title": "Number of failed incoming requests",
+                        "query": "sum(increase(tgi_request_failure{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
+                    }
+                ]
+            },
+            {
+                "title": "Average response time (ms)",
+                "type": "MEAN_LATENCY",
+                "queries": [
+                    {
+                        "title": "Average inference latency",
+                        "query": "sum by (pod) (rate(tgi_request_inference_duration_sum{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}])) / sum by (pod) (rate(tgi_request_inference_duration_count{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))  "
+                    },
+                    {
+                        "title": "Average e2e latency",
+                        "query": "sum by (pod) (rate(tgi_request_duration_sum{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}])) / sum by (pod) (rate(tgi_request_duration_count{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
+                    }
+                ]
+            },
+            {
+                "title": "CPU utilization %",
+                "type": "CPU_USAGE",
+                "queries": [
+                    {
+                        "title": "CPU usage",
+                        "query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'}* on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
+                    }
+                ]
+            },
+            {
+                "title": "Memory utilization %",
+                "type": "MEMORY_USAGE",
+                "queries": [
+                    {
+                        "title": "Memory usage",
+                        "query": "sum(container_memory_working_set_bytes{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
+                    }
+                ]
+            }
+        ]
+    }
+}