Skip to content

Commit

Permalink
Merge pull request #224 from israel-hdez/release-0.12.0-main-sync
Browse files Browse the repository at this point in the history
Release 0.12.0 main sync
  • Loading branch information
israel-hdez committed Jun 19, 2024
2 parents 11a7acb + de1cbed commit 56c75d0
Show file tree
Hide file tree
Showing 23 changed files with 715 additions and 221 deletions.
14 changes: 8 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ IMG ?= quay.io/${USER}/odh-model-controller:latest
# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
ENVTEST_K8S_VERSION = 1.26

ENGINE ?= docker

# Setting SHELL to bash allows bash commands to be executed by recipes.
# This is a requirement for 'setup-envtest.sh' in the test target.
# Options are set to exit when a recipe line exits non-zero or a piped command fails.
Expand Down Expand Up @@ -83,13 +85,13 @@ build: generate fmt vet ## Build manager binary.
run: manifests generate fmt vet ## Run a controller from your host.
go run ./main.go

.PHONY: docker-build
docker-build: test ## Build docker image with the manager.
docker build . -f ./Containerfile -t ${IMG}
.PHONY: container-build
container-build: test ## Build container image with the manager.
${ENGINE} build . -f ./Containerfile -t ${IMG}

.PHONY: docker-push
docker-push: ## Push docker image with the manager.
docker push ${IMG}
.PHONY: container-push
container-push: ## Push container image with the manager.
${ENGINE} push ${IMG}

##@ Deployment

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ Build a new image with your local changes and push it to `<YOUR_IMAGE>` (by
default `quay.io/${USER}/odh-model-controller:latest`).

```shell
make -e IMG=<YOUR_IMAGE> docker-build docker-push
make -e IMG=<YOUR_IMAGE> container-build container-push
```

Deploy the manager using the image in your registry:
Expand Down
7 changes: 7 additions & 0 deletions config/base/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,13 @@ vars:
apiVersion: v1
kind: ConfigMap
name: odh-model-controller-parameters
- fieldref:
fieldPath: data.caikit-standalone-image
name: caikit-standalone-image
objref:
apiVersion: v1
kind: ConfigMap
name: odh-model-controller-parameters
- fieldref:
fieldPath: data.tgis-image
name: tgis-image
Expand Down
19 changes: 0 additions & 19 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,24 +85,17 @@ rules:
resources:
- servicemeshcontrolplanes
verbs:
- create
- get
- list
- patch
- update
- use
- watch
- apiGroups:
- maistra.io
resources:
- servicemeshmemberrolls
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- maistra.io
Expand All @@ -116,18 +109,6 @@ rules:
- patch
- update
- watch
- apiGroups:
- maistra.io
resources:
- servicemeshmembers/finalizers
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- monitoring.coreos.com
resources:
Expand Down
76 changes: 76 additions & 0 deletions config/runtimes/caikit-standalone-template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# OpenShift Template that wraps a single KServe ServingRuntime running the
# Caikit standalone runtime (python -m caikit.runtime) with the HTTP endpoint
# enabled and the gRPC endpoint disabled.
apiVersion: template.openshift.io/v1
kind: Template
metadata:
  labels:
    opendatahub.io/dashboard: 'true'
    opendatahub.io/ootb: 'true'
  annotations:
    description: >-
      Caikit is an AI toolkit that enables users to manage models through a
      set of developer friendly APIs. It provides a consistent format for
      creating and using AI models against a wide variety of data domains and
      tasks.
    openshift.io/provider-display-name: Red Hat, Inc.
    tags: rhods,rhoai,kserve,servingruntime
    template.openshift.io/documentation-url: https://github.com/opendatahub-io/caikit-nlp
    template.openshift.io/long-description: >-
      This template defines resources needed to deploy
      caikit-standalone-serving servingruntime with Red Hat Data Science
      KServe for LLM model
    template.openshift.io/support-url: https://access.redhat.com
    # JSON-encoded list kept as a string; quoted so YAML does not parse it as
    # a flow sequence.
    opendatahub.io/modelServingSupport: '["single"]'
    opendatahub.io/apiProtocol: 'REST'
  name: caikit-standalone-serving-template
objects:
  - apiVersion: serving.kserve.io/v1alpha1
    kind: ServingRuntime
    metadata:
      name: caikit-standalone-runtime
      annotations:
        openshift.io/display-name: Caikit Standalone ServingRuntime for KServe
        opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
      labels:
        opendatahub.io/dashboard: 'true'
    spec:
      annotations:
        # NOTE(review): the scrape port (8086) is not declared under the
        # container's `ports` below — confirm the runtime exposes it.
        prometheus.io/port: '8086'
        prometheus.io/path: /metrics
      multiModel: false
      supportedModelFormats:
        - autoSelect: true
          name: caikit
      containers:
        - name: kserve-container
          # Placeholder in kustomize `$(var)` form; presumably substituted
          # from the `caikit-standalone-image` var — confirm in the base
          # kustomization.
          image: $(caikit-standalone-image)
          command:
            - python
            - '-m'
            - caikit.runtime
          env:
            - name: RUNTIME_LOCAL_MODELS_DIR
              value: /mnt/models
            - name: HF_HOME
              value: /tmp/hf_home
            # Env values must be strings, hence the quoted booleans.
            - name: RUNTIME_GRPC_ENABLED
              value: 'false'
            - name: RUNTIME_HTTP_ENABLED
              value: 'true'
          ports:
            - containerPort: 8080
              protocol: TCP
          readinessProbe:
            exec:
              command:
                - python
                - '-m'
                - caikit_health_probe
                - readiness
            initialDelaySeconds: 5
          livenessProbe:
            exec:
              command:
                - python
                - '-m'
                - caikit_health_probe
                - liveness
            initialDelaySeconds: 5
          startupProbe:
            httpGet:
              port: 8080
              path: /health
            # Allow 12 mins to start: 24 failures x 30 s period = 720 s.
            failureThreshold: 24
            periodSeconds: 30
2 changes: 1 addition & 1 deletion config/runtimes/caikit-tgis-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,4 @@ objects:
value: 'true'
ports:
- containerPort: 8080
protocol: TCP
protocol: TCP
1 change: 1 addition & 0 deletions config/runtimes/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ resources:
- tgis-template.yaml
- ovms-kserve-template.yaml
- vllm-template.yaml
- caikit-standalone-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,15 @@ package comparators

import (
v1 "maistra.io/api/core/v1"
"reflect"
"sigs.k8s.io/controller-runtime/pkg/client"
)

func GetServiceMeshMemberRollComparator() ResourceComparator {
func GetServiceMeshMemberComparator() ResourceComparator {
return func(deployed client.Object, requested client.Object) bool {
deployedSMMR := deployed.(*v1.ServiceMeshMemberRoll)
requestedSMMR := requested.(*v1.ServiceMeshMemberRoll)
return reflect.DeepEqual(deployedSMMR.Spec, requestedSMMR.Spec)
deployedSMM := deployed.(*v1.ServiceMeshMember)
requestedSMM := requested.(*v1.ServiceMeshMember)

return deployedSMM.Spec.ControlPlaneRef.Namespace == requestedSMM.Spec.ControlPlaneRef.Namespace &&
deployedSMM.Spec.ControlPlaneRef.Name == requestedSMM.Spec.ControlPlaneRef.Name
}
}
55 changes: 55 additions & 0 deletions controllers/constants/caikit-metrics.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{
"metrics": {
"supported": "true",
"config": [
{
"title": "Number of requests",
"type": "REQUEST_COUNT",
"queries": [
{
"title": "Number of successful incoming requests",
"query": "sum(increase(predict_rpc_count_total{namespace='${NAMESPACE}',code='OK',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
},
{
"title": "Number of failed incoming requests",
"query": "sum(increase(predict_rpc_count_total{namespace='${NAMESPACE}',code!='OK',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "Average response time (ms)",
"type": "MEAN_LATENCY",
"queries": [
{
"title": "Average inference latency",
"query": "sum by (model_id) (rate(predict_caikit_library_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[1m])) / sum by (model_id) (rate(predict_caikit_library_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
},
{
"title": "Average e2e latency",
"query": "sum by (model_id) (rate(caikit_core_load_model_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[1m]) + rate(predict_caikit_library_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[1m])) / sum by (model_id) (rate(caikit_core_load_model_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]) + rate(predict_caikit_library_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "CPU utilization %",
"type": "CPU_USAGE",
"queries": [
{
"title": "CPU usage",
"query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'}* on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
}
]
},
{
"title": "Memory utilization %",
"type": "MEMORY_USAGE",
"queries": [
{
"title": "Memory usage",
"query": "sum(container_memory_working_set_bytes{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
}
]
}
]
}
}
1 change: 1 addition & 0 deletions controllers/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ const (
IstioNamespace = "istio-system"
IstioControlPlaneName = "data-science-smcp"
ServiceMeshMemberRollName = "default"
ServiceMeshMemberName = "default"
IstioIngressService = "istio-ingressgateway"
IstioIngressServiceHTTPPortName = "http2"
IstioIngressServiceHTTPSPortName = "https"
Expand Down
55 changes: 55 additions & 0 deletions controllers/constants/ovms-metrics.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{
"metrics": {
"supported": "true",
"config": [
{
"title": "Number of requests",
"type": "REQUEST_COUNT",
"queries": [
{
"title": "Number of successful incoming requests",
"query": "sum(increase(ovms_requests_success{namespace='${NAMESPACE}',name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
},
{
"title": "Number of failed incoming requests",
"query": "sum(increase(ovms_requests_fail{namespace='${NAMESPACE}',name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "Average response time (ms)",
"type": "MEAN_LATENCY",
"queries": [
{
"title": "Average inference latency",
"query": "sum by (name) (rate(ovms_inference_time_us_sum{namespace='${NAMESPACE}', name='${MODEL_NAME}'}[1m])) / sum by (name) (rate(ovms_inference_time_us_count{namespace='${NAMESPACE}', name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
},
{
"title": "Average e2e latency",
"query": "sum by (name) (rate(ovms_request_time_us_sum{name='${MODEL_NAME}'}[1m])) / sum by (name) (rate(ovms_request_time_us_count{name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "CPU utilization %",
"type": "CPU_USAGE",
"queries": [
{
"title": "CPU usage",
"query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'}* on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
}
]
},
{
"title": "Memory utilization %",
"type": "MEMORY_USAGE",
"queries": [
{
"title": "Memory usage",
"query": "sum(container_memory_working_set_bytes{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
}
]
}
]
}
}
55 changes: 55 additions & 0 deletions controllers/constants/tgis-metrics.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{
"metrics": {
"supported": "true",
"config": [
{
"title": "Number of requests",
"type": "REQUEST_COUNT",
"queries": [
{
"title": "Number of successful incoming requests",
"query": "sum(increase(tgi_request_success{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
},
{
"title": "Number of failed incoming requests",
"query": "sum(increase(tgi_request_failure{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "Average response time (ms)",
"type": "MEAN_LATENCY",
"queries": [
{
"title": "Average inference latency",
"query": "sum by (pod) (rate(tgi_request_inference_duration_sum{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}])) / sum by (pod) (rate(tgi_request_inference_duration_count{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
},
{
"title": "Average e2e latency",
"query": "sum by (pod) (rate(tgi_request_duration_sum{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}])) / sum by (pod) (rate(tgi_request_duration_count{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "CPU utilization %",
"type": "CPU_USAGE",
"queries": [
{
"title": "CPU usage",
"query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'}* on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
}
]
},
{
"title": "Memory utilization %",
"type": "MEMORY_USAGE",
"queries": [
{
"title": "Memory usage",
"query": "sum(container_memory_working_set_bytes{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
}
]
}
]
}
}
Loading

0 comments on commit 56c75d0

Please sign in to comment.