Skip to content

Commit

Permalink
[Enhancement] Support to disable probe (#569)
Browse files Browse the repository at this point in the history
Signed-off-by: yandongxiao <[email protected]>
  • Loading branch information
yandongxiao committed Jul 19, 2024
1 parent 8233bd2 commit 24ab4d5
Show file tree
Hide file tree
Showing 8 changed files with 82 additions and 11 deletions.
12 changes: 12 additions & 0 deletions config/crd/bases/starrocks.com_starrocksclusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2605,6 +2605,7 @@ spec:
will fail if the pod can't respond in 15 seconds. Your LivenessProbeFailureSeconds is
the total time of seconds before the container restart. If LivenessProbeFailureSeconds
can't be divided by defaultPeriodSeconds, the failureThreshold will be rounded up.
Note: you can set it to 0 to disable the liveness probe.
format: int32
type: integer
nodeSelector:
Expand All @@ -2625,6 +2626,7 @@ spec:
will fail if the pod can't respond in 15 seconds. Your ReadinessProbeFailureSeconds is
the total time of seconds before pods becomes not ready. If ReadinessProbeFailureSeconds
can't be divided by defaultPeriodSeconds, the failureThreshold will be rounded up.
Note: you can set it to 0 to disable the readiness probe.
format: int32
type: integer
replicas:
Expand Down Expand Up @@ -4055,6 +4057,7 @@ spec:
the total time of seconds before startupProbe give up and fail the container start.
If startupProbeFailureSeconds can't be divided by defaultPeriodSeconds, the failureThreshold
will be rounded up.
Note: you can set it to 0 to disable the startup probe.
format: int32
type: integer
storageVolumes:
Expand Down Expand Up @@ -7523,6 +7526,7 @@ spec:
will fail if the pod can't respond in 15 seconds. Your LivenessProbeFailureSeconds is
the total time of seconds before the container restart. If LivenessProbeFailureSeconds
can't be divided by defaultPeriodSeconds, the failureThreshold will be rounded up.
Note: you can set it to 0 to disable the liveness probe.
format: int32
type: integer
nodeSelector:
Expand All @@ -7543,6 +7547,7 @@ spec:
will fail if the pod can't respond in 15 seconds. Your ReadinessProbeFailureSeconds is
the total time of seconds before pods becomes not ready. If ReadinessProbeFailureSeconds
can't be divided by defaultPeriodSeconds, the failureThreshold will be rounded up.
Note: you can set it to 0 to disable the readiness probe.
format: int32
type: integer
replicas:
Expand Down Expand Up @@ -8973,6 +8978,7 @@ spec:
the total time of seconds before startupProbe give up and fail the container start.
If startupProbeFailureSeconds can't be divided by defaultPeriodSeconds, the failureThreshold
will be rounded up.
Note: you can set it to 0 to disable the startup probe.
format: int32
type: integer
storageVolumes:
Expand Down Expand Up @@ -10335,6 +10341,7 @@ spec:
will fail if the pod can't respond in 15 seconds. Your LivenessProbeFailureSeconds is
the total time of seconds before the container restart. If LivenessProbeFailureSeconds
can't be divided by defaultPeriodSeconds, the failureThreshold will be rounded up.
Note: you can set it to 0 to disable the liveness probe.
format: int32
type: integer
nodeSelector:
Expand All @@ -10355,6 +10362,7 @@ spec:
will fail if the pod can't respond in 15 seconds. Your ReadinessProbeFailureSeconds is
the total time of seconds before pods becomes not ready. If ReadinessProbeFailureSeconds
can't be divided by defaultPeriodSeconds, the failureThreshold will be rounded up.
Note: you can set it to 0 to disable the readiness probe.
format: int32
type: integer
replicas:
Expand Down Expand Up @@ -10473,6 +10481,7 @@ spec:
the total time of seconds before startupProbe give up and fail the container start.
If startupProbeFailureSeconds can't be divided by defaultPeriodSeconds, the failureThreshold
will be rounded up.
Note: you can set it to 0 to disable the startup probe.
format: int32
type: integer
storageVolumes:
Expand Down Expand Up @@ -13308,6 +13317,7 @@ spec:
will fail if the pod can't respond in 15 seconds. Your LivenessProbeFailureSeconds is
the total time of seconds before the container restart. If LivenessProbeFailureSeconds
can't be divided by defaultPeriodSeconds, the failureThreshold will be rounded up.
Note: you can set it to 0 to disable the liveness probe.
format: int32
type: integer
nodeSelector:
Expand All @@ -13328,6 +13338,7 @@ spec:
will fail if the pod can't respond in 15 seconds. Your ReadinessProbeFailureSeconds is
the total time of seconds before pods becomes not ready. If ReadinessProbeFailureSeconds
can't be divided by defaultPeriodSeconds, the failureThreshold will be rounded up.
Note: you can set it to 0 to disable the readiness probe.
format: int32
type: integer
replicas:
Expand Down Expand Up @@ -14758,6 +14769,7 @@ spec:
the total time of seconds before startupProbe give up and fail the container start.
If startupProbeFailureSeconds can't be divided by defaultPeriodSeconds, the failureThreshold
will be rounded up.
Note: you can set it to 0 to disable the startup probe.
format: int32
type: integer
storageVolumes:
Expand Down
3 changes: 3 additions & 0 deletions config/crd/bases/starrocks.com_starrockswarehouses.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3216,6 +3216,7 @@ spec:
will fail if the pod can't respond in 15 seconds. Your LivenessProbeFailureSeconds is
the total time of seconds before the container restart. If LivenessProbeFailureSeconds
can't be divided by defaultPeriodSeconds, the failureThreshold will be rounded up.
Note: you can set it to 0 to disable the liveness probe.
format: int32
type: integer
nodeSelector:
Expand All @@ -3236,6 +3237,7 @@ spec:
will fail if the pod can't respond in 15 seconds. Your ReadinessProbeFailureSeconds is
the total time of seconds before pods becomes not ready. If ReadinessProbeFailureSeconds
can't be divided by defaultPeriodSeconds, the failureThreshold will be rounded up.
Note: you can set it to 0 to disable the readiness probe.
format: int32
type: integer
replicas:
Expand Down Expand Up @@ -4666,6 +4668,7 @@ spec:
the total time of seconds before startupProbe give up and fail the container start.
If startupProbeFailureSeconds can't be divided by defaultPeriodSeconds, the failureThreshold
will be rounded up.
Note: you can set it to 0 to disable the startup probe.
format: int32
type: integer
storageVolumes:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ spec:
# the total number of seconds before the startupProbe gives up and fails the container start.
# If startupProbeFailureSeconds can't be divided by defaultPeriodSeconds, the failureThreshold
# will be rounded up.
# Note: you can set it to 0 to disable the probe.
startupProbeFailureSeconds: 300

# the spec for starrocks BE.
Expand Down
11 changes: 11 additions & 0 deletions helm-charts/charts/kube-starrocks/charts/starrocks/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -335,12 +335,15 @@ starrocksFESpec:

# StartupProbeFailureSeconds defines the total failure seconds of startup Probe.
# default value is 300 seconds
# You can set it to 0 to disable the probe.
startupProbeFailureSeconds:
# LivenessProbeFailureSeconds defines the total failure seconds of liveness Probe.
# default value is 15 seconds
# You can set it to 0 to disable the probe.
livenessProbeFailureSeconds:
# ReadinessProbeFailureSeconds defines the total failure seconds of readiness Probe.
# default value is 15 seconds
# You can set it to 0 to disable the probe.
readinessProbeFailureSeconds:
# Lifecycle describes actions that the management system should take in response to container lifecycle events.
# By default, Operator will add corresponding preStop hooks for different components. For example, the preStop
Expand Down Expand Up @@ -618,12 +621,15 @@ starrocksCnSpec:

# StartupProbeFailureSeconds defines the total failure seconds of startup Probe.
# default value is 300 seconds
# You can set it to 0 to disable the probe.
startupProbeFailureSeconds:
# LivenessProbeFailureSeconds defines the total failure seconds of liveness Probe.
# default value is 15 seconds
# You can set it to 0 to disable the probe.
livenessProbeFailureSeconds:
# ReadinessProbeFailureSeconds defines the total failure seconds of readiness Probe.
# default value is 15 seconds
# You can set it to 0 to disable the probe.
readinessProbeFailureSeconds:
# Lifecycle describes actions that the management system should take in response to container lifecycle events.
# By default, Operator will add corresponding preStop hooks for different components. For example, the preStop
Expand Down Expand Up @@ -867,12 +873,15 @@ starrocksBeSpec:

# StartupProbeFailureSeconds defines the total failure seconds of startup Probe.
# default value is 300 seconds
# You can set it to 0 to disable the probe.
startupProbeFailureSeconds:
# LivenessProbeFailureSeconds defines the total failure seconds of liveness Probe.
# default value is 15 seconds
# You can set it to 0 to disable the probe.
livenessProbeFailureSeconds:
# ReadinessProbeFailureSeconds defines the total failure seconds of readiness Probe.
# default value is 15 seconds
# You can set it to 0 to disable the probe.
readinessProbeFailureSeconds:
# Lifecycle describes actions that the management system should take in response to container lifecycle events.
# By default, Operator will add corresponding preStop hooks for different components. For example, the preStop
Expand Down Expand Up @@ -1012,9 +1021,11 @@ starrocksFeProxySpec:

# LivenessProbeFailureSeconds defines the total failure seconds of liveness Probe.
# default value is 15 seconds
# You can set it to 0 to disable the probe.
livenessProbeFailureSeconds:
# ReadinessProbeFailureSeconds defines the total failure seconds of readiness Probe.
# default value is 15 seconds
# You can set it to 0 to disable the probe.
readinessProbeFailureSeconds:
# Note: will create emptyDir volume for fe proxy, PVC is not supported.
emptyDirs: []
Expand Down
11 changes: 11 additions & 0 deletions helm-charts/charts/kube-starrocks/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -443,12 +443,15 @@ starrocks:

# StartupProbeFailureSeconds defines the total failure seconds of startup Probe.
# default value is 300 seconds
# You can set it to 0 to disable the probe.
startupProbeFailureSeconds:
# LivenessProbeFailureSeconds defines the total failure seconds of liveness Probe.
# default value is 15 seconds
# You can set it to 0 to disable the probe.
livenessProbeFailureSeconds:
# ReadinessProbeFailureSeconds defines the total failure seconds of readiness Probe.
# default value is 15 seconds
# You can set it to 0 to disable the probe.
readinessProbeFailureSeconds:
# Lifecycle describes actions that the management system should take in response to container lifecycle events.
# By default, Operator will add corresponding preStop hooks for different components. For example, the preStop
Expand Down Expand Up @@ -726,12 +729,15 @@ starrocks:

# StartupProbeFailureSeconds defines the total failure seconds of startup Probe.
# default value is 300 seconds
# You can set it to 0 to disable the probe.
startupProbeFailureSeconds:
# LivenessProbeFailureSeconds defines the total failure seconds of liveness Probe.
# default value is 15 seconds
# You can set it to 0 to disable the probe.
livenessProbeFailureSeconds:
# ReadinessProbeFailureSeconds defines the total failure seconds of readiness Probe.
# default value is 15 seconds
# You can set it to 0 to disable the probe.
readinessProbeFailureSeconds:
# Lifecycle describes actions that the management system should take in response to container lifecycle events.
# By default, Operator will add corresponding preStop hooks for different components. For example, the preStop
Expand Down Expand Up @@ -975,12 +981,15 @@ starrocks:

# StartupProbeFailureSeconds defines the total failure seconds of startup Probe.
# default value is 300 seconds
# You can set it to 0 to disable the probe.
startupProbeFailureSeconds:
# LivenessProbeFailureSeconds defines the total failure seconds of liveness Probe.
# default value is 15 seconds
# You can set it to 0 to disable the probe.
livenessProbeFailureSeconds:
# ReadinessProbeFailureSeconds defines the total failure seconds of readiness Probe.
# default value is 15 seconds
# You can set it to 0 to disable the probe.
readinessProbeFailureSeconds:
# Lifecycle describes actions that the management system should take in response to container lifecycle events.
# By default, Operator will add corresponding preStop hooks for different components. For example, the preStop
Expand Down Expand Up @@ -1120,9 +1129,11 @@ starrocks:

# LivenessProbeFailureSeconds defines the total failure seconds of liveness Probe.
# default value is 15 seconds
# You can set it to 0 to disable the probe.
livenessProbeFailureSeconds:
# ReadinessProbeFailureSeconds defines the total failure seconds of readiness Probe.
# default value is 15 seconds
# You can set it to 0 to disable the probe.
readinessProbeFailureSeconds:
# Note: will create emptyDir volume for fe proxy, PVC is not supported.
emptyDirs: []
Expand Down
3 changes: 3 additions & 0 deletions pkg/apis/starrocks/v1/load_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ type StarRocksLoadSpec struct {
// the total number of seconds before the startupProbe gives up and fails the container start.
// If startupProbeFailureSeconds can't be divided by defaultPeriodSeconds, the failureThreshold
// will be rounded up.
// Note: you can set it to 0 to disable the startup probe.
// +optional
StartupProbeFailureSeconds *int32 `json:"startupProbeFailureSeconds,omitempty"`

Expand All @@ -138,6 +139,7 @@ type StarRocksLoadSpec struct {
// will fail if the pod can't respond in 15 seconds. Your LivenessProbeFailureSeconds is
// the total number of seconds before the container is restarted. If LivenessProbeFailureSeconds
// can't be divided by defaultPeriodSeconds, the failureThreshold will be rounded up.
// Note: you can set it to 0 to disable the liveness probe.
// +optional
LivenessProbeFailureSeconds *int32 `json:"livenessProbeFailureSeconds,omitempty"`

Expand All @@ -146,6 +148,7 @@ type StarRocksLoadSpec struct {
// will fail if the pod can't respond in 15 seconds. Your ReadinessProbeFailureSeconds is
// the total number of seconds before the pod becomes not ready. If ReadinessProbeFailureSeconds
// can't be divided by defaultPeriodSeconds, the failureThreshold will be rounded up.
// Note: you can set it to 0 to disable the readiness probe.
// +optional
ReadinessProbeFailureSeconds *int32 `json:"readinessProbeFailureSeconds,omitempty"`

Expand Down
7 changes: 6 additions & 1 deletion pkg/k8sutils/templates/pod/probe.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,15 @@ func ReadinessProbe(readinessProbeFailureSeconds *int32, port int32, path string
return completeProbe(readinessProbeFailureSeconds, defaultFailureThreshold, defaultPeriodSeconds, getProbe(port, path))
}

// completeProbe completes the probe. If the user explicitly sets failureSeconds to 0, it returns nil,
// which means the probe is disabled.
func completeProbe(failureSeconds *int32, defaultFailureThreshold int32, defaultPeriodSeconds int32,
probeHandler corev1.ProbeHandler) *corev1.Probe {
probe := &corev1.Probe{}
if failureSeconds != nil && *failureSeconds > 0 {
if failureSeconds != nil {
if *failureSeconds == 0 {
return nil
}
probe.FailureThreshold = (*failureSeconds + defaultPeriodSeconds - 1) / defaultPeriodSeconds
} else {
probe.FailureThreshold = defaultFailureThreshold
Expand Down
45 changes: 35 additions & 10 deletions pkg/k8sutils/templates/pod/probe_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,15 @@ func TestMakeLivenessProbe(t *testing.T) {
ProbeHandler: getProbe(8080, "/api/health2"),
},
},
{
name: "disable liveness probe",
args: args{
seconds: func() *int32 { s := int32(0); return &s }(),
port: 8080,
path: "/api/health2",
},
want: nil,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down Expand Up @@ -90,6 +99,15 @@ func TestMakeReadinessProbe(t *testing.T) {
ProbeHandler: getProbe(8080, "/api/health2"),
},
},
{
name: "disable readiness probe",
args: args{
seconds: func() *int32 { s := int32(0); return &s }(),
port: 8080,
path: "/api/health2",
},
want: nil,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand All @@ -102,8 +120,9 @@ func TestMakeReadinessProbe(t *testing.T) {

func TestMakeStartupProbe(t *testing.T) {
type args struct {
port int32
path string
seconds *int32
port int32
path string
}
tests := []struct {
name string
Expand All @@ -113,19 +132,29 @@ func TestMakeStartupProbe(t *testing.T) {
{
name: "test",
args: args{
port: 8080,
path: "/api/health2",
seconds: nil,
port: 8080,
path: "/api/health2",
},
want: &corev1.Probe{
FailureThreshold: 60,
PeriodSeconds: 5,
ProbeHandler: getProbe(8080, "/api/health2"),
},
},
{
name: "disable startup probe",
args: args{
seconds: func() *int32 { s := int32(0); return &s }(),
port: 8080,
path: "/api/health2",
},
want: nil,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := StartupProbe(nil, tt.args.port, tt.args.path); !reflect.DeepEqual(got, tt.want) {
if got := StartupProbe(tt.args.seconds, tt.args.port, tt.args.path); !reflect.DeepEqual(got, tt.want) {
t.Errorf("StartupProbe() = %v, want %v", got, tt.want)
}
})
Expand Down Expand Up @@ -216,11 +245,7 @@ func Test_completeProbe(t *testing.T) {
defaultPeriodSeconds: 5,
probeHandler: corev1.ProbeHandler{},
},
want: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{},
FailureThreshold: 60,
PeriodSeconds: 5,
},
want: nil,
},
{
name: "test complete probe 4",
Expand Down

0 comments on commit 24ab4d5

Please sign in to comment.