Skip to content

Commit

Permalink
fix: correct alb metrics to use gauge promql functions (#639)
Browse files Browse the repository at this point in the history
* fix: correct alb metrics to use gauge promql functions

* fix: correct sli lib references

* feat: add ci item to burn rate alerts and add yaxis to alb graphs

* fix: correction to saturation override naming
  • Loading branch information
michaelpearsonHO authored Sep 12, 2024
1 parent d85dbe2 commit 820d05f
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 28 deletions.
2 changes: 2 additions & 0 deletions monitoring-as-code/src/alerts/burn-rate-alerts.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ local createBurnRateAlerts(config, sliSpec, sliKey, journeyKey) =
},
labels: {
ci_type: 'CMDB_CI_Service_Auto',
configuration_item: config.configurationItem,
primary_impact_service: config.applicationServiceName,
title: alertTitle,
wait_for: '%(for)s' % errorBudgetBurnWindow,
factor: std.toString(errorBudgetBurnWindow.factor),
Expand Down
6 changes: 3 additions & 3 deletions monitoring-as-code/src/metric-types.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@
},
sliTypesConfig: {
availability: {
- library: (import 'sli-value-libraries/availability-counter-using-2-failure-metrics-for-num-and-1-total-metric-for-dem.libsonnet'),
+ library: (import 'sli-value-libraries/availability-gauge-using-2-failure-metrics-for-num-and-1-total-metric-for-dem.libsonnet'),
description: 'The error rate for %(sliDescription)s should be below %(metric_target_percent)0.1f%%',
targetMetrics: {
code4xx: 'count4xx',
Expand Down Expand Up @@ -530,7 +530,7 @@
},
sliTypesConfig: {
availability: {
- library: (import 'sli-value-libraries/availability-counter-using-2-failure-metrics-for-num-and-1-total-metric-for-dem.libsonnet'),
+ library: (import 'sli-value-libraries/availability-gauge-using-2-failure-metrics-for-num-and-1-total-metric-for-dem.libsonnet'),
description: 'The error rate for %(sliDescription)s should be below %(metric_target_percent)0.1f%%',
targetMetrics: {
code4xx: 'sum4xx',
Expand Down Expand Up @@ -567,7 +567,7 @@
},
sliTypesConfig: {
availability: {
- library: (import 'sli-value-libraries/availability-counter-using-2-failure-metrics-for-num-and-1-total-metric-for-dem.libsonnet'),
+ library: (import 'sli-value-libraries/availability-gauge-using-2-failure-metrics-for-num-and-1-total-metric-for-dem.libsonnet'),
description: 'The error rate for %(sliDescription)s should be below %(metric_target_percent)0.1f%%',
targetMetrics: {
code4xx: 'sum4xx',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,16 @@ local createSliValueRule(sliSpec, sliMetadata, config) =
sum without (%(selectorLabels)s) (label_replace(label_replace(
(
(
- sum by(%(selectorLabels)s) (
- rate(%(code4xxMetric)s{%(selectors)s}[%(evalInterval)s])
- or
- 0 * %(codeAllMetric)s{%(selectors)s}
- )
- +
- sum by(%(selectorLabels)s) (
- rate(%(code5xxMetric)s{%(selectors)s}[%(evalInterval)s])
- or
- 0 * %(codeAllMetric)s{%(selectors)s}
- )
- )
+ sum by(%(selectorLabels)s) (avg_over_time(%(code4xxMetric)s{%(selectors)s}[%(evalInterval)s])
+ or 0 * %(codeAllMetric)s{%(selectors)s})
+ +
+ sum by(%(selectorLabels)s) (avg_over_time(%(code5xxMetric)s{%(selectors)s}[%(evalInterval)s])
+ or 0 * %(codeAllMetric)s{%(selectors)s})
+ >=0)
/
- sum by(%(selectorLabels)s) (rate(%(codeAllMetric)s{%(selectors)s}[%(evalInterval)s]))
+ (
+ sum by(%(selectorLabels)s) (avg_over_time(%(codeAllMetric)s{%(selectors)s}[%(evalInterval)s]))
+ >=0)
),
"sli_environment", "$1", "%(environmentSelectorLabel)s", "(.*)"), "sli_product", "$1", "%(productSelectorLabel)s", "(.*)"))
||| % {
Expand Down Expand Up @@ -97,38 +93,40 @@ local createGraphPanel(sliSpec) =
).addTarget(
prometheus.target(
|||
- sum(rate(%(codeAllMetric)s{%(selectors)s}[%(evalInterval)s]) or vector(0))
+ sum(avg_over_time(%(codeAllMetric)s{%(selectors)s}[%(evalInterval)s]) >=0 or vector(0))
||| % {
codeAllMetric: targetMetrics.codeAll,
selectors: std.join(',', dashboardSelectors),
evalInterval: sliSpec.evalInterval,
},
- legendFormat='requests per second',
+ legendFormat='average requests',
),
).addTarget(
prometheus.target(
|||
- sum(rate(%(code4xxMetric)s{%(selectors)s}[%(evalInterval)s]) or vector(0))
- +
- sum(rate(%(code5xxMetric)s{%(selectors)s}[%(evalInterval)s]) or vector(0))
+ sum(avg_over_time(%(code4xxMetric)s{%(selectors)s}[%(evalInterval)s]) >=0 or vector(0))
+ +
+ sum(avg_over_time(%(code5xxMetric)s{%(selectors)s}[%(evalInterval)s]) >=0 or vector(0))
||| % {
code4xxMetric: targetMetrics.code4xx,
code5xxMetric: targetMetrics.code5xx,
selectors: std.join(',', dashboardSelectors),
evalInterval: sliSpec.evalInterval,
},
- legendFormat='errors per second',
+ legendFormat='average errors',
)
).addTarget(
prometheus.target(
|||
(
- sum(rate(%(code4xxMetric)s{%(selectors)s}[%(evalInterval)s]) or vector(0))
- +
- sum(rate(%(code5xxMetric)s{%(selectors)s}[%(evalInterval)s]) or vector(0))
+ sum(avg_over_time(%(code4xxMetric)s{%(selectors)s}[%(evalInterval)s]) >=0 or vector(0))
+ +
+ sum(avg_over_time(%(code5xxMetric)s{%(selectors)s}[%(evalInterval)s]) >=0 or vector(0))
)
/
- sum(rate(%(codeAllMetric)s{%(selectors)s}[%(evalInterval)s]))
+ (
+ sum(avg_over_time(%(codeAllMetric)s{%(selectors)s}[%(evalInterval)s]) >=0 or vector(0))
+ )
||| % {
code4xxMetric: targetMetrics.code4xx,
code5xxMetric: targetMetrics.code5xx,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,17 @@ local createGraphPanel(sliSpec) =
},
min=0,
fill=4,
+ formatY2='percentunit',
+ thresholds=[
+ {
+ value: sliSpec.metricTarget,
+ colorMode: 'critical',
+ op: 'gt',
+ line: true,
+ fill: false,
+ yaxis: 'right',
+ },
+ ],
).addTarget(
prometheus.target(
|||
Expand All @@ -94,12 +105,14 @@ local createGraphPanel(sliSpec) =
selectors: std.join(',', dashboardSelectors),
evalInterval: sliSpec.evalInterval,
},
- legendFormat='avg period where maximum saturation > %s percentage' % sliSpec.counterPercentTarget,
+ legendFormat='avg period where saturation > %s percent' % sliSpec.counterPercentTarget,
)
).addSeriesOverride(
{
- alias: '/avg period where maximum saturation > %s percentage/' % sliSpec.counterPercentTarget,
+ alias: '/avg period where saturation > %s percent/' % sliSpec.counterPercentTarget,
yaxis: 2,
color: 'red',

},
).addSeriesOverride(
{
Expand Down

0 comments on commit 820d05f

Please sign in to comment.