Skip to content

Commit

Permalink
Add workload summary and compare data graphs on k8s-netperf
Browse files Browse the repository at this point in the history
  • Loading branch information
Docs committed Jan 19, 2024
1 parent fdf26ce commit c35f00f
Show file tree
Hide file tree
Showing 4 changed files with 248 additions and 11 deletions.
172 changes: 170 additions & 2 deletions assets/k8s-netperf/panels.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,144 @@ local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonn
"displayName": "ea7b29d7-8991-4752-a0d4-e26446d34915 TCP_STREAM 4096 Mb/s AWS"
}
]),

// Table panel that condenses raw result documents into one row per run UUID,
// showing only cluster-level metadata.
//
//   title, targets, gridPos: forwarded unchanged to self.base.
//
// Two transformations are applied in order:
//   1. organize - hides per-sample / per-node fields, then orders and renames
//      the remaining columns.
//   2. groupBy  - groups on UUID and keeps a single value per column.
workloadSummary(title, targets, gridPos):
  local sides = ['client', 'server'];

  // Expanded below to "<side>CPU.<kind>CPU".
  local cpuKinds = ['idle', 'io', 'irq', 'nice', 'soft', 'steal', 'system', 'user'];

  // Expanded below to "<side>NodeLabels.<label>".
  local nodeLabels = [
    'beta.kubernetes.io/arch',
    'beta.kubernetes.io/instance-type',
    'beta.kubernetes.io/os',
    'failure-domain.beta.kubernetes.io/region',
    'failure-domain.beta.kubernetes.io/zone',
    'hypershift.openshift.io/managed',
    'hypershift.openshift.io/nodePool',
    'kubernetes.io/arch',
    'kubernetes.io/hostname',
    'kubernetes.io/os',
    'node-role.kubernetes.io/worker',
    'node.kubernetes.io/instance-type',
    'node.openshift.io/os_id',
    'topology.ebs.csi.aws.com/zone',
    'topology.kubernetes.io/region',
    'topology.kubernetes.io/zone',
  ];

  // Fields hidden from the summary that have no client/server counterpart pattern.
  local otherHidden = [
    '_id',
    '_index',
    '_type',
    'clientPods',
    'confidence',
    'driver',
    'highlight',
    'hostNetwork',
    'latency',
    'local',
    'ltcyMetric',
    'messageSize',
    'metadata.ipsec',
    'metadata.k8sVersion',
    'metadata.kernel',
    'metadata.masterNodesCount',
    'metadata.masterNodesType',
    'metadata.metricName',
    'metadata.mtu',
    'metadata.ocpShortVersion',
    'metadata.totalNodes',
    'parallelism',
    'profile',
    'samples',
    'serverPods',
    'service',
    'sort',
    'tcpRetransmits',
    'throughput',
    'tputMetric',
    'udpLossPercent',
  ];

  local hiddenFields =
    otherHidden
    + [side + 'CPU.' + kind + 'CPU' for side in sides for kind in cpuKinds]
    + [side + 'NodeLabels.' + label for side in sides for label in nodeLabels];

  // Aggregation shared by every grouped column except Multi-AZ.
  local lastNotNull = { aggregations: ['lastNotNull'], operation: 'aggregate' };

  self.base(title, targets, gridPos)
  + table.queryOptions.withTransformations([
    {
      id: 'organize',
      options: {
        excludeByName: { [name]: true for name in hiddenFields },
        indexByName: {
          uuid: 0,
          timestamp: 1,
          'metadata.platform': 2,
          'metadata.ocpVersion': 3,
          'metadata.clusterName': 4,
          'metadata.sdnType': 5,
          'metadata.infraNodesCount': 6,
          'metadata.infraNodesType': 7,
          'metadata.workerNodesCount': 8,
          'metadata.workerNodesType': 9,
          // Must use the same source name as the renameByName entry below;
          // the previous key 'metadata.acrossAZ' did not match it.
          acrossAZ: 10,
          'metadata.region': 11,
        },
        renameByName: {
          acrossAZ: 'Multi-AZ',
          'metadata.clusterName': 'Cluster Name',
          'metadata.infraNodesCount': 'Infras',
          'metadata.infraNodesType': 'Infra Type',
          'metadata.ocpVersion': 'Version',
          'metadata.platform': 'Platform',
          'metadata.region': 'Region',
          'metadata.sdnType': 'SDN',
          'metadata.workerNodesCount': 'Workers',
          'metadata.workerNodesType': 'Workers Type',
          timestamp: 'Timestamp',
          uuid: 'UUID',
        },
      },
    },
    {
      id: 'groupBy',
      options: {
        // Keys are the column names produced by renameByName above.
        fields: {
          UUID: { aggregations: [], operation: 'groupby' },
          'Cluster Name': lastNotNull,
          'Infra Type': lastNotNull,
          Infras: lastNotNull,
          Platform: lastNotNull,
          Region: lastNotNull,
          SDN: lastNotNull,
          Timestamp: lastNotNull,
          Version: lastNotNull,
          Workers: lastNotNull,
          'Workers Type': lastNotNull,
          duration: lastNotNull,
          'Multi-AZ': { aggregations: ['last'], operation: 'aggregate' },
        },
      },
    },
  ]),
withLatencyOverrides(title, targets, gridPos):
self.base(title, targets, gridPos)
+ table.queryOptions.withTransformations([
Expand Down Expand Up @@ -168,7 +305,38 @@ local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonn
}
]),
},
// Bar gauge panels backed by the Elasticsearch datasource selected through
// the $datasource dashboard variable.
barGauge: {
  local barGauge = g.panel.barGauge,
  local options = barGauge.options,

  // Common bar gauge definition.
  //   title:   panel title.
  //   targets: query target(s) handed to queryOptions.withTargets.
  //   gridPos: object with x, y, w and h fields.
  // The panel repeats over the "messageSize" variable and reduces each series
  // to its last non-null value.
  base(title, targets, gridPos):
    barGauge.new(title)
    + barGauge.queryOptions.withTargets(targets)
    + barGauge.datasource.withType('elasticsearch')
    + barGauge.datasource.withUid('$datasource')
    + options.reduceOptions.withValues(false)
    + options.reduceOptions.withCalcs(['lastNotNull'])
    + options.reduceOptions.withFields('')
    + options.withOrientation('horizontal')
    + options.withDisplayMode('gradient')
    + options.withValueMode('color')
    + barGauge.panelOptions.withRepeat('messageSize')
    // min is a numeric option; the original passed the string "0".
    + barGauge.standardOptions.withMin(0)
    + barGauge.standardOptions.color.withMode('palette-classic')
    + barGauge.gridPos.withX(gridPos.x)
    + barGauge.gridPos.withY(gridPos.y)
    + barGauge.gridPos.withH(gridPos.h)
    + barGauge.gridPos.withW(gridPos.w),

  // base panel with the unit set to "Mbits".
  withThroughput(title, targets, gridPos):
    self.base(title, targets, gridPos)
    + barGauge.standardOptions.withUnit('Mbits'),

  // base panel with the unit set to "µs".
  withLatency(title, targets, gridPos):
    self.base(title, targets, gridPos)
    + barGauge.standardOptions.withUnit('µs'),
},
timeSeries: {
local timeSeries = g.panel.timeSeries,
local custom = timeSeries.fieldConfig.defaults.custom,
Expand Down Expand Up @@ -246,4 +414,4 @@ local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonn
}
]),
},
}
}
54 changes: 51 additions & 3 deletions assets/k8s-netperf/queries.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ local elasticsearch = g.query.elasticsearch;

{
all: {
query(metric, aggregationMetric):
query(metric, aggregationMetric):
elasticsearch.withAlias("{{metadata.ocpVersion.keyword}} hostNetwork={{hostNetwork}} procs={{parallelism}}")
+ elasticsearch.withBucketAggs([
elasticsearch.bucketAggs.Terms.withField("messageSize")
Expand Down Expand Up @@ -79,7 +79,7 @@ local elasticsearch = g.query.elasticsearch;
+ elasticsearch.withTimeField('timestamp')
},
parallelismAll: {
query(metric, aggregationMetric):
query(metric, aggregationMetric):
elasticsearch.withAlias("")
+ elasticsearch.withBucketAggs([
elasticsearch.bucketAggs.Terms.withField("uuid.keyword")
Expand Down Expand Up @@ -161,4 +161,52 @@ local elasticsearch = g.query.elasticsearch;
+ elasticsearch.withQuery('uuid: $uuid AND parallelism: $parallelism AND profile: ' + metric + ' AND messageSize: $messageSize AND driver.keyword: $driver AND metadata.platform: $platform AND hostNetwork: $hostNetwork AND service: $service')
+ elasticsearch.withTimeField('timestamp')
},
}
summary: {
  // Raw-document query (no bucket aggregations, up to 500 documents) filtered
  // by the dashboard variables.
  //   metric:            value matched against the "profile" field.
  //   aggregationMetric: accepted for signature parity with the other
  //                      queries; not used here.
  query(metric, aggregationMetric):
    local rawData = elasticsearch.metrics.MetricAggregationWithSettings.RawData;
    local filter =
      'uuid: $uuid AND parallelism: $parallelism AND profile: ' + metric
      + ' AND messageSize: $messageSize AND driver.keyword: $driver AND metadata.platform: $platform AND hostNetwork: $hostNetwork AND service: $service';
    local rawDocuments =
      rawData.withHide(false)
      + rawData.withId("1")
      + rawData.withType("raw_data")
      + rawData.settings.withSize("500");

    elasticsearch.withAlias("")
    + elasticsearch.withBucketAggs([])
    + elasticsearch.withMetrics([rawDocuments])
    + elasticsearch.withQuery(filter)
    + elasticsearch.withTimeField('timestamp')
},
metricCompare: {
  // Average of one result field, bucketed by parallelism, then by the field
  // selected in the $compare_by variable, then by a timestamp histogram.
  //   metric:            value matched against the "profile" field.
  //   aggregationMetric: document field that is averaged.
  //   hostNetwork:       value concatenated into the "hostNetwork:" filter.
  //   service:           value concatenated into the "service:" filter.
  // Only documents with acrossAZ: false are matched.
  query(metric, aggregationMetric, hostNetwork, service):
    local terms = elasticsearch.bucketAggs.Terms;
    local dateHistogram = elasticsearch.bucketAggs.DateHistogram;
    local average = elasticsearch.metrics.MetricAggregationWithSettings.Average;

    elasticsearch.withAlias("{{$compare_by}} Procs: {{parallelism}}")
    + elasticsearch.withBucketAggs([
      terms.withField("parallelism")
      + terms.withId("1")
      + terms.withType('terms')
      + terms.settings.withOrder('asc')
      + terms.settings.withOrderBy('_term')
      + terms.settings.withMinDocCount('1')
      + terms.settings.withSize("0"),
      terms.withField("$compare_by")
      + terms.withId("2")
      + terms.withType('terms')
      + terms.settings.withOrder('desc')
      + terms.settings.withOrderBy('_term')
      + terms.settings.withMinDocCount('1')
      + terms.settings.withSize("10"),
      dateHistogram.withField('timestamp')
      + dateHistogram.withId("3")
      + dateHistogram.withType('date_histogram')
      + dateHistogram.settings.withInterval('auto')
      + dateHistogram.settings.withMinDocCount("1")
      + dateHistogram.settings.withTimeZone("utc")
      + dateHistogram.settings.withTrimEdges(0),
    ])
    + elasticsearch.withMetrics([
      // Use the Average helpers throughout; the original set id/type via the
      // RawData helpers on an Average metric.
      average.withField(aggregationMetric)
      + average.withId("1")
      + average.withType("avg"),
    ])
    + elasticsearch.withQuery('uuid: $uuid AND parallelism: $parallelism AND profile: ' + metric + ' AND messageSize: $messageSize AND driver.keyword: $driver AND hostNetwork: ' + hostNetwork + ' AND service: ' + service + ' AND acrossAZ: false')
    + elasticsearch.withTimeField('timestamp')
}
}
18 changes: 12 additions & 6 deletions assets/k8s-netperf/variables.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ local var = g.dashboard.variable;
var.query.new('platform', "{\"find\": \"terms\", \"field\": \"metadata.platform.keyword\"}")
+ var.query.withDatasourceFromVariable(self.datasource)
+ var.query.withRefresh(2)
+ var.query.selectionOptions.withMulti(false)
+ var.query.selectionOptions.withMulti(true)
+ var.query.selectionOptions.withIncludeAll(true)
+ var.query.generalOptions.withLabel('Platform'),

Expand All @@ -33,7 +33,7 @@ local var = g.dashboard.variable;
+ var.query.selectionOptions.withMulti(true)
+ var.query.selectionOptions.withIncludeAll(true)
+ var.query.generalOptions.withLabel('uuid'),

hostNetwork:
var.custom.new('hostNetwork', ['true', 'false'],)
+ var.custom.selectionOptions.withMulti(true)
Expand All @@ -45,15 +45,15 @@ local var = g.dashboard.variable;
+ var.custom.selectionOptions.withMulti(true)
+ var.custom.selectionOptions.withIncludeAll(true)
+ var.custom.generalOptions.withLabel('service'),

streams:
var.query.new('parallelism', "{\"find\": \"terms\", \"field\": \"parallelism\", \"query\":\"uuid: $uuid\"}")
+ var.query.withDatasourceFromVariable(self.datasource)
+ var.query.withRefresh(2)
+ var.query.selectionOptions.withMulti(true)
+ var.query.selectionOptions.withIncludeAll(true)
+ var.query.generalOptions.withLabel('streams'),

throughput_profile:
var.query.new('throughput_profile', "{\"find\": \"terms\", \"field\": \"profile.keyword\", \"query\":\"uuid:$uuid\"}")
+ var.query.withDatasourceFromVariable(self.datasource)
Expand All @@ -71,7 +71,7 @@ local var = g.dashboard.variable;
+ var.query.selectionOptions.withMulti(true)
+ var.query.selectionOptions.withIncludeAll(true)
+ var.query.generalOptions.withLabel('Latency profile'),

messageSize:
var.query.new('messageSize', "{\"find\": \"terms\", \"field\": \"messageSize\",\"query\":\"uuid:$uuid\"}")
+ var.query.withDatasourceFromVariable(self.datasource)
Expand All @@ -87,4 +87,10 @@ local var = g.dashboard.variable;
+ var.query.selectionOptions.withMulti(false)
+ var.query.selectionOptions.withIncludeAll(false)
+ var.query.generalOptions.withLabel('Driver'),
}

// Single-select custom variable (no "All" option) listing the document fields
// offered under the "Compare By" label.
compare_by:
  local comparableFields = [
    'uuid.keyword',
    'metadata.ocpVersion.keyword',
    'metadata.clusterName.keyword',
    'metadata.ocpShortVersion.keyword',
    'metadata.platform.keyword',
  ];
  var.custom.new('compare_by', comparableFields)
  + var.custom.generalOptions.withLabel('Compare By')
  + var.custom.selectionOptions.withMulti(false)
  + var.custom.selectionOptions.withIncludeAll(false),
}
15 changes: 15 additions & 0 deletions templates/CPT/k8s-netperf-v2.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,28 @@ g.dashboard.new('k8s-netperf')
variables.latency_profile,
variables.messageSize,
variables.driver,
variables.compare_by,
])
+ g.dashboard.withPanels([
panels.row.base('Workload Summary', '', { x: 0, y: 0, w: 24, h: 0 }),
panels.table.workloadSummary('', queries.summary.query('$throughput_profile', 'throughput'), { x: 0, y: 0, w: 24, h: 11 }),
panels.row.base('$latency_profile', 'latency_profile', { x: 0, y: 0, w: 24, h: 1 }),
panels.timeSeries.base('$latency_profile - $driver - $messageSize', queries.all.query('$latency_profile', 'latency'), { x: 0, y: 0, w: 24, h: 8 }),
panels.row.base('$throughput_profile', 'throughput_profile', { x: 0, y: 9, w: 24, h: 1 }),
panels.timeSeries.withThroughputOverrides('$throughput_profile - $driver - $messageSize', queries.all.query('$throughput_profile', 'throughput'), { x: 0, y: 10, w: 24, h: 8 }),
panels.row.base('Parallelism $parallelism', 'parallelism', { x: 0, y: 18, w: 24, h: 1 }),
panels.table.base('Throughput - Parallelism: $parallelism', queries.parallelismAll.query('$throughput_profile', 'throughput'), { x: 0, y: 19, w: 24, h: 11 }),
panels.table.withLatencyOverrides('Latency - Parallelism: $parallelism', queries.parallelismAll.query('$latency_profile', 'latency'), { x: 0, y: 19, w: 24, h: 11 }),

panels.row.base('Node to Node', '', { x: 0, y: 20, w: 24, h: 1 }),
panels.barGauge.withThroughput('$throughput_profile - $driver - $messageSize', queries.metricCompare.query('TCP_STREAM', 'throughput', true, false), { x: 0, y: 21, w: 11, h: 11 }),
panels.barGauge.withLatency('Latency - $driver - $messageSize', queries.metricCompare.query('TCP_RR', 'latency', true, false), { x: 0, y: 21, w: 11, h: 11 }),

panels.row.base('Pod to Pod', '', { x: 0, y: 22, w: 24, h: 1 }),
panels.barGauge.withThroughput('$throughput_profile - $driver - $messageSize', queries.metricCompare.query('TCP_STREAM', 'throughput', false, false), { x: 0, y: 23, w: 11, h: 11 }),
panels.barGauge.withLatency('Latency - $driver - $messageSize', queries.metricCompare.query('TCP_RR', 'latency', false, false), { x: 0, y: 23, w: 11, h: 11 }),

panels.row.base('Pod to Pod via Service', '', { x: 0, y: 24, w: 24, h: 1 }),
panels.barGauge.withThroughput('$throughput_profile - $driver - $messageSize', queries.metricCompare.query('TCP_STREAM', 'throughput', false, true), { x: 0, y: 25, w: 11, h: 11 }),
panels.barGauge.withLatency('Latency - $driver - $messageSize', queries.metricCompare.query('TCP_RR', 'latency', false, true), { x: 0, y: 25, w: 11, h: 11 }),
])

0 comments on commit c35f00f

Please sign in to comment.