Skip to content

Commit

Permalink
adding latest grafonnet library support
Browse files Browse the repository at this point in the history
  • Loading branch information
Vishnu Challa committed Sep 29, 2023
1 parent 1cac256 commit fa3bb29
Show file tree
Hide file tree
Showing 12 changed files with 595 additions and 5,078 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
templates/grafonnet-lib
templates/vendor
rendered
tmp
bin
39 changes: 29 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,45 +1,64 @@
# Release tarball with the jsonnet binaries; unpacked into $(BINDIR) by the
# $(BINDIR)/jsonnet rule. Reassigned further down depending on the requested goal.
JSONNET = https://github.com/google/jsonnet/releases/download/v0.17.0/jsonnet-bin-v0.17.0-linux.tar.gz
# Latest jsonnet-bundler (jb) release binary for linux/amd64.
JB = https://github.com/jsonnet-bundler/jsonnet-bundler/releases/latest/download/jb-linux-amd64
# Directory that receives the downloaded tools.
BINDIR = bin
# Directory holding the *.jsonnet dashboard templates.
TEMPLATESDIR = templates
# Directory that receives the rendered *.json dashboards.
OUTPUTDIR = rendered
# Directories created by the $(ALLDIRS) rule and removed again by `clean`.
ALLDIRS = $(BINDIR) $(OUTPUTDIR)
# Manifest name for the syncer image; `?=` lets the environment or the command line override it.
SYNCER_IMG_TAG ?= quay.io/cloud-bulldozer/dittybopper-syncer:latest
# Platform list handed to `podman build --platform`.
PLATFORM = linux/amd64,linux/arm64,linux/ppc64le,linux/s390x

# Template, library and jsonnet-release selection.
# When `v2` is one of the goals named on the command line ($(MAKECMDGOALS)),
# only the *-v2.jsonnet templates are selected, imports resolve against the
# jsonnet-bundler vendor directory and the v0.20.0 jsonnet tarball is used.
# Otherwise the *-v2 templates are filtered out, imports resolve against the
# grafonnet-lib checkout and the v0.17.0 tarball is used.
# NOTE(review): $(wildcard) presumably treats `**` like a single `*`, i.e. the
# patterns match templates exactly one directory below $(TEMPLATESDIR) — confirm.
# Get all templates at $(TEMPLATESDIR)
TEMPLATES := $(wildcard $(TEMPLATESDIR)/**/*.jsonnet)
ifeq ($(filter v2,$(MAKECMDGOALS)),v2)
# Set variables and instructions for v2
TEMPLATES := $(wildcard $(TEMPLATESDIR)/**/*-v2.jsonnet)
LIBRARY_PATH := $(TEMPLATESDIR)/vendor
JSONNET := https://github.com/cloud-bulldozer/utils/releases/download/v0.0.0/jsonnet-bin-v0.20.0-linux.tar.gz
else
# Get all templates at $(TEMPLATESDIR)
TEMPLATES := $(filter-out %-v2.jsonnet, $(wildcard $(TEMPLATESDIR)/**/*.jsonnet))
LIBRARY_PATH := $(TEMPLATESDIR)/grafonnet-lib
JSONNET := https://github.com/google/jsonnet/releases/download/v0.17.0/jsonnet-bin-v0.17.0-linux.tar.gz
endif

# Rendered counterpart of every selected template: the $(TEMPLATESDIR)/ prefix
# becomes $(OUTPUTDIR)/ and the .jsonnet suffix becomes .json.
outputs := $(TEMPLATES:$(TEMPLATESDIR)/%.jsonnet=$(OUTPUTDIR)/%.json)

# These goals name actions, not files. Declaring them phony keeps a stray file
# or directory called e.g. `build` or `clean` from masking the rule.
.PHONY: all deps format build clean v2 build-syncer-image

# Dashboards are rendered through shell redirection, which creates the target
# before jsonnet has succeeded. Delete such partial outputs when a recipe fails
# so the next run rebuilds them instead of treating them as up to date.
.DELETE_ON_ERROR:

# Default goal: fetch tools and libraries, format the templates, render them.
all: deps format build

# deps: make sure the working directories, the downloaded jsonnet tools and the
# jsonnet library exist. Neither line has a recipe, so make merges the two
# prerequisite lists into one.
deps: $(ALLDIRS) $(TEMPLATESDIR)/grafonnet-lib $(BINDIR)/jsonnet
deps: $(ALLDIRS) $(BINDIR)/jsonnet $(LIBRARY_PATH)

# Create the tool and output directories. The recipe names every directory in
# $(ALLDIRS), so whichever of them is built first creates all of them.
$(ALLDIRS):
mkdir -p $(ALLDIRS)

# Reformat the selected templates in place (-i) with jsonnetfmt.
# NOTE(review): jsonnetfmt is presumably unpacked from the $(JSONNET) tarball
# by the $(BINDIR)/jsonnet rule — confirm.
format: deps
$(BINDIR)/jsonnetfmt -i $(TEMPLATES)

# Render every selected template. Listing $(outputs) as prerequisites is what
# triggers the $(OUTPUTDIR)/%.json pattern rule for each dashboard.
build: deps $(TEMPLATESDIR)/grafonnet-lib $(outputs)
build: deps $(LIBRARY_PATH) $(outputs)

# Remove the directories listed in $(ALLDIRS) together with the downloaded
# jsonnet library checkout.
clean:
@echo "Cleaning up"
rm -rf $(ALLDIRS) $(TEMPLATESDIR)/grafonnet-lib

$(TEMPLATESDIR)/grafonnet-lib:
git clone --depth 1 https://github.com/grafana/grafonnet-lib.git $(TEMPLATESDIR)/grafonnet-lib
rm -rf $(ALLDIRS) $(TEMPLATESDIR)/vendor $(TEMPLATESDIR)/grafonnet-lib

# Download the jsonnet release tarball selected by $(JSONNET) and unpack it into
# $(BINDIR), then fetch the jsonnet-bundler binary as $(BINDIR)/jb and make it
# executable.
# $(BINDIR) is an order-only prerequisite: `tar -C` and `curl -o` both need the
# directory to exist, but its timestamp must never trigger a re-download.
# Because this target has no other prerequisites, it is considered up to date
# once it exists; switching between the default and the v2 goals therefore does
# not replace an already unpacked jsonnet release (run `make clean` first).
$(BINDIR)/jsonnet: | $(BINDIR)
	@echo "Downloading jsonnet binary"
	curl -s -L $(JSONNET) | tar xz -C $(BINDIR)
	@echo "Downloading jb binary"
	curl -s -L $(JB) -o $(BINDIR)/jb
	chmod +x $(BINDIR)/jb

# Shallow-clone (history depth 1) grafonnet-lib into the templates tree; this is
# the library path used when the v2 goal is not requested.
$(TEMPLATESDIR)/grafonnet-lib:
	git clone --depth 1 https://github.com/grafana/grafonnet-lib.git $@

# Install the jsonnet-bundler dependencies by running `jb install` from inside
# $(TEMPLATESDIR).
# NOTE(review): jb presumably reads a jsonnetfile.json located in
# $(TEMPLATESDIR) and writes to $(TEMPLATESDIR)/vendor — confirm.
# The jb binary is downloaded by the $(BINDIR)/jsonnet rule, so that target is
# an order-only prerequisite: it has to run first (also under `make -j`), but
# its timestamp must not force a re-install of the vendor tree.
# The trailing `cd ../` only affects this one shell invocation; it is kept so
# the echoed command stays identical.
$(TEMPLATESDIR)/vendor: | $(BINDIR)/jsonnet
	@echo "Downloading vendor files"
	cd $(TEMPLATESDIR) && ../$(BINDIR)/jb install && cd ../

# Build each template and output to $(OUTPUTDIR)
# $< is the matching template and $@ the rendered file. The output directory is
# created on demand because templates live in sub-directories, and -J adds
# $(LIBRARY_PATH) to the jsonnet import search path.
$(OUTPUTDIR)/%.json: $(TEMPLATESDIR)/%.jsonnet
@echo "Building template $<"
mkdir -p $(dir $@)
$(BINDIR)/jsonnet $< > $@
$(BINDIR)/jsonnet -J ./$(LIBRARY_PATH) $< > $@

# v2: runs the same pipeline as `all`. Requesting this goal is what makes the
# $(MAKECMDGOALS) check near the top of the file select the *-v2 templates, the
# vendor library path and the v0.20.0 jsonnet release.
v2: all
@echo "Rendered the v2 dashboards with latest grafonnet library"

# Build the syncer image for every platform in $(PLATFORM) and collect the
# results under the $(SYNCER_IMG_TAG) manifest; dashboards are rendered first.
build-syncer-image: build
	podman build --platform=$(PLATFORM) -f Dockerfile --manifest=$(SYNCER_IMG_TAG) .
Expand Down
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,28 @@ bin/jsonnet templates/ocp-performance.jsonnet > rendered/ocp-performance.json
$ ls rendered
ocp-ingress-controller.json ocp-performance.json
```
Similarly, to build the v2 dashboards (the ones written against the latest grafonnet library), run:
```
$ make v2
mkdir -p bin rendered
Downloading jsonnet binary
curl -s -L https://github.com/cloud-bulldozer/utils/releases/download/v0.0.0/jsonnet-bin-v0.20.0-linux.tar.gz | tar xz -C bin
Downloading jb binary
curl -s -L https://github.com/jsonnet-bundler/jsonnet-bundler/releases/latest/download/jb-linux-amd64 -o bin/jb
chmod +x bin/jb
Downloading vendor files
cd templates && ../bin/jb install && cd ../
GET https://github.com/grafana/grafonnet/archive/f40876da40d787e9c288de0b547ac85597c781d9.tar.gz 200
GET https://github.com/grafana/grafonnet/archive/f40876da40d787e9c288de0b547ac85597c781d9.tar.gz 200
GET https://github.com/jsonnet-libs/docsonnet/archive/cc9df63eaca56f39e8e4e1ce192141333257b08d.tar.gz 200
GET https://github.com/jsonnet-libs/xtd/archive/0256a910ac71f0f842696d7bca0bf01ea77eb654.tar.gz 200
bin/jsonnetfmt -i templates/General/ocp-performance-v2.jsonnet
Building template templates/General/ocp-performance-v2.jsonnet
mkdir -p rendered/General/
bin/jsonnet -J ./templates/vendor templates/General/ocp-performance-v2.jsonnet > rendered/General/ocp-performance-v2.json
Rendered the v2 dashboards with latest grafonnet library
```
All other operations remain the same as before.

In order to clean up the environment execute `make clean`.

Expand Down
52 changes: 52 additions & 0 deletions assets/ocp-performance/panels.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';

{
timeSeries: {
  // Short aliases into the grafonnet timeSeries panel API.
  local timeSeries = g.panel.timeSeries,
  local custom = timeSeries.fieldConfig.defaults.custom,
  local options = timeSeries.options,

  // generic(title, unit, targets, gridPos)
  //   title:   panel title
  //   unit:    value passed to standardOptions.withUnit
  //   targets: query targets attached to the panel
  //   gridPos: object with x, y, h and w fields giving position and size
  // Returns a time-series panel bound to the '$datasource' uid, with a
  // multi-series tooltip sorted descending and a table-style legend.
  generic(title, unit, targets, gridPos):
    timeSeries.new(title)
    + timeSeries.queryOptions.withTargets(targets)
    + timeSeries.datasource.withUid('$datasource')
    + timeSeries.standardOptions.withUnit(unit)
    + timeSeries.gridPos.withX(gridPos.x)
    + timeSeries.gridPos.withY(gridPos.y)
    + timeSeries.gridPos.withH(gridPos.h)
    + timeSeries.gridPos.withW(gridPos.w)
    // spanNulls is documented as boolean or number; pass the boolean false
    // rather than the string 'false', which is outside the documented type.
    + custom.withSpanNulls(false)
    + options.tooltip.withMode('multi')
    + options.tooltip.withSort('desc')
    + options.legend.withDisplayMode('table'),

  // genericLegend(title, unit, targets, gridPos)
  // Same as generic, plus a visible legend at the bottom that lists the mean,
  // max and min of every series and is sorted by max, descending.
  genericLegend(title, unit, targets, gridPos):
    self.generic(title, unit, targets, gridPos)
    + options.legend.withShowLegend(true)
    + options.legend.withCalcs([
      'mean',
      'max',
      'min',
    ])
    + options.legend.withSortBy('max')
    + options.legend.withSortDesc(true)
    + options.legend.withPlacement('bottom'),
},
stat: {
  // Short aliases into the grafonnet stat panel API.
  local statPanel = g.panel.stat,
  local reduce = statPanel.options.reduceOptions,

  // base(title, targets, gridPos)
  // Returns a stat panel bound to the '$datasource' uid that shows the given
  // targets, is placed according to gridPos (fields x, y, h, w) and reduces
  // each series with the 'last' calculation.
  base(title, targets, gridPos):
    local placement =
      statPanel.gridPos.withX(gridPos.x)
      + statPanel.gridPos.withY(gridPos.y)
      + statPanel.gridPos.withH(gridPos.h)
      + statPanel.gridPos.withW(gridPos.w);
    statPanel.new(title)
    + statPanel.datasource.withUid('$datasource')
    + statPanel.queryOptions.withTargets(targets)
    + placement
    + reduce.withCalcs(['last']),
},
}
201 changes: 201 additions & 0 deletions assets/ocp-performance/queries.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';
local variables = import './variables.libsonnet';

// generateTimeSeriesQuery(query, legend)
// Wraps one Prometheus expression in a single-element array so that callers
// can concatenate several targets with `+`. The target points at the
// datasource variable named in variables.datasource.name, uses the
// 'time_series' format, an interval factor of 2 and the given legend format.
local generateTimeSeriesQuery(query, legend) =
  local prom = g.query.prometheus;
  local target =
    prom.new('$' + variables.datasource.name, query)
    + prom.withFormat('time_series')
    + prom.withIntervalFactor(2)
    + prom.withLegendFormat(legend);
  [target];

{
// Node-scoped queries. Every query(nodeName) below splices nodeName into an
// instance=~"..." or node=~"..." label matcher and returns an array of targets.
// Placeholders such as $interval, $block_device, $net_device and $namespace
// stay in the expression verbatim (presumably dashboard variables — confirm).
nodeMemory: {
query(nodeName):
generateTimeSeriesQuery('node_memory_Active_bytes{instance=~"' + nodeName + '"}', 'Active')
+ generateTimeSeriesQuery('node_memory_MemTotal_bytes{instance=~"' + nodeName + '"}', 'Total')
+ generateTimeSeriesQuery('node_memory_Cached_bytes{instance=~"' + nodeName + '"} + node_memory_Buffers_bytes{instance=~"' + nodeName + '"}', 'Cached + Buffers')
+ generateTimeSeriesQuery('node_memory_MemAvailable_bytes{instance=~"' + nodeName + '"}', 'Available')
+ generateTimeSeriesQuery('(node_memory_MemTotal_bytes{instance=~"' + nodeName + '"} - (node_memory_MemFree_bytes{instance=~"' + nodeName + '"} + node_memory_Buffers_bytes{instance=~"' + nodeName + '"} + node_memory_Cached_bytes{instance=~"' + nodeName + '"}))', 'Used')
},
nodeCPU: {
query(nodeName):
generateTimeSeriesQuery('sum by (instance, mode)(irate(node_cpu_seconds_total{instance=~"' + nodeName + '",job=~".*"}[$interval])) * 100', 'Busy {{mode}}')
},
// Disk panels emit one read target and one write target per device.
diskThroughput: {
query(nodeName):
generateTimeSeriesQuery('rate(node_disk_read_bytes_total{device=~"$block_device",instance=~"' + nodeName + '"}[$interval])', '{{ device }} - read')
+ generateTimeSeriesQuery('rate(node_disk_written_bytes_total{device=~"$block_device",instance=~"' + nodeName + '"}[$interval])', '{{ device }} - write')
},
diskIOPS: {
query(nodeName):
generateTimeSeriesQuery('rate(node_disk_reads_completed_total{device=~"$block_device",instance=~"' + nodeName + '"}[$interval])', '{{ device }} - read')
+ generateTimeSeriesQuery('rate(node_disk_writes_completed_total{device=~"$block_device",instance=~"' + nodeName + '"}[$interval])', '{{ device }} - write')
},
// Network panels emit one RX and one TX target.
// The `* 8` on the byte rates presumably converts bytes/s to bits/s — confirm.
networkUtilization: {
query(nodeName):
generateTimeSeriesQuery('rate(node_network_receive_bytes_total{instance=~"' + nodeName + '",device=~"$net_device"}[$interval]) * 8', '{{instance}} - {{device}} - RX')
+ generateTimeSeriesQuery('rate(node_network_transmit_bytes_total{instance=~"' + nodeName + '",device=~"$net_device"}[$interval]) * 8', '{{instance}} - {{device}} - TX')
},
networkPackets: {
query(nodeName):
generateTimeSeriesQuery('rate(node_network_receive_packets_total{instance=~"' + nodeName + '",device=~"$net_device"}[$interval])', '{{instance}} - {{device}} - RX')
+ generateTimeSeriesQuery('rate(node_network_transmit_packets_total{instance=~"' + nodeName + '",device=~"$net_device"}[$interval])', '{{instance}} - {{device}} - TX')
},
networkDrop: {
query(nodeName):
generateTimeSeriesQuery('topk(10, rate(node_network_receive_drop_total{instance=~"' + nodeName + '"}[$interval]))', 'rx-drop-{{ device }}')
+ generateTimeSeriesQuery('topk(10,rate(node_network_transmit_drop_total{instance=~"' + nodeName + '"}[$interval]))', 'tx-drop-{{ device }}')
},
conntrackStats: {
query(nodeName):
generateTimeSeriesQuery('node_nf_conntrack_entries{instance=~"' + nodeName + '"}', 'conntrack_entries')
+ generateTimeSeriesQuery('node_nf_conntrack_entries_limit{instance=~"' + nodeName + '"}', 'conntrack_limit')
},
// Container-level queries restricted to the given node.
top10ContainerCPU: {
query(nodeName):
generateTimeSeriesQuery('topk(10, sum(irate(container_cpu_usage_seconds_total{container!="POD",name!="",node=~"' + nodeName + '",namespace!="",namespace=~"$namespace"}[$interval])) by (pod,container,namespace,name,service) * 100)', '{{ pod }}: {{ container }}')
},
top10ContainerRSS: {
query(nodeName):
generateTimeSeriesQuery('topk(10, container_memory_rss{container!="POD",name!="",node=~"' + nodeName + '",namespace!="",namespace=~"$namespace"})', '{{ pod }}: {{ container }}')
},
containerWriteBytes: {
query(nodeName):
generateTimeSeriesQuery('sum(rate(container_fs_writes_bytes_total{device!~".+dm.+", node=~"' + nodeName + '", container!=""}[$interval])) by (device, container)', '{{ container }}: {{ device }}')
},
// Top 25 containers by CPU and by RSS, limited to namespaces matching "stackrox".
// These queries take no arguments.
stackroxCPU: {
query():
generateTimeSeriesQuery('topk(25, sum(irate(container_cpu_usage_seconds_total{container!="POD",name!="",namespace!="",namespace=~"stackrox"}[$interval])) by (pod,container,namespace,name,service) * 100)', '{{ pod }}: {{ container }}')
},
stackroxMem: {
query():
generateTimeSeriesQuery('topk(25, container_memory_rss{container!="POD",name!="",namespace!="",namespace=~"stackrox"})', '{{ pod }}: {{ container }}')
},
// OVN queries: 0.99-quantile latencies from the ovnkube histograms (only
// values > 0 are kept) and CPU/RSS of containers in the
// openshift-ovn-kubernetes namespace.
ovnAnnotationLatency: {
query():
generateTimeSeriesQuery('histogram_quantile(0.99, sum(rate(ovnkube_master_pod_creation_latency_seconds_bucket[$interval])) by (pod,le)) > 0', '{{ pod }}')
},
ovnCNIAdd: {
query():
generateTimeSeriesQuery('histogram_quantile(0.99, sum(rate(ovnkube_node_cni_request_duration_seconds_bucket{command="ADD"}[$interval])) by (pod,le)) > 0', '{{ pod }}')
},
ovnCNIDel: {
query():
generateTimeSeriesQuery('histogram_quantile(0.99, sum(rate(ovnkube_node_cni_request_duration_seconds_bucket{command="DEL"}[$interval])) by (pod,le)) > 0', '{{ pod }}')
},
// NOTE(review): the CPU query matches pods with "ovnkube-master.*" while the
// memory query uses "ovnkube-master-.*" — confirm whether the difference is intended.
ovnKubeMasterCPU: {
query():
generateTimeSeriesQuery('irate(container_cpu_usage_seconds_total{pod=~"ovnkube-master.*",namespace="openshift-ovn-kubernetes",container!~"POD|"}[$interval])*100', '{{container}}-{{pod}}-{{node}}')
},
ovnKubeMasterMem: {
query():
generateTimeSeriesQuery('container_memory_rss{pod=~"ovnkube-master-.*",namespace="openshift-ovn-kubernetes",container!~"POD|"}', '{{container}}-{{pod}}-{{node}}')
},
topOvnControllerCPU: {
query():
generateTimeSeriesQuery('topk(10, irate(container_cpu_usage_seconds_total{pod=~"ovnkube-.*",namespace="openshift-ovn-kubernetes",container="ovn-controller"}[$interval])*100)', '{{node}}')
},
topOvnControllerMem: {
query():
generateTimeSeriesQuery('topk(10, sum(container_memory_rss{pod=~"ovnkube-node-.*",namespace="openshift-ovn-kubernetes",container="ovn-controller"}) by (node))', '{{node}}')
},
// RSS of the prometheus container in the two pods prometheus-k8s-1 and
// prometheus-k8s-0, one target per pod.
promReplMemUsage: {
query():
generateTimeSeriesQuery('sum(container_memory_rss{pod="prometheus-k8s-1",namespace!="",name!="",container="prometheus"}) by (pod)', '{{pod}}')
+ generateTimeSeriesQuery('sum(container_memory_rss{pod="prometheus-k8s-0",namespace!="",name!="",container="prometheus"}) by (pod)', '{{pod}}')
},
// Top 10 process CPU and resident memory for the kubelet and crio jobs; both
// jobs are selected with service="kubelet".
kubeletCPU: {
query():
generateTimeSeriesQuery('topk(10,irate(process_cpu_seconds_total{service="kubelet",job="kubelet"}[$interval])*100)', 'kubelet - {{node}}')
},
crioCPU: {
query():
generateTimeSeriesQuery('topk(10,irate(process_cpu_seconds_total{service="kubelet",job="crio"}[$interval])*100)', 'crio - {{node}}')
},
kubeletMemory: {
query():
generateTimeSeriesQuery('topk(10,process_resident_memory_bytes{service="kubelet",job="kubelet"})', 'kubelet - {{node}}')
},
crioMemory: {
query():
generateTimeSeriesQuery('topk(10,process_resident_memory_bytes{service="kubelet",job="crio"})', 'crio - {{node}}')
},
// Percentage of inodes in use on the filesystem mounted at /run.
// NOTE(review): the legend says "/var/run" while the matcher selects
// mountpoint="/run" — confirm the label is intended.
crioINodes: {
query():
generateTimeSeriesQuery('(1 - node_filesystem_files_free{fstype!="",mountpoint="/run"} / node_filesystem_files{fstype!="",mountpoint="/run"}) * 100', '/var/run - {{instance}}')
},
// Cluster inventory: counts of nodes, namespaces, pods, secrets, configmaps,
// deployments, services and routes, followed by firing alerts and the number
// of pods per node. None of these queries take arguments.
currentNodeCount: {
query():
generateTimeSeriesQuery('sum(kube_node_info{})', 'Number of nodes')
+ generateTimeSeriesQuery('sum(kube_node_status_condition{status="true"}) by (condition) > 0', 'Node: {{ condition }}')
},
currentNamespaceCount: {
query():
generateTimeSeriesQuery('sum(kube_namespace_status_phase) by (phase)', '{{ phase }}')
},
currentPodCount: {
query():
generateTimeSeriesQuery('sum(kube_pod_status_phase{}) by (phase) > 0', '{{ phase}} Pods')
},
// nsCount and podCount use the same metrics as the two "current*" entries
// above; they differ in the `> 0` filter and in the legend text.
nsCount: {
query():
generateTimeSeriesQuery('sum(kube_namespace_status_phase) by (phase) > 0', '{{ phase }} namespaces')
},
podCount: {
query():
generateTimeSeriesQuery('sum(kube_pod_status_phase{}) by (phase)', '{{phase}} pods')
},
secretCmCount: {
query():
generateTimeSeriesQuery('count(kube_secret_info{})', 'secrets')
+ generateTimeSeriesQuery('count(kube_configmap_info{})', 'Configmaps')
},
deployCount: {
query():
generateTimeSeriesQuery('count(kube_deployment_labels{})', 'Deployments')
},
servicesCount: {
query():
generateTimeSeriesQuery('count(kube_service_info{})', 'Services')
},
routesCount: {
query():
generateTimeSeriesQuery('count(openshift_route_info{})', 'Routes')
},
// Top 10 alert name/severity pairs, excluding severity "none".
alerts: {
query():
generateTimeSeriesQuery('topk(10,sum(ALERTS{severity!="none"}) by (alertname, severity))', '{{severity}}: {{alertname}}')
},
podDistribution: {
query():
generateTimeSeriesQuery('count(kube_pod_info{}) by (node)', '{{ node }}')
},
// Cluster-wide top 10 containers by RSS and by CPU (no node filter), top 10
// goroutine counts per job/instance, and cluster operator conditions.
top10ContMem: {
query():
generateTimeSeriesQuery('topk(10, container_memory_rss{namespace!="",container!="POD",name!=""})', '{{ namespace }} - {{ name }}')
},
top10ContCPU: {
query():
generateTimeSeriesQuery('topk(10,irate(container_cpu_usage_seconds_total{namespace!="",container!="POD",name!=""}[$interval])*100)', '{{ namespace }} - {{ name }}')
},
goroutinesCount: {
query():
generateTimeSeriesQuery('topk(10, sum(go_goroutines{}) by (job,instance))', '{{ job }} - {{ instance }}')
},
// Operator conditions: summed per condition, listed per name/reason, and the
// subset whose condition is "Degraded".
clusterOperatorsOverview: {
query():
generateTimeSeriesQuery('sum by (condition)(cluster_operator_conditions{condition!=""})', '{{ condition }}')
},
clusterOperatorsInformation: {
query():
generateTimeSeriesQuery('cluster_operator_conditions{name!="",reason!=""}', '{{name}} - {{reason}}')
},
clusterOperatorsDegraded: {
query():
generateTimeSeriesQuery('cluster_operator_conditions{condition="Degraded",name!="",reason!=""}', '{{name}} - {{reason}}')
},
}
Loading

0 comments on commit fa3bb29

Please sign in to comment.