diff --git a/kubernetes/cmsweb/monitoring/prometheus/aps/prometheus.yaml b/kubernetes/cmsweb/monitoring/prometheus/aps/prometheus.yaml new file mode 100644 index 000000000..409b11f99 --- /dev/null +++ b/kubernetes/cmsweb/monitoring/prometheus/aps/prometheus.yaml @@ -0,0 +1,142 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +alerting: + alertmanagers: + - static_configs: + - targets: + - cms-monitoring.cern.ch:30093 + +#enable remote write to be able to write to the cmsmonit cluster +remote_write: + - url: http://cms-monitoring-ha1.cern.ch:30428/api/v1/write + queue_config: + max_samples_per_send: 10000 + max_shards: 30 + - url: http://cms-monitoring-ha2.cern.ch:30428/api/v1/write + queue_config: + max_samples_per_send: 10000 + max_shards: 30 +scrape_configs: +#first, we define all the custom jobs that we have for the services + - job_name: cern-magnum-kube-state-metrics + scrape_interval: 1m + scrape_timeout: 1m + static_configs: + - targets: [ "cern-magnum-kube-state-metrics.kube-system.svc.cluster.local:8080" ] + - job_name: "nginx-ingress-controller" + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - kube-system + relabel_configs: + - source_labels: [__meta_kubernetes_pod_container_port_number] + action: keep + regex: ^(10254) + - job_name: 'quota-exporter' + scrape_interval: 120s + scrape_timeout: 110s + static_configs: + - targets: ['quota-exporter.http.svc.cluster.local:18000'] +#Then, the default job configs + + - job_name: "kubernetes-apiservers" + kubernetes_sd_configs: + - role: endpoints + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + relabel_configs: + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: default;kubernetes;https + + - job_name: "kubernetes-nodes" + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + + - job_name: "kubernetes-pods" + kubernetes_sd_configs: + - role: pod + # for more information about prometheus relabeling see + # https://blog.freshtracks.io/prometheus-relabel-rules-and-the-action-parameter-39c71959354a + relabel_configs: + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] + action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + target_label: __address__ + - source_labels: [__meta_kubernetes_pod_node_name] + action: replace + target_label: host + - action: labeldrop + regex: __meta_kubernetes_pod_label_pod_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: ns + - action: labeldrop + regex: __meta_kubernetes_namespace(.+) + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: apod + + - job_name: kubernetes-node-exporter + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + scheme: https + kubernetes_sd_configs: + - role: node + relabel_configs: + - source_labels: [__address__] + regex: ^(.*):\d+$ + target_label: __address__ + replacement: $1:9100 + - target_label: __scheme__ + replacement: http + # Host name + - source_labels: [__meta_kubernetes_node_name] + target_label: instance + + - job_name: "kubernetes-cadvisor" + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor diff --git a/kubernetes/cmsweb/scripts/deploy.sh b/kubernetes/cmsweb/scripts/deploy.sh index 1f7b71298..2f161a2f8 100755 --- a/kubernetes/cmsweb/scripts/deploy.sh +++ b/kubernetes/cmsweb/scripts/deploy.sh @@ -736,8 +736,10 @@ deploy_monitoring() # locate all prometheus files local mon="" - if [ "$deployment" == "services" ] || [ "$deployment" == "aps" ]; then + if [ "$deployment" == "services" ]; then mon="services" + elif [ "$deployment" == "aps" ]; then + mon="aps" else mon="frontend" fi @@ -760,9 +762,11 @@ deploy_monitoring() else files="$files --from-file=monitoring/prometheus/$mon/prometheus.yaml" fi - for fname in monitoring/prometheus/rules/*; do - files="$files --from-file=$fname" - done + if [ "$mon" != "aps" ]; then + for fname in monitoring/prometheus/rules/*; do + files="$files --from-file=$fname" + done + fi fi kubectl create secret generic prometheus-secrets \ $files --dry-run=client -o yaml | \