Add optional extra labels to the PrometheusRule and ServiceMonitor templates
of the authentik chart, scope both templates with `with`, and tidy template
indentation in service.yaml.

NOTE(review): this patch was rebuilt from a copy whose line breaks and leading
whitespace had been lost. Hunk line counts match the @@ headers, but the
indentation inside hunks follows conventional Helm/YAML layout and must be
confirmed against the target tree before applying (the service.yaml hunks are
whitespace-only changes, so their old/new indentation is an assumption).
The ServiceMonitor template now ends with a newline and the new `labels`
descriptions are reworded, so the post-image blob ids on the affected
`index` lines are stale.

diff --git a/charts/authentik/README.md b/charts/authentik/README.md
index 629ca819..7271e70e 100644
--- a/charts/authentik/README.md
+++ b/charts/authentik/README.md
@@ -127,9 +127,11 @@ redis:
 | postgresql.postgresqlUsername | string | `"authentik"` | |
 | priorityClassName | string | `nil` | Custom priority class for different treatment by the scheduler |
 | prometheus.rules.create | bool | `false` | |
+| prometheus.rules.labels | object | `{}` | additional labels on PrometheusRule |
 | prometheus.serviceMonitor.create | bool | `false` | |
 | prometheus.serviceMonitor.interval | string | `"30s"` | |
 | prometheus.serviceMonitor.scrapeTimeout | string | `"3s"` | |
+| prometheus.serviceMonitor.labels | object | `{}` | additional labels on ServiceMonitor |
 | readinessProbe.enabled | bool | `true` | |
 | readinessProbe.httpGet.path | string | `"/-/health/ready/"` | |
 | readinessProbe.httpGet.port | string | `"http"` | |
diff --git a/charts/authentik/templates/prom-rules.yaml b/charts/authentik/templates/prom-rules.yaml
index 8ba7b3b9..6320336b 100644
--- a/charts/authentik/templates/prom-rules.yaml
+++ b/charts/authentik/templates/prom-rules.yaml
@@ -1,10 +1,14 @@
-{{- if .Values.prometheus.rules.create -}}
+{{- with .Values.prometheus.rules }}
+{{- if .create -}}
 apiVersion: monitoring.coreos.com/v1
 kind: PrometheusRule
 metadata:
-  name: {{ include "common.names.fullname" . }}
+  name: {{ include "common.names.fullname" $ }}
   labels:
-    {{- include "common.labels" . | nindent 4 }}
+    {{- include "common.labels" $ | nindent 4 }}
+    {{- with .labels }}
+      {{- toYaml . | nindent 4 }}
+    {{- end }}
 spec:
   groups:
     - name: authentik Aggregate request counters
@@ -47,6 +51,7 @@ spec:
           expr: sum(rate(django_http_exceptions_total_by_type[30s])) by (job,type)
         - record: job:django_http_exceptions_total_by_view:sum_rate30s
           expr: sum(rate(django_http_exceptions_total_by_view[30s])) by (job,view)
+
     - name: authentik Aggregate latency histograms
       rules:
         - record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s
@@ -81,6 +86,7 @@ spec:
           expr: histogram_quantile(0.999, sum(rate(django_http_requests_latency_seconds_bucket[30s])) by (job, le))
           labels:
             quantile: "99.9"
+
     - name: authentik Aggregate model operations
       rules:
         - record: job:django_model_inserts_total:sum_rate1m
@@ -101,48 +107,59 @@ spec:
           expr: sum(rate(django_db_execute_many_total[30s])) by (alias, vendor)
         - record: job:django_db_errors_total:sum_rate30s
           expr: sum(rate(django_db_errors_total[30s])) by (alias, vendor, type)
+
     - name: authentik Aggregate migrations
       rules:
         - record: job:django_migrations_applied_total:max
           expr: max(django_migrations_applied_total) by (job, connection)
         - record: job:django_migrations_unapplied_total:max
           expr: max(django_migrations_unapplied_total) by (job, connection)
+
     - name: authentik Alerts
       rules:
         - alert: NoWorkersConnected
+          labels:
+            severity: critical
           expr: max without (pid) (authentik_admin_workers) < 1
+          for: 10m
           annotations:
-            message: |
-              authentik instance {{ printf "{{ $labels.instance }}" }}'s worker are either not running or not connected.
+            {{`
             summary: No workers connected
-          for: 10m
+            message: authentik instance {{ $labels.instance }}'s worker are either not running or not connected.
+            `}}
+
+
+        - alert: PendingMigrations
           labels:
             severity: critical
-        - alert: PendingMigrations
           expr: max without (pid) (django_migrations_unapplied_total) > 0
+          for: 10m
           annotations:
-            message: |
-              authentik instance {{ printf "{{ $labels.instance }}" }} has pending database migrations
+            {{`
             summary: Pending database migrations
-          for: 10m
+            message: authentik instance {{ $labels.instance }} has pending database migrations
+            `}}
+
+        - alert: FailedSystemTasks
           labels:
             severity: critical
-        - alert: FailedSystemTasks
           expr: sum(increase(authentik_system_tasks{status="TaskResultStatus.ERROR"}[2h])) > 0
+          for: 2h
           annotations:
-            message: |
-              System task {{ printf "{{ $labels.task_name }}" }} has failed
+            {{`
             summary: Failed system tasks
-          for: 2h
+            message: System task {{ $labels.task_name }} has failed
+            `}}
+
+        - alert: DisconnectedOutposts
           labels:
             severity: critical
-        - alert: DisconnectedOutposts
           expr: sum by (outpost) (max without (pid) (authentik_outposts_connected{uid!~"specific.*"})) < 1
+          for: 30m
           annotations:
-            message: |
-              Outpost {{ printf "{{ $labels.outpost }}" }} has at least 1 disconnected instance
+            {{`
             summary: Disconnected outpost
-          for: 30m
-          labels:
-            severity: critical
-  {{- end }}
+            message: Outpost {{ $labels.outpost }} has at least 1 disconnected instance
+            `}}
+{{- end }}
+{{- end }}
diff --git a/charts/authentik/templates/prom-service-monitor.yaml b/charts/authentik/templates/prom-service-monitor.yaml
index f237d349..8760ed30 100644
--- a/charts/authentik/templates/prom-service-monitor.yaml
+++ b/charts/authentik/templates/prom-service-monitor.yaml
@@ -1,16 +1,21 @@
-{{- if .Values.prometheus.serviceMonitor.create -}}
+{{- with .Values.prometheus.serviceMonitor }}
+{{- if .create -}}
 apiVersion: monitoring.coreos.com/v1
 kind: ServiceMonitor
 metadata:
+  name: {{ include "common.names.fullname" $ }}
   labels:
-    {{- include "common.labels" . | nindent 4 }}
-  name: {{ include "common.names.fullname" . }}
+    {{- include "common.labels" $ | nindent 4 }}
+    {{- with .labels }}
+      {{- toYaml . | nindent 4 }}
+    {{- end }}
 spec:
   endpoints:
     - port: http-metrics
-      scrapeTimeout: {{ .Values.prometheus.serviceMonitor.scrapeTimeout }}
-      interval: {{ .Values.prometheus.serviceMonitor.interval }}
+      scrapeTimeout: {{ .scrapeTimeout }}
+      interval: {{ .interval }}
   selector:
     matchLabels:
-      {{- include "common.labels" . | nindent 6 }}
-  {{- end }}
+      {{- include "common.labels.selectorLabels" $ | nindent 6 }}
+{{- end }}
+{{- end }}
diff --git a/charts/authentik/templates/service.yaml b/charts/authentik/templates/service.yaml
index 5353b533..8934e9d7 100644
--- a/charts/authentik/templates/service.yaml
+++ b/charts/authentik/templates/service.yaml
@@ -3,13 +3,13 @@ kind: Service
 metadata:
   name: {{ include "common.names.fullname" . }}
   labels:
-  {{- include "common.labels" . | nindent 4 }}
-  {{- with .Values.service.labels }}
-    {{- toYaml . | nindent 4 }}
-  {{- end }}
-    {{- with .Values.service.annotations }}
+    {{- include "common.labels" . | nindent 4 }}
+    {{- with .Values.service.labels }}
+      {{- toYaml . | nindent 4 }}
+    {{- end }}
+  {{- with .Values.service.annotations }}
   annotations:
-    {{ toYaml . | nindent 4 }}
+    {{- toYaml . | nindent 4 }}
   {{- end }}
 spec:
   {{- $type := default "ClusterIP" .Values.service.type }}
@@ -26,7 +26,7 @@ spec:
   {{- end }}
   {{- with .Values.service.loadBalancerSourceRanges }}
   loadBalancerSourceRanges:
-      {{- toYaml . | nindent 4 }}
+    {{- toYaml . | nindent 4 }}
   {{- end }}
   {{- end }}
   type: {{ $type }}
@@ -34,12 +34,12 @@ spec:
   sessionAffinity: {{ .Values.service.sessionAffinity }}
   {{- with .Values.service.sessionAffinityConfig }}
   sessionAffinityConfig:
-      {{- toYaml . | nindent 4 }}
+    {{- toYaml . | nindent 4 }}
   {{- end }}
   {{- end }}
   {{- with .Values.service.externalIPs }}
   externalIPs:
-      {{- toYaml . | nindent 4 }}
+    {{- toYaml . | nindent 4 }}
   {{- end }}
   {{- with .Values.service.publishNotReadyAddresses }}
   publishNotReadyAddresses: {{ . }}
diff --git a/charts/authentik/values.yaml b/charts/authentik/values.yaml
index c1ab886c..28ee1341 100644
--- a/charts/authentik/values.yaml
+++ b/charts/authentik/values.yaml
@@ -175,8 +175,12 @@ prometheus:
     create: false
     interval: 30s
     scrapeTimeout: 3s
+    # -- additional labels on ServiceMonitor
+    labels: {}
   rules:
     create: false
+    # -- additional labels on PrometheusRule
+    labels: {}
 
 geoip:
   # -- optional GeoIP, deploys a cronjob to download the maxmind database