diff --git a/.github/workflows/infra-ci-workflow.yaml b/.github/workflows/workflow.yml similarity index 94% rename from .github/workflows/infra-ci-workflow.yaml rename to .github/workflows/workflow.yml index 3f8a6949..bd91776b 100644 --- a/.github/workflows/infra-ci-workflow.yaml +++ b/.github/workflows/workflow.yml @@ -1,5 +1,4 @@ ---- -name: Infra integration workflow +name: Infrastructure integration workflow on: pull_request: @@ -9,7 +8,6 @@ on: - synchronize jobs: - markdown-check: uses: ai-cfia/github-workflows/.github/workflows/workflow-markdown-check.yml@76-as-a-devops-i-would-like-to-fix-the-markdown-lint-workflow diff --git a/.mlc_config.json b/.mlc_config.json new file mode 100644 index 00000000..f2d137e9 --- /dev/null +++ b/.mlc_config.json @@ -0,0 +1,3 @@ +{ + "aliveStatusCodes": [200,403] +} diff --git a/.terraform.lock.hcl b/.terraform.lock.hcl new file mode 100644 index 00000000..8e7f85cc --- /dev/null +++ b/.terraform.lock.hcl @@ -0,0 +1,102 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. + +provider "registry.terraform.io/hashicorp/external" { + version = "2.3.1" + constraints = "2.3.1" + hashes = [ + "h1:bROCw6g5D/3fFnWeJ01L4IrdnJl1ILU8DGDgXCtYzaY=", + "zh:001e2886dc81fc98cf17cf34c0d53cb2dae1e869464792576e11b0f34ee92f54", + "zh:2eeac58dd75b1abdf91945ac4284c9ccb2bfb17fa9bdb5f5d408148ff553b3ee", + "zh:2fc39079ba61411a737df2908942e6970cb67ed2f4fb19090cd44ce2082903dd", + "zh:472a71c624952cff7aa98a7b967f6c7bb53153dbd2b8f356ceb286e6743bb4e2", + "zh:4cff06d31272aac8bc35e9b7faec42cf4554cbcbae1092eaab6ab7f643c215d9", + "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", + "zh:7ed16ccd2049fa089616b98c0bd57219f407958f318f3c697843e2397ddf70df", + "zh:842696362c92bf2645eb85c739410fd51376be6c488733efae44f4ce688da50e", + "zh:8985129f2eccfd7f1841ce06f3bf2bbede6352ec9e9f926fbaa6b1a05313b326", + "zh:a5f0602d8ec991a5411ef42f872aa90f6347e93886ce67905c53cfea37278e05", + "zh:bf4ab82cbe5256dcef16949973bf6aa1a98c2c73a98d6a44ee7bc40809d002b8", + "zh:e70770be62aa70198fa899526d671643ff99eecf265bf1a50e798fc3480bd417", + ] +} + +provider "registry.terraform.io/hashicorp/google" { + version = "5.10.0" + constraints = "~> 5.0" + hashes = [ + "h1:3kD/GqYmZkA97ebToXu6qhhrwo+GQNmqq9xv30Qkhmw=", + "zh:0f6a1feb5b3a128be6ef5fe0400ed800310a67e799c18aec7442161bb6d3ba36", + "zh:13d591ba78e424c94ce5caaf176ab6b087b0e3af08a7b6bcd963673698cdefda", + "zh:3bef54a2b24b06eef99f3df02e0fe4ac97f018c89f83e0faeb4ade921962565b", + "zh:3f3755b8f5b9db1611d42a02c21f03c54577e4aad3cf93323792f131c671c050", + "zh:61516eec734714ac48b565bee93cc2532160d1b4bd0320753799b829083b7060", + "zh:9160848ad0b9becb522a0744dcb89474849906aa2436ed945c658fe201a724b0", + "zh:aa5e79b01949cfedd874bf52958f90cf8f7d202600126c872127a9a156a3c17b", + "zh:cef73a67031008b7d7ef3edfbcd5e1a9b04c0f2580d815401248025b741bc8e4", + "zh:d2ad21ff9e9d2ad04146591c1b5784075e6df73e2bd243efd8d227d764b80b6e", + "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", + "zh:f58b145081d20bce52e14bee0de73f5c018bc39b8c4736e23e1329df32f8bd45", + "zh:fb82f6b5d1f992243ab8fe417659cdf9831202cf1e16fe7593d3967888b035cc", + ] +} + +provider "registry.terraform.io/hashicorp/kubernetes" { + version = "2.24.0" + constraints = "2.24.0" + hashes = [ + "h1:u9lRMCdNXcB5/WQTZVMvGhNliW2pKOzj3SOVbu9yPpg=", + "zh:0ed83ec390a7e75c4990ebce698f14234de2b6204ed9a01cd042bb7ea5f26564", + "zh:195150e4fdab259c70088528006f4604557a051e037ebe8de64e92840f27e40a", + 
"zh:1a334af55f7a74adf033eb871c9fe7e9e648b41ab84321114ef4ca0e7a34fba6", + "zh:1ef68c3832691de21a61bf1a4e268123f3e08850712eda0b893cac908a0d1bc1", + "zh:44a1c58e5a6646e62b0bad653319c245f3b635dd03554dea2707a38f553e4a52", + "zh:54b5b374c4386f7f05b3fe986f9cb57bde4beab3bdf6ee33444f2b9a81b8af64", + "zh:aa8c2687ab784b72f8cdad8d3c3673dea83b33561e7b3f2d287ef0d06ff2a9e5", + "zh:e6ecba0503052ef3ad49ad56e17b2a73d9b55e30fcb82b040189d281e25e1a3b", + "zh:f105393f6487d3eb1f1636ba42d10c82950ddfef852244c1bca8d526fa23a9a3", + "zh:f17a8f1914ec66d80ccacecd40123362cf093abee3d3aa1ff9f8f687d8736f85", + "zh:f394b12ef01fa0bdf666a43ad152eb3890134f35e635ea056b18771c292de46e", + "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", + ] +} + +provider "registry.terraform.io/hashicorp/local" { + version = "2.4.0" + constraints = "2.4.0" + hashes = [ + "h1:R97FTYETo88sT2VHfMgkPU3lzCsZLunPftjSI5vfKe8=", + "zh:53604cd29cb92538668fe09565c739358dc53ca56f9f11312b9d7de81e48fab9", + "zh:66a46e9c508716a1c98efbf793092f03d50049fa4a83cd6b2251e9a06aca2acf", + "zh:70a6f6a852dd83768d0778ce9817d81d4b3f073fab8fa570bff92dcb0824f732", + "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", + "zh:82a803f2f484c8b766e2e9c32343e9c89b91997b9f8d2697f9f3837f62926b35", + "zh:9708a4e40d6cc4b8afd1352e5186e6e1502f6ae599867c120967aebe9d90ed04", + "zh:973f65ce0d67c585f4ec250c1e634c9b22d9c4288b484ee2a871d7fa1e317406", + "zh:c8fa0f98f9316e4cfef082aa9b785ba16e36ff754d6aba8b456dab9500e671c6", + "zh:cfa5342a5f5188b20db246c73ac823918c189468e1382cb3c48a9c0c08fc5bf7", + "zh:e0e2b477c7e899c63b06b38cd8684a893d834d6d0b5e9b033cedc06dd7ffe9e2", + "zh:f62d7d05ea1ee566f732505200ab38d94315a4add27947a60afa29860822d3fc", + "zh:fa7ce69dde358e172bd719014ad637634bbdabc49363104f4fca759b4b73f2ce", + ] +} + +provider "registry.terraform.io/hashicorp/tls" { + version = "4.0.4" + constraints = "4.0.4" + hashes = [ + "h1:pe9vq86dZZKCm+8k1RhzARwENslF3SXb9ErHbQfgjXU=", + "zh:23671ed83e1fcf79745534841e10291bbf34046b27d6e68a5d0aab77206f4a55", + "zh:45292421211ffd9e8e3eb3655677700e3c5047f71d8f7650d2ce30242335f848", + "zh:59fedb519f4433c0fdb1d58b27c210b27415fddd0cd73c5312530b4309c088be", + "zh:5a8eec2409a9ff7cd0758a9d818c74bcba92a240e6c5e54b99df68fff312bbd5", + "zh:5e6a4b39f3171f53292ab88058a59e64825f2b842760a4869e64dc1dc093d1fe", + "zh:810547d0bf9311d21c81cc306126d3547e7bd3f194fc295836acf164b9f8424e", + "zh:824a5f3617624243bed0259d7dd37d76017097dc3193dac669be342b90b2ab48", + "zh:9361ccc7048be5dcbc2fafe2d8216939765b3160bd52734f7a9fd917a39ecbd8", + "zh:aa02ea625aaf672e649296bce7580f62d724268189fe9ad7c1b36bb0fa12fa60", + "zh:c71b4cd40d6ec7815dfeefd57d88bc592c0c42f5e5858dcc88245d371b4b8b1e", + "zh:dabcd52f36b43d250a3d71ad7abfa07b5622c69068d989e60b79b2bb4f220316", + "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", + ] +} diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 00000000..d156db8b --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,9 @@ +{ + "recommendations": [ + "stkb.rewrap", + "DavidAnson.vscode-markdownlint" + ], + "unwantedRecommendations": [ + + ] +} diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..bbf84028 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "editor.rulers": [80], + "files.trimTrailingWhitespace": true, + "files.trimFinalNewlines": true, + "files.insertFinalNewline": true +} diff --git a/README.md b/README.md index c8e526b1..5133fe1a 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,31 @@ # 
Infrastructure Repository for ACIA-CFIA AI-Lab -This repository is dedicated to the infrastructure management of the ACIA-CFIA -AI-Lab. It contains scripts, configurations, and documentation pertinent to -infrastructure and DevOps practices within the lab, facilitating setup, -deployment, and management across multiple cloud platforms including AWS, GCP, -and Azure. +This repository contains all the infrastructure used by the ACIA/CFIA AI Lab. +In this repository, you can find the Kubernetes manifests that deploy each of +the applications on the three different cloud providers: Google Cloud Platform +(GCP), Amazon Web Services (AWS), and Azure. -## Contents +## Content -Cross-Cloud Setup Scripts: Automation scripts for seamless configuration across -AWS, GCP, and Azure, covering project initiation, billing account association, -artifact repository orchestration, and service account setup. GitHub Repository -Creation Guide: Detailed instructions for creating new repositories in alignment -with ACIA-CFIA standards. Getting Started: +- The Terraform configuration for the GCP cluster. +- Kubernetes manifests used to deploy the following applications: + - [Nachet backend](https://github.com/ai-cfia/nachet-backend) + - [Nachet frontend](https://github.com/ai-cfia/nachet-frontend) + - [Finesse backend](https://github.com/ai-cfia/finesse-backend) + - [Finesse frontend](https://github.com/ai-cfia/finesse-frontend) +- Configuration for Vault, Grafana, Prometheus, Alert Manager, Ingress NGINX, +and Cert Manager to meet our requirements. -## Clone this repository +## Tooling -1. Navigate to the desired script or documentation. -2. Follow the provided instructions. -3. Related Repositories: +- [Hashicorp Vault](https://www.vaultproject.io/) +- [Grafana](https://grafana.com/) +- [Prometheus](https://prometheus.io/docs/visualization/grafana/) +- [Alert manager](https://github.com/prometheus/alertmanager) +- [Cert manager](https://cert-manager.io/) +- [Ingress NGINX](https://docs.nginx.com/nginx-ingress-controller/) +- [OTEL](https://opentelemetry.io/) -Dev-Rel-Docs: Contains introductory files and documentation related to developer -relations at ACIA-CFIA AI-Lab. 
+## Useful links + +[ai-cfia GitHub container registry](https://github.com/orgs/ai-cfia/packages) diff --git a/kubernetes/apps/demo/nginx-deployment.yml b/kubernetes/apps/demo/nginx-deployment.yml new file mode 100644 index 00000000..91834738 --- /dev/null +++ b/kubernetes/apps/demo/nginx-deployment.yml @@ -0,0 +1,89 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: nginx + labels: + name: nginx + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx-deployment + namespace: nginx +spec: + replicas: 2 + selector: + matchLabels: + app: nginx + template: + metadata: + labels: + app: nginx + spec: + containers: + - name: nginx + image: nginx:1.14.2 + ports: + - containerPort: 80 + +--- +apiVersion: v1 +kind: Service +metadata: + name: nginx + namespace: nginx +spec: + clusterIP: None + selector: + app: nginx + ports: + - protocol: TCP + port: 80 + +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: nginx-ingress + namespace: nginx + annotations: + cert-manager.io/cluster-issuer: letsencrypt-http + ingress.kubernetes.io/force-ssl-redirect: "true" + kubernetes.io/tls-acme: "true" +spec: + ingressClassName: nginx + tls: + - hosts: + - nginx.ninebasetwo.xyz + secretName: aciacfia-tls + rules: + - host: nginx.ninebasetwo.xyz + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: nginx + port: + number: 80 + +# --- +# apiVersion: gateway.networking.k8s.io/v1beta1 +# kind: HTTPRoute +# metadata: +# name: nginx-http-route +# namespace: nginx +# spec: +# parentRefs: +# - name: gateway-gke-l7-rilb +# rules: +# - matches: +# - path: +# type: PathPrefix +# value: "/" +# backendRefs: +# - name: nginx +# port: 80 diff --git a/kubernetes/apps/finesse/finesse-backend-deployment.yml b/kubernetes/apps/finesse/finesse-backend-deployment.yml new file mode 100644 index 00000000..42f0342d --- /dev/null +++ b/kubernetes/apps/finesse/finesse-backend-deployment.yml @@ -0,0 +1,77 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: secrets-reader + namespace: finesse + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: finesse-backend + namespace: finesse +spec: + replicas: 2 + selector: + matchLabels: + app: finesse-backend + template: + metadata: + labels: + app: finesse-backend + annotations: + vault.hashicorp.com/agent-inject: 'true' + vault.hashicorp.com/role: 'secrets-reader' + vault.hashicorp.com/tls-skip-verify: 'true' + vault.hashicorp.com/agent-inject-template-.env: | + {{- with secret "apps/finesse" -}} + AZURE_OPENAI_CHATGPT_DEPLOYMENT="{{ .Data.data.AZURE_OPENAI_CHATGPT_DEPLOYMENT }}" + AZURE_OPENAI_GPT_DEPLOYMENT="{{ .Data.data.AZURE_OPENAI_GPT_DEPLOYMENT }}" + FINESSE_BACKEND_AZURE_SEARCH_API_KEY="{{ .Data.data.FINESSE_BACKEND_AZURE_SEARCH_API_KEY }}" + FINESSE_BACKEND_AZURE_SEARCH_ENDPOINT="{{ .Data.data.FINESSE_BACKEND_AZURE_SEARCH_ENDPOINT }}" + FINESSE_BACKEND_AZURE_SEARCH_INDEX_NAME="{{ .Data.data.FINESSE_BACKEND_AZURE_SEARCH_INDEX_NAME }}" + FINESSE_BACKEND_GITHUB_STATIC_FILE_URL="{{ .Data.data.FINESSE_BACKEND_GITHUB_STATIC_FILE_URL }}" + FINESSE_BACKEND_STATIC_FILE_URL="{{ .Data.data.FINESSE_BACKEND_STATIC_FILE_URL }}" + FINESSE_BACKEND_DEBUG_MODE="{{ .Data.data.FINESSE_BACKEND_DEBUG_MODE }}" + FINESSE_WEIGHTS="{{ .Data.data.FINESSE_WEIGHTS }}" + LOUIS_DSN="{{ .Data.data.LOUIS_DSN }}" + LOUIS_SCHEMA="{{ .Data.data.LOUIS_SCHEMA }}" + OPENAI_API_ENGINE="{{ .Data.data.OPENAI_API_ENGINE }}" + OPENAI_API_KEY="{{ .Data.data.OPENAI_API_KEY }}" + OPENAI_API_VERSION="{{ .Data.data.OPENAI_API_VERSION }}" +
OPENAI_ENDPOINT="{{ .Data.data.OPENAI_ENDPOINT }}" + {{- end }} + spec: + serviceAccountName: secrets-reader + containers: + - name: finesse-backend + image: ghcr.io/ai-cfia/finesse-backend:main + imagePullPolicy: Always + command: ["/bin/sh", "-c"] + args: + - > + cp /vault/secrets/.env . && + gunicorn --bind :8080 --workers 1 --threads 8 --timeout 0 --forwarded-allow-ips "*" app:app + ports: + - containerPort: 8080 + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 60 + periodSeconds: 10 + +--- +apiVersion: v1 +kind: Service +metadata: + name: finesse-backend-svc + namespace: finesse +spec: + clusterIP: None + selector: + app: finesse-backend + ports: + - protocol: TCP + port: 8080 diff --git a/kubernetes/apps/finesse/finesse-frontend-deployment.yml b/kubernetes/apps/finesse/finesse-frontend-deployment.yml new file mode 100644 index 00000000..d1ad36f7 --- /dev/null +++ b/kubernetes/apps/finesse/finesse-frontend-deployment.yml @@ -0,0 +1,43 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: finesse-frontend + namespace: finesse +spec: + replicas: 2 + selector: + matchLabels: + app: finesse-frontend + template: + metadata: + labels: + app: finesse-frontend + spec: + serviceAccountName: secrets-reader + containers: + - name: finesse-frontend + image: ghcr.io/ai-cfia/finesse-frontend:main + imagePullPolicy: Always + ports: + - containerPort: 3000 + livenessProbe: + httpGet: + path: /health + port: 3000 + initialDelaySeconds: 60 + periodSeconds: 10 + +--- +apiVersion: v1 +kind: Service +metadata: + name: finesse-frontend-svc + namespace: finesse +spec: + clusterIP: None + selector: + app: finesse-frontend + ports: + - protocol: TCP + port: 3000 diff --git a/kubernetes/apps/finesse/finesse-ingress.yml b/kubernetes/apps/finesse/finesse-ingress.yml new file mode 100644 index 00000000..548b10a4 --- /dev/null +++ b/kubernetes/apps/finesse/finesse-ingress.yml @@ -0,0 +1,60 @@ +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: finesse-backend-ingress + namespace: finesse + annotations: + nginx.ingress.kubernetes.io/use-regex: "true" + nginx.ingress.kubernetes.io/rewrite-target: /$2 # https://kubernetes.github.io/ingress-nginx/examples/rewrite/ + cert-manager.io/cluster-issuer: letsencrypt-http + ingress.kubernetes.io/force-ssl-redirect: "true" + kubernetes.io/tls-acme: "true" +spec: + ingressClassName: nginx + tls: + - hosts: + - finesse.ninebasetwo.xyz + secretName: aciacfia-tls + rules: + - host: finesse.ninebasetwo.xyz + http: + paths: + - path: /api(/|$)(.*) + pathType: ImplementationSpecific + backend: + service: + name: finesse-backend-svc + port: + number: 8080 + +--- +# For more information check https://github.com/nginxinc/kubernetes-ingress/issues/323 +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: finesse-frontend-ingress + namespace: finesse + annotations: + nginx.ingress.kubernetes.io/add-base-url: "true" + nginx.ingress.kubernetes.io/rewrite-target: / + cert-manager.io/cluster-issuer: letsencrypt-http + ingress.kubernetes.io/force-ssl-redirect: "true" + kubernetes.io/tls-acme: "true" +spec: + ingressClassName: nginx + tls: + - hosts: + - finesse.ninebasetwo.xyz + secretName: aciacfia-tls + rules: + - host: finesse.ninebasetwo.xyz + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: finesse-frontend-svc + port: + number: 3000 diff --git a/kubernetes/apps/finesse/finesse-namespace.yml b/kubernetes/apps/finesse/finesse-namespace.yml new file mode 100644 index 00000000..270eafa3 
--- /dev/null +++ b/kubernetes/apps/finesse/finesse-namespace.yml @@ -0,0 +1,7 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: finesse + labels: + name: finesse diff --git a/kubernetes/apps/nachet/nachet-backend-deployment.yml b/kubernetes/apps/nachet/nachet-backend-deployment.yml new file mode 100644 index 00000000..c4739ac6 --- /dev/null +++ b/kubernetes/apps/nachet/nachet-backend-deployment.yml @@ -0,0 +1,69 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: secrets-reader + namespace: nachet + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nachet-backend + namespace: nachet +spec: + replicas: 2 + selector: + matchLabels: + app: nachet-backend + template: + metadata: + labels: + app: nachet-backend + annotations: + vault.hashicorp.com/agent-inject: 'true' + vault.hashicorp.com/role: 'secrets-reader' + vault.hashicorp.com/tls-skip-verify: 'true' + vault.hashicorp.com/agent-inject-template-.env: | + {{- with secret "apps/nachet" -}} + NACHET_AZURE_STORAGE_CONNECTION_STRING="{{ .Data.data.NACHET_AZURE_STORAGE_CONNECTION_STRING }}" + NACHET_DATA="{{ .Data.data.NACHET_DATA }}" + NACHET_MODEL="{{ .Data.data.NACHET_MODEL }}" + NACHET_MODEL_ENDPOINT_ACCESS_KEY="{{ .Data.data.NACHET_MODEL_ENDPOINT_ACCESS_KEY }}" + NACHET_MODEL_ENDPOINT_REST_URL="{{ .Data.data.NACHET_MODEL_ENDPOINT_REST_URL }}" + NACHET_RESOURCE_GROUP="{{ .Data.data.NACHET_RESOURCE_GROUP }}" + NACHET_SUBSCRIPTION_ID="{{ .Data.data.NACHET_SUBSCRIPTION_ID }}" + NACHET_WORKSPACE="{{ .Data.data.NACHET_WORKSPACE }}" + {{- end }} + spec: + serviceAccountName: secrets-reader + containers: + - name: nachet-backend + image: ghcr.io/ai-cfia/nachet-backend:48 + imagePullPolicy: Always + command: ["/bin/sh", "-c"] + args: + - > + cp /vault/secrets/.env . && hypercorn -b :8080 app:app + ports: + - containerPort: 8080 + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 60 + periodSeconds: 10 + +--- +apiVersion: v1 +kind: Service +metadata: + name: nachet-backend-svc + namespace: nachet +spec: + clusterIP: None + selector: + app: nachet-backend + ports: + - protocol: TCP + port: 8080 diff --git a/kubernetes/apps/nachet/nachet-frontend-deployment.yml b/kubernetes/apps/nachet/nachet-frontend-deployment.yml new file mode 100644 index 00000000..867a1353 --- /dev/null +++ b/kubernetes/apps/nachet/nachet-frontend-deployment.yml @@ -0,0 +1,42 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nachet-frontend + namespace: nachet +spec: + replicas: 2 + selector: + matchLabels: + app: nachet-frontend + template: + metadata: + labels: + app: nachet-frontend + spec: + serviceAccountName: secrets-reader + containers: + - name: nachet-frontend + image: ghcr.io/ai-cfia/nachet-frontend:main + imagePullPolicy: Always + ports: + - containerPort: 3000 + livenessProbe: + httpGet: + path: / + port: 3000 + initialDelaySeconds: 60 + periodSeconds: 10 + +--- +apiVersion: v1 +kind: Service +metadata: + name: nachet-frontend-svc + namespace: nachet +spec: + clusterIP: None + selector: + app: nachet-frontend + ports: + - protocol: TCP + port: 3000 diff --git a/kubernetes/apps/nachet/nachet-ingress.yml b/kubernetes/apps/nachet/nachet-ingress.yml new file mode 100644 index 00000000..6a67d2ab --- /dev/null +++ b/kubernetes/apps/nachet/nachet-ingress.yml @@ -0,0 +1,63 @@ +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: nachet-backend-ingress + namespace: nachet + annotations: + nginx.ingress.kubernetes.io/use-regex: "true" + nginx.ingress.kubernetes.io/rewrite-target: /$2 
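+ # Example (hypothetical request, for illustration only): /api/ping matches the + # /api(/|$)(.*) path below with $2 = "ping", so nachet-backend-svc receives /ping.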
# https://kubernetes.github.io/ingress-nginx/examples/rewrite/ + nginx.ingress.kubernetes.io/proxy-body-size: "50m" + nginx.ingress.kubernetes.io/proxy-read-timeout: "120" + cert-manager.io/cluster-issuer: letsencrypt-http + ingress.kubernetes.io/force-ssl-redirect: "true" + kubernetes.io/tls-acme: "true" +spec: + ingressClassName: nginx + tls: + - hosts: + - nachet.ninebasetwo.xyz + secretName: aciacfia-tls + rules: + - host: nachet.ninebasetwo.xyz + http: + paths: + - path: /api(/|$)(.*) + pathType: ImplementationSpecific + backend: + service: + name: nachet-backend-svc + port: + number: 8080 + +--- +# For more information check https://github.com/nginxinc/kubernetes-ingress/issues/323 +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: nachet-frontend-ingress + namespace: nachet + annotations: + nginx.ingress.kubernetes.io/add-base-url: "true" + nginx.ingress.kubernetes.io/rewrite-target: / + nginx.ingress.kubernetes.io/proxy-read-timeout: "120" + cert-manager.io/cluster-issuer: letsencrypt-http + ingress.kubernetes.io/force-ssl-redirect: "true" + kubernetes.io/tls-acme: "true" +spec: + ingressClassName: nginx + tls: + - hosts: + - nachet.ninebasetwo.xyz + secretName: aciacfia-tls + rules: + - host: nachet.ninebasetwo.xyz + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: nachet-frontend-svc + port: + number: 3000 diff --git a/kubernetes/apps/nachet/nachet-namespace.yml b/kubernetes/apps/nachet/nachet-namespace.yml new file mode 100644 index 00000000..c01925f3 --- /dev/null +++ b/kubernetes/apps/nachet/nachet-namespace.yml @@ -0,0 +1,7 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: nachet + labels: + name: nachet diff --git a/kubernetes/system/cert-manager/cert-manager.yml b/kubernetes/system/cert-manager/cert-manager.yml new file mode 100644 index 00000000..50852d2d --- /dev/null +++ b/kubernetes/system/cert-manager/cert-manager.yml @@ -0,0 +1,735 @@ +# Default values for cert-manager. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. +global: + # Reference to one or more secrets to be used when pulling images + imagePullSecrets: [] + # - name: "image-pull-secret" + + # Labels to apply to all resources + # Please note that this does not add labels to the resources created dynamically + # by the controllers. + # For these resources, you have to add the labels in the template in the + # cert-manager custom resource: + # eg. podTemplate/ ingressTemplate in ACMEChallengeSolverHTTP01Ingress + # eg. secretTemplate in CertificateSpec + commonLabels: {} + # team_name: dev + + # Optional priority class to be used for the cert-manager pods + priorityClassName: "" + rbac: + create: true + aggregateClusterRoles: true + + podSecurityPolicy: + enabled: false + useAppArmor: true + + # Set the verbosity of cert-manager. Range of 0 - 6 with 6 being the most + # verbose. + logLevel: 2 + + leaderElection: + # Override the namespace used for the leader election lease + namespace: "kube-system" + + # The duration that non-leader candidates will wait after observing a + # leadership renewal until attempting to acquire leadership of a led but + # unrenewed leader slot. This is effectively the maximum duration that a + # leader can be stopped before it is replaced by another candidate. + # leaseDuration: 60s + + # The interval between attempts by the acting master to renew a leadership + # slot before it stops leading. This must be less than or equal to the + # lease duration. 
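 + # For example (illustration only): with the commented defaults below, renewal is + # attempted every 15s (retryPeriod) and must succeed within 40s (renewDeadline), + # which satisfies renewDeadline <= leaseDuration (60s).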
+ # renewDeadline: 40s + + # The duration the clients should wait between attempting acquisition and + # renewal of a leadership. + # retryPeriod: 15s + +installCRDs: true + +replicaCount: 1 + +strategy: {} + # type: RollingUpdate + # rollingUpdate: + # maxSurge: 0 + # maxUnavailable: 1 + +podDisruptionBudget: + enabled: false + + # minAvailable and maxUnavailable can either be set to an integer (e.g. 1) + # or a percentage value (e.g. 25%) + # if neither minAvailable or maxUnavailable is set, we default to + # `minAvailable: 1` + # minAvailable: 1 + # maxUnavailable: 1 + +# Comma separated list of feature gates that should be enabled on the +# controller pod. +featureGates: "" + +# The maximum number of challenges that can be scheduled as 'processing' at once +maxConcurrentChallenges: 60 + +image: + repository: quay.io/jetstack/cert-manager-controller + # You can manage a registry with + # registry: quay.io + # repository: jetstack/cert-manager-controller + + # Override the image tag to deploy by setting this variable. + # If no value is set, the chart's appVersion will be used. + # tag: canary + + # Setting a digest will override any tag + # digest: + # sha256:0e072dddd1f7f8fc8909a2ca6f65e76c5f0d2fcfb8be47935ae3457e8bbceb20 + pullPolicy: IfNotPresent + +# Override the namespace used to store DNS provider credentials +# etc. for ClusterIssuer +# resources. By default, the same namespace as cert-manager is deployed within +# is used. This namespace will not be automatically created by the Helm chart. +clusterResourceNamespace: "" + +# This namespace allows you to define where the services will be installed into +# if not set then they will use the namespace of the release +# This is helpful when installing cert manager as a chart dependency (sub chart) +namespace: cert-manager + +serviceAccount: + # Specifies whether a service account should be created + create: true + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname + # template + # name: "" + # Optional additional annotations to add to the controller's ServiceAccount + # annotations: {} + # Automount API credentials for a Service Account. + # Optional additional labels to add to the controller's ServiceAccount + # labels: {} + automountServiceAccountToken: true + +# Automounting API credentials for a particular pod +# automountServiceAccountToken: true + +# When this flag is enabled, secrets will be automatically removed when the +# certificate resource is deleted +enableCertificateOwnerRef: false + +# Used to configure options for the controller pod. +# This allows setting options that'd usually be provided via flags. +# An APIVersion and Kind must be specified in your values.yaml file. +# Flags will override options that are set here. 
+config: +# apiVersion: controller.config.cert-manager.io/v1alpha1 +# kind: ControllerConfiguration +# logging: +# verbosity: 2 +# format: text +# leaderElectionConfig: +# namespace: kube-system +# kubernetesAPIQPS: 9000 +# kubernetesAPIBurst: 9000 +# numberOfConcurrentWorkers: 200 +# featureGates: +# additionalCertificateOutputFormats: true +# experimentalCertificateSigningRequestControllers: true +# experimentalGatewayAPISupport: true +# serverSideApply: true +# literalCertificateSubject: true +# useCertificateRequestBasicConstraints: true + +# Setting Nameservers for DNS01 Self Check + +# Comma separated string with host and port of the recursive nameservers +# cert-manager should query +dns01RecursiveNameservers: "" + +# Forces cert-manager to only use the recursive nameservers for verification. +# Enabling this option could cause the DNS01 self check to take longer due +# to caching performed by the recursive nameservers +dns01RecursiveNameserversOnly: false + +# Additional command line flags to pass to cert-manager controller binary. +# To see all available flags run docker run +# quay.io/jetstack/cert-manager-controller:<version> --help +extraArgs: [] + # Use this flag to enable or disable arbitrary controllers, + # for example, disable the CertificiateRequests approver + # - --controllers=*,-certificaterequests-approver + +extraEnv: [] +# - name: SOME_VAR +# value: 'some value' + +resources: {} + # requests: + # cpu: 10m + # memory: 32Mi + +securityContext: + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + +containerSecurityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + + +volumes: [] + +volumeMounts: [] + +# Optional additional annotations to add to the controller Deployment +# deploymentAnnotations: {} + +# Optional additional annotations to add to the controller Pods +# podAnnotations: {} + +podLabels: {} + +# Optional annotations to add to the controller Service +# serviceAnnotations: {} + +# Optional additional labels to add to the controller Service +# serviceLabels: {} + +# Optional DNS settings, useful if you have a public and private DNS zone for +# the same domain on Route 53. What follows is an example of ensuring +# cert-manager can access an ingress or DNS TXT records at all times. +# NOTE: This requires Kubernetes 1.10 or `CustomPodDNS` feature gate enabled for +# the cluster to work.
+# podDnsPolicy: "None" +# podDnsConfig: +# nameservers: +# - "1.1.1.1" +# - "8.8.8.8" + +nodeSelector: + kubernetes.io/os: linux + +ingressShim: {} + # defaultIssuerName: "" + # defaultIssuerKind: "" + # defaultIssuerGroup: "" + +prometheus: + enabled: true + servicemonitor: + enabled: false + prometheusInstance: default + targetPort: 9402 + path: /metrics + interval: 60s + scrapeTimeout: 30s + labels: {} + annotations: {} + honorLabels: false + endpointAdditionalProperties: {} + +# Use these variables to configure the HTTP_PROXY environment variables +# http_proxy: "http://proxy:8080" +# https_proxy: "https://proxy:8080" +# no_proxy: 127.0.0.1,localhost + +# A Kubernetes Affinty, if required; +# for example: +# affinity: +# nodeAffinity: +# requiredDuringSchedulingIgnoredDuringExecution: +# nodeSelectorTerms: +# - matchExpressions: +# - key: foo.bar.com/role +# operator: In +# values: +# - master +affinity: {} + +# A list of Kubernetes Tolerations, if required; +# for example: +# tolerations: +# - key: foo.bar.com/role +# operator: Equal +# value: master +# effect: NoSchedule +tolerations: [] + +# A list of Kubernetes TopologySpreadConstraints, if required; +# for example: +# topologySpreadConstraints: +# - maxSkew: 2 +# topologyKey: topology.kubernetes.io/zone +# whenUnsatisfiable: ScheduleAnyway +# labelSelector: +# matchLabels: +# app.kubernetes.io/instance: cert-manager +# app.kubernetes.io/component: controller +topologySpreadConstraints: [] + +# LivenessProbe settings for the controller container of the controller Pod. +# +# Disabled by default, because the controller has a leader election mechanism +# which should cause it to exit if it is unable to renew its leader election +# record. +# LivenessProbe durations and thresholds are based on those used for the +# Kubernetes +# controller-manager. See: +livenessProbe: + enabled: false + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 15 + successThreshold: 1 + failureThreshold: 8 + +# enableServiceLinks indicates whether information about services should be +# injected into pod's environment variables, matching the syntax of Docker +# links. +enableServiceLinks: false + +webhook: + replicaCount: 1 + timeoutSeconds: 10 + + # Used to configure options for the webhook pod. + # This allows setting options that'd usually be provided via flags. + # An APIVersion and Kind must be specified in your values.yaml file. + # Flags will override options that are set here. + config: + # apiVersion: webhook.config.cert-manager.io/v1alpha1 + # kind: WebhookConfiguration + + # The port that the webhook should listen on for requests. + # In GKE private clusters, by default kubernetes apiservers are allowed to + # talk to the cluster nodes only on 443 and 10250. so configuring + # securePort: 10250, will work out of the box without needing + # to add firewall rules or requiring NET_BIND_SERVICE capabilities + # to bind port numbers <1000. + # This should be uncommented and set as a default by the chart once we + # graduate + # the apiVersion of WebhookConfiguration past v1alpha1. + # securePort: 10250 + + strategy: {} + # type: RollingUpdate + # rollingUpdate: + # maxSurge: 0 + # maxUnavailable: 1 + + # Pod Security Context to be set on the webhook component Pod + securityContext: + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + + podDisruptionBudget: + enabled: false + + # minAvailable and maxUnavailable can either be set to an integer (e.g. 1) + # or a percentage value (e.g. 
25%) + # if neither minAvailable or maxUnavailable is set, we default to + # `minAvailable: 1` + # minAvailable: 1 + # maxUnavailable: 1 + + # Container Security Context to be set on the webhook component container + containerSecurityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + + # Optional additional annotations to add to the webhook Deployment + # deploymentAnnotations: {} + + # Optional additional annotations to add to the webhook Pods + # podAnnotations: {} + + # Optional additional annotations to add to the webhook Service + # serviceAnnotations: {} + + # Optional additional annotations to add to the webhook + # MutatingWebhookConfiguration + # mutatingWebhookConfigurationAnnotations: {} + + # Optional additional annotations to add to the webhook + # ValidatingWebhookConfiguration + # validatingWebhookConfigurationAnnotations: {} + + # Additional command line flags to pass to cert-manager webhook binary. + # To see all available flags run docker run + # quay.io/jetstack/cert-manager-webhook:<version> --help + extraArgs: [] + # Path to a file containing a WebhookConfiguration object + # used to configure the webhook + # - --config=<path-to-config-file> + + # Comma separated list of feature gates that should be enabled on the + # webhook pod. + featureGates: "" + + resources: {} + # requests: + # cpu: 10m + # memory: 32Mi + + ## Liveness and readiness probe values + livenessProbe: + failureThreshold: 3 + initialDelaySeconds: 60 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + readinessProbe: + failureThreshold: 3 + initialDelaySeconds: 5 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 1 + + nodeSelector: + kubernetes.io/os: linux + + affinity: {} + + tolerations: [] + + topologySpreadConstraints: [] + + # Optional additional labels to add to the Webhook Pods + podLabels: {} + + # Optional additional labels to add to the Webhook Service + serviceLabels: {} + + image: + repository: quay.io/jetstack/cert-manager-webhook + # You can manage a registry with + # registry: quay.io + # repository: jetstack/cert-manager-webhook + + # Override the image tag to deploy by setting this variable. + # If no value is set, the chart's appVersion will be used. + # tag: canary + + # Setting a digest will override any tag + + pullPolicy: IfNotPresent + + serviceAccount: + # Specifies whether a service account should be created + create: true + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname + # template + # name: "" + # Optional additional annotations to add to the controller's ServiceAccount + # annotations: {} + # Optional additional labels to add to the webhook's ServiceAccount + # labels: {} + # Automount API credentials for a Service Account. + automountServiceAccountToken: true + + # Automounting API credentials for a particular pod + # automountServiceAccountToken: true + + # The port that the webhook should listen on for requests. + # In GKE private clusters, by default kubernetes apiservers are allowed to + # talk to the cluster nodes only on 443 and 10250. so configuring + # securePort: 10250, will work out of the box without needing to add firewall + # rules or requiring NET_BIND_SERVICE capabilities to bind port numbers <1000 + securePort: 10250 + + # Specifies if the webhook should be started in hostNetwork mode.
+ # + # Required for use in some managed kubernetes clusters (such as AWS EKS) with custom + # CNI (such as calico), because control-plane managed by AWS cannot + # communicate with pods' IP CIDR and admission webhooks are not working + # + # Since the default port for the webhook conflicts with kubelet on the host + # network, `webhook.securePort` should be changed to an available port if + # running in hostNetwork mode. + hostNetwork: false + + # Specifies how the service should be handled. Useful if you want to expose + # the webhook to outside of the cluster. In some cases, + # the control plane cannot reach internal services. + serviceType: ClusterIP + # loadBalancerIP: + + # Overrides the mutating webhook and validating webhook so they reach + # the webhook service using the `url` field instead of a service. + url: {} + # host: + + # Enables default network policies for webhooks. + networkPolicy: + enabled: false + ingress: + - from: + - ipBlock: + cidr: 0.0.0.0/0 + egress: + - ports: + - port: 80 + protocol: TCP + - port: 443 + protocol: TCP + - port: 53 + protocol: TCP + - port: 53 + protocol: UDP + # On OpenShift and OKD, the Kubernetes API server listens on + # port 6443. + - port: 6443 + protocol: TCP + to: + - ipBlock: + cidr: 0.0.0.0/0 + + volumes: [] + volumeMounts: [] + + # enableServiceLinks indicates whether information about services should be + # injected into pod's environment variables, matching the syntax of Docker + # links. + enableServiceLinks: false + +cainjector: + enabled: true + replicaCount: 1 + + strategy: {} + # type: RollingUpdate + # rollingUpdate: + # maxSurge: 0 + # maxUnavailable: 1 + + # Pod Security Context to be set on the cainjector component Pod + securityContext: + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + + podDisruptionBudget: + enabled: false + + # minAvailable and maxUnavailable can either be set to an integer (e.g. 1) + # or a percentage value (e.g. 25%) + # if neither minAvailable or maxUnavailable is set, + # we default to `minAvailable: 1` + # minAvailable: 1 + # maxUnavailable: 1 + + # Container Security Context to be set on the cainjector component container + containerSecurityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + + + # Optional additional annotations to add to the cainjector Deployment + # deploymentAnnotations: {} + + # Optional additional annotations to add to the cainjector Pods + # podAnnotations: {} + + # Additional command line flags to pass to cert-manager cainjector binary. + extraArgs: [] + # Enable profiling for cainjector + # - --enable-profiling=true + + resources: {} + # requests: + # cpu: 10m + # memory: 32Mi + + nodeSelector: + kubernetes.io/os: linux + + affinity: {} + + tolerations: [] + + topologySpreadConstraints: [] + + # Optional additional labels to add to the CA Injector Pods + podLabels: {} + + image: + repository: quay.io/jetstack/cert-manager-cainjector + # You can manage a registry with + # registry: quay.io + # repository: jetstack/cert-manager-cainjector + + # Override the image tag to deploy by setting this variable. + # If no value is set, the chart's appVersion will be used. + # tag: canary + + # Setting a digest will override any tag + + pullPolicy: IfNotPresent + + serviceAccount: + # Specifies whether a service account should be created + create: true + # The name of the service account to use.
+ # If not set and create is true, a name is generated using the + # fullname template + # name: "" + # Optional additional annotations to add to the controller's ServiceAccount + # annotations: {} + # Automount API credentials for a Service Account. + # Optional additional labels to add to the cainjector's ServiceAccount + # labels: {} + automountServiceAccountToken: true + + # Automounting API credentials for a particular pod + # automountServiceAccountToken: true + + volumes: [] + volumeMounts: [] + + # enableServiceLinks indicates whether information about services should be + # injected into pod's environment variables, matching the syntax of Docker + # links. + enableServiceLinks: false + +acmesolver: + image: + repository: quay.io/jetstack/cert-manager-acmesolver + # You can manage a registry with + # registry: quay.io + # repository: jetstack/cert-manager-acmesolver + + # Override the image tag to deploy by setting this variable. + # If no value is set, the chart's appVersion will be used. + # tag: canary + + # Setting a digest will override any tag + +# This startupapicheck is a Helm post-install hook that waits for the webhook +# endpoints to become available. +# The check is implemented using a Kubernetes Job- if you are injecting mesh +# sidecar proxies into cert-manager pods, you probably want to ensure that they +# are not injected into this Job's pod. Otherwise the installation may time out +# due to the Job never being completed because the sidecar proxy does not exit. +# See https://github.com/cert-manager/cert-manager/pull/4414 for context. +startupapicheck: + enabled: true + + # Pod Security Context to be set on the startupapicheck component Pod + securityContext: + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + + # Container Security Context to be set on the controller component container + containerSecurityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + + # Timeout for 'kubectl check api' command + timeout: 1m + + # Job backoffLimit + backoffLimit: 4 + + # Optional additional annotations to add to the startupapicheck Job + jobAnnotations: + helm.sh/hook: post-install + helm.sh/hook-weight: "1" + helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded + + # Optional additional annotations to add to the startupapicheck Pods + # podAnnotations: {} + + # Additional command line flags to pass to startupapicheck binary. + extraArgs: [] + + resources: {} + # requests: + # cpu: 10m + # memory: 32Mi + + nodeSelector: + kubernetes.io/os: linux + + affinity: {} + + tolerations: [] + + # Optional additional labels to add to the startupapicheck Pods + podLabels: {} + + image: + repository: quay.io/jetstack/cert-manager-ctl + # You can manage a registry with + # registry: quay.io + # repository: jetstack/cert-manager-ctl + + # Override the image tag to deploy by setting this variable. + # If no value is set, the chart's appVersion will be used. + # tag: canary + + + pullPolicy: IfNotPresent + + rbac: + # annotations for the startup API Check job RBAC and PSP resources + annotations: + helm.sh/hook: post-install + helm.sh/hook-weight: "-5" + helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded + + # Automounting API credentials for a particular pod + # automountServiceAccountToken: true + + serviceAccount: + # Specifies whether a service account should be created + create: true + + # The name of the service account to use. 
+ # If not set and create is true, a name is generated using the + # fullname template + # name: "" + + # Optional additional annotations to add to the Job's ServiceAccount + annotations: + helm.sh/hook: post-install + helm.sh/hook-weight: "-5" + helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded + + # Automount API credentials for a Service Account. + automountServiceAccountToken: true + + # Optional additional labels to add to the startupapicheck's ServiceAccount + # labels: {} + + volumes: [] + volumeMounts: [] + + # enableServiceLinks indicates whether information about services should be + # injected into pod's environment variables, matching the syntax of Docker + # links. + enableServiceLinks: false diff --git a/kubernetes/system/cert-manager/issuer.yml b/kubernetes/system/cert-manager/issuer.yml new file mode 100644 index 00000000..7154c1e7 --- /dev/null +++ b/kubernetes/system/cert-manager/issuer.yml @@ -0,0 +1,14 @@ +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: letsencrypt-http +spec: + acme: + server: https://acme-v02.api.letsencrypt.org/directory + email: tomcardin@outlook.com + privateKeySecretRef: + name: letsencrypt-private-key + solvers: + - http01: + ingress: + class: nginx diff --git a/kubernetes/system/ingress-nginx/ingress-nginx.yml b/kubernetes/system/ingress-nginx/ingress-nginx.yml new file mode 100644 index 00000000..18e5a661 --- /dev/null +++ b/kubernetes/system/ingress-nginx/ingress-nginx.yml @@ -0,0 +1,778 @@ +## nginx configuration +## + +## Overrides for generated resource names +# See templates/_helpers.tpl +# nameOverride: +# fullnameOverride: + +## Labels to apply to all resources +## +commonLabels: {} +# scmhash: abc123 +# myLabel: aakkmd + +controller: + name: controller + image: + registry: k8s.gcr.io + image: ingress-nginx/controller + ## repository: + tag: "v1.1.1" + digest: sha256:0bc88eb15f9e7f84e8e56c14fa5735aaa488b840983f87bd79b1054190e660de + pullPolicy: IfNotPresent + # www-data -> uid 101 + runAsUser: 101 + allowPrivilegeEscalation: true + + # -- Use an existing PSP instead of creating one + existingPsp: "" + + # -- Configures the controller container name + containerName: controller + + # -- Configures the ports that the nginx-controller listens on + containerPort: + http: 80 + https: 443 + + config: {} + + # -- Annotations to be added to the controller config configuration configmap. + configAnnotations: {} + + proxySetHeaders: {} + + addHeaders: {} + + # -- Optionally customize the pod dnsConfig. + dnsConfig: {} + + # -- Optionally customize the pod hostname. + hostname: {} + + dnsPolicy: ClusterFirst + + reportNodeInternalIp: false + + # Defaults to false + watchIngressWithoutClass: false + + # -- Process IngressClass per name (additionally as per spec.controller).
+ ingressClassByName: false + + allowSnippetAnnotations: true + + # -- Required for use with CNI based kubernetes installations (such as ones set up with kubeadm), + # since CNI and hostport don't mix yet. Can be deprecated once https://github.com/kubernetes/kubernetes/issues/23920 + # is merged + hostNetwork: false + + ## Use host ports 80 and 443 + ## Disabled by default + hostPort: + # -- Enable 'hostPort' or not + enabled: false + ports: + # -- 'hostPort' http port + http: 80 + # -- 'hostPort' https port + https: 443 + + # -- Election ID to use for status update + electionID: ingress-controller-leader + + ## This section refers to the creation of the IngressClass resource + ingressClassResource: + # -- Name of the ingressClass + name: nginx + # -- Is this ingressClass enabled or not + enabled: true + # -- Is this the default ingressClass for the cluster + default: false + # -- Controller-value of the controller that is processing this ingressClass + controllerValue: "k8s.io/ingress-nginx" + + # -- Parameters is a link to a custom resource containing additional + # configuration for the controller. This is optional if the controller + # does not require extra parameters. + parameters: {} + + ingressClass: nginx + + # -- Labels to add to the pod container metadata + podLabels: {} + # key: value + + # -- Security Context policies for controller pods + podSecurityContext: {} + + sysctls: {} + # sysctls: + # "net.core.somaxconn": "8192" + + # -- Allows customization of the source of the IP address or FQDN to report + # in the ingress status field. By default, it reads the information provided + # by the service. If disable, the status field reports the IP address of the + # node or nodes where an ingress controller pod is running. + publishService: + # -- Enable 'publishService' or not + enabled: true + # -- Allows overriding of the publish service to bind to + # Must be <namespace>/<service_name> + pathOverride: "" + + # Limit the scope of the controller to a specific namespace + scope: + # -- Enable 'scope' or not + enabled: false + # -- Namespace to limit the controller to; defaults to $(POD_NAMESPACE) + namespace: "" + namespaceSelector: "" + + configMapNamespace: "" + + tcp: + configMapNamespace: "" + annotations: {} + + udp: + configMapNamespace: "" + annotations: {} + + maxmindLicenseKey: "" + + # -- Additional command line arguments to pass to nginx-ingress-controller + # E.g.
to specify the default SSL certificate you can use + extraArgs: {} + ## extraArgs: + ## default-ssl-certificate: "<namespace>/<secret_name>" + + # -- Additional environment variables to set + extraEnvs: [] + # extraEnvs: + # - name: FOO + # valueFrom: + # secretKeyRef: + # key: FOO + # name: secret-resource + + # -- Use a `DaemonSet` or `Deployment` + kind: Deployment + + # -- Annotations to be added to the controller Deployment or DaemonSet + ## + annotations: {} + # keel.sh/pollSchedule: "@every 60m" + + labels: {} + # keel.sh/policy: patch + # keel.sh/trigger: poll + + + # -- The update strategy to apply to the Deployment or DaemonSet + ## + updateStrategy: {} + # rollingUpdate: + # maxUnavailable: 1 + # type: RollingUpdate + + # -- `minReadySeconds` to avoid killing pods before we are ready + ## + minReadySeconds: 0 + + + # -- Node tolerations for server scheduling to nodes with taints + ## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ + ## + tolerations: [] + # - key: "key" + # operator: "Equal|Exists" + # value: "value" + # effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)" + + affinity: {} + # # An example of preferred pod anti-affinity, weight is in the range 1-100 + # podAntiAffinity: + # preferredDuringSchedulingIgnoredDuringExecution: + # - weight: 100 + # podAffinityTerm: + # labelSelector: + # matchExpressions: + # - key: app.kubernetes.io/name + # operator: In + # values: + # - ingress-nginx + # - key: app.kubernetes.io/instance + # operator: In + # values: + # - ingress-nginx + # - key: app.kubernetes.io/component + # operator: In + # values: + # - controller + # topologyKey: kubernetes.io/hostname + + # # An example of required pod anti-affinity + # podAntiAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # - labelSelector: + # matchExpressions: + # - key: app.kubernetes.io/name + # operator: In + # values: + # - ingress-nginx + # - key: app.kubernetes.io/instance + # operator: In + # values: + # - ingress-nginx + # - key: app.kubernetes.io/component + # operator: In + # values: + # - controller + # topologyKey: "kubernetes.io/hostname" + + topologySpreadConstraints: [] + # - maxSkew: 1 + # topologyKey: failure-domain.beta.kubernetes.io/zone + # whenUnsatisfiable: DoNotSchedule + # labelSelector: + # matchLabels: + # app.kubernetes.io/instance: ingress-nginx-internal + + # -- `terminationGracePeriodSeconds` to avoid killing pods before we are ready + ## wait up to five minutes for the drain of connections + ## + terminationGracePeriodSeconds: 300 + + # -- Node labels for controller pod assignment + ## Ref: https://kubernetes.io/docs/user-guide/node-selection/ + ## + nodeSelector: + kubernetes.io/os: linux + + ## Liveness and readiness probe values + ## + ## startupProbe: + ## httpGet: + ## # should match container.healthCheckPath + ## path: "/healthz" + ## port: 10254 + ## scheme: HTTP + ## initialDelaySeconds: 5 + ## periodSeconds: 5 + ## timeoutSeconds: 2 + ## successThreshold: 1 + ## failureThreshold: 5 + livenessProbe: + httpGet: + # should match container.healthCheckPath + path: "/healthz" + port: 10254 + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 1 + successThreshold: 1 + failureThreshold: 5 + readinessProbe: + httpGet: + # should match container.healthCheckPath + path: "/healthz" + port: 10254 + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 1 + successThreshold: 1 + failureThreshold: 3 + + + # -- Path of the health check endpoint. All requests received on the port defined by + # the healthz-port parameter are forwarded internally to this path.
+ healthCheckPath: "/healthz" + + # -- Address to bind the health check endpoint. + # It is better to set this option to the internal node address + # if the ingress nginx controller is running in the `hostNetwork: true` mode. + healthCheckHost: "" + + # -- Annotations to be added to controller pods + ## + podAnnotations: {} + + replicaCount: 1 + + minAvailable: 1 + + resources: + ## limits: + ## cpu: 100m + ## memory: 90Mi + requests: + cpu: 100m + memory: 90Mi + + # Mutually exclusive with keda autoscaling + autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 11 + targetCPUUtilizationPercentage: 50 + targetMemoryUtilizationPercentage: 50 + behavior: {} + # scaleDown: + # stabilizationWindowSeconds: 300 + # policies: + # - type: Pods + # value: 1 + # periodSeconds: 180 + # scaleUp: + # stabilizationWindowSeconds: 300 + # policies: + # - type: Pods + # value: 2 + # periodSeconds: 60 + + autoscalingTemplate: [] + # Custom or additional autoscaling metrics + # - type: Pods + # pods: + # metric: + # name: nginx_ingress_controller_nginx_process_requests_total + # target: + # type: AverageValue + # averageValue: 10000m + + # Mutually exclusive with hpa autoscaling + keda: + apiVersion: "keda.sh/v1alpha1" + ## apiVersion changes with keda 1.x vs 2.x + ## 2.x = keda.sh/v1alpha1 + ## 1.x = keda.k8s.io/v1alpha1 + enabled: false + minReplicas: 1 + maxReplicas: 11 + pollingInterval: 30 + cooldownPeriod: 300 + restoreToOriginalReplicaCount: false + scaledObject: + annotations: {} + # Custom annotations for ScaledObject resource + # annotations: + # key: value + triggers: [] + # - type: prometheus + # metadata: + # serverAddress: http://<prometheus-host>:9090 + # metricName: http_requests_total + # threshold: '100' + # query: sum(rate(http_requests_total{deployment="my-deployment"}[2m])) + + behavior: {} + # scaleDown: + # stabilizationWindowSeconds: 300 + # policies: + # - type: Pods + # value: 1 + # periodSeconds: 180 + # scaleUp: + # stabilizationWindowSeconds: 300 + # policies: + # - type: Pods + # value: 2 + # periodSeconds: 60 + + # -- Enable mimalloc as a drop-in replacement for malloc. + ## ref: https://github.com/microsoft/mimalloc + ## + enableMimalloc: true + + ## Override NGINX template + customTemplate: + configMapName: "" + configMapKey: "" + + service: + enabled: true + + appProtocol: true + + annotations: {} + labels: {} + # clusterIP: "" + + # -- List of IP addresses at which the controller services are available + ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips + ## + externalIPs: [] + + # loadBalancerIP: "" + loadBalancerSourceRanges: [] + + enableHttp: true + enableHttps: true + + ipFamilyPolicy: "SingleStack" + + ipFamilies: + - IPv4 + + ports: + http: 80 + https: 443 + + targetPorts: + http: http + https: https + + type: LoadBalancer + + ## type: NodePort + ## nodePorts: + ## http: 32080 + ## https: 32443 + ## tcp: + ## 8080: 32808 + nodePorts: + http: "" + https: "" + tcp: {} + udp: {} + + external: + enabled: true + + internal: + enabled: false + annotations: {} + + # loadBalancerIP: "" + + # -- Restrict access For LoadBalancer service. Defaults to 0.0.0.0/0. + loadBalancerSourceRanges: [] + + ## Set external traffic policy to: "Local" to preserve source IP on + ## providers supporting it + + # -- Additional containers to be added to the controller pod.
+ extraContainers: [] + # - name: my-sidecar + # image: nginx:latest + # - name: lemonldap-ng-controller + # image: lemonldapng/lemonldap-ng-controller:0.2.0 + # args: + # - /lemonldap-ng-controller + # - --alsologtostderr + # - --configmap=$(POD_NAMESPACE)/lemonldap-ng-configuration + # env: + # - name: POD_NAME + # valueFrom: + # fieldRef: + # fieldPath: metadata.name + # - name: POD_NAMESPACE + # valueFrom: + # fieldRef: + # fieldPath: metadata.namespace + # volumeMounts: + # - name: copy-portal-skins + # mountPath: /srv/var/lib/lemonldap-ng/portal/skins + + # -- Additional volumeMounts to the controller main container. + extraVolumeMounts: [] + # - name: copy-portal-skins + # mountPath: /var/lib/lemonldap-ng/portal/skins + + # -- Additional volumes to the controller pod. + extraVolumes: [] + # - name: copy-portal-skins + # emptyDir: {} + + # -- Containers, which are run before the app containers are started. + extraInitContainers: [] + # - name: init-myservice + # image: busybox + + extraModules: [] + ## Modules, which are mounted into the core nginx image + # - name: opentelemetry + # image: busybox + # + # The image must contain a `/usr/local/bin/init_module.sh` executable, which + # will be executed as initContainers, to move its config files within the + # mounted volume. + + admissionWebhooks: + annotations: {} + + ## Additional annotations to the admission webhooks. + ## These annotations will be added to the ValidatingWebhookConfiguration and + ## the Jobs Spec of the admission webhooks. + enabled: true + failurePolicy: Fail + # timeoutSeconds: 10 + port: 8443 + certificate: "/usr/local/certificates/cert" + key: "/usr/local/certificates/key" + namespaceSelector: {} + objectSelector: {} + # -- Labels to be added to admission webhooks + labels: {} + + # -- Use an existing PSP instead of creating one + existingPsp: "" + + service: + annotations: {} + # clusterIP: "" + externalIPs: [] + # loadBalancerIP: "" + loadBalancerSourceRanges: [] + servicePort: 443 + type: ClusterIP + + createSecretJob: + resources: {} + # limits: + # cpu: 10m + # memory: 20Mi + # requests: + # cpu: 10m + # memory: 20Mi + + patchWebhookJob: + resources: {} + + patch: + enabled: true + image: + registry: k8s.gcr.io + image: ingress-nginx/kube-webhook-certgen + tag: v1.1.1 + digest: sha256:64d8c73dca984af206adf9d6d7e46aa550362b1d7a01f3a0a91b20cc67868660 + pullPolicy: IfNotPresent + # -- Provide a priority class name to the webhook patching job + ## + priorityClassName: "" + podAnnotations: {} + nodeSelector: + kubernetes.io/os: linux + tolerations: [] + # -- Labels to be added to patch job resources + labels: {} + runAsUser: 2000 + + metrics: + port: 10254 + # if this port is changed, change healthz-port: in extraArgs: accordingly + enabled: false + + service: + annotations: {} + # prometheus.io/scrape: "true" + # prometheus.io/port: "10254" + + # clusterIP: "" + + # -- List of IP addresses at which the stats-exporter service is available + ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips + ## + externalIPs: [] + + # loadBalancerIP: "" + loadBalancerSourceRanges: [] + servicePort: 10254 + type: ClusterIP + # externalTrafficPolicy: "" + # nodePort: "" + + serviceMonitor: + enabled: false + additionalLabels: {} + ## The label to use to retrieve the job name from. 
+ ## jobLabel: "app.kubernetes.io/name" + namespace: "" + namespaceSelector: {} + ## Default: scrape .Release.Namespace only + ## To scrape all, use the following: + ## namespaceSelector: + ## any: true + scrapeInterval: 30s + # honorLabels: true + targetLabels: [] + relabelings: [] + metricRelabelings: [] + + prometheusRule: + enabled: false + additionalLabels: {} + # namespace: "" + rules: [] + + + lifecycle: + preStop: + exec: + command: + - /wait-shutdown + + priorityClassName: "" + +# -- Rollback limit +## +revisionHistoryLimit: 10 + +## Default 404 backend +## +defaultBackend: + ## + enabled: false + + name: defaultbackend + image: + registry: k8s.gcr.io + image: defaultbackend-amd64 + tag: "1.5" + pullPolicy: IfNotPresent + # nobody user -> uid 65534 + runAsUser: 65534 + runAsNonRoot: true + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + + # -- Use an existing PSP instead of creating one + existingPsp: "" + + extraArgs: {} + + serviceAccount: + create: true + name: "" + automountServiceAccountToken: true + # -- Additional environment variables to set for defaultBackend pods + extraEnvs: [] + + port: 8080 + + livenessProbe: + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 5 + readinessProbe: + failureThreshold: 6 + initialDelaySeconds: 0 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 5 + + # -- Node tolerations for server scheduling to nodes with taints + ## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ + ## + tolerations: [] + # - key: "key" + # operator: "Equal|Exists" + # value: "value" + # effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)" + + affinity: {} + + # -- Security Context policies for controller pods + # See https://kubernetes.io/docs/tasks/administer-cluster/sysctl-cluster/ for + # notes on enabling and using sysctls + ## + podSecurityContext: {} + + # -- Security Context policies for controller main container. + # See https://kubernetes.io/docs/tasks/administer-cluster/sysctl-cluster/ for + # notes on enabling and using sysctls + ## + containerSecurityContext: {} + + # -- Labels to add to the pod container metadata + podLabels: {} + # key: value + + # -- Node labels for default backend pod assignment + ## Ref: https://kubernetes.io/docs/user-guide/node-selection/ + ## + nodeSelector: + kubernetes.io/os: linux + + # -- Annotations to be added to default backend pods + ## + podAnnotations: {} + + replicaCount: 1 + + minAvailable: 1 + + resources: {} + # limits: + # cpu: 10m + # memory: 20Mi + # requests: + # cpu: 10m + # memory: 20Mi + + extraVolumeMounts: [] + ## Additional volumeMounts to the default backend container. + # - name: copy-portal-skins + # mountPath: /var/lib/lemonldap-ng/portal/skins + + extraVolumes: [] + ## Additional volumes to the default backend pod. 
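+  ## Each entry is a standard Kubernetes volume spec, for example: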
+  # - name: copy-portal-skins
+  #   emptyDir: {}
+
+  autoscaling:
+    annotations: {}
+    enabled: false
+    minReplicas: 1
+    maxReplicas: 2
+    targetCPUUtilizationPercentage: 50
+    targetMemoryUtilizationPercentage: 50
+
+  service:
+    annotations: {}
+
+    # clusterIP: ""
+
+    # -- List of IP addresses at which the default backend service is available
+    ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
+    ##
+    externalIPs: []
+
+    # loadBalancerIP: ""
+    loadBalancerSourceRanges: []
+    servicePort: 80
+    type: ClusterIP
+
+  priorityClassName: ""
+  # -- Labels to be added to the default backend resources
+  labels: {}
+
+rbac:
+  create: true
+  scope: false
+
+## If true, create & use Pod Security Policy resources
+## https://kubernetes.io/docs/concepts/policy/pod-security-policy/
+podSecurityPolicy:
+  enabled: false
+
+serviceAccount:
+  create: true
+  name: ""
+  automountServiceAccountToken: true
+  # -- Annotations for the controller service account
+  annotations: {}
+
+imagePullSecrets: []
+# - name: secretName
+
+tcp: {}
+# 8080: "default/example-tcp-svc:9000"
+
+udp: {}
+# 53: "kube-system/kube-dns:53"
+
+dhParam:
diff --git a/kubernetes/system/kube-prometheus-stack/kube-prometheus-stack.yml b/kubernetes/system/kube-prometheus-stack/kube-prometheus-stack.yml
new file mode 100644
index 00000000..314285a6
--- /dev/null
+++ b/kubernetes/system/kube-prometheus-stack/kube-prometheus-stack.yml
@@ -0,0 +1,1253 @@
+# yaml-language-server: $schema=values.schema.json
+# Default values for prometheus.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+rbac:
+  create: true
+
+podSecurityPolicy:
+  enabled: false
+
+imagePullSecrets: []
+# - name: "image-pull-secret"
+
+## Define serviceAccount names for components. Defaults to component's fully qualified name.
+##
+serviceAccounts:
+  server:
+    create: true
+    name: ""
+    annotations: {}
+
+    ## Opt out of automounting Kubernetes API credentials.
+    ## It will be overridden by the server.automountServiceAccountToken value, if set.
+    # automountServiceAccountToken: false
+
+## Additional labels to attach to all resources
+commonMetaLabels: {}
+
+## Monitors ConfigMap changes and POSTs to a URL
+## Ref: https://github.com/prometheus-operator/prometheus-operator/tree/main/cmd/prometheus-config-reloader
+##
+configmapReload:
+  ## URL for configmap-reload to use for reloads
+  ##
+  reloadUrl: ""
+
+  ## env sets environment variables to pass to the container. Can be set as name/value pairs,
+  ## read from secrets or configmaps.
+  env: []
+  # - name: SOMEVAR
+  #   value: somevalue
+  # - name: PASSWORD
+  #   valueFrom:
+  #     secretKeyRef:
+  #       name: mysecret
+  #       key: password
+  #       optional: false
+
+  prometheus:
+    ## If false, the configmap-reload container will not be deployed
+    ##
+    enabled: true
+
+    ## configmap-reload container name
+    ##
+    name: configmap-reload
+
+    ## configmap-reload container image
+    ##
+    image:
+      repository: quay.io/prometheus-operator/prometheus-config-reloader
+      tag: v0.70.0
+      # When digest is set to a non-empty value, images will be pulled by digest (regardless of tag value).
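+      # e.g. digest: sha256:<64-character hex digest> (hypothetical placeholder)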
+      digest: ""
+      pullPolicy: IfNotPresent
+
+    # containerPort: 9533
+
+    ## Additional configmap-reload container arguments
+    ##
+    extraArgs: {}
+
+    ## Additional configmap-reload volume directories
+    ##
+    extraVolumeDirs: []
+
+    ## Additional configmap-reload volume mounts
+    ##
+    extraVolumeMounts: []
+
+    ## Additional configmap-reload mounts
+    ##
+    extraConfigmapMounts: []
+    # - name: prometheus-alerts
+    #   mountPath: /etc/alerts.d
+    #   subPath: ""
+    #   configMap: prometheus-alerts
+    #   readOnly: true
+
+    ## Security context to be added to configmap-reload container
+    containerSecurityContext: {}
+
+    ## configmap-reload resource requests and limits
+    ## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
+    ##
+    resources: {}
+
+server:
+  ## Prometheus server container name
+  ##
+  name: server
+
+  ## Opt out of automounting Kubernetes API credentials.
+  ## If set, it will override the serviceAccounts.server.automountServiceAccountToken value for the ServiceAccount.
+  # automountServiceAccountToken: false
+
+  ## Use a ClusterRole (and ClusterRoleBinding)
+  ## - If set to false - we define a RoleBinding in the defined namespaces ONLY
+  ##
+  ## NB: because we need a Role with nonResourceURLs ("/metrics") - you must get someone with Cluster-admin privileges to define this role for you, before running with this setting enabled.
+  ## This makes prometheus work - for users who do not have ClusterAdmin privs, but want prometheus to operate on their own namespaces, instead of clusterwide.
+  ##
+  ## You MUST also set namespaces to the ones you have access to and want monitored by Prometheus.
+  ##
+  # useExistingClusterRoleName: nameofclusterrole
+
+  ## If set, it will override the prometheus.server.fullname value for the ClusterRole and ClusterRoleBinding
+  ##
+  clusterRoleNameOverride: ""
+
+  # Enable only the release namespace for monitoring. By default all namespaces are monitored.
+  # If releaseNamespace and namespaces are both set, a merged list will be monitored.
+  releaseNamespace: false
+
+  ## namespaces to monitor (instead of monitoring all - clusterwide). Needed if you want to run without Cluster-admin privileges.
+  # namespaces:
+  #   - yournamespace
+
+  # sidecarContainers - add more containers to prometheus server
+  # Key/Value where Key is the sidecar `- name: <Key>`
+  # Example:
+  #   sidecarContainers:
+  #     webserver:
+  #       image: nginx
+  # OR for adding OAuth authentication to Prometheus
+  #   sidecarContainers:
+  #     oauth-proxy:
+  #       image: quay.io/oauth2-proxy/oauth2-proxy:v7.1.2
+  #       args:
+  #       - --upstream=http://127.0.0.1:9090
+  #       - --http-address=0.0.0.0:8081
+  #       - ...
+  #       ports:
+  #       - containerPort: 8081
+  #         name: oauth-proxy
+  #         protocol: TCP
+  #       resources: {}
+  sidecarContainers: {}
+
+  # sidecarTemplateValues - context to be used in template for sidecarContainers
+  # Example:
+  #   sidecarTemplateValues: *your-custom-globals
+  #   sidecarContainers:
+  #     webserver: |-
+  #       {{ include "webserver-container-template" . }}
+  # Template for `webserver-container-template` might look like this:
+  #   image: "{{ .Values.server.sidecarTemplateValues.repository }}:{{ .Values.server.sidecarTemplateValues.tag }}"
+  #   ...
+  #
+  sidecarTemplateValues: {}
+
+  ## Prometheus server container image
+  ##
+  image:
+    repository: quay.io/prometheus/prometheus
+    # if not set appVersion field from Chart.yaml is used
+    tag: ""
+    # When digest is set to a non-empty value, images will be pulled by digest (regardless of tag value).
+    digest: ""
+    pullPolicy: IfNotPresent
+
+  ## Prometheus server command
+  ##
+  command: []
+
+  ## prometheus server priorityClassName
+  ##
+  priorityClassName: ""
+
+  ## EnableServiceLinks indicates whether information about services should be injected
+  ## into pod's environment variables, matching the syntax of Docker links.
+  ## WARNING: the field is unsupported and will be skipped in K8s prior to v1.13.0.
+  ##
+  enableServiceLinks: true
+
+  ## The URL prefix at which the container can be accessed. Useful when the
+  ## '--web.external-url' includes a slug, so that the various internal URLs
+  ## are still accessible as they are in the default case.
+  ## (Optional)
+  prefixURL: ""
+
+  ## External URL at which Prometheus can be accessed
+  ## (may be the same as the Ingress host name)
+  baseURL: ""
+
+  ## Additional server container environment variables
+  ##
+  ## You specify this manually like you would a raw deployment manifest.
+  ## This means you can bind in environment variables from secrets.
+  ##
+  ## e.g. static environment variable:
+  ##  - name: DEMO_GREETING
+  ##    value: "Hello from the environment"
+  ##
+  ## e.g. secret environment variable:
+  ## - name: USERNAME
+  ##   valueFrom:
+  ##     secretKeyRef:
+  ##       name: mysecret
+  ##       key: username
+  env: []
+
+  # List of flags to override default parameters, e.g.:
+  # - --enable-feature=agent
+  # - --storage.agent.retention.max-time=30m
+  # - --config.file=/etc/config/prometheus.yml
+  defaultFlagsOverride: []
+
+  extraFlags:
+    - web.enable-lifecycle
+    ## web.enable-admin-api flag controls access to the administrative HTTP API which includes functionality such as
+    ## deleting time series. This is disabled by default.
+    # - web.enable-admin-api
+    ##
+    ## storage.tsdb.no-lockfile flag controls TSDB locking
+    # - storage.tsdb.no-lockfile
+    ##
+    ## storage.tsdb.wal-compression flag enables compression of the write-ahead log (WAL)
+    # - storage.tsdb.wal-compression
+
+  ## Path to a configuration file on prometheus server container FS
+  configPath: /etc/config/prometheus.yml
+
+  ### The data directory used by prometheus to set --storage.tsdb.path
+  ### When empty server.persistentVolume.mountPath is used instead
+  storagePath: ""
+
+  global:
+    ## How frequently to scrape targets by default
+    ##
+    scrape_interval: 1m
+    ## How long until a scrape request times out
+    ##
+    scrape_timeout: 10s
+    ## How frequently to evaluate rules
+    ##
+    evaluation_interval: 1m
+  ## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write
+  ##
+  remoteWrite: []
+  ## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_read
+  ##
+  remoteRead: []
+
+  ## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#tsdb
+  ##
+  tsdb: {}
+    # out_of_order_time_window: 0s
+
+  ## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#exemplars
+  ## Must be enabled via --enable-feature=exemplar-storage
+  ##
+  exemplars: {}
+    # max_exemplars: 100000
+
+  ## Custom HTTP headers for Liveness/Readiness/Startup Probe
+  ##
+  ## Useful for providing HTTP Basic Auth to healthchecks
+  probeHeaders: []
+  # - name: "Authorization"
+  #   value: "Bearer ABCDEabcde12345"
+
+  ## Additional Prometheus server container arguments
+  ##
+  extraArgs: {}
+
+  ## Additional InitContainers to initialize the pod
+  ##
+  extraInitContainers: []
+
+  ## Additional Prometheus server Volume mounts
+  ##
+  extraVolumeMounts: []
+
+  ## Additional Prometheus server Volumes
+  ##
+  extraVolumes: []
+
+  ## Additional Prometheus server hostPath
mounts + ## + extraHostPathMounts: [] + # - name: certs-dir + # mountPath: /etc/kubernetes/certs + # subPath: "" + # hostPath: /etc/kubernetes/certs + # readOnly: true + + extraConfigmapMounts: [] + # - name: certs-configmap + # mountPath: /prometheus + # subPath: "" + # configMap: certs-configmap + # readOnly: true + + ## Additional Prometheus server Secret mounts + # Defines additional mounts with secrets. Secrets must be manually created in the namespace. + extraSecretMounts: [] + # - name: secret-files + # mountPath: /etc/secrets + # subPath: "" + # secretName: prom-secret-files + # readOnly: true + + ## ConfigMap override where fullname is {{.Release.Name}}-{{.Values.server.configMapOverrideName}} + ## Defining configMapOverrideName will cause templates/server-configmap.yaml + ## to NOT generate a ConfigMap resource + ## + configMapOverrideName: "" + + ## Extra labels for Prometheus server ConfigMap (ConfigMap that holds serverFiles) + extraConfigmapLabels: {} + + ingress: + ## If true, Prometheus server Ingress will be created + ## + enabled: false + + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + + ## Prometheus server Ingress annotations + ## + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: 'true' + + ## Prometheus server Ingress additional labels + ## + extraLabels: {} + + ## Redirect ingress to an additional defined port on the service + # servicePort: 8081 + + ## Prometheus server Ingress hostnames with optional path + ## Must be provided if Ingress is enabled + ## + hosts: [] + # - prometheus.domain.com + # - domain.com/prometheus + + path: / + + # pathType is only for k8s >= 1.18 + pathType: Prefix + + ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. + extraPaths: [] + # - path: /* + # backend: + # serviceName: ssl-redirect + # servicePort: use-annotation + + ## Prometheus server Ingress TLS configuration + ## Secrets must be manually created in the namespace + ## + tls: [] + # - secretName: prometheus-server-tls + # hosts: + # - prometheus.domain.com + + ## Server Deployment Strategy type + strategy: + type: Recreate + + ## hostAliases allows adding entries to /etc/hosts inside the containers + hostAliases: [] + # - ip: "127.0.0.1" + # hostnames: + # - "example.com" + + ## Node tolerations for server scheduling to nodes with taints + ## Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ + ## + tolerations: [] + # - key: "key" + # operator: "Equal|Exists" + # value: "value" + # effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)" + + ## Node labels for Prometheus server pod assignment + ## Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/ + ## + nodeSelector: {} + + ## Pod affinity + ## + affinity: {} + + ## Pod topology spread constraints + ## ref. 
https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/ + topologySpreadConstraints: [] + + ## PodDisruptionBudget settings + ## ref: https://kubernetes.io/docs/concepts/workloads/pods/disruptions/ + ## + podDisruptionBudget: + enabled: false + maxUnavailable: 1 + # minAvailable: 1 + ## unhealthyPodEvictionPolicy is available since 1.27.0 (beta) + ## https://kubernetes.io/docs/tasks/run-application/configure-pdb/#unhealthy-pod-eviction-policy + # unhealthyPodEvictionPolicy: IfHealthyBudget + + ## Use an alternate scheduler, e.g. "stork". + ## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/ + ## + # schedulerName: + + persistentVolume: + ## If true, Prometheus server will create/use a Persistent Volume Claim + ## If false, use emptyDir + ## + enabled: true + + ## If set it will override the name of the created persistent volume claim + ## generated by the stateful set. + ## + statefulSetNameOverride: "" + + ## Prometheus server data Persistent Volume access modes + ## Must match those of existing PV or dynamic provisioner + ## Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ + ## + accessModes: + - ReadWriteOnce + + ## Prometheus server data Persistent Volume labels + ## + labels: {} + + ## Prometheus server data Persistent Volume annotations + ## + annotations: {} + + ## Prometheus server data Persistent Volume existing claim name + ## Requires server.persistentVolume.enabled: true + ## If defined, PVC must be created manually before volume will be bound + existingClaim: "" + + ## Prometheus server data Persistent Volume mount root path + ## + mountPath: /data + + ## Prometheus server data Persistent Volume size + ## + size: 8Gi + + ## Prometheus server data Persistent Volume Storage Class + ## If defined, storageClassName: + ## If set to "-", storageClassName: "", which disables dynamic provisioning + ## If undefined (the default) or set to null, no storageClassName spec is + ## set, choosing the default provisioner. (gp2 on AWS, standard on + ## GKE, AWS & OpenStack) + ## + # storageClass: "-" + + ## Prometheus server data Persistent Volume Binding Mode + ## If defined, volumeBindingMode: + ## If undefined (the default) or set to null, no volumeBindingMode spec is + ## set, choosing the default mode. 
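+    ## e.g. volumeBindingMode: WaitForFirstConsumer (assumes a provisioner that
+    ## supports delayed binding)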
+    ##
+    # volumeBindingMode: ""
+
+    ## Subdirectory of Prometheus server data Persistent Volume to mount
+    ## Useful if the volume's root directory is not empty
+    ##
+    subPath: ""
+
+    ## Persistent Volume Claim Selector
+    ## Useful if Persistent Volumes have been provisioned in advance
+    ## Ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/#selector
+    ##
+    # selector:
+    #  matchLabels:
+    #    release: "stable"
+    #  matchExpressions:
+    #    - { key: environment, operator: In, values: [ dev ] }
+
+    ## Persistent Volume Name
+    ## Useful if Persistent Volumes have been provisioned in advance and you want to use a specific one
+    ##
+    # volumeName: ""
+
+  emptyDir:
+    ## Prometheus server emptyDir volume size limit
+    ##
+    sizeLimit: ""
+
+  ## Annotations to be added to Prometheus server pods
+  ##
+  podAnnotations: {}
+    # iam.amazonaws.com/role: prometheus
+
+  ## Labels to be added to Prometheus server pods
+  ##
+  podLabels: {}
+
+  ## Prometheus AlertManager configuration
+  ##
+  alertmanagers: []
+
+  ## Specify if a Pod Security Policy for node-exporter must be created
+  ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/
+  ##
+  podSecurityPolicy:
+    annotations: {}
+      ## Specify pod annotations
+      ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#apparmor
+      ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#seccomp
+      ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#sysctl
+      ##
+      # seccomp.security.alpha.kubernetes.io/allowedProfileNames: '*'
+      # seccomp.security.alpha.kubernetes.io/defaultProfileName: 'docker/default'
+      # apparmor.security.beta.kubernetes.io/defaultProfileName: 'runtime/default'
+
+  ## Use a StatefulSet if replicaCount needs to be greater than 1 (see below)
+  ##
+  replicaCount: 1
+
+  ## Number of old revisions to retain to allow rollback
+  ## Default Kubernetes value is set to 10
+  ##
+  revisionHistoryLimit: 10
+
+  ## Annotations to be added to deployment
+  ##
+  deploymentAnnotations: {}
+
+  statefulSet:
+    ## If true, use a statefulset instead of a deployment for pod management.
+    ## This allows scaling replicas to more than 1 pod
+    ##
+    enabled: true
+
+    annotations: {}
+    labels: {}
+    podManagementPolicy: OrderedReady
+
+    ## Headless service for the Prometheus server statefulset
+    ##
+    headless:
+      annotations: {}
+      labels: {}
+      servicePort: 80
+      ## Enable gRPC port on service to allow auto discovery with thanos-querier
+      gRPC:
+        enabled: false
+        servicePort: 10901
+        # nodePort: 10901
+
+    ## Statefulset's persistent volume claim retention policy
+    ## pvcDeleteOnStsDelete and pvcDeleteOnStsScale determine whether
+    ## statefulset's PVCs are deleted (true) or retained (false) on scaling down
+    ## and deleting statefulset, respectively. Requires 1.27.0+.
+    ## Ref: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention
+    ##
+    pvcDeleteOnStsDelete: false
+    pvcDeleteOnStsScale: false
+
+  ## Prometheus server readiness and liveness probe initial delay and timeout
+  ## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
+  ##
+  tcpSocketProbeEnabled: false
+  probeScheme: HTTP
+  readinessProbeInitialDelay: 30
+  readinessProbePeriodSeconds: 5
+  readinessProbeTimeout: 4
+  readinessProbeFailureThreshold: 3
+  readinessProbeSuccessThreshold: 1
+  livenessProbeInitialDelay: 30
+  livenessProbePeriodSeconds: 15
+  livenessProbeTimeout: 10
+  livenessProbeFailureThreshold: 3
+  livenessProbeSuccessThreshold: 1
+  startupProbe:
+    enabled: false
+    periodSeconds: 5
+    failureThreshold: 30
+    timeoutSeconds: 10
+
+  ## Prometheus server resource requests and limits
+  ## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
+  ##
+  resources:
+    limits:
+      cpu: 500m
+      memory: 512Mi
+    requests:
+      cpu: 500m
+      memory: 512Mi
+
+  # Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico),
+  # because the AWS-managed control plane cannot communicate with the pods' IP CIDR, so admission webhooks do not work
+  ##
+  hostNetwork: false
+
+  # When hostNetwork is enabled, this will be set to ClusterFirstWithHostNet automatically
+  dnsPolicy: ClusterFirst
+
+  # Use hostPort
+  # hostPort: 9090
+
+  # Use portName
+  portName: ""
+
+  ## Vertical Pod Autoscaler config
+  ## Ref: https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler
+  verticalAutoscaler:
+    ## If true a VPA object will be created for the controller (either StatefulSet or Deployment, based on above configs)
+    enabled: false
+    # updateMode: "Auto"
+    # containerPolicies:
+    # - containerName: 'prometheus-server'
+
+  # Custom DNS configuration to be added to prometheus server pods
+  dnsConfig: {}
+  # nameservers:
+  #   - 1.2.3.4
+  # searches:
+  #   - ns1.svc.cluster-domain.example
+  #   - my.dns.search.suffix
+  # options:
+  #   - name: ndots
+  #     value: "2"
+  #   - name: edns0
+
+  ## Security context to be added to server pods
+  ##
+  securityContext:
+    runAsUser: 65534
+    runAsNonRoot: true
+    runAsGroup: 65534
+    fsGroup: 65534
+
+  ## Security context to be added to server container
+  ##
+  containerSecurityContext: {}
+
+  service:
+    ## If false, no Service will be created for the Prometheus server
+    ##
+    enabled: true
+
+    annotations: {}
+    labels: {}
+    clusterIP: None
+
+    ## List of IP addresses at which the Prometheus server service is available
+    ## Ref: https://kubernetes.io/docs/concepts/services-networking/service/#external-ips
+    ##
+    externalIPs: []
+
+    loadBalancerIP: ""
+    loadBalancerSourceRanges: []
+    servicePort: 8080
+    sessionAffinity: None
+    type: ClusterIP
+
+    ## Enable gRPC port on service to allow auto discovery with thanos-querier
+    gRPC:
+      enabled: false
+      servicePort: 10901
+      # nodePort: 10901
+
+    ## If using a statefulSet (statefulSet.enabled=true), configure the
+    ## service to connect to a specific replica to have a consistent view
+    ## of the data.
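+    ## For example, replica: 0 below pins the service to the first pod of the
+    ## statefulset (assuming the usual <name>-0 ordinal naming).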
+    statefulsetReplica:
+      enabled: false
+      replica: 0
+
+    ## Additional port to define in the Service
+    additionalPorts: []
+    # additionalPorts:
+    # - name: authenticated
+    #   port: 8081
+    #   targetPort: 8081
+
+  ## Prometheus server pod termination grace period
+  ##
+  terminationGracePeriodSeconds: 300
+
+  ## Prometheus data retention period (default if not specified is 15 days)
+  ##
+  retention: "15d"
+
+  ## Prometheus' data retention size. Supported units: B, KB, MB, GB, TB, PB, EB.
+  ##
+  retentionSize: ""
+
+## Prometheus server ConfigMap entries for rule files (allows prometheus label interpolation)
+ruleFiles: {}
+
+## Prometheus server ConfigMap entries for scrape_config_files
+## (allows scrape configs defined in additional files)
+##
+scrapeConfigFiles: []
+
+## Prometheus server ConfigMap entries
+##
+serverFiles:
+  ## Alerts configuration
+  ## Ref: https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/
+  alerting_rules.yml: {}
+  # groups:
+  #   - name: Instances
+  #     rules:
+  #       - alert: InstanceDown
+  #         expr: up == 0
+  #         for: 5m
+  #         labels:
+  #           severity: page
+  #         annotations:
+  #           description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.'
+  #           summary: 'Instance {{ $labels.instance }} down'
+  ## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use alerting_rules.yml
+  alerts: {}
+
+  ## Records configuration
+  ## Ref: https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/
+  recording_rules.yml: {}
+  ## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use recording_rules.yml
+  rules: {}
+
+  prometheus.yml:
+    rule_files:
+      - /etc/config/recording_rules.yml
+      - /etc/config/alerting_rules.yml
+    ## Below two files are DEPRECATED and will be removed from this default values file
+      - /etc/config/rules
+      - /etc/config/alerts
+
+    scrape_configs:
+      - job_name: prometheus
+        static_configs:
+          - targets:
+              - localhost:9090
+
+      # A scrape configuration for running Prometheus on a Kubernetes cluster.
+      # This uses separate scrape configs for cluster components (i.e. API server, node)
+      # and services to allow each to use different authentication configs.
+      #
+      # Kubernetes labels will be added as Prometheus labels on metrics via the
+      # `labelmap` relabeling action.
+
+      # Scrape config for API servers.
+      #
+      # Kubernetes exposes API servers as endpoints to the default/kubernetes
+      # service so this uses `endpoints` role and uses relabelling to only keep
+      # the endpoints associated with the default/kubernetes service using the
+      # default named port `https`. This works for single API server deployments as
+      # well as HA API server deployments.
+      - job_name: 'kubernetes-apiservers'
+
+        kubernetes_sd_configs:
+          - role: endpoints
+
+        # Default to scraping over https. If required, just disable this or change to
+        # `http`.
+        scheme: https
+
+        # This TLS & bearer token file config is used to connect to the actual scrape
+        # endpoints for cluster components. This is separate to discovery auth
+        # configuration because discovery & scraping are two separate concerns in
+        # Prometheus. The discovery auth config is automatic if Prometheus runs inside
+        # the cluster. Otherwise, more config options have to be provided within the
+        # `<kubernetes_sd_config>`.
+        tls_config:
+          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+          # If your node certificates are self-signed or use a different CA to the
+          # master CA, then disable certificate verification below. Note that
+          # certificate verification is an integral part of a secure infrastructure
+          # so this should only be disabled in a controlled environment. You can
+          # disable certificate verification by uncommenting the line below.
+          #
+          insecure_skip_verify: true
+        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+        # Keep only the default/kubernetes service endpoints for the https port. This
+        # will add targets for each API server to which Kubernetes adds an endpoint in
+        # the default/kubernetes service.
+        relabel_configs:
+          - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
+            action: keep
+            regex: default;kubernetes;https
+
+      - job_name: 'kubernetes-nodes'
+
+        # Default to scraping over https. If required, just disable this or change to
+        # `http`.
+        scheme: https
+
+        # This TLS & bearer token file config is used to connect to the actual scrape
+        # endpoints for cluster components. This is separate to discovery auth
+        # configuration because discovery & scraping are two separate concerns in
+        # Prometheus. The discovery auth config is automatic if Prometheus runs inside
+        # the cluster. Otherwise, more config options have to be provided within the
+        # `<kubernetes_sd_config>`.
+        tls_config:
+          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+          # If your node certificates are self-signed or use a different CA to the
+          # master CA, then disable certificate verification below. Note that
+          # certificate verification is an integral part of a secure infrastructure
+          # so this should only be disabled in a controlled environment. You can
+          # disable certificate verification by uncommenting the line below.
+          #
+          insecure_skip_verify: true
+        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+        kubernetes_sd_configs:
+          - role: node
+
+        relabel_configs:
+          - action: labelmap
+            regex: __meta_kubernetes_node_label_(.+)
+          - target_label: __address__
+            replacement: kubernetes.default.svc:443
+          - source_labels: [__meta_kubernetes_node_name]
+            regex: (.+)
+            target_label: __metrics_path__
+            replacement: /api/v1/nodes/$1/proxy/metrics
+
+
+      - job_name: 'kubernetes-nodes-cadvisor'
+
+        # Default to scraping over https. If required, just disable this or change to
+        # `http`.
+        scheme: https
+
+        # This TLS & bearer token file config is used to connect to the actual scrape
+        # endpoints for cluster components. This is separate to discovery auth
+        # configuration because discovery & scraping are two separate concerns in
+        # Prometheus. The discovery auth config is automatic if Prometheus runs inside
+        # the cluster. Otherwise, more config options have to be provided within the
+        # `<kubernetes_sd_config>`.
+        tls_config:
+          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+          # If your node certificates are self-signed or use a different CA to the
+          # master CA, then disable certificate verification below. Note that
+          # certificate verification is an integral part of a secure infrastructure
+          # so this should only be disabled in a controlled environment. You can
+          # disable certificate verification by uncommenting the line below.
+ # + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: node + + # This configuration will work only on kubelet 1.7.3+ + # As the scrape endpoints for cAdvisor have changed + # if you are using older version you need to change the replacement to + # replacement: /api/v1/nodes/$1:4194/proxy/metrics + # more info here https://github.com/coreos/prometheus-operator/issues/633 + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor + + # Metric relabel configs to apply to samples before ingestion. + # [Metric Relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs) + # metric_relabel_configs: + # - action: labeldrop + # regex: (kubernetes_io_hostname|failure_domain_beta_kubernetes_io_region|beta_kubernetes_io_os|beta_kubernetes_io_arch|beta_kubernetes_io_instance_type|failure_domain_beta_kubernetes_io_zone) + + # Scrape config for service endpoints. + # + # The relabeling allows the actual service scrape endpoint to be configured + # via the following annotations: + # + # * `prometheus.io/scrape`: Only scrape services that have a value of + # `true`, except if `prometheus.io/scrape-slow` is set to `true` as well. + # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need + # to set this to `https` & most likely set the `tls_config` of the scrape config. + # * `prometheus.io/path`: If the metrics path is not `/metrics` override this. + # * `prometheus.io/port`: If the metrics are exposed on a different port to the + # service then set this appropriately. + # * `prometheus.io/param_`: If the metrics endpoint uses parameters + # then you can set any parameter + - job_name: 'kubernetes-service-endpoints' + honor_labels: true + + kubernetes_sd_configs: + - role: endpoints + + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape_slow] + action: drop + regex: true + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] + action: replace + target_label: __scheme__ + regex: (https?) 
+ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] + action: replace + target_label: __address__ + regex: (.+?)(?::\d+)?;(\d+) + replacement: $1:$2 + - action: labelmap + regex: __meta_kubernetes_service_annotation_prometheus_io_param_(.+) + replacement: __param_$1 + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: namespace + - source_labels: [__meta_kubernetes_service_name] + action: replace + target_label: service + - source_labels: [__meta_kubernetes_pod_node_name] + action: replace + target_label: node + + # Scrape config for slow service endpoints; same as above, but with a larger + # timeout and a larger interval + # + # The relabeling allows the actual service scrape endpoint to be configured + # via the following annotations: + # + # * `prometheus.io/scrape-slow`: Only scrape services that have a value of `true` + # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need + # to set this to `https` & most likely set the `tls_config` of the scrape config. + # * `prometheus.io/path`: If the metrics path is not `/metrics` override this. + # * `prometheus.io/port`: If the metrics are exposed on a different port to the + # service then set this appropriately. + # * `prometheus.io/param_`: If the metrics endpoint uses parameters + # then you can set any parameter + - job_name: 'kubernetes-service-endpoints-slow' + honor_labels: true + + scrape_interval: 5m + scrape_timeout: 30s + + kubernetes_sd_configs: + - role: endpoints + + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape_slow] + action: keep + regex: true + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] + action: replace + target_label: __scheme__ + regex: (https?) + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] + action: replace + target_label: __address__ + regex: (.+?)(?::\d+)?;(\d+) + replacement: $1:$2 + - action: labelmap + regex: __meta_kubernetes_service_annotation_prometheus_io_param_(.+) + replacement: __param_$1 + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: namespace + - source_labels: [__meta_kubernetes_service_name] + action: replace + target_label: service + - source_labels: [__meta_kubernetes_pod_node_name] + action: replace + target_label: node + + - job_name: 'prometheus-pushgateway' + honor_labels: true + + kubernetes_sd_configs: + - role: service + + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe] + action: keep + regex: pushgateway + + # Example scrape config for probing services via the Blackbox Exporter. 
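+      # (Assumes a blackbox exporter is deployed separately and reachable at the
+      # `blackbox` address used in the `replacement` relabeling below.)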
+ # + # The relabeling allows the actual service scrape endpoint to be configured + # via the following annotations: + # + # * `prometheus.io/probe`: Only probe services that have a value of `true` + - job_name: 'kubernetes-services' + honor_labels: true + + metrics_path: /probe + params: + module: [http_2xx] + + kubernetes_sd_configs: + - role: service + + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe] + action: keep + regex: true + - source_labels: [__address__] + target_label: __param_target + - target_label: __address__ + replacement: blackbox + - source_labels: [__param_target] + target_label: instance + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + - source_labels: [__meta_kubernetes_service_name] + target_label: service + + # Example scrape config for pods + # + # The relabeling allows the actual pod scrape endpoint to be configured via the + # following annotations: + # + # * `prometheus.io/scrape`: Only scrape pods that have a value of `true`, + # except if `prometheus.io/scrape-slow` is set to `true` as well. + # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need + # to set this to `https` & most likely set the `tls_config` of the scrape config. + # * `prometheus.io/path`: If the metrics path is not `/metrics` override this. + # * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`. + - job_name: 'kubernetes-pods' + honor_labels: true + + kubernetes_sd_configs: + - role: pod + + relabel_configs: + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow] + action: drop + regex: true + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme] + action: replace + regex: (https?) + target_label: __scheme__ + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip] + action: replace + regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4}) + replacement: '[$2]:$1' + target_label: __address__ + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip] + action: replace + regex: (\d+);((([0-9]+?)(\.|$)){4}) + replacement: $2:$1 + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+) + replacement: __param_$1 + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: namespace + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: pod + - source_labels: [__meta_kubernetes_pod_phase] + regex: Pending|Succeeded|Failed|Completed + action: drop + - source_labels: [__meta_kubernetes_pod_node_name] + action: replace + target_label: node + + # Example Scrape config for pods which should be scraped slower. 
A useful example
+      # would be stackdriver-exporter, which queries an API on every scrape of the pod
+      #
+      # The relabeling allows the actual pod scrape endpoint to be configured via the
+      # following annotations:
+      #
+      # * `prometheus.io/scrape-slow`: Only scrape pods that have a value of `true`
+      # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
+      #   to set this to `https` & most likely set the `tls_config` of the scrape config.
+      # * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
+      # * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`.
+      - job_name: 'kubernetes-pods-slow'
+        honor_labels: true
+
+        scrape_interval: 5m
+        scrape_timeout: 30s
+
+        kubernetes_sd_configs:
+          - role: pod
+
+        relabel_configs:
+          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow]
+            action: keep
+            regex: true
+          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
+            action: replace
+            regex: (https?)
+            target_label: __scheme__
+          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
+            action: replace
+            target_label: __metrics_path__
+            regex: (.+)
+          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
+            action: replace
+            regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})
+            replacement: '[$2]:$1'
+            target_label: __address__
+          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
+            action: replace
+            regex: (\d+);((([0-9]+?)(\.|$)){4})
+            replacement: $2:$1
+            target_label: __address__
+          - action: labelmap
+            regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+)
+            replacement: __param_$1
+          - action: labelmap
+            regex: __meta_kubernetes_pod_label_(.+)
+          - source_labels: [__meta_kubernetes_namespace]
+            action: replace
+            target_label: namespace
+          - source_labels: [__meta_kubernetes_pod_name]
+            action: replace
+            target_label: pod
+          - source_labels: [__meta_kubernetes_pod_phase]
+            regex: Pending|Succeeded|Failed|Completed
+            action: drop
+          - source_labels: [__meta_kubernetes_pod_node_name]
+            action: replace
+            target_label: node
+
+# adds additional scrape configs to prometheus.yml
+# must be a string so you have to add a | after extraScrapeConfigs:
+# example adds prometheus-blackbox-exporter scrape config
+extraScrapeConfigs: ""
+  # - job_name: 'prometheus-blackbox-exporter'
+  #   metrics_path: /probe
+  #   params:
+  #     module: [http_2xx]
+  #   static_configs:
+  #     - targets:
+  #         - https://example.com
+  #   relabel_configs:
+  #     - source_labels: [__address__]
+  #       target_label: __param_target
+  #     - source_labels: [__param_target]
+  #       target_label: instance
+  #     - target_label: __address__
+  #       replacement: prometheus-blackbox-exporter:9115
+
+# Adds option to add alert_relabel_configs to avoid duplicate alerts in alertmanager
+# useful in H/A prometheus with different external labels but the same alerts
+alertRelabelConfigs: {}
+  # alert_relabel_configs:
+  #   - source_labels: [dc]
+  #     regex: (.+)\d+
+  #     target_label: dc
+
+networkPolicy:
+  ## Enable creation of NetworkPolicy resources.
+ ## + enabled: false + +# Force namespace of namespaced resources +forceNamespace: "" + +# Extra manifests to deploy as an array +extraManifests: [] + # - | + # apiVersion: v1 + # kind: ConfigMap + # metadata: + # labels: + # name: prometheus-extra + # data: + # extra-data: "value" + +# Configuration of subcharts defined in Chart.yaml + +## alertmanager sub-chart configurable values +## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/alertmanager +## +alertmanager: + ## If false, alertmanager will not be installed + ## + enabled: true + + persistence: + size: 2Gi + + podSecurityContext: + runAsUser: 65534 + runAsNonRoot: true + runAsGroup: 65534 + fsGroup: 65534 + +## kube-state-metrics sub-chart configurable values +## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics +## +kube-state-metrics: + ## If false, kube-state-metrics sub-chart will not be installed + ## + enabled: true + +## prometheus-node-exporter sub-chart configurable values +## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter +## +prometheus-node-exporter: + ## If false, node-exporter will not be installed + ## + enabled: true + + rbac: + pspEnabled: false + + containerSecurityContext: + allowPrivilegeEscalation: false + +## prometheus-pushgateway sub-chart configurable values +## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-pushgateway +## +prometheus-pushgateway: + ## If false, pushgateway will not be installed + ## + enabled: true + + # Optional service annotations + serviceAnnotations: + prometheus.io/probe: pushgateway diff --git a/kubernetes/system/kube-prometheus-stack/namespace.yml b/kubernetes/system/kube-prometheus-stack/namespace.yml new file mode 100644 index 00000000..90d12efd --- /dev/null +++ b/kubernetes/system/kube-prometheus-stack/namespace.yml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: monitoring + labels: + name: monitoring diff --git a/kubernetes/system/vault/namespace.yml b/kubernetes/system/vault/namespace.yml new file mode 100644 index 00000000..beb4bbe8 --- /dev/null +++ b/kubernetes/system/vault/namespace.yml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: vault + labels: + name: vault diff --git a/kubernetes/system/vault/vault.yml b/kubernetes/system/vault/vault.yml new file mode 100644 index 00000000..6f53a230 --- /dev/null +++ b/kubernetes/system/vault/vault.yml @@ -0,0 +1,1204 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: MPL-2.0 + +# Available parameters and their default values for the Vault chart. + +global: + # enabled is the master enabled switch. Setting this to true or false + # will enable or disable all the components within this chart by default. + enabled: true + + # The namespace to deploy to. Defaults to the `helm` installation namespace. + namespace: "vault" + + # Image pull secret to use for registry authentication. + # Alternatively, the value may be specified as an array of strings. + imagePullSecrets: [] + # imagePullSecrets: + # - name: image-pull-secret + + # TLS for end-to-end encrypted transport + tlsDisable: false + + # External vault server address for the injector and CSI provider to use. + # Setting this will disable deployment of a vault server. + externalVaultAddr: "" + + # If deploying to OpenShift + openshift: false + + # Create PodSecurityPolicy for pods + psp: + enable: false + # Annotation for PodSecurityPolicy. 
+ # This is a multi-line templated string map, and can also be set as YAML. + annotations: | + seccomp.security.alpha.kubernetes.io/allowedProfileNames: docker/default,runtime/default + apparmor.security.beta.kubernetes.io/allowedProfileNames: runtime/default + seccomp.security.alpha.kubernetes.io/defaultProfileName: runtime/default + apparmor.security.beta.kubernetes.io/defaultProfileName: runtime/default + + serverTelemetry: + # Enable integration with the Prometheus Operator + # See the top level serverTelemetry section below before enabling this feature. + prometheusOperator: false + +injector: + # True if you want to enable vault agent injection. + # @default: global.enabled + enabled: true + + replicas: 1 + + # Configures the port the injector should listen on + port: 8080 + + # If multiple replicas are specified, by default a leader will be determined + # so that only one injector attempts to create TLS certificates. + leaderElector: + enabled: true + + # If true, will enable a node exporter metrics endpoint at /metrics. + metrics: + enabled: true + + # Deprecated: Please use global.externalVaultAddr instead. + externalVaultAddr: "" + + # image sets the repo and tag of the vault-k8s image to use for the injector. + image: + repository: "hashicorp/vault-k8s" + tag: "1.3.1" + pullPolicy: IfNotPresent + + # agentImage sets the repo and tag of the Vault image to use for the Vault Agent + # containers. This should be set to the official Vault image. Vault 1.3.1+ is + # required. + agentImage: + repository: "hashicorp/vault" + tag: "1.15.2" + + # The default values for the injected Vault Agent containers. + agentDefaults: + # For more information on configuring resources, see the K8s documentation: + # https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + cpuLimit: "500m" + cpuRequest: "250m" + memLimit: "128Mi" + memRequest: "64Mi" + # ephemeralLimit: "128Mi" + # ephemeralRequest: "64Mi" + + # Default template type for secrets when no custom template is specified. + # Possible values include: "json" and "map". + template: "map" + + # Default values within Agent's template_config stanza. + templateConfig: + exitOnRetryFailure: true + staticSecretRenderInterval: "" + + # Used to define custom livenessProbe settings + livenessProbe: + failureThreshold: 2 + initialDelaySeconds: 5 + periodSeconds: 2 + successThreshold: 1 + timeoutSeconds: 5 + + # Used to define custom readinessProbe settings + readinessProbe: + failureThreshold: 2 + initialDelaySeconds: 5 + periodSeconds: 2 + successThreshold: 1 + timeoutSeconds: 5 + + # Used to define custom startupProbe settings + startupProbe: + failureThreshold: 12 + initialDelaySeconds: 5 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 5 + + # Mount Path of the Vault Kubernetes Auth Method. + authPath: "auth/kubernetes" + + # Configures the log verbosity of the injector. + # Supported log levels include: trace, debug, info, warn, error + logLevel: "info" + + # Configures the log format of the injector. Supported log formats: "standard", "json". + logFormat: "standard" + + # Configures all Vault Agent sidecars to revoke their token when shutting down + revokeOnShutdown: false + + webhook: + # Configures failurePolicy of the webhook. The "unspecified" default behaviour depends on the + # API Version of the WebHook. + # To block pod creation while the webhook is unavailable, set the policy to `Fail` below. 
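+    # With the default `Ignore`, pods are still created (without injection) while
+    # the webhook is unavailable.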
+    # See https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#failure-policy
+    #
+    failurePolicy: Ignore
+
+    # matchPolicy specifies the approach to accepting changes based on the rules of
+    # the MutatingWebhookConfiguration.
+    # See https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-matchpolicy
+    # for more details.
+    #
+    matchPolicy: Exact
+
+    # timeoutSeconds is the number of seconds before the webhook request will be
+    # ignored or fail.
+    # Whether it is ignored or fails depends on the failurePolicy.
+    # See https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#timeouts
+    # for more details.
+    #
+    timeoutSeconds: 30
+
+    # namespaceSelector is the selector for restricting the webhook to only
+    # specific namespaces.
+    # See https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-namespaceselector
+    # for more details.
+    # Example:
+    #  namespaceSelector:
+    #    matchLabels:
+    #      sidecar-injector: enabled
+    namespaceSelector: {}
+
+    # objectSelector is the selector for restricting the webhook to only
+    # specific labels.
+    # See https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-objectselector
+    # for more details.
+    # Example:
+    #  objectSelector:
+    #    matchLabels:
+    #      vault-sidecar-injector: enabled
+    objectSelector: |
+      matchExpressions:
+      - key: app.kubernetes.io/name
+        operator: NotIn
+        values:
+        - {{ template "vault.name" . }}-agent-injector
+
+    # Extra annotations to attach to the webhook
+    annotations: {}
+
+  # Deprecated: please use 'webhook.failurePolicy' instead
+  # Configures failurePolicy of the webhook. The "unspecified" default behaviour depends on the
+  # API Version of the WebHook.
+  # To block pod creation while the webhook is unavailable, set the policy to `Fail` below.
+  # See https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#failure-policy
+  #
+  failurePolicy: Ignore
+
+  # Deprecated: please use 'webhook.namespaceSelector' instead
+  # namespaceSelector is the selector for restricting the webhook to only
+  # specific namespaces.
+  # See https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-namespaceselector
+  # for more details.
+  # Example:
+  #  namespaceSelector:
+  #    matchLabels:
+  #      sidecar-injector: enabled
+  namespaceSelector: {}
+
+  # Deprecated: please use 'webhook.objectSelector' instead
+  # objectSelector is the selector for restricting the webhook to only
+  # specific labels.
+  # See https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-objectselector
+  # for more details.
+  # Example:
+  #  objectSelector:
+  #    matchLabels:
+  #      vault-sidecar-injector: enabled
+  objectSelector: {}
+
+  # Deprecated: please use 'webhook.annotations' instead
+  # Extra annotations to attach to the webhook
+  webhookAnnotations: {}
+
+  certs:
+    # secretName is the name of the secret that has the TLS certificate and
+    # private key to serve the injector webhook. If this is null, then the
+    # injector will default to its automatic management mode that will assign
+    # a service account to the injector to generate its own certificates.
+    secretName: null
+
+    # caBundle is a base64-encoded PEM-encoded certificate bundle for the CA
+    # that signed the TLS certificate that the webhook serves.
This must be set + # if secretName is non-null unless an external service like cert-manager is + # keeping the caBundle updated. + caBundle: "" + + # certName and keyName are the names of the files within the secret for + # the TLS cert and private key, respectively. These have reasonable + # defaults but can be customized if necessary. + certName: tls.crt + keyName: tls.key + + # Security context for the pod template and the injector container + # The default pod securityContext is: + # runAsNonRoot: true + # runAsGroup: {{ .Values.injector.gid | default 1000 }} + # runAsUser: {{ .Values.injector.uid | default 100 }} + # fsGroup: {{ .Values.injector.gid | default 1000 }} + # and for container is + # allowPrivilegeEscalation: false + # capabilities: + # drop: + # - ALL + securityContext: + pod: {} + container: {} + + resources: {} + # resources: + # requests: + # memory: 256Mi + # cpu: 250m + # limits: + # memory: 256Mi + # cpu: 250m + + # extraEnvironmentVars is a list of extra environment variables to set in the + # injector deployment. + extraEnvironmentVars: + VAULT_ADDR: https://vault.vault:8200 + + # Affinity Settings for injector pods + # This can either be a multi-line string or YAML matching the PodSpec's affinity field. + # Commenting out or setting as empty the affinity variable, will allow + # deployment of multiple replicas to single node services such as Minikube. + affinity: | + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app.kubernetes.io/name: {{ template "vault.name" . }}-agent-injector + app.kubernetes.io/instance: "{{ .Release.Name }}" + component: webhook + topologyKey: kubernetes.io/hostname + + # Topology settings for injector pods + # ref: https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/ + # This should be either a multi-line string or YAML matching the topologySpreadConstraints array + # in a PodSpec. + topologySpreadConstraints: [] + + # Toleration Settings for injector pods + # This should be either a multi-line string or YAML matching the Toleration array + # in a PodSpec. + tolerations: [] + + # nodeSelector labels for server pod assignment, formatted as a multi-line string or YAML map. + # ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector + # Example: + # nodeSelector: + # beta.kubernetes.io/arch: amd64 + nodeSelector: {} + + # Priority class for injector pods + priorityClassName: "" + + # Extra annotations to attach to the injector pods + # This can either be YAML or a YAML-formatted multi-line templated string map + # of the annotations to apply to the injector pods + annotations: {} + + # Extra labels to attach to the agent-injector + # This should be a YAML map of the labels to apply to the injector + extraLabels: {} + + # Should the injector pods run on the host network (useful when using + # an alternate CNI in EKS) + hostNetwork: false + + # Injector service specific config + service: + # Extra annotations to attach to the injector service + annotations: {} + + # Injector serviceAccount specific config + serviceAccount: + # Extra annotations to attach to the injector serviceAccount + annotations: {} + + # A disruption budget limits the number of pods of a replicated application + # that are down simultaneously from voluntary disruptions + podDisruptionBudget: {} + # podDisruptionBudget: + # maxUnavailable: 1 + + # strategy for updating the deployment. This can be a multi-line string or a + # YAML map. 
+ strategy: {} + # strategy: | + # rollingUpdate: + # maxSurge: 25% + # maxUnavailable: 25% + # type: RollingUpdate + +server: + # If true, or "-" with global.enabled true, Vault server will be installed. + # See vault.mode in _helpers.tpl for implementation details. + enabled: "-" + + # [Enterprise Only] This value refers to a Kubernetes secret that you have + # created that contains your enterprise license. If you are not using an + # enterprise image or if you plan to introduce the license key via another + # route, then leave secretName blank ("") or set it to null. + # Requires Vault Enterprise 1.8 or later. + enterpriseLicense: + # The name of the Kubernetes secret that holds the enterprise license. The + # secret must be in the same namespace that Vault is installed into. + secretName: "" + # The key within the Kubernetes secret that holds the enterprise license. + secretKey: "license" + + # Resource requests, limits, etc. for the server cluster placement. This + # should map directly to the value of the resources field for a PodSpec. + # By default no direct resource request is made. + + image: + repository: "hashicorp/vault" + tag: "1.15.2" + # Overrides the default Image Pull Policy + pullPolicy: IfNotPresent + + # Configure the Update Strategy Type for the StatefulSet + # See https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#update-strategies + updateStrategyType: "OnDelete" + + # Configure the logging verbosity for the Vault server. + # Supported log levels include: trace, debug, info, warn, error + logLevel: "" + + # Configure the logging format for the Vault server. + # Supported log formats include: standard, json + logFormat: "" + + resources: {} + # resources: + # requests: + # memory: 256Mi + # cpu: 250m + # limits: + # memory: 256Mi + # cpu: 250m + + # Ingress allows ingress services to be created to allow external access + # from Kubernetes to access Vault pods. + # If deployment is on OpenShift, the following block is ignored. + # In order to expose the service, use the route section below + ingress: + enabled: false + labels: {} + # traffic: external + annotations: {} + # | + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + # or + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + + # Optionally use ingressClassName instead of deprecated annotation. + # See: https://kubernetes.io/docs/concepts/services-networking/ingress/#deprecated-annotation + ingressClassName: "" + + # As of Kubernetes 1.19, all Ingress Paths must have a pathType configured. The default value below should be sufficient in most cases. + # See: https://kubernetes.io/docs/concepts/services-networking/ingress/#path-types for other possible values. + pathType: Prefix + + # When HA mode is enabled and K8s service registration is being used, + # configure the ingress to point to the Vault active service. + activeService: true + hosts: + - host: chart-example.local + paths: [] + ## Extra paths to prepend to the host configuration. This is useful when working with annotation based services. + extraPaths: [] + # - path: /* + # backend: + # service: + # name: ssl-redirect + # port: + # number: use-annotation + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + + # hostAliases is a list of aliases to be added to /etc/hosts. Specified as a YAML list. 
+  hostAliases: []
+  # - ip: 127.0.0.1
+  #   hostnames:
+  #     - chart-example.local
+
+  # OpenShift only - create a route to expose the service
+  # By default the created route will be of type passthrough
+  route:
+    enabled: false
+
+    # When HA mode is enabled and K8s service registration is being used,
+    # configure the route to point to the Vault active service.
+    activeService: true
+
+    labels: {}
+    annotations: {}
+    host: chart-example.local
+    # tls will be passed directly to the route's TLS config, which
+    # can be used to configure other termination methods that terminate
+    # TLS at the router
+    tls:
+      termination: passthrough
+
+  # authDelegator enables a cluster role binding to be attached to the service
+  # account. This cluster role binding can be used to setup Kubernetes auth
+  # method. See https://developer.hashicorp.com/vault/docs/auth/kubernetes
+  authDelegator:
+    enabled: true
+
+  # extraInitContainers is a list of init containers. Specified as a YAML list.
+  # This is useful if you need to run a script to provision TLS certificates or
+  # write out configuration files in a dynamic way.
+  extraInitContainers: null
+  # # This example installs a plugin pulled from github into the /usr/local/libexec/vault/oauthapp folder,
+  # # which is defined in the volumes value.
+  # - name: oauthapp
+  #   image: "alpine"
+  #   command: [sh, -c]
+  #   args:
+  #     - cd /tmp &&
+  #       wget https://github.com/puppetlabs/vault-plugin-secrets-oauthapp/releases/download/v1.2.0/vault-plugin-secrets-oauthapp-v1.2.0-linux-amd64.tar.xz -O oauthapp.xz &&
+  #       tar -xf oauthapp.xz &&
+  #       mv vault-plugin-secrets-oauthapp-v1.2.0-linux-amd64 /usr/local/libexec/vault/oauthapp &&
+  #       chmod +x /usr/local/libexec/vault/oauthapp
+  #   volumeMounts:
+  #     - name: plugins
+  #       mountPath: /usr/local/libexec/vault
+
+  # extraContainers is a list of sidecar containers. Specified as a YAML list.
+  extraContainers: null
+
+  # shareProcessNamespace enables process namespace sharing between Vault and the extraContainers
+  # This is useful if Vault must be signaled, e.g. to send a SIGHUP for log rotation
+  shareProcessNamespace: false
+
+  # extraArgs is a string containing additional Vault server arguments.
+  extraArgs: ""
+
+  # extraPorts is a list of extra ports. Specified as a YAML list.
+  # This is useful if you need to add additional ports to the statefulset in a dynamic way.
+  extraPorts: null
+  # - containerPort: 8300
+  #   name: http-monitoring
+
+  # Used to define custom readinessProbe settings
+  readinessProbe:
+    enabled: true
+    path: /v1/sys/health?standbyok=true&sealedcode=204&uninitcode=204
+    port: 8200
+    failureThreshold: 2
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    successThreshold: 1
+    timeoutSeconds: 3
+
+  # Used to enable a livenessProbe for the pods
+  livenessProbe:
+    enabled: true
+    execCommand: []
+    path: "/v1/sys/health?standbyok=true"
+    port: 8200
+    failureThreshold: 2
+    initialDelaySeconds: 60
+    periodSeconds: 5
+    successThreshold: 1
+    timeoutSeconds: 3
+
+  # Optional duration in seconds the pod needs to terminate gracefully.
+  # See: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/
+  terminationGracePeriodSeconds: 10
+
+  # Used to set the sleep time during the preStop step
+  preStopSleepSeconds: 5
+
+  # Used to define commands to run after the pod is ready.
+  # This can be used to automate processes such as initialization
+  # or bootstrapping auth methods.
+  postStart: []
+  # - /bin/sh
+  # - -c
+  # - /vault/userconfig/myscript/run.sh
+
+  # extraEnvironmentVars is a list of extra environment variables to set with the stateful set. These could be
+  # used to include variables required for auto-unseal.
+  extraEnvironmentVars:
+    GOOGLE_REGION: northamerica-northeast1
+    GOOGLE_PROJECT: spartan-rhino-408115
+    GOOGLE_APPLICATION_CREDENTIALS: /vault/userconfig/kms-creds/credentials.json
+    VAULT_CACERT: /vault/userconfig/vault-ha-tls/vault.ca
+    VAULT_TLSCERT: /vault/userconfig/vault-ha-tls/vault.crt
+    VAULT_TLSKEY: /vault/userconfig/vault-ha-tls/vault.key
+
+  # extraSecretEnvironmentVars is a list of extra environment variables to set with the stateful set.
+  # These variables take value from existing Secret objects.
+  extraSecretEnvironmentVars: []
+  # - envName: AWS_SECRET_ACCESS_KEY
+  #   secretName: vault
+  #   secretKey: AWS_SECRET_ACCESS_KEY
+
+  # Deprecated: please use 'volumes' instead.
+  # extraVolumes is a list of extra volumes to mount. These will be exposed
+  # to Vault in the path `/vault/userconfig/<name>/`. The value below is
+  # an array of objects, examples are shown below.
+  extraVolumes: []
+  # - type: secret (or "configMap")
+  #   name: my-secret
+  #   path: null # default is `/vault/userconfig`
+
+  # volumes is a list of volumes made available to all containers. These are rendered
+  # via toYaml rather than pre-processed like the extraVolumes value.
+  # The purpose is to make it easy to share volumes between containers.
+  volumes:
+    - name: vault-ha-tls
+      secret:
+        secretName: vault-ha-tls
+    - name: kms-creds
+      secret:
+        secretName: kms-creds
+
+  # volumeMounts is a list of volumeMounts for the main server container. These are rendered
+  # via toYaml rather than pre-processed like the extraVolumes value.
+  # The purpose is to make it easy to share volumes between containers.
+  volumeMounts:
+    - name: vault-ha-tls
+      mountPath: /vault/userconfig/vault-ha-tls
+    - name: kms-creds
+      mountPath: /vault/userconfig/kms-creds
+
+  # Affinity Settings
+  # Commenting out the affinity variable or setting it empty allows deployment
+  # to single-node environments such as Minikube.
+  # This should be either a multi-line string or YAML matching the PodSpec's affinity field.
+  affinity: |
+    podAntiAffinity:
+      requiredDuringSchedulingIgnoredDuringExecution:
+        - labelSelector:
+            matchLabels:
+              app.kubernetes.io/name: {{ template "vault.name" . }}
+              app.kubernetes.io/instance: "{{ .Release.Name }}"
+              component: server
+          topologyKey: kubernetes.io/hostname
+
+  # Topology settings for server pods
+  # ref: https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/
+  # This should be either a multi-line string or YAML matching the topologySpreadConstraints array
+  # in a PodSpec.
+  topologySpreadConstraints: []
+
+  # Toleration Settings for server pods
+  # This should be either a multi-line string or YAML matching the Toleration array
+  # in a PodSpec.
+  tolerations: []
+
+  # nodeSelector labels for server pod assignment, formatted as a multi-line string or YAML map.
+ # ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector + # Example: + # nodeSelector: + # beta.kubernetes.io/arch: amd64 + nodeSelector: {} + + # Enables network policy for server pods + networkPolicy: + enabled: false + egress: [] + # egress: + # - to: + # - ipBlock: + # cidr: 10.0.0.0/24 + # ports: + # - protocol: TCP + # port: 443 + ingress: + - from: + - namespaceSelector: {} + ports: + - port: 8200 + protocol: TCP + - port: 8201 + protocol: TCP + + # Priority class for server pods + priorityClassName: "" + + # Extra labels to attach to the server pods + # This should be a YAML map of the labels to apply to the server pods + extraLabels: {} + + # Extra annotations to attach to the server pods + # This can either be YAML or a YAML-formatted multi-line templated string map + # of the annotations to apply to the server pods + annotations: {} + + # Enables a headless service to be used by the Vault Statefulset + service: + enabled: true + # Enable or disable the vault-active service, which selects Vault pods that + # have labeled themselves as the cluster leader with `vault-active: "true"`. + active: + enabled: true + # Extra annotations for the service definition. This can either be YAML or a + # YAML-formatted multi-line templated string map of the annotations to apply + # to the active service. + annotations: {} + # Enable or disable the vault-standby service, which selects Vault pods that + # have labeled themselves as a cluster follower with `vault-active: "false"`. + standby: + enabled: true + # Extra annotations for the service definition. This can either be YAML or a + # YAML-formatted multi-line templated string map of the annotations to apply + # to the standby service. + annotations: {} + # If enabled, the service selectors will include `app.kubernetes.io/instance: {{ .Release.Name }}` + # When disabled, services may select Vault pods not deployed from the chart. + # Does not affect the headless vault-internal service with `ClusterIP: None` + instanceSelector: + enabled: true + # clusterIP controls whether a Cluster IP address is attached to the + # Vault service within Kubernetes. By default, the Vault service will + # be given a Cluster IP address, set to None to disable. When disabled + # Kubernetes will create a "headless" service. Headless services can be + # used to communicate with pods directly through DNS instead of a round-robin + # load balancer. + # clusterIP: None + + # Configures the service type for the main Vault service. Can be ClusterIP + # or NodePort. + type: ClusterIP + + # The IP family and IP families options are to set the behaviour in a dual-stack environment. + # Omitting these values will let the service fall back to whatever the CNI dictates the defaults + # should be. + # These are only supported for kubernetes versions >=1.23.0 + # + # Configures the service's supported IP family policy, can be either: + # SingleStack: Single-stack service. The control plane allocates a cluster IP for the Service, using the first configured service cluster IP range. + # PreferDualStack: Allocates IPv4 and IPv6 cluster IPs for the Service. + # RequireDualStack: Allocates Service .spec.ClusterIPs from both IPv4 and IPv6 address ranges. + ipFamilyPolicy: "" + + # Sets the families that should be supported and the order in which they should be applied to ClusterIP as well. + # Can be IPv4 and/or IPv6. + ipFamilies: [] + + # Do not wait for pods to be ready before including them in the services' + # targets. 
+    # Does not apply to the headless service, which is used for
+    # cluster-internal communication.
+    publishNotReadyAddresses: true
+
+    # The externalTrafficPolicy can be set to either Cluster or Local
+    # and is only valid for LoadBalancer and NodePort service types.
+    # The default value is Cluster.
+    # ref: https://kubernetes.io/docs/concepts/services-networking/service/#external-traffic-policy
+    externalTrafficPolicy: Cluster
+
+    # If type is set to "NodePort", a specific nodePort value can be configured,
+    # will be random if left blank.
+    #nodePort: 30000
+
+    # When HA mode is enabled
+    # If type is set to "NodePort", a specific nodePort value can be configured,
+    # will be random if left blank.
+    #activeNodePort: 30001
+
+    # When HA mode is enabled
+    # If type is set to "NodePort", a specific nodePort value can be configured,
+    # will be random if left blank.
+    #standbyNodePort: 30002
+
+    # Port on which Vault server is listening
+    port: 8200
+    # Target port to which the service should be mapped to
+    targetPort: 8200
+    # Extra annotations for the service definition. This can either be YAML or a
+    # YAML-formatted multi-line templated string map of the annotations to apply
+    # to the service.
+    annotations: {}
+
+  # This configures the Vault Statefulset to create a PVC for data
+  # storage when using the file or raft backend storage engines.
+  # See https://developer.hashicorp.com/vault/docs/configuration/storage to know more
+  dataStorage:
+    enabled: true
+    # Size of the PVC created
+    size: 10Gi
+    # Location where the PVC will be mounted.
+    mountPath: "/vault/data"
+    # Name of the storage class to use. If null it will use the
+    # configured default Storage Class.
+    storageClass: null
+    # Access Mode of the storage device being used for the PVC
+    accessMode: ReadWriteOnce
+    # Annotations to apply to the PVC
+    annotations: {}
+    # Labels to apply to the PVC
+    labels: {}
+
+  # Persistent Volume Claim (PVC) retention policy
+  # ref: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention
+  # Example:
+  # persistentVolumeClaimRetentionPolicy:
+  #   whenDeleted: Retain
+  #   whenScaled: Retain
+  persistentVolumeClaimRetentionPolicy: {}
+
+  # This configures the Vault Statefulset to create a PVC for audit
+  # logs. Once Vault is deployed, initialized, and unsealed, Vault must
+  # be configured to use this for audit logs. This will be mounted to
+  # /vault/audit
+  # See https://developer.hashicorp.com/vault/docs/audit to know more
+  auditStorage:
+    enabled: true
+    # Size of the PVC created
+    size: 10Gi
+    # Location where the PVC will be mounted.
+    mountPath: "/vault/audit"
+    # Name of the storage class to use. If null it will use the
+    # configured default Storage Class.
+    storageClass: null
+    # Access Mode of the storage device being used for the PVC
+    accessMode: ReadWriteOnce
+    # Annotations to apply to the PVC
+    annotations: {}
+    # Labels to apply to the PVC
+    labels: {}
+
+  # Run Vault in "HA" mode. There are no storage requirements unless audit log
+  # persistence is required. In HA mode Vault will configure itself to use Consul
+  # for its storage backend. The default configuration provided will work with the
+  # Consul Helm project by default. It is possible to manually configure Vault to
+  # use a different HA backend.
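+  # Note: this deployment uses the integrated Raft storage configured below
+  # (ha.raft.enabled: true), so no external Consul cluster is required.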
+  ha:
+    enabled: true
+    replicas: 2
+
+    # Set the api_addr configuration for Vault HA
+    # See https://developer.hashicorp.com/vault/docs/configuration#api_addr
+    # If set to null, this will be set to the Pod IP Address
+    apiAddr: null
+
+    # Set the cluster_addr configuration for Vault HA
+    # See https://developer.hashicorp.com/vault/docs/configuration#cluster_addr
+    # If set to null, this will be set to https://$(HOSTNAME).{{ template "vault.fullname" . }}-internal:8201
+    clusterAddr: null
+
+    # Enables Vault's integrated Raft storage. Unlike the typical HA modes where
+    # Vault's persistence is external (such as Consul), enabling Raft mode will create
+    # persistent volumes for Vault to store data according to the configuration under server.dataStorage.
+    # The Vault cluster will coordinate leader elections and failovers internally.
+    raft:
+      enabled: true
+      # Set the Node Raft ID to the name of the pod
+      setNodeId: true
+
+      # Note: Configuration files are stored in ConfigMaps so sensitive data
+      # such as passwords should be either mounted through extraSecretEnvironmentVars
+      # or through a Kube secret. For more information see:
+      # https://developer.hashicorp.com/vault/docs/platform/k8s/helm/run#protecting-sensitive-vault-configurations
+      # https://developer.hashicorp.com/vault/docs/configuration/storage/raft
+      config: |
+        ui = true
+        plugin_directory = "/vault/plugins"
+
+        listener "tcp" {
+          tls_disable = 0
+          address = "[::]:8200"
+          cluster_address = "[::]:8201"
+          tls_cert_file = "/vault/userconfig/vault-ha-tls/vault.crt"
+          tls_key_file = "/vault/userconfig/vault-ha-tls/vault.key"
+          tls_client_ca_file = "/vault/userconfig/vault-ha-tls/vault.ca"
+        }
+
+        storage "raft" {
+          path = "/vault/data"
+          retry_join {
+            leader_api_addr = "https://vault-0.vault-internal:8200"
+            leader_ca_cert_file = "/vault/userconfig/vault-ha-tls/vault.ca"
+            leader_client_cert_file = "/vault/userconfig/vault-ha-tls/vault.crt"
+            leader_client_key_file = "/vault/userconfig/vault-ha-tls/vault.key"
+          }
+          retry_join {
+            leader_api_addr = "https://vault-1.vault-internal:8200"
+            leader_ca_cert_file = "/vault/userconfig/vault-ha-tls/vault.ca"
+            leader_client_cert_file = "/vault/userconfig/vault-ha-tls/vault.crt"
+            leader_client_key_file = "/vault/userconfig/vault-ha-tls/vault.key"
+          }
+          retry_join {
+            leader_api_addr = "https://vault-2.vault-internal:8200"
+            leader_ca_cert_file = "/vault/userconfig/vault-ha-tls/vault.ca"
+            leader_client_cert_file = "/vault/userconfig/vault-ha-tls/vault.crt"
+            leader_client_key_file = "/vault/userconfig/vault-ha-tls/vault.key"
+          }
+        }
+
+        seal "gcpckms" {
+          project    = "spartan-rhino-408115"
+          region     = "northamerica-northeast1"
+          key_ring   = "vault-keyring-gcp"
+          crypto_key = "vault-cryptokey-gcp"
+        }
+
+        service_registration "kubernetes" {}
+
+    # A disruption budget limits the number of pods of a replicated application
+    # that are down simultaneously from voluntary disruptions
+    disruptionBudget:
+      enabled: true
+
+      # maxUnavailable will default to (n/2)-1 where n is the number of
+      # replicas. If you'd like a custom value, you can specify an override here.
+      maxUnavailable: null
+
+  # Definition of the serviceAccount used to run Vault.
+  # These options are also used when using an external Vault server to validate
+  # Kubernetes tokens.
+  serviceAccount:
+    # Specifies whether a service account should be created
+    create: true
+    # The name of the service account to use.
+ # If not set and create is true, a name is generated using the fullname template + name: "" + # Create a Secret API object to store a non-expiring token for the service account. + # Prior to v1.24.0, Kubernetes used to generate this secret for each service account by default. + # Kubernetes now recommends using short-lived tokens from the TokenRequest API or projected volumes instead if possible. + # For more details, see https://kubernetes.io/docs/concepts/configuration/secret/#service-account-token-secrets + # serviceAccount.create must be equal to 'true' in order to use this feature. + createSecret: false + # Extra annotations for the serviceAccount definition. This can either be + # YAML or a YAML-formatted multi-line templated string map of the + # annotations to apply to the serviceAccount. + annotations: {} + # Extra labels to attach to the serviceAccount + # This should be a YAML map of the labels to apply to the serviceAccount + extraLabels: {} + # Enable or disable a service account role binding with the permissions required for + # Vault's Kubernetes service_registration config option. + # See https://developer.hashicorp.com/vault/docs/configuration/service-registration/kubernetes + serviceDiscovery: + enabled: true + + # Settings for the statefulSet used to run Vault. + statefulSet: + # Extra annotations for the statefulSet. This can either be YAML or a + # YAML-formatted multi-line templated string map of the annotations to apply + # to the statefulSet. + annotations: {} + + # Set the pod and container security contexts. + # If not set, these will default to, and for *not* OpenShift: + # pod: + # runAsNonRoot: true + # runAsGroup: {{ .Values.server.gid | default 1000 }} + # runAsUser: {{ .Values.server.uid | default 100 }} + # fsGroup: {{ .Values.server.gid | default 1000 }} + # container: + # allowPrivilegeEscalation: false + # + # If not set, these will default to, and for OpenShift: + # pod: {} + # container: {} + securityContext: + pod: {} + container: {} + + # Should the server pods run on the host network + hostNetwork: false + +# Vault UI +ui: + # True if you want to create a Service entry for the Vault UI. + # + # serviceType can be used to control the type of service created. For + # example, setting this to "LoadBalancer" will create an external load + # balancer (for supported K8S installations) to access the UI. + enabled: true + publishNotReadyAddresses: true + # The service should only contain selectors for active Vault pod + activeVaultPodOnly: false + serviceType: "ClusterIP" + serviceNodePort: null + externalPort: 8200 + targetPort: 8200 + + # The IP family and IP families options are to set the behaviour in a dual-stack environment. + # Omitting these values will let the service fall back to whatever the CNI dictates the defaults + # should be. + # These are only supported for kubernetes versions >=1.23.0 + # + # Configures the service's supported IP family, can be either: + # SingleStack: Single-stack service. The control plane allocates a cluster IP for the Service, using the first configured service cluster IP range. + # PreferDualStack: Allocates IPv4 and IPv6 cluster IPs for the Service. + # RequireDualStack: Allocates Service .spec.ClusterIPs from both IPv4 and IPv6 address ranges. + serviceIPFamilyPolicy: "" + + # Sets the families that should be supported and the order in which they should be applied to ClusterIP as well + # Can be IPv4 and/or IPv6. 
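+  # Example (dual-stack, IPv4 preferred; the order of the list matters):
+  # serviceIPFamilies: ["IPv4", "IPv6"]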
+ serviceIPFamilies: [] + + # The externalTrafficPolicy can be set to either Cluster or Local + # and is only valid for LoadBalancer and NodePort service types. + # The default value is Cluster. + # ref: https://kubernetes.io/docs/concepts/services-networking/service/#external-traffic-policy + externalTrafficPolicy: Cluster + + #loadBalancerSourceRanges: + # - 10.0.0.0/16 + # - 1.78.23.3/32 + + # loadBalancerIP: + + # Extra annotations to attach to the ui service + # This can either be YAML or a YAML-formatted multi-line templated string map + # of the annotations to apply to the ui service + annotations: {} + +# secrets-store-csi-driver-provider-vault +csi: + # True if you want to install a secrets-store-csi-driver-provider-vault daemonset. + # + # Requires installing the secrets-store-csi-driver separately, see: + # https://github.com/kubernetes-sigs/secrets-store-csi-driver#install-the-secrets-store-csi-driver + # + # With the driver and provider installed, you can mount Vault secrets into volumes + # similar to the Vault Agent injector, and you can also sync those secrets into + # Kubernetes secrets. + enabled: true + + image: + repository: "hashicorp/vault-csi-provider" + tag: "1.4.1" + pullPolicy: IfNotPresent + + # volumes is a list of volumes made available to all containers. These are rendered + # via toYaml rather than pre-processed like the extraVolumes value. + # The purpose is to make it easy to share volumes between containers. + volumes: + - name: vault-ha-tls + secret: + secretName: vault-ha-tls + + # volumeMounts is a list of volumeMounts for the main server container. These are rendered + # via toYaml rather than pre-processed like the extraVolumes value. + # The purpose is to make it easy to share volumes between containers. + volumeMounts: + - name: vault-ha-tls + mountPath: "/vault/userconfig/vault-ha-tls" + readOnly: true + + resources: {} + # resources: + # requests: + # cpu: 50m + # memory: 128Mi + # limits: + # cpu: 50m + # memory: 128Mi + + # Override the default secret name for the CSI Provider's HMAC key used for + # generating secret versions. + hmacSecretName: "" + + # Settings for the daemonSet used to run the provider. + daemonSet: + updateStrategy: + type: RollingUpdate + maxUnavailable: "" + # Extra annotations for the daemonSet. This can either be YAML or a + # YAML-formatted multi-line templated string map of the annotations to apply + # to the daemonSet. + annotations: {} + # Provider host path (must match the CSI provider's path) + providersDir: "/etc/kubernetes/secrets-store-csi-providers" + # Kubelet host path + kubeletRootDir: "/var/lib/kubelet" + # Extra labels to attach to the vault-csi-provider daemonSet + # This should be a YAML map of the labels to apply to the csi provider daemonSet + extraLabels: {} + # security context for the pod template and container in the csi provider daemonSet + securityContext: + pod: {} + container: {} + + pod: + # Extra annotations for the provider pods. This can either be YAML or a + # YAML-formatted multi-line templated string map of the annotations to apply + # to the pod. + annotations: {} + + # Toleration Settings for provider pods + # This should be either a multi-line string or YAML matching the Toleration array + # in a PodSpec. + tolerations: [] + + # nodeSelector labels for csi pod assignment, formatted as a multi-line string or YAML map. 
+    # ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector
+    # Example:
+    # nodeSelector:
+    #   beta.kubernetes.io/arch: amd64
+    nodeSelector: {}
+
+    # Affinity Settings
+    # This should be either a multi-line string or YAML matching the PodSpec's affinity field.
+    affinity: {}
+
+    # Extra labels to attach to the vault-csi-provider pod
+    # This should be a YAML map of the labels to apply to the csi provider pod
+    extraLabels: {}
+
+  agent:
+    enabled: true
+    extraArgs: []
+
+    image:
+      repository: "hashicorp/vault"
+      tag: "1.15.2"
+      pullPolicy: IfNotPresent
+
+    logFormat: standard
+    logLevel: info
+
+    resources: {}
+    # resources:
+    #   requests:
+    #     memory: 256Mi
+    #     cpu: 250m
+    #   limits:
+    #     memory: 256Mi
+    #     cpu: 250m
+
+  # Priority class for csi pods
+  priorityClassName: ""
+
+  serviceAccount:
+    # Extra annotations for the serviceAccount definition. This can either be
+    # YAML or a YAML-formatted multi-line templated string map of the
+    # annotations to apply to the serviceAccount.
+    annotations: {}
+
+    # Extra labels to attach to the vault-csi-provider serviceAccount
+    # This should be a YAML map of the labels to apply to the csi provider serviceAccount
+    extraLabels: {}
+
+  # Used to configure readinessProbe for the pods.
+  readinessProbe:
+    failureThreshold: 2
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    successThreshold: 1
+    timeoutSeconds: 3
+
+  # Used to configure livenessProbe for the pods.
+  livenessProbe:
+    failureThreshold: 2
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    successThreshold: 1
+    timeoutSeconds: 3
+
+  # Enables debug logging.
+  debug: false
+
+  # Pass arbitrary additional arguments to vault-csi-provider.
+  # See https://developer.hashicorp.com/vault/docs/platform/k8s/csi/configurations#command-line-arguments
+  # for the available command line flags.
+  extraArgs:
+    - "-vault-tls-ca-cert=/vault/userconfig/vault-ha-tls/vault.ca"
+    - "-vault-addr=https://vault.vault:8200"
+
+# Vault is able to collect and publish various runtime metrics.
+# Enabling this feature requires adding a `telemetry{}` stanza to
+# the Vault configuration. There are a few examples included in the `config` sections above.
+#
+# For more information see:
+# https://developer.hashicorp.com/vault/docs/configuration/telemetry
+# https://developer.hashicorp.com/vault/docs/internals/telemetry
+serverTelemetry:
+  # Enable support for the Prometheus Operator. Currently, this chart does not support
+  # authenticating to Vault's metrics endpoint, so the following `telemetry{}` must be included
+  # in the `listener "tcp"{}` stanza
+  #  telemetry {
+  #    unauthenticated_metrics_access = "true"
+  #  }
+  #
+  # See the `standalone.config` for a more complete example of this.
+  #
+  # In addition, a top level `telemetry{}` stanza must also be included in the Vault configuration:
+  #
+  # example:
+  #  telemetry {
+  #    prometheus_retention_time = "30s"
+  #    disable_hostname = true
+  #  }
+  #
+  # Configuration for monitoring the Vault server.
+  serviceMonitor:
+    # The Prometheus operator *must* be installed before enabling this feature,
+    # otherwise the chart will fail to install due to missing CustomResourceDefinitions
+    # provided by the operator.
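+    # A quick sanity check (assuming kubectl access to the cluster) is to
+    # confirm the operator's CRDs exist before enabling:
+    #   kubectl get crd servicemonitors.monitoring.coreos.com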
+ # + # Instructions on how to install the Helm chart can be found here: + # https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack + # More information can be found here: + # https://github.com/prometheus-operator/prometheus-operator + # https://github.com/prometheus-operator/kube-prometheus + + # Enable deployment of the Vault Server ServiceMonitor CustomResource. + enabled: false + + # Selector labels to add to the ServiceMonitor. + # When empty, defaults to: + # release: prometheus + selectors: {} + + # Interval at which Prometheus scrapes metrics + interval: 30s + + # Timeout for Prometheus scrapes + scrapeTimeout: 10s + + prometheusRules: + # The Prometheus operator *must* be installed before enabling this feature, + # if not the chart will fail to install due to missing CustomResourceDefinitions + # provided by the operator. + + # Deploy the PrometheusRule custom resource for AlertManager based alerts. + # Requires that AlertManager is properly deployed. + enabled: false + + # Selector labels to add to the PrometheusRules. + # When empty, defaults to: + # release: prometheus + selectors: {} + + # Some example rules. + rules: [] + # - alert: vault-HighResponseTime + # annotations: + # message: The response time of Vault is over 500ms on average over the last 5 minutes. + # expr: vault_core_handle_request{quantile="0.5", namespace="mynamespace"} > 500 + # for: 5m + # labels: + # severity: warning + # - alert: vault-HighResponseTime + # annotations: + # message: The response time of Vault is over 1s on average over the last 5 minutes. + # expr: vault_core_handle_request{quantile="0.5", namespace="mynamespace"} > 1000 + # for: 5m + # labels: + # severity: critical + diff --git a/main.tf b/main.tf new file mode 100644 index 00000000..53f81a3f --- /dev/null +++ b/main.tf @@ -0,0 +1,21 @@ +# module "gcp-kubernetes-cluster-0" { +# source = "./terraform/gcp-kubernetes-cluster" + +# cluster_name = "acia-cfia" +# project_id = "spartan-rhino-408115" + +# region = "northamerica-northeast1" +# location_1 = "northamerica-northeast1-a" +# location_2 = "northamerica-northeast1-b" +# } + +# module "namecheap" { +# source = "./terraform/dns" + +# cluster_name = "acia-cfia" +# project_id = "spartan-rhino-408115" + +# region = "northamerica-northeast1" +# location_1 = "northamerica-northeast1-a" +# location_2 = "northamerica-northeast1-b" +# } diff --git a/providers.tf b/providers.tf new file mode 100644 index 00000000..057ae116 --- /dev/null +++ b/providers.tf @@ -0,0 +1,34 @@ +terraform { + backend "gcs" { + bucket = "terraform-tfstate-gcp-storage" + prefix = "terraform/state" + } + required_providers { + google = { + source = "hashicorp/google" + version = "~> 5.0" + } + kubernetes = { + source = "hashicorp/kubernetes" + version = "2.24.0" + } + # aws = { + # source = "hashicorp/aws" + # version = "~> 3.0" + # } + # azurerm = { + # source = "hashicorp/azurerm" + # version = "~> 2.0" + # } + } +} + +provider "google" { + project = "spartan-rhino-408115" + region = "northamerica-northeast1" +} + +provider "kubernetes" { + config_path = "~/.kube/config" + config_context = var.kube_ctx +} diff --git a/terraform/gcp-kubernetes-cluster/accounts.tf b/terraform/gcp-kubernetes-cluster/accounts.tf new file mode 100644 index 00000000..290c3170 --- /dev/null +++ b/terraform/gcp-kubernetes-cluster/accounts.tf @@ -0,0 +1,11 @@ +resource "google_project_service" "compute" { + service = "compute.googleapis.com" +} + +resource "google_project_service" "container" { + service = 
"container.googleapis.com" +} + +resource "google_service_account" "kubernetes" { + account_id = "${var.cluster_name}-kubernetes" +} diff --git a/terraform/gcp-kubernetes-cluster/cluster.tf b/terraform/gcp-kubernetes-cluster/cluster.tf new file mode 100644 index 00000000..fb74b3f2 --- /dev/null +++ b/terraform/gcp-kubernetes-cluster/cluster.tf @@ -0,0 +1,81 @@ +resource "google_container_cluster" "cluster" { + name = "${var.cluster_name}-cluster" + location = var.location_1 + remove_default_node_pool = true + initial_node_count = 1 + network = google_compute_network.network.self_link + subnetwork = google_compute_subnetwork.subnetwork.self_link + networking_mode = "VPC_NATIVE" + deletion_protection = false + # logging_service = "logging.googleapis.com/kubernetes" + # monitoring_service = "monitoring.googleapis.com/kubernetes" + + # Optional, if you want multi-zonal cluster + node_locations = [ + "${var.location_2}" + ] + + addons_config { + http_load_balancing { + disabled = true # NGINX Ingress + } + + horizontal_pod_autoscaling { + disabled = false # Self-managed + } + } + + release_channel { + channel = "RAPID" + } + + workload_identity_config { + workload_pool = "${var.project_id}.svc.id.goog" + } + + ip_allocation_policy { + cluster_secondary_range_name = "${var.cluster_name}-pod-range" + services_secondary_range_name = "${var.cluster_name}-service-range" + } + + private_cluster_config { + enable_private_nodes = true + enable_private_endpoint = false + master_ipv4_cidr_block = "172.16.0.0/28" + } + + # Enable Kubernetes Gateway API + gateway_api_config { + channel = "CHANNEL_STANDARD" + } +} + +resource "google_container_node_pool" "nodepool" { + name = "${var.cluster_name}-standard-nodepool" + cluster = google_container_cluster.cluster.id + node_count = 1 + + management { + auto_repair = true + auto_upgrade = true + } + + autoscaling { + min_node_count = 1 + max_node_count = 10 + } + + node_config { + preemptible = false + machine_type = "e2-small" + + labels = { + role = "general" + } + + service_account = google_service_account.kubernetes.email + oauth_scopes = [ + "https://www.googleapis.com/auth/cloud-platform" + ] + } +} diff --git a/terraform/gcp-kubernetes-cluster/networking.tf b/terraform/gcp-kubernetes-cluster/networking.tf new file mode 100644 index 00000000..3847ca59 --- /dev/null +++ b/terraform/gcp-kubernetes-cluster/networking.tf @@ -0,0 +1,73 @@ +resource "google_compute_network" "network" { + name = "${var.cluster_name}-network" + routing_mode = "REGIONAL" # global + auto_create_subnetworks = false + mtu = 1460 + delete_default_routes_on_create = false + + depends_on = [ + google_project_service.compute, + google_project_service.container + ] +} + +resource "google_compute_subnetwork" "subnetwork" { + name = "${var.cluster_name}-private-subnet" + ip_cidr_range = "10.0.0.0/18" + region = var.region + network = google_compute_network.network.id + private_ip_google_access = true + + secondary_ip_range { + range_name = "${var.cluster_name}-pod-range" + ip_cidr_range = "10.48.0.0/14" + } + + secondary_ip_range { + range_name = "${var.cluster_name}-service-range" + ip_cidr_range = "10.52.0.0/20" + } +} + +resource "google_compute_router" "router" { + name = "${var.cluster_name}-router" + region = var.region + network = google_compute_network.network.id +} + +resource "google_compute_router_nat" "router-nat" { + name = "${var.cluster_name}-nat" + router = google_compute_router.router.name + region = var.region + + source_subnetwork_ip_ranges_to_nat = "LIST_OF_SUBNETWORKS" + 
+  nat_ip_allocate_option             = "MANUAL_ONLY"
+
+  subnetwork {
+    name                    = google_compute_subnetwork.subnetwork.id
+    source_ip_ranges_to_nat = ["ALL_IP_RANGES"]
+  }
+
+  nat_ips = [google_compute_address.nat.self_link]
+}
+
+resource "google_compute_address" "nat" {
+  name         = "${var.cluster_name}-nat"
+  address_type = "EXTERNAL"
+  network_tier = "STANDARD"
+
+  depends_on = [google_project_service.compute]
+}
+
+# Enable SSH
+# resource "google_compute_firewall" "allow-ssh" {
+#   name    = "${var.cluster_name}-allow-ssh"
+#   network = google_compute_network.network.name
+
+#   allow {
+#     protocol = "tcp"
+#     ports    = ["22"]
+#   }
+
+#   source_ranges = ["0.0.0.0/0"]
+# }
diff --git a/terraform/gcp-kubernetes-cluster/provider.tf b/terraform/gcp-kubernetes-cluster/provider.tf
new file mode 100644
index 00000000..53874b86
--- /dev/null
+++ b/terraform/gcp-kubernetes-cluster/provider.tf
@@ -0,0 +1,29 @@
+terraform {
+  required_providers {
+    local = {
+      source  = "hashicorp/local"
+      version = "2.4.0"
+    }
+    tls = {
+      source  = "hashicorp/tls"
+      version = "4.0.4"
+    }
+    external = {
+      source  = "hashicorp/external"
+      version = "2.3.1"
+    }
+    google = {
+      source  = "hashicorp/google"
+      version = "~> 5.0"
+    }
+    kubernetes = {
+      source  = "hashicorp/kubernetes"
+      version = "2.24.0"
+    }
+  }
+}
+
+provider "google" {
+  project = var.project_id
+  region  = var.region
+}
diff --git a/terraform/gcp-kubernetes-cluster/variables.tf b/terraform/gcp-kubernetes-cluster/variables.tf
new file mode 100644
index 00000000..c1eb01d8
--- /dev/null
+++ b/terraform/gcp-kubernetes-cluster/variables.tf
@@ -0,0 +1,24 @@
+variable "region" {
+  description = "The region where the cluster and every resource will be deployed"
+  type        = string
+}
+
+variable "project_id" {
+  description = "The project id where this terraform module will be deployed."
+  type        = string
+}
+
+variable "location_1" {
+  description = "The primary zone where the cluster will be deployed"
+  type        = string
+}
+
+variable "location_2" {
+  description = "An additional zone used for multi-zonal node placement"
+  type        = string
+}
+
+variable "cluster_name" {
+  description = "The cluster name. Every resource's name is prefixed with it (example: mycluster-compute-network)"
+  type        = string
+}
diff --git a/terraform/gcp-kubernetes-cluster/vault-csr.conf b/terraform/gcp-kubernetes-cluster/vault-csr.conf
new file mode 100644
index 00000000..0e39b79a
--- /dev/null
+++ b/terraform/gcp-kubernetes-cluster/vault-csr.conf
@@ -0,0 +1,22 @@
+[req]
+default_bits = 2048
+prompt = no
+encrypt_key = yes
+default_md = sha256
+distinguished_name = kubelet_serving
+req_extensions = v3_req
+[ kubelet_serving ]
+O = system:nodes
+CN = system:node:*.vault.svc.cluster.local
+[ v3_req ]
+basicConstraints = CA:FALSE
+keyUsage = nonRepudiation, digitalSignature, keyEncipherment, dataEncipherment
+extendedKeyUsage = serverAuth, clientAuth
+subjectAltName = @alt_names
+[alt_names]
+DNS.1 = *.vault-internal
+DNS.2 = *.vault-internal.vault.svc.cluster.local
+DNS.3 = *.vault
+DNS.4 = vault.vault.svc.cluster.local
+DNS.5 = vault.vault.svc
+IP.1 = 127.0.0.1
diff --git a/terraform/gcp-kubernetes-cluster/vault.tf b/terraform/gcp-kubernetes-cluster/vault.tf
new file mode 100644
index 00000000..598c5bb0
--- /dev/null
+++ b/terraform/gcp-kubernetes-cluster/vault.tf
@@ -0,0 +1,114 @@
+# Create a service account for the Vault KMS
+resource "google_service_account" "vault_kms_service_account" {
+  account_id   = "${var.cluster_name}-vault-gcpkms"
+  display_name = "Vault KMS for auto-unseal"
+}
+
+# This will be used to create a credentials.json inside a k8s secret obj
+resource "google_service_account_key" "vault_kms_service_account_key" {
+  service_account_id = google_service_account.vault_kms_service_account.name
+}
+
+# Create a KMS Key
+resource "google_kms_key_ring" "key_ring" {
+  name     = "vault-keyring-gcp"
+  location = var.region
+}
+
+# Create a crypto key for the key ring. This key will be used to automatically unseal Vault
+resource "google_kms_crypto_key" "crypto_key" {
+  name            = "vault-cryptokey-gcp"
+  key_ring        = google_kms_key_ring.key_ring.id
+  rotation_period = "100000s"
+}
+
+# We give the Vault service account access to that key ring, making it the owner of the key.
+resource "google_kms_key_ring_iam_binding" "vault_iam_kms_binding" {
+  key_ring_id = google_kms_key_ring.key_ring.id
+  role        = "roles/owner"
+
+  members = [
+    "serviceAccount:${google_service_account.vault_kms_service_account.email}",
+  ]
+}
+
+# Vault certificates
+resource "tls_private_key" "vault_key" {
+  algorithm = "RSA"
+  rsa_bits  = 2048
+}
+
+# This requires openssl in the runtime environment; a static jq binary is downloaded into the working directory.
+# It creates a certificate signing request (CSR) based on the vault-csr.conf file.
+# The two jq invocations at the beginning and end of the pipe read the input and wrap the result in JSON,
+# since this is how the terraform "external" data source passes data.
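+# Roughly, the program below is equivalent to this shell sketch (key material elided):
+#   jq -rc '.key' <<< '{"key":"<PEM>"}' \
+#     | openssl req -new -noenc -config vault-csr.conf -key /dev/stdin \
+#     | jq -rRncs '{"request": inputs}'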
+data "external" "k8s_cert_request" { + program = [ + "bash", "-c", + "curl -o jq https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64 && chmod +x jq && jq -rc '.key' | openssl req -new -noenc -config ${path.module}/vault-csr.conf -key /dev/stdin | jq -rRncs '{\"request\": inputs}'" + ] + query = { + "key" = tls_private_key.vault_key.private_key_pem + } +} + +# We make ask Kubernetes to sign the certificate +# https://kubernetes.io/docs/reference/access-authn-authz/certificate-signing-requests/ +resource "kubernetes_certificate_signing_request_v1" "vault_kube_cert_req" { + metadata { + name = "vault.svc" + } + spec { + request = data.external.k8s_cert_request.result["request"] + signer_name = "kubernetes.io/kube-apiserver-client" + usages = ["digital signature", "key encipherment", "server auth"] + } + auto_approve = true + lifecycle { + ignore_changes = [spec[0].request] + replace_triggered_by = [tls_private_key.vault_key] + } +} + +# Makes sure the vault namespace is created before adding secrets +resource "kubernetes_namespace" "vault_ns" { + metadata { + name = "vault" + } +} + +# Get the GKE cluster data to fetch the CA certificate +data "google_container_cluster" "gke_cluster" { + name = "${var.cluster_name}-cluster" + location = "${var.region}-a" +} + +# This secret contains the certificates used +resource "kubernetes_secret" "vault_ha_tls" { + metadata { + name = "vault-ha-tls" + namespace = kubernetes_namespace.vault_ns.metadata[0].name + } + + data = { + "vault.key" = tls_private_key.vault_key.private_key_pem + "vault.crt" = kubernetes_certificate_signing_request_v1.vault_kube_cert_req.certificate + "vault.ca" = base64decode(data.google_container_cluster.gke_cluster.master_auth[0].cluster_ca_certificate) + } + + type = "kubernetes.io/generic" +} + +# This secret contains the GCP KMS service account that is used to unseal +resource "kubernetes_secret" "kms_creds" { + metadata { + name = "kms-creds" + namespace = kubernetes_namespace.vault_ns.metadata[0].name + } + + data = { + "credentials.json" = base64decode(google_service_account_key.vault_kms_service_account_key.private_key) + } + + type = "kubernetes.io/generic" +} diff --git a/variables.tf b/variables.tf new file mode 100644 index 00000000..b9f833d5 --- /dev/null +++ b/variables.tf @@ -0,0 +1,4 @@ +variable "kube_ctx" { + description = "The kubernetes cluster context." + type = string +}