add readiness checks (#652)

* add readiness checks * add health endpoint to metrics asgi app * add health_check function * add ability to inject health_check_functions * add health method to all components * make health check the default for helm charts * add healthcheck for http_input * add health_timeout * make kafka_config immutable * add healthcheck for opensearch * add is_running tests to exporter and add threadingserver tests * Refactor PrometheusExporter test to use uvicorn_config for server host * Refactor ThreadingHTTPServer shut_down method to handle case when server is None * Refactor ThreadingHTTPServer shut_down method to ensure graceful server exit * implement update_healthchecks to prometheus_exporter * add development example setup * move configuration to the top level of documentation * add healthcheck documentation * update changelog * rewrite documentation intro
fkie-cad · Sep 19, 2024 · 34809cf · 34809cf
1 parent eca9240
commit 34809cf
Show file tree

Hide file tree

Showing 53 changed files with 917 additions and 148 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,11 +6,22 @@
 * remove AutoRuleCorpusTester
 
 ### Features
+
+* adds health check endpoint to metrics on path `/health`
+* changes helm chart to use new readiness check
+* adds `health_timeout` option to all components to tweak the timeout of healthchecks
+* adds `desired_cluster_status` option to opensearch output to signal healthy cluster status
+* initially run health checks on setup for every configured component
+* make `imagePullPolicy` configurable for helm chart deployments
+
+
 ### Improvements
 
 * remove AutoRuleCorpusTester
 * adds support for rust extension development
 * adds prebuild wheels for architectures `x86_64` and `aarch64` on `manylinux` and `musllinux` based linux platforms to releases
+* add a manual on how to use local images with the minikube example setup to the documentation
+* move `Configuration` to top level of documentation
 
 ### Bugfix
 

diff --git a/charts/logprep/Chart.yaml b/charts/logprep/Chart.yaml
@@ -6,7 +6,7 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: "13.2.3"
+version: "13.3.0"
 
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to

diff --git a/charts/logprep/templates/deployment.yaml b/charts/logprep/templates/deployment.yaml
@@ -34,7 +34,7 @@ spec:
           resources:
             {{- toYaml .Values.resources | nindent 12 }}
           image: {{ .Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag }}
-          imagePullPolicy: Always
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
           ports:
             {{- if .Values.exporter.enabled }}
             - name: exporter
@@ -106,28 +106,15 @@ spec:
             {{- if .Values.extraMounts }}
             {{- toYaml .Values.extraMounts | nindent 12 }}
             {{- end }}
-          {{- if or .Values.exporter.enabled (eq .Values.input.type "http_input") }}
-          {{- if eq .Values.input.type "http_input" }}
+          {{- if .Values.exporter.enabled }}
           readinessProbe:
             httpGet:
               path: /health
-              port: {{ .Values.input.uvicorn_config.port }}
-            initialDelaySeconds: 5
-            timeoutSeconds: 10
-            periodSeconds: 5
-            failureThreshold: 3
-          {{- else }}
-          readinessProbe:
-            httpGet:
-              path: /metrics
               port: {{ .Values.exporter.port }}
             initialDelaySeconds: 5
             timeoutSeconds: 10
             periodSeconds: 5
             failureThreshold: 3
-          {{- end }}
-          {{- end }}
-          {{- if .Values.exporter.enabled }}
           startupProbe:
             httpGet:
               path: /metrics

diff --git a/charts/logprep/values.yaml b/charts/logprep/values.yaml
@@ -7,6 +7,7 @@ image:
   registry: ghcr.io
   repository: fkie-cad/logprep
   tag: py3.11-stable
+  pullPolicy: Always
 
 # The pod resources
 resources:

diff --git a/...urce/user_manual/configuration/getter.rst → doc/source/configuration/getter.rst b/...urce/user_manual/configuration/getter.rst → doc/source/configuration/getter.rst
diff --git a/...ource/user_manual/configuration/index.rst → doc/source/configuration/index.rst b/...ource/user_manual/configuration/index.rst → doc/source/configuration/index.rst
diff --git a/...ource/user_manual/configuration/input.rst → doc/source/configuration/input.rst b/...ource/user_manual/configuration/input.rst → doc/source/configuration/input.rst
diff --git a/...rce/user_manual/configuration/metrics.rst → doc/source/configuration/metrics.rst b/...rce/user_manual/configuration/metrics.rst → doc/source/configuration/metrics.rst
diff --git a/...urce/user_manual/configuration/output.rst → doc/source/configuration/output.rst b/...urce/user_manual/configuration/output.rst → doc/source/configuration/output.rst
diff --git a/...e/user_manual/configuration/processor.rst → doc/source/configuration/processor.rst b/...e/user_manual/configuration/processor.rst → doc/source/configuration/processor.rst
diff --git a/...ource/user_manual/configuration/rules.rst → doc/source/configuration/rules.rst b/...ource/user_manual/configuration/rules.rst → doc/source/configuration/rules.rst
diff --git a/doc/source/examples/minikube.rst b/doc/source/examples/minikube.rst
@@ -15,25 +15,19 @@ with the following commands:
 .. code-block:: bash
     :caption: Install package prerequisites
 
-    sudo apt-get install -y \
-        apt-transport-https \
-        ca-certificates \
-        curl \
-        software-properties-common
+    sudo apt-get install -y apt-transport-https ca-certificates  curl software-properties-common
 
 .. code-block:: bash
     :caption: Install minikube
 
-    sudo curl -Lo /usr/local/bin/minikube \
-      https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64
+    sudo curl -Lo /usr/local/bin/minikube https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64
     
     sudo chmod +x /usr/local/bin/minikube
 
 .. code-block:: bash
     :caption: Install kubectl
 
-    sudo curl -Lo /usr/local/bin/kubectl \
-      "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
+    sudo curl -Lo /usr/local/bin/kubectl "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
 
     sudo chmod +x /usr/local/bin/kubectl
 
@@ -56,8 +50,8 @@ with the following commands:
     minikube config set driver docker
     minikube config set cpus 16 
     minikube config set memory 16GB
-    minikube addons enable ingress
     minikube start
+    minikube addons enable ingress
 
 Deploy the example
 ------------------
@@ -125,4 +119,36 @@ Test the opensiem connector:
     }
     2024-07-17 11:15:35 301643 Generator  INFO    : Execution time: 0.067013 seconds
 
-open your browser and go to `http://dashboards.opensiem`_ to see the generated data in the opensearch dashboards.
+Open your browser and go to the `opensearch dashboard <http://dashboards.opensiem>`_ to see the generated data in the opensearch dashboards.
+
+
+Use local container images
+--------------------------
+
+If you want to use local logprep container images, first point your docker context to minikube so that the images you build are available inside the cluster:
+
+.. code-block:: bash
+    :caption: switch docker context to minikube in bash
+
+    eval $(minikube docker-env)
+
+For PowerShell:
+
+.. code-block:: powershell
+    :caption: switch docker context to minikube in powershell
+
+    (minikube docker-env).replace("export ", '$env:') | out-string | Invoke-Expression
+
+Then build the logprep image with the following command:
+
+.. code-block:: bash
+    :caption: build this image using the Dockerfile in the root of the repository
+
+    docker buildx build -t local/logprep:latest --build-arg PYTHON_VERSION=3.11 --build-arg LOGPREP_VERSION=dev .
+
+Then install the opensiem example using the local logprep image:
+
+.. code-block:: bash
+    :caption: use the local values file to deploy the opensiem example
+
+    helm install opensiem examples/k8s --values examples/k8s/values-dev.yaml
diff --git a/doc/source/index.rst b/doc/source/index.rst
@@ -1,19 +1,27 @@
-========================================
-Welcome to the Documentation of Logprep!
-========================================
+======================================
+Logprep: The swiss army knife for logs
+======================================
+
+This is the documentation for Logprep, the swiss army knife for logs.
+It provides tools for:
+
+* **collection** of logs from various sources
+* **normalization** via different processors
+* **shipping** to different datalake targets
+* **generation** of events for load testing
+* **pseudonymization** and **depseudonymization** of fields in log data to comply with GDPR
+
+and it is written in **Python**!
 
 .. toctree::
-   :maxdepth: 2
-   :caption: Content:
+   :maxdepth: 3
 
    installation
    user_manual/index
+   configuration/index
    development/index
    examples/index
 
-==================
-Indices and Tables
-==================
 
 * :ref:`genindex`
 * :ref:`modindex`
diff --git a/doc/source/user_manual/execution.rst b/doc/source/user_manual/execution.rst
@@ -195,3 +195,24 @@ Exit Codes
    :undoc-members:
    :inherited-members:
    :noindex:
+
+
+Healthchecks
+------------
+
+Logprep provides a health endpoint which can be used to check the health of all components.
+The asgi app for the healthcheck endpoint is implemented in :code:`logprep.metrics.exporter.make_patched_asgi_app` and
+will be recreated on every restart of logprep (e.g. after a configuration change) or on creation of the first pipeline process.
+The healthcheck endpoint is available at :code:`/health` if metrics are enabled and can be accessed via HTTP GET.
+
+* On success, the healthcheck endpoint will return a :code:`200` status code and a payload :code:`OK`.
+* On failure, the healthcheck endpoint will return a :code:`503` status code and a payload :code:`FAIL`.
+
+Healthchecks are implemented in components via the :code:`health()` method. When you implement a new
+health check, make sure it calls :code:`super().health()`.
+The health is checked for the first time after the first pipeline process is started and then every 5 seconds.
+You can configure the healthcheck timeout on component level with the parameter :code:`health_timeout`.
+The default value is 1 second.
+
+The provided helm charts use the healthcheck endpoint for the readiness probe by default. The probe is only rendered if the exporter is enabled.
+
diff --git a/doc/source/user_manual/index.rst b/doc/source/user_manual/index.rst
@@ -9,5 +9,4 @@ User Manual
    execution
    verification
    testing_rules
-   configuration/index
    security_best_practices
diff --git a/examples/exampledata/config/pipeline.yml b/examples/exampledata/config/pipeline.yml
@@ -114,13 +114,13 @@ output:
       - 127.0.0.1:9200
     default_index: processed
     error_index: errors
-    message_backlog_size: 10000
+    message_backlog_size: 2500
     timeout: 10000
     flush_timeout: 60
     max_retries: 3
-    parallel_bulk: false
     user: admin
     secret: admin
+    desired_cluster_status: ["green", "yellow"]
   kafka:
     type: confluentkafka_output
     default: false

diff --git a/examples/k8s/values-dev.yaml b/examples/k8s/values-dev.yaml
@@ -0,0 +1,126 @@
+# Values for the http connector: receives events via http and writes them to kafka.
+connector:
+  image:
+    # Use a locally built logprep image:
+    # 1. point your docker context to minikube
+    #    `eval $(minikube docker-env)` or `(minikube docker-env).replace("export ", '$env:') | out-string | Invoke-Expression`
+    # 2. build this image using the Dockerfile in the root of the repository
+    #    `docker buildx build -t local/logprep:latest --build-arg PYTHON_VERSION=3.11 --build-arg LOGPREP_VERSION=dev .`
+    registry: local
+    repository: logprep
+    tag: latest
+    # do not pull if the image is already present, so the locally built image is used
+    pullPolicy: IfNotPresent
+  replicas: 1
+  secrets: {}
+  logger:
+    level: INFO
+  input:
+    type: http_input
+    message_backlog_size: 150000
+    # canonical lowercase boolean (was `True`)
+    collect_meta: true
+    metafield_name: "@metadata"
+    uvicorn_config:
+      host: 0.0.0.0
+      port: 9000
+      workers: 1
+      access_log: true
+      server_header: false
+      date_header: false
+      ws: none
+      interface: asgi3
+      backlog: 16384
+      timeout_keep_alive: 65
+    # maps endpoint path patterns to the expected payload format
+    endpoints:
+      /auth-json: json
+      /json: json
+      /lab/123/(ABC|DEF)/pl.*: plaintext
+      /lab/123/ABC/auditlog: jsonl
+      /health: plaintext
+  output:
+    type: confluentkafka_output
+    # must match the topic provisioned in the `kafka` section and read by the `logprep` input
+    topic: consumer
+    error_topic: errors
+    flush_timeout: 300
+    send_timeout: 0
+    kafka_config:
+      bootstrap.servers: opensiem-kafka:9092
+      compression.type: none
+      statistics.interval.ms: "60000"
+      queue.buffering.max.messages: "100000000"
+      queue.buffering.max.kbytes: "1048576"
+      queue.buffering.max.ms: "10000"
+      batch.size: "1000000"
+      request.required.acks: "-1"
+  ingress:
+    enabled: true
+
+
+# Values passed to the kafka chart.
+# For additional configurations see: `https://github.com/bitnami/charts/blob/main/bitnami/kafka/values.yaml`
+kafka:
+  listeners:
+    client:
+      protocol: "PLAINTEXT"
+  controller:
+    replicaCount: 3
+  metrics:
+    jmx:
+      enabled: true
+  provisioning:
+    enabled: true
+    replicationFactor: 1
+    numPartitions: 10
+    topics:
+      # topic written by the connector output and read by the logprep input
+      - name: "consumer"
+
+# Values for the logprep deployment: reads events from kafka and writes them to opensearch.
+logprep:
+  image:
+    # same locally built image as configured for the connector
+    registry: "local"
+    repository: "logprep"
+    tag: "latest"
+    pullPolicy: "IfNotPresent"
+  logger:
+    level: "INFO"
+  input:
+    type: "confluentkafka_input"
+    topic: "consumer"
+    kafka_config:
+      bootstrap.servers: "opensiem-kafka:9092"
+      group.id: "cgroup3"
+      enable.auto.commit: "true"
+      auto.commit.interval.ms: "10000"
+      enable.auto.offset.store: "false"
+      queued.min.messages: "100000"
+      queued.max.messages.kbytes: "65536"
+      statistics.interval.ms: "60000"
+    preprocessing:
+      version_info_target_field: "Logprep_version_info"
+      log_arrival_time_target_field: "event.ingested"
+      hmac:
+        target: "<RAW_MSG>"
+        # NOTE(review): looks like a placeholder key for the example setup - confirm it is never used outside of it
+        key: "thisisasecureandrandomkey"
+        output_field: "Full_event"
+  output:
+    type: "opensearch_output"
+    hosts:
+      - "opensiem-opensearch:9200"
+    default_index: "processed"
+    error_index: "errors"
+    timeout: 10000
+    message_backlog_size: 2500
+    parallel_bulk: true
+    flush_timeout: 60
+    max_retries: 3
+    chunk_size: 500
+    thread_count: 5
+    # NOTE(review): presumably default credentials of the example opensearch - confirm
+    user: "admin"
+    secret: "admin"
+    # cluster states that are reported as healthy
+    desired_cluster_status:
+      - "green"
+      - "yellow"
+# Values passed to the opensearch chart: dashboards enabled, one replica each for
+# ingest, master, data and coordinating.
+# For additional configurations see: `https://github.com/bitnami/charts/blob/main/bitnami/opensearch/values.yaml`
+opensearch:
+  dashboards:
+    enabled: true
+  ingest:
+    replicaCount: 1
+  master:
+    replicaCount: 1
+  data:
+    replicaCount: 1
+  coordinating:
+    replicaCount: 1