Skip to content

Commit

Permalink
Fix log collection in GKE Autopilot with fluentd (#609)
Browse files Browse the repository at this point in the history
This change starts the transition towards assuming that containerd is the default container engine instead of trying to detect it. We are confident that at least GKE Autopilot doesn't use the docker engine anymore, so we can exclude the autodetection for that distribution. We will be adding other distributions to the exclusion list going forward, and eventually we can drop support for the docker engine.
  • Loading branch information
dmitryax authored Dec 20, 2022
1 parent 3d40bab commit df9a7cf
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 123 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

### Fixed

- Make sure the daemonset can start in GKE Autopilot (#602)
- Make sure the daemonset can start in GKE Autopilot (#608)
- Make the containerd engine the default for fluentd logs and always use it in GKE Autopilot (#609)

## [0.66.1] - 2022-12-08

Expand Down

This file was deleted.

36 changes: 36 additions & 0 deletions helm-charts/splunk-otel-collector/templates/configmap-fluentd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -282,4 +282,40 @@ data:
</format>
</match>
</label>
{{- /*
Default configuration files for CRI container engine.
Can be overridden for docker engine by prepare-fluentd-config container.
*/}}
source.containers.parse.conf: |-
@type regexp
expression /^(?<time>.+) (?<stream>stdout|stderr)( (?<partial_flag>[FP]))? (?<log>.*)$/
time_format {{ .Values.fluentd.config.containers.criTimeFormat }}
output.filter.conf: |-
# = handle cri/containerd multiline format =
<filter tail.containers.var.log.containers.**>
@type concat
key log
partial_key partial_flag
partial_value P
separator ''
timeout_label @SPLUNK
</filter>
output.transform.conf: |-
# extract pod_uid and container_name for CRIO runtime
# currently CRI does not produce log paths with all the necessary
# metadata to parse out pod, namespace, container_name, container_id.
# this may be resolved in the future by this issue: https://github.com/kubernetes/kubernetes/issues/58638#issuecomment-385126031
<filter tail.containers.var.log.pods.**>
@type jq_transformer
jq '.record | . + (.source | capture("/var/log/pods/(?<pod_uid>[^/]+)/(?<container_name>[^/]+)/(?<container_retry>[0-9]+).log")) | .sourcetype = ("kube:container:" + .container_name)'
</filter>
# rename pod_uid and container_name to otel semantics.
<filter tail.containers.var.log.pods.**>
@type record_transformer
<record>
k8s.pod.uid ${record["pod_uid"]}
k8s.container.name ${record["container_name"]}
</record>
</filter>
{{- end }}
17 changes: 7 additions & 10 deletions helm-charts/splunk-otel-collector/templates/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,22 +70,19 @@ spec:
{{- end }}
{{- if and (eq (include "splunk-otel-collector.logsEnabled" .) "true") (not .Values.isWindows) }}
initContainers:
{{- if (eq .Values.logsEngine "fluentd") }}
{{- if and (eq .Values.logsEngine "fluentd") (not (eq .Values.distribution "gke/autopilot")) }}
- name: prepare-fluentd-config
image: {{ template "splunk-otel-collector.image.fluentd" . }}
imagePullPolicy: {{ .Values.image.fluentd.pullPolicy }}
command: [ "sh", "-c"]
securityContext:
runAsUser: 0
args:
- if [ -z "${LOG_FORMAT_TYPE}" ]; then
if [ "$(ls {{ .Values.fluentd.config.containers.pathDest }}/*/*json.log 2>/dev/null | wc -l)" != "0" ]; then
export LOG_FORMAT_TYPE=json;
else
export LOG_FORMAT_TYPE=cri;
fi;
- cp /fluentd/etc/common/* /fluentd/etc/;
{{- /* Update fluentd configs if docker engine is configured or discovered from existing log files. */}}
if [ "${LOG_FORMAT_TYPE}" == "json" ] || [ "$(ls {{ .Values.fluentd.config.containers.pathDest }}/*/*json.log 2>/dev/null | wc -l)" != "0" ]; then
cp /fluentd/etc/json/* /fluentd/etc/;
fi;
cp /fluentd/etc/common/* /fluentd/etc/${LOG_FORMAT_TYPE}/* /fluentd/etc/
env:
- name: LOG_FORMAT_TYPE
value: "{{ .Values.fluentd.config.containers.logFormatType }}"
Expand All @@ -99,8 +96,6 @@ spec:
mountPath: /fluentd/etc/common
- name: fluentd-config-json
mountPath: /fluentd/etc/json
- name: fluentd-config-cri
mountPath: /fluentd/etc/cri
{{- else }}
{{- if not (eq .Values.distribution "gke/autopilot") }}
- name: migrate-checkpoint
Expand Down Expand Up @@ -412,8 +407,10 @@ spec:
hostPath:
path: {{ .Values.fluentd.config.journalLogPath | quote }}
- name: fluentd-config
{{- if (not (eq .Values.distribution "gke/autopilot")) }}
emptyDir: {}
- name: fluentd-config-common
{{- end }}
configMap:
name: {{ template "splunk-otel-collector.fullname" . }}-fluentd
- name: fluentd-config-cri
Expand Down
51 changes: 0 additions & 51 deletions rendered/manifests/logs-only/configmap-fluentd-cri.yaml

This file was deleted.

31 changes: 31 additions & 0 deletions rendered/manifests/logs-only/configmap-fluentd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -319,3 +319,34 @@ data:
</format>
</match>
</label>
source.containers.parse.conf: |-
@type regexp
expression /^(?<time>.+) (?<stream>stdout|stderr)( (?<partial_flag>[FP]))? (?<log>.*)$/
time_format %Y-%m-%dT%H:%M:%S.%N%:z
output.filter.conf: |-
# = handle cri/containerd multiline format =
<filter tail.containers.var.log.containers.**>
@type concat
key log
partial_key partial_flag
partial_value P
separator ''
timeout_label @SPLUNK
</filter>
output.transform.conf: |-
# extract pod_uid and container_name for CRIO runtime
# currently CRI does not produce log paths with all the necessary
# metadata to parse out pod, namespace, container_name, container_id.
# this may be resolved in the future by this issue: https://github.com/kubernetes/kubernetes/issues/58638#issuecomment-385126031
<filter tail.containers.var.log.pods.**>
@type jq_transformer
jq '.record | . + (.source | capture("/var/log/pods/(?<pod_uid>[^/]+)/(?<container_name>[^/]+)/(?<container_retry>[0-9]+).log")) | .sourcetype = ("kube:container:" + .container_name)'
</filter>
# rename pod_uid and container_name to otel semantics.
<filter tail.containers.var.log.pods.**>
@type record_transformer
<record>
k8s.pod.uid ${record["pod_uid"]}
k8s.container.name ${record["container_name"]}
</record>
</filter>
14 changes: 4 additions & 10 deletions rendered/manifests/logs-only/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ spec:
app: splunk-otel-collector
release: default
annotations:
checksum/config: 0640e5c5b99d34b55fbd125d1c34fab8387b37d8c3c5a1e7c6857867a6ad7dfc
checksum/config: 0dd2001a5452e85b22ed2a419f03895f9b0a0382fb9d035be838299393f6080b
kubectl.kubernetes.io/default-container: otel-collector
spec:
hostNetwork: true
Expand All @@ -50,14 +50,10 @@ spec:
securityContext:
runAsUser: 0
args:
- if [ -z "${LOG_FORMAT_TYPE}" ]; then
if [ "$(ls /var/lib/docker/containers/*/*json.log 2>/dev/null | wc -l)" != "0" ]; then
export LOG_FORMAT_TYPE=json;
else
export LOG_FORMAT_TYPE=cri;
fi;
- cp /fluentd/etc/common/* /fluentd/etc/;
if [ "${LOG_FORMAT_TYPE}" == "json" ] || [ "$(ls /var/lib/docker/containers/*/*json.log 2>/dev/null | wc -l)" != "0" ]; then
cp /fluentd/etc/json/* /fluentd/etc/;
fi;
cp /fluentd/etc/common/* /fluentd/etc/${LOG_FORMAT_TYPE}/* /fluentd/etc/
env:
- name: LOG_FORMAT_TYPE
value: ""
Expand All @@ -71,8 +67,6 @@ spec:
mountPath: /fluentd/etc/common
- name: fluentd-config-json
mountPath: /fluentd/etc/json
- name: fluentd-config-cri
mountPath: /fluentd/etc/cri
containers:
- name: fluentd
image: splunk/fluentd-hec:1.2.8
Expand Down

0 comments on commit df9a7cf

Please sign in to comment.