Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Configurable workload node selectors and tolerations (Fixes #48) #49

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ spec:
mode: daemonset
nodeSelector:
kubernetes.io/os: linux
{{- /* Extra node selector labels for the Linux agent: prefer agent.additionalNodeSelector (the key declared in values.yaml and used by the Windows agent template), falling back to agent.nodeSelector. */ -}}
{{- with (.Values.agent.additionalNodeSelector | default .Values.agent.nodeSelector) }}
{{- toYaml . | nindent 4 }}
{{- end }}
serviceAccount: {{ template "cloudwatch-agent.serviceAccountName" . }}
{{- if .Values.agent.config }}
config: {{ include "cloudwatch-agent.modify-config" (merge (dict "Config" .Values.agent.config) . ) }}
Expand Down Expand Up @@ -111,7 +114,7 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.namespace
{{- with .Values.tolerations }}
{{- with (.Values.agent.tolerations | default .Values.tolerations ) }}
tolerations: {{- toYaml . | nindent 2}}
{{- end }}
{{- end }}
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ spec:
image: {{ template "dcgm-exporter.image" . }}
nodeSelector:
kubernetes.io/os: linux
{{- with .Values.dcgmExporter.additionalNodeSelector }}
{{- toYaml . | nindent 4 }}
{{- end }}
{{- with (.Values.dcgmExporter.tolerations | default .Values.tolerations ) }}
tolerations: {{- toYaml . | nindent 2}}
{{- end }}
serviceAccount: {{ template "dcgm-exporter.serviceAccountName" . }}
affinity:
nodeAffinity:
Expand Down Expand Up @@ -71,6 +77,3 @@ spec:
tls_server_config:
cert_file: /etc/amazon-cloudwatch-observability-dcgm-cert/server.crt
key_file: /etc/amazon-cloudwatch-observability-dcgm-cert/server.key
{{- with .Values.tolerations }}
tolerations: {{- toYaml . | nindent 2}}
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,10 @@ spec:
serviceAccountName: {{ template "cloudwatch-agent.serviceAccountName" . }}
nodeSelector:
kubernetes.io/os: linux
{{- with .Values.tolerations }}
{{- with .Values.fluentBit.additionalNodeSelector }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with (.Values.fluentBit.tolerations | default .Values.tolerations ) }}
tolerations: {{- toYaml . | nindent 6}}
{{- end }}
{{- end }}
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ spec:
- key: {{ .Values.nodeLabelKey }}
operator: In
values: {{ .Values.neuronInstances | toYaml | nindent 20 }}
{{- /* Append the user-supplied entries from neuronMonitor.additionalNodeAffinityTerms, when any are set. */ -}}
{{- with .Values.neuronMonitor.additionalNodeAffinityTerms }}
{{- toYaml . | nindent 10 }}
{{- end }}
resources:
limits:
cpu: 500m
Expand Down Expand Up @@ -91,6 +94,6 @@ spec:
}
]
}
{{- with .Values.tolerations }}
{{- with (.Values.neuronMonitor.tolerations | default .Values.tolerations ) }}
tolerations: {{- toYaml . | nindent 2}}
{{- end }}
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,6 @@ spec:
secretName: {{ template "amazon-cloudwatch-observability.certificateSecretName" . }}
nodeSelector:
kubernetes.io/os: linux
{{- /* Render manager.additionalNodeSelector (declared in values.yaml) after the built-in OS selector, when set. */ -}}
{{- with .Values.manager.additionalNodeSelector }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
{{- with (.Values.manager.tolerations | default .Values.tolerations ) }}
tolerations: {{- toYaml . | nindent 6}}
{{- end }}
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ spec:
serviceAccount: {{ template "cloudwatch-agent.serviceAccountName" . }}
nodeSelector:
kubernetes.io/os: windows
{{- with .Values.agent.additionalNodeSelector }}
{{- toYaml . | nindent 4 }}
{{- end }}
config: {{ .Values.agent.windowsDefaultConfig | toJson | quote }}
resources:
requests:
Expand Down Expand Up @@ -47,7 +50,7 @@ spec:
value: "True"
- name: RUN_AS_HOST_PROCESS_CONTAINER
value: "True"
{{- with .Values.tolerations }}
{{- with (.Values.agent.tolerations | default .Values.tolerations ) }}
tolerations: {{- toYaml . | nindent 2}}
{{- end }}
{{- end }}
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ spec:
hostNetwork: true
nodeSelector:
kubernetes.io/os: windows
{{- with .Values.fluentBit.additionalNodeSelector }}
{{- toYaml . | nindent 8 }}
{{- end }}
containers:
- name: fluent-bit
image: {{ template "fluent-bit-windows.image" . }}
Expand Down Expand Up @@ -70,7 +73,7 @@ spec:
terminationGracePeriodSeconds: 10
dnsPolicy: ClusterFirstWithHostNet
serviceAccountName: {{ template "cloudwatch-agent.serviceAccountName" . }}
{{- with .Values.tolerations }}
{{- with (.Values.fluentBit.tolerations | default .Values.tolerations ) }}
tolerations: {{- toYaml . | nindent 6}}
{{- end }}
{{- end }}
{{- end }}
14 changes: 13 additions & 1 deletion charts/amazon-cloudwatch-observability/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ neuronInstances: [ trn1.2xlarge, trn1.32xlarge, trn1n.32xlarge, inf1.xlarge, inf
tolerations:
- operator: Exists

# Scheduling overrides read by the Fluent Bit templates.
fluentBit:
# Tolerations used instead of the chart-wide `tolerations`; an empty list
# falls back to the chart-wide value.
tolerations: [] # Override the default tolerations
# Rendered after the built-in kubernetes.io/os entry of the node selector.
additionalNodeSelector: {} # Additional node selector key-value pairs

containerLogs:
enabled: true
fluentBit:
Expand Down Expand Up @@ -423,6 +427,8 @@ manager:

service:
name:
tolerations: [] # Override the default tolerations
additionalNodeSelector: {} # Additional node selector key-value pairs

## Admission webhooks make sure only requests with correctly formatted rules will get into the Operator.
admissionWebhooks:
Expand Down Expand Up @@ -532,6 +538,8 @@ agent:
}
}
}
tolerations: [] # Override the default tolerations
additionalNodeSelector: {} # Additional node selector key-value pairs

dcgmExporter:
name:
Expand All @@ -554,6 +562,8 @@ dcgmExporter:
kubeletPath: "/var/lib/kubelet/pod-resources"
serviceAccount:
name: # override exporter service account name
tolerations: [] # Override the default tolerations
additionalNodeSelector: {} # Additional node selector key-value pairs

neuronMonitor:
name:
Expand All @@ -574,4 +584,6 @@ neuronMonitor:
capabilities:
add: ["SYS_ADMIN"]
serviceAccount:
name: # override exporter service account name
name: # override exporter service account name
tolerations: [] # Override the default tolerations
additionalNodeAffinityTerms: [] # Additional required node affinity selector terms