Skip to content

Commit

Permalink
chore: add limits and requests to pods
Browse files Browse the repository at this point in the history
Signed-off-by: SdgJlbl <[email protected]>
  • Loading branch information
SdgJlbl committed Apr 4, 2024
1 parent f6798d4 commit 7ed22ae
Show file tree
Hide file tree
Showing 7 changed files with 88 additions and 1 deletion.
2 changes: 2 additions & 0 deletions backend/builder/image_builder/image_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from substrapp.compute_tasks.volumes import get_worker_subtuple_pvc_name
from substrapp.docker_registry import USER_IMAGE_REPOSITORY
from substrapp.kubernetes_utils import delete_pod
from substrapp.kubernetes_utils import get_resources_requirements
from substrapp.kubernetes_utils import get_security_context
from substrapp.lock_local import lock_resource
from substrapp.utils import timeit
Expand Down Expand Up @@ -291,6 +292,7 @@ def _build_container(dockerfile_mount_path: str, image_tag: str) -> kubernetes.c
args=args,
volume_mounts=volume_mounts,
security_context=container_security_context,
resources=get_resources_requirements(cpu_request="1000m", memory_request="2Gi", memory_limit="128Gi"),
)


Expand Down
2 changes: 2 additions & 0 deletions backend/substrapp/compute_tasks/compute_pod.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from substrapp.kubernetes_utils import delete_pod
from substrapp.kubernetes_utils import get_pod_security_context
from substrapp.kubernetes_utils import get_resources_requirements
from substrapp.kubernetes_utils import get_security_context

NAMESPACE = settings.NAMESPACE
Expand Down Expand Up @@ -112,6 +113,7 @@ def create_pod(
args=None,
volume_mounts=volume_mounts + gpu_volume_mounts,
security_context=get_security_context(),
resources=get_resources_requirements(cpu_request="1000m", memory_request="1Gi", memory_limit="256Gi"),
env=[kubernetes.client.V1EnvVar(name=env_name, value=env_value) for env_name, env_value in environment.items()],
**container_optional_kwargs,
)
Expand Down
8 changes: 8 additions & 0 deletions backend/substrapp/kubernetes_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,14 @@ def get_security_context(root: bool = False, capabilities: list[str] = None) ->
return security_context


def get_resources_requirements(
    *, cpu_request: str = "1000m", memory_request: str = "100M", memory_limit: str = "1G"
) -> kubernetes.client.V1ResourceRequirements:
    """Build the resource requirements object to attach to a container spec.

    All arguments are keyword-only and are passed through unchanged as
    quantity strings (for example ``"1000m"`` or ``"2Gi"``).

    Args:
        cpu_request: CPU amount requested for the container.
        memory_request: Memory amount requested for the container.
        memory_limit: Upper bound on the memory the container may use.

    Returns:
        A ``V1ResourceRequirements`` whose ``requests`` holds the ``cpu`` and
        ``memory`` entries and whose ``limits`` holds only a ``memory`` entry;
        no CPU limit is set by this helper.
    """
    requested = {"cpu": cpu_request, "memory": memory_request}
    capped = {"memory": memory_limit}
    return kubernetes.client.V1ResourceRequirements(requests=requested, limits=capped)


def pod_exists_by_label_selector(k8s_client: kubernetes.client.CoreV1Api, label_selector: str) -> bool:
"""Return True if the pod exists, else False.
Expand Down
6 changes: 6 additions & 0 deletions charts/substra-backend/templates/deployment-api-events.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ spec:
- name: api-event-app
image: {{ include "substra-backend.images.name" (dict "img" .Values.api.events.image "defaultTag" $.Chart.AppVersion) }}
imagePullPolicy: {{ .Values.api.events.image.pullPolicy }}
resources:
requests:
memory: "200Mi"
cpu: "500m"
limits:
memory: "400Mi"
command: ["/bin/bash"]
{{- if eq .Values.settings "prod" }}
args: ["-c", "python manage.py consume"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,21 @@ spec:
initContainers:
- name: wait-registry
image: jwilder/dockerize:0.6.1
resources:
requests:
memory: "200Mi"
cpu: "500m"
limits:
memory: "400Mi"
command: ['dockerize', '-wait', 'tcp://{{ $.Release.Name }}-docker-registry:5000']
- name: kaniko
image: {{ include "common.images.name" $.Values.kaniko.image }}
resources:
requests:
memory: "2Gi"
cpu: "1000m"
limits:
memory: "4Gi"
args:
- "--context=/docker-context"
{{- if .dstImage }}
Expand All @@ -48,6 +60,13 @@ spec:
containers:
- image: gcr.io/google-containers/pause:3.2
name: pause
resources:
requests:
memory: "64Mi"
cpu: "250m"
limits:
memory: "128Mi"
cpu: "500m"
volumes:
- name: kaniko-dir
emptyDir: {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ spec:
- name: worker-event-app
image: {{ include "substra-backend.images.name" (dict "img" .Values.worker.events.image "defaultTag" $.Chart.AppVersion) }}
imagePullPolicy: {{ .Values.worker.events.image.pullPolicy }}
resources:
requests:
memory: "200Mi"
cpu: "500m"
limits:
memory: "400Mi"
command: ["/bin/bash"]
{{- if eq .Values.settings "prod" }}
args: ["-c", "python manage.py consume"]
Expand Down
46 changes: 45 additions & 1 deletion charts/substra-backend/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,15 @@ server:
## cpu: 100m
## memory: 128Mi
##
resources: {}
resources: {
requests: {
cpu: "1000m",
memory: "4Gi"
},
limits: {
memory: "32Gi"
}
}

persistence:
## @param server.persistence.storageClass Specify the _StorageClass_ used to provision the volume. Or the default _StorageClass_ will be used. Set it to `-` to disable dynamic provisioning
Expand Down Expand Up @@ -828,6 +836,15 @@ postgresql:
capabilities:
drop:
- ALL
resources: {
requests: {
cpu: "1000m",
memory: "200Mi"
},
limits: {
memory: "64Gi"
}
}

## @skip redis
##
Expand All @@ -849,6 +866,15 @@ redis:
appendonly yes
# Disable RDB persistence since AOF persistence is enabled
save ""
resources: {
requests: {
cpu: "1000m",
memory: "512Mi"
},
limits: {
memory: "1024Mi"
}
}

## @skip docker-registry
##
Expand All @@ -861,6 +887,15 @@ docker-registry:
deleteEnabled: true
service:
type: NodePort
resources: {
requests: {
cpu: "1000m",
memory: "16Gi"
},
limits: {
memory: "64Gi"
}
}

## @skip minio
##
Expand All @@ -881,6 +916,15 @@ minio:
capabilities:
drop:
- ALL
resources: {
requests: {
cpu: "1000m",
memory: "16Gi"
},
limits: {
memory: "64Gi"
}
}

## @skip localstack
##
Expand Down

0 comments on commit 7ed22ae

Please sign in to comment.