-
Notifications
You must be signed in to change notification settings - Fork 690
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore(feat): Add kubelet service kill experiment in generic experimen…
…t list (#1542) * chore(feat): Add kubelet service kill experiment in generic experiment list Signed-off-by: Udit Gaurav <[email protected]>
- Loading branch information
1 parent
39df140
commit 57f88a5
Showing
7 changed files
with
426 additions
and
0 deletions.
There are no files selected for viewing
42 changes: 42 additions & 0 deletions
42
chaoslib/litmus/kubelet_service_kill/kubelet-service-kill.j2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
---
# Helper pod for the kubelet-service-kill experiment.
# It is pinned to the target node, stops the kubelet systemd unit, keeps it
# down for the chaos duration and then starts it again.
# Template inputs: run_id, node_name, c_duration and (optionally) chaos_uid.
apiVersion: v1
kind: Pod
metadata:
  name: "service-kill-{{ run_id }}"
  labels:
    app: service-kill
    name: "service-kill-{{ run_id }}"
{% if chaos_uid is defined and chaos_uid != '' %}
    # Quoted so that a number-like UID is still rendered as a string label.
    chaosUID: "{{ chaos_uid }}"
{% endif %}
spec:
  # Quoted so that an empty or number-like node name is not retyped by YAML.
  nodeName: "{{ node_name }}"
  restartPolicy: Never
  containers:
    - name: service-kill
      image: ubuntu:16.04
      command:
        - "/bin/bash"
      args:
        - "-c"
        # The initial sleep runs before kubelet is stopped; the second sleep
        # is the chaos window during which kubelet stays down.
        - "sleep 10 && systemctl stop kubelet && sleep {{ c_duration }} && systemctl start kubelet"
      resources:
        requests:
          cpu: 10m
          memory: 5M
        limits:
          cpu: 100m
          memory: 20M
      volumeMounts:
        # Host /var/run is mounted over the container's /var/run.
        # NOTE(review): presumably so systemctl can reach the host's
        # systemd/D-Bus sockets - confirm.
        - name: bus
          mountPath: /var/run
        - name: root
          mountPath: /node
      securityContext:
        privileged: true
      tty: true
  volumes:
    - name: bus
      hostPath:
        path: /var/run
    - name: root
      hostPath:
        path: /
        type: ""
145 changes: 145 additions & 0 deletions
145
chaoslib/litmus/kubelet_service_kill/kubelet_service_kill.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
---
# Chaoslib for the kubelet-service-kill experiment.
#
# Flow:
#   1. pick an application pod (unless app_pod is already set) and find the
#      node it runs on;
#   2. render the helper pod manifest and create the helper pod on that node;
#   3. wait for the node to report NotReady, wait for the chaos duration, then
#      wait for the node to report Ready again;
#   4. delete the helper pod and confirm that it is gone.
# On any failure the rescue section removes the helper pod (if it was created)
# and fails the lib.
#
# Variables read from the including play: a_label, a_ns, c_ns, c_duration,
# c_experiment, chaos_pod_name, and optionally app_pod, run_id and c_engine.
- block:

    - block:

        - name: "[Prepare]: Select the application pod name"
          shell: >
            kubectl get pod -l {{ a_label }} -n {{ a_ns }}
            -o=custom-columns=:metadata.name --no-headers
            | shuf | head -1
          args:
            executable: /bin/bash
          register: app_pod_name

        - name: "[Prepare]: Recording the application pod name"
          set_fact:
            app_pod: "{{ app_pod_name.stdout }}"

      when: "app_pod is not defined or app_pod == ''"

    - name: "[Prepare]: Identify the application node name"
      shell: >
        kubectl get pod {{ app_pod }} -n {{ a_ns }}
        --no-headers -o custom-columns=:spec.nodeName
      args:
        executable: /bin/bash
      register: app_node

    # Replaces the registered command result with the plain node name.
    - name: "[Prepare]: Record the application node name"
      set_fact:
        app_node: "{{ app_node.stdout }}"

    - block:
        - name: "[Prepare:] Generate a run id if not passed from the engine/experiment"
          shell: echo $(mktemp) | cut -d '.' -f 2 | cut -c -6
          register: rand_string

        - set_fact:
            run_id: "{{ rand_string.stdout | lower }}"
      when: "run_id is not defined or run_id == ''"

    - name: "[Event]: Generating an Event for ChaosInjection"
      include_tasks: /utils/common/generate-kubernetes-chaos-events.yml
      vars:
        stage: "ChaosInject"
        exp_pod_name: "{{ chaos_pod_name }}"
        engine_ns: "{{ c_ns }}"
        message: "Injecting {{ c_experiment }} chaos on {{ app_node }} node"
      when: "c_engine is defined and c_engine != ''"

    # node_name is supplied only to this template task; other tasks must use
    # app_node.
    - name: "[Prepare]: Patch the run_id to kubelet service kill helper pod template"
      template:
        src: /chaoslib/litmus/kubelet_service_kill/kubelet-service-kill.j2
        dest: /tmp/kubelet-service-kill.yml
      vars:
        node_name: "{{ app_node }}"

    # Setting pod_running_status to nil
    - set_fact:
        pod_running_status: ""

    # Kubelet service kill pod creation is attempted for a total of 3 times, if it is not immediately schedulable due to transient node conditions
    # If the kubelet-service-kill pod is not schedulable across these 3 tries, the experiment is failed with message indicating improper cluster state.
    # NOTE(review): pod_running_status and chaos_pod_result are presumably set
    # by the included util - confirm against create_chaos_pod.yml.
    - name: "[Prepare]: Including the util to create the chaos pod"
      include_tasks: /utils/common/create_chaos_pod.yml
      vars:
        pod_ns: "{{ c_ns }}"
        c_path: "/tmp/kubelet-service-kill.yml"
        pod_label: "name=service-kill-{{ run_id }}"
      with_sequence: start=1 end=3

    # Failing the execution, If kubelet-service-kill pod won't come to running state after three retries.
    # The message uses app_node: node_name exists only as a var of the
    # template task above and is not set in this task's scope.
    - fail:
        msg: "kubelet_service_kill lib failed, Unable to create as kubelet_service_kill pod couldn't be scheduled on the {{ app_node }} node"
      when: "pod_running_status is not defined or pod_running_status != 'Running'"

    - name: "[Status]: Waiting for node to get in NotReady state"
      shell: |
        kubectl get nodes {{ app_node }} --no-headers | awk '{print$2}'
      args:
        executable: /bin/bash
      register: node_state
      until: node_state.stdout == 'NotReady'
      delay: 2
      retries: 90

    - name: "[Wait]: Wait for the chaos duration of {{ c_duration }}s"
      wait_for:
        timeout: "{{ c_duration }}"

    - name: "[Status]: Checking the node status after chaos"
      shell: |
        kubectl get nodes {{ app_node }} --no-headers | awk '{print$2}'
      args:
        executable: /bin/bash
      register: node_state
      until: node_state.stdout == 'Ready'
      delay: 2
      retries: 90

    - name: "[CleanUP]: Tear down service kill infra"
      shell: >
        kubectl delete -f /tmp/kubelet-service-kill.yml -n {{ c_ns }}
      args:
        executable: /bin/bash
      register: result

    # The helper pod is created in and deleted from c_ns, so its termination
    # must be verified in c_ns as well (not in the application namespace).
    - name: "[Status]: Confirm that the svc chaos helper pod is teminated successfully"
      shell: >
        kubectl get pod -l name=service-kill-{{ run_id }} --no-headers -o custom-columns=:status.phase -n {{ c_ns }} | sort | uniq
      args:
        executable: /bin/bash
      register: result_status
      until: result_status.stdout ==''
      delay: 2
      retries: 90

  rescue:

    # Clean-up is attempted only if the helper pod reached Running and the
    # creation result was recorded.
    - block:

        - name: "[CleanUP]: Tear down service kill infra"
          shell: >
            kubectl delete -f /tmp/kubelet-service-kill.yml -n {{ c_ns }}
          args:
            executable: /bin/bash
          register: result
          when: "chaos_pod_result.rc == 0"

        # Same namespace as the delete above: the helper pod lives in c_ns.
        - name: "[Status]: Confirm that the svc chaos helper pod is teminated successfully"
          shell: >
            kubectl get pod -l name=service-kill-{{ run_id }} --no-headers -o custom-columns=:status.phase -n {{ c_ns }} | sort | uniq
          args:
            executable: /bin/bash
          register: result_status
          until: result_status.stdout ==''
          delay: 2
          retries: 90

      when: "(pod_running_status is defined and pod_running_status == 'Running') and chaos_pod_result is defined"

    # Re-raise the failure so the including play's rescue section runs.
    - fail:
        msg: "kubelet_service_kill lib failed"
      when: true
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
## Experiment Metadata | ||
|
||
<table> | ||
<tr> | ||
<th> Name </th> | ||
<th> Description </th> | ||
<th> Documentation Link </th> | ||
</tr> | ||
<tr> | ||
<td> Kubelet Service Kill </td> | ||
<td> This experiment gracefully stops the kubelet service for a certain chaos duration. The experiment aims to verify the resiliency of applications whose replicas may be evicted or become unreachable on account of nodes turning unschedulable (Not Ready) due to the kubelet service kill. </td> | ||
<td> <a href=""> Added soon </a> </td> | ||
</tr> | ||
</table> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{# Emits the chaoslib task-file path only when the litmus lib is selected. #}
{% if c_lib | default('') == 'litmus' %}
c_util: "/chaoslib/litmus/kubelet_service_kill/kubelet_service_kill.yml"
{% endif %}
156 changes: 156 additions & 0 deletions
156
experiments/generic/kubelet_service_kill/kubelet_service_kill_ansible_logic.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
---
# Experiment playbook for kubelet-service-kill.
#
# Flow: resolve the chaoslib task file, record start-of-test in the chaos
# result, verify the application (and any auxiliary applications) before
# chaos, run the chaoslib, verify the applications again, then record the
# verdict. The rescue section marks the run as failed; the always section
# records the failure step and the end-of-test result.
#
# All inputs come from environment variables of the experiment pod.
- hosts: localhost
  connection: local

  vars:
    c_experiment: "kubelet-service-kill"
    c_duration: "{{ lookup('env','TOTAL_CHAOS_DURATION') }}"
    ramp_time: "{{ lookup('env','RAMP_TIME') }}"
    a_ns: "{{ lookup('env','APP_NAMESPACE') }}"
    a_label: "{{ lookup('env','APP_LABEL') }}"
    a_kind: "{{ lookup('env','APP_KIND') }}"
    lib_image: "{{ lookup('env','LIB_IMAGE') }}"
    auxiliary_appinfo: "{{ lookup('env','AUXILIARY_APPINFO') }}"
    chaos_uid: "{{ lookup('env','CHAOS_UID') }}"
    c_engine: "{{ lookup('env','CHAOSENGINE') }}"
    chaos_pod_name: "{{ lookup('env','POD_NAME') }}"
    c_ns: "{{ lookup('env','CHAOS_NAMESPACE') }}"
    c_lib: "{{ lookup('env','LIB') }}"

  tasks:

    - block:

        ## DETERMINE THE CHAOSLIB TASKFILES TO BE USED
        - include_tasks: kubelet_service_kill_ansible_prerequisites.yml

        - name: "[PreReq]: Including the chaos util for the {{ c_experiment }} experiment"
          include_vars:
            file: /tmp/chaosutil.yml

        ## GENERATE EXPERIMENT RESULT NAME
        - name: "[PreReq]: Constructing the chaos result name"
          set_fact:
            c_result: "{{ c_engine }}-{{ c_experiment }}"
          when: "c_engine is defined and c_engine != ''"

        ## RECORD START-OF-EXPERIMENT IN LITMUSCHAOS RESULT CR
        - name: "[PreReq]: Updating the chaos result of {{ c_experiment }} experiment (SOT)"
          include_tasks: /utils/runtime/update_chaos_result_resource.yml
          vars:
            status: 'SOT'
            namespace: "{{ c_ns }}"

        ## DISPLAY APP INFORMATION
        - name: "[Info]: Display the application information passed via the test job"
          debug:
            msg:
              - "The application info is as follows:"
              - "Namespace    : {{ a_ns }}"
              - "Label        : {{ a_label }}"
              - "Ramp Time    : {{ ramp_time }}"

        ## PRE-CHAOS APPLICATION STATUS CHECK
        - name: "[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)"
          include_tasks: "/utils/common/status_app_pod.yml"
          vars:
            app_ns: "{{ a_ns }}"
            app_label: "{{ a_label }}"
            delay: 2
            retries: 90

        # Auxiliary application health check status
        # auxiliary_appinfo is a comma-separated list of "namespace:label"
        # entries; each entry is split on ':' below.
        - block:

            - name: Record auxiliary appinfo
              set_fact:
                auxiliary_appinfo_list: "{{ auxiliary_appinfo.split(',') }}"

            - name: "[Status]: Verify that the Auxiliary Applications are running (pre-chaos)"
              include_tasks: /utils/common/status_app_pod.yml
              vars:
                app_ns: "{{ item.split(':')[0] }}"
                app_label: "{{ item.split(':')[1] }}"
                delay: 2
                retries: 90
              with_items:
                - "{{ auxiliary_appinfo_list }}"

          when: auxiliary_appinfo is defined and auxiliary_appinfo != ''

        ## RECORD EVENT FOR PRE-CHAOS CHECK
        - name: "[Event]: Generating an Event for PreChaosCheck"
          include_tasks: /utils/common/generate-kubernetes-chaos-events.yml
          vars:
            stage: "PreChaosCheck"
            exp_pod_name: "{{ chaos_pod_name }}"
            engine_ns: "{{ c_ns }}"
            message: "AUT is Running successfully"
          when: "c_engine is defined and c_engine != ''"

        ## READY TO START SERVICE CHAOS
        # c_util is loaded from /tmp/chaosutil.yml by the include_vars task
        # above.
        - name: "[Prepare]: Including the kubelet service kill lib"
          include_tasks: "{{ c_util }}"

        ## POST-CHAOS APPLICATION STATUS CHECK

        - name: "[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)"
          include_tasks: "/utils/common/status_app_pod.yml"
          vars:
            app_ns: "{{ a_ns }}"
            app_label: "{{ a_label }}"
            delay: 2
            retries: 90

        # Auxiliary application health check status
        # Reuses auxiliary_appinfo_list recorded by the pre-chaos block, which
        # runs under the same condition.
        - block:

            - name: "[Status]: Verify that the Auxiliary Applications are running (post-chaos)"
              include_tasks: /utils/common/status_app_pod.yml
              vars:
                app_ns: "{{ item.split(':')[0] }}"
                app_label: "{{ item.split(':')[1] }}"
                delay: 2
                retries: 90
              with_items:
                - "{{ auxiliary_appinfo_list }}"

          when: auxiliary_appinfo is defined and auxiliary_appinfo != ''

        ## RECORD EVENT FOR POST-CHAOS CHECK
        - name: "[Event]: Generating an Event for PostChaosCheck"
          include_tasks: /utils/common/generate-kubernetes-chaos-events.yml
          vars:
            stage: "PostChaosCheck"
            exp_pod_name: "{{ chaos_pod_name }}"
            engine_ns: "{{ c_ns }}"
            message: "AUT is Running successfully"
          when: "c_engine is defined and c_engine != ''"

        - set_fact:
            flag: "Pass"

        - name: "[Result]: Getting the final result of {{ c_experiment }} experiment"
          debug:
            msg: "{{ c_experiment }} experiment has been {{ flag }}ed"

      rescue:

        - set_fact:
            flag: "Fail"

        - name: "[Result]: Getting the final result of {{ c_experiment }} experiment"
          debug:
            msg: "{{ c_experiment }} experiment has been {{ flag }}ed"

      always:

        ## Getting failure step from experiment-pod
        - include_tasks: /utils/runtime/getting_failure_step.yml

        ## RECORD END-OF-TEST IN LITMUSCHAOS RESULT CR
        - name: "[The End]: Updating the chaos result of {{ c_experiment }} experiment (EOT)"
          include_tasks: /utils/runtime/update_chaos_result_resource.yml
          vars:
            status: 'EOT'
            namespace: "{{ c_ns }}"
4 changes: 4 additions & 0 deletions
4
experiments/generic/kubelet_service_kill/kubelet_service_kill_ansible_prerequisites.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
---
# Renders chaosutil.j2 to /tmp/chaosutil.yml so that the experiment playbook
# can load the chaoslib task-file path from it.
- name: "[PreReq] Identify the chaos util for {{ c_experiment }} experiment"
  template:
    dest: /tmp/chaosutil.yml
    src: chaosutil.j2
Oops, something went wrong.