Skip to content

Commit

Permalink
tests: adding extra scenarios for fault injection (canonical#14489)
Browse files Browse the repository at this point in the history
* tests: adding missing scenarios for fault injection

This change adds scenarios that were missing from the fault injection
test coverage.

Fault (panic/reboot) scenarios are included for:
- install snapd --dangerous
- remodel
- install kernel component
- update boot config

* fix shellcheck errors

* adding a few extra workers to make sure the full run can be completed

* remove channel from test which is not used

* update the expected values to Done in all the scenarios

* update the result status in core20-fault-inject-on-refresh test

* Fix tests with new status check

* fix shellcheck error

* fix failing tests

* fix shellcheck

* revert change on refresh scenario
  • Loading branch information
sergiocazzolato authored Sep 27, 2024
1 parent eb3195d commit d7f8099
Show file tree
Hide file tree
Showing 9 changed files with 411 additions and 23 deletions.
6 changes: 3 additions & 3 deletions spread.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -301,15 +301,15 @@ backends:
- ubuntu-20.04-64:
image: ubuntu-2004-64-virt-enabled
storage: 20G
workers: 10
workers: 12
- ubuntu-22.04-64:
image: ubuntu-2204-64-virt-enabled
storage: 25G
workers: 12
workers: 14
- ubuntu-24.04-64:
image: ubuntu-2404-64-virt-enabled
storage: 25G
workers: 12
workers: 14

google-nested-arm:
type: google
Expand Down
79 changes: 79 additions & 0 deletions tests/nested/core/core20-fault-inject-on-install/task.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
summary: Ensure that snapd snap can be installed when a panic/reboot occurs during the process

details: |
    The test checks that if a fault is injected during the snapd snap install,
    then the change continues and is completed with a predictable outcome.

systems: [ubuntu-2*]

environment:
    # Each variant pairs the tag on which the fault is injected (TAG) with
    # the kind of fault to inject (FAULT).
    TAG/snapd_panic_auto_connect: after-auto-connect
    FAULT/snapd_panic_auto_connect: panic
    TAG/snapd_reboot_auto_connect: after-auto-connect
    FAULT/snapd_reboot_auto_connect: reboot
    # Final status expected for the install change in every variant
    STATUS: Done

prepare: |
    # automatically cleaned up in restore
    echo "Inject a $FAULT on $TAG"
    cat <<EOF > fault-inject.conf
    [Service]
    Environment=SNAPPY_TESTING=1
    Environment=SNAPD_FAULT_INJECT=$TAG:$FAULT
    EOF

    echo "Wait for the system to be seeded first"
    remote.exec "sudo snap wait system seed.loaded"

    # Install the drop-in in the nested system and restart snapd so that the
    # fault injection environment is picked up
    remote.push fault-inject.conf
    remote.exec "sudo mkdir -p /etc/systemd/system/snapd.service.d"
    remote.exec "sudo cp -v fault-inject.conf /etc/systemd/system/snapd.service.d/"
    remote.exec "sudo systemctl daemon-reload"
    remote.exec "sudo systemctl restart snapd.service"

    # Local copy of the snapd snap which is installed during execute
    cp "$(ls /tmp/work-dir/snapd_snap/snapd_*.snap)" snapd.snap

execute: |
    SNAP=snapd
    REBOOT=false
    if [ "$FAULT" = reboot ]; then
        REBOOT=true
    fi

    # Get the initial snap revision
    INITIAL_REV="$(remote.exec snap list | grep -E "^$SNAP .*" | awk ' {print $3} ')"

    # Install and reboot before the process is completed
    boot_id="$(tests.nested boot-id)"
    remote.push "$PWD/${SNAP}.snap"
    change_id="$(remote.exec "sudo snap install --dangerous --no-wait ${SNAP}.snap")"

    if [ "$REBOOT" = true ]; then
        remote.wait-for reboot "$boot_id"
        boot_id="$(tests.nested boot-id)"
    fi

    echo "And snap install is completed"
    # A failure of "snap watch" is tolerated, the retry below waits until the
    # change reaches a final status
    remote.exec "snap watch $change_id" || true
    # shellcheck disable=SC2016
    retry --wait 1 -n 60 --env "CHANGE_ID=$change_id" sh -c 'remote.exec snap changes | MATCH "${CHANGE_ID} .* (Done|Error) .*"'

    # Check if the fault was injected
    remote.exec "test -e /var/lib/snapd/faults/$TAG:$FAULT"

    if [ "$FAULT" = panic ]; then
        echo "Ensure the panic was injected and triggered stamp file is present"
        remote.exec "sudo journalctl -u snapd | grep -A 3 panic" > output
        MATCH "$TAG:$FAULT" < output
        MATCH "osutil.injectFault" < output
    fi

    FINAL_STATUS="$(remote.exec snap changes | grep -E "^$change_id .*" | awk ' {print $2} ')"
    CURRENT_REV="$(remote.exec snap list | grep -E "^$SNAP .*" | awk ' {print $3} ')"

    echo "Ensure the change has no steps with Error"
    # STATUS is set to Done for all the variants in the environment section
    [ "$FINAL_STATUS" = "$STATUS" ]
    remote.exec "snap change $change_id" | NOMATCH "^Error .*"

    echo "Ensure the initial revision is not the current one"
    test "$INITIAL_REV" != "$CURRENT_REV"
22 changes: 2 additions & 20 deletions tests/nested/core/core20-fault-inject-on-refresh/task.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,25 +45,6 @@ environment:
TAG/gadget_reboot_refresh_gadget_assets: refresh-gadget-assets
FAULT/gadget_reboot_refresh_gadget_assets: reboot

# TODO: Add the following scenarios in a different test
# These should go in a remodel test
#TAG/gadget_remodel_boot_assets: remodel-boot-assets
#TAG/kernel_remodel_boot_assets: remodel-boot-assets

# This needs a change in the gadget yaml like in cmdline-option test
# TAG/gadget_update_command_line_gadget: update-command-line-gadget
# TAG/gadget_update_config_bootloader: update-config-bootloader

# For this scenario we need to refresh to a kernel with components
# See this test nested/manual/kernel-modules-components
# TAG/kernel_panic_prepare_kernel_components: prepare-kernel-components
# FAULT/kernel_panic_prepare_kernel_components: panic

# This scenario need to install a new snapd with --dangerous instead of
# refresh from the store
# TAG/snapd_panic_auto_connect: after-auto-connect
# FAULT/snapd_panic_auto_connect: panic

prepare: |
# automatically cleaned up in restore
echo "Inject a $FAULT on $TAG"
Expand Down Expand Up @@ -143,7 +124,7 @@ execute: |
echo "And snap refresh is completed"
remote.exec "snap watch $change_id" || true
# shellcheck disable=SC2016
retry --wait 1 -n 60 --env "CHANGE_ID=$change_id" --env "SNAP=$SNAP" sh -c 'remote.exec snap changes | MATCH "${CHANGE_ID} .* (Done|Error) .*"'
retry --wait 1 -n 60 --env "CHANGE_ID=$change_id" sh -c 'remote.exec snap changes | MATCH "${CHANGE_ID} .* (Done|Error) .*"'
# Check if the fault was injected
remote.exec "test -e /var/lib/snapd/faults/$TAG:$FAULT"
Expand All @@ -158,6 +139,7 @@ execute: |
FINAL_STATUS="$(remote.exec snap changes | grep -E "^$change_id .*" | awk ' {print $2} ')"
CURRENT_REV="$(remote.exec snap list | grep -E "^$SNAP .*" | awk ' {print $3} ')"
# Check the final status is the expected one
if [ "$FINAL_STATUS" = Error ]; then
echo "Ensure the change has steps with Error"
remote.exec "snap change $change_id" | MATCH "^Error .*"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Metadata of the kernel-modules component which is packed by the test
component: pc-kernel+wifi-comp
type: kernel-modules
# Quoted so that YAML parsers keep the version as the string "1.0" instead of
# resolving it to a float
version: "1.0"
summary: wifi simulator
description: wifi simulator for testing purposes
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
summary: Ensure that a kernel component can be installed when a panic occurs during the process

details: |
    The test checks that if a fault is injected during the installation of a kernel component,
    then the change continues and is completed with a predictable outcome.

systems: [-ubuntu-1*, -ubuntu-20*, -ubuntu-22*]

environment:
    # Tag on which the fault is injected (TAG) and kind of fault (FAULT)
    TAG/kernel_panic_prepare_kernel_components: prepare-kernel-components
    FAULT/kernel_panic_prepare_kernel_components: panic

    NESTED_BUILD_SNAPD_FROM_CURRENT: true
    NESTED_REPACK_KERNEL_SNAP: false
    NESTED_ENABLE_OVMF: true

prepare: |
    # Modify kernel and create a component
    VERSION="$(tests.nested show version)"
    snap download --channel="$VERSION"/beta pc-kernel
    unsquashfs -d kernel pc-kernel_*.snap
    kern_ver=$(find kernel/modules/* -maxdepth 0 -printf "%f\n")
    comp_ko_dir=wifi-comp/modules/"$kern_ver"/wireless/
    mkdir -p "$comp_ko_dir"
    mkdir -p wifi-comp/meta/
    cp component.yaml wifi-comp/meta/
    hwsim_path=$(find kernel -name mac80211_hwsim.ko\*)
    cp "$hwsim_path" "$comp_ko_dir"
    snap pack wifi-comp

    # Create kernel without the kernel module
    rm "$hwsim_path"
    # depmod wants a lib subdir, fake it and remove after invocation
    mkdir kernel/lib
    ln -s ../modules kernel/lib/modules
    depmod -b kernel/ "$kern_ver"
    rm -rf kernel/lib
    rm pc-kernel_*.snap

    # append component meta-information, the indentation in the format string
    # nests "type" under "wifi-comp" and "wifi-comp" under "components"
    printf 'components:\n  wifi-comp:\n    type: kernel-modules\n' >> kernel/meta/snap.yaml
    snap pack kernel

    # Build and start the nested system using the repacked kernel
    cp pc-kernel_*.snap "$(tests.nested get extra-snaps-path)"
    tests.nested build-image core
    tests.nested create-vm core

    echo "Inject a $FAULT on $TAG"
    cat <<EOF > fault-inject.conf
    [Service]
    Environment=SNAPPY_TESTING=1
    Environment=SNAPD_FAULT_INJECT=$TAG:$FAULT
    EOF

    echo "Wait for the system to be seeded first"
    remote.exec "sudo snap wait system seed.loaded"

    # Install the drop-in in the nested system and restart snapd so that the
    # fault injection environment is picked up
    remote.push fault-inject.conf
    remote.exec "sudo mkdir -p /etc/systemd/system/snapd.service.d"
    remote.exec "sudo cp -v fault-inject.conf /etc/systemd/system/snapd.service.d/"
    remote.exec "sudo systemctl daemon-reload"
    remote.exec "sudo systemctl restart snapd.service"

execute: |
    # install the component
    comp_file=pc-kernel+wifi-comp_1.0.comp
    remote.push "$comp_file"
    change_id="$(remote.exec sudo snap install --dangerous --no-wait "$comp_file")"

    echo "And snap kernel component is installed"
    # A failure of "snap watch" is tolerated, the retry below waits until the
    # change reaches a final status
    remote.exec "snap watch $change_id" || true
    # shellcheck disable=SC2016
    retry --wait 1 -n 60 --env "CHANGE_ID=$change_id" sh -c 'remote.exec snap changes | MATCH "${CHANGE_ID} .* (Done|Error) .*"'

    # Check if the fault was injected
    remote.exec "test -e /var/lib/snapd/faults/$TAG:$FAULT"

    if [ "$FAULT" = panic ]; then
        echo "Ensure the panic was injected and triggered stamp file is present"
        remote.exec "sudo journalctl -u snapd | grep -A 3 panic" > output
        MATCH "$TAG:$FAULT" < output
        MATCH "osutil.injectFault" < output
    fi

    FINAL_STATUS="$(remote.exec snap changes | grep -E "^$change_id .*" | awk ' {print $2} ')"
    KERNEL_VERSION=$(remote.exec uname -r)

    echo "Ensure the change has no steps with Error"
    [ "$FINAL_STATUS" = Done ]
    remote.exec "snap change $change_id" | NOMATCH "^Error .*"

    echo "Ensure the component is installed"
    remote.exec "test -e /var/lib/snapd/kernel/pc-kernel/x1/lib/modules/$KERNEL_VERSION/updates/wifi-comp"
118 changes: 118 additions & 0 deletions tests/nested/manual/core20-fault-inject-on-remodel/task.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
summary: Ensure that a remodel can be completed when a panic occurs during the process

details: |
    The test checks that if a fault is injected during a remodel,
    then the change continues and is completed with a predictable outcome.

systems: [ubuntu-2*]

environment:
    NESTED_CUSTOM_MODEL: $TESTSLIB/assertions/valid-for-testing-pc-{VERSION}.model
    NESTED_ENABLE_TPM: true
    NESTED_ENABLE_SECURE_BOOT: true
    NESTED_BUILD_SNAPD_FROM_CURRENT: true

    # Each variant defines the tag on which the fault is injected (TAG), the
    # kind of fault (FAULT) and the final status expected for the remodel
    # change (STATUS).
    TAG/gadget_panic_remodel_boot_assets: remodel-boot-assets
    FAULT/gadget_panic_remodel_boot_assets: panic
    STATUS/gadget_panic_remodel_boot_assets: Done
    TAG/gadget_reboot_remodel_boot_assets: remodel-boot-assets
    FAULT/gadget_reboot_remodel_boot_assets: reboot
    STATUS/gadget_reboot_remodel_boot_assets: Error
    TAG/kernel_panic_remodel_boot_assets: remodel-boot-assets
    FAULT/kernel_panic_remodel_boot_assets: panic
    STATUS/kernel_panic_remodel_boot_assets: Done
    TAG/kernel_reboot_remodel_boot_assets: remodel-boot-assets
    FAULT/kernel_reboot_remodel_boot_assets: reboot
    STATUS/kernel_reboot_remodel_boot_assets: Error

prepare: |
    tests.nested build-image core
    tests.nested create-vm core
    remote.wait-for device-initialized

    # automatically cleaned up in restore
    echo "Inject a $FAULT on $TAG"
    cat <<EOF > fault-inject.conf
    [Service]
    Environment=SNAPPY_TESTING=1
    Environment=SNAPD_FAULT_INJECT=$TAG:$FAULT
    EOF

    echo "Wait for the system to be seeded first"
    remote.exec "sudo snap wait system seed.loaded"

    # Install the drop-in in the nested system and restart snapd so that the
    # fault injection environment is picked up
    remote.push fault-inject.conf
    remote.exec "sudo mkdir -p /etc/systemd/system/snapd.service.d"
    remote.exec "sudo cp -v fault-inject.conf /etc/systemd/system/snapd.service.d/"
    remote.exec "sudo systemctl daemon-reload"
    remote.exec "sudo systemctl restart snapd.service"

execute: |
    VERSION="$(tests.nested show version)"

    # REBOOT/SECOND_REBOOT tell how many reboots are awaited after the
    # remodel is requested
    REBOOT=false
    SECOND_REBOOT=false
    if [ "$FAULT" = reboot ]; then
        REBOOT=true
    fi

    case "$SPREAD_VARIANT" in
        gadget_*)
            # only the reboot caused by an injected reboot fault is awaited
            ;;
        kernel_*)
            if [ "$FAULT" = reboot ]; then
                # a second reboot is awaited after the injected one
                SECOND_REBOOT=true
            else
                # a reboot is awaited even when the injected fault is a panic
                REBOOT=true
            fi
            ;;
        *)
            echo "scenario no supported: $SPREAD_VARIANT"
            exit 1
            ;;
    esac

    # Remodel
    boot_id="$(tests.nested boot-id)"
    remote.push "$TESTSLIB/assertions/valid-for-testing-pc-revno-2-$VERSION.model"
    change_id="$(remote.exec sudo snap remodel --no-wait "valid-for-testing-pc-revno-2-$VERSION.model")"

    if [ "$REBOOT" = true ]; then
        remote.wait-for reboot "$boot_id"
        boot_id="$(tests.nested boot-id)"
    fi
    if [ "$SECOND_REBOOT" = true ]; then
        remote.wait-for reboot "$boot_id"
    fi

    echo "And snap remodel is completed"
    # A failure of "snap watch" is tolerated, the retry below waits until the
    # change reaches a final status
    remote.exec "snap watch $change_id" || true
    # shellcheck disable=SC2016
    retry --wait 1 -n 300 --env "CHANGE_ID=$change_id" sh -c 'remote.exec snap changes | MATCH "${CHANGE_ID} .* (Done|Error) .*"'

    # Check if the fault was injected
    remote.exec "test -e /var/lib/snapd/faults/$TAG:$FAULT"

    if [ "$FAULT" = panic ]; then
        echo "Ensure the panic was injected and triggered stamp file is present"
        remote.exec "sudo journalctl -u snapd | grep -A 3 panic" > output
        MATCH "$TAG:$FAULT" < output
        MATCH "osutil.injectFault" < output
    fi

    FINAL_STATUS="$(remote.exec snap changes | grep -E "^$change_id .*" | awk ' {print $2} ')"
    [ "$FINAL_STATUS" = "$STATUS" ]

    # NOTE(review): presumably hello-world is only required by the revno-2
    # model - confirm against the model assertion
    if [ "$FINAL_STATUS" = Error ]; then
        echo "Ensure the remodel was not done"
        not remote.exec "snap list hello-world"
    else
        echo "Ensure the remodel was done"
        remote.exec "snap list hello-world"
    fi
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#cloud-config
datasource_list: [None]
users:
- name: user1
sudo: "ALL=(ALL) NOPASSWD:ALL"
lock_passwd: false
plain_text_passwd: "ubuntu"
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Default configuration for the "system" entry
defaults:
  system:
    refresh:
      # "@HOLD-TIME@" looks like a placeholder replaced before this file is
      # used - TODO confirm against the test which consumes it
      hold: "@HOLD-TIME@"
    journal:
      persistent: true
Loading

0 comments on commit d7f8099

Please sign in to comment.