tests: adding extra scenarios for fault injection (canonical#14489)

* tests: adding missing scenarios for fault injection This change adds some scenarios that were missing from the fault injection checks. It covers fault/reboot for: - install snapd --dangerous - remodel - install kernel component - update boot config * fix shellcheck errors * adding a few extra workers to make sure the full run can be completed * remove channel from test which is not used * update the expected values to Done in all the scenarios * update the result status in core20-fault-inject-on-refresh test * Fix tests with new status check * fix shellcheck error * fix failing tests * fix shellcheck * revert change on refresh scenario
pedronis · Sep 27, 2024 · d7f8099 · d7f8099
1 parent eb3195d
commit d7f8099
Show file tree

Hide file tree

Showing 9 changed files with 411 additions and 23 deletions.
diff --git a/spread.yaml b/spread.yaml
@@ -301,15 +301,15 @@ backends:
             - ubuntu-20.04-64:
                   image: ubuntu-2004-64-virt-enabled
                   storage: 20G
-                  workers: 10
+                  workers: 12
             - ubuntu-22.04-64:
                   image: ubuntu-2204-64-virt-enabled
                   storage: 25G
-                  workers: 12
+                  workers: 14
             - ubuntu-24.04-64:
                   image: ubuntu-2404-64-virt-enabled
                   storage: 25G
-                  workers: 12
+                  workers: 14
 
     google-nested-arm:
         type: google

diff --git a/tests/nested/core/core20-fault-inject-on-install/task.yaml b/tests/nested/core/core20-fault-inject-on-install/task.yaml
@@ -0,0 +1,79 @@
+summary: Ensure that snapd snap can be installed when a panic/reboot occurs during the process
+
+details: |
+    The test checks that if a fault is injected during the snapd snap install,
+    then the change continues and is completed with a predictable outcome.
+
+systems: [ubuntu-2*]
+
+environment: 
+    TAG/snapd_panic_auto_connect: after-auto-connect
+    FAULT/snapd_panic_auto_connect: panic
+    TAG/snapd_reboot_auto_connect: after-auto-connect
+    FAULT/snapd_reboot_auto_connect: reboot
+    STATUS: Done
+
+prepare: |
+    # automatically cleaned up in restore
+    echo "Inject a $FAULT on $TAG"
+    cat <<EOF > fault-inject.conf
+    [Service]
+    Environment=SNAPPY_TESTING=1
+    Environment=SNAPD_FAULT_INJECT=$TAG:$FAULT
+    EOF
+
+    echo "Wait for the system to be seeded first"
+    remote.exec "sudo snap wait system seed.loaded"
+
+    remote.push fault-inject.conf
+    remote.exec "sudo mkdir -p /etc/systemd/system/snapd.service.d"
+    remote.exec "sudo cp -v fault-inject.conf /etc/systemd/system/snapd.service.d/"
+    remote.exec "sudo systemctl daemon-reload"
+    remote.exec "sudo systemctl restart snapd.service"
+
+    cp "$(ls /tmp/work-dir/snapd_snap/snapd_*.snap)" snapd.snap
+
+execute: |
+    SNAP=snapd
+    REBOOT=false
+
+    if [ "$FAULT" = reboot ]; then
+        REBOOT=true
+    fi
+
+    # Get the initial snap revision
+    INITIAL_REV="$(remote.exec snap list | grep -E "^$SNAP .*" | awk ' {print $3} ')"
+
+    # Install the snap; in the reboot variant the system reboots before the change is completed
+    boot_id="$(tests.nested boot-id)"
+    remote.push "$PWD/${SNAP}.snap"
+    change_id="$(remote.exec "sudo snap install --dangerous --no-wait ${SNAP}.snap")"
+
+    if [ "$REBOOT" = true ]; then
+        remote.wait-for reboot "$boot_id"
+        boot_id="$(tests.nested boot-id)"
+    fi
+
+    echo "And snap refresh is completed"
+    remote.exec "snap watch $change_id" || true
+    # shellcheck disable=SC2016
+    retry --wait 1 -n 60 --env "CHANGE_ID=$change_id" --env "SNAP=$SNAP" sh -c 'remote.exec snap changes | MATCH "${CHANGE_ID} .* (Done|Error) .*"'
+
+    # Check if the fault was injected
+    remote.exec "test -e /var/lib/snapd/faults/$TAG:$FAULT"
+
+    if [ "$FAULT" = panic ]; then
+        echo "Ensure the panic was injected and triggered stamp file is present"
+        remote.exec "sudo journalctl -u snapd | grep -A 3 panic" > output
+        MATCH "$TAG":$FAULT < output
+        MATCH "osutil.injectFault" < output
+    fi
+
+    FINAL_STATUS="$(remote.exec snap changes | grep -E "^$change_id .*" | awk ' {print $2} ')"
+    CURRENT_REV="$(remote.exec snap list | grep -E "^$SNAP .*" | awk ' {print $3} ')"
+
+    echo "Ensure the change has no steps with Error"
+    [ "$FINAL_STATUS" = Done ]
+    remote.exec "snap change $change_id" | NOMATCH "^Error .*"
+    echo "Ensure the initial revision is not the current one"
+    test "$INITIAL_REV" != "$CURRENT_REV"
diff --git a/tests/nested/core/core20-fault-inject-on-refresh/task.yaml b/tests/nested/core/core20-fault-inject-on-refresh/task.yaml
@@ -45,25 +45,6 @@ environment:
     TAG/gadget_reboot_refresh_gadget_assets: refresh-gadget-assets
     FAULT/gadget_reboot_refresh_gadget_assets: reboot
 
-    # TODO: Add the following scenarios in a different test
-    # These should go in a remodel test
-    #TAG/gadget_remodel_boot_assets: remodel-boot-assets
-    #TAG/kernel_remodel_boot_assets: remodel-boot-assets
-
-    # This needs a change in the gadget yaml like in cmdline-option test
-    # TAG/gadget_update_command_line_gadget: update-command-line-gadget
-    # TAG/gadget_update_config_bootloader: update-config-bootloader
-
-    # For this scenario we need to refresh to a kernel with components
-    # See this test nested/manual/kernel-modules-components
-    # TAG/kernel_panic_prepare_kernel_components: prepare-kernel-components
-    # FAULT/kernel_panic_prepare_kernel_components: panic
-
-    # This scenario need to install a new snapd with --dangerous instead of
-    # refresh from the store
-    # TAG/snapd_panic_auto_connect: after-auto-connect
-    # FAULT/snapd_panic_auto_connect: panic
-
 prepare: |
     # automatically cleaned up in restore
     echo "Inject a $FAULT on $TAG"
@@ -143,7 +124,7 @@ execute: |
     echo "And snap refresh is completed"
     remote.exec "snap watch $change_id" || true
     # shellcheck disable=SC2016
-    retry --wait 1 -n 60 --env "CHANGE_ID=$change_id" --env "SNAP=$SNAP" sh -c 'remote.exec snap changes | MATCH "${CHANGE_ID} .* (Done|Error) .*"'
+    retry --wait 1 -n 60 --env "CHANGE_ID=$change_id" sh -c 'remote.exec snap changes | MATCH "${CHANGE_ID} .* (Done|Error) .*"'
 
     # Check if the fault was injected
     remote.exec "test -e /var/lib/snapd/faults/$TAG:$FAULT"
@@ -158,6 +139,7 @@ execute: |
     FINAL_STATUS="$(remote.exec snap changes | grep -E "^$change_id .*" | awk ' {print $2} ')"
     CURRENT_REV="$(remote.exec snap list | grep -E "^$SNAP .*" | awk ' {print $3} ')"
 
+    # Check the final status is the expected one
     if [ "$FINAL_STATUS" = Error ]; then
         echo "Ensure the change has steps with Error"
         remote.exec "snap change $change_id" | MATCH "^Error .*"

diff --git a/tests/nested/manual/core20-fault-inject-on-install-component/component.yaml b/tests/nested/manual/core20-fault-inject-on-install-component/component.yaml
@@ -0,0 +1,5 @@
+component: pc-kernel+wifi-comp
+type: kernel-modules
+version: 1.0
+summary: wifi simulator
+description: wifi simulator for testing purposes
diff --git a/tests/nested/manual/core20-fault-inject-on-install-component/task.yaml b/tests/nested/manual/core20-fault-inject-on-install-component/task.yaml
@@ -0,0 +1,91 @@
+summary: Ensure that a kernel component can be installed when a panic occurs during the process
+
+details: |
+    The test checks that if a fault is injected during the installation of a kernel component,
+    then the change continues and is completed with a predictable outcome.
+
+systems: [-ubuntu-1*, -ubuntu-20*, -ubuntu-22*]
+
+environment: 
+    TAG/kernel_panic_prepare_kernel_components: prepare-kernel-components
+    FAULT/kernel_panic_prepare_kernel_components: panic
+
+    NESTED_BUILD_SNAPD_FROM_CURRENT: true
+    NESTED_REPACK_KERNEL_SNAP: false
+    NESTED_ENABLE_OVMF: true
+
+prepare: |
+    # Modify kernel and create a component
+    VERSION="$(tests.nested show version)"
+    snap download --channel="$VERSION"/beta pc-kernel
+    unsquashfs -d kernel pc-kernel_*.snap
+    kern_ver=$(find kernel/modules/* -maxdepth 0 -printf "%f\n")
+    comp_ko_dir=wifi-comp/modules/"$kern_ver"/wireless/
+    mkdir -p "$comp_ko_dir"
+    mkdir -p wifi-comp/meta/
+    cp component.yaml wifi-comp/meta/
+    hwsim_path=$(find kernel -name mac80211_hwsim.ko\*)
+    cp "$hwsim_path" "$comp_ko_dir"
+    snap pack wifi-comp
+
+    # Create kernel without the kernel module
+    rm "$hwsim_path"
+    # depmod wants a lib subdir, fake it and remove after invocation
+    mkdir kernel/lib
+    ln -s ../modules kernel/lib/modules
+    depmod -b kernel/ "$kern_ver"
+    rm -rf kernel/lib
+    rm pc-kernel_*.snap
+    # append component meta-information
+    printf 'components:\n  wifi-comp:\n    type: kernel-modules\n' >> kernel/meta/snap.yaml
+    snap pack kernel
+
+    cp pc-kernel_*.snap "$(tests.nested get extra-snaps-path)"
+    tests.nested build-image core
+    tests.nested create-vm core
+
+    echo "Inject a $FAULT on $TAG"
+    cat <<EOF > fault-inject.conf
+    [Service]
+    Environment=SNAPPY_TESTING=1
+    Environment=SNAPD_FAULT_INJECT=$TAG:$FAULT
+    EOF
+
+    echo "Wait for the system to be seeded first"
+    remote.exec "sudo snap wait system seed.loaded"
+
+    remote.push fault-inject.conf
+    remote.exec "sudo mkdir -p /etc/systemd/system/snapd.service.d"
+    remote.exec "sudo cp -v fault-inject.conf /etc/systemd/system/snapd.service.d/"
+    remote.exec "sudo systemctl daemon-reload"
+    remote.exec "sudo systemctl restart snapd.service"
+
+execute: |
+    # install the component
+    comp_file=pc-kernel+wifi-comp_1.0.comp
+    remote.push "$comp_file"
+    change_id="$(remote.exec sudo snap install --dangerous --no-wait "$comp_file")"
+
+    echo "And snap kernel component is installed"
+    remote.exec "snap watch $change_id" || true
+    # shellcheck disable=SC2016
+    retry --wait 1 -n 60 --env "CHANGE_ID=$change_id" sh -c 'remote.exec snap changes | MATCH "${CHANGE_ID} .* (Done|Error) .*"'
+
+    # Check if the fault was injected
+    remote.exec "test -e /var/lib/snapd/faults/$TAG:$FAULT"
+
+    if [ "$FAULT" = panic ]; then
+        echo "Ensure the panic was injected and triggered stamp file is present"
+        remote.exec "sudo journalctl -u snapd | grep -A 3 panic" > output
+        MATCH "$TAG":$FAULT < output
+        MATCH "osutil.injectFault" < output
+    fi
+
+    FINAL_STATUS="$(remote.exec snap changes | grep -E "^$change_id .*" | awk ' {print $2} ')"
+    KERNEL_VERSION=$(remote.exec uname -r)
+
+    echo "Ensure the change has no steps with Error"
+    [ "$FINAL_STATUS" = Done ]
+    remote.exec "snap change $change_id" | NOMATCH "^Error .*"
+    echo "Ensure the component is installed"
+    remote.exec "test -e /var/lib/snapd/kernel/pc-kernel/x1/lib/modules/$KERNEL_VERSION/updates/wifi-comp"
diff --git a/tests/nested/manual/core20-fault-inject-on-remodel/task.yaml b/tests/nested/manual/core20-fault-inject-on-remodel/task.yaml
@@ -0,0 +1,118 @@
+summary: Ensure that a remodel can be completed when a panic occurs during the process
+
+details: |
+    The test checks that if a fault is injected during a remodel,
+    then the change continues and is completed with a predictable outcome.
+
+systems: [ubuntu-2*]
+
+environment:
+    NESTED_CUSTOM_MODEL: $TESTSLIB/assertions/valid-for-testing-pc-{VERSION}.model
+    NESTED_ENABLE_TPM: true
+    NESTED_ENABLE_SECURE_BOOT: true
+    NESTED_BUILD_SNAPD_FROM_CURRENT: true
+
+    TAG/gadget_panic_remodel_boot_assets: remodel-boot-assets
+    FAULT/gadget_panic_remodel_boot_assets: panic
+    STATUS/gadget_panic_remodel_boot_assets: Done
+    TAG/gadget_reboot_remodel_boot_assets: remodel-boot-assets
+    FAULT/gadget_reboot_remodel_boot_assets: reboot
+    STATUS/gadget_reboot_remodel_boot_assets: Error
+    TAG/kernel_panic_remodel_boot_assets: remodel-boot-assets
+    FAULT/kernel_panic_remodel_boot_assets: panic
+    STATUS/kernel_panic_remodel_boot_assets: Done
+    TAG/kernel_reboot_remodel_boot_assets: remodel-boot-assets
+    FAULT/kernel_reboot_remodel_boot_assets: reboot
+    STATUS/kernel_reboot_remodel_boot_assets: Error
+
+prepare: |
+    tests.nested build-image core
+    tests.nested create-vm core
+
+    remote.wait-for device-initialized
+
+    # automatically cleaned up in restore
+    echo "Inject a $FAULT on $TAG"
+    cat <<EOF > fault-inject.conf
+    [Service]
+    Environment=SNAPPY_TESTING=1
+    Environment=SNAPD_FAULT_INJECT=$TAG:$FAULT
+    EOF
+
+    echo "Wait for the system to be seeded first"
+    remote.exec "sudo snap wait system seed.loaded"
+
+    remote.push fault-inject.conf
+    remote.exec "sudo mkdir -p /etc/systemd/system/snapd.service.d"
+    remote.exec "sudo cp -v fault-inject.conf /etc/systemd/system/snapd.service.d/"
+    remote.exec "sudo systemctl daemon-reload"
+    remote.exec "sudo systemctl restart snapd.service"
+
+execute: |
+    VERSION="$(tests.nested show version)"
+    SNAP=
+    REBOOT=false
+    SECOND_REBOOT=false
+
+    if [ "$FAULT" = reboot ]; then
+        REBOOT=true
+    fi
+
+    case "$SPREAD_VARIANT" in
+        gadget_*)
+            SNAP=pc
+            ;;
+        kernel_*)
+            SNAP=pc-kernel
+            if [ "$FAULT" = reboot ]; then
+                SECOND_REBOOT=true
+            else
+                REBOOT=true
+            fi
+            ;;
+        *)
+            echo "scenario no supported: $SPREAD_VARIANT"
+            exit 1
+            ;;
+    esac
+        
+    # Remodel
+    boot_id="$(tests.nested boot-id)"
+
+    remote.push "$TESTSLIB/assertions/valid-for-testing-pc-revno-2-$VERSION.model"
+    change_id="$(remote.exec sudo snap remodel --no-wait "valid-for-testing-pc-revno-2-$VERSION.model")"
+
+    if [ "$REBOOT" = true ]; then
+        remote.wait-for reboot "$boot_id"
+        boot_id="$(tests.nested boot-id)"
+    fi
+
+    if [ "$SECOND_REBOOT" = true ]; then
+        remote.wait-for reboot "$boot_id"
+    fi
+
+    echo "And snap remodel is completed"
+    remote.exec "snap watch $change_id" || true
+    # shellcheck disable=SC2016
+    retry --wait 1 -n 300 --env "CHANGE_ID=$change_id" --env "SNAP=$SNAP" sh -c 'remote.exec snap changes | MATCH "${CHANGE_ID} .* (Done|Error) .*"'
+
+    # Check if the fault was injected
+    remote.exec "test -e /var/lib/snapd/faults/$TAG:$FAULT"
+
+    if [ "$FAULT" = panic ]; then
+        echo "Ensure the panic was injected and triggered stamp file is present"
+        remote.exec "sudo journalctl -u snapd | grep -A 3 panic" > output
+        MATCH "$TAG":$FAULT < output
+        MATCH "osutil.injectFault" < output
+    fi
+
+    FINAL_STATUS="$(remote.exec snap changes | grep -E "^$change_id .*" | awk ' {print $2} ')"
+
+    [ "$FINAL_STATUS" = "$STATUS" ]
+    if [ "$FINAL_STATUS" = Error ]; then
+        echo "Ensure the remodel was not done"
+        not remote.exec "snap list hello-world"
+    else
+        echo "Ensure the remodel was done"
+        remote.exec "snap list hello-world"
+    fi
diff --git a/tests/nested/manual/core20-fault-inject-on-update-config/cloud.conf b/tests/nested/manual/core20-fault-inject-on-update-config/cloud.conf
@@ -0,0 +1,7 @@
+#cloud-config
+datasource_list: [None]
+users:
+  - name: user1
+    sudo: "ALL=(ALL) NOPASSWD:ALL"
+    lock_passwd: false
+    plain_text_passwd: "ubuntu"
diff --git a/tests/nested/manual/core20-fault-inject-on-update-config/defaults.yaml b/tests/nested/manual/core20-fault-inject-on-update-config/defaults.yaml
@@ -0,0 +1,6 @@
+defaults:
+  system:
+    refresh:
+      hold: "@HOLD-TIME@"
+    journal:
+      persistent: true