Skip to content

Commit

Permalink
ci: add workaround for WSL hanging in e2e tests
Browse files Browse the repository at this point in the history
Signed-off-by: Austin Vazquez <[email protected]>
  • Loading branch information
austinvazquez committed Jun 26, 2024
1 parent 9c1caf0 commit d6dde28
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 3 deletions.
5 changes: 5 additions & 0 deletions e2e/vm/vm_darwin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,8 @@ var resetDisks = func(_ *option.Option, installed bool) {
}
gomega.Expect(os.RemoveAll(dataDiskDir)).ShouldNot(gomega.HaveOccurred())
}

// shutdownWSL is the darwin counterpart of the WSL shutdown hook used by the
// VM tests. There is nothing to shut down on darwin, so it performs no work
// and always reports success.
var shutdownWSL = func() error {
	return nil
}
6 changes: 3 additions & 3 deletions e2e/vm/vm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
package vm

import (
"os/exec"
"runtime"
"time"

Expand All @@ -29,16 +28,17 @@ var resetVM = func(o *option.Option) {
// clean up iptables
//nolint:lll // link to explanation
// https://docs.rancherdesktop.io/troubleshooting-tips/#q-how-do-i-fix-fata0005-subnet-1040024-overlaps-with-other-one-on-this-address-space-when-running-a-container-using-nerdctl-run
gomega.Expect(exec.Command("wsl", "--shutdown").Run()).Should(gomega.BeNil())
gomega.Expect(shutdownWSL()).Should(gomega.BeNil())
}

ginkgo.DeferCleanup(func() {
writeFile(finchConfigFilePath, origFinchCfg)
command.New(o, virtualMachineRootCmd, "stop", "-f").WithoutCheckingExitCode().WithTimeoutInSeconds(20).Run()
time.Sleep(1 * time.Second)
command.New(o, virtualMachineRootCmd, "remove", "-f").WithoutCheckingExitCode().WithTimeoutInSeconds(10).Run()
time.Sleep(1 * time.Second)
if runtime.GOOS == "windows" {
gomega.Expect(exec.Command("wsl", "--shutdown").Run()).Should(gomega.BeNil())
gomega.Expect(shutdownWSL()).Should(gomega.BeNil())
}
time.Sleep(1 * time.Second)
command.New(o, virtualMachineRootCmd, "init").WithoutCheckingExitCode().WithTimeoutInSeconds(160).Run()
Expand Down
41 changes: 41 additions & 0 deletions e2e/vm/vm_windows_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@
package vm

import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"testing"
"time"

"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
Expand Down Expand Up @@ -60,3 +64,40 @@ var resetDisks = func(_ *option.Option, _ bool) {
dataDiskDir := filepath.Join(finchRootDir, ".finch", ".disks")
gomega.Expect(os.RemoveAll(dataDiskDir)).ShouldNot(gomega.HaveOccurred())
}

// shutdownWSL is a wrapper function for "wsl --shutdown".
//
// This is a workaround for https://github.com/microsoft/WSL/issues/8529
//
// The shutdown is first attempted with a 180 second timeout. If that attempt
// fails for any reason (including the timeout), WSL is suspected of hanging:
// the failure is logged, the WSL service process is force-killed (60 second
// timeout) and the shutdown command is retried once (60 second timeout).
//
// The function therefore runs for at most 300 seconds. It returns nil when
// either shutdown attempt succeeds. Otherwise it returns the error of the
// kill or retry step; when that step failed because its timeout expired, the
// returned error wraps context.DeadlineExceeded.
var shutdownWSL = func() error {
	// run executes a single command under its own deadline and releases the
	// context as soon as the command has finished.
	run := func(timeout time.Duration, name string, args ...string) error {
		ctx, cancel := context.WithTimeout(context.Background(), timeout)
		defer cancel()

		err := exec.CommandContext(ctx, name, args...).Run()
		if err != nil && ctx.Err() != nil {
			// When the deadline kills the process, Run reports the process
			// exit error rather than the context error. Wrap the context
			// error so callers can detect the timeout with errors.Is.
			return fmt.Errorf("%w: %v", ctx.Err(), err)
		}

		return err
	}

	err := run(180*time.Second, "wsl", "--shutdown")
	if err == nil {
		return nil
	}

	ginkgo.GinkgoLogr.Error(err, "WSL shutdown failed", "time", time.Now().Format(time.RFC3339))

	// wsl might be hung, kill the wsl service and try again.
	// https://github.com/microsoft/WSL/issues/8529
	if err := run(60*time.Second, "taskkill", "/f", "/im", "wslservice.exe"); err != nil {
		ginkgo.GinkgoLogr.Error(err, "WSL task kill failed", "time", time.Now().Format(time.RFC3339))
		return fmt.Errorf("unable to kill wsl service: %w", err)
	}

	if err := run(60*time.Second, "wsl", "--shutdown"); err != nil {
		return fmt.Errorf("unable to shutdown wsl: %w", err)
	}

	return nil
}

0 comments on commit d6dde28

Please sign in to comment.