Skip to content

Commit

Permalink
[Integration Test Framework] fix createTempDir and flaky tests (#5409)
Browse files Browse the repository at this point in the history
createTempDir registers a test cleanup function to remove the folder it
created. However, on Windows this folder sometimes fails to be
removed because there are still open file handles for the files
within the folder.

We fix this problem by calling install.RemovePath, which will retry removing
the folder for about 2s. This is a very similar approach to
what Go's t.TempDir does.

Fix the flakiness of the TestUpgradeHandler* tests by reworking the
mockUpgradeManager: it now accepts a function for its Upgrade method,
and their implementation is goroutine safe.
  • Loading branch information
belimawr authored Sep 5, 2024
1 parent 65e7913 commit 1242e71
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ package handlers

import (
"context"
"errors"
"sync/atomic"
"testing"
"time"

"github.com/stretchr/testify/require"

Expand All @@ -25,8 +28,15 @@ import (
)

type mockUpgradeManager struct {
msgChan chan string
completedChan chan struct{}
UpgradeFn func(
ctx context.Context,
version string,
sourceURI string,
action *fleetapi.ActionUpgrade,
details *details.Details,
skipVerifyOverride bool,
skipDefaultPgp bool,
pgpBytes ...string) (reexec.ShutdownCallbackFn, error)
}

func (u *mockUpgradeManager) Upgradeable() bool {
Expand All @@ -37,15 +47,25 @@ func (u *mockUpgradeManager) Reload(rawConfig *config.Config) error {
return nil
}

func (u *mockUpgradeManager) Upgrade(ctx context.Context, version string, sourceURI string, action *fleetapi.ActionUpgrade, details *details.Details, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) (_ reexec.ShutdownCallbackFn, err error) {
select {
case <-u.completedChan:
u.msgChan <- "completed " + version
return nil, nil
case <-ctx.Done():
u.msgChan <- "canceled " + version
return nil, ctx.Err()
}
// Upgrade forwards every argument, unchanged, to the UpgradeFn stored on the
// mock and returns whatever that function returns. UpgradeFn must be set
// before Upgrade is called; calling a nil function value panics.
func (u *mockUpgradeManager) Upgrade(
	ctx context.Context,
	version string,
	sourceURI string,
	action *fleetapi.ActionUpgrade,
	details *details.Details,
	skipVerifyOverride bool,
	skipDefaultPgp bool,
	pgpBytes ...string,
) (reexec.ShutdownCallbackFn, error) {
	// The mock has no behaviour of its own: each test supplies it via UpgradeFn.
	fn := u.UpgradeFn
	return fn(ctx, version, sourceURI, action, details, skipVerifyOverride, skipDefaultPgp, pgpBytes...)
}

func (u *mockUpgradeManager) Ack(ctx context.Context, acker acker.Acker) error {
Expand All @@ -65,8 +85,7 @@ func TestUpgradeHandler(t *testing.T) {
log, _ := logger.New("", false)

agentInfo := &info.AgentInfo{}
msgChan := make(chan string)
completedChan := make(chan struct{})
upgradeCalledChan := make(chan struct{})

// Create and start the coordinator
c := coordinator.New(
Expand All @@ -76,7 +95,21 @@ func TestUpgradeHandler(t *testing.T) {
agentInfo,
component.RuntimeSpecs{},
nil,
&mockUpgradeManager{msgChan: msgChan, completedChan: completedChan},
&mockUpgradeManager{
UpgradeFn: func(
ctx context.Context,
version string,
sourceURI string,
action *fleetapi.ActionUpgrade,
details *details.Details,
skipVerifyOverride bool,
skipDefaultPgp bool,
pgpBytes ...string) (reexec.ShutdownCallbackFn, error) {

upgradeCalledChan <- struct{}{}
return nil, nil
},
},
nil, nil, nil, nil, nil, false)
//nolint:errcheck // We don't need the termination state of the Coordinator
go c.Run(ctx)
Expand All @@ -86,11 +119,14 @@ func TestUpgradeHandler(t *testing.T) {
Version: "8.3.0", SourceURI: "http://localhost"}}
ack := noopacker.New()
err := u.Handle(ctx, &a, ack)
// indicate that upgrade is completed
close(completedChan)
require.NoError(t, err)
msg := <-msgChan
require.Equal(t, "completed 8.3.0", msg)

// Make sure this test does not deadlock or wait for too long
select {
case <-time.Tick(50 * time.Millisecond):
t.Fatal("mockUpgradeManager.Upgrade was not called")
case <-upgradeCalledChan:
}
}

func TestUpgradeHandlerSameVersion(t *testing.T) {
Expand All @@ -102,18 +138,37 @@ func TestUpgradeHandlerSameVersion(t *testing.T) {
log, _ := logger.New("", false)

agentInfo := &info.AgentInfo{}
msgChan := make(chan string)
completedChan := make(chan struct{})
upgradeCalledChan := make(chan struct{})

// Create and start the Coordinator
upgradeCalled := atomic.Bool{}
c := coordinator.New(
log,
configuration.DefaultConfiguration(),
logger.DefaultLogLevel,
agentInfo,
component.RuntimeSpecs{},
nil,
&mockUpgradeManager{msgChan: msgChan, completedChan: completedChan},
&mockUpgradeManager{
UpgradeFn: func(
ctx context.Context,
version string,
sourceURI string,
action *fleetapi.ActionUpgrade,
details *details.Details,
skipVerifyOverride bool,
skipDefaultPgp bool,
pgpBytes ...string) (reexec.ShutdownCallbackFn, error) {

if upgradeCalled.CompareAndSwap(false, true) {
upgradeCalledChan <- struct{}{}
return nil, nil
}
err := errors.New("mockUpgradeManager.Upgrade called more than once")
t.Error(err.Error())
return nil, err
},
},
nil, nil, nil, nil, nil, false)
//nolint:errcheck // We don't need the termination state of the Coordinator
go c.Run(ctx)
Expand All @@ -126,10 +181,13 @@ func TestUpgradeHandlerSameVersion(t *testing.T) {
err2 := u.Handle(ctx, &a, ack)
require.NoError(t, err1)
require.NoError(t, err2)
// indicate that upgrade is completed
close(completedChan)
msg := <-msgChan
require.Equal(t, "completed 8.3.0", msg)

// Make sure this test does not deadlock or wait for too long
select {
case <-time.Tick(50 * time.Millisecond):
t.Fatal("mockUpgradeManager.Upgrade was not called")
case <-upgradeCalledChan:
}
}

func TestUpgradeHandlerNewVersion(t *testing.T) {
Expand All @@ -139,10 +197,9 @@ func TestUpgradeHandlerNewVersion(t *testing.T) {
defer cancel()

log, _ := logger.New("", false)
upgradeCalledChan := make(chan string)

agentInfo := &info.AgentInfo{}
msgChan := make(chan string)
completedChan := make(chan struct{})

// Create and start the Coordinator
c := coordinator.New(
Expand All @@ -152,7 +209,27 @@ func TestUpgradeHandlerNewVersion(t *testing.T) {
agentInfo,
component.RuntimeSpecs{},
nil,
&mockUpgradeManager{msgChan: msgChan, completedChan: completedChan},
&mockUpgradeManager{
UpgradeFn: func(
ctx context.Context,
version string,
sourceURI string,
action *fleetapi.ActionUpgrade,
details *details.Details,
skipVerifyOverride bool,
skipDefaultPgp bool,
pgpBytes ...string) (reexec.ShutdownCallbackFn, error) {

defer func() {
upgradeCalledChan <- version
}()
if version == "8.2.0" {
return nil, errors.New("upgrade to 8.2.0 will always fail")
}

return nil, nil
},
},
nil, nil, nil, nil, nil, false)
//nolint:errcheck // We don't need the termination state of the Coordinator
go c.Run(ctx)
Expand All @@ -163,14 +240,25 @@ func TestUpgradeHandlerNewVersion(t *testing.T) {
a2 := fleetapi.ActionUpgrade{Data: fleetapi.ActionUpgradeData{
Version: "8.5.0", SourceURI: "http://localhost"}}
ack := noopacker.New()

checkMsg := func(c <-chan string, expected, errMsg string) {
t.Helper()
// Make sure this test does not deadlock or wait for too long
// For some reason < 1s sometimes makes the test fail.
select {
case <-time.Tick(1300 * time.Millisecond):
t.Fatal("timed out waiting for Upgrade to return")
case msg := <-c:
require.Equal(t, expected, msg, errMsg)
}
}

// Send both upgrade actions, a1 will error before a2 succeeds
err1 := u.Handle(ctx, &a1, ack)
require.NoError(t, err1)
checkMsg(upgradeCalledChan, "8.2.0", "first call must be with version 8.2.0")

err2 := u.Handle(ctx, &a2, ack)
require.NoError(t, err2)
msg1 := <-msgChan
require.Equal(t, "canceled 8.2.0", msg1)
// indicate that upgrade is completed
close(completedChan)
msg2 := <-msgChan
require.Equal(t, "completed 8.5.0", msg2)
checkMsg(upgradeCalledChan, "8.5.0", "second call to Upgrade must be with version 8.5.0")
}
3 changes: 2 additions & 1 deletion pkg/testing/fixture.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (

"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details"
"github.com/elastic/elastic-agent/internal/pkg/agent/install"
"github.com/elastic/elastic-agent/pkg/component"
"github.com/elastic/elastic-agent/pkg/control"
"github.com/elastic/elastic-agent/pkg/control/v2/client"
Expand Down Expand Up @@ -1209,7 +1210,7 @@ func createTempDir(t *testing.T) string {

cleanup := func() {
if !t.Failed() {
if err := os.RemoveAll(tempDir); err != nil {
if err := install.RemovePath(tempDir); err != nil {
t.Errorf("could not remove temp dir '%s': %s", tempDir, err)
}
} else {
Expand Down
1 change: 0 additions & 1 deletion testing/integration/event_logging_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ func TestEventLogFile(t *testing.T) {
Local: true,
Sudo: false,
})
t.Skip("Flaky test: https://github.com/elastic/elastic-agent/issues/5397")
ctx, cancel := testcontext.WithDeadline(
t,
context.Background(),
Expand Down

0 comments on commit 1242e71

Please sign in to comment.