From 0802e1d94df856d42f4dd8d09bf5d8d2ed5fcd0e Mon Sep 17 00:00:00 2001 From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com> Date: Mon, 11 Sep 2023 16:09:33 +0000 Subject: [PATCH] [8.10](backport #3370) Fix integration tests by waiting for the watcher to finish during upgrade tests (#3381) * Fix integration tests by waiting for the watcher to finish during upgrade tests (#3370) * Wait for watcher to finish as last test action. * Increase the watcher wait duration. * Sleep 10 minutes between upgrades * Speed up standalone tests. * Don't use fast timeout for 7.17 (cherry picked from commit 94764be90ce4a207f2e380390cfb67558095eb9b) * Fix merge conflict. --------- Co-authored-by: Craig MacKenzie --- testing/integration/upgrade_test.go | 95 ++++++++++++++++++----------- 1 file changed, 59 insertions(+), 36 deletions(-) diff --git a/testing/integration/upgrade_test.go b/testing/integration/upgrade_test.go index 517bb8e6718..a900c1253fb 100644 --- a/testing/integration/upgrade_test.go +++ b/testing/integration/upgrade_test.go @@ -9,7 +9,6 @@ package integration import ( "context" "encoding/json" - "errors" "fmt" "io/fs" "net/http" @@ -45,12 +44,19 @@ import ( agtversion "github.com/elastic/elastic-agent/version" ) -const fastWatcherCfg = ` +// The watcher will need the default 10 minutes to complete for a Fleet managed agent, see https://github.com/elastic/elastic-agent/issues/2977. +const defaultWatcherDuration = 10 * time.Minute + +// Configure standalone agents to complete faster to speed up tests. 
+const standaloneWatcherDuration = time.Minute + +// Note: this configuration can't apply to Fleet managed upgrades until https://github.com/elastic/elastic-agent/issues/2977 is resolved +var fastWatcherCfg = fmt.Sprintf(` agent.upgrade.watcher: - grace_period: 1m + grace_period: %s error_check.interval: 15s crash_check.interval: 15s -` +`, standaloneWatcherDuration) // notable versions used in tests @@ -94,8 +100,11 @@ func TestFleetManagedUpgrade(t *testing.T) { err = agentFixture.Prepare(ctx) require.NoError(t, err, "error preparing agent fixture") - err = agentFixture.Configure(ctx, []byte(fastWatcherCfg)) - require.NoError(t, err, "error configuring agent fixture") + t.Cleanup(func() { + // The watcher needs to finish before the agent is uninstalled: https://github.com/elastic/elastic-agent/issues/3371 + waitForUpgradeWatcherToComplete(t, agentFixture, parsedVersion, defaultWatcherDuration) + }) + testUpgradeFleetManagedElasticAgent(t, ctx, info, agentFixture, parsedVersion, define.Version()) }) } @@ -162,10 +171,6 @@ func testUpgradeFleetManagedElasticAgent(t *testing.T, ctx context.Context, info t.Log(`Waiting for enrolled Agent status to be "online"...`) require.Eventually(t, tools.WaitForAgentStatus(t, kibClient, "online"), 10*time.Minute, 15*time.Second, "Agent status is not online") - // Upgrade Watcher check disabled until - // https://github.com/elastic/elastic-agent/issues/2977 is resolved. - // checkUpgradeWatcherRan(t, s.agentFixture) - // We remove the `-SNAPSHOT` suffix because, post-upgrade, the version reported // by the Agent will not contain this suffix, even if a `-SNAPSHOT`-suffixed // version was used as the target version for the upgrade. 
@@ -209,6 +214,11 @@ func TestStandaloneUpgrade(t *testing.T) { err = agentFixture.Configure(ctx, []byte(fastWatcherCfg)) require.NoError(t, err, "error configuring agent fixture") + t.Cleanup(func() { + // The watcher needs to finish before the agent is uninstalled: https://github.com/elastic/elastic-agent/issues/3371 + waitForUpgradeWatcherToComplete(t, agentFixture, parsedVersion, standaloneWatcherDuration) + }) + parsedUpgradeVersion, err := version.ParseVersion(define.Version()) require.NoErrorf(t, err, "define.Version() %q cannot be parsed as agent version", define.Version()) skipVerify := version_8_7_0.Less(*parsedVersion) @@ -223,11 +233,13 @@ func TestStandaloneUpgradeWithGPGFallback(t *testing.T) { Sudo: true, // requires Agent installation }) + t.Skip("Fails upgrading to a version that doesn't exist: https://github.com/elastic/elastic-agent/issues/3397") + minVersion := version_8_10_0_SNAPSHOT - parsedVersion, err := version.ParseVersion(define.Version()) + fromVersion, err := version.ParseVersion(define.Version()) require.NoError(t, err) - if parsedVersion.Less(*minVersion) { + if fromVersion.Less(*minVersion) { t.Skipf("Version %s is lower than min version %s", define.Version(), minVersion) } @@ -235,7 +247,7 @@ func TestStandaloneUpgradeWithGPGFallback(t *testing.T) { defer cancel() // previous - toVersion, err := parsedVersion.GetPreviousMinor() + toVersion, err := fromVersion.GetPreviousMinor() require.NoError(t, err, "failed to get previous minor") agentFixture, err := define.NewFixture( t, @@ -249,6 +261,11 @@ func TestStandaloneUpgradeWithGPGFallback(t *testing.T) { err = agentFixture.Configure(ctx, []byte(fastWatcherCfg)) require.NoError(t, err, "error configuring agent fixture") + t.Cleanup(func() { + // The watcher needs to finish before the agent is uninstalled: https://github.com/elastic/elastic-agent/issues/3371 + waitForUpgradeWatcherToComplete(t, agentFixture, fromVersion, standaloneWatcherDuration) + }) + _, defaultPGP := release.PGP() 
firstSeven := string(defaultPGP[:7]) customPGP := strings.Replace( @@ -258,7 +275,7 @@ func TestStandaloneUpgradeWithGPGFallback(t *testing.T) { 1, ) - testStandaloneUpgrade(ctx, t, agentFixture, parsedVersion, toVersion, "", false, false, true, customPGP) + testStandaloneUpgrade(ctx, t, agentFixture, fromVersion, toVersion, "", false, false, true, customPGP) } func TestStandaloneUpgradeToSpecificSnapshotBuild(t *testing.T) { @@ -332,9 +349,14 @@ func TestStandaloneUpgradeToSpecificSnapshotBuild(t *testing.T) { t.Logf("Targeting upgrade to version %+v", upgradeInputVersion) parsedFromVersion, err := version.ParseVersion(define.Version()) + + t.Cleanup(func() { + // The watcher needs to finish before the agent is uninstalled: https://github.com/elastic/elastic-agent/issues/3371 + waitForUpgradeWatcherToComplete(t, agentFixture, parsedFromVersion, standaloneWatcherDuration) + }) + require.NoErrorf(t, err, "define.Version() %q cannot be parsed as agent version", define.Version()) testStandaloneUpgrade(ctx, t, agentFixture, parsedFromVersion, upgradeInputVersion, expectedAgentHashAfterUpgrade, false, true, false, "") - } func getUpgradableVersions(ctx context.Context, t *testing.T, upgradeToVersion string) (upgradableVersions []*version.ParsedSemVer) { @@ -472,8 +494,6 @@ func testStandaloneUpgrade( return checkAgentHealthAndVersion(t, ctx, f, parsedUpgradeVersion.CoreVersion(), parsedUpgradeVersion.IsSnapshot(), expectedAgentHashAfterUpgrade) }, 5*time.Minute, 1*time.Second, "agent never upgraded to expected version") - checkUpgradeWatcherRan(t, f, parsedFromVersion) - if expectedAgentHashAfterUpgrade != "" { aVersion, err := c.Version(ctx) assert.NoError(t, err, "error checking version after upgrade") @@ -551,28 +571,17 @@ func checkLegacyAgentHealthAndVersion(t *testing.T, ctx context.Context, f *ates } -// checkUpgradeWatcherRan asserts that the Upgrade Watcher finished running. 
We use the -// presence of the update marker file as evidence that the Upgrade Watcher is still running -// and the absence of that file as evidence that the Upgrade Watcher is no longer running. -func checkUpgradeWatcherRan(t *testing.T, agentFixture *atesting.Fixture, fromVersion *version.ParsedSemVer) { +// waitForUpgradeWatcherToComplete sleeps for the given timeout so the Upgrade Watcher has time to finish running; it does not verify that the watcher actually exited. +func waitForUpgradeWatcherToComplete(t *testing.T, f *atesting.Fixture, fromVersion *version.ParsedSemVer, timeout time.Duration) { t.Helper() if fromVersion.Less(*version_8_9_0_SNAPSHOT) { - t.Logf("Version %q is too old for a quick update marker check, skipping...", fromVersion) - return + t.Logf("Version %q is too old for a quick update marker check", fromVersion) + timeout = defaultWatcherDuration } - t.Log("Waiting for upgrade watcher to finish running...") - - updateMarkerFile := filepath.Join(agentFixture.WorkDir(), "data", ".update-marker") - require.FileExists(t, updateMarkerFile) - - now := time.Now() - require.Eventuallyf(t, func() bool { - _, err := os.Stat(updateMarkerFile) - return errors.Is(err, fs.ErrNotExist) - }, 2*time.Minute, 15*time.Second, "agent never removed update marker") - t.Logf("Upgrade Watcher completed in %s", time.Now().Sub(now)) + t.Logf("Waiting %s for upgrade watcher to finish running", timeout) + time.Sleep(timeout) } func extractCommitHashFromArtifact(t *testing.T, ctx context.Context, artifactVersion *version.ParsedSemVer, agentProject tools.Project) string { @@ -653,6 +662,11 @@ func TestStandaloneUpgradeRetryDownload(t *testing.T) { err = agentFixture.Configure(ctx, []byte(fastWatcherCfg)) require.NoError(t, err, "error configuring agent fixture") + t.Cleanup(func() { + // The watcher needs to finish before the agent is uninstalled: https://github.com/elastic/elastic-agent/issues/3371 + waitForUpgradeWatcherToComplete(t, agentFixture, upgradeFromVersion, standaloneWatcherDuration) + }) + t.Log("Install the built Agent") output, err := 
tools.InstallStandaloneAgent(agentFixture) t.Log(string(output)) @@ -739,8 +753,6 @@ func TestStandaloneUpgradeRetryDownload(t *testing.T) { t.Log("Waiting for upgrade to finish") wg.Wait() - checkUpgradeWatcherRan(t, agentFixture, upgradeFromVersion) - t.Log("Check Agent version to ensure upgrade is successful") currentVersion, err = getVersion(t, ctx, agentFixture) require.NoError(t, err) @@ -802,6 +814,9 @@ func TestUpgradeBrokenPackageVersion(t *testing.T) { f, err := define.NewFixture(t, define.Version()) require.NoError(t, err) + fromVersion, err := version.ParseVersion(define.Version()) + require.NoError(t, err) + // Prepare the Elastic Agent so the binary is extracted and ready to use. err = f.Prepare(context.Background()) require.NoError(t, err) @@ -809,6 +824,14 @@ func TestUpgradeBrokenPackageVersion(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() + err = f.Configure(ctx, []byte(fastWatcherCfg)) + require.NoError(t, err, "error configuring agent fixture") + + t.Cleanup(func() { + // The watcher needs to finish before the agent is uninstalled: https://github.com/elastic/elastic-agent/issues/3371 + waitForUpgradeWatcherToComplete(t, f, fromVersion, standaloneWatcherDuration) + }) + output, err := tools.InstallStandaloneAgent(f) t.Logf("Agent installation output: %q", string(output)) require.NoError(t, err)