Skip to content

Commit

Permalink
improve install and enroll by surfacing errors
Browse files Browse the repository at this point in the history
* surface errors that might occur during enroll
* fail install command if agent cannot be restarted
* do not print success message if there was an enroll error. Print an error message and the error instead
* add logs to show the different enroll attempts
* add more context t errors
* refactor internal/pkg/agent/install/perms_unix.go and add more context to errors
  • Loading branch information
AndersonQ committed Aug 24, 2023
1 parent c4c314a commit fefb9cb
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 28 deletions.
2 changes: 1 addition & 1 deletion internal/pkg/agent/cmd/enroll.go
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ func enroll(streams *cli.IOStreams, cmd *cobra.Command) error {
// Error: failed to fix permissions: chown /Library/Elastic/Agent/data/elastic-agent-c13f91/elastic-agent.app: operation not permitted
// This is because we are fixing permissions twice, once during installation and again during the enrollment step.
// When we are enrolling as part of installation on MacOS, skip the second attempt to fix permissions.
var fixPermissions bool = fromInstall
var fixPermissions = fromInstall
if runtime.GOOS == "darwin" {
fixPermissions = false
}
Expand Down
48 changes: 34 additions & 14 deletions internal/pkg/agent/cmd/enroll_cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ func newEnrollCmd(
)
}

// newEnrollCmdWithStore creates an new enrollment and accept a custom store.
// newEnrollCmdWithStore creates a new enrollment and accept a custom store.
func newEnrollCmdWithStore(
log *logger.Logger,
options *enrollCmdOption,
Expand All @@ -187,10 +187,11 @@ func newEnrollCmdWithStore(
}, nil
}

// Execute tries to enroll the agent into Fleet.
// Execute enrolls the agent into Fleet.
func (c *enrollCmd) Execute(ctx context.Context, streams *cli.IOStreams) error {
var err error
defer c.stopAgent() // ensure its stopped no matter what

span, ctx := apm.StartSpan(ctx, "enroll", "app.internal")
defer func() {
apm.CaptureError(ctx, err).Send()
Expand Down Expand Up @@ -235,7 +236,7 @@ func (c *enrollCmd) Execute(ctx context.Context, streams *cli.IOStreams) error {
// Ensure that the agent does not use a proxy configuration
// when connecting to the local fleet server.
// Note that when running fleet-server the enroll request will be sent to :8220,
// however when the agent is running afterwards requests will be sent to :8221
// however when the agent is running afterward requests will be sent to :8221
c.remoteConfig.Transport.Proxy.Disable = true
}

Expand All @@ -256,7 +257,7 @@ func (c *enrollCmd) Execute(ctx context.Context, streams *cli.IOStreams) error {

err = c.enrollWithBackoff(ctx, persistentConfig)
if err != nil {
return errors.New(err, "fail to enroll")
return fmt.Errorf("fail to enroll: %w", err)
}

if c.options.FixPermissions {
Expand All @@ -267,17 +268,23 @@ func (c *enrollCmd) Execute(ctx context.Context, streams *cli.IOStreams) error {
}

defer func() {
fmt.Fprintln(streams.Out, "Successfully enrolled the Elastic Agent.")
if err != nil {
fmt.Fprintln(streams.Out, "Successfully enrolled the Elastic Agent.")
} else {
fmt.Fprintf(streams.Out, "Something went wrong while enrolling the Elastic Agent: %v\n", err)
}
}()

if c.agentProc == nil {
if c.daemonReload(ctx) != nil {
c.log.Info("Elastic Agent might not be running; unable to trigger restart")
} else {
c.log.Info("Successfully triggered restart on running Elastic Agent.")
if err = c.daemonReloadWithBackoff(ctx); err != nil {
c.log.Errorf("Elastic Agent might not be running; unable to trigger restart: %v", err)
return fmt.Errorf("could not reload agent deamon, unable to trigger restart: %w", err)
}

c.log.Info("Successfully triggered restart on running Elastic Agent.")
return nil
}

c.log.Info("Elastic Agent has been enrolled; start Elastic Agent")
return nil
}
Expand Down Expand Up @@ -478,8 +485,20 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[

c.log.Infof("Starting enrollment to URL: %s", c.client.URI())
err := c.enroll(ctx, persistentConfig)
if err == nil {
return nil
}

const deadline = 10 * time.Minute
const frequency = 60 * time.Second

c.log.Infof("1st enrollment attempt failed, retrying for %s each %s enrolling to URL: %s",
deadline,
frequency,
c.client.URI())
signal := make(chan struct{})
backExp := backoff.NewExpBackoff(signal, 60*time.Second, 10*time.Minute)
defer close(signal)
backExp := backoff.NewExpBackoff(signal, frequency, deadline)

for {
retry := false
Expand All @@ -498,7 +517,6 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[
err = c.enroll(ctx, persistentConfig)
}

close(signal)
return err
}

Expand Down Expand Up @@ -547,8 +565,10 @@ func (c *enrollCmd) enroll(ctx context.Context, persistentConfig map[string]inte
c.options.FleetServer.ElasticsearchInsecure,
)
if err != nil {
return err
return fmt.Errorf(
"failed creating fleet-server bootstrap config: %w", err)
}

// no longer need bootstrap at this point
serverConfig.Server.Bootstrap = false
fleetConfig.Server = serverConfig.Server
Expand All @@ -568,11 +588,11 @@ func (c *enrollCmd) enroll(ctx context.Context, persistentConfig map[string]inte

reader, err := yamlToReader(configToStore)
if err != nil {
return err
return fmt.Errorf("yamlToReader failed: %w", err)
}

if err := safelyStoreAgentInfo(c.configStore, reader); err != nil {
return err
return fmt.Errorf("failed to store agent config: %w", err)
}

// clear action store
Expand Down
4 changes: 3 additions & 1 deletion internal/pkg/agent/cmd/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ func installCmd(streams *cli.IOStreams, cmd *cobra.Command) error {

nonInteractive, _ := cmd.Flags().GetBool("non-interactive")
if nonInteractive {
fmt.Fprintf(streams.Out, "Installing in non-interactive mode.")
fmt.Fprintf(streams.Out, "Installing in non-interactive mode.\n")
}

if status == install.PackageInstall {
Expand Down Expand Up @@ -205,6 +205,8 @@ func installCmd(streams *cli.IOStreams, cmd *cobra.Command) error {
}
}()
}

fmt.Fprint(streams.Out, "Elastic Agent successfully installed, starting enrollment.\n")
}

if enroll {
Expand Down
32 changes: 20 additions & 12 deletions internal/pkg/agent/install/perms_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package install

import (
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
Expand All @@ -18,19 +19,26 @@ func fixPermissions(topPath string) error {
return recursiveRootPermissions(topPath)
}

func recursiveRootPermissions(path string) error {
return filepath.Walk(path, func(name string, info fs.FileInfo, err error) error {
if err == nil {
// all files should be owned by root:root
err = os.Chown(name, 0, 0)
if err != nil {
return err
}
// remove any world permissions from the file
err = os.Chmod(name, info.Mode().Perm()&0770)
} else if errors.Is(err, fs.ErrNotExist) {
func recursiveRootPermissions(root string) error {
return filepath.Walk(root, func(path string, info fs.FileInfo, err error) error {
if errors.Is(err, fs.ErrNotExist) {
return nil
}
return err
if err != nil {
return fmt.Errorf("walk on %q failed: %v", path, err)

Check failure on line 28 in internal/pkg/agent/install/perms_unix.go

View workflow job for this annotation

GitHub Actions / lint (ubuntu-latest)

non-wrapping format verb for fmt.Errorf. Use `%w` to format errors (errorlint)
}

// all files should be owned by root:root
err = os.Chown(path, 0, 0)
if err != nil {
return fmt.Errorf("could not fix ownership of %q: %w", path, err)
}
// remove any world permissions from the file
err = os.Chmod(path, info.Mode().Perm()&0770)
if err != nil {
return fmt.Errorf("could not fix permissions of %q: %w", path, err)
}

return nil
})
}

0 comments on commit fefb9cb

Please sign in to comment.