From c8b52a03eb1b6bb4285380c7b885ee660cb10349 Mon Sep 17 00:00:00 2001 From: Jon Johnson Date: Thu, 31 Oct 2024 11:23:06 -0700 Subject: [PATCH] Drop errgroup.WithContext and add withCause (#1380) We are sometimes unable to expand an APK into our cache due to an opaque "context canceled" error. This change attempts to address that in two ways. First, we drop our use of errgroup.WithContext when installing packages. If one package fails to install, we'll let the rest run to completion so that we make more forward progress towards populating the cache (if there is one). Even though the overall operation will still fail, in cases where that failure was transient, we avoid throwing away in-flight work towards the other packages. Second, we'll try to extract a cause for the context cancellation and annotate the errgroup's error with it so that we know why the overall operation was canceled. At least one of these changes should help determine what's going on. Signed-off-by: Jon Johnson --- pkg/apk/apk/implementation.go | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/pkg/apk/apk/implementation.go b/pkg/apk/apk/implementation.go index ba34e8ad..5ea535f2 100644 --- a/pkg/apk/apk/implementation.go +++ b/pkg/apk/apk/implementation.go @@ -518,7 +518,7 @@ func (a *APK) CalculateWorld(ctx context.Context, allpkgs []*RepositoryPackage) // TODO: Consider making this configurable option. jobs := runtime.GOMAXPROCS(0) - g, gctx := errgroup.WithContext(ctx) + var g errgroup.Group g.SetLimit(jobs + 1) resolved := make([]*APKResolved, len(allpkgs)) @@ -535,11 +535,11 @@ func (a *APK) CalculateWorld(ctx context.Context, allpkgs []*RepositoryPackage) i, pkg := i, pkg g.Go(func() error { - r, err := a.FetchPackage(gctx, pkg) + r, err := a.FetchPackage(ctx, pkg) if err != nil { return fmt.Errorf("fetching %s: %w", pkg.Name, err) } - res, err := ResolveApk(gctx, r) + res, err := ResolveApk(ctx, r) if err != nil { return fmt.Errorf("resolving %s: %w", pkg.Name, err) } @@ -554,12 +554,22 @@ func (a *APK) CalculateWorld(ctx context.Context, allpkgs []*RepositoryPackage) } if err := g.Wait(); err != nil { - return nil, fmt.Errorf("installing packages: %w", err) + return nil, fmt.Errorf("calculating world: %w", withCause(ctx, err)) } return resolved, nil } +// Sometimes we get an opaque error about context cancellation, and it's unclear what caused it. +// If we get something useful from ctx via context.Cause, we'll annotate err with it. +func withCause(ctx context.Context, err error) error { + if cause := context.Cause(ctx); cause != nil { + return fmt.Errorf("%w: %w", err, cause) + } + + return err +} + func (a *APK) ResolveAndCalculateWorld(ctx context.Context) ([]*APKResolved, error) { log := clog.FromContext(ctx) log.Debug("resolving and calculating 'world' (packages to install)") @@ -625,7 +635,7 @@ func (a *APK) InstallPackages(ctx context.Context, sourceDateEpoch *time.Time, a // TODO: Consider making this configurable option. jobs := runtime.GOMAXPROCS(0) - g, gctx := errgroup.WithContext(ctx) + var g errgroup.Group g.SetLimit(jobs + 1) expanded := make([]*expandapk.APKExpanded, len(allpkgs)) @@ -648,8 +658,8 @@ func (a *APK) InstallPackages(ctx context.Context, sourceDateEpoch *time.Time, a g.Go(func() error { for i, ch := range done { select { - case <-gctx.Done(): - return gctx.Err() + case <-ctx.Done(): + return ctx.Err() case <-ch: exp := expanded[i] pkg := allpkgs[i] @@ -670,7 +680,7 @@ func (a *APK) InstallPackages(ctx context.Context, sourceDateEpoch *time.Time, a } infos[i] = pkgInfo - installedFiles, err := a.installPackage(gctx, pkgInfo, exp, sourceDateEpoch) + installedFiles, err := a.installPackage(ctx, pkgInfo, exp, sourceDateEpoch) if err != nil { return fmt.Errorf("installing %s: %w", pkg, err) } @@ -688,7 +698,7 @@ func (a *APK) InstallPackages(ctx context.Context, sourceDateEpoch *time.Time, a i, pkg := i, pkg g.Go(func() error { - exp, err := a.expandPackage(gctx, pkg) + exp, err := a.expandPackage(ctx, pkg) if err != nil { return fmt.Errorf("expanding %s: %w", pkg, err) } @@ -701,7 +711,7 @@ func (a *APK) InstallPackages(ctx context.Context, sourceDateEpoch *time.Time, a } if err := g.Wait(); err != nil { - return fmt.Errorf("installing packages: %w", err) + return fmt.Errorf("installing packages: %w", withCause(ctx, err)) } // update the installed file