Skip to content

Commit

Permalink
Optionally delay job garbage collection (#1077)
Browse files Browse the repository at this point in the history
Add the job.gcDelay operator configuration property to delay the garbage
collection of successful build and signing jobs.
Add the new kmm.node.kubernetes.io/gc-delay finalizer to all new jobs.
Only clear the finalizer after deletionTimestamp+gcDelay.
  • Loading branch information
qbarrand authored Apr 8, 2024
1 parent 2ae391e commit 12fbdf0
Show file tree
Hide file tree
Showing 21 changed files with 397 additions and 115 deletions.
9 changes: 8 additions & 1 deletion cmd/manager-hub/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,14 @@ func main() {
mcmr := hub.NewManagedClusterModuleReconciler(
client,
manifestwork.NewCreator(client, scheme, kernelAPI, registryAPI, authFactory, cache, operatorNamespace),
cluster.NewClusterAPI(client, module.NewKernelMapper(buildHelperAPI, sign.NewSignerHelper()), buildAPI, signAPI, operatorNamespace),
cluster.NewClusterAPI(
client,
module.NewKernelMapper(buildHelperAPI, sign.NewSignerHelper()),
buildAPI,
signAPI,
operatorNamespace,
cfg.Job.GCDelay,
),
statusupdater.NewManagedClusterModuleStatusUpdater(client),
filterAPI,
operatorNamespace,
Expand Down
3 changes: 2 additions & 1 deletion cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,8 @@ func main() {
buildAPI,
signAPI,
kernelAPI,
filterAPI)
filterAPI,
cfg.Job.GCDelay)
if err = bsc.SetupWithManager(mgr, constants.KernelLabel); err != nil {
cmd.FatalError(setupLogger, err, "unable to create controller", "name", controllers.BuildSignReconcilerName)
}
Expand Down
3 changes: 2 additions & 1 deletion internal/build/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package build

import (
"context"
"time"

"github.com/rh-ecosystem-edge/kernel-module-management/internal/utils/ocpbuild"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand All @@ -12,7 +13,7 @@ import (
//go:generate mockgen -source=manager.go -package=build -destination=mock_manager.go

type Manager interface {
GarbageCollect(ctx context.Context, modName, namespace string, owner metav1.Object) ([]string, error)
GarbageCollect(ctx context.Context, modName, namespace string, owner metav1.Object, delay time.Duration) ([]string, error)

ShouldSync(ctx context.Context, mld *api.ModuleLoaderData) (bool, error)

Expand Down
9 changes: 5 additions & 4 deletions internal/build/mock_manager.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 17 additions & 6 deletions internal/build/ocpbuild/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ import (
"context"
"errors"
"fmt"
"time"

buildv1 "github.com/openshift/api/build/v1"
"github.com/rh-ecosystem-edge/kernel-module-management/internal/build"
"github.com/rh-ecosystem-edge/kernel-module-management/internal/constants"
ocpbuildutils "github.com/rh-ecosystem-edge/kernel-module-management/internal/utils/ocpbuild"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand All @@ -32,7 +34,8 @@ func NewManager(
maker Maker,
ocpBuildsHelper ocpbuildutils.OCPBuildsHelper,
authFactory auth.RegistryAuthGetterFactory,
registry registry.Registry) build.Manager {
registry registry.Registry,
) build.Manager {
return &manager{
client: client,
maker: maker,
Expand All @@ -42,7 +45,7 @@ func NewManager(
}
}

func (m *manager) GarbageCollect(ctx context.Context, modName, namespace string, owner metav1.Object) ([]string, error) {
func (m *manager) GarbageCollect(ctx context.Context, modName, namespace string, owner metav1.Object, delay time.Duration) ([]string, error) {
moduleBuilds, err := m.ocpBuildsHelper.GetModuleOCPBuilds(ctx, modName, namespace, owner)
if err != nil {
return nil, fmt.Errorf("failed to get OCP builds for module's builds %s: %v", modName, err)
Expand All @@ -51,11 +54,19 @@ func (m *manager) GarbageCollect(ctx context.Context, modName, namespace string,
deleteNames := make([]string, 0, len(moduleBuilds))
for _, moduleBuild := range moduleBuilds {
if moduleBuild.Status.Phase == buildv1.BuildPhaseComplete {
err = m.ocpBuildsHelper.DeleteOCPBuild(ctx, &moduleBuild)
if err != nil {
return nil, fmt.Errorf("failed to delete OCP build %s: %v", moduleBuild.Name, err)
if moduleBuild.DeletionTimestamp == nil {
if err = m.ocpBuildsHelper.DeleteOCPBuild(ctx, &moduleBuild); err != nil {
return nil, fmt.Errorf("failed to delete build %s: %v", moduleBuild.Name, err)
}
}

if moduleBuild.DeletionTimestamp.Add(delay).Before(time.Now()) {
if err = m.ocpBuildsHelper.RemoveFinalizer(ctx, &moduleBuild, constants.GCDelayFinalizer); err != nil {
return nil, fmt.Errorf("could not remove the GC delay finalizer from build %s/%s: %v", moduleBuild.Namespace, moduleBuild.Name, err)
}

deleteNames = append(deleteNames, moduleBuild.Name)
}
deleteNames = append(deleteNames, moduleBuild.Name)
}
}
return deleteNames, nil
Expand Down
144 changes: 122 additions & 22 deletions internal/build/ocpbuild/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@ import (
"context"
"errors"
"fmt"
"time"

"github.com/budougumi0617/cmpmock"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
buildv1 "github.com/openshift/api/build/v1"
"github.com/rh-ecosystem-edge/kernel-module-management/internal/constants"
"go.uber.org/mock/gomock"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -271,7 +274,7 @@ var _ = Describe("GarbageCollect", func() {
It("GetModuleBuilds failed", func() {
mockOCPBuildsHelper.EXPECT().GetModuleOCPBuilds(ctx, moduleName, namespace, nil).Return(nil, fmt.Errorf("some error"))

deleted, err := m.GarbageCollect(ctx, moduleName, namespace, nil)
deleted, err := m.GarbageCollect(ctx, moduleName, namespace, nil, 0)

Expect(err).To(HaveOccurred())
Expect(deleted).To(BeEmpty())
Expand All @@ -287,40 +290,137 @@ var _ = Describe("GarbageCollect", func() {
mockOCPBuildsHelper.EXPECT().GetModuleOCPBuilds(ctx, moduleName, namespace, nil).Return([]buildv1.Build{ocpBuild}, nil),
mockOCPBuildsHelper.EXPECT().DeleteOCPBuild(ctx, &ocpBuild).Return(fmt.Errorf("some error")),
)
deleted, err := m.GarbageCollect(ctx, moduleName, namespace, nil)
deleted, err := m.GarbageCollect(ctx, moduleName, namespace, nil, 0)

Expect(err).To(HaveOccurred())
Expect(deleted).To(BeEmpty())
})

DescribeTable("should return the correct error and names of the collected OCP builds",
func(buildPhase1 buildv1.BuildPhase, buildPhase2 buildv1.BuildPhase, numSuccessfulBuilds int) {
ocpBuild1 := buildv1.Build{
Status: buildv1.BuildStatus{
Phase: buildPhase1,
},
mld := api.ModuleLoaderData{
Name: "moduleName",
Owner: &kmmv1beta1.Module{},
}

type testCase struct {
podPhase1, podPhase2 buildv1.BuildPhase
gcDelay time.Duration
expectsErr bool
resShouldContainPod1, resShouldContainPod2 bool
}

DescribeTable("should return the correct error and names of the collected pods",
func(tc testCase) {
const (
build1Name = "pod-1"
build2Name = "pod-2"
)

ctx := context.Background()

build1 := buildv1.Build{
ObjectMeta: metav1.ObjectMeta{Name: build1Name},
Status: buildv1.BuildStatus{Phase: tc.podPhase1},
}
ocpBuild2 := buildv1.Build{
Status: buildv1.BuildStatus{
Phase: buildPhase2,
},
build2 := buildv1.Build{
ObjectMeta: metav1.ObjectMeta{Name: build2Name},
Status: buildv1.BuildStatus{Phase: tc.podPhase2},
}
mockOCPBuildsHelper.EXPECT().GetModuleOCPBuilds(ctx, moduleName, namespace, nil).Return([]buildv1.Build{ocpBuild1, ocpBuild2}, nil)
if buildPhase1 == buildv1.BuildPhaseComplete {
mockOCPBuildsHelper.EXPECT().DeleteOCPBuild(ctx, &ocpBuild1).Return(nil)

returnedError := fmt.Errorf("some error")
if !tc.expectsErr {
returnedError = nil
}
if buildPhase2 == buildv1.BuildPhaseComplete {
mockOCPBuildsHelper.EXPECT().DeleteOCPBuild(ctx, &ocpBuild2).Return(nil)

buildList := []buildv1.Build{build1, build2}

calls := []any{
mockOCPBuildsHelper.EXPECT().GetModuleOCPBuilds(ctx, mld.Name, mld.Namespace, mld.Owner).Return(buildList, returnedError),
}

deleted, err := m.GarbageCollect(ctx, moduleName, namespace, nil)
if !tc.expectsErr {
now := metav1.Now()

for i := 0; i < len(buildList); i++ {
build := &buildList[i]

if build.Status.Phase == buildv1.BuildPhaseComplete {
c := mockOCPBuildsHelper.
EXPECT().
DeleteOCPBuild(ctx, cmpmock.DiffEq(build)).
Do(func(_ context.Context, b *buildv1.Build) {
b.DeletionTimestamp = &now
build.DeletionTimestamp = &now
})

calls = append(calls, c)

if time.Now().After(now.Add(tc.gcDelay)) {
calls = append(
calls,
mockOCPBuildsHelper.EXPECT().RemoveFinalizer(ctx, cmpmock.DiffEq(build), constants.GCDelayFinalizer),
)
}
}
}
}

gomock.InOrder(calls...)

names, err := m.GarbageCollect(ctx, mld.Name, mld.Namespace, mld.Owner, tc.gcDelay)

if tc.expectsErr {
Expect(err).To(HaveOccurred())
return
}

Expect(err).NotTo(HaveOccurred())
Expect(len(deleted)).To(Equal(numSuccessfulBuilds))

if tc.resShouldContainPod1 {
Expect(names).To(ContainElements(build1Name))
}

if tc.resShouldContainPod2 {
Expect(names).To(ContainElements(build2Name))
}
},
Entry("0 successfull builds", buildv1.BuildPhaseRunning, buildv1.BuildPhaseRunning, 0),
Entry("1 successfull builds", buildv1.BuildPhaseRunning, buildv1.BuildPhaseComplete, 1),
Entry("2 successfull builds", buildv1.BuildPhaseComplete, buildv1.BuildPhaseComplete, 2),
Entry(
"all pods succeeded",
testCase{
podPhase1: buildv1.BuildPhaseComplete,
podPhase2: buildv1.BuildPhaseComplete,
resShouldContainPod1: true,
resShouldContainPod2: true,
},
),
Entry(
"1 pod succeeded",
testCase{
podPhase1: buildv1.BuildPhaseComplete,
podPhase2: buildv1.BuildPhaseFailed,
resShouldContainPod1: true,
},
),
Entry(
"0 pod succeeded",
testCase{
podPhase1: buildv1.BuildPhaseFailed,
podPhase2: buildv1.BuildPhaseFailed,
},
),
Entry(
"error occurred",
testCase{expectsErr: true},
),
Entry(
"GC delayed",
testCase{
podPhase1: buildv1.BuildPhaseComplete,
podPhase2: buildv1.BuildPhaseComplete,
gcDelay: time.Hour,
resShouldContainPod1: false,
resShouldContainPod2: false,
},
),
)

})
10 changes: 7 additions & 3 deletions internal/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"sort"
"strings"
"time"

ocpbuildutils "github.com/rh-ecosystem-edge/kernel-module-management/internal/utils/ocpbuild"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -38,6 +39,7 @@ type clusterAPI struct {
client client.Client
kernelAPI module.KernelMapper
buildAPI build.Manager
gcDelay time.Duration
signAPI sign.SignManager
namespace string
}
Expand All @@ -47,11 +49,13 @@ func NewClusterAPI(
kernelAPI module.KernelMapper,
buildAPI build.Manager,
signAPI sign.SignManager,
defaultJobNamespace string) ClusterAPI {
defaultJobNamespace string,
gcDelay time.Duration) ClusterAPI {
return &clusterAPI{
client: client,
kernelAPI: kernelAPI,
buildAPI: buildAPI,
gcDelay: gcDelay,
signAPI: signAPI,
namespace: defaultJobNamespace,
}
Expand Down Expand Up @@ -143,12 +147,12 @@ func (c *clusterAPI) BuildAndSign(
}

func (c *clusterAPI) GarbageCollectBuildsAndSigns(ctx context.Context, mcm hubv1beta1.ManagedClusterModule) ([]string, error) {
deletedBuilds, err := c.buildAPI.GarbageCollect(ctx, mcm.Name, c.namespace, &mcm)
deletedBuilds, err := c.buildAPI.GarbageCollect(ctx, mcm.Name, c.namespace, &mcm, c.gcDelay)
if err != nil {
return nil, fmt.Errorf("failed to garbage collect build object: %v", err)
}

deletedSigns, err := c.signAPI.GarbageCollect(ctx, mcm.Name, c.namespace, &mcm)
deletedSigns, err := c.signAPI.GarbageCollect(ctx, mcm.Name, c.namespace, &mcm, c.gcDelay)
if err != nil {
return nil, fmt.Errorf("failed to garbage collect sign object: %v", err)
}
Expand Down
Loading

0 comments on commit 12fbdf0

Please sign in to comment.