Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: added RegistrationDuration flag #1452

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions pkg/controllers/nodeclaim/lifecycle/liveness.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ package lifecycle

import (
"context"
"time"

"github.com/prometheus/client_golang/prometheus"
"k8s.io/utils/clock"
Expand All @@ -28,17 +27,14 @@ import (

v1 "sigs.k8s.io/karpenter/pkg/apis/v1"
"sigs.k8s.io/karpenter/pkg/metrics"
"sigs.k8s.io/karpenter/pkg/operator/options"
)

type Liveness struct {
clock clock.Clock
kubeClient client.Client
}

// registrationTTL is a heuristic time that we expect the node to register within
// If we don't see the node within this time, then we should delete the NodeClaim and try again
const registrationTTL = time.Minute * 15

func (l *Liveness) Reconcile(ctx context.Context, nodeClaim *v1.NodeClaim) (reconcile.Result, error) {
registered := nodeClaim.StatusConditions().Get(v1.ConditionTypeRegistered)
if registered.IsTrue() {
Expand All @@ -48,6 +44,7 @@ func (l *Liveness) Reconcile(ctx context.Context, nodeClaim *v1.NodeClaim) (reco
return reconcile.Result{Requeue: true}, nil
}
// If the Registered statusCondition hasn't gone True during the TTL since we first updated it, we should terminate the NodeClaim
registrationTTL := options.FromContext(ctx).RegistrationDuration
if l.clock.Since(registered.LastTransitionTime.Time) < registrationTTL {
return reconcile.Result{RequeueAfter: registrationTTL - l.clock.Since(registered.LastTransitionTime.Time)}, nil
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/operator/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ type Options struct {
BatchMaxDuration time.Duration
BatchIdleDuration time.Duration
FeatureGates FeatureGates
RegistrationDuration time.Duration
}

type FlagSet struct {
Expand Down Expand Up @@ -94,6 +95,7 @@ func (o *Options) AddFlags(fs *FlagSet) {
fs.DurationVar(&o.BatchMaxDuration, "batch-max-duration", env.WithDefaultDuration("BATCH_MAX_DURATION", 10*time.Second), "The maximum length of a batch window. The longer this is, the more pods we can consider for provisioning at one time which usually results in fewer but larger nodes.")
fs.DurationVar(&o.BatchIdleDuration, "batch-idle-duration", env.WithDefaultDuration("BATCH_IDLE_DURATION", time.Second), "The maximum amount of time with no new pending pods that if exceeded ends the current batching window. If pods arrive faster than this time, the batching window will be extended up to the maxDuration. If they arrive slower, the pods will be batched separately.")
fs.StringVar(&o.FeatureGates.inputStr, "feature-gates", env.WithDefaultString("FEATURE_GATES", "SpotToSpotConsolidation=false"), "Optional features can be enabled / disabled using feature gates. Current options are: SpotToSpotConsolidation")
fs.DurationVar(&o.RegistrationDuration, "registration-duration", env.WithDefaultDuration("REGISTRATION_DURATION", 15*time.Minute), "The maximum time to wait for node registration. If this time is exceeded, NodeClaim will be deleted. Default 15 minute.")
}

func (o *Options) Parse(fs *FlagSet, args ...string) error {
Expand Down
9 changes: 9 additions & 0 deletions pkg/operator/options/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ var _ = Describe("Options", func() {
"BATCH_MAX_DURATION",
"BATCH_IDLE_DURATION",
"FEATURE_GATES",
"REGISTRATION_DURATION",
}

BeforeEach(func() {
Expand Down Expand Up @@ -111,6 +112,7 @@ var _ = Describe("Options", func() {
FeatureGates: test.FeatureGates{
SpotToSpotConsolidation: lo.ToPtr(false),
},
RegistrationDuration: lo.ToPtr(15 * time.Minute),
}))
})

Expand All @@ -133,6 +135,7 @@ var _ = Describe("Options", func() {
"--batch-max-duration", "5s",
"--batch-idle-duration", "5s",
"--feature-gates", "SpotToSpotConsolidation=true",
"--registration-duration", "60s",
)
Expect(err).To(BeNil())
expectOptionsMatch(opts, test.Options(test.OptionsFields{
Expand All @@ -152,6 +155,7 @@ var _ = Describe("Options", func() {
FeatureGates: test.FeatureGates{
SpotToSpotConsolidation: lo.ToPtr(true),
},
RegistrationDuration: lo.ToPtr(60 * time.Second),
}))
})

Expand All @@ -170,6 +174,7 @@ var _ = Describe("Options", func() {
os.Setenv("BATCH_MAX_DURATION", "5s")
os.Setenv("BATCH_IDLE_DURATION", "5s")
os.Setenv("FEATURE_GATES", "SpotToSpotConsolidation=true")
os.Setenv("REGISTRATION_DURATION", "20s")
fs = &options.FlagSet{
FlagSet: flag.NewFlagSet("karpenter", flag.ContinueOnError),
}
Expand All @@ -193,6 +198,7 @@ var _ = Describe("Options", func() {
FeatureGates: test.FeatureGates{
SpotToSpotConsolidation: lo.ToPtr(true),
},
RegistrationDuration: lo.ToPtr(20 * time.Second),
}))
})

Expand All @@ -208,6 +214,7 @@ var _ = Describe("Options", func() {
os.Setenv("BATCH_MAX_DURATION", "5s")
os.Setenv("BATCH_IDLE_DURATION", "5s")
os.Setenv("FEATURE_GATES", "SpotToSpotConsolidation=true")
os.Setenv("REGISTRATION_DURATION", "20s")
fs = &options.FlagSet{
FlagSet: flag.NewFlagSet("karpenter", flag.ContinueOnError),
}
Expand Down Expand Up @@ -236,6 +243,7 @@ var _ = Describe("Options", func() {
FeatureGates: test.FeatureGates{
SpotToSpotConsolidation: lo.ToPtr(true),
},
RegistrationDuration: lo.ToPtr(20 * time.Second),
}))
})
})
Expand Down Expand Up @@ -289,4 +297,5 @@ func expectOptionsMatch(optsA, optsB *options.Options) {
Expect(optsA.BatchMaxDuration).To(Equal(optsB.BatchMaxDuration))
Expect(optsA.BatchIdleDuration).To(Equal(optsB.BatchIdleDuration))
Expect(optsA.FeatureGates.SpotToSpotConsolidation).To(Equal(optsB.FeatureGates.SpotToSpotConsolidation))
Expect(optsA.RegistrationDuration).To(Equal(optsB.RegistrationDuration))
}
2 changes: 2 additions & 0 deletions pkg/test/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ type OptionsFields struct {
BatchMaxDuration *time.Duration
BatchIdleDuration *time.Duration
FeatureGates FeatureGates
RegistrationDuration *time.Duration
}

type FeatureGates struct {
Expand Down Expand Up @@ -73,5 +74,6 @@ func Options(overrides ...OptionsFields) *options.Options {
FeatureGates: options.FeatureGates{
SpotToSpotConsolidation: lo.FromPtrOr(opts.FeatureGates.SpotToSpotConsolidation, false),
},
RegistrationDuration: lo.FromPtrOr(opts.RegistrationDuration, 15*time.Minute),
}
}
Loading