Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reconcile machine before creation deadline #849

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions pkg/util/provider/machinecontroller/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,14 +175,15 @@ func (c *controller) reconcileClusterMachine(ctx context.Context, machine *v1alp
}
}
if machine.Spec.ProviderID == "" || machine.Status.CurrentStatus.Phase == "" || machine.Status.CurrentStatus.Phase == v1alpha1.MachineCrashLoopBackOff {
return c.triggerCreationFlow(
retry, err = c.triggerCreationFlow(
ctx,
&driver.CreateMachineRequest{
Machine: machine,
MachineClass: machineClass,
Secret: &corev1.Secret{Data: secretData},
},
)
return c.adjustCreateRetryRequired(machine, retry), err
}

return machineutils.LongRetry, nil
Expand Down Expand Up @@ -354,7 +355,10 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
// If node label is not present
klog.V(2).Infof("Creating a VM for machine %q, please wait!", machine.Name)
klog.V(2).Infof("The machine creation is triggered with timeout of %s", c.getEffectiveCreationTimeout(createMachineRequest.Machine).Duration)
createMachineResponse, err := c.driver.CreateMachine(ctx, createMachineRequest)
createMachineCtx, cancelFunc := c.getCreationContext(ctx, machine)
defer cancelFunc()
// TODO: Adapt all mcm-providers driver method implementations to use the passed context
createMachineResponse, err := c.driver.CreateMachine(createMachineCtx, createMachineRequest)
if err != nil {
// Create call returned an error.
klog.Errorf("Error while creating machine %s: %s", machine.Name, err.Error())
Expand Down
30 changes: 25 additions & 5 deletions pkg/util/provider/machinecontroller/machine_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ func SyncMachineTaints(
return toBeUpdated
}

// machineCreateErrorHandler TODO
// machineCreateErrorHandler handles errors when machine creation does not succeed
func (c *controller) machineCreateErrorHandler(ctx context.Context, machine *v1alpha1.Machine, createMachineResponse *driver.CreateMachineResponse, err error) (machineutils.RetryPeriod, error) {
var (
retryRequired = machineutils.MediumRetry
Expand All @@ -494,7 +494,7 @@ func (c *controller) machineCreateErrorHandler(ctx context.Context, machine *v1a
lastKnownState = createMachineResponse.LastKnownState
}

c.machineStatusUpdate(
statusUpdErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
Expand All @@ -510,8 +510,23 @@ func (c *controller) machineCreateErrorHandler(ctx context.Context, machine *v1a
},
lastKnownState,
)
return retryRequired, statusUpdErr
}

// adjustCreateRetryRequired adjusts the retry period if needed so that we don't have a case where the machine is reconciled long (<=ShortRetry) after machineCreationTimeout
// if the retry period is too large, it is adjusted to cause a reconcile at the machineCreationTimeout
// if the machineCreationTimeout has already passed, return `ShortRetry` so that the machine is immediately reconciled
func (c *controller) adjustCreateRetryRequired(machine *v1alpha1.Machine, retryRequired machineutils.RetryPeriod) machineutils.RetryPeriod {
adjustedRetry := retryRequired
machineCreationDeadline := machine.CreationTimestamp.Time.Add(c.getEffectiveCreationTimeout(machine).Duration)
if time.Now().After(machineCreationDeadline) {
adjustedRetry = machineutils.ShortRetry
} else if time.Now().Add(time.Duration(retryRequired)).After(machineCreationDeadline) {
// Machine will reconcile after create deadline, so adapt RetryPeriod to reconcile machine at deadline
adjustedRetry = machineutils.RetryPeriod(machineCreationDeadline.Sub(time.Now()))
}

return retryRequired, nil
return adjustedRetry
}

func (c *controller) machineStatusUpdate(
Expand Down Expand Up @@ -977,7 +992,7 @@ func isConditionEmpty(condition v1.NodeCondition) bool {
return condition == v1.NodeCondition{}
}

// initializes err and description with the passed string message
// printLogInitError initializes err and description with the passed string message
func printLogInitError(s string, err *error, description *string, machine *v1alpha1.Machine) {
klog.Warningf(s+" machine: %q ", machine.Name)
*err = fmt.Errorf(s+" %s", machineutils.InitiateVMDeletion)
Expand Down Expand Up @@ -1372,7 +1387,7 @@ func (c *controller) getEffectiveHealthTimeout(machine *v1alpha1.Machine) *metav
return effectiveHealthTimeout
}

// getEffectiveHealthTimeout returns the creationTimeout set on the machine-object, otherwise returns the timeout set using the global-flag.
// getEffectiveCreationTimeout returns the creationTimeout set on the machine-object, otherwise returns the timeout set using the global-flag.
func (c *controller) getEffectiveCreationTimeout(machine *v1alpha1.Machine) *metav1.Duration {
var effectiveCreationTimeout *metav1.Duration
if machine.Spec.MachineConfiguration != nil && machine.Spec.MachineConfiguration.MachineCreationTimeout != nil {
Expand All @@ -1394,6 +1409,11 @@ func (c *controller) getEffectiveNodeConditions(machine *v1alpha1.Machine) *stri
return effectiveNodeConditions
}

func (c *controller) getCreationContext(parentCtx context.Context, machine *v1alpha1.Machine) (context.Context, context.CancelFunc) {
timeOutDuration := c.getEffectiveCreationTimeout(machine).Duration
return context.WithDeadline(parentCtx, machine.CreationTimestamp.Time.Add(timeOutDuration))
}

// UpdateNodeTerminationCondition updates termination condition on the node object
func (c *controller) UpdateNodeTerminationCondition(ctx context.Context, machine *v1alpha1.Machine) error {
if machine.Status.CurrentStatus.Phase == "" || machine.Status.CurrentStatus.Phase == v1alpha1.MachineCrashLoopBackOff {
Expand Down
68 changes: 68 additions & 0 deletions pkg/util/provider/machinecontroller/machine_util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2725,4 +2725,72 @@ var _ = Describe("machine_util", func() {
}),
)
})

Describe("#Adjust Machine Create Retry Period if near Timeout", func() {
type setup struct {
machine *machinev1.Machine
mcCreateTimeout metav1.Duration
createOpRetry machineutils.RetryPeriod
}

type expect struct {
retryAdjusted bool
}

type data struct {
setup setup
expect expect
}

DescribeTable("##table",
func(data *data) {
stop := make(chan struct{})
defer close(stop)

c, trackers := createController(stop, testNamespace, nil, nil, nil, nil)
defer trackers.Stop()

c.safetyOptions.MachineCreationTimeout = data.setup.mcCreateTimeout

adjustedRetry := c.adjustCreateRetryRequired(data.setup.machine, machineutils.RetryPeriod(data.setup.createOpRetry))

if data.expect.retryAdjusted {
// Considering time.Now() = 0
Expect(adjustedRetry).Should(BeNumerically("<=", machineutils.RetryPeriod(c.getEffectiveCreationTimeout(data.setup.machine).Duration)))
himanshu-kun marked this conversation as resolved.
Show resolved Hide resolved
} else {
Expect(adjustedRetry).To(Equal(data.setup.createOpRetry))
}
},
Entry("Should not adjust machine create retry period if retry is set to occur before machine create deadline", &data{
setup: setup{
createOpRetry: machineutils.RetryPeriod(3 * time.Minute),
mcCreateTimeout: metav1.Duration{Duration: 20 * time.Minute},
machine: newMachine(
&machinev1.MachineTemplateSpec{
Spec: machinev1.MachineSpec{},
},
&machinev1.MachineStatus{},
nil, nil, map[string]string{v1alpha1.NodeLabelKey: "test-node-0"}, true, metav1.Now()),
},
expect: expect{
retryAdjusted: false,
},
}),
Entry("Should adjust machine create retry period to retry at machine create deadline if retry is set to occur after machine create deadline", &data{
setup: setup{
createOpRetry: machineutils.RetryPeriod(4 * time.Minute),
mcCreateTimeout: metav1.Duration{Duration: 3 * time.Minute},
machine: newMachine(
&machinev1.MachineTemplateSpec{
Spec: machinev1.MachineSpec{},
},
&machinev1.MachineStatus{},
nil, nil, map[string]string{v1alpha1.NodeLabelKey: "test-node-0"}, true, metav1.Now()),
},
expect: expect{
retryAdjusted: true,
},
}),
)
})
})