Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ETCD-672: use consistent get on status conflict #1825

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions pkg/operator/status/status_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package status

import (
"context"
"fmt"
"strings"
"time"

Expand Down Expand Up @@ -51,6 +52,7 @@ type StatusSyncer struct {
degradedInertia Inertia

removeUnusedVersions bool
retryWithClientGet bool
}

var _ factory.Controller = &StatusSyncer{}
Expand Down Expand Up @@ -126,9 +128,9 @@ func (c *StatusSyncer) WithVersionRemoval() *StatusSyncer {
return &output
}

// sync reacts to a change in prereqs by finding information that is required to match another value in the cluster. This
// Sync reacts to a change in prereqs by finding information that is required to match another value in the cluster. This
// must be information that is logically "owned" by another component.
func (c StatusSyncer) Sync(ctx context.Context, syncCtx factory.SyncContext) error {
func (c *StatusSyncer) Sync(ctx context.Context, syncCtx factory.SyncContext) error {
detailedSpec, currentDetailedStatus, _, err := c.operatorClient.GetOperatorState()
if apierrors.IsNotFound(err) {
syncCtx.Recorder().Warningf("StatusNotFound", "Unable to determine current operator status for clusteroperator/%s", c.clusterOperatorName)
Expand All @@ -147,6 +149,14 @@ func (c StatusSyncer) Sync(ctx context.Context, syncCtx factory.SyncContext) err
return err
}

if c.retryWithClientGet {
c.retryWithClientGet = false
originalClusterOperatorObj, err = c.clusterOperatorClient.ClusterOperators().Get(ctx, c.clusterOperatorName, metav1.GetOptions{})
if err != nil {
return fmt.Errorf("consistent client get from retry failed with: %w", err)
}
}

// ensure that we have a clusteroperator resource
if originalClusterOperatorObj == nil || apierrors.IsNotFound(err) {
klog.Infof("clusteroperator/%s not found", c.clusterOperatorName)
Expand Down Expand Up @@ -225,6 +235,11 @@ func (c StatusSyncer) Sync(ctx context.Context, syncCtx factory.SyncContext) err
klog.V(2).Infof("clusteroperator/%s diff %v", c.clusterOperatorName, resourceapply.JSONPatchNoError(originalClusterOperatorObj, clusterOperatorObj))

if _, updateErr := c.clusterOperatorClient.ClusterOperators().UpdateStatus(ctx, clusterOperatorObj, metav1.UpdateOptions{}); updateErr != nil {
// Conflicts are sometimes caused by the watch cache lagging behind. We still want to ensure that the operators update their status correctly,
// so on a conflict we set a flag that makes the next sync loop fetch the latest ClusterOperator directly with a client get.
if apierrors.IsConflict(updateErr) {
c.retryWithClientGet = true
}
return updateErr
}
if !skipOperatorStatusChangedEvent(originalClusterOperatorObj.Status, clusterOperatorObj.Status) {
Expand Down