From 3ec318a10a24eba08be30ac60a4615612551e3da Mon Sep 17 00:00:00 2001 From: Chun-Hung Tseng Date: Mon, 11 Nov 2024 15:16:55 +0100 Subject: [PATCH] Update restoration hash computation and restoring from hash flow --- internal/controller/prefixclaim_controller.go | 140 +++++++++++------- internal/controller/prefixclaim_helpers.go | 2 +- pkg/netbox/api/prefix_claim.go | 12 +- 3 files changed, 91 insertions(+), 63 deletions(-) diff --git a/internal/controller/prefixclaim_controller.go b/internal/controller/prefixclaim_controller.go index 3ad48f6..0523f76 100644 --- a/internal/controller/prefixclaim_controller.go +++ b/internal/controller/prefixclaim_controller.go @@ -40,6 +40,10 @@ import ( "github.com/netbox-community/netbox-operator/pkg/netbox/api" ) +const ( + msgCanNotInferParentPrefix = "Prefix restored from hash, can't infer the parent prefix" +) + // PrefixClaimReconciler reconciles a PrefixClaim object type PrefixClaimReconciler struct { client.Client @@ -76,51 +80,70 @@ func (r *PrefixClaimReconciler) Reconcile(ctx context.Context, req ctrl.Request) } /* 1. compute and assign the parent prefix if required */ - // The current design will use prefixClaim.Status.ParentPrefix for storing the computed parent prefix, and as the source of truth for future parent prefix references + // The current design will use prefixClaim.Status.ParentPrefix for storing the computed parent prefix, + // and as the source of truth for future parent prefix references if prefixClaim.Status.ParentPrefix == "" /* parent prefix not yet computed/assigned */ { if prefixClaim.Spec.ParentPrefix != "" { prefixClaim.Status.ParentPrefix = prefixClaim.Spec.ParentPrefix + + // set status, and condition field + msg := fmt.Sprintf("parentPrefix is provided in CR: %v", prefixClaim.Status.ParentPrefix) + if err := r.SetConditionAndCreateEvent(ctx, prefixClaim, netboxv1.ConditionParentPrefixComputedTrue, corev1.EventTypeNormal, msg); err != nil { + return ctrl.Result{}, err + } } else if len(prefixClaim.Spec.ParentPrefixSelector) > 0 { // we first check if a prefix can be restored from the netbox - // since the parent prefix is part of the restoration hash computation - // we need take all parent prefix candidates into consideration when recovering for it - allParentPrefixCandidates, err := r.NetboxClient.GetAvailablePrefixByParentPrefixSelector(&prefixClaim.Spec, true) + // since the parent prefix is not part of the restoration hash computation + // we can quickly check to see if the prefix with the restoration hash is matched in NetBox + h := generatePrefixRestorationHash(prefixClaim) + canBeRestored, err := r.NetboxClient.RestoreExistingPrefixByHash(h) if err != nil { - // we requeue as this might be a temporary query error return ctrl.Result{Requeue: true}, nil } - // query one by one to see if the restoration hash exists - // TODO(henrybear327): throw an warning if the prefix count is more than 10 (or a threshold) since it's going to be a lot of API calls - var canBeRestored *models.Prefix - for _, parentPrefix := range allParentPrefixCandidates { - prefixClaim.Status.ParentPrefix = parentPrefix.Prefix + if canBeRestored != nil { + // Yes, so we will claim the prefix directly + /* + Because the parent prefix isn't part of the restoration hash, + we can't be 100% certain what was the original parent prefix when we performed the initial allocation when restoring. - h := generatePrefixRestorationHash(prefixClaim) - canBeRestored, err = r.NetboxClient.RestoreExistingPrefixByHash(h) - if err != nil { - // we can't ignore failures as we need to make sure that all potential parent prefixes are tried, so we requeue - return ctrl.Result{Requeue: true}, nil - } - if canBeRestored != nil { - // restoration hash matched - break - } + Consider the following case: - prefixClaim.Status.ParentPrefix = "" - } + 1) A prefix (P1) is allocated from (P), with preserveInNetBox set to true. + |------------------------| Parent prefix (P) + |-----| Allocated prefix (P1) - if canBeRestored != nil { - // yes, so we need to use the previously allocated parent prefix by setting prefixClaim.Status.ParentPrefix (which is already done) + 2) Prefix (P1) is deleted using the NetBox operator (but still visible in NetBox because of preserveInNetBox being true) + |------------------------| Parent prefix (P) + |-----| Allocated prefix (P1) + + 3) In NetBox, another prefix (P2) is created manually + |------------------------| Parent prefix (P) + |---------------| Manually added prefix (P2) + |-----| Allocated prefix (P1) + + 4) Perform prefix restoration + + Now we won't know if P or P2 is the parent prefix. + But this doesn't matter since we are certain which was the original prefix we allocated and we can recover exactly that one. + */ + + // since we can't infer the parent prefix + // we write a special string in the ParentPrefix status field indicating the situation + prefixClaim.Status.ParentPrefix = msgCanNotInferParentPrefix + + if err := r.SetConditionAndCreateEvent(ctx, prefixClaim, netboxv1.ConditionParentPrefixComputedTrue, corev1.EventTypeNormal, msgCanNotInferParentPrefix); err != nil { + return ctrl.Result{}, err + } } else { - // no, so we need to select one parent prefix from prefix candidates + // No, so we need to select one parent prefix from prefix candidates // The main idea is that we select one of the available parent prefixes as the ParentPrefix for all subsequent computation // The existing algorithm for prefix allocation within a ParentPrefix remains unchanged // fetch available prefixes from netbox - parentPrefixCandidates, err := r.NetboxClient.GetAvailablePrefixByParentPrefixSelector(&prefixClaim.Spec, false) + parentPrefixCandidates, err := r.NetboxClient.GetAvailablePrefixByParentPrefixSelector(&prefixClaim.Spec) if err != nil || len(parentPrefixCandidates) == 0 { errorMsg := fmt.Sprintf("no parent prefix can be obtained with the query conditions set in ParentPrefixSelector, err = %v, number of candidates = %v", err, len(parentPrefixCandidates)) if err := r.SetConditionAndCreateEvent(ctx, prefixClaim, netboxv1.ConditionParentPrefixComputedFalse, corev1.EventTypeWarning, errorMsg); err != nil { @@ -134,6 +157,12 @@ func (r *PrefixClaimReconciler) Reconcile(ctx context.Context, req ctrl.Request) // TODO(henrybear327): use best-fit algorithm to pick a parent prefix parentPrefixCandidate := parentPrefixCandidates[0] prefixClaim.Status.ParentPrefix = parentPrefixCandidate.Prefix + + // set status, and condition field + msg := fmt.Sprintf("parentPrefix is computed: %v", prefixClaim.Status.ParentPrefix) + if err := r.SetConditionAndCreateEvent(ctx, prefixClaim, netboxv1.ConditionParentPrefixComputedTrue, corev1.EventTypeNormal, msg); err != nil { + return ctrl.Result{}, err + } } } else { // this case should not be triggered anymore, as we have validation rules put in place on the CR @@ -142,12 +171,6 @@ func (r *PrefixClaimReconciler) Reconcile(ctx context.Context, req ctrl.Request) } return ctrl.Result{}, nil } - - // set status, and condition field - msg := fmt.Sprintf("parentPrefix is computed: %v", prefixClaim.Status.ParentPrefix) - if err := r.SetConditionAndCreateEvent(ctx, prefixClaim, netboxv1.ConditionParentPrefixComputedTrue, corev1.EventTypeNormal, msg); err != nil { - return ctrl.Result{}, err - } } /* 2. check if the matching Prefix object exists */ @@ -164,30 +187,37 @@ func (r *PrefixClaimReconciler) Reconcile(ctx context.Context, req ctrl.Request) } debugLogger.Info("the prefix was not found, will create a new prefix object now") - /* 3. check if the lease for parent prefix is available */ - leaseLockerNSN := types.NamespacedName{ - Name: convertCIDRToLeaseLockName(prefixClaim.Status.ParentPrefix), - Namespace: r.OperatorNamespace, - } - ll, err := leaselocker.NewLeaseLocker(r.RestConfig, leaseLockerNSN, req.Namespace+"/"+prefixName) - if err != nil { - return ctrl.Result{}, err - } + if prefixClaim.Status.ParentPrefix != msgCanNotInferParentPrefix { + // in the case that we can't restore from the restoration hash, we do the following - lockCtx, cancel := context.WithCancel(ctx) - defer cancel() - - /* 4. try to lock the lease for the parent prefix */ - locked := ll.TryLock(lockCtx) - if !locked { - // lock for parent prefix was not available, rescheduling - errorMsg := fmt.Sprintf("failed to lock parent prefix %s", prefixClaim.Status.ParentPrefix) - r.Recorder.Eventf(prefixClaim, corev1.EventTypeWarning, "FailedToLockParentPrefix", errorMsg) - return ctrl.Result{ - RequeueAfter: 2 * time.Second, - }, nil - } - debugLogger.Info(fmt.Sprintf("successfully locked parent prefix %s", prefixClaim.Status.ParentPrefix)) + /* 3. check if the lease for parent prefix is available */ + leaseLockerNSN := types.NamespacedName{ + Name: convertCIDRToLeaseLockName(prefixClaim.Status.ParentPrefix), + Namespace: r.OperatorNamespace, + } + ll, err := leaselocker.NewLeaseLocker(r.RestConfig, leaseLockerNSN, req.Namespace+"/"+prefixName) + if err != nil { + return ctrl.Result{}, err + } + + lockCtx, cancel := context.WithCancel(ctx) + defer cancel() + + /* 4. try to lock the lease for the parent prefix */ + locked := ll.TryLock(lockCtx) + if !locked { + // lock for parent prefix was not available, rescheduling + errorMsg := fmt.Sprintf("failed to lock parent prefix %s", prefixClaim.Status.ParentPrefix) + r.Recorder.Eventf(prefixClaim, corev1.EventTypeWarning, "FailedToLockParentPrefix", errorMsg) + return ctrl.Result{ + RequeueAfter: 2 * time.Second, + }, nil + } + debugLogger.Info(fmt.Sprintf("successfully locked parent prefix %s", prefixClaim.Status.ParentPrefix)) + } // else { + // in the case that we can restore from the restoration hash + // we skip directly to try to reclaim Prefix using restorationHash + // } // 5. try to reclaim Prefix using restorationHash h := generatePrefixRestorationHash(prefixClaim) diff --git a/internal/controller/prefixclaim_helpers.go b/internal/controller/prefixclaim_helpers.go index f36fd40..e53cbfa 100644 --- a/internal/controller/prefixclaim_helpers.go +++ b/internal/controller/prefixclaim_helpers.go @@ -87,7 +87,7 @@ func generatePrefixRestorationHash(claim *netboxv1.PrefixClaim) string { rd := PrefixClaimRestorationData{ Namespace: claim.Namespace, Name: claim.Name, - ParentPrefix: claim.Status.ParentPrefix, + ParentPrefix: claim.Spec.ParentPrefix, PrefixLength: claim.Spec.PrefixLength, Tenant: claim.Spec.Tenant, ParentPrefixSelector: parentPrefixSelectorStr, diff --git a/pkg/netbox/api/prefix_claim.go b/pkg/netbox/api/prefix_claim.go index a9c8694..164cd42 100644 --- a/pkg/netbox/api/prefix_claim.go +++ b/pkg/netbox/api/prefix_claim.go @@ -93,7 +93,7 @@ func validatePrefixLengthOrError(prefixClaim *models.PrefixClaim, prefixFamily i return nil } -func (r *NetboxClient) GetAvailablePrefixByParentPrefixSelector(prefixClaimSpec *netboxv1.PrefixClaimSpec, returnAll bool) ([]*models.Prefix, error) { +func (r *NetboxClient) GetAvailablePrefixByParentPrefixSelector(prefixClaimSpec *netboxv1.PrefixClaimSpec) ([]*models.Prefix, error) { fieldEntries := make(map[string]string) if prefixClaimSpec.Tenant != "" { details, err := r.GetTenantDetails(prefixClaimSpec.Tenant) @@ -135,12 +135,10 @@ func (r *NetboxClient) GetAvailablePrefixByParentPrefixSelector(prefixClaimSpec prefixes := make([]*models.Prefix, 0) for _, prefix := range list.Payload.Results { - if prefix.Prefix != nil { - if returnAll || r.isParentPrefixCandidate(prefixClaimSpec, *prefix.Prefix) { - prefixes = append(prefixes, &models.Prefix{ - Prefix: *prefix.Prefix, - }) - } + if prefix.Prefix != nil && r.isParentPrefixCandidate(prefixClaimSpec, *prefix.Prefix) { + prefixes = append(prefixes, &models.Prefix{ + Prefix: *prefix.Prefix, + }) } }