Skip to content

Commit

Permalink
Update restoration hash computation and restoring from hash flow
Browse files Browse the repository at this point in the history
  • Loading branch information
henrybear327 committed Nov 11, 2024
1 parent e571019 commit 3ec318a
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 63 deletions.
140 changes: 85 additions & 55 deletions internal/controller/prefixclaim_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ import (
"github.com/netbox-community/netbox-operator/pkg/netbox/api"
)

const (
msgCanNotInferParentPrefix = "Prefix restored from hash, can't infer the parent prefix"
)

// PrefixClaimReconciler reconciles a PrefixClaim object
type PrefixClaimReconciler struct {
client.Client
Expand Down Expand Up @@ -76,51 +80,70 @@ func (r *PrefixClaimReconciler) Reconcile(ctx context.Context, req ctrl.Request)
}

/* 1. compute and assign the parent prefix if required */
// The current design will use prefixClaim.Status.ParentPrefix for storing the computed parent prefix, and as the source of truth for future parent prefix references
// The current design will use prefixClaim.Status.ParentPrefix for storing the computed parent prefix,
// and as the source of truth for future parent prefix references
if prefixClaim.Status.ParentPrefix == "" /* parent prefix not yet computed/assigned */ {
if prefixClaim.Spec.ParentPrefix != "" {
prefixClaim.Status.ParentPrefix = prefixClaim.Spec.ParentPrefix

// set status, and condition field
msg := fmt.Sprintf("parentPrefix is provided in CR: %v", prefixClaim.Status.ParentPrefix)
if err := r.SetConditionAndCreateEvent(ctx, prefixClaim, netboxv1.ConditionParentPrefixComputedTrue, corev1.EventTypeNormal, msg); err != nil {
return ctrl.Result{}, err
}
} else if len(prefixClaim.Spec.ParentPrefixSelector) > 0 {
// we first check if a prefix can be restored from the netbox

// since the parent prefix is part of the restoration hash computation
// we need take all parent prefix candidates into consideration when recovering for it
allParentPrefixCandidates, err := r.NetboxClient.GetAvailablePrefixByParentPrefixSelector(&prefixClaim.Spec, true)
// since the parent prefix is not part of the restoration hash computation
// we can quickly check to see if the prefix with the restoration hash is matched in NetBox
h := generatePrefixRestorationHash(prefixClaim)
canBeRestored, err := r.NetboxClient.RestoreExistingPrefixByHash(h)
if err != nil {
// we requeue as this might be a temporary query error
return ctrl.Result{Requeue: true}, nil
}

// query one by one to see if the restoration hash exists
// TODO(henrybear327): throw an warning if the prefix count is more than 10 (or a threshold) since it's going to be a lot of API calls
var canBeRestored *models.Prefix
for _, parentPrefix := range allParentPrefixCandidates {
prefixClaim.Status.ParentPrefix = parentPrefix.Prefix
if canBeRestored != nil {
// Yes, so we will claim the prefix directly
/*
Because the parent prefix isn't part of the restoration hash,
we can't be 100% certain what was the original parent prefix when we performed the initial allocation when restoring.
h := generatePrefixRestorationHash(prefixClaim)
canBeRestored, err = r.NetboxClient.RestoreExistingPrefixByHash(h)
if err != nil {
// we can't ignore failures as we need to make sure that all potential parent prefixes are tried, so we requeue
return ctrl.Result{Requeue: true}, nil
}
if canBeRestored != nil {
// restoration hash matched
break
}
Consider the following case:
prefixClaim.Status.ParentPrefix = ""
}
1) A prefix (P1) is allocated from (P), with preserveInNetBox set to true.
|------------------------| Parent prefix (P)
|-----| Allocated prefix (P1)
if canBeRestored != nil {
// yes, so we need to use the previously allocated parent prefix by setting prefixClaim.Status.ParentPrefix (which is already done)
2) Prefix (P1) is deleted using the NetBox operator (but still visible in NetBox because of preserveInNetBox being true)
|------------------------| Parent prefix (P)
|-----| Allocated prefix (P1)
3) In NetBox, another prefix (P2) is created manually
|------------------------| Parent prefix (P)
|---------------| Manually added prefix (P2)
|-----| Allocated prefix (P1)
4) Perform prefix restoration
Now we won't know if P or P2 is the parent prefix.
But this doesn't matter since we are certain which was the original prefix we allocated and we can recover exactly that one.
*/

// since we can't infer the parent prefix
// we write a special string in the ParentPrefix status field indicating the situation
prefixClaim.Status.ParentPrefix = msgCanNotInferParentPrefix

if err := r.SetConditionAndCreateEvent(ctx, prefixClaim, netboxv1.ConditionParentPrefixComputedTrue, corev1.EventTypeNormal, msgCanNotInferParentPrefix); err != nil {
return ctrl.Result{}, err
}
} else {
// no, so we need to select one parent prefix from prefix candidates
// No, so we need to select one parent prefix from prefix candidates

// The main idea is that we select one of the available parent prefixes as the ParentPrefix for all subsequent computation
// The existing algorithm for prefix allocation within a ParentPrefix remains unchanged

// fetch available prefixes from netbox
parentPrefixCandidates, err := r.NetboxClient.GetAvailablePrefixByParentPrefixSelector(&prefixClaim.Spec, false)
parentPrefixCandidates, err := r.NetboxClient.GetAvailablePrefixByParentPrefixSelector(&prefixClaim.Spec)
if err != nil || len(parentPrefixCandidates) == 0 {
errorMsg := fmt.Sprintf("no parent prefix can be obtained with the query conditions set in ParentPrefixSelector, err = %v, number of candidates = %v", err, len(parentPrefixCandidates))
if err := r.SetConditionAndCreateEvent(ctx, prefixClaim, netboxv1.ConditionParentPrefixComputedFalse, corev1.EventTypeWarning, errorMsg); err != nil {
Expand All @@ -134,6 +157,12 @@ func (r *PrefixClaimReconciler) Reconcile(ctx context.Context, req ctrl.Request)
// TODO(henrybear327): use best-fit algorithm to pick a parent prefix
parentPrefixCandidate := parentPrefixCandidates[0]
prefixClaim.Status.ParentPrefix = parentPrefixCandidate.Prefix

// set status, and condition field
msg := fmt.Sprintf("parentPrefix is computed: %v", prefixClaim.Status.ParentPrefix)
if err := r.SetConditionAndCreateEvent(ctx, prefixClaim, netboxv1.ConditionParentPrefixComputedTrue, corev1.EventTypeNormal, msg); err != nil {
return ctrl.Result{}, err
}
}
} else {
// this case should not be triggered anymore, as we have validation rules put in place on the CR
Expand All @@ -142,12 +171,6 @@ func (r *PrefixClaimReconciler) Reconcile(ctx context.Context, req ctrl.Request)
}
return ctrl.Result{}, nil
}

// set status, and condition field
msg := fmt.Sprintf("parentPrefix is computed: %v", prefixClaim.Status.ParentPrefix)
if err := r.SetConditionAndCreateEvent(ctx, prefixClaim, netboxv1.ConditionParentPrefixComputedTrue, corev1.EventTypeNormal, msg); err != nil {
return ctrl.Result{}, err
}
}

/* 2. check if the matching Prefix object exists */
Expand All @@ -164,30 +187,37 @@ func (r *PrefixClaimReconciler) Reconcile(ctx context.Context, req ctrl.Request)
}
debugLogger.Info("the prefix was not found, will create a new prefix object now")

/* 3. check if the lease for parent prefix is available */
leaseLockerNSN := types.NamespacedName{
Name: convertCIDRToLeaseLockName(prefixClaim.Status.ParentPrefix),
Namespace: r.OperatorNamespace,
}
ll, err := leaselocker.NewLeaseLocker(r.RestConfig, leaseLockerNSN, req.Namespace+"/"+prefixName)
if err != nil {
return ctrl.Result{}, err
}
if prefixClaim.Status.ParentPrefix != msgCanNotInferParentPrefix {
// in the case that we can't restore from the restoration hash, we do the following

lockCtx, cancel := context.WithCancel(ctx)
defer cancel()

/* 4. try to lock the lease for the parent prefix */
locked := ll.TryLock(lockCtx)
if !locked {
// lock for parent prefix was not available, rescheduling
errorMsg := fmt.Sprintf("failed to lock parent prefix %s", prefixClaim.Status.ParentPrefix)
r.Recorder.Eventf(prefixClaim, corev1.EventTypeWarning, "FailedToLockParentPrefix", errorMsg)
return ctrl.Result{
RequeueAfter: 2 * time.Second,
}, nil
}
debugLogger.Info(fmt.Sprintf("successfully locked parent prefix %s", prefixClaim.Status.ParentPrefix))
/* 3. check if the lease for parent prefix is available */
leaseLockerNSN := types.NamespacedName{
Name: convertCIDRToLeaseLockName(prefixClaim.Status.ParentPrefix),
Namespace: r.OperatorNamespace,
}
ll, err := leaselocker.NewLeaseLocker(r.RestConfig, leaseLockerNSN, req.Namespace+"/"+prefixName)
if err != nil {
return ctrl.Result{}, err
}

lockCtx, cancel := context.WithCancel(ctx)
defer cancel()

/* 4. try to lock the lease for the parent prefix */
locked := ll.TryLock(lockCtx)
if !locked {
// lock for parent prefix was not available, rescheduling
errorMsg := fmt.Sprintf("failed to lock parent prefix %s", prefixClaim.Status.ParentPrefix)
r.Recorder.Eventf(prefixClaim, corev1.EventTypeWarning, "FailedToLockParentPrefix", errorMsg)
return ctrl.Result{
RequeueAfter: 2 * time.Second,
}, nil
}
debugLogger.Info(fmt.Sprintf("successfully locked parent prefix %s", prefixClaim.Status.ParentPrefix))
} // else {
// in the case that we can restore from the restoration hash
// we skip directly to try to reclaim Prefix using restorationHash
// }

// 5. try to reclaim Prefix using restorationHash
h := generatePrefixRestorationHash(prefixClaim)
Expand Down
2 changes: 1 addition & 1 deletion internal/controller/prefixclaim_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ func generatePrefixRestorationHash(claim *netboxv1.PrefixClaim) string {
rd := PrefixClaimRestorationData{
Namespace: claim.Namespace,
Name: claim.Name,
ParentPrefix: claim.Status.ParentPrefix,
ParentPrefix: claim.Spec.ParentPrefix,
PrefixLength: claim.Spec.PrefixLength,
Tenant: claim.Spec.Tenant,
ParentPrefixSelector: parentPrefixSelectorStr,
Expand Down
12 changes: 5 additions & 7 deletions pkg/netbox/api/prefix_claim.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ func validatePrefixLengthOrError(prefixClaim *models.PrefixClaim, prefixFamily i
return nil
}

func (r *NetboxClient) GetAvailablePrefixByParentPrefixSelector(prefixClaimSpec *netboxv1.PrefixClaimSpec, returnAll bool) ([]*models.Prefix, error) {
func (r *NetboxClient) GetAvailablePrefixByParentPrefixSelector(prefixClaimSpec *netboxv1.PrefixClaimSpec) ([]*models.Prefix, error) {
fieldEntries := make(map[string]string)
if prefixClaimSpec.Tenant != "" {
details, err := r.GetTenantDetails(prefixClaimSpec.Tenant)
Expand Down Expand Up @@ -135,12 +135,10 @@ func (r *NetboxClient) GetAvailablePrefixByParentPrefixSelector(prefixClaimSpec

prefixes := make([]*models.Prefix, 0)
for _, prefix := range list.Payload.Results {
if prefix.Prefix != nil {
if returnAll || r.isParentPrefixCandidate(prefixClaimSpec, *prefix.Prefix) {
prefixes = append(prefixes, &models.Prefix{
Prefix: *prefix.Prefix,
})
}
if prefix.Prefix != nil && r.isParentPrefixCandidate(prefixClaimSpec, *prefix.Prefix) {
prefixes = append(prefixes, &models.Prefix{
Prefix: *prefix.Prefix,
})
}
}

Expand Down

0 comments on commit 3ec318a

Please sign in to comment.