Skip to content

Commit

Permalink
This commit introduces a new redesign on how the operator resets the …
Browse files Browse the repository at this point in the history
…device plugin

* use a general nodeSelector to avoid updating the daemonset yaml
* remove the config-daemon removing pod (better security)
* make the operator in charge of resetting the device plugin via annotations
* mark the node as cordoned BEFORE we remove the device plugin (without drain) to avoid scheduling new pods until the device plugin is back up

Signed-off-by: Sebastian Sch <[email protected]>
  • Loading branch information
SchSeba committed Jul 30, 2024
1 parent ee40683 commit ba8d6ef
Show file tree
Hide file tree
Showing 14 changed files with 285 additions and 583 deletions.
167 changes: 165 additions & 2 deletions controllers/drain_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,23 @@ func (dr *DrainReconcile) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
// node requests to be on idle and the current state is idle
// we don't do anything
if nodeStateDrainAnnotationCurrent == constants.DrainIdle {
reqLogger.Info("node and nodeState are on idle nothing todo")
// in case we have policy there is nothing else to do
if len(nodeNetworkState.Spec.Interfaces) > 0 {
reqLogger.Info("node and nodeState are on idle nothing todo")
} else {
// if we don't have any policy
// let's be sure the device plugin label doesn't exist on the node
reqLogger.Info("remove Device plugin from node nodeState spec is empty")
err = utils.LabelNode(ctx, node.Name, constants.SriovDevicePluginEnabledLabel, constants.SriovDevicePluginEnabledLabelDisabled, dr.Client)
if err != nil {
log.Log.Error(err, "failed to label node for device plugin label",
"labelKey",
constants.SriovDevicePluginEnabledLabel,
"labelValue",
constants.SriovDevicePluginEnabledLabelDisabled)
return reconcile.Result{}, err
}
}
return reconcile.Result{}, nil
}

Expand Down Expand Up @@ -172,6 +188,29 @@ func (dr *DrainReconcile) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
return reconcile.Result{RequeueAfter: 5 * time.Second}, nil
}

// check the device plugin exited and enable it again
// only if we have something in the node state spec
if len(nodeNetworkState.Spec.Interfaces) > 0 {
completed, err = dr.enableSriovDevicePlugin(ctx, node)
if err != nil {
reqLogger.Error(err, "failed to enable SriovDevicePlugin")
dr.recorder.Event(nodeNetworkState,
corev1.EventTypeWarning,
"DrainController",
"failed to enable SriovDevicePlugin")
return ctrl.Result{}, err
}

if !completed {
reqLogger.Info("sriov device plugin enable was not completed")
dr.recorder.Event(nodeNetworkState,
corev1.EventTypeWarning,
"DrainController",
"sriov device plugin enable was not completed")
return reconcile.Result{RequeueAfter: 5 * time.Second}, nil
}
}

// move the node state back to idle
err = utils.AnnotateObject(ctx, nodeNetworkState, constants.NodeStateDrainAnnotationCurrent, constants.DrainIdle, dr.Client)
if err != nil {
Expand Down Expand Up @@ -209,7 +248,7 @@ func (dr *DrainReconcile) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
}
}

// class the drain function that will also call drain to other platform providers like openshift
// call the drain function that will also call drain to other platform providers like openshift
drained, err := dr.drainer.DrainNode(ctx, node, nodeDrainAnnotation == constants.RebootRequired)
if err != nil {
reqLogger.Error(err, "error trying to drain the node")
Expand All @@ -230,6 +269,17 @@ func (dr *DrainReconcile) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
return reconcile.Result{RequeueAfter: 5 * time.Second}, nil
}

reqLogger.Info("remove Device plugin from node")
err = utils.LabelNode(ctx, node.Name, constants.SriovDevicePluginEnabledLabel, constants.SriovDevicePluginEnabledLabelDisabled, dr.Client)
if err != nil {
log.Log.Error(err, "failed to label node for device plugin label",
"labelKey",
constants.SriovDevicePluginEnabledLabel,
"labelValue",
constants.SriovDevicePluginEnabledLabelDisabled)
return reconcile.Result{}, err
}

// if we manage to drain we label the node state with drain completed and finish
err = utils.AnnotateObject(ctx, nodeNetworkState, constants.NodeStateDrainAnnotationCurrent, constants.DrainComplete, dr.Client)
if err != nil {
Expand All @@ -243,6 +293,60 @@ func (dr *DrainReconcile) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
"DrainController",
"node drain completed")
return ctrl.Result{}, nil
} else if nodeDrainAnnotation == constants.DevicePluginResetRequired {
// nothing to do here we need to wait for the node to move back to idle
if nodeStateDrainAnnotationCurrent == constants.DrainComplete {
reqLogger.Info("node requested a drain and nodeState is on drain completed nothing todo")
return ctrl.Result{}, nil
}

// if we are on idle state we move it to drain
if nodeStateDrainAnnotationCurrent == constants.DrainIdle {
err = utils.AnnotateObject(ctx, nodeNetworkState, constants.NodeStateDrainAnnotationCurrent, constants.Draining, dr.Client)
if err != nil {
reqLogger.Error(err, "failed to annotate node with annotation", "annotation", constants.Draining)
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
}

// This covers the case where we only need to reset the device plugin
// for that we are going to cordon the node, so we don't get new pods allocated
// to the node in the time we remove the device plugin
err = dr.drainer.RunCordonOrUncordon(ctx, node, true)
if err != nil {
log.Log.Error(err, "failed to cordon on node")
return reconcile.Result{}, err
}

// we switch the sriov label to disable and mark the drain as completed
// no need to wait for the device plugin to exit here as we cordon the node,
// and we want the config-daemon to start the configuration in parallel with the kube-controller removing the pod
// we check the device plugin was removed when the config-daemon moves its desired state to idle
reqLogger.Info("disable Device plugin from node")
err = utils.LabelNode(ctx, node.Name, constants.SriovDevicePluginEnabledLabel, constants.SriovDevicePluginEnabledLabelDisabled, dr.Client)
if err != nil {
log.Log.Error(err, "failed to label node for device plugin label",
"labelKey",
constants.SriovDevicePluginEnabledLabel,
"labelValue",
constants.SriovDevicePluginEnabledLabelDisabled)
return reconcile.Result{}, err
}

// if we manage to cordon we label the node state with drain completed and finish
err = utils.AnnotateObject(ctx, nodeNetworkState, constants.NodeStateDrainAnnotationCurrent, constants.DrainComplete, dr.Client)
if err != nil {
reqLogger.Error(err, "failed to annotate node with annotation", "annotation", constants.DrainComplete)
return ctrl.Result{}, err
}

reqLogger.Info("node cordoned successfully and device plugin removed")
dr.recorder.Event(nodeNetworkState,
corev1.EventTypeWarning,
"DrainController",
"node cordoned and device plugin removed completed")
return ctrl.Result{}, nil
}

reqLogger.Error(nil, "unexpected node drain annotation")
Expand Down Expand Up @@ -436,6 +540,65 @@ func (dr *DrainReconcile) findNodePoolConfig(ctx context.Context, node *corev1.N
}
}

// enableSriovDevicePlugin sets the device plugin label on the requested node to
// enabled and reports whether the device plugin pod is running on that node.
//
// It returns (false, nil) when the caller should retry later: either the node
// label is currently disabled and a device plugin pod is still listed on the
// node, or the label was enabled but the pod is not yet in the Running phase.
// It returns (true, nil) only when exactly one device plugin pod is listed on
// the node and that pod is in the Running phase. Failures to list pods or to
// label the node are logged and returned.
func (dr *DrainReconcile) enableSriovDevicePlugin(ctx context.Context, node *corev1.Node) (bool, error) {
	logger := log.FromContext(ctx)
	logger.Info("enableSriovDevicePlugin():")

	// The device plugin state is written with utils.LabelNode, so it lives in
	// the node labels, not the annotations. While the label is disabled, make
	// sure no device plugin pod is left on the node before enabling it again.
	if node.Labels[constants.SriovDevicePluginEnabledLabel] == constants.SriovDevicePluginEnabledLabelDisabled {
		pods, err := dr.getDevicePluginPodsOnNode(node.Name)
		if err != nil {
			logger.Error(err, "failed to list device plugin pods running on node")
			return false, err
		}

		if len(pods.Items) != 0 {
			logger.V(2).Info("device plugin pod still terminating on node")
			return false, nil
		}
	}

	logger.Info("enable Device plugin from node")
	err := utils.LabelNode(ctx, node.Name, constants.SriovDevicePluginEnabledLabel, constants.SriovDevicePluginEnabledLabelEnabled, dr.Client)
	if err != nil {
		logger.Error(err, "failed to label node for device plugin label",
			"labelKey",
			constants.SriovDevicePluginEnabledLabel,
			"labelValue",
			constants.SriovDevicePluginEnabledLabelEnabled)
		return false, err
	}

	// check if the device plugin pod is running on the node
	pods, err := dr.getDevicePluginPodsOnNode(node.Name)
	if err != nil {
		logger.Error(err, "failed to list device plugin pods running on node")
		return false, err
	}

	// Only a single pod in the Running phase counts as completed; anything
	// else makes the caller retry.
	if len(pods.Items) == 1 && pods.Items[0].Status.Phase == corev1.PodRunning {
		logger.Info("Device plugin pod running on node")
		return true, nil
	}

	logger.V(2).Info("Device plugin pod still not running on node")
	return false, nil
}

// getDevicePluginPodsOnNode lists the pods labelled app=sriov-device-plugin
// whose spec.nodeName equals nodeName. The selectors are passed as raw list
// options together with ResourceVersion "0". The pod list is returned
// alongside any error from the List call.
func (dr *DrainReconcile) getDevicePluginPodsOnNode(nodeName string) (*corev1.PodList, error) {
	rawOpts := &metav1.ListOptions{
		LabelSelector:   "app=sriov-device-plugin",
		FieldSelector:   "spec.nodeName=" + nodeName,
		ResourceVersion: "0",
	}

	podList := new(corev1.PodList)
	if err := dr.List(context.Background(), podList, &client.ListOptions{Raw: rawOpts}); err != nil {
		return podList, err
	}
	return podList, nil
}

// SetupWithManager sets up the controller with the Manager.
func (dr *DrainReconcile) SetupWithManager(mgr ctrl.Manager) error {
createUpdateEnqueue := handler.Funcs{
Expand Down
Loading

0 comments on commit ba8d6ef

Please sign in to comment.