Skip to content

Commit

Permalink
webhook: Avoid setting DisableDrain while updating
Browse files Browse the repository at this point in the history
Disable the drain while a node is updating its configuration
prevents the config-daemon to uncordon the node, leading
to a node that needs to be manually uncordoned.

Signed-off-by: Andrea Panattoni <[email protected]>
  • Loading branch information
zeeke committed Jul 4, 2023
1 parent 1a6040f commit 91ea65f
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 8 deletions.
2 changes: 1 addition & 1 deletion pkg/webhook/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
snclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned"
)

var snclient *snclientset.Clientset
var snclient snclientset.Interface
var kubeclient *kubernetes.Clientset

func SetupInClusterClient() error {
Expand Down
43 changes: 43 additions & 0 deletions pkg/webhook/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"

k8serrors "k8s.io/apimachinery/pkg/api/errors"

sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
)

Expand Down Expand Up @@ -47,9 +49,50 @@ func validateSriovOperatorConfig(cr *sriovnetworkv1.SriovOperatorConfig, operati
warnings = append(warnings, "Node draining is disabled for applying SriovNetworkNodePolicy, it may result in workload interruption.")
}

err := validateSriovOperatorConfigDisableDrain(cr)
if err != nil {
return false, warnings, err
}

return true, warnings, nil
}

// validateSriovOperatorConfigDisableDrain checks if the user is setting `.Spec.DisableDrain` from false to true while
// operator is updating one or more nodes. Disabling the drain at this stage would prevent the operator to uncordon a node at
// the end of the update operation, keeping nodes un-schedulable until manual intervention.
func validateSriovOperatorConfigDisableDrain(cr *sriovnetworkv1.SriovOperatorConfig) error {
if !cr.Spec.DisableDrain {
return nil
}

previousConfig, err := snclient.SriovnetworkV1().SriovOperatorConfigs(cr.Namespace).Get(context.Background(), cr.Name, metav1.GetOptions{})
if err != nil {
if k8serrors.IsNotFound(err) {
return nil
}
return fmt.Errorf("can't validate SriovOperatorConfig[%s] DisableDrain against its previous value: %q", cr.Name, err)
}

if previousConfig.Spec.DisableDrain == cr.Spec.DisableDrain {
// DisableDrain didn't change
return nil
}

// DisableDrain has been changed `false -> true`, check if any node is updating
nodeStates, err := snclient.SriovnetworkV1().SriovNetworkNodeStates(namespace).List(context.Background(), metav1.ListOptions{})
if err != nil {
return fmt.Errorf("can't validate SriovOperatorConfig[%s] DisableDrain transition to true: %q", cr.Name, err)
}

for _, nodeState := range nodeStates.Items {
if nodeState.Status.SyncStatus == "InProgress" {
return fmt.Errorf("can't set Spec.DisableDrain = true while node[%s] is updating", nodeState.Name)
}
}

return nil
}

func validateSriovNetworkNodePolicy(cr *sriovnetworkv1.SriovNetworkNodePolicy, operation v1.Operation) (bool, []string, error) {
glog.V(2).Infof("validateSriovNetworkNodePolicy: %v", cr)
var warnings []string
Expand Down
54 changes: 47 additions & 7 deletions pkg/webhook/validate_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package webhook

import (
"context"
"fmt"
"os"
"testing"
Expand All @@ -14,6 +15,8 @@ import (

. "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
constants "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts"

fakesnclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned/fake"
)

func TestMain(m *testing.M) {
Expand Down Expand Up @@ -129,11 +132,8 @@ func NewNode() *corev1.Node {
return &corev1.Node{Spec: corev1.NodeSpec{ProviderID: "openstack"}}
}

func TestValidateSriovOperatorConfigWithDefaultOperatorConfig(t *testing.T) {
var err error
var ok bool
var w []string
config := &SriovOperatorConfig{
func newDefaultOperatorConfig() *SriovOperatorConfig {
return &SriovOperatorConfig{
ObjectMeta: metav1.ObjectMeta{
Name: "default",
},
Expand All @@ -145,16 +145,23 @@ func TestValidateSriovOperatorConfigWithDefaultOperatorConfig(t *testing.T) {
LogLevel: 2,
},
}
}

func TestValidateSriovOperatorConfigWithDefaultOperatorConfig(t *testing.T) {
g := NewGomegaWithT(t)
ok, _, err = validateSriovOperatorConfig(config, "DELETE")

config := newDefaultOperatorConfig()
snclient = fakesnclientset.NewSimpleClientset()

ok, _, err := validateSriovOperatorConfig(config, "DELETE")
g.Expect(err).To(HaveOccurred())
g.Expect(ok).To(Equal(false))

ok, _, err = validateSriovOperatorConfig(config, "UPDATE")
g.Expect(err).NotTo(HaveOccurred())
g.Expect(ok).To(Equal(true))

ok, w, err = validateSriovOperatorConfig(config, "UPDATE")
ok, w, err := validateSriovOperatorConfig(config, "UPDATE")
g.Expect(err).NotTo(HaveOccurred())
g.Expect(ok).To(Equal(true))
g.Expect(w[0]).To(ContainSubstring("Node draining is disabled"))
Expand All @@ -164,6 +171,39 @@ func TestValidateSriovOperatorConfigWithDefaultOperatorConfig(t *testing.T) {
g.Expect(ok).To(Equal(true))
}

func TestValidateSriovOperatorConfigDisableDrain(t *testing.T) {
g := NewGomegaWithT(t)

config := newDefaultOperatorConfig()
config.Spec.DisableDrain = false

nodeState := &SriovNetworkNodeState{
ObjectMeta: metav1.ObjectMeta{Name: "worker-1", Namespace: namespace},
Status: SriovNetworkNodeStateStatus{
SyncStatus: "InProgress",
},
}

snclient = fakesnclientset.NewSimpleClientset(
config,
nodeState,
)

config.Spec.DisableDrain = true
ok, _, err := validateSriovOperatorConfig(config, "UPDATE")
g.Expect(err).To(MatchError("can't set Spec.DisableDrain = true while node[worker-1] is updating"))
g.Expect(ok).To(Equal(false))

// Simulate node update finished
nodeState.Status.SyncStatus = "Succeeded"
snclient.SriovnetworkV1().SriovNetworkNodeStates(namespace).
Update(context.Background(), nodeState, metav1.UpdateOptions{})

ok, _, err = validateSriovOperatorConfig(config, "UPDATE")
g.Expect(err).NotTo(HaveOccurred())
g.Expect(ok).To(Equal(true))
}

func TestValidateSriovNetworkNodePolicyWithDefaultPolicy(t *testing.T) {
var err error
var ok bool
Expand Down

0 comments on commit 91ea65f

Please sign in to comment.