From dc852cf363a579679bd470fe15ea7f91fec16265 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan-Luis=20de=20Sousa-Valadas=20Casta=C3=B1o?= Date: Thu, 14 Mar 2024 13:47:38 +0100 Subject: [PATCH 1/2] Increase the default timeout to 6 minutes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We see the timeouts are quite tight in general, especially the remove APIs timeout in particular. As discussed in the call, increase the default timeout from 4 to 6 minutes Signed-off-by: Juan-Luis de Sousa-Valadas Castaño (cherry picked from commit e2b6eb3012596373db2203edd29ec32ded76374f) (cherry picked from commit 97d96a5cc9c864fffc6a4f9e5cae107be3048e46) (cherry picked from commit 01c78a8fe61565b139cb484784a3615001e2cc4c) --- inttest/Makefile | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/inttest/Makefile b/inttest/Makefile index bdb696559825..9ef861f64479 100644 --- a/inttest/Makefile +++ b/inttest/Makefile @@ -67,18 +67,9 @@ check-conformance: bin/sonobuoy get-conformance-results: bin/sonobuoy $(realpath bin/sonobuoy) retrieve -TIMEOUT ?= 4m +TIMEOUT ?= 6m check-ctr: TIMEOUT=10m -check-byocri: TIMEOUT=5m -# readiness check for metric tests takes between around 5 and 6 minutes. 
-check-metrics: TIMEOUT=6m -check-metricsscraper: TIMEOUT=6m - -check-calico: TIMEOUT=6m - -# Establishing konnectivity tunnels with the LB in place takes a while, thus a bit longer timeout for the smoke -check-customports: TIMEOUT=6m # Config change smoke runs actually many cases hence a bit longer timeout check-configchange: TIMEOUT=8m @@ -89,7 +80,6 @@ check-backup: TIMEOUT=10m # Autopilot 3x3 HA test can take a while to run check-ap-ha3x3: K0S_UPDATE_FROM_BIN ?= ../k0s check-ap-ha3x3: K0S_UPDATE_FROM_PATH ?= $(realpath $(K0S_UPDATE_FROM_BIN)) -check-ap-ha3x3: TIMEOUT=6m check-customports-dynamicconfig: export K0S_ENABLE_DYNAMIC_CONFIG=true check-customports-dynamicconfig: TEST_PACKAGE=customports From 08f071d535c865bcf035921d3d3293e871d2cd7c Mon Sep 17 00:00:00 2001 From: Jussi Nummelin Date: Tue, 21 May 2024 14:47:12 +0300 Subject: [PATCH 2/2] Use dedicated leasepool for worker config component With this change the first controller on the new version can apply the needed versioned resources as it will always be guaranteed to become the leader. 
Signed-off-by: Jussi Nummelin (cherry picked from commit e5a271bc4a1b97d3000f5120413c7c611db4ea30) (cherry picked from commit a9e79f6ecd6929bc27702a4ab7080ba5bc51e90c) Signed-off-by: Tom Wieczorek (cherry picked from commit a1d7798a93e7e467bfb4bf30e8bc7afa5ce97a28) (cherry picked from commit 728c6c9a7cc311748597defd5afaa951a546a69e) --- cmd/controller/controller.go | 11 +- inttest/Makefile | 3 + inttest/Makefile.variables | 1 + .../controllerworker_test.go | 209 ++++++++++++++++++ .../controller/leaderelector/leasepool.go | 6 +- 5 files changed, 226 insertions(+), 4 deletions(-) create mode 100644 inttest/ap-controllerworker/controllerworker_test.go diff --git a/cmd/controller/controller.go b/cmd/controller/controller.go index 7ed723fe706c..1430e16e4502 100644 --- a/cmd/controller/controller.go +++ b/cmd/controller/controller.go @@ -224,7 +224,9 @@ func (c *command) start(ctx context.Context) error { // One leader elector per controller if !c.SingleNode { - leaderElector = leaderelector.NewLeasePool(adminClientFactory) + // The name used to be hardcoded in the component itself + // At some point we need to rename this. 
+ leaderElector = leaderelector.NewLeasePool(adminClientFactory, "k0s-endpoint-reconciler") } else { leaderElector = &leaderelector.Dummy{Leader: true} } @@ -436,7 +438,12 @@ func (c *command) start(ctx context.Context) error { } if !slices.Contains(c.DisableComponents, constant.WorkerConfigComponentName) { - reconciler, err := workerconfig.NewReconciler(c.K0sVars, c.NodeConfig.Spec, adminClientFactory, leaderElector, enableKonnectivity) + // Create new dedicated leasepool for worker config reconciler + leaseName := fmt.Sprintf("k0s-%s-%s", constant.WorkerConfigComponentName, constant.KubernetesMajorMinorVersion) + workerConfigLeasePool := leaderelector.NewLeasePool(adminClientFactory, leaseName) + c.ClusterComponents.Add(ctx, workerConfigLeasePool) + + reconciler, err := workerconfig.NewReconciler(c.K0sVars, c.NodeConfig.Spec, adminClientFactory, workerConfigLeasePool, enableKonnectivity) if err != nil { return err } diff --git a/inttest/Makefile b/inttest/Makefile index 9ef861f64479..9f7d03be0148 100644 --- a/inttest/Makefile +++ b/inttest/Makefile @@ -81,6 +81,9 @@ check-backup: TIMEOUT=10m check-ap-ha3x3: K0S_UPDATE_FROM_BIN ?= ../k0s check-ap-ha3x3: K0S_UPDATE_FROM_PATH ?= $(realpath $(K0S_UPDATE_FROM_BIN)) +check-ap-controllerworker: K0S_UPDATE_FROM_BIN ?= ../k0s +check-ap-controllerworker: K0S_UPDATE_FROM_PATH ?= $(realpath $(K0S_UPDATE_FROM_BIN)) + check-customports-dynamicconfig: export K0S_ENABLE_DYNAMIC_CONFIG=true check-customports-dynamicconfig: TEST_PACKAGE=customports diff --git a/inttest/Makefile.variables b/inttest/Makefile.variables index a46fa72353f6..77d01987f306 100644 --- a/inttest/Makefile.variables +++ b/inttest/Makefile.variables @@ -2,6 +2,7 @@ smoketests := \ check-addons \ check-airgap \ check-ap-airgap \ + check-ap-controllerworker \ check-ap-ha3x3 \ check-ap-platformselect \ check-ap-quorum \ diff --git a/inttest/ap-controllerworker/controllerworker_test.go b/inttest/ap-controllerworker/controllerworker_test.go new file mode 100644 
index 000000000000..471d6e1ec549 --- /dev/null +++ b/inttest/ap-controllerworker/controllerworker_test.go @@ -0,0 +1,209 @@ +// Copyright 2024 k0s authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package controllerworker + +import ( + "fmt" + "strings" + "testing" + "time" + + "github.com/k0sproject/k0s/inttest/common" + aptest "github.com/k0sproject/k0s/inttest/common/autopilot" + + apconst "github.com/k0sproject/k0s/pkg/autopilot/constant" + appc "github.com/k0sproject/k0s/pkg/autopilot/controller/plans/core" + "github.com/k0sproject/k0s/pkg/constant" + "github.com/k0sproject/k0s/pkg/kubernetes/watch" + + "github.com/stretchr/testify/suite" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +type controllerworkerSuite struct { + common.FootlooseSuite +} + +const k0sConfigWithMultiController = ` +spec: + api: + address: %s + storage: + etcd: + peerAddress: %s +` + +const oldVersion = "v1.29.4+k0s.0" + +// SetupTest prepares the controller and filesystem, getting it into a consistent +// state which we can run tests against. 
+func (s *controllerworkerSuite) SetupTest() { + ctx := s.Context() + // ipAddress := s.GetControllerIPAddress(0) + var joinToken string + + for idx := 0; idx < s.FootlooseSuite.ControllerCount; idx++ { + nodeName, require := s.ControllerNode(idx), s.Require() + address := s.GetControllerIPAddress(idx) + + s.Require().NoError(s.WaitForSSH(nodeName, 2*time.Minute, 1*time.Second)) + ssh, err := s.SSH(ctx, nodeName) + require.NoError(err) + defer ssh.Disconnect() + s.PutFile(nodeName, "/tmp/k0s.yaml", fmt.Sprintf(k0sConfigWithMultiController, address, address)) + // Install older version of k0s + downloadCmd := fmt.Sprintf("curl -sSfL get.k0s.sh | K0S_VERSION=%s sh", oldVersion) + out, err := ssh.ExecWithOutput(ctx, downloadCmd) + if err != nil { + s.T().Logf("error getting k0s: %s", out) + } + require.NoError(err) + s.T().Logf("downloaded succesfully: %s", out) + // Note that the token is intentionally empty for the first controller + args := []string{ + "--debug", + "--disable-components=metrics-server,helm,konnectivity-server", + "--enable-worker", + "--config=/tmp/k0s.yaml", + } + if joinToken != "" { + s.PutFile(nodeName, "/tmp/token", joinToken) + args = append(args, "--token-file=/tmp/token") + } + out, err = ssh.ExecWithOutput(ctx, "k0s install controller "+strings.Join(args, " ")) + if err != nil { + s.T().Logf("error installing k0s: %s", out) + } + require.NoError(err) + _, err = ssh.ExecWithOutput(ctx, "k0s start") + require.NoError(err) + // s.Require().NoError(s.InitController(idx, "--config=/tmp/k0s.yaml", "--disable-components=metrics-server", "--enable-worker", joinToken)) + s.Require().NoError(s.WaitJoinAPI(nodeName)) + kc, err := s.KubeClient(nodeName) + require.NoError(err) + require.NoError(s.WaitForNodeReady(nodeName, kc)) + + node, err := kc.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) + require.NoError(err) + require.Equal("v1.29.4+k0s", node.Status.NodeInfo.KubeletVersion) + + client, err := s.ExtensionsClient(s.ControllerNode(0)) + 
s.Require().NoError(err) + + s.Require().NoError(aptest.WaitForCRDByName(ctx, client, "plans")) + s.Require().NoError(aptest.WaitForCRDByName(ctx, client, "controlnodes")) + + // With the primary controller running, create the join token for subsequent controllers. + if idx == 0 { + token, err := s.GetJoinToken("controller") + s.Require().NoError(err) + joinToken = token + } + } + + // Final sanity -- ensure all nodes see each other according to etcd + for idx := 0; idx < s.FootlooseSuite.ControllerCount; idx++ { + s.Require().Len(s.GetMembers(idx), s.FootlooseSuite.ControllerCount) + } +} + +// TestApply applies a well-formed `plan` yaml, and asserts that +// all of the correct values across different objects + controllers are correct. +func (s *controllerworkerSuite) TestApply() { + + planTemplate := ` +apiVersion: autopilot.k0sproject.io/v1beta2 +kind: Plan +metadata: + name: autopilot +spec: + id: id123 + timestamp: now + commands: + - k0supdate: + version: v0.0.0 + forceupdate: true + platforms: + linux-amd64: + url: http://localhost/dist/k0s-new + linux-arm64: + url: http://localhost/dist/k0s-new + targets: + controllers: + discovery: + static: + nodes: + - controller1 + - controller2 + - controller0 +` + ctx := s.Context() + manifestFile := "/tmp/happy.yaml" + s.PutFileTemplate(s.ControllerNode(0), manifestFile, planTemplate, nil) + + out, err := s.RunCommandController(0, fmt.Sprintf("/usr/local/bin/k0s kubectl apply -f %s", manifestFile)) + s.T().Logf("kubectl apply output: '%s'", out) + s.Require().NoError(err) + + client, err := s.AutopilotClient(s.ControllerNode(0)) + s.Require().NoError(err) + s.NotEmpty(client) + + // The plan has enough information to perform a successful update of k0s, so wait for it. 
+ plan, err := aptest.WaitForPlanState(s.Context(), client, apconst.AutopilotName, appc.PlanCompleted) + s.Require().NoError(err) + + s.Equal(1, len(plan.Status.Commands)) + cmd := plan.Status.Commands[0] + + s.Equal(appc.PlanCompleted, cmd.State) + s.NotNil(cmd.K0sUpdate) + s.NotNil(cmd.K0sUpdate.Controllers) + s.Empty(cmd.K0sUpdate.Workers) + + for _, node := range cmd.K0sUpdate.Controllers { + s.Equal(appc.SignalCompleted, node.State) + } + + kc, err := s.KubeClient(s.ControllerNode(0)) + s.NoError(err) + + for idx := 0; idx < s.FootlooseSuite.ControllerCount; idx++ { + nodeName, require := s.ControllerNode(idx), s.Require() + require.NoError(s.WaitForNodeReady(nodeName, kc)) + // Wait till we see kubelet reporting the expected version + err := watch.Nodes(kc.CoreV1().Nodes()). + WithObjectName(nodeName). + WithErrorCallback(common.RetryWatchErrors(s.T().Logf)). + Until(ctx, func(node *corev1.Node) (bool, error) { + return strings.Contains(node.Status.NodeInfo.KubeletVersion, fmt.Sprintf("v%s.", constant.KubernetesMajorMinorVersion)), nil + }) + require.NoError(err) + } +} + +// TestQuorumSuite sets up a suite using 3 controllers for quorum, and runs various +// autopilot upgrade scenarios against them. +func TestQuorumSuite(t *testing.T) { + suite.Run(t, &controllerworkerSuite{ + common.FootlooseSuite{ + ControllerCount: 3, + WorkerCount: 0, + LaunchMode: common.LaunchModeOpenRC, + }, + }) +} diff --git a/pkg/component/controller/leaderelector/leasepool.go b/pkg/component/controller/leaderelector/leasepool.go index 3456b88770b2..b63165250550 100644 --- a/pkg/component/controller/leaderelector/leasepool.go +++ b/pkg/component/controller/leaderelector/leasepool.go @@ -37,13 +37,14 @@ type LeasePool struct { acquiredLeaseCallbacks []func() lostLeaseCallbacks []func() + name string } var _ Interface = (*LeasePool)(nil) var _ manager.Component = (*LeasePool)(nil) // NewLeasePool creates a new leader elector using a Kubernetes lease pool. 
-func NewLeasePool(kubeClientFactory kubeutil.ClientFactoryInterface) *LeasePool { +func NewLeasePool(kubeClientFactory kubeutil.ClientFactoryInterface, name string) *LeasePool { d := atomic.Value{} d.Store(false) return &LeasePool{ @@ -51,6 +52,7 @@ func NewLeasePool(kubeClientFactory kubeutil.ClientFactoryInterface) *LeasePool kubeClientFactory: kubeClientFactory, log: logrus.WithFields(logrus.Fields{"component": "poolleaderelector"}), leaderStatus: d, + name: name, } } @@ -63,7 +65,7 @@ func (l *LeasePool) Start(ctx context.Context) error { if err != nil { return fmt.Errorf("can't create kubernetes rest client for lease pool: %v", err) } - leasePool, err := leaderelection.NewLeasePool(ctx, client, "k0s-endpoint-reconciler", + leasePool, err := leaderelection.NewLeasePool(ctx, client, l.name, leaderelection.WithLogger(l.log), leaderelection.WithContext(ctx)) if err != nil {