From dada819e59f5e7278a4e38993219f90c3b09abd2 Mon Sep 17 00:00:00 2001 From: Tom Wieczorek Date: Tue, 8 Oct 2024 15:17:44 +0200 Subject: [PATCH] Wait for kine to be ready during startup So k0s will not continue with its startup sequence, and will exit with an error message indicating that kine was not okay. Signed-off-by: Tom Wieczorek --- pkg/component/controller/kine.go | 45 ++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/pkg/component/controller/kine.go b/pkg/component/controller/kine.go index 7cde3caeebf6..cad33caa76df 100644 --- a/pkg/component/controller/kine.go +++ b/pkg/component/controller/kine.go @@ -25,20 +25,21 @@ import ( "path/filepath" "time" + "github.com/k0sproject/k0s/internal/pkg/dir" + "github.com/k0sproject/k0s/internal/pkg/users" "github.com/k0sproject/k0s/pkg/apis/k0s/v1beta1" + "github.com/k0sproject/k0s/pkg/assets" "github.com/k0sproject/k0s/pkg/component/manager" "github.com/k0sproject/k0s/pkg/config" "github.com/k0sproject/k0s/pkg/config/kine" + "github.com/k0sproject/k0s/pkg/constant" "github.com/k0sproject/k0s/pkg/etcd" - clientv3 "go.etcd.io/etcd/client/v3" + "github.com/k0sproject/k0s/pkg/supervisor" - "github.com/sirupsen/logrus" + "k8s.io/apimachinery/pkg/util/wait" - "github.com/k0sproject/k0s/internal/pkg/dir" - "github.com/k0sproject/k0s/internal/pkg/users" - "github.com/k0sproject/k0s/pkg/assets" - "github.com/k0sproject/k0s/pkg/constant" - "github.com/k0sproject/k0s/pkg/supervisor" + "github.com/sirupsen/logrus" + clientv3 "go.etcd.io/etcd/client/v3" ) // Kine implement the component interface to run kine @@ -131,7 +132,26 @@ func (k *Kine) Start(ctx context.Context) error { GID: k.gid, } - return k.supervisor.Supervise() + if err := k.supervisor.Supervise(); err != nil { + return err + } + + var err error + waitErr := wait.PollUntilContextTimeout(ctx, 350*time.Millisecond, 2*time.Minute, true, func(ctx context.Context) (bool, error) { + ctx, cancel := context.WithTimeout(ctx, 1*time.Second) + defer cancel() + err = k.ready(ctx) + return err == nil, nil + }) + + if waitErr != nil { + if err != nil { + return fmt.Errorf("%w (%w)", waitErr, err) + } + return waitErr + } + + return nil } // Stop stops kine @@ -144,15 +164,18 @@ const hcKey = "/k0s-health-check" const hcValue = "value" func (k *Kine) Ready() error { - ctx, cancel := context.WithTimeout(context.TODO(), 1*time.Second) - defer cancel() + return k.ready(context.TODO()) +} +func (k *Kine) ready(ctx context.Context) error { + ctx, cancel := context.WithTimeout(ctx, 1*time.Second) + defer cancel() ok, err := k.bypassClient.Write(ctx, hcKey, hcValue, 64*time.Second) if err != nil { return fmt.Errorf("kine-etcd-health: %w", err) } if !ok { - logrus.Warningf("kine-etcd-health: health-check value was not written") + return errors.New("kine-etcd-health: health-check value was not written") } v, err := k.bypassClient.Read(ctx, hcKey)