From 86c8e5bf758b8df34e9a54e6acf928409379f456 Mon Sep 17 00:00:00 2001
From: Jussi Nummelin
Date: Thu, 25 Mar 2021 15:24:34 +0000
Subject: [PATCH 1/2] Add readiness check for kube api-server

Signed-off-by: Jussi Nummelin
---
 pkg/component/controller/apiserver.go | 34 +++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/pkg/component/controller/apiserver.go b/pkg/component/controller/apiserver.go
index b1612a04d945..506c7fa2d6ea 100644
--- a/pkg/component/controller/apiserver.go
+++ b/pkg/component/controller/apiserver.go
@@ -16,7 +16,11 @@ limitations under the License.
 package controller
 
 import (
+	"crypto/tls"
 	"fmt"
+	"io/ioutil"
+	"net/http"
 	"path"
+	"time"
 
 	"github.com/pkg/errors"
@@ -183,4 +187,30 @@ func (a *APIServer) Stop() error {
 }
 
-// Health-check interface
-func (a *APIServer) Healthy() error { return nil }
+// Healthy implements the health-check interface. It queries the local kube
+// API server's /readyz endpoint and returns an error unless it answers 200.
+func (a *APIServer) Healthy() error {
+	// Certificate verification is skipped for this localhost probe.
+	// Keep-alives are disabled because the transport is created per call and
+	// would otherwise leave an idle connection behind after every check.
+	tr := &http.Transport{
+		TLSClientConfig:   &tls.Config{InsecureSkipVerify: true},
+		DisableKeepAlives: true,
+	}
+	// Bound the request so a hung API server cannot block the health check.
+	client := &http.Client{Transport: tr, Timeout: 5 * time.Second}
+	resp, err := client.Get("https://localhost:6443/readyz?verbose")
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		body, err := ioutil.ReadAll(resp.Body)
+		if err == nil {
+			logrus.Debugf("api server readyz output:\n %s", string(body))
+		}
+		return fmt.Errorf("expected 200 for api server ready check, got %d", resp.StatusCode)
+	}
+
+	return nil
+}

From 355971ab963b9a90b3b9152b7e94e7123d7b519d Mon Sep 17 00:00:00 2001
From: Jussi Nummelin
Date: Thu, 25 Mar 2021 18:59:24 +0000
Subject: [PATCH 2/2] Finetune HA smoketest to wait for join api

Signed-off-by: Jussi Nummelin
---
 inttest/common/footloosesuite.go              | 82 ++++++++++++++++++-
 inttest/hacontrolplane/hacontrolplane_test.go |  3 +
 2 files changed, 81 insertions(+), 4 deletions(-)

diff --git a/inttest/common/footloosesuite.go b/inttest/common/footloosesuite.go
index b46ce8ed3e5d..9fa7e4024b4d 100644
--- a/inttest/common/footloosesuite.go
+++ b/inttest/common/footloosesuite.go
@@ -17,8 +17,10 @@ package common
 
 import (
 	"context"
+	"crypto/tls"
 	"fmt"
 	"io/ioutil"
+	"net/http"
 	"os"
 	"os/signal"
 	"path"
@@ -393,7 +395,7 @@ func (s *FootlooseSuite) GetNodeLabels(node string, kc *kubernetes.Clientset) (m
 // WaitForKubeAPI waits until we see kube API online on given node.
 // Timeouts with error return in 5 mins
 func (s *FootlooseSuite) WaitForKubeAPI(node string, k0sKubeconfigArgs ...string) error {
-	s.T().Log("starting to poll kube api")
+	s.T().Logf("waiting for kube api to start on node %s", node)
 	return wait.PollImmediate(100*time.Millisecond, 5*time.Minute, func() (done bool, err error) {
 		kc, err := s.KubeClient(node, k0sKubeconfigArgs...)
 		if err != nil {
@@ -403,11 +405,80 @@ func (s *FootlooseSuite) WaitForKubeAPI(node string, k0sKubeconfigArgs ...string
 		if err != nil {
 			return false, nil
 		}
-		s.T().Logf("kube api seems to be up-and-running, version: %s", v.String())
+		ctx, cancel := context.WithTimeout(context.TODO(), 5*time.Second)
+		defer cancel()
+		res := kc.RESTClient().Get().RequestURI("/readyz").Do(ctx)
+		if res.Error() != nil {
+			return false, nil
+		}
+		var statusCode int
+		res.StatusCode(&statusCode)
+		if statusCode != http.StatusOK {
+			return false, nil
+		}
+
+		s.T().Logf("kube api up-and-running, version: %s", v.String())
+
 		return true, nil
 	})
 }
 
+// WaitJoinAPI waits until we see k0s join api up-and-running on a given node
+// Timeouts with error return in 5 mins
+func (s *FootlooseSuite) WaitJoinAPI(node string) error {
+	s.T().Logf("waiting for join api to start on node %s", node)
+	return wait.PollImmediate(100*time.Millisecond, 5*time.Minute, func() (done bool, err error) {
+		joinAPIStatus, err := s.GetHTTPStatus(node, 9443, "/v1beta1/ca")
+		if err != nil {
+			return false, nil
+		}
+		// The join API always answers 401 Unauthorized when called without a token; that response still proves it is up and serving
+		if joinAPIStatus != http.StatusUnauthorized {
+			return false, nil
+		}
+
+		s.T().Logf("join api up-and-running")
+
+		return true, nil
+
+	})
+}
+
+// GetHTTPStatus sends an HTTPS GET for path to the host port that
+// m.HostPort reports for the given container port of node, and returns the
+// response status code. TLS certificate verification is skipped.
+func (s *FootlooseSuite) GetHTTPStatus(node string, port int, path string) (int, error) {
+	m, err := s.MachineForName(node)
+	if err != nil {
+		return 0, err
+	}
+	hostPort, err := m.HostPort(port)
+	if err != nil {
+		return 0, err
+	}
+
+	// Accept path with or without a leading slash; never emit "//" in the URL.
+	if len(path) == 0 || path[0] != '/' {
+		path = "/" + path
+	}
+
+	// The transport is created per call and this helper is used from tight
+	// poll loops, so keep-alives are disabled to avoid retaining idle
+	// connections, and each request is bounded by a timeout.
+	tr := &http.Transport{
+		TLSClientConfig:   &tls.Config{InsecureSkipVerify: true},
+		DisableKeepAlives: true,
+	}
+	client := &http.Client{Transport: tr, Timeout: 5 * time.Second}
+	url := fmt.Sprintf("https://localhost:%d%s", hostPort, path)
+	resp, err := client.Get(url)
+	if err != nil {
+		return 0, err
+	}
+	defer resp.Body.Close()
+	return resp.StatusCode, nil
+}
+
 func (s *FootlooseSuite) createConfig() config.Config {
 	binPath := os.Getenv("K0S_PATH")
 	if binPath == "" {
@@ -435,10 +506,13 @@ func (s *FootlooseSuite) createConfig() config.Config {
 
 	portMaps := []config.PortMapping{
 		{
-			ContainerPort: 22,
+			ContainerPort: 22, // SSH
+		},
+		{
+			ContainerPort: 6443, // kube API
 		},
 		{
-			ContainerPort: 6443,
+			ContainerPort: 9443, // k0s join API
 		},
 	}
 
diff --git a/inttest/hacontrolplane/hacontrolplane_test.go b/inttest/hacontrolplane/hacontrolplane_test.go
index c401eb012a08..c536d89b0d2d 100644
--- a/inttest/hacontrolplane/hacontrolplane_test.go
+++ b/inttest/hacontrolplane/hacontrolplane_test.go
@@ -71,9 +71,11 @@ func (s *HAControlplaneSuite) TestDeregistration() {
 	s.Require().Error(err)
 
 	s.NoError(s.InitController(0))
+	s.NoError(s.WaitJoinAPI(s.ControllerNode(0)))
 	token, err := s.GetJoinToken("controller")
 	s.NoError(err)
 	s.NoError(s.InitController(1, token))
+	s.NoError(s.WaitJoinAPI(s.ControllerNode(1)))
 
 	ca0 := s.GetFileFromController(0, "/var/lib/k0s/pki/ca.crt")
 	s.Contains(ca0, "-----BEGIN CERTIFICATE-----")
@@ -105,6 +107,7 @@ func (s *HAControlplaneSuite) TestDeregistration() {
 	_, err = sshC1.ExecWithOutput("kill $(pidof k0s) && while pidof k0s; do sleep 0.1s; done")
 	s.Require().NoError(err)
 	s.NoError(s.InitController(1, token))
+	s.NoError(s.WaitJoinAPI(s.ControllerNode(1)))
 
 	// Make one member leave the etcd cluster
 	s.makeNodeLeave(1, membersFromJoined[s.ControllerNode(1)])