From 05d0613be88d9395a1169412705b01a49ef45e93 Mon Sep 17 00:00:00 2001 From: Jussi Nummelin Date: Tue, 25 Jul 2023 13:42:23 +0300 Subject: [PATCH] Add etcd and kine metrics collection with `--enable-metrics-scraper` Fixes #3247 Signed-off-by: Jussi Nummelin --- cmd/controller/controller.go | 2 +- docs/system-monitoring.md | 10 ++- inttest/kine/kine_test.go | 20 ++++- inttest/metricsscraper/metricsscraper_test.go | 5 +- pkg/component/controller/metrics.go | 74 ++++++++++++++++--- 5 files changed, 92 insertions(+), 19 deletions(-) diff --git a/cmd/controller/controller.go b/cmd/controller/controller.go index e2de12545ae4..16939afd2203 100644 --- a/cmd/controller/controller.go +++ b/cmd/controller/controller.go @@ -441,7 +441,7 @@ func (c *command) start(ctx context.Context) error { if err != nil { return fmt.Errorf("failed to create metrics manifests saver: %w", err) } - metrics, err := controller.NewMetrics(c.K0sVars, metricsSaver, adminClientFactory) + metrics, err := controller.NewMetrics(c.K0sVars, metricsSaver, adminClientFactory, nodeConfig.Spec.Storage.Type) if err != nil { return fmt.Errorf("failed to create metrics reconciler: %w", err) } diff --git a/docs/system-monitoring.md b/docs/system-monitoring.md index 96753e6a5d83..b8e144d19a88 100644 --- a/docs/system-monitoring.md +++ b/docs/system-monitoring.md @@ -1,15 +1,15 @@ # System components monitoring -Controller nodes [are isolated](architecture.md/#control-plane) by default, which thus means that a cluster user cannot schedule workloads onto controller nodes. +Controller nodes [are isolated](architecture.md#control-plane) by default, which thus means that a cluster user cannot schedule workloads onto controller nodes. k0s provides a mechanism to expose system components for monitoring. System component metrics can give a better look into what is happening inside them. Metrics are particularly useful for building dashboards and alerts. 
You can read more about metrics for Kubernetes system components [here](https://kubernetes.io/docs/concepts/cluster-administration/system-metrics/). **Note:** the mechanism is an opt-in feature, you can enable it on installation: - ```shell - sudo k0s install controller --enable-metrics-scraper - ``` +```shell +sudo k0s install controller --enable-metrics-scraper +``` ## Jobs @@ -17,6 +17,8 @@ The list of components which is scrapped by k0s: - kube-scheduler - kube-controller-manager +- etcd +- kine **Note:** kube-apiserver metrics are not scrapped since they are accessible via `kubernetes` endpoint within the cluster. diff --git a/inttest/kine/kine_test.go b/inttest/kine/kine_test.go index ca3c59e18ba8..8fae280c63c0 100644 --- a/inttest/kine/kine_test.go +++ b/inttest/kine/kine_test.go @@ -17,12 +17,16 @@ limitations under the License. package kine import ( + "context" "fmt" + "strings" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "k8s.io/apimachinery/pkg/util/wait" "github.com/k0sproject/k0s/inttest/common" ) @@ -33,7 +37,7 @@ type KineSuite struct { func (s *KineSuite) TestK0sGetsUp() { s.PutFile(s.ControllerNode(0), "/tmp/k0s.yaml", k0sConfigWithKine) - s.NoError(s.InitController(0, "--config=/tmp/k0s.yaml")) + s.NoError(s.InitController(0, "--config=/tmp/k0s.yaml", "--enable-metrics-scraper")) s.NoError(s.RunWorkers()) kc, err := s.KubeClient(s.ControllerNode(0)) @@ -71,6 +75,20 @@ func (s *KineSuite) TestK0sGetsUp() { assert.NoError(t, err) }) }) + + s.T().Run("metrics", func(t *testing.T) { + s.Require().NoError(common.WaitForDeployment(s.Context(), kc, "k0s-pushgateway", "k0s-system")) + s.Require().NoError(wait.PollImmediateInfiniteWithContext(s.Context(), 5*time.Second, func(ctx context.Context) (bool, error) { + b, err := kc.RESTClient().Get().AbsPath("/api/v1/namespaces/k0s-system/services/http:k0s-pushgateway:http/proxy/metrics").DoRaw(s.Context()) + if err != nil { 
+ return false, nil + } + + // wait for kube-scheduler, kube-controller-manager and kine metrics + output := string(b) + return strings.Contains(output, `job="kube-scheduler"`) && strings.Contains(output, `job="kube-controller-manager"`) && strings.Contains(output, `job="kine"`), nil + })) + }) } func TestKineSuite(t *testing.T) { diff --git a/inttest/metricsscraper/metricsscraper_test.go b/inttest/metricsscraper/metricsscraper_test.go index 1ae7482246ad..b403e63e8c69 100644 --- a/inttest/metricsscraper/metricsscraper_test.go +++ b/inttest/metricsscraper/metricsscraper_test.go @@ -33,7 +33,7 @@ type MetricsScraperSuite struct { } func (s *MetricsScraperSuite) TestK0sGetsUp() { - s.NoError(s.InitController(0, "--single", "--enable-metrics-scraper")) + s.NoError(s.InitController(0, "--enable-worker", "--enable-metrics-scraper")) kc, err := s.KubeClient(s.ControllerNode(0)) s.Require().NoError(err) @@ -71,7 +71,8 @@ func (s *MetricsScraperSuite) waitForMetrics() error { } // wait for kube-scheduler and kube-controller-manager metrics - return strings.Contains(string(b), `job="kube-scheduler"`) && strings.Contains(string(b), `job="kube-controller-manager"`), nil + output := string(b) + return strings.Contains(output, `job="kube-scheduler"`) && strings.Contains(output, `job="kube-controller-manager"`) && strings.Contains(output, `job="etcd"`), nil }) } diff --git a/pkg/component/controller/metrics.go b/pkg/component/controller/metrics.go index 0d8f2a758bfc..a76bd49e9f43 100644 --- a/pkg/component/controller/metrics.go +++ b/pkg/component/controller/metrics.go @@ -45,10 +45,11 @@ const ( type Metrics struct { log logrus.FieldLogger - hostname string - K0sVars *config.CfgVars - saver manifestsSaver - restClient rest.Interface + hostname string + K0sVars *config.CfgVars + saver manifestsSaver + restClient rest.Interface + storageType string clusterConfig *v1beta1.ClusterConfig tickerDone context.CancelFunc @@ -59,7 +60,7 @@ var _ manager.Component = (*Metrics)(nil) var _ 
manager.Reconciler = (*Metrics)(nil) // NewMetrics creates new Metrics reconciler -func NewMetrics(k0sVars *config.CfgVars, saver manifestsSaver, clientCF kubernetes.ClientFactoryInterface) (*Metrics, error) { +func NewMetrics(k0sVars *config.CfgVars, saver manifestsSaver, clientCF kubernetes.ClientFactoryInterface, storageType string) (*Metrics, error) { hostname, err := os.Hostname() if err != nil { return nil, err @@ -71,12 +72,12 @@ func NewMetrics(k0sVars *config.CfgVars, saver manifestsSaver, clientCF kubernet } return &Metrics{ - log: logrus.WithFields(logrus.Fields{"component": "metrics"}), - - hostname: hostname, - K0sVars: k0sVars, - saver: saver, - restClient: restClient, + log: logrus.WithFields(logrus.Fields{"component": "metrics"}), + storageType: storageType, + hostname: hostname, + K0sVars: k0sVars, + saver: saver, + restClient: restClient, }, nil } @@ -95,6 +96,22 @@ func (m *Metrics) Init(_ context.Context) error { } m.jobs = append(m.jobs, j) + if m.storageType == v1beta1.EtcdStorageType { + etcdJob, err := m.newEtcdJob() + if err != nil { + return err + } + m.jobs = append(m.jobs, etcdJob) + } + + if m.storageType == v1beta1.KineStorageType { + kineJob, err := m.newKineJob() + if err != nil { + return err + } + m.jobs = append(m.jobs, kineJob) + } + return nil } @@ -161,6 +178,41 @@ type job struct { restClient rest.Interface } +func (m *Metrics) newEtcdJob() (*job, error) { + certFile := path.Join(m.K0sVars.CertRootDir, "apiserver-etcd-client.crt") + keyFile := path.Join(m.K0sVars.CertRootDir, "apiserver-etcd-client.key") + + httpClient, err := getClient(certFile, keyFile) + if err != nil { + return nil, err + } + + return &job{ + log: m.log.WithField("metrics_job", "etcd"), + scrapeURL: "https://localhost:2379/metrics", + name: "etcd", + hostname: m.hostname, + scrapeClient: httpClient, + restClient: m.restClient, + }, nil +} + +func (m *Metrics) newKineJob() (*job, error) { + httpClient, err := getClient("", "") + if err != nil { + return 
nil, err + } + + return &job{ + log: m.log.WithField("metrics_job", "kine"), + scrapeURL: "http://localhost:8080/metrics", + name: "kine", + hostname: m.hostname, + scrapeClient: httpClient, + restClient: m.restClient, + }, nil +} + func (m *Metrics) newJob(name, scrapeURL string) (*job, error) { certFile := path.Join(m.K0sVars.CertRootDir, "admin.crt") keyFile := path.Join(m.K0sVars.CertRootDir, "admin.key")