Skip to content

Commit

Permalink
Merge pull request #3322 from jnummelin/feature/etcd-push-metrics
Browse files Browse the repository at this point in the history
Add etcd and kine metrics collection with `--enable-metrics-scraper`
  • Loading branch information
jnummelin authored Jul 31, 2023
2 parents 20eead8 + 05d0613 commit 65fa4a3
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 19 deletions.
2 changes: 1 addition & 1 deletion cmd/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ func (c *command) start(ctx context.Context) error {
if err != nil {
return fmt.Errorf("failed to create metrics manifests saver: %w", err)
}
metrics, err := controller.NewMetrics(c.K0sVars, metricsSaver, adminClientFactory)
metrics, err := controller.NewMetrics(c.K0sVars, metricsSaver, adminClientFactory, nodeConfig.Spec.Storage.Type)
if err != nil {
return fmt.Errorf("failed to create metrics reconciler: %w", err)
}
Expand Down
10 changes: 6 additions & 4 deletions docs/system-monitoring.md
Original file line number Diff line number Diff line change
@@ -1,22 +1,24 @@
# System components monitoring

Controller nodes [are isolated](architecture.md/#control-plane) by default, which thus means that a cluster user cannot schedule workloads onto controller nodes.
Controller nodes [are isolated](architecture.md#control-plane) by default, which thus means that a cluster user cannot schedule workloads onto controller nodes.

k0s provides a mechanism to expose system components for monitoring. System component metrics can give a better look into what is happening inside them. Metrics are particularly useful for building dashboards and alerts.
You can read more about metrics for Kubernetes system components [here](https://kubernetes.io/docs/concepts/cluster-administration/system-metrics/).

**Note:** The mechanism is an opt-in feature; you can enable it on installation:

```shell
sudo k0s install controller --enable-metrics-scraper
```
```shell
sudo k0s install controller --enable-metrics-scraper
```

## Jobs

The list of components scraped by k0s:

- kube-scheduler
- kube-controller-manager
- etcd
- kine

**Note:** kube-apiserver metrics are not scraped since they are accessible via the `kubernetes` endpoint within the cluster.

Expand Down
20 changes: 19 additions & 1 deletion inttest/kine/kine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,16 @@ limitations under the License.
package kine

import (
"context"
"fmt"
"strings"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
"k8s.io/apimachinery/pkg/util/wait"

"github.com/k0sproject/k0s/inttest/common"
)
Expand All @@ -33,7 +37,7 @@ type KineSuite struct {

func (s *KineSuite) TestK0sGetsUp() {
s.PutFile(s.ControllerNode(0), "/tmp/k0s.yaml", k0sConfigWithKine)
s.NoError(s.InitController(0, "--config=/tmp/k0s.yaml"))
s.NoError(s.InitController(0, "--config=/tmp/k0s.yaml", "--enable-metrics-scraper"))
s.NoError(s.RunWorkers())

kc, err := s.KubeClient(s.ControllerNode(0))
Expand Down Expand Up @@ -71,6 +75,20 @@ func (s *KineSuite) TestK0sGetsUp() {
assert.NoError(t, err)
})
})

s.T().Run("metrics", func(t *testing.T) {
s.Require().NoError(common.WaitForDeployment(s.Context(), kc, "k0s-pushgateway", "k0s-system"))
s.Require().NoError(wait.PollImmediateInfiniteWithContext(s.Context(), 5*time.Second, func(ctx context.Context) (bool, error) {
b, err := kc.RESTClient().Get().AbsPath("/api/v1/namespaces/k0s-system/services/http:k0s-pushgateway:http/proxy/metrics").DoRaw(s.Context())
if err != nil {
return false, nil
}

// wait for kube-scheduler, kube-controller-manager and kine metrics
output := string(b)
return strings.Contains(output, `job="kube-scheduler"`) && strings.Contains(output, `job="kube-controller-manager"`) && strings.Contains(output, `job="kine"`), nil
}))
})
}

func TestKineSuite(t *testing.T) {
Expand Down
5 changes: 3 additions & 2 deletions inttest/metricsscraper/metricsscraper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ type MetricsScraperSuite struct {
}

func (s *MetricsScraperSuite) TestK0sGetsUp() {
s.NoError(s.InitController(0, "--single", "--enable-metrics-scraper"))
s.NoError(s.InitController(0, "--enable-worker", "--enable-metrics-scraper"))

kc, err := s.KubeClient(s.ControllerNode(0))
s.Require().NoError(err)
Expand Down Expand Up @@ -71,7 +71,8 @@ func (s *MetricsScraperSuite) waitForMetrics() error {
}

// wait for kube-scheduler and kube-controller-manager metrics
return strings.Contains(string(b), `job="kube-scheduler"`) && strings.Contains(string(b), `job="kube-controller-manager"`), nil
output := string(b)
return strings.Contains(output, `job="kube-scheduler"`) && strings.Contains(output, `job="kube-controller-manager"`) && strings.Contains(output, `job="etcd"`), nil
})
}

Expand Down
74 changes: 63 additions & 11 deletions pkg/component/controller/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,11 @@ const (
type Metrics struct {
log logrus.FieldLogger

hostname string
K0sVars *config.CfgVars
saver manifestsSaver
restClient rest.Interface
hostname string
K0sVars *config.CfgVars
saver manifestsSaver
restClient rest.Interface
storageType string

clusterConfig *v1beta1.ClusterConfig
tickerDone context.CancelFunc
Expand All @@ -59,7 +60,7 @@ var _ manager.Component = (*Metrics)(nil)
var _ manager.Reconciler = (*Metrics)(nil)

// NewMetrics creates new Metrics reconciler
func NewMetrics(k0sVars *config.CfgVars, saver manifestsSaver, clientCF kubernetes.ClientFactoryInterface) (*Metrics, error) {
func NewMetrics(k0sVars *config.CfgVars, saver manifestsSaver, clientCF kubernetes.ClientFactoryInterface, storageType string) (*Metrics, error) {
hostname, err := os.Hostname()
if err != nil {
return nil, err
Expand All @@ -71,12 +72,12 @@ func NewMetrics(k0sVars *config.CfgVars, saver manifestsSaver, clientCF kubernet
}

return &Metrics{
log: logrus.WithFields(logrus.Fields{"component": "metrics"}),

hostname: hostname,
K0sVars: k0sVars,
saver: saver,
restClient: restClient,
log: logrus.WithFields(logrus.Fields{"component": "metrics"}),
storageType: storageType,
hostname: hostname,
K0sVars: k0sVars,
saver: saver,
restClient: restClient,
}, nil
}

Expand All @@ -95,6 +96,22 @@ func (m *Metrics) Init(_ context.Context) error {
}
m.jobs = append(m.jobs, j)

if m.storageType == v1beta1.EtcdStorageType {
etcdJob, err := m.newEtcdJob()
if err != nil {
return err
}
m.jobs = append(m.jobs, etcdJob)
}

if m.storageType == v1beta1.KineStorageType {
kineJob, err := m.newKineJob()
if err != nil {
return err
}
m.jobs = append(m.jobs, kineJob)
}

return nil
}

Expand Down Expand Up @@ -161,6 +178,41 @@ type job struct {
restClient rest.Interface
}

// newEtcdJob assembles the scrape job named "etcd", which targets
// https://localhost:2379/metrics.
//
// The scrape client is obtained from getClient using the
// apiserver-etcd-client certificate/key pair found under
// K0sVars.CertRootDir. Any error from getClient is returned unchanged.
func (m *Metrics) newEtcdJob() (*job, error) {
	certDir := m.K0sVars.CertRootDir

	// NOTE(review): presumably used as a TLS client certificate towards
	// etcd; confirm against getClient.
	scrapeClient, err := getClient(
		path.Join(certDir, "apiserver-etcd-client.crt"),
		path.Join(certDir, "apiserver-etcd-client.key"),
	)
	if err != nil {
		return nil, err
	}

	etcdJob := &job{
		name:         "etcd",
		hostname:     m.hostname,
		scrapeURL:    "https://localhost:2379/metrics",
		scrapeClient: scrapeClient,
		restClient:   m.restClient,
		log:          m.log.WithField("metrics_job", "etcd"),
	}
	return etcdJob, nil
}

// newKineJob assembles the scrape job named "kine", which targets the
// plain-HTTP endpoint http://localhost:8080/metrics.
//
// getClient is called with empty certificate and key paths. Any error
// from getClient is returned unchanged.
func (m *Metrics) newKineJob() (*job, error) {
	// NOTE(review): empty paths presumably yield a client without a
	// client certificate; confirm against getClient.
	scrapeClient, err := getClient("", "")
	if err != nil {
		return nil, err
	}

	kineJob := &job{
		name:         "kine",
		hostname:     m.hostname,
		scrapeURL:    "http://localhost:8080/metrics",
		scrapeClient: scrapeClient,
		restClient:   m.restClient,
		log:          m.log.WithField("metrics_job", "kine"),
	}
	return kineJob, nil
}

func (m *Metrics) newJob(name, scrapeURL string) (*job, error) {
certFile := path.Join(m.K0sVars.CertRootDir, "admin.crt")
keyFile := path.Join(m.K0sVars.CertRootDir, "admin.key")
Expand Down

0 comments on commit 65fa4a3

Please sign in to comment.