From d823ad72c40e7bff952ce1a14a55c39576f0b769 Mon Sep 17 00:00:00 2001 From: Alejandro Pedraza Date: Wed, 16 Aug 2023 13:34:50 -0500 Subject: [PATCH] Fixed service-mirror metrics warning (#11246) Whenever the service mirror's main loop was triggered again, the following warnings were generated: ``` time="2023-08-14T20:16:29Z" level=warning msg="failed to register Prometheus gauge Desc{fqName: \"service_cache_size\", help: \"Number of items in the client-go service cache\", constLabels: {cluster=\"remote\"}, variableLabels: []}: duplicate metrics collector registration attempted" time="2023-08-14T20:16:29Z" level=warning msg="failed to register Prometheus gauge Desc{fqName: \"endpoints_cache_size\", help: \"Number of items in the client-go endpoints cache\", constLabels: {cluster=\"remote\"}, variableLabels: []}: duplicate metrics collector registration attempted" ``` To fix, this adds a call to the cluster watcher's `Stop()` method that unregisters the Prometheus cache metrics associated with the cluster's client API. --- controller/k8s/api.go | 5 +++++ controller/k8s/prometheus.go | 6 ++++++ multicluster/service-mirror/cluster_watcher.go | 4 ++++ 3 files changed, 15 insertions(+) diff --git a/controller/k8s/api.go b/controller/k8s/api.go index cdd123a70890b..64902e31aeff8 100644 --- a/controller/k8s/api.go +++ b/controller/k8s/api.go @@ -288,6 +288,11 @@ func (api *API) Sync(stopCh <-chan struct{}) { waitForCacheSync(api.syncChecks) } +// UnregisterGauges unregisters all the prometheus cache gauges associated to this API +func (api *API) UnregisterGauges() { + api.promGauges.unregister() +} + // NS provides access to a shared informer and lister for Namespaces. 
func (api *API) NS() coreinformers.NamespaceInformer { if api.ns == nil { diff --git a/controller/k8s/prometheus.go b/controller/k8s/prometheus.go index aed1739db8628..b4daad5f58616 100644 --- a/controller/k8s/prometheus.go +++ b/controller/k8s/prometheus.go @@ -20,3 +20,9 @@ func (p *promGauges) addInformerSize(kind string, labels prometheus.Labels, inf return float64(len(inf.GetStore().ListKeys())) })) } + +func (p *promGauges) unregister() { + for _, gauge := range p.gauges { + prometheus.Unregister(gauge) + } +} diff --git a/multicluster/service-mirror/cluster_watcher.go b/multicluster/service-mirror/cluster_watcher.go index cb6727bc52957..67fadc4ead6b8 100644 --- a/multicluster/service-mirror/cluster_watcher.go +++ b/multicluster/service-mirror/cluster_watcher.go @@ -974,6 +974,10 @@ func (rcsw *RemoteClusterServiceWatcher) Stop(cleanupState bool) { rcsw.log.Warnf("error removing service informer handler: %s", err) } } + + if rcsw.remoteAPIClient != nil { + rcsw.remoteAPIClient.UnregisterGauges() + } } func (rcsw *RemoteClusterServiceWatcher) resolveGatewayAddress() ([]corev1.EndpointAddress, error) {