diff --git a/collector/node.go b/collector/node.go index 377c732..2d3d114 100644 --- a/collector/node.go +++ b/collector/node.go @@ -31,8 +31,12 @@ type NodeCollector struct { SumStatRopen *prometheus.GaugeVec SumStatWopen *prometheus.GaugeVec CfgStatSysThreads *prometheus.GaugeVec + CfgStatSysVsize *prometheus.GaugeVec + CfgStatSysRss *prometheus.GaugeVec + CfgStatSysSockets *prometheus.GaugeVec SumStatNetInratemib *prometheus.GaugeVec SumStatNetOutratemib *prometheus.GaugeVec + Info *prometheus.GaugeVec } /* @@ -77,7 +81,7 @@ cfg.gw.rate=120 cfg.gw.ntx=10 */ -//NewNodeCollector creates an cluster of the NodeCollector +// NewNodeCollector creates an cluster of the NodeCollector func NewNodeCollector(cluster string) *NodeCollector { labels := make(prometheus.Labels) labels["cluster"] = cluster @@ -200,6 +204,33 @@ func NewNodeCollector(cluster string) *NodeCollector { }, []string{"node", "port"}, ), + CfgStatSysVsize: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "eos", + Name: "node_vsize", + Help: "Node virtual memory size", + ConstLabels: labels, + }, + []string{"node", "port"}, + ), + CfgStatSysRss: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "eos", + Name: "node_rss", + Help: "Node resident memory set size", + ConstLabels: labels, + }, + []string{"node", "port"}, + ), + CfgStatSysSockets: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "eos", + Name: "node_sockets", + Help: "Node Number of sockets", + ConstLabels: labels, + }, + []string{"node", "port"}, + ), SumStatNetInratemib: prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: "eos", @@ -218,6 +249,15 @@ func NewNodeCollector(cluster string) *NodeCollector { }, []string{"node", "port"}, ), + Info: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "eos", + Name: "node_info", + Help: "Node metadata", + ConstLabels: labels, + }, + []string{"node", "port", "eos_version", "xrootd_version", "kernel", "geotag"}, + ), } } @@ -236,8 +276,12 @@ func (o *NodeCollector) collectorList() []prometheus.Collector { o.SumStatRopen, o.SumStatWopen, o.CfgStatSysThreads, + o.CfgStatSysVsize, + o.CfgStatSysRss, + o.CfgStatSysSockets, o.SumStatNetInratemib, o.SumStatNetOutratemib, + o.Info, } } @@ -347,6 +391,24 @@ func (o *NodeCollector) collectNodeDF() error { if err == nil { o.CfgStatSysThreads.WithLabelValues(m.Host, m.Port).Set(threads) } + + vsize, err := strconv.ParseFloat(m.CfgStatSysVsize, 64) + if err == nil { + o.CfgStatSysVsize.WithLabelValues(m.Host, m.Port).Set(vsize) + } + + rss, err := strconv.ParseFloat(m.CfgStatSysRss, 64) + if err == nil { + o.CfgStatSysRss.WithLabelValues(m.Host, m.Port).Set(rss) + } + + sockets, err := strconv.ParseFloat(m.CfgStatSysSockets, 64) + if err == nil { + o.CfgStatSysSockets.WithLabelValues(m.Host, m.Port).Set(sockets) + } + + // We send just a dummy 1 as value for the eos_node_info metric, and metadata on labels + o.Info.WithLabelValues(m.Host, m.Port, m.EOSVersion, m.XRootDVersion, m.Kernel, m.Geotag).Set(1) } return nil diff --git a/collector/vs.go b/collector/vs.go deleted file mode 100644 index fe9c6cb..0000000 --- a/collector/vs.go +++ /dev/null @@ -1,165 +0,0 @@ -package collector - -import ( - "context" - "log" - "strconv" - - "github.com/prometheus/client_golang/prometheus" - "gitlab.cern.ch/rvalverd/eos_exporter/eosclient" - //"os" - //"bufio" - //"fmt" - //"strings" -) - -type VSCollector struct { - EOSmgm *prometheus.GaugeVec - Hostport *prometheus.GaugeVec - Geotag *prometheus.GaugeVec - Vsize *prometheus.GaugeVec - Rss *prometheus.GaugeVec - Threads *prometheus.GaugeVec - Versions *prometheus.GaugeVec - EOSfst *prometheus.GaugeVec - Xrootdfst *prometheus.GaugeVec - KernelV *prometheus.GaugeVec - Start *prometheus.GaugeVec - Uptime *prometheus.GaugeVec -} - -//NewFSCollector creates an cluster of the FSCollector and instantiates -// the individual metrics that show information about the FS. -func NewVSCollector(cluster string) *VSCollector { - labels := make(prometheus.Labels) - labels["cluster"] = cluster - namespace := "eos" - return &VSCollector{ - Vsize: prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: namespace, - Name: "versions_vsize_bytes", - Help: "Vsize: ", - ConstLabels: labels, - }, - []string{"mgm_version", "node", "geotag", "eos_v_fst", "xrd_v_fst", "kernel_v"}, - ), - Rss: prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: namespace, - Name: "versions_rss_bytes", - Help: "Rss: ", - ConstLabels: labels, - }, - []string{"mgm_version", "node", "geotag", "eos_v_fst", "xrd_v_fst", "kernel_v"}, - ), - Threads: prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: namespace, - Name: "versions_threads_total", - Help: "Threads: ", - ConstLabels: labels, - }, - []string{"mgm_version", "node", "geotag", "eos_v_fst", "xrd_v_fst", "kernel_v"}, - ), - Versions: prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: namespace, - Name: "versions_total", - Help: "Verions: Amount of daemons attached to a node", - ConstLabels: labels, - }, - []string{"mgm_version", "node", "port", "geotag", "eos_v_fst", "xrd_v_fst", "kernel_v"}, - ), - Uptime: prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: namespace, - Name: "versions_uptime_seconds", - Help: "Uptime: Amount of seconds the FST has been up", - ConstLabels: labels, - }, - []string{"node"}, - ), - Start: prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: namespace, - Name: "versions_start_seconds", - Help: "Start: Time when EOS was started.", - ConstLabels: labels, - }, - []string{"mgm_version", "node", "geotag", "eos_v_fst", "xrd_v_fst", "kernel_v"}, - ), - } -} - -func (o *VSCollector) collectorList() []prometheus.Collector { - return []prometheus.Collector{ - // o.EOSmgm, - // o.Hostport, - // o.Geotag, - o.Vsize, - o.Rss, - o.Threads, - o.Versions, - // o.EOSfst, - // o.Xrootdfst, - // o.KernelV, - o.Start, - o.Uptime, - } -} - -func (o *VSCollector) collectVSDF() error { - ins := getEOSInstance() - url := "root://" + ins - opt := &eosclient.Options{URL: url} - client, err := eosclient.New(opt) - if err != nil { - return err - } - - mds, err := client.ListVS(context.Background()) - if err != nil { - return err - } - - for _, m := range mds { - - // Versions - - versions, err := strconv.ParseFloat("1", 64) - if err == nil { - o.Versions.WithLabelValues(m.EOSmgm, m.Hostname, m.Port, m.Geotag, m.EOSfst, m.Xrootdfst, m.KernelV).Set(versions) - } - - // Uptime - - uptime, err := strconv.ParseFloat(m.Uptime, 64) - if err == nil { - o.Uptime.WithLabelValues(m.Hostname).Set(uptime * 3600 * 24) - } - } - - return nil - -} // collectVSDF() - -// Describe sends the descriptors of each VSCollector related metrics we have defined -func (o *VSCollector) Describe(ch chan<- *prometheus.Desc) { - for _, metric := range o.collectorList() { - metric.Describe(ch) - } - //ch <- o.ScrubbingStateDesc -} - -// Collect sends all the collected metrics to the provided prometheus channel. -func (o *VSCollector) Collect(ch chan<- prometheus.Metric) { - - if err := o.collectVSDF(); err != nil { - log.Println("failed collecting vs metrics:", err) - } - - for _, metric := range o.collectorList() { - metric.Collect(ch) - } -} diff --git a/eos_exporter.go b/eos_exporter.go index c269703..77e4da9 100644 --- a/eos_exporter.go +++ b/eos_exporter.go @@ -65,7 +65,6 @@ func NewEOSExporter(instance string) *EOSExporter { collector.NewGroupCollector(instance), // eos scheduling group stats collector.NewNodeCollector(instance), // eos node stats collector.NewFSCollector(instance), // eos filesystem stats - collector.NewVSCollector(instance), // eos FST versions information collector.NewIOInfoCollector(instance), // eos io stat information collector.NewIOAppInfoCollector(instance), // eos io stat information per App collector.NewNSCollector(instance), // eos namespace information diff --git a/eosclient/eos.go b/eosclient/eos.go index 2a4457b..e6a9ec9 100644 --- a/eosclient/eos.go +++ b/eosclient/eos.go @@ -89,8 +89,15 @@ type NodeInfo struct { SumStatRopen string SumStatWopen string CfgStatSysThreads string + CfgStatSysVsize string + CfgStatSysRss string + CfgStatSysSockets string SumStatNetInratemib string SumStatNetOutratemib string + EOSVersion string + XRootDVersion string + Kernel string + Geotag string } type GroupInfo struct { @@ -168,22 +175,6 @@ type FSInfo struct { StatHealthIndicator string } -type VSInfo struct { - EOSmgm string - Hostname string - Port string - Geotag string - Vsize string - Rss string - Threads string - Sockets string - EOSfst string - Xrootdfst string - KernelV string - Start string - Uptime string -} - type NSInfo struct { Boot_file_time string Boot_status string @@ -470,32 +461,6 @@ func (c *Client) getEosMGMVersion(ctx context.Context) (string, error) { return "", errors.New("version not found") } -// List the version of different nodes in the instance -func (c *Client) ListVS(ctx context.Context) ([]*VSInfo, error) { - - ctx, cancel := context.WithTimeout(ctx, cmdTimeout) - defer cancel() - - mgmVersion, err := c.getEosMGMVersion(ctx) - if err != nil { - return nil, err - } - - //cmd = exec.CommandContext(ctxWt, "/usr/bin/eos", "-r", unixUser.Uid, unixUser.Gid, "-b", "node", "ls","-m", "--sys", "|", "grep", "cern.ch", "|", "sort", "-t:", "-uk1,1") - stdout, _, err := c.execute(exec.CommandContext(ctx, "/usr/bin/eos", "--json", "node", "ls")) - if err != nil { - return nil, err - } - - nodeLSResponse := &NodeLSResponse{} - err = json.Unmarshal([]byte(stdout), nodeLSResponse) - if err != nil { - return nil, err // fmt.Errorf("%w -> value: %s", err, stdout) // for testing unmarshal issues - } - - return c.parseVSsInfo(mgmVersion, nodeLSResponse) -} - // List the activity of different users in the instance func (c *Client) ListNS(ctx context.Context) ([]*NSInfo, []*NSActivityInfo, []*NSBatchInfo, error) { // eos ns stat, without -a will exclude batch users info (this adds to much latency in the instance where the exporter is deployed) @@ -626,8 +591,15 @@ func (c *Client) parseNodeInfo(line string) (*NodeInfo, error) { SumStatRopen: kv["sum.stat.ropen"], SumStatWopen: kv["sum.stat.wopen"], CfgStatSysThreads: kv["cfg.stat.sys.threads"], + CfgStatSysVsize: kv["cfg.stat.sys.vsize"], + CfgStatSysRss: kv["cfg.stat.sys.rss"], + CfgStatSysSockets: kv["cfg.stat.sys.sockets"], SumStatNetInratemib: kv["sum.stat.net.inratemib"], SumStatNetOutratemib: kv["sum.stat.net.outratemib"], + EOSVersion: kv["cfg.stat.sys.eos.version"], + XRootDVersion: kv["cfg.stat.sys.xrootd.version"], + Kernel: kv["cfg.stat.sys.kernel"], + Geotag: kv["cfg.stat.geotag"], } return fst, nil } @@ -755,51 +727,6 @@ func (c *Client) parseFSInfo(line string) (*FSInfo, error) { return fs, nil } -// Gathers information of versions of nodes -func (c *Client) parseVSsInfo(mgmVersion string, nodeLSResponse *NodeLSResponse) ([]*VSInfo, error) { - vsinfos := []*VSInfo{} - - if nodeLSResponse.ErrorMsg != "" { - return nil, errors.New(nodeLSResponse.ErrorMsg) - } - - for _, node := range nodeLSResponse.Result { - hostname, port, foundcolon := getHostname(node.HostPort) - if !foundcolon { - continue - } - - // Parse uptime to days - s := strings.Split(node.Cfg.Stat.Sys.Uptime.value, "%20days,")[0] - upt := strings.Split(s, "up%20") - var uptime string - if len(upt) < 2 { - uptime = "0" - } else { - uptime = upt[1] - } - - info := &VSInfo{ - EOSmgm: mgmVersion, - Hostname: hostname, - Port: port, - Geotag: node.Cfg.Stat.Geotag, - Vsize: strconv.Itoa(node.Cfg.Stat.Sys.Vsize), - Rss: node.Cfg.Stat.Sys.Rss.value, - Threads: strconv.Itoa(node.Cfg.Stat.Sys.Threads), - Sockets: node.Cfg.Stat.Sys.Sockets.value, - EOSfst: node.Cfg.Stat.Sys.Eos.Version, - Xrootdfst: node.Cfg.Stat.Sys.Xrootd.Version, - KernelV: node.Cfg.Stat.Sys.Kernel, - Start: node.Cfg.Stat.Sys.Eos.Start, - Uptime: uptime, - } - vsinfos = append(vsinfos, info) - } - - return vsinfos, nil -} - // Checks if uid is made only of letters. func UidLetter(s string) bool { for _, r := range s {