From 872f921867eadca491876bb2e6ad17d05cf2808a Mon Sep 17 00:00:00 2001 From: Tobias Schmidt Date: Mon, 24 Nov 2014 21:00:17 -0500 Subject: [PATCH] Reduce number of global variables used This is the first step to make the exporter more testable. --- collector/attributes.go | 37 +++--- collector/bonding.go | 47 +++---- collector/diskstats.go | 240 ++++++++++++++++++----------------- collector/filesystem.go | 122 +++++++++--------- collector/interrupts.go | 29 ++--- collector/lastlogin.go | 25 ++-- collector/loadavg.go | 23 ++-- collector/megacli.go | 112 ++++++++-------- collector/meminfo.go | 23 ++-- collector/netdev.go | 23 ++-- collector/netstat.go | 23 ++-- collector/ntp.go | 19 +-- collector/runit_collector.go | 85 +++++++------ collector/stat.go | 121 +++++++++--------- collector/time.go | 24 ++-- 15 files changed, 472 insertions(+), 481 deletions(-) diff --git a/collector/attributes.go b/collector/attributes.go index a06d7698..761832bf 100644 --- a/collector/attributes.go +++ b/collector/attributes.go @@ -7,12 +7,9 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -var ( - attributes *prometheus.GaugeVec -) - type attributesCollector struct { config Config + metric *prometheus.GaugeVec } func init() { @@ -22,28 +19,28 @@ func init() { // Takes a config struct and prometheus registry and returns a new Collector exposing // labels from the config. func NewAttributesCollector(config Config) (Collector, error) { - c := attributesCollector{ - config: config, - } labelNames := []string{} - for l := range c.config.Attributes { + for l := range config.Attributes { labelNames = append(labelNames, l) } - attributes = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: Namespace, - Name: "attributes", - Help: "The node_exporter attributes.", - }, - labelNames, - ) - return &c, nil + + return &attributesCollector{ + config: config, + metric: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Name: "attributes", + Help: "The node_exporter attributes.", + }, + labelNames, + ), + }, nil } func (c *attributesCollector) Update(ch chan<- prometheus.Metric) (err error) { glog.V(1).Info("Set node_attributes{%v}: 1", c.config.Attributes) - attributes.Reset() - attributes.With(c.config.Attributes).Set(1) - attributes.Collect(ch) + c.metric.Reset() + c.metric.With(c.config.Attributes).Set(1) + c.metric.Collect(ch) return err } diff --git a/collector/bonding.go b/collector/bonding.go index 17fd092c..1a132452 100644 --- a/collector/bonding.go +++ b/collector/bonding.go @@ -16,22 +16,9 @@ const ( sysfsNet = "/sys/class/net" ) -var ( - bondingSlaves = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: Namespace, - Name: "net_bonding_slaves", - Help: "Number of configured slaves per bonding interface.", - }, []string{"master"}) - bondingSlavesActive = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: Namespace, - Name: "net_bonding_slaves_active", - Help: "Number of active slaves per bonding interface.", - }, []string{"master"}) -) - -type bondingCollector struct{} +type bondingCollector struct { + slaves, active *prometheus.GaugeVec +} func init() { Factories["bonding"] = NewBondingCollector @@ -40,8 +27,24 @@ func init() { // NewBondingCollector returns a newly allocated bondingCollector. // It exposes the number of configured and active slave of linux bonding interfaces. func NewBondingCollector(config Config) (Collector, error) { - c := bondingCollector{} - return &c, nil + return &bondingCollector{ + slaves: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Name: "net_bonding_slaves", + Help: "Number of configured slaves per bonding interface.", + }, + []string{"master"}, + ), + active: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Name: "net_bonding_slaves_active", + Help: "Number of active slaves per bonding interface.", + }, + []string{"master"}, + ), + }, nil } // Update reads and exposes bonding states, implements Collector interface. Caution: This works only on linux. @@ -51,11 +54,11 @@ func (c *bondingCollector) Update(ch chan<- prometheus.Metric) (err error) { return err } for master, status := range bondingStats { - bondingSlaves.WithLabelValues(master).Set(float64(status[0])) - bondingSlavesActive.WithLabelValues(master).Set(float64(status[1])) + c.slaves.WithLabelValues(master).Set(float64(status[0])) + c.active.WithLabelValues(master).Set(float64(status[1])) } - bondingSlaves.Collect(ch) - bondingSlavesActive.Collect(ch) + c.slaves.Collect(ch) + c.active.Collect(ch) return nil } diff --git a/collector/diskstats.go b/collector/diskstats.go index c6c8d6e7..007446c4 100644 --- a/collector/diskstats.go +++ b/collector/diskstats.go @@ -23,116 +23,12 @@ const ( var ( ignoredDevices = flag.String("diskstatsIgnoredDevices", "^(ram|loop|(h|s|xv)d[a-z])\\d+$", "Regexp of devices to ignore for diskstats.") - - diskLabelNames = []string{"device"} - - // Docs from https://www.kernel.org/doc/Documentation/iostats.txt - diskStatsMetrics = []prometheus.Collector{ - prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: Namespace, - Subsystem: diskSubsystem, - Name: "reads_completed", - Help: "The total number of reads completed successfully.", - }, - diskLabelNames, - ), - prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: Namespace, - Subsystem: diskSubsystem, - Name: "reads_merged", - Help: "The number of reads merged. See https://www.kernel.org/doc/Documentation/iostats.txt.", - }, - diskLabelNames, - ), - prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: Namespace, - Subsystem: diskSubsystem, - Name: "sectors_read", - Help: "The total number of sectors read successfully.", - }, - diskLabelNames, - ), - prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: Namespace, - Subsystem: diskSubsystem, - Name: "read_time_ms", - Help: "The total number of milliseconds spent by all reads.", - }, - diskLabelNames, - ), - prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: Namespace, - Subsystem: diskSubsystem, - Name: "writes_completed", - Help: "The total number of writes completed successfully.", - }, - diskLabelNames, - ), - prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: Namespace, - Subsystem: diskSubsystem, - Name: "writes_merged", - Help: "The number of writes merged. See https://www.kernel.org/doc/Documentation/iostats.txt.", - }, - diskLabelNames, - ), - prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: Namespace, - Subsystem: diskSubsystem, - Name: "sectors_written", - Help: "The total number of sectors written successfully.", - }, - diskLabelNames, - ), - prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: Namespace, - Subsystem: diskSubsystem, - Name: "write_time_ms", - Help: "This is the total number of milliseconds spent by all writes.", - }, - diskLabelNames, - ), - prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: Namespace, - Subsystem: diskSubsystem, - Name: "io_now", - Help: "The number of I/Os currently in progress.", - }, - diskLabelNames, - ), - prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: Namespace, - Subsystem: diskSubsystem, - Name: "io_time_ms", - Help: "Milliseconds spent doing I/Os.", - }, - diskLabelNames, - ), - prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: Namespace, - Subsystem: diskSubsystem, - Name: "io_time_weighted", - Help: "The weighted # of milliseconds spent doing I/Os. See https://www.kernel.org/doc/Documentation/iostats.txt.", - }, - diskLabelNames, - ), - } ) type diskstatsCollector struct { config Config ignoredDevicesPattern *regexp.Regexp + metrics []prometheus.Collector } func init() { @@ -142,38 +38,148 @@ func init() { // Takes a config struct and prometheus registry and returns a new Collector exposing // disk device stats. func NewDiskstatsCollector(config Config) (Collector, error) { - c := diskstatsCollector{ + var diskLabelNames = []string{"device"} + + return &diskstatsCollector{ config: config, ignoredDevicesPattern: regexp.MustCompile(*ignoredDevices), - } - return &c, nil + // Docs from https://www.kernel.org/doc/Documentation/iostats.txt + metrics: []prometheus.Collector{ + prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Subsystem: diskSubsystem, + Name: "reads_completed", + Help: "The total number of reads completed successfully.", + }, + diskLabelNames, + ), + prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Subsystem: diskSubsystem, + Name: "reads_merged", + Help: "The number of reads merged. See https://www.kernel.org/doc/Documentation/iostats.txt.", + }, + diskLabelNames, + ), + prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Subsystem: diskSubsystem, + Name: "sectors_read", + Help: "The total number of sectors read successfully.", + }, + diskLabelNames, + ), + prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Subsystem: diskSubsystem, + Name: "read_time_ms", + Help: "The total number of milliseconds spent by all reads.", + }, + diskLabelNames, + ), + prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Subsystem: diskSubsystem, + Name: "writes_completed", + Help: "The total number of writes completed successfully.", + }, + diskLabelNames, + ), + prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Subsystem: diskSubsystem, + Name: "writes_merged", + Help: "The number of writes merged. See https://www.kernel.org/doc/Documentation/iostats.txt.", + }, + diskLabelNames, + ), + prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Subsystem: diskSubsystem, + Name: "sectors_written", + Help: "The total number of sectors written successfully.", + }, + diskLabelNames, + ), + prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Subsystem: diskSubsystem, + Name: "write_time_ms", + Help: "This is the total number of milliseconds spent by all writes.", + }, + diskLabelNames, + ), + prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: diskSubsystem, + Name: "io_now", + Help: "The number of I/Os currently in progress.", + }, + diskLabelNames, + ), + prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Subsystem: diskSubsystem, + Name: "io_time_ms", + Help: "Milliseconds spent doing I/Os.", + }, + diskLabelNames, + ), + prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Subsystem: diskSubsystem, + Name: "io_time_weighted", + Help: "The weighted # of milliseconds spent doing I/Os. See https://www.kernel.org/doc/Documentation/iostats.txt.", + }, + diskLabelNames, + ), + }, + }, nil } func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) (err error) { diskStats, err := getDiskStats() if err != nil { - return fmt.Errorf("Couldn't get diskstats: %s", err) + return fmt.Errorf("couldn't get diskstats: %s", err) } + for dev, stats := range diskStats { if c.ignoredDevicesPattern.MatchString(dev) { glog.V(1).Infof("Ignoring device: %s", dev) continue } + + if len(stats) != len(c.metrics) { + return fmt.Errorf("invalid line for %s for %s", procDiskStats, dev) + } + for k, value := range stats { v, err := strconv.ParseFloat(value, 64) if err != nil { - return fmt.Errorf("Invalid value %s in diskstats: %s", value, err) + return fmt.Errorf("invalid value %s in diskstats: %s", value, err) } - counter, ok := diskStatsMetrics[k].(*prometheus.CounterVec) - if ok { + + if counter, ok := c.metrics[k].(*prometheus.CounterVec); ok { counter.WithLabelValues(dev).Set(v) - } else { - var gauge = diskStatsMetrics[k].(*prometheus.GaugeVec) + } else if gauge, ok := c.metrics[k].(*prometheus.GaugeVec); ok { gauge.WithLabelValues(dev).Set(v) + } else { + return fmt.Errorf("unexpected collector %d", k) } } } - for _, c := range diskStatsMetrics { + for _, c := range c.metrics { c.Collect(ch) } return err @@ -197,8 +203,8 @@ func parseDiskStats(r io.Reader) (map[string]map[int]string, error) { for scanner.Scan() { parts := strings.Fields(string(scanner.Text())) - if len(parts) != len(diskStatsMetrics)+3 { // we strip major, minor and dev - return nil, fmt.Errorf("Invalid line in %s: %s", procDiskStats, scanner.Text()) + if len(parts) < 4 { // we strip major, minor and dev + return nil, fmt.Errorf("invalid line in %s: %s", procDiskStats, scanner.Text()) } dev := parts[2] diskStats[dev] = map[int]string{} diff --git a/collector/filesystem.go b/collector/filesystem.go index c7dca3fc..fa618aab 100644 --- a/collector/filesystem.go +++ b/collector/filesystem.go @@ -21,60 +21,14 @@ const ( ) var ( - filesystemLabelNames = []string{"filesystem"} - - fsSizeMetric = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: Namespace, - Subsystem: filesystemSubsystem, - Name: "size", - Help: "Filesystem size in bytes.", - }, - filesystemLabelNames, - ) - fsFreeMetric = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: Namespace, - Subsystem: filesystemSubsystem, - Name: "free", - Help: "Filesystem free space in bytes.", - }, - filesystemLabelNames, - ) - fsAvailMetric = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: Namespace, - Subsystem: filesystemSubsystem, - Name: "avail", - Help: "Filesystem space available to non-root users in bytes.", - }, - filesystemLabelNames, - ) - fsFilesMetric = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: Namespace, - Subsystem: filesystemSubsystem, - Name: "files", - Help: "Filesystem total file nodes.", - }, - filesystemLabelNames, - ) - fsFilesFreeMetric = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: Namespace, - Subsystem: filesystemSubsystem, - Name: "files_free", - Help: "Filesystem total free file nodes.", - }, - filesystemLabelNames, - ) - ignoredMountPoints = flag.String("filesystemIgnoredMountPoints", "^/(sys|proc|dev)($|/)", "Regexp of mount points to ignore for filesystem collector.") ) type filesystemCollector struct { config Config ignoredMountPointsPattern *regexp.Regexp + + size, free, avail, files, filesFree *prometheus.GaugeVec } func init() { @@ -84,11 +38,57 @@ func init() { // Takes a config struct and prometheus registry and returns a new Collector exposing // network device filesystems. func NewFilesystemCollector(config Config) (Collector, error) { - c := filesystemCollector{ + var filesystemLabelNames = []string{"filesystem"} + + return &filesystemCollector{ config: config, ignoredMountPointsPattern: regexp.MustCompile(*ignoredMountPoints), - } - return &c, nil + size: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: filesystemSubsystem, + Name: "size", + Help: "Filesystem size in bytes.", + }, + filesystemLabelNames, + ), + free: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: filesystemSubsystem, + Name: "free", + Help: "Filesystem free space in bytes.", + }, + filesystemLabelNames, + ), + avail: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: filesystemSubsystem, + Name: "avail", + Help: "Filesystem space available to non-root users in bytes.", + }, + filesystemLabelNames, + ), + files: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: filesystemSubsystem, + Name: "files", + Help: "Filesystem total file nodes.", + }, + filesystemLabelNames, + ), + filesFree: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: filesystemSubsystem, + Name: "files_free", + Help: "Filesystem total free file nodes.", + }, + filesystemLabelNames, + ), + }, nil } // Expose filesystem fullness. @@ -107,17 +107,17 @@ func (c *filesystemCollector) Update(ch chan<- prometheus.Metric) (err error) { if err != nil { return fmt.Errorf("Statfs on %s returned %s", mp, err) } - fsSizeMetric.WithLabelValues(mp).Set(float64(buf.Blocks) * float64(buf.Bsize)) - fsFreeMetric.WithLabelValues(mp).Set(float64(buf.Bfree) * float64(buf.Bsize)) - fsAvailMetric.WithLabelValues(mp).Set(float64(buf.Bavail) * float64(buf.Bsize)) - fsFilesMetric.WithLabelValues(mp).Set(float64(buf.Files)) - fsFilesFreeMetric.WithLabelValues(mp).Set(float64(buf.Ffree)) + c.size.WithLabelValues(mp).Set(float64(buf.Blocks) * float64(buf.Bsize)) + c.free.WithLabelValues(mp).Set(float64(buf.Bfree) * float64(buf.Bsize)) + c.avail.WithLabelValues(mp).Set(float64(buf.Bavail) * float64(buf.Bsize)) + c.files.WithLabelValues(mp).Set(float64(buf.Files)) + c.filesFree.WithLabelValues(mp).Set(float64(buf.Ffree)) } - fsSizeMetric.Collect(ch) - fsFreeMetric.Collect(ch) - fsAvailMetric.Collect(ch) - fsFilesMetric.Collect(ch) - fsFilesFreeMetric.Collect(ch) + c.size.Collect(ch) + c.free.Collect(ch) + c.avail.Collect(ch) + c.files.Collect(ch) + c.filesFree.Collect(ch) return err } diff --git a/collector/interrupts.go b/collector/interrupts.go index 48a26602..74c98e9d 100644 --- a/collector/interrupts.go +++ b/collector/interrupts.go @@ -17,19 +17,9 @@ const ( procInterrupts = "/proc/interrupts" ) -var ( - interruptsMetric = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: Namespace, - Name: "interrupts", - Help: "Interrupt details from /proc/interrupts.", - }, - []string{"CPU", "type", "info", "devices"}, - ) -) - type interruptsCollector struct { config Config + metric *prometheus.CounterVec } func init() { @@ -39,10 +29,17 @@ func init() { // Takes a config struct and prometheus registry and returns a new Collector exposing // interrupts stats func NewInterruptsCollector(config Config) (Collector, error) { - c := interruptsCollector{ + return &interruptsCollector{ config: config, - } - return &c, nil + metric: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Name: "interrupts", + Help: "Interrupt details from /proc/interrupts.", + }, + []string{"CPU", "type", "info", "devices"}, + ), + }, nil } func (c *interruptsCollector) Update(ch chan<- prometheus.Metric) (err error) { @@ -62,10 +59,10 @@ func (c *interruptsCollector) Update(ch chan<- prometheus.Metric) (err error) { "info": interrupt.info, "devices": interrupt.devices, } - interruptsMetric.With(labels).Set(fv) + c.metric.With(labels).Set(fv) } } - interruptsMetric.Collect(ch) + c.metric.Collect(ch) return err } diff --git a/collector/lastlogin.go b/collector/lastlogin.go index 48fbab1b..970a8238 100644 --- a/collector/lastlogin.go +++ b/collector/lastlogin.go @@ -16,17 +16,9 @@ import ( const lastLoginSubsystem = "last_login" -var ( - lastSeen = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: Namespace, - Subsystem: lastLoginSubsystem, - Name: "time", - Help: "The time of the last login.", - }) -) - type lastLoginCollector struct { config Config + metric prometheus.Gauge } func init() { @@ -36,10 +28,15 @@ func init() { // Takes a config struct and prometheus registry and returns a new Collector exposing // load, seconds since last login and a list of tags as specified by config. func NewLastLoginCollector(config Config) (Collector, error) { - c := lastLoginCollector{ + return &lastLoginCollector{ config: config, - } - return &c, nil + metric: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: lastLoginSubsystem, + Name: "time", + Help: "The time of the last login.", + }), + }, nil } func (c *lastLoginCollector) Update(ch chan<- prometheus.Metric) (err error) { @@ -48,8 +45,8 @@ func (c *lastLoginCollector) Update(ch chan<- prometheus.Metric) (err error) { return fmt.Errorf("Couldn't get last seen: %s", err) } glog.V(1).Infof("Set node_last_login_time: %f", last) - lastSeen.Set(last) - lastSeen.Collect(ch) + c.metric.Set(last) + c.metric.Collect(ch) return err } diff --git a/collector/loadavg.go b/collector/loadavg.go index b4ab9d41..6b157d2f 100644 --- a/collector/loadavg.go +++ b/collector/loadavg.go @@ -16,16 +16,9 @@ const ( procLoad = "/proc/loadavg" ) -var ( - load1 = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: Namespace, - Name: "load1", - Help: "1m load average.", - }) -) - type loadavgCollector struct { config Config + metric prometheus.Gauge } func init() { @@ -35,10 +28,14 @@ func init() { // Takes a config struct and prometheus registry and returns a new Collector exposing // load, seconds since last login and a list of tags as specified by config. func NewLoadavgCollector(config Config) (Collector, error) { - c := loadavgCollector{ + return &loadavgCollector{ config: config, - } - return &c, nil + metric: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: Namespace, + Name: "load1", + Help: "1m load average.", + }), + }, nil } func (c *loadavgCollector) Update(ch chan<- prometheus.Metric) (err error) { @@ -47,8 +44,8 @@ func (c *loadavgCollector) Update(ch chan<- prometheus.Metric) (err error) { return fmt.Errorf("Couldn't get load: %s", err) } glog.V(1).Infof("Set node_load: %f", load) - load1.Set(load) - load1.Collect(ch) + c.metric.Set(load) + c.metric.Collect(ch) return err } diff --git a/collector/megacli.go b/collector/megacli.go index 6025c99b..404d31a5 100644 --- a/collector/megacli.go +++ b/collector/megacli.go @@ -17,32 +17,60 @@ const ( adapterHeaderSep = "================" ) -var ( - driveTemperature = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: Namespace, - Name: "megacli_drive_temperature_celsius", - Help: "megacli: drive temperature", - }, []string{"enclosure", "slot"}) +type megaCliCollector struct { + config Config + cli string - driveCounters = prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: Namespace, - Name: "megacli_drive_count", - Help: "megacli: drive error and event counters", - }, []string{"enclosure", "slot", "type"}) - - drivePresence = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: Namespace, - Name: "megacli_adapter_disk_presence", - Help: "megacli: disk presence per adapter", - }, []string{"type"}) - - counters = []string{"Media Error Count", "Other Error Count", "Predictive Failure Count"} -) + driveTemperature *prometheus.GaugeVec + driveCounters *prometheus.CounterVec + drivePresence *prometheus.GaugeVec +} func init() { Factories["megacli"] = NewMegaCliCollector } +// Takes a config struct and prometheus registry and returns a new Collector exposing +// RAID status through megacli. +func NewMegaCliCollector(config Config) (Collector, error) { + cli := defaultMegaCli + if config.Config["megacli_command"] != "" { + cli = config.Config["megacli_command"] + } + + return &megaCliCollector{ + config: config, + cli: cli, + driveTemperature: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Name: "megacli_drive_temperature_celsius", + Help: "megacli: drive temperature", + }, []string{"enclosure", "slot"}), + driveCounters: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: Namespace, + Name: "megacli_drive_count", + Help: "megacli: drive error and event counters", + }, []string{"enclosure", "slot", "type"}), + drivePresence: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Name: "megacli_adapter_disk_presence", + Help: "megacli: disk presence per adapter", + }, []string{"type"}), + }, nil +} + +func (c *megaCliCollector) Update(ch chan<- prometheus.Metric) (err error) { + err = c.updateAdapter() + if err != nil { + return err + } + err = c.updateDisks() + c.driveTemperature.Collect(ch) + c.driveCounters.Collect(ch) + c.drivePresence.Collect(ch) + return err +} + func parseMegaCliDisks(r io.Reader) (map[int]map[int]map[string]string, error) { var ( stats = map[int]map[int]map[string]string{} @@ -118,38 +146,6 @@ func parseMegaCliAdapter(r io.Reader) (map[string]map[string]string, error) { return raidStats, nil } -type megaCliCollector struct { - config Config - cli string -} - -// Takes a config struct and prometheus registry and returns a new Collector exposing -// RAID status through megacli. -func NewMegaCliCollector(config Config) (Collector, error) { - cli := defaultMegaCli - if config.Config["megacli_command"] != "" { - cli = config.Config["megacli_command"] - } - - c := megaCliCollector{ - config: config, - cli: cli, - } - return &c, nil -} - -func (c *megaCliCollector) Update(ch chan<- prometheus.Metric) (err error) { - err = c.updateAdapter() - if err != nil { - return err - } - err = c.updateDisks() - driveTemperature.Collect(ch) - driveCounters.Collect(ch) - drivePresence.Collect(ch) - return err -} - func (c *megaCliCollector) updateAdapter() error { cmd := exec.Command(c.cli, "-AdpAllInfo", "-aALL") pipe, err := cmd.StdoutPipe() @@ -174,12 +170,14 @@ func (c *megaCliCollector) updateAdapter() error { if err != nil { return err } - drivePresence.WithLabelValues(k).Set(value) + c.drivePresence.WithLabelValues(k).Set(value) } return nil } func (c *megaCliCollector) updateDisks() error { + var counters = []string{"Media Error Count", "Other Error Count", "Predictive Failure Count"} + cmd := exec.Command(c.cli, "-PDList", "-aALL") pipe, err := cmd.StdoutPipe() if err != nil { @@ -210,15 +208,15 @@ func (c *megaCliCollector) updateDisks() error { encStr := strconv.Itoa(enc) slotStr := strconv.Itoa(slot) - driveTemperature.WithLabelValues(encStr, slotStr).Set(t) + c.driveTemperature.WithLabelValues(encStr, slotStr).Set(t) - for _, c := range counters { - counter, err := strconv.ParseFloat(slotStats[c], 64) + for _, i := range counters { + counter, err := strconv.ParseFloat(slotStats[i], 64) if err != nil { return err } - driveCounters.WithLabelValues(encStr, slotStr, c).Set(counter) + c.driveCounters.WithLabelValues(encStr, slotStr, i).Set(counter) } } } diff --git a/collector/meminfo.go b/collector/meminfo.go index 13d2e179..58a6e70b 100644 --- a/collector/meminfo.go +++ b/collector/meminfo.go @@ -20,12 +20,9 @@ const ( memInfoSubsystem = "memory" ) -var ( - memInfoMetrics = map[string]prometheus.Gauge{} -) - type meminfoCollector struct { - config Config + config Config + metrics map[string]prometheus.Gauge } func init() { @@ -35,10 +32,10 @@ func init() { // Takes a config struct and prometheus registry and returns a new Collector exposing // memory stats. func NewMeminfoCollector(config Config) (Collector, error) { - c := meminfoCollector{ - config: config, - } - return &c, nil + return &meminfoCollector{ + config: config, + metrics: map[string]prometheus.Gauge{}, + }, nil } func (c *meminfoCollector) Update(ch chan<- prometheus.Metric) (err error) { @@ -48,16 +45,16 @@ func (c *meminfoCollector) Update(ch chan<- prometheus.Metric) (err error) { } glog.V(1).Infof("Set node_mem: %#v", memInfo) for k, v := range memInfo { - if _, ok := memInfoMetrics[k]; !ok { - memInfoMetrics[k] = prometheus.NewGauge(prometheus.GaugeOpts{ + if _, ok := c.metrics[k]; !ok { + c.metrics[k] = prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: Namespace, Subsystem: memInfoSubsystem, Name: k, Help: k + " from /proc/meminfo.", }) } - memInfoMetrics[k].Set(v) - memInfoMetrics[k].Collect(ch) + c.metrics[k].Set(v) + c.metrics[k].Collect(ch) } return err } diff --git a/collector/netdev.go b/collector/netdev.go index efc27711..e8a658b9 100644 --- a/collector/netdev.go +++ b/collector/netdev.go @@ -18,12 +18,9 @@ const ( netDevSubsystem = "network" ) -var ( - netDevMetrics = map[string]*prometheus.GaugeVec{} -) - type netDevCollector struct { - config Config + config Config + metrics map[string]*prometheus.GaugeVec } func init() { @@ -33,10 +30,10 @@ func init() { // Takes a config struct and prometheus registry and returns a new Collector exposing // network device stats. func NewNetDevCollector(config Config) (Collector, error) { - c := netDevCollector{ - config: config, - } - return &c, nil + return &netDevCollector{ + config: config, + metrics: map[string]*prometheus.GaugeVec{}, + }, nil } func (c *netDevCollector) Update(ch chan<- prometheus.Metric) (err error) { @@ -48,8 +45,8 @@ func (c *netDevCollector) Update(ch chan<- prometheus.Metric) (err error) { for dev, stats := range devStats { for t, value := range stats { key := direction + "_" + t - if _, ok := netDevMetrics[key]; !ok { - netDevMetrics[key] = prometheus.NewGaugeVec( + if _, ok := c.metrics[key]; !ok { + c.metrics[key] = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: Namespace, Subsystem: netDevSubsystem, @@ -63,11 +60,11 @@ func (c *netDevCollector) Update(ch chan<- prometheus.Metric) (err error) { if err != nil { return fmt.Errorf("Invalid value %s in netstats: %s", value, err) } - netDevMetrics[key].WithLabelValues(dev).Set(v) + c.metrics[key].WithLabelValues(dev).Set(v) } } } - for _, m := range netDevMetrics { + for _, m := range c.metrics { m.Collect(ch) } return err diff --git a/collector/netstat.go b/collector/netstat.go index bca6dcd8..19948c20 100644 --- a/collector/netstat.go +++ b/collector/netstat.go @@ -18,12 +18,9 @@ const ( netStatsSubsystem = "netstat" ) -var ( - netStatsMetrics = map[string]prometheus.Gauge{} -) - type netStatCollector struct { - config Config + config Config + metrics map[string]prometheus.Gauge } func init() { @@ -33,10 +30,10 @@ func init() { // NewNetStatCollector takes a config struct and returns // a new Collector exposing network stats. func NewNetStatCollector(config Config) (Collector, error) { - c := netStatCollector{ - config: config, - } - return &c, nil + return &netStatCollector{ + config: config, + metrics: map[string]prometheus.Gauge{}, + }, nil } func (c *netStatCollector) Update(ch chan<- prometheus.Metric) (err error) { @@ -47,8 +44,8 @@ func (c *netStatCollector) Update(ch chan<- prometheus.Metric) (err error) { for protocol, protocolStats := range netStats { for name, value := range protocolStats { key := protocol + "_" + name - if _, ok := netStatsMetrics[key]; !ok { - netStatsMetrics[key] = prometheus.NewGauge( + if _, ok := c.metrics[key]; !ok { + c.metrics[key] = prometheus.NewGauge( prometheus.GaugeOpts{ Namespace: Namespace, Subsystem: netStatsSubsystem, @@ -61,10 +58,10 @@ func (c *netStatCollector) Update(ch chan<- prometheus.Metric) (err error) { if err != nil { return fmt.Errorf("invalid value %s in netstats: %s", value, err) } - netStatsMetrics[key].Set(v) + c.metrics[key].Set(v) } } - for _, m := range netStatsMetrics { + for _, m := range c.metrics { m.Collect(ch) } return err diff --git a/collector/ntp.go b/collector/ntp.go index f2b9b676..1172ff79 100644 --- a/collector/ntp.go +++ b/collector/ntp.go @@ -14,14 +14,10 @@ import ( var ( ntpServer = flag.String("ntpServer", "", "NTP server to use for ntp collector.") - ntpDrift = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: Namespace, - Name: "ntp_drift_seconds", - Help: "Time between system time and ntp time.", - }) ) type ntpCollector struct { + drift prometheus.Gauge } func init() { @@ -34,9 +30,14 @@ func NewNtpCollector(config Config) (Collector, error) { if *ntpServer == "" { return nil, fmt.Errorf("No NTP server specifies, see --ntpServer") } - c := ntpCollector{} - return &c, nil + return &ntpCollector{ + drift: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: Namespace, + Name: "ntp_drift_seconds", + Help: "Time between system time and ntp time.", + }), + }, nil } func (c *ntpCollector) Update(ch chan<- prometheus.Metric) (err error) { @@ -46,7 +47,7 @@ func (c *ntpCollector) Update(ch chan<- prometheus.Metric) (err error) { } drift := t.Sub(time.Now()) glog.V(1).Infof("Set ntp_drift_seconds: %f", drift.Seconds()) - ntpDrift.Set(drift.Seconds()) - ntpDrift.Collect(ch) + c.drift.Set(drift.Seconds()) + c.drift.Collect(ch) return err } diff --git a/collector/runit_collector.go b/collector/runit_collector.go index b71d2aff..9ae78f5e 100644 --- a/collector/runit_collector.go +++ b/collector/runit_collector.go @@ -8,37 +8,14 @@ import ( "github.com/soundcloud/go-runit/runit" ) -var ( - runitLabelNames = []string{"service"} - - runitState = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: Namespace, - Name: "service_state", - Help: "node_exporter: state of runit service.", - }, - runitLabelNames, - ) - runitStateDesired = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: Namespace, - Name: "service_desired_state", - Help: "node_exporter: desired state of runit service.", - }, - runitLabelNames, - ) - runitStateNormal = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: Namespace, - Name: "service_normal_state", - Help: "node_exporter: normal state of runit service.", - }, - runitLabelNames, - ) +const ( + runitSubsystem = "runit" ) type runitCollector struct { config Config + + state, stateDesired, stateNormal *prometheus.GaugeVec } func init() { @@ -46,14 +23,41 @@ func init() { } func NewRunitCollector(config Config) (Collector, error) { - c := runitCollector{ - config: config, - } + var labels = []string{"service"} - return &c, nil + return &runitCollector{ + config: config, + state: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: runitSubsystem, + Name: "state", + Help: "state of runit service.", + }, + labels, + ), + stateDesired: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: runitSubsystem, + Name: "desired_state", + Help: "desired state of runit service.", + }, + labels, + ), + stateNormal: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: runitSubsystem, + Name: "normal_state", + Help: "normal state of runit service.", + }, + labels, + ), + }, nil } -func (c *runitCollector) Update(ch chan<- prometheus.Metric) (err error) { +func (c *runitCollector) Update(ch chan<- prometheus.Metric) error { services, err := runit.GetServices("/etc/service") if err != nil { return err @@ -67,16 +71,17 @@ func (c *runitCollector) Update(ch chan<- prometheus.Metric) (err error) { } glog.V(1).Infof("%s is %d on pid %d for %d seconds", service.Name, status.State, status.Pid, status.Duration) - runitState.WithLabelValues(service.Name).Set(float64(status.State)) - runitStateDesired.WithLabelValues(service.Name).Set(float64(status.Want)) + c.state.WithLabelValues(service.Name).Set(float64(status.State)) + c.stateDesired.WithLabelValues(service.Name).Set(float64(status.Want)) if status.NormallyUp { - runitStateNormal.WithLabelValues(service.Name).Set(1) + c.stateNormal.WithLabelValues(service.Name).Set(1) } else { - runitStateNormal.WithLabelValues(service.Name).Set(1) + c.stateNormal.WithLabelValues(service.Name).Set(0) } } - runitState.Collect(ch) - runitStateDesired.Collect(ch) - runitStateNormal.Collect(ch) - return err + c.state.Collect(ch) + c.stateDesired.Collect(ch) + c.stateNormal.Collect(ch) + + return nil } diff --git a/collector/stat.go b/collector/stat.go index 4e78f9c4..f5942f9e 100644 --- a/collector/stat.go +++ b/collector/stat.go @@ -18,49 +18,15 @@ const ( procStat = "/proc/stat" ) -var ( - cpuMetrics = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: Namespace, - Name: "cpu", - Help: "Seconds the cpus spent in each mode.", - }, - []string{"cpu", "mode"}, - ) - intrMetric = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: Namespace, - Name: "intr", - Help: "Total number of interrupts serviced.", - }) - ctxtMetric = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: Namespace, - Name: "context_switches", - Help: "Total number of context switches.", - }) - forksMetric = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: Namespace, - Name: "forks", - Help: "Total number of forks.", - }) - btimeMetric = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: Namespace, - Name: "boot_time", - Help: "Node boot time, in unixtime.", - }) - procsRunningMetric = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: Namespace, - Name: "procs_running", - Help: "Number of processes in runnable state.", - }) - procsBlockedMetric = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: Namespace, - Name: "procs_blocked", - Help: "Number of processes blocked waiting for I/O to complete.", - }) -) - type statCollector struct { - config Config + config Config + cpu *prometheus.CounterVec + intr prometheus.Counter + ctxt prometheus.Counter + forks prometheus.Counter + btime prometheus.Gauge + procsRunning prometheus.Gauge + procsBlocked prometheus.Gauge } func init() { @@ -70,10 +36,47 @@ func init() { // Takes a config struct and prometheus registry and returns a new Collector exposing // network device stats. func NewStatCollector(config Config) (Collector, error) { - c := statCollector{ + return &statCollector{ config: config, - } - return &c, nil + cpu: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Name: "cpu", + Help: "Seconds the cpus spent in each mode.", + }, + []string{"cpu", "mode"}, + ), + intr: prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: Namespace, + Name: "intr", + Help: "Total number of interrupts serviced.", + }), + ctxt: prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: Namespace, + Name: "context_switches", + Help: "Total number of context switches.", + }), + forks: prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: Namespace, + Name: "forks", + Help: "Total number of forks.", + }), + btime: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: Namespace, + Name: "boot_time", + Help: "Node boot time, in unixtime.", + }), + procsRunning: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: Namespace, + Name: "procs_running", + Help: "Number of processes in runnable state.", + }), + procsBlocked: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: Namespace, + Name: "procs_blocked", + Help: "Number of processes blocked waiting for I/O to complete.", + }), + }, nil } // Expose a variety of stats from /proc/stats. @@ -102,7 +105,7 @@ func (c *statCollector) Update(ch chan<- prometheus.Metric) (err error) { } // Convert from ticks to seconds value /= float64(C.sysconf(C._SC_CLK_TCK)) - cpuMetrics.With(prometheus.Labels{"cpu": parts[0], "mode": cpuFields[i]}).Set(value) + c.cpu.With(prometheus.Labels{"cpu": parts[0], "mode": cpuFields[i]}).Set(value) } case parts[0] == "intr": // Only expose the overall number, use the 'interrupts' collector for more detail. @@ -110,45 +113,45 @@ func (c *statCollector) Update(ch chan<- prometheus.Metric) (err error) { if err != nil { return err } - intrMetric.Set(value) + c.intr.Set(value) case parts[0] == "ctxt": value, err := strconv.ParseFloat(parts[1], 64) if err != nil { return err } - ctxtMetric.Set(value) + c.ctxt.Set(value) case parts[0] == "processes": value, err := strconv.ParseFloat(parts[1], 64) if err != nil { return err } - forksMetric.Set(value) + c.forks.Set(value) case parts[0] == "btime": value, err := strconv.ParseFloat(parts[1], 64) if err != nil { return err } - btimeMetric.Set(value) + c.btime.Set(value) case parts[0] == "procs_running": value, err := strconv.ParseFloat(parts[1], 64) if err != nil { return err } - procsRunningMetric.Set(value) + c.procsRunning.Set(value) case parts[0] == "procs_blocked": value, err := strconv.ParseFloat(parts[1], 64) if err != nil { return err } - procsBlockedMetric.Set(value) + c.procsBlocked.Set(value) } } - cpuMetrics.Collect(ch) - ctxtMetric.Collect(ch) - intrMetric.Collect(ch) - forksMetric.Collect(ch) - btimeMetric.Collect(ch) - procsRunningMetric.Collect(ch) - procsBlockedMetric.Collect(ch) + c.cpu.Collect(ch) + c.ctxt.Collect(ch) + c.intr.Collect(ch) + c.forks.Collect(ch) + c.btime.Collect(ch) + c.procsRunning.Collect(ch) + c.procsBlocked.Collect(ch) return err } diff --git a/collector/time.go b/collector/time.go index e63efafe..83a7f23b 100644 --- a/collector/time.go +++ b/collector/time.go @@ -9,16 +9,9 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -var ( - systemTime = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: Namespace, - Name: "time", - Help: "System time in seconds since epoch (1970).", - }) -) - type timeCollector struct { config Config + metric prometheus.Counter } func init() { @@ -28,17 +21,20 @@ func init() { // Takes a config struct and prometheus registry and returns a new Collector exposing // the current system time in seconds since epoch. func NewTimeCollector(config Config) (Collector, error) { - c := timeCollector{ + return &timeCollector{ config: config, - } - - return &c, nil + metric: prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: Namespace, + Name: "time", + Help: "System time in seconds since epoch (1970).", + }), + }, nil } func (c *timeCollector) Update(ch chan<- prometheus.Metric) (err error) { now := time.Now() glog.V(1).Infof("Set time: %f", now.Unix()) - systemTime.Set(float64(now.Unix())) - systemTime.Collect(ch) + c.metric.Set(float64(now.Unix())) + c.metric.Collect(ch) return err }