From 25ea90369cf67b8dc65910040731c5abf3392394 Mon Sep 17 00:00:00 2001 From: Brian Brazil Date: Wed, 4 Jun 2014 12:12:34 +0100 Subject: [PATCH 1/4] Split native collector into its component parts and make them enablable. Last login is disabled by default as it's broken on ubuntu 12.04 Interrupts is disabled by default as it's very granular and we'll have total interrupts from /proc/stat Allow ignoring devices from diskstats, ignore ram and loop devices by default. Use glog for logging. --- README.md | 9 +- collector/attributes.go | 43 ++++ collector/collector.go | 5 +- collector/diskstats.go | 131 ++++++++++ collector/gmond_collector.go | 13 +- collector/helper.go | 11 - collector/interrupts.go | 116 +++++++++ collector/lastlogin.go | 105 ++++++++ collector/loadavg.go | 76 ++++++ collector/meminfo.go | 101 ++++++++ collector/native_collector.go | 454 ---------------------------------- collector/netdev.go | 125 ++++++++++ collector/runit_collector.go | 11 +- node_exporter.go | 63 ++--- 14 files changed, 747 insertions(+), 516 deletions(-) create mode 100644 collector/attributes.go create mode 100644 collector/diskstats.go create mode 100644 collector/interrupts.go create mode 100644 collector/lastlogin.go create mode 100644 collector/loadavg.go create mode 100644 collector/meminfo.go delete mode 100644 collector/native_collector.go create mode 100644 collector/netdev.go diff --git a/README.md b/README.md index fd076cfe..1b614b27 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,26 @@ # node_exporter -Prometheus exporter with plugable metric collectors. +Prometheus exporter with pluggable metric collectors. ## Available collectors -By default it will only include the NativeCollector. +By default the build will only include the native collectors +that expose information from /proc. To include other collectors, specify the build tags lile this: go build -tags 'ganglia runit' node_exporter.go +Which collectors are used is controlled by the --enabledCollectors flag.
+ ### NativeCollector Provides metrics for load, seconds since last login and a list of tags read from `node_exporter.conf`. -To disable the native collector, use build tag `nonative`. - ### GmondCollector (tag: ganglia) diff --git a/collector/attributes.go b/collector/attributes.go new file mode 100644 index 00000000..a42bc6e9 --- /dev/null +++ b/collector/attributes.go @@ -0,0 +1,43 @@ +// +build !noattributes + +package collector + +import ( + "github.com/golang/glog" + "github.com/prometheus/client_golang/prometheus" +) + +var ( + attributes = prometheus.NewGauge() +) + +type attributesCollector struct { + registry prometheus.Registry + config Config +} + +func init() { + Factories["attributes"] = NewAttributesCollector +} + +// Takes a config struct and prometheus registry and returns a new Collector exposing +// labels from the config. +func NewAttributesCollector(config Config, registry prometheus.Registry) (Collector, error) { + c := attributesCollector{ + config: config, + registry: registry, + } + registry.Register( + "node_attributes", + "node_exporter attributes", + prometheus.NilLabels, + attributes, + ) + return &c, nil +} + +func (c *attributesCollector) Update() (updates int, err error) { + glog.V(1).Info("Set node_attributes{%v}: 1", c.config.Attributes) + attributes.Set(c.config.Attributes, 1) + return updates, err +} diff --git a/collector/collector.go b/collector/collector.go index 5fb6f4fd..e713cc44 100644 --- a/collector/collector.go +++ b/collector/collector.go @@ -5,15 +5,12 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -var Factories []func(Config, prometheus.Registry) (Collector, error) +var Factories = make(map[string]func(Config, prometheus.Registry) (Collector, error)) // Interface a collector has to implement. type Collector interface { // Get new metrics and expose them via prometheus registry. Update() (n int, err error) - - // Returns the name of the collector. 
- Name() string } type Config struct { diff --git a/collector/diskstats.go b/collector/diskstats.go new file mode 100644 index 00000000..cdc8d4ec --- /dev/null +++ b/collector/diskstats.go @@ -0,0 +1,131 @@ +// +build !nonative + +package collector + +import ( + "bufio" + "flag" + "fmt" + "io" + "os" + "regexp" + "strconv" + "strings" + + "github.com/golang/glog" + "github.com/prometheus/client_golang/prometheus" +) + +const ( + procDiskStats = "/proc/diskstats" +) + +type diskStat struct { + name string + metric prometheus.Metric + documentation string +} + +var ( + ignoredDevices = flag.String("diskstatsIgnoredDevices", "^(ram|loop)\\d+$", "Regexp of devices to ignore for diskstats.") + + // Docs from https://www.kernel.org/doc/Documentation/iostats.txt + diskStatsMetrics = []diskStat{ + {"reads_completed", prometheus.NewCounter(), "The total number of reads completed successfully."}, + {"reads_merged", prometheus.NewCounter(), "The number of reads merged. See https://www.kernel.org/doc/Documentation/iostats.txt"}, + {"sectors_read", prometheus.NewCounter(), "The total number of sectors read successfully."}, + {"read_time_ms", prometheus.NewCounter(), "the total number of milliseconds spent by all reads."}, + {"writes_completed", prometheus.NewCounter(), "The total number of writes completed successfully."}, + {"writes_merged", prometheus.NewCounter(), "The number of writes merged. See https://www.kernel.org/doc/Documentation/iostats.txt"}, + {"sectors_written", prometheus.NewCounter(), "The total number of sectors written successfully."}, + {"write_time_ms", prometheus.NewCounter(), "This is the total number of milliseconds spent by all writes."}, + {"io_now", prometheus.NewGauge(), "The number of I/Os currently in progress."}, + {"io_time_ms", prometheus.NewCounter(), "Milliseconds spent doing I/Os."}, + {"io_time_weighted", prometheus.NewCounter(), "The weighted # of milliseconds spent doing I/Os. 
See https://www.kernel.org/doc/Documentation/iostats.txt"}, + } +) + +type diskstatsCollector struct { + registry prometheus.Registry + config Config + ignoredDevicesPattern *regexp.Regexp +} + +func init() { + Factories["diskstats"] = NewDiskstatsCollector +} + +// Takes a config struct and prometheus registry and returns a new Collector exposing +// disk device stats. +func NewDiskstatsCollector(config Config, registry prometheus.Registry) (Collector, error) { + c := diskstatsCollector{ + config: config, + registry: registry, + ignoredDevicesPattern: regexp.MustCompile(*ignoredDevices), + } + + for _, v := range diskStatsMetrics { + registry.Register( + "node_disk_"+v.name, + v.documentation, + prometheus.NilLabels, + v.metric, + ) + } + return &c, nil +} + +func (c *diskstatsCollector) Update() (updates int, err error) { + diskStats, err := getDiskStats() + if err != nil { + return updates, fmt.Errorf("Couldn't get diskstats: %s", err) + } + for dev, stats := range diskStats { + if c.ignoredDevicesPattern.MatchString(dev) { + glog.V(1).Infof("Ignoring device: %s", dev) + continue + } + for k, value := range stats { + updates++ + v, err := strconv.ParseFloat(value, 64) + if err != nil { + return updates, fmt.Errorf("Invalid value %s in diskstats: %s", value, err) + } + labels := map[string]string{"device": dev} + counter, ok := diskStatsMetrics[k].metric.(prometheus.Counter) + if ok { + counter.Set(labels, v) + } else { + var gauge = diskStatsMetrics[k].metric.(prometheus.Gauge) + gauge.Set(labels, v) + } + } + } + return updates, err +} + +func getDiskStats() (map[string]map[int]string, error) { + file, err := os.Open(procDiskStats) + if err != nil { + return nil, err + } + return parseDiskStats(file) +} + +func parseDiskStats(r io.ReadCloser) (map[string]map[int]string, error) { + defer r.Close() + diskStats := map[string]map[int]string{} + scanner := bufio.NewScanner(r) + for scanner.Scan() { + parts := strings.Fields(string(scanner.Text())) + if len(parts) != 
len(diskStatsMetrics)+3 { // we strip major, minor and dev + return nil, fmt.Errorf("Invalid line in %s: %s", procDiskStats, scanner.Text()) + } + dev := parts[2] + diskStats[dev] = map[int]string{} + for i, v := range parts[3:] { + diskStats[dev][i] = v + } + } + return diskStats, nil +} diff --git a/collector/gmond_collector.go b/collector/gmond_collector.go index 874f0813..43cfd09d 100644 --- a/collector/gmond_collector.go +++ b/collector/gmond_collector.go @@ -11,6 +11,7 @@ import ( "regexp" "time" + "github.com/golang/glog" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/node_exporter/collector/ganglia" ) @@ -23,14 +24,13 @@ const ( ) type gmondCollector struct { - name string Metrics map[string]prometheus.Gauge config Config registry prometheus.Registry } func init() { - Factories = append(Factories, NewGmondCollector) + Factories["gmond"] = NewGmondCollector } var illegalCharsRE = regexp.MustCompile(`[^a-zA-Z0-9_]`) @@ -38,7 +38,6 @@ var illegalCharsRE = regexp.MustCompile(`[^a-zA-Z0-9_]`) // Takes a config struct and prometheus registry and returns a new Collector scraping ganglia. 
func NewGmondCollector(config Config, registry prometheus.Registry) (Collector, error) { c := gmondCollector{ - name: "gmond_collector", config: config, Metrics: make(map[string]prometheus.Gauge), registry: registry, @@ -47,8 +46,6 @@ func NewGmondCollector(config Config, registry prometheus.Registry) (Collector, return &c, nil } -func (c *gmondCollector) Name() string { return c.name } - func (c *gmondCollector) setMetric(name string, labels map[string]string, metric ganglia.Metric) { if _, ok := c.Metrics[name]; !ok { var desc string @@ -64,18 +61,18 @@ func (c *gmondCollector) setMetric(name string, labels map[string]string, metric break } } - debug(c.Name(), "Register %s: %s", name, desc) + glog.V(1).Infof("Register %s: %s", name, desc) gauge := prometheus.NewGauge() c.Metrics[name] = gauge c.registry.Register(name, desc, prometheus.NilLabels, gauge) // one gauge per metric! } - debug(c.Name(), "Set %s{%s}: %f", name, labels, metric.Value) + glog.V(1).Infof("Set %s{%s}: %f", name, labels, metric.Value) c.Metrics[name].Set(labels, metric.Value) } func (c *gmondCollector) Update() (updates int, err error) { conn, err := net.Dial(gangliaProto, gangliaAddress) - debug(c.Name(), "gmondCollector Update") + glog.V(1).Infof("gmondCollector Update") if err != nil { return updates, fmt.Errorf("Can't connect to gmond: %s", err) } diff --git a/collector/helper.go b/collector/helper.go index 00012a2d..3e4fda44 100644 --- a/collector/helper.go +++ b/collector/helper.go @@ -1,22 +1,11 @@ package collector import ( - "flag" "fmt" - "log" "strconv" "strings" ) -var verbose = flag.Bool("verbose", false, "Verbose output.") - -func debug(name string, format string, a ...interface{}) { - if *verbose { - f := fmt.Sprintf("%s: %s", name, format) - log.Printf(f, a...) 
- } -} - func splitToInts(str string, sep string) (ints []int, err error) { for _, part := range strings.Split(str, sep) { i, err := strconv.Atoi(part) diff --git a/collector/interrupts.go b/collector/interrupts.go new file mode 100644 index 00000000..a13e94c6 --- /dev/null +++ b/collector/interrupts.go @@ -0,0 +1,116 @@ +// +build !nointerrupts + +package collector + +import ( + "bufio" + "fmt" + "io" + "os" + "strconv" + "strings" + + "github.com/prometheus/client_golang/prometheus" +) + +const ( + procInterrupts = "/proc/interrupts" +) + +var ( + interruptsMetric = prometheus.NewCounter() +) + +type interruptsCollector struct { + registry prometheus.Registry + config Config +} + +func init() { + Factories["interrupts"] = NewInterruptsCollector +} + +// Takes a config struct and prometheus registry and returns a new Collector exposing +// interrupts stats +func NewInterruptsCollector(config Config, registry prometheus.Registry) (Collector, error) { + c := interruptsCollector{ + config: config, + registry: registry, + } + registry.Register( + "node_interrupts", + "Interrupt details from /proc/interrupts", + prometheus.NilLabels, + interruptsMetric, + ) + return &c, nil +} + +func (c *interruptsCollector) Update() (updates int, err error) { + interrupts, err := getInterrupts() + if err != nil { + return updates, fmt.Errorf("Couldn't get interrupts: %s", err) + } + for name, interrupt := range interrupts { + for cpuNo, value := range interrupt.values { + updates++ + fv, err := strconv.ParseFloat(value, 64) + if err != nil { + return updates, fmt.Errorf("Invalid value %s in interrupts: %s", value, err) + } + labels := map[string]string{ + "CPU": strconv.Itoa(cpuNo), + "type": name, + "info": interrupt.info, + "devices": interrupt.devices, + } + interruptsMetric.Set(labels, fv) + } + } + return updates, err +} + +type interrupt struct { + info string + devices string + values []string +} + +func getInterrupts() (map[string]interrupt, error) { + file, err := 
os.Open(procInterrupts) + if err != nil { + return nil, err + } + return parseInterrupts(file) +} + +func parseInterrupts(r io.ReadCloser) (map[string]interrupt, error) { + defer r.Close() + interrupts := map[string]interrupt{} + scanner := bufio.NewScanner(r) + if !scanner.Scan() { + return nil, fmt.Errorf("%s empty", procInterrupts) + } + cpuNum := len(strings.Fields(string(scanner.Text()))) // one header per cpu + + for scanner.Scan() { + line := scanner.Text() + parts := strings.Fields(string(line)) + if len(parts) < cpuNum+2 { // irq + one column per cpu + details, + continue // we ignore ERR and MIS for now + } + intName := parts[0][:len(parts[0])-1] // remove trailing : + intr := interrupt{ + values: parts[1:cpuNum], + } + + if _, err := strconv.Atoi(intName); err == nil { // numeral interrupt + intr.info = parts[cpuNum+1] + intr.devices = strings.Join(parts[cpuNum+2:], " ") + } else { + intr.info = strings.Join(parts[cpuNum+1:], " ") + } + interrupts[intName] = intr + } + return interrupts, nil +} diff --git a/collector/lastlogin.go b/collector/lastlogin.go new file mode 100644 index 00000000..bfd37cc7 --- /dev/null +++ b/collector/lastlogin.go @@ -0,0 +1,105 @@ +// +build !nolastLogin + +package collector + +import ( + "bufio" + "fmt" + "io" + "os/exec" + "strings" + "time" + + "github.com/golang/glog" + "github.com/prometheus/client_golang/prometheus" +) + +var ( + lastSeen = prometheus.NewGauge() +) + +type lastLoginCollector struct { + registry prometheus.Registry + config Config +} + +func init() { + Factories["lastlogin"] = NewLastLoginCollector +} + +// Takes a config struct and prometheus registry and returns a new Collector exposing +// the time of the last login.
+func NewLastLoginCollector(config Config, registry prometheus.Registry) (Collector, error) { + c := lastLoginCollector{ + config: config, + registry: registry, + } + registry.Register( + "node_last_login_time", + "The time of the last login.", + prometheus.NilLabels, + lastSeen, + ) + return &c, nil +} + +func (c *lastLoginCollector) Update() (updates int, err error) { + last, err := getLastLoginTime() + if err != nil { + return updates, fmt.Errorf("Couldn't get last seen: %s", err) + } + updates++ + glog.V(1).Infof("Set node_last_login_time: %f", last) + lastSeen.Set(nil, last) + return updates, err +} + +func getLastLoginTime() (float64, error) { + who := exec.Command("who", "/var/log/wtmp", "-l", "-u", "-s") + + output, err := who.StdoutPipe() + if err != nil { + return 0, err + } + + err = who.Start() + if err != nil { + return 0, err + } + + reader := bufio.NewReader(output) + + var last time.Time + for { + line, isPrefix, err := reader.ReadLine() + if err == io.EOF { + break + } + if isPrefix { + return 0, fmt.Errorf("line to long: %s(...)", line) + } + + fields := strings.Fields(string(line)) + lastDate := fields[2] + lastTime := fields[3] + + dateParts, err := splitToInts(lastDate, "-") // 2013-04-16 + if err != nil { + return 0, fmt.Errorf("Couldn't parse date in line '%s': %s", fields, err) + } + + timeParts, err := splitToInts(lastTime, ":") // 11:33 + if err != nil { + return 0, fmt.Errorf("Couldn't parse time in line '%s': %s", fields, err) + } + + last_t := time.Date(dateParts[0], time.Month(dateParts[1]), dateParts[2], timeParts[0], timeParts[1], 0, 0, time.UTC) + last = last_t + } + err = who.Wait() + if err != nil { + return 0, err + } + + return float64(last.Unix()), nil +} diff --git a/collector/loadavg.go b/collector/loadavg.go new file mode 100644 index 00000000..a6104df2 --- /dev/null +++ b/collector/loadavg.go @@ -0,0 +1,76 @@ +// +build !noloadavg + +package collector + +import ( + "fmt" + "io/ioutil" + "strconv" + "strings" + +
"github.com/golang/glog" + "github.com/prometheus/client_golang/prometheus" +) + +const ( + procLoad = "/proc/loadavg" +) + +var ( + load1 = prometheus.NewGauge() +) + +type loadavgCollector struct { + registry prometheus.Registry + config Config +} + +func init() { + Factories["loadavg"] = NewLoadavgCollector +} + +// Takes a config struct and prometheus registry and returns a new Collector exposing +// the 1 minute load average. +func NewLoadavgCollector(config Config, registry prometheus.Registry) (Collector, error) { + c := loadavgCollector{ + config: config, + registry: registry, + } + + registry.Register( + "node_load1", + "1m load average", + prometheus.NilLabels, + load1, + ) + return &c, nil +} + +func (c *loadavgCollector) Update() (updates int, err error) { + load, err := getLoad1() + if err != nil { + return updates, fmt.Errorf("Couldn't get load: %s", err) + } + updates++ + glog.V(1).Infof("Set node_load: %f", load) + load1.Set(nil, load) + + return updates, err +} + +func getLoad1() (float64, error) { + data, err := ioutil.ReadFile(procLoad) + if err != nil { + return 0, err + } + return parseLoad(string(data)) +} + +func parseLoad(data string) (float64, error) { + parts := strings.Fields(data) + load, err := strconv.ParseFloat(parts[0], 64) + if err != nil { + return 0, fmt.Errorf("Could not parse load '%s': %s", parts[0], err) + } + return load, nil +} diff --git a/collector/meminfo.go b/collector/meminfo.go new file mode 100644 index 00000000..180cfd7f --- /dev/null +++ b/collector/meminfo.go @@ -0,0 +1,101 @@ +// +build !nomeminfo + +package collector + +import ( + "bufio" + "fmt" + "io" + "os" + "regexp" + "strconv" + "strings" + + "github.com/golang/glog" + "github.com/prometheus/client_golang/prometheus" +) + +const ( + procMemInfo = "/proc/meminfo" +) + +var ( + memInfoMetrics = map[string]prometheus.Gauge{} +) + +type meminfoCollector struct { + registry prometheus.Registry + config Config +} +
+func init() { + Factories["meminfo"] = NewMeminfoCollector +} + +// Takes a config struct and prometheus registry and returns a new Collector exposing +// memory stats. +func NewMeminfoCollector(config Config, registry prometheus.Registry) (Collector, error) { + c := meminfoCollector{ + config: config, + registry: registry, + } + return &c, nil +} + +func (c *meminfoCollector) Update() (updates int, err error) { + memInfo, err := getMemInfo() + if err != nil { + return updates, fmt.Errorf("Couldn't get meminfo: %s", err) + } + glog.V(1).Infof("Set node_mem: %#v", memInfo) + for k, v := range memInfo { + if _, ok := memInfoMetrics[k]; !ok { + memInfoMetrics[k] = prometheus.NewGauge() + c.registry.Register( + "node_memory_"+k, + k+" from /proc/meminfo", + prometheus.NilLabels, + memInfoMetrics[k], + ) + } + updates++ + memInfoMetrics[k].Set(nil, v) + } + return updates, err +} + +func getMemInfo() (map[string]float64, error) { + file, err := os.Open(procMemInfo) + if err != nil { + return nil, err + } + return parseMemInfo(file) +} + +func parseMemInfo(r io.ReadCloser) (map[string]float64, error) { + defer r.Close() + memInfo := map[string]float64{} + scanner := bufio.NewScanner(r) + re := regexp.MustCompile("\\((.*)\\)") + for scanner.Scan() { + line := scanner.Text() + parts := strings.Fields(string(line)) + fv, err := strconv.ParseFloat(parts[1], 64) + if err != nil { + return nil, fmt.Errorf("Invalid value in meminfo: %s", err) + } + switch len(parts) { + case 2: // no unit + case 3: // has unit, we presume kB + fv *= 1024 + default: + return nil, fmt.Errorf("Invalid line in %s: %s", procMemInfo, line) + } + key := parts[0][:len(parts[0])-1] // remove trailing : from key + // Active(anon) -> Active_anon + key = re.ReplaceAllString(key, "_${1}") + memInfo[key] = fv + } + return memInfo, nil + +} diff --git a/collector/native_collector.go b/collector/native_collector.go deleted file mode 100644 index 9faa23a3..00000000 --- a/collector/native_collector.go +++ 
/dev/null @@ -1,454 +0,0 @@ -// +build !nonative - -package collector - -import ( - "bufio" - "fmt" - "io" - "io/ioutil" - "os" - "os/exec" - "regexp" - "strconv" - "strings" - "time" - - "github.com/prometheus/client_golang/prometheus" -) - -const ( - procLoad = "/proc/loadavg" - procMemInfo = "/proc/meminfo" - procInterrupts = "/proc/interrupts" - procNetDev = "/proc/net/dev" - procDiskStats = "/proc/diskstats" -) - -type diskStat struct { - name string - metric prometheus.Metric - documentation string -} - -var ( - // Docs from https://www.kernel.org/doc/Documentation/iostats.txt - diskStatsMetrics = []diskStat{ - {"reads_completed", prometheus.NewCounter(), "The total number of reads completed successfully."}, - {"reads_merged", prometheus.NewCounter(), "The number of reads merged. See https://www.kernel.org/doc/Documentation/iostats.txt"}, - {"sectors_read", prometheus.NewCounter(), "The total number of sectors read successfully."}, - {"read_time_ms", prometheus.NewCounter(), "the total number of milliseconds spent by all reads."}, - {"writes_completed", prometheus.NewCounter(), "The total number of writes completed successfully."}, - {"writes_merged", prometheus.NewCounter(), "The number of writes merged. See https://www.kernel.org/doc/Documentation/iostats.txt"}, - {"sectors_written", prometheus.NewCounter(), "The total number of sectors written successfully."}, - {"write_time_ms", prometheus.NewCounter(), "This is the total number of milliseconds spent by all writes."}, - {"io_now", prometheus.NewGauge(), "The number of I/Os currently in progress."}, - {"io_time_ms", prometheus.NewCounter(), "Milliseconds spent doing I/Os."}, - {"io_time_weighted", prometheus.NewCounter(), "The weighted # of milliseconds spent doing I/Os. 
See https://www.kernel.org/doc/Documentation/iostats.txt"}, - } - lastSeen = prometheus.NewGauge() - load1 = prometheus.NewGauge() - attributes = prometheus.NewGauge() - memInfoMetrics = map[string]prometheus.Gauge{} - netStatsMetrics = map[string]prometheus.Gauge{} - interruptsMetric = prometheus.NewCounter() -) - -type nativeCollector struct { - registry prometheus.Registry - name string - config Config -} - -func init() { - Factories = append(Factories, NewNativeCollector) -} - -// Takes a config struct and prometheus registry and returns a new Collector exposing -// load, seconds since last login and a list of tags as specified by config. -func NewNativeCollector(config Config, registry prometheus.Registry) (Collector, error) { - c := nativeCollector{ - name: "native_collector", - config: config, - registry: registry, - } - - registry.Register( - "node_load1", - "1m load average", - prometheus.NilLabels, - load1, - ) - - registry.Register( - "node_last_login_time", - "The time of the last login.", - prometheus.NilLabels, - lastSeen, - ) - - registry.Register( - "node_attributes", - "node_exporter attributes", - prometheus.NilLabels, - attributes, - ) - - registry.Register( - "node_interrupts", - "Interrupt details from /proc/interrupts", - prometheus.NilLabels, - interruptsMetric, - ) - - for _, v := range diskStatsMetrics { - registry.Register( - "node_disk_"+v.name, - v.documentation, - prometheus.NilLabels, - v.metric, - ) - } - return &c, nil -} - -func (c *nativeCollector) Name() string { return c.name } - -func (c *nativeCollector) Update() (updates int, err error) { - last, err := getLastLoginTime() - if err != nil { - return updates, fmt.Errorf("Couldn't get last seen: %s", err) - } - updates++ - debug(c.Name(), "Set node_last_login_time: %f", last) - lastSeen.Set(nil, last) - - load, err := getLoad1() - if err != nil { - return updates, fmt.Errorf("Couldn't get load: %s", err) - } - updates++ - debug(c.Name(), "Set node_load: %f", load) - 
load1.Set(nil, load) - - debug(c.Name(), "Set node_attributes{%v}: 1", c.config.Attributes) - attributes.Set(c.config.Attributes, 1) - - memInfo, err := getMemInfo() - if err != nil { - return updates, fmt.Errorf("Couldn't get meminfo: %s", err) - } - debug(c.Name(), "Set node_mem: %#v", memInfo) - for k, v := range memInfo { - if _, ok := memInfoMetrics[k]; !ok { - memInfoMetrics[k] = prometheus.NewGauge() - c.registry.Register( - "node_memory_"+k, - k+" from /proc/meminfo", - prometheus.NilLabels, - memInfoMetrics[k], - ) - } - updates++ - memInfoMetrics[k].Set(nil, v) - } - - interrupts, err := getInterrupts() - if err != nil { - return updates, fmt.Errorf("Couldn't get interrupts: %s", err) - } - for name, interrupt := range interrupts { - for cpuNo, value := range interrupt.values { - updates++ - fv, err := strconv.ParseFloat(value, 64) - if err != nil { - return updates, fmt.Errorf("Invalid value %s in interrupts: %s", value, err) - } - labels := map[string]string{ - "CPU": strconv.Itoa(cpuNo), - "type": name, - "info": interrupt.info, - "devices": interrupt.devices, - } - interruptsMetric.Set(labels, fv) - } - } - - netStats, err := getNetStats() - if err != nil { - return updates, fmt.Errorf("Couldn't get netstats: %s", err) - } - for direction, devStats := range netStats { - for dev, stats := range devStats { - for t, value := range stats { - key := direction + "_" + t - if _, ok := netStatsMetrics[key]; !ok { - netStatsMetrics[key] = prometheus.NewGauge() - c.registry.Register( - "node_network_"+key, - t+" "+direction+" from /proc/net/dev", - prometheus.NilLabels, - netStatsMetrics[key], - ) - } - updates++ - v, err := strconv.ParseFloat(value, 64) - if err != nil { - return updates, fmt.Errorf("Invalid value %s in netstats: %s", value, err) - } - netStatsMetrics[key].Set(map[string]string{"device": dev}, v) - } - } - } - - diskStats, err := getDiskStats() - if err != nil { - return updates, fmt.Errorf("Couldn't get diskstats: %s", err) - } - for dev, 
stats := range diskStats { - for k, value := range stats { - updates++ - v, err := strconv.ParseFloat(value, 64) - if err != nil { - return updates, fmt.Errorf("Invalid value %s in diskstats: %s", value, err) - } - labels := map[string]string{"device": dev} - counter, ok := diskStatsMetrics[k].metric.(prometheus.Counter) - if ok { - counter.Set(labels, v) - } else { - var gauge = diskStatsMetrics[k].metric.(prometheus.Gauge) - gauge.Set(labels, v) - } - } - } - return updates, err -} - -func getLoad1() (float64, error) { - data, err := ioutil.ReadFile(procLoad) - if err != nil { - return 0, err - } - return parseLoad(string(data)) -} - -func parseLoad(data string) (float64, error) { - parts := strings.Fields(data) - load, err := strconv.ParseFloat(parts[0], 64) - if err != nil { - return 0, fmt.Errorf("Could not parse load '%s': %s", parts[0], err) - } - return load, nil -} - -func getLastLoginTime() (float64, error) { - who := exec.Command("who", "/var/log/wtmp", "-l", "-u", "-s") - - output, err := who.StdoutPipe() - if err != nil { - return 0, err - } - - err = who.Start() - if err != nil { - return 0, err - } - - reader := bufio.NewReader(output) - - var last time.Time - for { - line, isPrefix, err := reader.ReadLine() - if err == io.EOF { - break - } - if isPrefix { - return 0, fmt.Errorf("line to long: %s(...)", line) - } - - fields := strings.Fields(string(line)) - lastDate := fields[2] - lastTime := fields[3] - - dateParts, err := splitToInts(lastDate, "-") // 2013-04-16 - if err != nil { - return 0, fmt.Errorf("Couldn't parse date in line '%s': %s", fields, err) - } - - timeParts, err := splitToInts(lastTime, ":") // 11:33 - if err != nil { - return 0, fmt.Errorf("Couldn't parse time in line '%s': %s", fields, err) - } - - last_t := time.Date(dateParts[0], time.Month(dateParts[1]), dateParts[2], timeParts[0], timeParts[1], 0, 0, time.UTC) - last = last_t - } - err = who.Wait() - if err != nil { - return 0, err - } - - return float64(last.Unix()), nil -} - 
-func getMemInfo() (map[string]float64, error) { - file, err := os.Open(procMemInfo) - if err != nil { - return nil, err - } - return parseMemInfo(file) -} - -func parseMemInfo(r io.ReadCloser) (map[string]float64, error) { - defer r.Close() - memInfo := map[string]float64{} - scanner := bufio.NewScanner(r) - re := regexp.MustCompile("\\((.*)\\)") - for scanner.Scan() { - line := scanner.Text() - parts := strings.Fields(string(line)) - fv, err := strconv.ParseFloat(parts[1], 64) - if err != nil { - return nil, fmt.Errorf("Invalid value in meminfo: %s", err) - } - switch len(parts) { - case 2: // no unit - case 3: // has unit, we presume kB - fv *= 1024 - default: - return nil, fmt.Errorf("Invalid line in %s: %s", procMemInfo, line) - } - key := parts[0][:len(parts[0])-1] // remove trailing : from key - // Active(anon) -> Active_anon - key = re.ReplaceAllString(key, "_${1}") - memInfo[key] = fv - } - return memInfo, nil - -} - -type interrupt struct { - info string - devices string - values []string -} - -func getInterrupts() (map[string]interrupt, error) { - file, err := os.Open(procInterrupts) - if err != nil { - return nil, err - } - return parseInterrupts(file) -} - -func parseInterrupts(r io.ReadCloser) (map[string]interrupt, error) { - defer r.Close() - interrupts := map[string]interrupt{} - scanner := bufio.NewScanner(r) - if !scanner.Scan() { - return nil, fmt.Errorf("%s empty", procInterrupts) - } - cpuNum := len(strings.Fields(string(scanner.Text()))) // one header per cpu - - for scanner.Scan() { - line := scanner.Text() - parts := strings.Fields(string(line)) - if len(parts) < cpuNum+2 { // irq + one column per cpu + details, - continue // we ignore ERR and MIS for now - } - intName := parts[0][:len(parts[0])-1] // remove trailing : - intr := interrupt{ - values: parts[1:cpuNum], - } - - if _, err := strconv.Atoi(intName); err == nil { // numeral interrupt - intr.info = parts[cpuNum+1] - intr.devices = strings.Join(parts[cpuNum+2:], " ") - } else { - 
intr.info = strings.Join(parts[cpuNum+1:], " ") - } - interrupts[intName] = intr - } - return interrupts, nil -} - -func getNetStats() (map[string]map[string]map[string]string, error) { - file, err := os.Open(procNetDev) - if err != nil { - return nil, err - } - return parseNetStats(file) -} - -func parseNetStats(r io.ReadCloser) (map[string]map[string]map[string]string, error) { - defer r.Close() - netStats := map[string]map[string]map[string]string{} - netStats["transmit"] = map[string]map[string]string{} - netStats["receive"] = map[string]map[string]string{} - - scanner := bufio.NewScanner(r) - scanner.Scan() // skip first header - scanner.Scan() - parts := strings.Split(string(scanner.Text()), "|") - if len(parts) != 3 { // interface + receive + transmit - return nil, fmt.Errorf("Invalid header line in %s: %s", - procNetDev, scanner.Text()) - } - header := strings.Fields(parts[1]) - for scanner.Scan() { - parts := strings.Fields(string(scanner.Text())) - if len(parts) != 2*len(header)+1 { - return nil, fmt.Errorf("Invalid line in %s: %s", - procNetDev, scanner.Text()) - } - - dev := parts[0][:len(parts[0])-1] - receive, err := parseNetDevLine(parts[1:len(header)+1], header) - if err != nil { - return nil, err - } - - transmit, err := parseNetDevLine(parts[len(header)+1:], header) - if err != nil { - return nil, err - } - netStats["transmit"][dev] = transmit - netStats["receive"][dev] = receive - } - return netStats, nil -} - -func parseNetDevLine(parts []string, header []string) (map[string]string, error) { - devStats := map[string]string{} - for i, v := range parts { - devStats[header[i]] = v - } - return devStats, nil -} - -func getDiskStats() (map[string]map[int]string, error) { - file, err := os.Open(procDiskStats) - if err != nil { - return nil, err - } - return parseDiskStats(file) -} - -func parseDiskStats(r io.ReadCloser) (map[string]map[int]string, error) { - defer r.Close() - diskStats := map[string]map[int]string{} - scanner := bufio.NewScanner(r) - 
for scanner.Scan() { - parts := strings.Fields(string(scanner.Text())) - if len(parts) != len(diskStatsMetrics)+3 { // we strip major, minor and dev - return nil, fmt.Errorf("Invalid line in %s: %s", procDiskStats, scanner.Text()) - } - dev := parts[2] - diskStats[dev] = map[int]string{} - for i, v := range parts[3:] { - diskStats[dev][i] = v - } - } - return diskStats, nil -} diff --git a/collector/netdev.go b/collector/netdev.go new file mode 100644 index 00000000..4824fbdf --- /dev/null +++ b/collector/netdev.go @@ -0,0 +1,125 @@ +// +build !nonetDev + +package collector + +import ( + "bufio" + "fmt" + "io" + "os" + "strconv" + "strings" + + "github.com/prometheus/client_golang/prometheus" +) + +const ( + procNetDev = "/proc/net/dev" +) + +var ( + netStatsMetrics = map[string]prometheus.Gauge{} +) + +type netDevCollector struct { + registry prometheus.Registry + config Config +} + +func init() { + Factories["netdev"] = NewNetDevCollector +} + +// Takes a config struct and prometheus registry and returns a new Collector exposing +// network device stats. 
+func NewNetDevCollector(config Config, registry prometheus.Registry) (Collector, error) { + c := netDevCollector{ + config: config, + registry: registry, + } + return &c, nil +} + +func (c *netDevCollector) Update() (updates int, err error) { + netStats, err := getNetStats() + if err != nil { + return updates, fmt.Errorf("Couldn't get netstats: %s", err) + } + for direction, devStats := range netStats { + for dev, stats := range devStats { + for t, value := range stats { + key := direction + "_" + t + if _, ok := netStatsMetrics[key]; !ok { + netStatsMetrics[key] = prometheus.NewGauge() + c.registry.Register( + "node_network_"+key, + t+" "+direction+" from /proc/net/dev", + prometheus.NilLabels, + netStatsMetrics[key], + ) + } + updates++ + v, err := strconv.ParseFloat(value, 64) + if err != nil { + return updates, fmt.Errorf("Invalid value %s in netstats: %s", value, err) + } + netStatsMetrics[key].Set(map[string]string{"device": dev}, v) + } + } + } + return updates, err +} + +func getNetStats() (map[string]map[string]map[string]string, error) { + file, err := os.Open(procNetDev) + if err != nil { + return nil, err + } + return parseNetStats(file) +} + +func parseNetStats(r io.ReadCloser) (map[string]map[string]map[string]string, error) { + defer r.Close() + netStats := map[string]map[string]map[string]string{} + netStats["transmit"] = map[string]map[string]string{} + netStats["receive"] = map[string]map[string]string{} + + scanner := bufio.NewScanner(r) + scanner.Scan() // skip first header + scanner.Scan() + parts := strings.Split(string(scanner.Text()), "|") + if len(parts) != 3 { // interface + receive + transmit + return nil, fmt.Errorf("Invalid header line in %s: %s", + procNetDev, scanner.Text()) + } + header := strings.Fields(parts[1]) + for scanner.Scan() { + parts := strings.Fields(string(scanner.Text())) + if len(parts) != 2*len(header)+1 { + return nil, fmt.Errorf("Invalid line in %s: %s", + procNetDev, scanner.Text()) + } + + dev := 
parts[0][:len(parts[0])-1] + receive, err := parseNetDevLine(parts[1:len(header)+1], header) + if err != nil { + return nil, err + } + + transmit, err := parseNetDevLine(parts[len(header)+1:], header) + if err != nil { + return nil, err + } + netStats["transmit"][dev] = transmit + netStats["receive"][dev] = receive + } + return netStats, nil +} + +func parseNetDevLine(parts []string, header []string) (map[string]string, error) { + devStats := map[string]string{} + for i, v := range parts { + devStats[header[i]] = v + } + return devStats, nil +} diff --git a/collector/runit_collector.go b/collector/runit_collector.go index 24b89731..6e7c2096 100644 --- a/collector/runit_collector.go +++ b/collector/runit_collector.go @@ -3,12 +3,12 @@ package collector import ( + "github.com/golang/glog" "github.com/prometheus/client_golang/prometheus" "github.com/soundcloud/go-runit/runit" ) type runitCollector struct { - name string config Config state prometheus.Gauge stateDesired prometheus.Gauge @@ -16,12 +16,11 @@ type runitCollector struct { } func init() { - Factories = append(Factories, NewRunitCollector) + Factories["runit"] = NewRunitCollector } func NewRunitCollector(config Config, registry prometheus.Registry) (Collector, error) { c := runitCollector{ - name: "runit_collector", config: config, state: prometheus.NewGauge(), stateDesired: prometheus.NewGauge(), @@ -52,8 +51,6 @@ func NewRunitCollector(config Config, registry prometheus.Registry) (Collector, return &c, nil } -func (c *runitCollector) Name() string { return c.name } - func (c *runitCollector) Update() (updates int, err error) { services, err := runit.GetServices("/etc/service") if err != nil { @@ -63,11 +60,11 @@ func (c *runitCollector) Update() (updates int, err error) { for _, service := range services { status, err := service.Status() if err != nil { - debug(c.Name(), "Couldn't get status for %s: %s, skipping...", service.Name, err) + glog.V(1).Infof("Couldn't get status for %s: %s, skipping...", 
service.Name, err) continue } - debug(c.Name(), "%s is %d on pid %d for %d seconds", service.Name, status.State, status.Pid, status.Duration) + glog.V(1).Infof("%s is %d on pid %d for %d seconds", service.Name, status.State, status.Pid, status.Duration) labels := map[string]string{ "service": service.Name, } diff --git a/node_exporter.go b/node_exporter.go index f6671aa3..a75699f3 100644 --- a/node_exporter.go +++ b/node_exporter.go @@ -9,22 +9,25 @@ import ( "os" "os/signal" "runtime/pprof" + "strings" "sync" "syscall" "time" + "github.com/golang/glog" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/exp" "github.com/prometheus/node_exporter/collector" ) var ( - configFile = flag.String("config", "node_exporter.conf", "config file.") - memProfile = flag.String("memprofile", "", "write memory profile to this file") - listeningAddress = flag.String("listen", ":8080", "address to listen on") - interval = flag.Duration("interval", 60*time.Second, "refresh interval") - scrapeDurations = prometheus.NewDefaultHistogram() - metricsUpdated = prometheus.NewGauge() + configFile = flag.String("config", "node_exporter.conf", "config file.") + memProfile = flag.String("memprofile", "", "write memory profile to this file") + listeningAddress = flag.String("listen", ":8080", "address to listen on") + enabledCollectors = flag.String("enabledCollectors", "attributes,diskstats,loadavg,meminfo,netdev", "comma seperated list of collectors to use") + interval = flag.Duration("interval", 60*time.Second, "refresh interval") + scrapeDurations = prometheus.NewDefaultHistogram() + metricsUpdated = prometheus.NewGauge() ) func main() { @@ -38,9 +41,9 @@ func main() { registry.Register("node_exporter_scrape_duration_seconds", "node_exporter: Duration of a scrape job.", prometheus.NilLabels, scrapeDurations) registry.Register("node_exporter_metrics_updated", "node_exporter: Number of metrics updated.", prometheus.NilLabels, metricsUpdated) - 
log.Printf("Registered collectors:") - for _, c := range collectors { - log.Print(" - ", c.Name()) + glog.Infof("Enabled collectors:") + for n, _ := range collectors { + glog.Infof(" - %s", n) } sigHup := make(chan os.Signal) @@ -50,7 +53,7 @@ func main() { go serveStatus(registry) - log.Printf("Starting initial collection") + glog.Infof("Starting initial collection") collect(collectors) tick := time.Tick(*interval) @@ -61,17 +64,17 @@ func main() { if err != nil { log.Fatalf("Couldn't load config and collectors: %s", err) } - log.Printf("Reloaded collectors and config") + glog.Infof("Reloaded collectors and config") tick = time.Tick(*interval) case <-tick: - log.Printf("Starting new interval") + glog.Infof("Starting new interval") collect(collectors) case <-sigUsr1: - log.Printf("got signal") + glog.Infof("got signal") if *memProfile != "" { - log.Printf("Writing memory profile to %s", *memProfile) + glog.Infof("Writing memory profile to %s", *memProfile) f, err := os.Create(*memProfile) if err != nil { log.Fatal(err) @@ -84,25 +87,29 @@ func main() { } -func loadCollectors(file string, registry prometheus.Registry) ([]collector.Collector, error) { - collectors := []collector.Collector{} +func loadCollectors(file string, registry prometheus.Registry) (map[string]collector.Collector, error) { + collectors := map[string]collector.Collector{} config, err := getConfig(file) if err != nil { log.Fatalf("Couldn't read config %s: %s", file, err) } - for _, fn := range collector.Factories { + for _, name := range strings.Split(*enabledCollectors, ",") { + fn, ok := collector.Factories[name] + if !ok { + log.Fatalf("Collector '%s' not available", name) + } c, err := fn(*config, registry) if err != nil { return nil, err } - collectors = append(collectors, c) + collectors[name] = c } return collectors, nil } func getConfig(file string) (*collector.Config, error) { config := &collector.Config{} - log.Printf("Reading config %s", *configFile) + glog.Infof("Reading config %s", 
*configFile) bytes, err := ioutil.ReadFile(*configFile) if err != nil { return nil, err @@ -115,31 +122,31 @@ func serveStatus(registry prometheus.Registry) { http.ListenAndServe(*listeningAddress, exp.DefaultCoarseMux) } -func collect(collectors []collector.Collector) { +func collect(collectors map[string]collector.Collector) { wg := sync.WaitGroup{} wg.Add(len(collectors)) - for _, c := range collectors { - go func(c collector.Collector) { - Execute(c) + for n, c := range collectors { + go func(n string, c collector.Collector) { + Execute(n, c) wg.Done() - }(c) + }(n, c) } wg.Wait() } -func Execute(c collector.Collector) { +func Execute(name string, c collector.Collector) { begin := time.Now() updates, err := c.Update() duration := time.Since(begin) label := map[string]string{ - "collector": c.Name(), + "collector": name, } if err != nil { - log.Printf("ERROR: %s failed after %fs: %s", c.Name(), duration.Seconds(), err) + glog.Infof("ERROR: %s failed after %fs: %s", name, duration.Seconds(), err) label["result"] = "error" } else { - log.Printf("OK: %s success after %fs.", c.Name(), duration.Seconds()) + glog.Infof("OK: %s success after %fs.", name, duration.Seconds()) label["result"] = "success" } scrapeDurations.Add(label, duration.Seconds()) From d2bc8b7ccae01b94de823ed36f36bd5034068872 Mon Sep 17 00:00:00 2001 From: Brian Brazil Date: Wed, 4 Jun 2014 13:43:57 +0100 Subject: [PATCH 2/4] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1b614b27..ae2e079c 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ To include other collectors, specify the build tags lile this: go build -tags 'ganglia runit' node_exporter.go -Which collectors are used is controled by the --enabledCollectors flag. +Which collectors are used is controlled by the --enabledCollectors flag. 
### NativeCollector From cbb91fb3328d5b50d47073108db5f90536ae915b Mon Sep 17 00:00:00 2001 From: Brian Brazil Date: Wed, 4 Jun 2014 14:09:33 +0100 Subject: [PATCH 3/4] Add option to print out available collectors. Fix typo. --- node_exporter.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/node_exporter.go b/node_exporter.go index a75699f3..357927e8 100644 --- a/node_exporter.go +++ b/node_exporter.go @@ -3,6 +3,7 @@ package main import ( "encoding/json" "flag" + "fmt" "io/ioutil" "log" "net/http" @@ -24,7 +25,8 @@ var ( configFile = flag.String("config", "node_exporter.conf", "config file.") memProfile = flag.String("memprofile", "", "write memory profile to this file") listeningAddress = flag.String("listen", ":8080", "address to listen on") - enabledCollectors = flag.String("enabledCollectors", "attributes,diskstats,loadavg,meminfo,netdev", "comma seperated list of collectors to use") + enabledCollectors = flag.String("enabledCollectors", "attributes,diskstats,loadavg,meminfo,netdev", "comma-separated list of collectors to use") + printCollectors = flag.Bool("printCollectors", false, "If true, print available collectors and exit") interval = flag.Duration("interval", 60*time.Second, "refresh interval") scrapeDurations = prometheus.NewDefaultHistogram() metricsUpdated = prometheus.NewGauge() @@ -32,6 +34,13 @@ var ( func main() { flag.Parse() + if *printCollectors { + fmt.Printf("Available collectors:\n") + for n, _ := range collector.Factories { + fmt.Printf(" - %s\n", n) + } + return + } registry := prometheus.NewRegistry() collectors, err := loadCollectors(*configFile, registry) if err != nil { From f9c6e4ca52c78484284abf679914431d77810725 Mon Sep 17 00:00:00 2001 From: Brian Brazil Date: Thu, 5 Jun 2014 11:44:44 +0100 Subject: [PATCH 4/4] Ignore disk partitions by default, it's the disks themselves you usually want.
--- collector/diskstats.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/collector/diskstats.go b/collector/diskstats.go index cdc8d4ec..665a7c65 100644 --- a/collector/diskstats.go +++ b/collector/diskstats.go @@ -27,7 +27,7 @@ type diskStat struct { } var ( - ignoredDevices = flag.String("diskstatsIgnoredDevices", "^(ram|loop)\\d+$", "Regexp of devices to ignore for diskstats.") + ignoredDevices = flag.String("diskstatsIgnoredDevices", "^(ram|loop|[hs]d[a-z])\\d+$", "Regexp of devices to ignore for diskstats.") // Docs from https://www.kernel.org/doc/Documentation/iostats.txt diskStatsMetrics = []diskStat{