diff --git a/README.md b/README.md index fd076cfe..1b614b27 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,26 @@ # node_exporter -Prometheus exporter with plugable metric collectors. +Prometheus exporter with pluggable metric collectors. ## Available collectors -By default it will only include the NativeCollector. +By default the build will only include the native collectors +that expose information from /proc. To include other collectors, specify the build tags lile this: go build -tags 'ganglia runit' node_exporter.go +Which collectors are used is controlled by the --enabledCollectors flag. + ### NativeCollector Provides metrics for load, seconds since last login and a list of tags read from `node_exporter.conf`. -To disable the native collector, use build tag `nonative`. - ### GmondCollector (tag: ganglia) diff --git a/collector/attributes.go b/collector/attributes.go new file mode 100644 index 00000000..a42bc6e9 --- /dev/null +++ b/collector/attributes.go @@ -0,0 +1,43 @@ +// +build !noattributes + +package collector + +import ( + "github.com/golang/glog" + "github.com/prometheus/client_golang/prometheus" +) + +var ( + attributes = prometheus.NewGauge() +) + +type attributesCollector struct { + registry prometheus.Registry + config Config +} + +func init() { + Factories["attributes"] = NewAttributesCollector +} + +// Takes a config struct and prometheus registry and returns a new Collector exposing +// labels from the config. 
+func NewAttributesCollector(config Config, registry prometheus.Registry) (Collector, error) { + c := attributesCollector{ + config: config, + registry: registry, + } + registry.Register( + "node_attributes", + "node_exporter attributes", + prometheus.NilLabels, + attributes, + ) + return &c, nil +} + +func (c *attributesCollector) Update() (updates int, err error) { + glog.V(1).Info("Set node_attributes{%v}: 1", c.config.Attributes) + attributes.Set(c.config.Attributes, 1) + return updates, err +} diff --git a/collector/collector.go b/collector/collector.go index 5fb6f4fd..e713cc44 100644 --- a/collector/collector.go +++ b/collector/collector.go @@ -5,15 +5,12 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -var Factories []func(Config, prometheus.Registry) (Collector, error) +var Factories = make(map[string]func(Config, prometheus.Registry) (Collector, error)) // Interface a collector has to implement. type Collector interface { // Get new metrics and expose them via prometheus registry. Update() (n int, err error) - - // Returns the name of the collector. 
- Name() string } type Config struct { diff --git a/collector/diskstats.go b/collector/diskstats.go new file mode 100644 index 00000000..cdc8d4ec --- /dev/null +++ b/collector/diskstats.go @@ -0,0 +1,131 @@ +// +build !nonative + +package collector + +import ( + "bufio" + "flag" + "fmt" + "io" + "os" + "regexp" + "strconv" + "strings" + + "github.com/golang/glog" + "github.com/prometheus/client_golang/prometheus" +) + +const ( + procDiskStats = "/proc/diskstats" +) + +type diskStat struct { + name string + metric prometheus.Metric + documentation string +} + +var ( + ignoredDevices = flag.String("diskstatsIgnoredDevices", "^(ram|loop)\\d+$", "Regexp of devices to ignore for diskstats.") + + // Docs from https://www.kernel.org/doc/Documentation/iostats.txt + diskStatsMetrics = []diskStat{ + {"reads_completed", prometheus.NewCounter(), "The total number of reads completed successfully."}, + {"reads_merged", prometheus.NewCounter(), "The number of reads merged. See https://www.kernel.org/doc/Documentation/iostats.txt"}, + {"sectors_read", prometheus.NewCounter(), "The total number of sectors read successfully."}, + {"read_time_ms", prometheus.NewCounter(), "the total number of milliseconds spent by all reads."}, + {"writes_completed", prometheus.NewCounter(), "The total number of writes completed successfully."}, + {"writes_merged", prometheus.NewCounter(), "The number of writes merged. See https://www.kernel.org/doc/Documentation/iostats.txt"}, + {"sectors_written", prometheus.NewCounter(), "The total number of sectors written successfully."}, + {"write_time_ms", prometheus.NewCounter(), "This is the total number of milliseconds spent by all writes."}, + {"io_now", prometheus.NewGauge(), "The number of I/Os currently in progress."}, + {"io_time_ms", prometheus.NewCounter(), "Milliseconds spent doing I/Os."}, + {"io_time_weighted", prometheus.NewCounter(), "The weighted # of milliseconds spent doing I/Os. 
See https://www.kernel.org/doc/Documentation/iostats.txt"}, + } +) + +type diskstatsCollector struct { + registry prometheus.Registry + config Config + ignoredDevicesPattern *regexp.Regexp +} + +func init() { + Factories["diskstats"] = NewDiskstatsCollector +} + +// Takes a config struct and prometheus registry and returns a new Collector exposing +// disk device stats. +func NewDiskstatsCollector(config Config, registry prometheus.Registry) (Collector, error) { + c := diskstatsCollector{ + config: config, + registry: registry, + ignoredDevicesPattern: regexp.MustCompile(*ignoredDevices), + } + + for _, v := range diskStatsMetrics { + registry.Register( + "node_disk_"+v.name, + v.documentation, + prometheus.NilLabels, + v.metric, + ) + } + return &c, nil +} + +func (c *diskstatsCollector) Update() (updates int, err error) { + diskStats, err := getDiskStats() + if err != nil { + return updates, fmt.Errorf("Couldn't get diskstats: %s", err) + } + for dev, stats := range diskStats { + if c.ignoredDevicesPattern.MatchString(dev) { + glog.V(1).Infof("Ignoring device: %s", dev) + continue + } + for k, value := range stats { + updates++ + v, err := strconv.ParseFloat(value, 64) + if err != nil { + return updates, fmt.Errorf("Invalid value %s in diskstats: %s", value, err) + } + labels := map[string]string{"device": dev} + counter, ok := diskStatsMetrics[k].metric.(prometheus.Counter) + if ok { + counter.Set(labels, v) + } else { + var gauge = diskStatsMetrics[k].metric.(prometheus.Gauge) + gauge.Set(labels, v) + } + } + } + return updates, err +} + +func getDiskStats() (map[string]map[int]string, error) { + file, err := os.Open(procDiskStats) + if err != nil { + return nil, err + } + return parseDiskStats(file) +} + +func parseDiskStats(r io.ReadCloser) (map[string]map[int]string, error) { + defer r.Close() + diskStats := map[string]map[int]string{} + scanner := bufio.NewScanner(r) + for scanner.Scan() { + parts := strings.Fields(string(scanner.Text())) + if len(parts) != 
len(diskStatsMetrics)+3 { // we strip major, minor and dev + return nil, fmt.Errorf("Invalid line in %s: %s", procDiskStats, scanner.Text()) + } + dev := parts[2] + diskStats[dev] = map[int]string{} + for i, v := range parts[3:] { + diskStats[dev][i] = v + } + } + return diskStats, nil +} diff --git a/collector/gmond_collector.go b/collector/gmond_collector.go index 874f0813..43cfd09d 100644 --- a/collector/gmond_collector.go +++ b/collector/gmond_collector.go @@ -11,6 +11,7 @@ import ( "regexp" "time" + "github.com/golang/glog" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/node_exporter/collector/ganglia" ) @@ -23,14 +24,13 @@ const ( ) type gmondCollector struct { - name string Metrics map[string]prometheus.Gauge config Config registry prometheus.Registry } func init() { - Factories = append(Factories, NewGmondCollector) + Factories["gmond"] = NewGmondCollector } var illegalCharsRE = regexp.MustCompile(`[^a-zA-Z0-9_]`) @@ -38,7 +38,6 @@ var illegalCharsRE = regexp.MustCompile(`[^a-zA-Z0-9_]`) // Takes a config struct and prometheus registry and returns a new Collector scraping ganglia. 
func NewGmondCollector(config Config, registry prometheus.Registry) (Collector, error) { c := gmondCollector{ - name: "gmond_collector", config: config, Metrics: make(map[string]prometheus.Gauge), registry: registry, @@ -47,8 +46,6 @@ func NewGmondCollector(config Config, registry prometheus.Registry) (Collector, return &c, nil } -func (c *gmondCollector) Name() string { return c.name } - func (c *gmondCollector) setMetric(name string, labels map[string]string, metric ganglia.Metric) { if _, ok := c.Metrics[name]; !ok { var desc string @@ -64,18 +61,18 @@ func (c *gmondCollector) setMetric(name string, labels map[string]string, metric break } } - debug(c.Name(), "Register %s: %s", name, desc) + glog.V(1).Infof("Register %s: %s", name, desc) gauge := prometheus.NewGauge() c.Metrics[name] = gauge c.registry.Register(name, desc, prometheus.NilLabels, gauge) // one gauge per metric! } - debug(c.Name(), "Set %s{%s}: %f", name, labels, metric.Value) + glog.V(1).Infof("Set %s{%s}: %f", name, labels, metric.Value) c.Metrics[name].Set(labels, metric.Value) } func (c *gmondCollector) Update() (updates int, err error) { conn, err := net.Dial(gangliaProto, gangliaAddress) - debug(c.Name(), "gmondCollector Update") + glog.V(1).Infof("gmondCollector Update") if err != nil { return updates, fmt.Errorf("Can't connect to gmond: %s", err) } diff --git a/collector/helper.go b/collector/helper.go index 00012a2d..3e4fda44 100644 --- a/collector/helper.go +++ b/collector/helper.go @@ -1,22 +1,11 @@ package collector import ( - "flag" "fmt" - "log" "strconv" "strings" ) -var verbose = flag.Bool("verbose", false, "Verbose output.") - -func debug(name string, format string, a ...interface{}) { - if *verbose { - f := fmt.Sprintf("%s: %s", name, format) - log.Printf(f, a...) 
- } -} - func splitToInts(str string, sep string) (ints []int, err error) { for _, part := range strings.Split(str, sep) { i, err := strconv.Atoi(part) diff --git a/collector/interrupts.go b/collector/interrupts.go new file mode 100644 index 00000000..a13e94c6 --- /dev/null +++ b/collector/interrupts.go @@ -0,0 +1,116 @@ +// +build !nointerrupts + +package collector + +import ( + "bufio" + "fmt" + "io" + "os" + "strconv" + "strings" + + "github.com/prometheus/client_golang/prometheus" +) + +const ( + procInterrupts = "/proc/interrupts" +) + +var ( + interruptsMetric = prometheus.NewCounter() +) + +type interruptsCollector struct { + registry prometheus.Registry + config Config +} + +func init() { + Factories["interrupts"] = NewInterruptsCollector +} + +// Takes a config struct and prometheus registry and returns a new Collector exposing +// interrupts stats +func NewInterruptsCollector(config Config, registry prometheus.Registry) (Collector, error) { + c := interruptsCollector{ + config: config, + registry: registry, + } + registry.Register( + "node_interrupts", + "Interrupt details from /proc/interrupts", + prometheus.NilLabels, + interruptsMetric, + ) + return &c, nil +} + +func (c *interruptsCollector) Update() (updates int, err error) { + interrupts, err := getInterrupts() + if err != nil { + return updates, fmt.Errorf("Couldn't get interrupts: %s", err) + } + for name, interrupt := range interrupts { + for cpuNo, value := range interrupt.values { + updates++ + fv, err := strconv.ParseFloat(value, 64) + if err != nil { + return updates, fmt.Errorf("Invalid value %s in interrupts: %s", value, err) + } + labels := map[string]string{ + "CPU": strconv.Itoa(cpuNo), + "type": name, + "info": interrupt.info, + "devices": interrupt.devices, + } + interruptsMetric.Set(labels, fv) + } + } + return updates, err +} + +type interrupt struct { + info string + devices string + values []string +} + +func getInterrupts() (map[string]interrupt, error) { + file, err := 
os.Open(procInterrupts) + if err != nil { + return nil, err + } + return parseInterrupts(file) +} + +func parseInterrupts(r io.ReadCloser) (map[string]interrupt, error) { + defer r.Close() + interrupts := map[string]interrupt{} + scanner := bufio.NewScanner(r) + if !scanner.Scan() { + return nil, fmt.Errorf("%s empty", procInterrupts) + } + cpuNum := len(strings.Fields(string(scanner.Text()))) // one header per cpu + + for scanner.Scan() { + line := scanner.Text() + parts := strings.Fields(string(line)) + if len(parts) < cpuNum+2 { // irq + one column per cpu + details, + continue // we ignore ERR and MIS for now + } + intName := parts[0][:len(parts[0])-1] // remove trailing : + intr := interrupt{ + values: parts[1:cpuNum], + } + + if _, err := strconv.Atoi(intName); err == nil { // numeral interrupt + intr.info = parts[cpuNum+1] + intr.devices = strings.Join(parts[cpuNum+2:], " ") + } else { + intr.info = strings.Join(parts[cpuNum+1:], " ") + } + interrupts[intName] = intr + } + return interrupts, nil +} diff --git a/collector/lastlogin.go b/collector/lastlogin.go new file mode 100644 index 00000000..bfd37cc7 --- /dev/null +++ b/collector/lastlogin.go @@ -0,0 +1,105 @@ +// +build !nolastLogin + +package collector + +import ( + "bufio" + "fmt" + "io" + "os/exec" + "strings" + "time" + + "github.com/golang/glog" + "github.com/prometheus/client_golang/prometheus" +) + +var ( + lastSeen = prometheus.NewGauge() +) + +type lastLoginCollector struct { + registry prometheus.Registry + config Config +} + +func init() { + Factories["lastlogin"] = NewLastLoginCollector +} + +// Takes a config struct and prometheus registry and returns a new Collector exposing +// the time of the last login. 
+func NewLastLoginCollector(config Config, registry prometheus.Registry) (Collector, error) { + c := lastLoginCollector{ + config: config, + registry: registry, + } + registry.Register( + "node_last_login_time", + "The time of the last login.", + prometheus.NilLabels, + lastSeen, + ) + return &c, nil +} + +func (c *lastLoginCollector) Update() (updates int, err error) { + last, err := getLastLoginTime() + if err != nil { + return updates, fmt.Errorf("Couldn't get last seen: %s", err) + } + updates++ + glog.V(1).Infof("Set node_last_login_time: %f", last) + lastSeen.Set(nil, last) + return updates, err +} + +func getLastLoginTime() (float64, error) { + who := exec.Command("who", "/var/log/wtmp", "-l", "-u", "-s") + + output, err := who.StdoutPipe() + if err != nil { + return 0, err + } + + err = who.Start() + if err != nil { + return 0, err + } + + reader := bufio.NewReader(output) + + var last time.Time + for { + line, isPrefix, err := reader.ReadLine() + if err == io.EOF { + break + } + if isPrefix { + return 0, fmt.Errorf("line to long: %s(...)", line) + } + + fields := strings.Fields(string(line)) + lastDate := fields[2] + lastTime := fields[3] + + dateParts, err := splitToInts(lastDate, "-") // 2013-04-16 + if err != nil { + return 0, fmt.Errorf("Couldn't parse date in line '%s': %s", fields, err) + } + + timeParts, err := splitToInts(lastTime, ":") // 11:33 + if err != nil { + return 0, fmt.Errorf("Couldn't parse time in line '%s': %s", fields, err) + } + + last_t := time.Date(dateParts[0], time.Month(dateParts[1]), dateParts[2], timeParts[0], timeParts[1], 0, 0, time.UTC) + last = last_t + } + err = who.Wait() + if err != nil { + return 0, err + } + + return float64(last.Unix()), nil +} diff --git a/collector/loadavg.go b/collector/loadavg.go new file mode 100644 index 00000000..a6104df2 --- /dev/null +++ b/collector/loadavg.go @@ -0,0 +1,76 @@ +// +build !noloadavg + +package collector + +import ( + "fmt" + "io/ioutil" + "strconv" + "strings" + + 
"github.com/golang/glog" + "github.com/prometheus/client_golang/prometheus" +) + +const ( + procLoad = "/proc/loadavg" +) + +var ( + load1 = prometheus.NewGauge() +) + +type loadavgCollector struct { + registry prometheus.Registry + config Config +} + +func init() { + Factories["loadavg"] = NewLoadavgCollector +} + +// Takes a config struct and prometheus registry and returns a new Collector exposing +// load, seconds since last login and a list of tags as specified by config. +func NewLoadavgCollector(config Config, registry prometheus.Registry) (Collector, error) { + c := loadavgCollector{ + config: config, + registry: registry, + } + + registry.Register( + "node_load1", + "1m load average", + prometheus.NilLabels, + load1, + ) + return &c, nil +} + +func (c *loadavgCollector) Update() (updates int, err error) { + load, err := getLoad1() + if err != nil { + return updates, fmt.Errorf("Couldn't get load: %s", err) + } + updates++ + glog.V(1).Infof("Set node_load: %f", load) + load1.Set(nil, load) + + return updates, err +} + +func getLoad1() (float64, error) { + data, err := ioutil.ReadFile(procLoad) + if err != nil { + return 0, err + } + return parseLoad(string(data)) +} + +func parseLoad(data string) (float64, error) { + parts := strings.Fields(data) + load, err := strconv.ParseFloat(parts[0], 64) + if err != nil { + return 0, fmt.Errorf("Could not parse load '%s': %s", parts[0], err) + } + return load, nil +} diff --git a/collector/meminfo.go b/collector/meminfo.go new file mode 100644 index 00000000..180cfd7f --- /dev/null +++ b/collector/meminfo.go @@ -0,0 +1,101 @@ +// +build !nomeminfo + +package collector + +import ( + "bufio" + "fmt" + "io" + "os" + "regexp" + "strconv" + "strings" + + "github.com/golang/glog" + "github.com/prometheus/client_golang/prometheus" +) + +const ( + procMemInfo = "/proc/meminfo" +) + +var ( + memInfoMetrics = map[string]prometheus.Gauge{} +) + +type meminfoCollector struct { + registry prometheus.Registry + config Config +} + 
+func init() { + Factories["meminfo"] = NewMeminfoCollector +} + +// Takes a config struct and prometheus registry and returns a new Collector exposing +// memory stats. +func NewMeminfoCollector(config Config, registry prometheus.Registry) (Collector, error) { + c := meminfoCollector{ + config: config, + registry: registry, + } + return &c, nil +} + +func (c *meminfoCollector) Update() (updates int, err error) { + memInfo, err := getMemInfo() + if err != nil { + return updates, fmt.Errorf("Couldn't get meminfo: %s", err) + } + glog.V(1).Infof("Set node_mem: %#v", memInfo) + for k, v := range memInfo { + if _, ok := memInfoMetrics[k]; !ok { + memInfoMetrics[k] = prometheus.NewGauge() + c.registry.Register( + "node_memory_"+k, + k+" from /proc/meminfo", + prometheus.NilLabels, + memInfoMetrics[k], + ) + } + updates++ + memInfoMetrics[k].Set(nil, v) + } + return updates, err +} + +func getMemInfo() (map[string]float64, error) { + file, err := os.Open(procMemInfo) + if err != nil { + return nil, err + } + return parseMemInfo(file) +} + +func parseMemInfo(r io.ReadCloser) (map[string]float64, error) { + defer r.Close() + memInfo := map[string]float64{} + scanner := bufio.NewScanner(r) + re := regexp.MustCompile("\\((.*)\\)") + for scanner.Scan() { + line := scanner.Text() + parts := strings.Fields(string(line)) + fv, err := strconv.ParseFloat(parts[1], 64) + if err != nil { + return nil, fmt.Errorf("Invalid value in meminfo: %s", err) + } + switch len(parts) { + case 2: // no unit + case 3: // has unit, we presume kB + fv *= 1024 + default: + return nil, fmt.Errorf("Invalid line in %s: %s", procMemInfo, line) + } + key := parts[0][:len(parts[0])-1] // remove trailing : from key + // Active(anon) -> Active_anon + key = re.ReplaceAllString(key, "_${1}") + memInfo[key] = fv + } + return memInfo, nil + +} diff --git a/collector/native_collector.go b/collector/native_collector.go deleted file mode 100644 index 9faa23a3..00000000 --- a/collector/native_collector.go +++ 
/dev/null @@ -1,454 +0,0 @@ -// +build !nonative - -package collector - -import ( - "bufio" - "fmt" - "io" - "io/ioutil" - "os" - "os/exec" - "regexp" - "strconv" - "strings" - "time" - - "github.com/prometheus/client_golang/prometheus" -) - -const ( - procLoad = "/proc/loadavg" - procMemInfo = "/proc/meminfo" - procInterrupts = "/proc/interrupts" - procNetDev = "/proc/net/dev" - procDiskStats = "/proc/diskstats" -) - -type diskStat struct { - name string - metric prometheus.Metric - documentation string -} - -var ( - // Docs from https://www.kernel.org/doc/Documentation/iostats.txt - diskStatsMetrics = []diskStat{ - {"reads_completed", prometheus.NewCounter(), "The total number of reads completed successfully."}, - {"reads_merged", prometheus.NewCounter(), "The number of reads merged. See https://www.kernel.org/doc/Documentation/iostats.txt"}, - {"sectors_read", prometheus.NewCounter(), "The total number of sectors read successfully."}, - {"read_time_ms", prometheus.NewCounter(), "the total number of milliseconds spent by all reads."}, - {"writes_completed", prometheus.NewCounter(), "The total number of writes completed successfully."}, - {"writes_merged", prometheus.NewCounter(), "The number of writes merged. See https://www.kernel.org/doc/Documentation/iostats.txt"}, - {"sectors_written", prometheus.NewCounter(), "The total number of sectors written successfully."}, - {"write_time_ms", prometheus.NewCounter(), "This is the total number of milliseconds spent by all writes."}, - {"io_now", prometheus.NewGauge(), "The number of I/Os currently in progress."}, - {"io_time_ms", prometheus.NewCounter(), "Milliseconds spent doing I/Os."}, - {"io_time_weighted", prometheus.NewCounter(), "The weighted # of milliseconds spent doing I/Os. 
See https://www.kernel.org/doc/Documentation/iostats.txt"}, - } - lastSeen = prometheus.NewGauge() - load1 = prometheus.NewGauge() - attributes = prometheus.NewGauge() - memInfoMetrics = map[string]prometheus.Gauge{} - netStatsMetrics = map[string]prometheus.Gauge{} - interruptsMetric = prometheus.NewCounter() -) - -type nativeCollector struct { - registry prometheus.Registry - name string - config Config -} - -func init() { - Factories = append(Factories, NewNativeCollector) -} - -// Takes a config struct and prometheus registry and returns a new Collector exposing -// load, seconds since last login and a list of tags as specified by config. -func NewNativeCollector(config Config, registry prometheus.Registry) (Collector, error) { - c := nativeCollector{ - name: "native_collector", - config: config, - registry: registry, - } - - registry.Register( - "node_load1", - "1m load average", - prometheus.NilLabels, - load1, - ) - - registry.Register( - "node_last_login_time", - "The time of the last login.", - prometheus.NilLabels, - lastSeen, - ) - - registry.Register( - "node_attributes", - "node_exporter attributes", - prometheus.NilLabels, - attributes, - ) - - registry.Register( - "node_interrupts", - "Interrupt details from /proc/interrupts", - prometheus.NilLabels, - interruptsMetric, - ) - - for _, v := range diskStatsMetrics { - registry.Register( - "node_disk_"+v.name, - v.documentation, - prometheus.NilLabels, - v.metric, - ) - } - return &c, nil -} - -func (c *nativeCollector) Name() string { return c.name } - -func (c *nativeCollector) Update() (updates int, err error) { - last, err := getLastLoginTime() - if err != nil { - return updates, fmt.Errorf("Couldn't get last seen: %s", err) - } - updates++ - debug(c.Name(), "Set node_last_login_time: %f", last) - lastSeen.Set(nil, last) - - load, err := getLoad1() - if err != nil { - return updates, fmt.Errorf("Couldn't get load: %s", err) - } - updates++ - debug(c.Name(), "Set node_load: %f", load) - 
load1.Set(nil, load) - - debug(c.Name(), "Set node_attributes{%v}: 1", c.config.Attributes) - attributes.Set(c.config.Attributes, 1) - - memInfo, err := getMemInfo() - if err != nil { - return updates, fmt.Errorf("Couldn't get meminfo: %s", err) - } - debug(c.Name(), "Set node_mem: %#v", memInfo) - for k, v := range memInfo { - if _, ok := memInfoMetrics[k]; !ok { - memInfoMetrics[k] = prometheus.NewGauge() - c.registry.Register( - "node_memory_"+k, - k+" from /proc/meminfo", - prometheus.NilLabels, - memInfoMetrics[k], - ) - } - updates++ - memInfoMetrics[k].Set(nil, v) - } - - interrupts, err := getInterrupts() - if err != nil { - return updates, fmt.Errorf("Couldn't get interrupts: %s", err) - } - for name, interrupt := range interrupts { - for cpuNo, value := range interrupt.values { - updates++ - fv, err := strconv.ParseFloat(value, 64) - if err != nil { - return updates, fmt.Errorf("Invalid value %s in interrupts: %s", value, err) - } - labels := map[string]string{ - "CPU": strconv.Itoa(cpuNo), - "type": name, - "info": interrupt.info, - "devices": interrupt.devices, - } - interruptsMetric.Set(labels, fv) - } - } - - netStats, err := getNetStats() - if err != nil { - return updates, fmt.Errorf("Couldn't get netstats: %s", err) - } - for direction, devStats := range netStats { - for dev, stats := range devStats { - for t, value := range stats { - key := direction + "_" + t - if _, ok := netStatsMetrics[key]; !ok { - netStatsMetrics[key] = prometheus.NewGauge() - c.registry.Register( - "node_network_"+key, - t+" "+direction+" from /proc/net/dev", - prometheus.NilLabels, - netStatsMetrics[key], - ) - } - updates++ - v, err := strconv.ParseFloat(value, 64) - if err != nil { - return updates, fmt.Errorf("Invalid value %s in netstats: %s", value, err) - } - netStatsMetrics[key].Set(map[string]string{"device": dev}, v) - } - } - } - - diskStats, err := getDiskStats() - if err != nil { - return updates, fmt.Errorf("Couldn't get diskstats: %s", err) - } - for dev, 
stats := range diskStats { - for k, value := range stats { - updates++ - v, err := strconv.ParseFloat(value, 64) - if err != nil { - return updates, fmt.Errorf("Invalid value %s in diskstats: %s", value, err) - } - labels := map[string]string{"device": dev} - counter, ok := diskStatsMetrics[k].metric.(prometheus.Counter) - if ok { - counter.Set(labels, v) - } else { - var gauge = diskStatsMetrics[k].metric.(prometheus.Gauge) - gauge.Set(labels, v) - } - } - } - return updates, err -} - -func getLoad1() (float64, error) { - data, err := ioutil.ReadFile(procLoad) - if err != nil { - return 0, err - } - return parseLoad(string(data)) -} - -func parseLoad(data string) (float64, error) { - parts := strings.Fields(data) - load, err := strconv.ParseFloat(parts[0], 64) - if err != nil { - return 0, fmt.Errorf("Could not parse load '%s': %s", parts[0], err) - } - return load, nil -} - -func getLastLoginTime() (float64, error) { - who := exec.Command("who", "/var/log/wtmp", "-l", "-u", "-s") - - output, err := who.StdoutPipe() - if err != nil { - return 0, err - } - - err = who.Start() - if err != nil { - return 0, err - } - - reader := bufio.NewReader(output) - - var last time.Time - for { - line, isPrefix, err := reader.ReadLine() - if err == io.EOF { - break - } - if isPrefix { - return 0, fmt.Errorf("line to long: %s(...)", line) - } - - fields := strings.Fields(string(line)) - lastDate := fields[2] - lastTime := fields[3] - - dateParts, err := splitToInts(lastDate, "-") // 2013-04-16 - if err != nil { - return 0, fmt.Errorf("Couldn't parse date in line '%s': %s", fields, err) - } - - timeParts, err := splitToInts(lastTime, ":") // 11:33 - if err != nil { - return 0, fmt.Errorf("Couldn't parse time in line '%s': %s", fields, err) - } - - last_t := time.Date(dateParts[0], time.Month(dateParts[1]), dateParts[2], timeParts[0], timeParts[1], 0, 0, time.UTC) - last = last_t - } - err = who.Wait() - if err != nil { - return 0, err - } - - return float64(last.Unix()), nil -} - 
-func getMemInfo() (map[string]float64, error) { - file, err := os.Open(procMemInfo) - if err != nil { - return nil, err - } - return parseMemInfo(file) -} - -func parseMemInfo(r io.ReadCloser) (map[string]float64, error) { - defer r.Close() - memInfo := map[string]float64{} - scanner := bufio.NewScanner(r) - re := regexp.MustCompile("\\((.*)\\)") - for scanner.Scan() { - line := scanner.Text() - parts := strings.Fields(string(line)) - fv, err := strconv.ParseFloat(parts[1], 64) - if err != nil { - return nil, fmt.Errorf("Invalid value in meminfo: %s", err) - } - switch len(parts) { - case 2: // no unit - case 3: // has unit, we presume kB - fv *= 1024 - default: - return nil, fmt.Errorf("Invalid line in %s: %s", procMemInfo, line) - } - key := parts[0][:len(parts[0])-1] // remove trailing : from key - // Active(anon) -> Active_anon - key = re.ReplaceAllString(key, "_${1}") - memInfo[key] = fv - } - return memInfo, nil - -} - -type interrupt struct { - info string - devices string - values []string -} - -func getInterrupts() (map[string]interrupt, error) { - file, err := os.Open(procInterrupts) - if err != nil { - return nil, err - } - return parseInterrupts(file) -} - -func parseInterrupts(r io.ReadCloser) (map[string]interrupt, error) { - defer r.Close() - interrupts := map[string]interrupt{} - scanner := bufio.NewScanner(r) - if !scanner.Scan() { - return nil, fmt.Errorf("%s empty", procInterrupts) - } - cpuNum := len(strings.Fields(string(scanner.Text()))) // one header per cpu - - for scanner.Scan() { - line := scanner.Text() - parts := strings.Fields(string(line)) - if len(parts) < cpuNum+2 { // irq + one column per cpu + details, - continue // we ignore ERR and MIS for now - } - intName := parts[0][:len(parts[0])-1] // remove trailing : - intr := interrupt{ - values: parts[1:cpuNum], - } - - if _, err := strconv.Atoi(intName); err == nil { // numeral interrupt - intr.info = parts[cpuNum+1] - intr.devices = strings.Join(parts[cpuNum+2:], " ") - } else { - 
intr.info = strings.Join(parts[cpuNum+1:], " ") - } - interrupts[intName] = intr - } - return interrupts, nil -} - -func getNetStats() (map[string]map[string]map[string]string, error) { - file, err := os.Open(procNetDev) - if err != nil { - return nil, err - } - return parseNetStats(file) -} - -func parseNetStats(r io.ReadCloser) (map[string]map[string]map[string]string, error) { - defer r.Close() - netStats := map[string]map[string]map[string]string{} - netStats["transmit"] = map[string]map[string]string{} - netStats["receive"] = map[string]map[string]string{} - - scanner := bufio.NewScanner(r) - scanner.Scan() // skip first header - scanner.Scan() - parts := strings.Split(string(scanner.Text()), "|") - if len(parts) != 3 { // interface + receive + transmit - return nil, fmt.Errorf("Invalid header line in %s: %s", - procNetDev, scanner.Text()) - } - header := strings.Fields(parts[1]) - for scanner.Scan() { - parts := strings.Fields(string(scanner.Text())) - if len(parts) != 2*len(header)+1 { - return nil, fmt.Errorf("Invalid line in %s: %s", - procNetDev, scanner.Text()) - } - - dev := parts[0][:len(parts[0])-1] - receive, err := parseNetDevLine(parts[1:len(header)+1], header) - if err != nil { - return nil, err - } - - transmit, err := parseNetDevLine(parts[len(header)+1:], header) - if err != nil { - return nil, err - } - netStats["transmit"][dev] = transmit - netStats["receive"][dev] = receive - } - return netStats, nil -} - -func parseNetDevLine(parts []string, header []string) (map[string]string, error) { - devStats := map[string]string{} - for i, v := range parts { - devStats[header[i]] = v - } - return devStats, nil -} - -func getDiskStats() (map[string]map[int]string, error) { - file, err := os.Open(procDiskStats) - if err != nil { - return nil, err - } - return parseDiskStats(file) -} - -func parseDiskStats(r io.ReadCloser) (map[string]map[int]string, error) { - defer r.Close() - diskStats := map[string]map[int]string{} - scanner := bufio.NewScanner(r) - 
for scanner.Scan() { - parts := strings.Fields(string(scanner.Text())) - if len(parts) != len(diskStatsMetrics)+3 { // we strip major, minor and dev - return nil, fmt.Errorf("Invalid line in %s: %s", procDiskStats, scanner.Text()) - } - dev := parts[2] - diskStats[dev] = map[int]string{} - for i, v := range parts[3:] { - diskStats[dev][i] = v - } - } - return diskStats, nil -} diff --git a/collector/netdev.go b/collector/netdev.go new file mode 100644 index 00000000..4824fbdf --- /dev/null +++ b/collector/netdev.go @@ -0,0 +1,125 @@ +// +build !nonetDev + +package collector + +import ( + "bufio" + "fmt" + "io" + "os" + "strconv" + "strings" + + "github.com/prometheus/client_golang/prometheus" +) + +const ( + procNetDev = "/proc/net/dev" +) + +var ( + netStatsMetrics = map[string]prometheus.Gauge{} +) + +type netDevCollector struct { + registry prometheus.Registry + config Config +} + +func init() { + Factories["netdev"] = NewNetDevCollector +} + +// Takes a config struct and prometheus registry and returns a new Collector exposing +// network device stats. 
+func NewNetDevCollector(config Config, registry prometheus.Registry) (Collector, error) { + c := netDevCollector{ + config: config, + registry: registry, + } + return &c, nil +} + +func (c *netDevCollector) Update() (updates int, err error) { + netStats, err := getNetStats() + if err != nil { + return updates, fmt.Errorf("Couldn't get netstats: %s", err) + } + for direction, devStats := range netStats { + for dev, stats := range devStats { + for t, value := range stats { + key := direction + "_" + t + if _, ok := netStatsMetrics[key]; !ok { + netStatsMetrics[key] = prometheus.NewGauge() + c.registry.Register( + "node_network_"+key, + t+" "+direction+" from /proc/net/dev", + prometheus.NilLabels, + netStatsMetrics[key], + ) + } + updates++ + v, err := strconv.ParseFloat(value, 64) + if err != nil { + return updates, fmt.Errorf("Invalid value %s in netstats: %s", value, err) + } + netStatsMetrics[key].Set(map[string]string{"device": dev}, v) + } + } + } + return updates, err +} + +func getNetStats() (map[string]map[string]map[string]string, error) { + file, err := os.Open(procNetDev) + if err != nil { + return nil, err + } + return parseNetStats(file) +} + +func parseNetStats(r io.ReadCloser) (map[string]map[string]map[string]string, error) { + defer r.Close() + netStats := map[string]map[string]map[string]string{} + netStats["transmit"] = map[string]map[string]string{} + netStats["receive"] = map[string]map[string]string{} + + scanner := bufio.NewScanner(r) + scanner.Scan() // skip first header + scanner.Scan() + parts := strings.Split(string(scanner.Text()), "|") + if len(parts) != 3 { // interface + receive + transmit + return nil, fmt.Errorf("Invalid header line in %s: %s", + procNetDev, scanner.Text()) + } + header := strings.Fields(parts[1]) + for scanner.Scan() { + parts := strings.Fields(string(scanner.Text())) + if len(parts) != 2*len(header)+1 { + return nil, fmt.Errorf("Invalid line in %s: %s", + procNetDev, scanner.Text()) + } + + dev := 
parts[0][:len(parts[0])-1] + receive, err := parseNetDevLine(parts[1:len(header)+1], header) + if err != nil { + return nil, err + } + + transmit, err := parseNetDevLine(parts[len(header)+1:], header) + if err != nil { + return nil, err + } + netStats["transmit"][dev] = transmit + netStats["receive"][dev] = receive + } + return netStats, nil +} + +func parseNetDevLine(parts []string, header []string) (map[string]string, error) { + devStats := map[string]string{} + for i, v := range parts { + devStats[header[i]] = v + } + return devStats, nil +} diff --git a/collector/runit_collector.go b/collector/runit_collector.go index 24b89731..6e7c2096 100644 --- a/collector/runit_collector.go +++ b/collector/runit_collector.go @@ -3,12 +3,12 @@ package collector import ( + "github.com/golang/glog" "github.com/prometheus/client_golang/prometheus" "github.com/soundcloud/go-runit/runit" ) type runitCollector struct { - name string config Config state prometheus.Gauge stateDesired prometheus.Gauge @@ -16,12 +16,11 @@ type runitCollector struct { } func init() { - Factories = append(Factories, NewRunitCollector) + Factories["runit"] = NewRunitCollector } func NewRunitCollector(config Config, registry prometheus.Registry) (Collector, error) { c := runitCollector{ - name: "runit_collector", config: config, state: prometheus.NewGauge(), stateDesired: prometheus.NewGauge(), @@ -52,8 +51,6 @@ func NewRunitCollector(config Config, registry prometheus.Registry) (Collector, return &c, nil } -func (c *runitCollector) Name() string { return c.name } - func (c *runitCollector) Update() (updates int, err error) { services, err := runit.GetServices("/etc/service") if err != nil { @@ -63,11 +60,11 @@ func (c *runitCollector) Update() (updates int, err error) { for _, service := range services { status, err := service.Status() if err != nil { - debug(c.Name(), "Couldn't get status for %s: %s, skipping...", service.Name, err) + glog.V(1).Infof("Couldn't get status for %s: %s, skipping...", 
service.Name, err) continue } - debug(c.Name(), "%s is %d on pid %d for %d seconds", service.Name, status.State, status.Pid, status.Duration) + glog.V(1).Infof("%s is %d on pid %d for %d seconds", service.Name, status.State, status.Pid, status.Duration) labels := map[string]string{ "service": service.Name, } diff --git a/node_exporter.go b/node_exporter.go index f6671aa3..a75699f3 100644 --- a/node_exporter.go +++ b/node_exporter.go @@ -9,22 +9,25 @@ import ( "os" "os/signal" "runtime/pprof" + "strings" "sync" "syscall" "time" + "github.com/golang/glog" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/exp" "github.com/prometheus/node_exporter/collector" ) var ( - configFile = flag.String("config", "node_exporter.conf", "config file.") - memProfile = flag.String("memprofile", "", "write memory profile to this file") - listeningAddress = flag.String("listen", ":8080", "address to listen on") - interval = flag.Duration("interval", 60*time.Second, "refresh interval") - scrapeDurations = prometheus.NewDefaultHistogram() - metricsUpdated = prometheus.NewGauge() + configFile = flag.String("config", "node_exporter.conf", "config file.") + memProfile = flag.String("memprofile", "", "write memory profile to this file") + listeningAddress = flag.String("listen", ":8080", "address to listen on") + enabledCollectors = flag.String("enabledCollectors", "attributes,diskstats,loadavg,meminfo,netdev", "comma seperated list of collectors to use") + interval = flag.Duration("interval", 60*time.Second, "refresh interval") + scrapeDurations = prometheus.NewDefaultHistogram() + metricsUpdated = prometheus.NewGauge() ) func main() { @@ -38,9 +41,9 @@ func main() { registry.Register("node_exporter_scrape_duration_seconds", "node_exporter: Duration of a scrape job.", prometheus.NilLabels, scrapeDurations) registry.Register("node_exporter_metrics_updated", "node_exporter: Number of metrics updated.", prometheus.NilLabels, metricsUpdated) - 
log.Printf("Registered collectors:") - for _, c := range collectors { - log.Print(" - ", c.Name()) + glog.Infof("Enabled collectors:") + for n, _ := range collectors { + glog.Infof(" - %s", n) } sigHup := make(chan os.Signal) @@ -50,7 +53,7 @@ func main() { go serveStatus(registry) - log.Printf("Starting initial collection") + glog.Infof("Starting initial collection") collect(collectors) tick := time.Tick(*interval) @@ -61,17 +64,17 @@ func main() { if err != nil { log.Fatalf("Couldn't load config and collectors: %s", err) } - log.Printf("Reloaded collectors and config") + glog.Infof("Reloaded collectors and config") tick = time.Tick(*interval) case <-tick: - log.Printf("Starting new interval") + glog.Infof("Starting new interval") collect(collectors) case <-sigUsr1: - log.Printf("got signal") + glog.Infof("got signal") if *memProfile != "" { - log.Printf("Writing memory profile to %s", *memProfile) + glog.Infof("Writing memory profile to %s", *memProfile) f, err := os.Create(*memProfile) if err != nil { log.Fatal(err) @@ -84,25 +87,29 @@ func main() { } -func loadCollectors(file string, registry prometheus.Registry) ([]collector.Collector, error) { - collectors := []collector.Collector{} +func loadCollectors(file string, registry prometheus.Registry) (map[string]collector.Collector, error) { + collectors := map[string]collector.Collector{} config, err := getConfig(file) if err != nil { log.Fatalf("Couldn't read config %s: %s", file, err) } - for _, fn := range collector.Factories { + for _, name := range strings.Split(*enabledCollectors, ",") { + fn, ok := collector.Factories[name] + if !ok { + log.Fatalf("Collector '%s' not available", name) + } c, err := fn(*config, registry) if err != nil { return nil, err } - collectors = append(collectors, c) + collectors[name] = c } return collectors, nil } func getConfig(file string) (*collector.Config, error) { config := &collector.Config{} - log.Printf("Reading config %s", *configFile) + glog.Infof("Reading config %s", 
*configFile) bytes, err := ioutil.ReadFile(*configFile) if err != nil { return nil, err @@ -115,31 +122,31 @@ func serveStatus(registry prometheus.Registry) { http.ListenAndServe(*listeningAddress, exp.DefaultCoarseMux) } -func collect(collectors []collector.Collector) { +func collect(collectors map[string]collector.Collector) { wg := sync.WaitGroup{} wg.Add(len(collectors)) - for _, c := range collectors { - go func(c collector.Collector) { - Execute(c) + for n, c := range collectors { + go func(n string, c collector.Collector) { + Execute(n, c) wg.Done() - }(c) + }(n, c) } wg.Wait() } -func Execute(c collector.Collector) { +func Execute(name string, c collector.Collector) { begin := time.Now() updates, err := c.Update() duration := time.Since(begin) label := map[string]string{ - "collector": c.Name(), + "collector": name, } if err != nil { - log.Printf("ERROR: %s failed after %fs: %s", c.Name(), duration.Seconds(), err) + glog.Infof("ERROR: %s failed after %fs: %s", name, duration.Seconds(), err) label["result"] = "error" } else { - log.Printf("OK: %s success after %fs.", c.Name(), duration.Seconds()) + glog.Infof("OK: %s success after %fs.", name, duration.Seconds()) label["result"] = "success" } scrapeDurations.Add(label, duration.Seconds())