Split native collector into its component parts and make them enablable.
Last login is disabled by default as it's broken on Ubuntu 12.04. Interrupts is disabled by default as it's very granular and we'll have total interrupts from /proc/stat. Allow ignoring devices from diskstats; ignore ram and loop devices by default. Use glog for logging. pull/12/head
parent
964cdbfcc9
commit
25ea90369c
|
@ -1,25 +1,26 @@
|
|||
# node_exporter
|
||||
|
||||
Prometheus exporter with plugable metric collectors.
|
||||
Prometheus exporter with pluggable metric collectors.
|
||||
|
||||
|
||||
|
||||
## Available collectors
|
||||
|
||||
By default it will only include the NativeCollector.
|
||||
By default the build will only include the native collectors
|
||||
that expose information from /proc.
|
||||
|
||||
To include other collectors, specify the build tags like this:
|
||||
|
||||
go build -tags 'ganglia runit' node_exporter.go
|
||||
|
||||
|
||||
Which collectors are used is controlled by the --enabledCollectors flag.
|
||||
|
||||
### NativeCollector
|
||||
|
||||
Provides metrics for load, seconds since last login and a list of tags
|
||||
read from `node_exporter.conf`.
|
||||
|
||||
To disable the native collector, use build tag `nonative`.
|
||||
|
||||
|
||||
### GmondCollector (tag: ganglia)
|
||||
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
// +build !noattributes
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"github.com/golang/glog"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
var (
|
||||
attributes = prometheus.NewGauge()
|
||||
)
|
||||
|
||||
type attributesCollector struct {
|
||||
registry prometheus.Registry
|
||||
config Config
|
||||
}
|
||||
|
||||
func init() {
|
||||
Factories["attributes"] = NewAttributesCollector
|
||||
}
|
||||
|
||||
// Takes a config struct and prometheus registry and returns a new Collector exposing
|
||||
// labels from the config.
|
||||
func NewAttributesCollector(config Config, registry prometheus.Registry) (Collector, error) {
|
||||
c := attributesCollector{
|
||||
config: config,
|
||||
registry: registry,
|
||||
}
|
||||
registry.Register(
|
||||
"node_attributes",
|
||||
"node_exporter attributes",
|
||||
prometheus.NilLabels,
|
||||
attributes,
|
||||
)
|
||||
return &c, nil
|
||||
}
|
||||
|
||||
func (c *attributesCollector) Update() (updates int, err error) {
|
||||
glog.V(1).Info("Set node_attributes{%v}: 1", c.config.Attributes)
|
||||
attributes.Set(c.config.Attributes, 1)
|
||||
return updates, err
|
||||
}
|
|
@ -5,15 +5,12 @@ import (
|
|||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
var Factories []func(Config, prometheus.Registry) (Collector, error)
|
||||
var Factories = make(map[string]func(Config, prometheus.Registry) (Collector, error))
|
||||
|
||||
// Interface a collector has to implement.
|
||||
type Collector interface {
|
||||
// Get new metrics and expose them via prometheus registry.
|
||||
Update() (n int, err error)
|
||||
|
||||
// Returns the name of the collector.
|
||||
Name() string
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
|
|
|
@ -0,0 +1,131 @@
|
|||
// +build !nonative
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/golang/glog"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// procDiskStats is the path the disk statistics are read from.
const procDiskStats = "/proc/diskstats"
|
||||
|
||||
type diskStat struct {
|
||||
name string
|
||||
metric prometheus.Metric
|
||||
documentation string
|
||||
}
|
||||
|
||||
var (
|
||||
ignoredDevices = flag.String("diskstatsIgnoredDevices", "^(ram|loop)\\d+$", "Regexp of devices to ignore for diskstats.")
|
||||
|
||||
// Docs from https://www.kernel.org/doc/Documentation/iostats.txt
|
||||
diskStatsMetrics = []diskStat{
|
||||
{"reads_completed", prometheus.NewCounter(), "The total number of reads completed successfully."},
|
||||
{"reads_merged", prometheus.NewCounter(), "The number of reads merged. See https://www.kernel.org/doc/Documentation/iostats.txt"},
|
||||
{"sectors_read", prometheus.NewCounter(), "The total number of sectors read successfully."},
|
||||
{"read_time_ms", prometheus.NewCounter(), "the total number of milliseconds spent by all reads."},
|
||||
{"writes_completed", prometheus.NewCounter(), "The total number of writes completed successfully."},
|
||||
{"writes_merged", prometheus.NewCounter(), "The number of writes merged. See https://www.kernel.org/doc/Documentation/iostats.txt"},
|
||||
{"sectors_written", prometheus.NewCounter(), "The total number of sectors written successfully."},
|
||||
{"write_time_ms", prometheus.NewCounter(), "This is the total number of milliseconds spent by all writes."},
|
||||
{"io_now", prometheus.NewGauge(), "The number of I/Os currently in progress."},
|
||||
{"io_time_ms", prometheus.NewCounter(), "Milliseconds spent doing I/Os."},
|
||||
{"io_time_weighted", prometheus.NewCounter(), "The weighted # of milliseconds spent doing I/Os. See https://www.kernel.org/doc/Documentation/iostats.txt"},
|
||||
}
|
||||
)
|
||||
|
||||
type diskstatsCollector struct {
|
||||
registry prometheus.Registry
|
||||
config Config
|
||||
ignoredDevicesPattern *regexp.Regexp
|
||||
}
|
||||
|
||||
func init() {
|
||||
Factories["diskstats"] = NewDiskstatsCollector
|
||||
}
|
||||
|
||||
// Takes a config struct and prometheus registry and returns a new Collector exposing
|
||||
// disk device stats.
|
||||
func NewDiskstatsCollector(config Config, registry prometheus.Registry) (Collector, error) {
|
||||
c := diskstatsCollector{
|
||||
config: config,
|
||||
registry: registry,
|
||||
ignoredDevicesPattern: regexp.MustCompile(*ignoredDevices),
|
||||
}
|
||||
|
||||
for _, v := range diskStatsMetrics {
|
||||
registry.Register(
|
||||
"node_disk_"+v.name,
|
||||
v.documentation,
|
||||
prometheus.NilLabels,
|
||||
v.metric,
|
||||
)
|
||||
}
|
||||
return &c, nil
|
||||
}
|
||||
|
||||
func (c *diskstatsCollector) Update() (updates int, err error) {
|
||||
diskStats, err := getDiskStats()
|
||||
if err != nil {
|
||||
return updates, fmt.Errorf("Couldn't get diskstats: %s", err)
|
||||
}
|
||||
for dev, stats := range diskStats {
|
||||
if c.ignoredDevicesPattern.MatchString(dev) {
|
||||
glog.V(1).Infof("Ignoring device: %s", dev)
|
||||
continue
|
||||
}
|
||||
for k, value := range stats {
|
||||
updates++
|
||||
v, err := strconv.ParseFloat(value, 64)
|
||||
if err != nil {
|
||||
return updates, fmt.Errorf("Invalid value %s in diskstats: %s", value, err)
|
||||
}
|
||||
labels := map[string]string{"device": dev}
|
||||
counter, ok := diskStatsMetrics[k].metric.(prometheus.Counter)
|
||||
if ok {
|
||||
counter.Set(labels, v)
|
||||
} else {
|
||||
var gauge = diskStatsMetrics[k].metric.(prometheus.Gauge)
|
||||
gauge.Set(labels, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
return updates, err
|
||||
}
|
||||
|
||||
func getDiskStats() (map[string]map[int]string, error) {
|
||||
file, err := os.Open(procDiskStats)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return parseDiskStats(file)
|
||||
}
|
||||
|
||||
func parseDiskStats(r io.ReadCloser) (map[string]map[int]string, error) {
|
||||
defer r.Close()
|
||||
diskStats := map[string]map[int]string{}
|
||||
scanner := bufio.NewScanner(r)
|
||||
for scanner.Scan() {
|
||||
parts := strings.Fields(string(scanner.Text()))
|
||||
if len(parts) != len(diskStatsMetrics)+3 { // we strip major, minor and dev
|
||||
return nil, fmt.Errorf("Invalid line in %s: %s", procDiskStats, scanner.Text())
|
||||
}
|
||||
dev := parts[2]
|
||||
diskStats[dev] = map[int]string{}
|
||||
for i, v := range parts[3:] {
|
||||
diskStats[dev][i] = v
|
||||
}
|
||||
}
|
||||
return diskStats, nil
|
||||
}
|
|
@ -11,6 +11,7 @@ import (
|
|||
"regexp"
|
||||
"time"
|
||||
|
||||
"github.com/golang/glog"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/node_exporter/collector/ganglia"
|
||||
)
|
||||
|
@ -23,14 +24,13 @@ const (
|
|||
)
|
||||
|
||||
type gmondCollector struct {
|
||||
name string
|
||||
Metrics map[string]prometheus.Gauge
|
||||
config Config
|
||||
registry prometheus.Registry
|
||||
}
|
||||
|
||||
func init() {
|
||||
Factories = append(Factories, NewGmondCollector)
|
||||
Factories["gmond"] = NewGmondCollector
|
||||
}
|
||||
|
||||
var illegalCharsRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
|
||||
|
@ -38,7 +38,6 @@ var illegalCharsRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
|
|||
// Takes a config struct and prometheus registry and returns a new Collector scraping ganglia.
|
||||
func NewGmondCollector(config Config, registry prometheus.Registry) (Collector, error) {
|
||||
c := gmondCollector{
|
||||
name: "gmond_collector",
|
||||
config: config,
|
||||
Metrics: make(map[string]prometheus.Gauge),
|
||||
registry: registry,
|
||||
|
@ -47,8 +46,6 @@ func NewGmondCollector(config Config, registry prometheus.Registry) (Collector,
|
|||
return &c, nil
|
||||
}
|
||||
|
||||
func (c *gmondCollector) Name() string { return c.name }
|
||||
|
||||
func (c *gmondCollector) setMetric(name string, labels map[string]string, metric ganglia.Metric) {
|
||||
if _, ok := c.Metrics[name]; !ok {
|
||||
var desc string
|
||||
|
@ -64,18 +61,18 @@ func (c *gmondCollector) setMetric(name string, labels map[string]string, metric
|
|||
break
|
||||
}
|
||||
}
|
||||
debug(c.Name(), "Register %s: %s", name, desc)
|
||||
glog.V(1).Infof("Register %s: %s", name, desc)
|
||||
gauge := prometheus.NewGauge()
|
||||
c.Metrics[name] = gauge
|
||||
c.registry.Register(name, desc, prometheus.NilLabels, gauge) // one gauge per metric!
|
||||
}
|
||||
debug(c.Name(), "Set %s{%s}: %f", name, labels, metric.Value)
|
||||
glog.V(1).Infof("Set %s{%s}: %f", name, labels, metric.Value)
|
||||
c.Metrics[name].Set(labels, metric.Value)
|
||||
}
|
||||
|
||||
func (c *gmondCollector) Update() (updates int, err error) {
|
||||
conn, err := net.Dial(gangliaProto, gangliaAddress)
|
||||
debug(c.Name(), "gmondCollector Update")
|
||||
glog.V(1).Infof("gmondCollector Update")
|
||||
if err != nil {
|
||||
return updates, fmt.Errorf("Can't connect to gmond: %s", err)
|
||||
}
|
||||
|
|
|
@ -1,22 +1,11 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var verbose = flag.Bool("verbose", false, "Verbose output.")
|
||||
|
||||
func debug(name string, format string, a ...interface{}) {
|
||||
if *verbose {
|
||||
f := fmt.Sprintf("%s: %s", name, format)
|
||||
log.Printf(f, a...)
|
||||
}
|
||||
}
|
||||
|
||||
func splitToInts(str string, sep string) (ints []int, err error) {
|
||||
for _, part := range strings.Split(str, sep) {
|
||||
i, err := strconv.Atoi(part)
|
||||
|
|
|
@ -0,0 +1,116 @@
|
|||
// +build !nointerrupts
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// procInterrupts is the path the interrupt counters are read from.
const procInterrupts = "/proc/interrupts"
|
||||
|
||||
var (
|
||||
interruptsMetric = prometheus.NewCounter()
|
||||
)
|
||||
|
||||
type interruptsCollector struct {
|
||||
registry prometheus.Registry
|
||||
config Config
|
||||
}
|
||||
|
||||
func init() {
|
||||
Factories["interrupts"] = NewInterruptsCollector
|
||||
}
|
||||
|
||||
// Takes a config struct and prometheus registry and returns a new Collector exposing
|
||||
// interrupts stats
|
||||
func NewInterruptsCollector(config Config, registry prometheus.Registry) (Collector, error) {
|
||||
c := interruptsCollector{
|
||||
config: config,
|
||||
registry: registry,
|
||||
}
|
||||
registry.Register(
|
||||
"node_interrupts",
|
||||
"Interrupt details from /proc/interrupts",
|
||||
prometheus.NilLabels,
|
||||
interruptsMetric,
|
||||
)
|
||||
return &c, nil
|
||||
}
|
||||
|
||||
func (c *interruptsCollector) Update() (updates int, err error) {
|
||||
interrupts, err := getInterrupts()
|
||||
if err != nil {
|
||||
return updates, fmt.Errorf("Couldn't get interrupts: %s", err)
|
||||
}
|
||||
for name, interrupt := range interrupts {
|
||||
for cpuNo, value := range interrupt.values {
|
||||
updates++
|
||||
fv, err := strconv.ParseFloat(value, 64)
|
||||
if err != nil {
|
||||
return updates, fmt.Errorf("Invalid value %s in interrupts: %s", value, err)
|
||||
}
|
||||
labels := map[string]string{
|
||||
"CPU": strconv.Itoa(cpuNo),
|
||||
"type": name,
|
||||
"info": interrupt.info,
|
||||
"devices": interrupt.devices,
|
||||
}
|
||||
interruptsMetric.Set(labels, fv)
|
||||
}
|
||||
}
|
||||
return updates, err
|
||||
}
|
||||
|
||||
// interrupt is one parsed row of the interrupts table.
type interrupt struct {
	info    string   // first description column for numeric IRQs, the whole description otherwise
	devices string   // rest of the description for numeric IRQs; empty for named ones
	values  []string // unparsed counters, one per CPU column
}
|
||||
|
||||
func getInterrupts() (map[string]interrupt, error) {
|
||||
file, err := os.Open(procInterrupts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return parseInterrupts(file)
|
||||
}
|
||||
|
||||
func parseInterrupts(r io.ReadCloser) (map[string]interrupt, error) {
|
||||
defer r.Close()
|
||||
interrupts := map[string]interrupt{}
|
||||
scanner := bufio.NewScanner(r)
|
||||
if !scanner.Scan() {
|
||||
return nil, fmt.Errorf("%s empty", procInterrupts)
|
||||
}
|
||||
cpuNum := len(strings.Fields(string(scanner.Text()))) // one header per cpu
|
||||
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
parts := strings.Fields(string(line))
|
||||
if len(parts) < cpuNum+2 { // irq + one column per cpu + details,
|
||||
continue // we ignore ERR and MIS for now
|
||||
}
|
||||
intName := parts[0][:len(parts[0])-1] // remove trailing :
|
||||
intr := interrupt{
|
||||
values: parts[1:cpuNum],
|
||||
}
|
||||
|
||||
if _, err := strconv.Atoi(intName); err == nil { // numeral interrupt
|
||||
intr.info = parts[cpuNum+1]
|
||||
intr.devices = strings.Join(parts[cpuNum+2:], " ")
|
||||
} else {
|
||||
intr.info = strings.Join(parts[cpuNum+1:], " ")
|
||||
}
|
||||
interrupts[intName] = intr
|
||||
}
|
||||
return interrupts, nil
|
||||
}
|
|
@ -0,0 +1,105 @@
|
|||
// +build !nolastLogin
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/golang/glog"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
var (
|
||||
lastSeen = prometheus.NewGauge()
|
||||
)
|
||||
|
||||
type lastLoginCollector struct {
|
||||
registry prometheus.Registry
|
||||
config Config
|
||||
}
|
||||
|
||||
func init() {
|
||||
Factories["lastlogin"] = NewLastLoginCollector
|
||||
}
|
||||
|
||||
// Takes a config struct and prometheus registry and returns a new Collector exposing
|
||||
// load, seconds since last login and a list of tags as specified by config.
|
||||
func NewLastLoginCollector(config Config, registry prometheus.Registry) (Collector, error) {
|
||||
c := lastLoginCollector{
|
||||
config: config,
|
||||
registry: registry,
|
||||
}
|
||||
registry.Register(
|
||||
"node_last_login_time",
|
||||
"The time of the last login.",
|
||||
prometheus.NilLabels,
|
||||
lastSeen,
|
||||
)
|
||||
return &c, nil
|
||||
}
|
||||
|
||||
func (c *lastLoginCollector) Update() (updates int, err error) {
|
||||
last, err := getLastLoginTime()
|
||||
if err != nil {
|
||||
return updates, fmt.Errorf("Couldn't get last seen: %s", err)
|
||||
}
|
||||
updates++
|
||||
glog.V(1).Infof("Set node_last_login_time: %f", last)
|
||||
lastSeen.Set(nil, last)
|
||||
return updates, err
|
||||
}
|
||||
|
||||
func getLastLoginTime() (float64, error) {
|
||||
who := exec.Command("who", "/var/log/wtmp", "-l", "-u", "-s")
|
||||
|
||||
output, err := who.StdoutPipe()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
err = who.Start()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
reader := bufio.NewReader(output)
|
||||
|
||||
var last time.Time
|
||||
for {
|
||||
line, isPrefix, err := reader.ReadLine()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if isPrefix {
|
||||
return 0, fmt.Errorf("line to long: %s(...)", line)
|
||||
}
|
||||
|
||||
fields := strings.Fields(string(line))
|
||||
lastDate := fields[2]
|
||||
lastTime := fields[3]
|
||||
|
||||
dateParts, err := splitToInts(lastDate, "-") // 2013-04-16
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("Couldn't parse date in line '%s': %s", fields, err)
|
||||
}
|
||||
|
||||
timeParts, err := splitToInts(lastTime, ":") // 11:33
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("Couldn't parse time in line '%s': %s", fields, err)
|
||||
}
|
||||
|
||||
last_t := time.Date(dateParts[0], time.Month(dateParts[1]), dateParts[2], timeParts[0], timeParts[1], 0, 0, time.UTC)
|
||||
last = last_t
|
||||
}
|
||||
err = who.Wait()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return float64(last.Unix()), nil
|
||||
}
|
|
@ -0,0 +1,76 @@
|
|||
// +build !noloadavg
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/golang/glog"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// procLoad is the path the load average is read from.
const procLoad = "/proc/loadavg"
|
||||
|
||||
var (
|
||||
load1 = prometheus.NewGauge()
|
||||
)
|
||||
|
||||
type loadavgCollector struct {
|
||||
registry prometheus.Registry
|
||||
config Config
|
||||
}
|
||||
|
||||
func init() {
|
||||
Factories["loadavg"] = NewLoadavgCollector
|
||||
}
|
||||
|
||||
// Takes a config struct and prometheus registry and returns a new Collector exposing
|
||||
// load, seconds since last login and a list of tags as specified by config.
|
||||
func NewLoadavgCollector(config Config, registry prometheus.Registry) (Collector, error) {
|
||||
c := loadavgCollector{
|
||||
config: config,
|
||||
registry: registry,
|
||||
}
|
||||
|
||||
registry.Register(
|
||||
"node_load1",
|
||||
"1m load average",
|
||||
prometheus.NilLabels,
|
||||
load1,
|
||||
)
|
||||
return &c, nil
|
||||
}
|
||||
|
||||
func (c *loadavgCollector) Update() (updates int, err error) {
|
||||
load, err := getLoad1()
|
||||
if err != nil {
|
||||
return updates, fmt.Errorf("Couldn't get load: %s", err)
|
||||
}
|
||||
updates++
|
||||
glog.V(1).Infof("Set node_load: %f", load)
|
||||
load1.Set(nil, load)
|
||||
|
||||
return updates, err
|
||||
}
|
||||
|
||||
func getLoad1() (float64, error) {
|
||||
data, err := ioutil.ReadFile(procLoad)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return parseLoad(string(data))
|
||||
}
|
||||
|
||||
// parseLoad returns the first whitespace-separated field of data parsed as a
// float64. Any further fields are ignored.
//
// It returns an error when data contains no fields at all (empty or
// whitespace-only input) or when the first field is not a valid float.
func parseLoad(data string) (float64, error) {
	parts := strings.Fields(data)
	// Guard the index below: an empty read would otherwise panic.
	if len(parts) == 0 {
		return 0, fmt.Errorf("Could not parse load '%s': no fields", data)
	}
	load, err := strconv.ParseFloat(parts[0], 64)
	if err != nil {
		return 0, fmt.Errorf("Could not parse load '%s': %s", parts[0], err)
	}
	return load, nil
}
|
|
@ -0,0 +1,101 @@
|
|||
// +build !nomeminfo
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/golang/glog"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// procMemInfo is the path the memory statistics are read from.
const procMemInfo = "/proc/meminfo"
|
||||
|
||||
var (
|
||||
memInfoMetrics = map[string]prometheus.Gauge{}
|
||||
)
|
||||
|
||||
type meminfoCollector struct {
|
||||
registry prometheus.Registry
|
||||
config Config
|
||||
}
|
||||
|
||||
func init() {
|
||||
Factories["meminfo"] = NewMeminfoCollector
|
||||
}
|
||||
|
||||
// Takes a config struct and prometheus registry and returns a new Collector exposing
|
||||
// memory stats.
|
||||
func NewMeminfoCollector(config Config, registry prometheus.Registry) (Collector, error) {
|
||||
c := meminfoCollector{
|
||||
config: config,
|
||||
registry: registry,
|
||||
}
|
||||
return &c, nil
|
||||
}
|
||||
|
||||
func (c *meminfoCollector) Update() (updates int, err error) {
|
||||
memInfo, err := getMemInfo()
|
||||
if err != nil {
|
||||
return updates, fmt.Errorf("Couldn't get meminfo: %s", err)
|
||||
}
|
||||
glog.V(1).Infof("Set node_mem: %#v", memInfo)
|
||||
for k, v := range memInfo {
|
||||
if _, ok := memInfoMetrics[k]; !ok {
|
||||
memInfoMetrics[k] = prometheus.NewGauge()
|
||||
c.registry.Register(
|
||||
"node_memory_"+k,
|
||||
k+" from /proc/meminfo",
|
||||
prometheus.NilLabels,
|
||||
memInfoMetrics[k],
|
||||
)
|
||||
}
|
||||
updates++
|
||||
memInfoMetrics[k].Set(nil, v)
|
||||
}
|
||||
return updates, err
|
||||
}
|
||||
|
||||
func getMemInfo() (map[string]float64, error) {
|
||||
file, err := os.Open(procMemInfo)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return parseMemInfo(file)
|
||||
}
|
||||
|
||||
func parseMemInfo(r io.ReadCloser) (map[string]float64, error) {
|
||||
defer r.Close()
|
||||
memInfo := map[string]float64{}
|
||||
scanner := bufio.NewScanner(r)
|
||||
re := regexp.MustCompile("\\((.*)\\)")
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
parts := strings.Fields(string(line))
|
||||
fv, err := strconv.ParseFloat(parts[1], 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Invalid value in meminfo: %s", err)
|
||||
}
|
||||
switch len(parts) {
|
||||
case 2: // no unit
|
||||
case 3: // has unit, we presume kB
|
||||
fv *= 1024
|
||||
default:
|
||||
return nil, fmt.Errorf("Invalid line in %s: %s", procMemInfo, line)
|
||||
}
|
||||
key := parts[0][:len(parts[0])-1] // remove trailing : from key
|
||||
// Active(anon) -> Active_anon
|
||||
key = re.ReplaceAllString(key, "_${1}")
|
||||
memInfo[key] = fv
|
||||
}
|
||||
return memInfo, nil
|
||||
|
||||
}
|
|
@ -1,454 +0,0 @@
|
|||
// +build !nonative
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
const (
|
||||
procLoad = "/proc/loadavg"
|
||||
procMemInfo = "/proc/meminfo"
|
||||
procInterrupts = "/proc/interrupts"
|
||||
procNetDev = "/proc/net/dev"
|
||||
procDiskStats = "/proc/diskstats"
|
||||
)
|
||||
|
||||
type diskStat struct {
|
||||
name string
|
||||
metric prometheus.Metric
|
||||
documentation string
|
||||
}
|
||||
|
||||
var (
|
||||
// Docs from https://www.kernel.org/doc/Documentation/iostats.txt
|
||||
diskStatsMetrics = []diskStat{
|
||||
{"reads_completed", prometheus.NewCounter(), "The total number of reads completed successfully."},
|
||||
{"reads_merged", prometheus.NewCounter(), "The number of reads merged. See https://www.kernel.org/doc/Documentation/iostats.txt"},
|
||||
{"sectors_read", prometheus.NewCounter(), "The total number of sectors read successfully."},
|
||||
{"read_time_ms", prometheus.NewCounter(), "the total number of milliseconds spent by all reads."},
|
||||
{"writes_completed", prometheus.NewCounter(), "The total number of writes completed successfully."},
|
||||
{"writes_merged", prometheus.NewCounter(), "The number of writes merged. See https://www.kernel.org/doc/Documentation/iostats.txt"},
|
||||
{"sectors_written", prometheus.NewCounter(), "The total number of sectors written successfully."},
|
||||
{"write_time_ms", prometheus.NewCounter(), "This is the total number of milliseconds spent by all writes."},
|
||||
{"io_now", prometheus.NewGauge(), "The number of I/Os currently in progress."},
|
||||
{"io_time_ms", prometheus.NewCounter(), "Milliseconds spent doing I/Os."},
|
||||
{"io_time_weighted", prometheus.NewCounter(), "The weighted # of milliseconds spent doing I/Os. See https://www.kernel.org/doc/Documentation/iostats.txt"},
|
||||
}
|
||||
lastSeen = prometheus.NewGauge()
|
||||
load1 = prometheus.NewGauge()
|
||||
attributes = prometheus.NewGauge()
|
||||
memInfoMetrics = map[string]prometheus.Gauge{}
|
||||
netStatsMetrics = map[string]prometheus.Gauge{}
|
||||
interruptsMetric = prometheus.NewCounter()
|
||||
)
|
||||
|
||||
type nativeCollector struct {
|
||||
registry prometheus.Registry
|
||||
name string
|
||||
config Config
|
||||
}
|
||||
|
||||
func init() {
|
||||
Factories = append(Factories, NewNativeCollector)
|
||||
}
|
||||
|
||||
// Takes a config struct and prometheus registry and returns a new Collector exposing
|
||||
// load, seconds since last login and a list of tags as specified by config.
|
||||
func NewNativeCollector(config Config, registry prometheus.Registry) (Collector, error) {
|
||||
c := nativeCollector{
|
||||
name: "native_collector",
|
||||
config: config,
|
||||
registry: registry,
|
||||
}
|
||||
|
||||
registry.Register(
|
||||
"node_load1",
|
||||
"1m load average",
|
||||
prometheus.NilLabels,
|
||||
load1,
|
||||
)
|
||||
|
||||
registry.Register(
|
||||
"node_last_login_time",
|
||||
"The time of the last login.",
|
||||
prometheus.NilLabels,
|
||||
lastSeen,
|
||||
)
|
||||
|
||||
registry.Register(
|
||||
"node_attributes",
|
||||
"node_exporter attributes",
|
||||
prometheus.NilLabels,
|
||||
attributes,
|
||||
)
|
||||
|
||||
registry.Register(
|
||||
"node_interrupts",
|
||||
"Interrupt details from /proc/interrupts",
|
||||
prometheus.NilLabels,
|
||||
interruptsMetric,
|
||||
)
|
||||
|
||||
for _, v := range diskStatsMetrics {
|
||||
registry.Register(
|
||||
"node_disk_"+v.name,
|
||||
v.documentation,
|
||||
prometheus.NilLabels,
|
||||
v.metric,
|
||||
)
|
||||
}
|
||||
return &c, nil
|
||||
}
|
||||
|
||||
func (c *nativeCollector) Name() string { return c.name }
|
||||
|
||||
func (c *nativeCollector) Update() (updates int, err error) {
|
||||
last, err := getLastLoginTime()
|
||||
if err != nil {
|
||||
return updates, fmt.Errorf("Couldn't get last seen: %s", err)
|
||||
}
|
||||
updates++
|
||||
debug(c.Name(), "Set node_last_login_time: %f", last)
|
||||
lastSeen.Set(nil, last)
|
||||
|
||||
load, err := getLoad1()
|
||||
if err != nil {
|
||||
return updates, fmt.Errorf("Couldn't get load: %s", err)
|
||||
}
|
||||
updates++
|
||||
debug(c.Name(), "Set node_load: %f", load)
|
||||
load1.Set(nil, load)
|
||||
|
||||
debug(c.Name(), "Set node_attributes{%v}: 1", c.config.Attributes)
|
||||
attributes.Set(c.config.Attributes, 1)
|
||||
|
||||
memInfo, err := getMemInfo()
|
||||
if err != nil {
|
||||
return updates, fmt.Errorf("Couldn't get meminfo: %s", err)
|
||||
}
|
||||
debug(c.Name(), "Set node_mem: %#v", memInfo)
|
||||
for k, v := range memInfo {
|
||||
if _, ok := memInfoMetrics[k]; !ok {
|
||||
memInfoMetrics[k] = prometheus.NewGauge()
|
||||
c.registry.Register(
|
||||
"node_memory_"+k,
|
||||
k+" from /proc/meminfo",
|
||||
prometheus.NilLabels,
|
||||
memInfoMetrics[k],
|
||||
)
|
||||
}
|
||||
updates++
|
||||
memInfoMetrics[k].Set(nil, v)
|
||||
}
|
||||
|
||||
interrupts, err := getInterrupts()
|
||||
if err != nil {
|
||||
return updates, fmt.Errorf("Couldn't get interrupts: %s", err)
|
||||
}
|
||||
for name, interrupt := range interrupts {
|
||||
for cpuNo, value := range interrupt.values {
|
||||
updates++
|
||||
fv, err := strconv.ParseFloat(value, 64)
|
||||
if err != nil {
|
||||
return updates, fmt.Errorf("Invalid value %s in interrupts: %s", value, err)
|
||||
}
|
||||
labels := map[string]string{
|
||||
"CPU": strconv.Itoa(cpuNo),
|
||||
"type": name,
|
||||
"info": interrupt.info,
|
||||
"devices": interrupt.devices,
|
||||
}
|
||||
interruptsMetric.Set(labels, fv)
|
||||
}
|
||||
}
|
||||
|
||||
netStats, err := getNetStats()
|
||||
if err != nil {
|
||||
return updates, fmt.Errorf("Couldn't get netstats: %s", err)
|
||||
}
|
||||
for direction, devStats := range netStats {
|
||||
for dev, stats := range devStats {
|
||||
for t, value := range stats {
|
||||
key := direction + "_" + t
|
||||
if _, ok := netStatsMetrics[key]; !ok {
|
||||
netStatsMetrics[key] = prometheus.NewGauge()
|
||||
c.registry.Register(
|
||||
"node_network_"+key,
|
||||
t+" "+direction+" from /proc/net/dev",
|
||||
prometheus.NilLabels,
|
||||
netStatsMetrics[key],
|
||||
)
|
||||
}
|
||||
updates++
|
||||
v, err := strconv.ParseFloat(value, 64)
|
||||
if err != nil {
|
||||
return updates, fmt.Errorf("Invalid value %s in netstats: %s", value, err)
|
||||
}
|
||||
netStatsMetrics[key].Set(map[string]string{"device": dev}, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
diskStats, err := getDiskStats()
|
||||
if err != nil {
|
||||
return updates, fmt.Errorf("Couldn't get diskstats: %s", err)
|
||||
}
|
||||
for dev, stats := range diskStats {
|
||||
for k, value := range stats {
|
||||
updates++
|
||||
v, err := strconv.ParseFloat(value, 64)
|
||||
if err != nil {
|
||||
return updates, fmt.Errorf("Invalid value %s in diskstats: %s", value, err)
|
||||
}
|
||||
labels := map[string]string{"device": dev}
|
||||
counter, ok := diskStatsMetrics[k].metric.(prometheus.Counter)
|
||||
if ok {
|
||||
counter.Set(labels, v)
|
||||
} else {
|
||||
var gauge = diskStatsMetrics[k].metric.(prometheus.Gauge)
|
||||
gauge.Set(labels, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
return updates, err
|
||||
}
|
||||
|
||||
func getLoad1() (float64, error) {
|
||||
data, err := ioutil.ReadFile(procLoad)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return parseLoad(string(data))
|
||||
}
|
||||
|
||||
func parseLoad(data string) (float64, error) {
|
||||
parts := strings.Fields(data)
|
||||
load, err := strconv.ParseFloat(parts[0], 64)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("Could not parse load '%s': %s", parts[0], err)
|
||||
}
|
||||
return load, nil
|
||||
}
|
||||
|
||||
func getLastLoginTime() (float64, error) {
|
||||
who := exec.Command("who", "/var/log/wtmp", "-l", "-u", "-s")
|
||||
|
||||
output, err := who.StdoutPipe()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
err = who.Start()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
reader := bufio.NewReader(output)
|
||||
|
||||
var last time.Time
|
||||
for {
|
||||
line, isPrefix, err := reader.ReadLine()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if isPrefix {
|
||||
return 0, fmt.Errorf("line to long: %s(...)", line)
|
||||
}
|
||||
|
||||
fields := strings.Fields(string(line))
|
||||
lastDate := fields[2]
|
||||
lastTime := fields[3]
|
||||
|
||||
dateParts, err := splitToInts(lastDate, "-") // 2013-04-16
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("Couldn't parse date in line '%s': %s", fields, err)
|
||||
}
|
||||
|
||||
timeParts, err := splitToInts(lastTime, ":") // 11:33
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("Couldn't parse time in line '%s': %s", fields, err)
|
||||
}
|
||||
|
||||
last_t := time.Date(dateParts[0], time.Month(dateParts[1]), dateParts[2], timeParts[0], timeParts[1], 0, 0, time.UTC)
|
||||
last = last_t
|
||||
}
|
||||
err = who.Wait()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return float64(last.Unix()), nil
|
||||
}
|
||||
|
||||
func getMemInfo() (map[string]float64, error) {
|
||||
file, err := os.Open(procMemInfo)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return parseMemInfo(file)
|
||||
}
|
||||
|
||||
func parseMemInfo(r io.ReadCloser) (map[string]float64, error) {
|
||||
defer r.Close()
|
||||
memInfo := map[string]float64{}
|
||||
scanner := bufio.NewScanner(r)
|
||||
re := regexp.MustCompile("\\((.*)\\)")
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
parts := strings.Fields(string(line))
|
||||
fv, err := strconv.ParseFloat(parts[1], 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Invalid value in meminfo: %s", err)
|
||||
}
|
||||
switch len(parts) {
|
||||
case 2: // no unit
|
||||
case 3: // has unit, we presume kB
|
||||
fv *= 1024
|
||||
default:
|
||||
return nil, fmt.Errorf("Invalid line in %s: %s", procMemInfo, line)
|
||||
}
|
||||
key := parts[0][:len(parts[0])-1] // remove trailing : from key
|
||||
// Active(anon) -> Active_anon
|
||||
key = re.ReplaceAllString(key, "_${1}")
|
||||
memInfo[key] = fv
|
||||
}
|
||||
return memInfo, nil
|
||||
|
||||
}
|
||||
|
||||
// interrupt holds one parsed row of the interrupts table. All fields keep the
// raw text read from the file.
type interrupt struct {
	// info is the description column; devices is the text following it and
	// is left empty for interrupts whose name is not numeric.
	info, devices string
	// values holds the counter columns of the row.
	values []string
}
|
||||
|
||||
func getInterrupts() (map[string]interrupt, error) {
|
||||
file, err := os.Open(procInterrupts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return parseInterrupts(file)
|
||||
}
|
||||
|
||||
func parseInterrupts(r io.ReadCloser) (map[string]interrupt, error) {
|
||||
defer r.Close()
|
||||
interrupts := map[string]interrupt{}
|
||||
scanner := bufio.NewScanner(r)
|
||||
if !scanner.Scan() {
|
||||
return nil, fmt.Errorf("%s empty", procInterrupts)
|
||||
}
|
||||
cpuNum := len(strings.Fields(string(scanner.Text()))) // one header per cpu
|
||||
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
parts := strings.Fields(string(line))
|
||||
if len(parts) < cpuNum+2 { // irq + one column per cpu + details,
|
||||
continue // we ignore ERR and MIS for now
|
||||
}
|
||||
intName := parts[0][:len(parts[0])-1] // remove trailing :
|
||||
intr := interrupt{
|
||||
values: parts[1:cpuNum],
|
||||
}
|
||||
|
||||
if _, err := strconv.Atoi(intName); err == nil { // numeral interrupt
|
||||
intr.info = parts[cpuNum+1]
|
||||
intr.devices = strings.Join(parts[cpuNum+2:], " ")
|
||||
} else {
|
||||
intr.info = strings.Join(parts[cpuNum+1:], " ")
|
||||
}
|
||||
interrupts[intName] = intr
|
||||
}
|
||||
return interrupts, nil
|
||||
}
|
||||
|
||||
func getNetStats() (map[string]map[string]map[string]string, error) {
|
||||
file, err := os.Open(procNetDev)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return parseNetStats(file)
|
||||
}
|
||||
|
||||
func parseNetStats(r io.ReadCloser) (map[string]map[string]map[string]string, error) {
|
||||
defer r.Close()
|
||||
netStats := map[string]map[string]map[string]string{}
|
||||
netStats["transmit"] = map[string]map[string]string{}
|
||||
netStats["receive"] = map[string]map[string]string{}
|
||||
|
||||
scanner := bufio.NewScanner(r)
|
||||
scanner.Scan() // skip first header
|
||||
scanner.Scan()
|
||||
parts := strings.Split(string(scanner.Text()), "|")
|
||||
if len(parts) != 3 { // interface + receive + transmit
|
||||
return nil, fmt.Errorf("Invalid header line in %s: %s",
|
||||
procNetDev, scanner.Text())
|
||||
}
|
||||
header := strings.Fields(parts[1])
|
||||
for scanner.Scan() {
|
||||
parts := strings.Fields(string(scanner.Text()))
|
||||
if len(parts) != 2*len(header)+1 {
|
||||
return nil, fmt.Errorf("Invalid line in %s: %s",
|
||||
procNetDev, scanner.Text())
|
||||
}
|
||||
|
||||
dev := parts[0][:len(parts[0])-1]
|
||||
receive, err := parseNetDevLine(parts[1:len(header)+1], header)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
transmit, err := parseNetDevLine(parts[len(header)+1:], header)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
netStats["transmit"][dev] = transmit
|
||||
netStats["receive"][dev] = receive
|
||||
}
|
||||
return netStats, nil
|
||||
}
|
||||
|
||||
// parseNetDevLine pairs every value in parts with the header name at the same
// position and returns the pairs as a name -> value map. The returned error
// is always nil.
func parseNetDevLine(parts []string, header []string) (map[string]string, error) {
	stats := make(map[string]string, len(parts))
	for idx := 0; idx < len(parts); idx++ {
		stats[header[idx]] = parts[idx]
	}
	return stats, nil
}
|
||||
|
||||
func getDiskStats() (map[string]map[int]string, error) {
|
||||
file, err := os.Open(procDiskStats)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return parseDiskStats(file)
|
||||
}
|
||||
|
||||
func parseDiskStats(r io.ReadCloser) (map[string]map[int]string, error) {
|
||||
defer r.Close()
|
||||
diskStats := map[string]map[int]string{}
|
||||
scanner := bufio.NewScanner(r)
|
||||
for scanner.Scan() {
|
||||
parts := strings.Fields(string(scanner.Text()))
|
||||
if len(parts) != len(diskStatsMetrics)+3 { // we strip major, minor and dev
|
||||
return nil, fmt.Errorf("Invalid line in %s: %s", procDiskStats, scanner.Text())
|
||||
}
|
||||
dev := parts[2]
|
||||
diskStats[dev] = map[int]string{}
|
||||
for i, v := range parts[3:] {
|
||||
diskStats[dev][i] = v
|
||||
}
|
||||
}
|
||||
return diskStats, nil
|
||||
}
|
|
@ -0,0 +1,125 @@
|
|||
// +build !nonetDev
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// procNetDev is the path of the file the network device statistics are read
// from.
const procNetDev = "/proc/net/dev"
|
||||
|
||||
var (
|
||||
netStatsMetrics = map[string]prometheus.Gauge{}
|
||||
)
|
||||
|
||||
type netDevCollector struct {
|
||||
registry prometheus.Registry
|
||||
config Config
|
||||
}
|
||||
|
||||
func init() {
|
||||
Factories["netdev"] = NewNetDevCollector
|
||||
}
|
||||
|
||||
// Takes a config struct and prometheus registry and returns a new Collector exposing
|
||||
// network device stats.
|
||||
func NewNetDevCollector(config Config, registry prometheus.Registry) (Collector, error) {
|
||||
c := netDevCollector{
|
||||
config: config,
|
||||
registry: registry,
|
||||
}
|
||||
return &c, nil
|
||||
}
|
||||
|
||||
func (c *netDevCollector) Update() (updates int, err error) {
|
||||
netStats, err := getNetStats()
|
||||
if err != nil {
|
||||
return updates, fmt.Errorf("Couldn't get netstats: %s", err)
|
||||
}
|
||||
for direction, devStats := range netStats {
|
||||
for dev, stats := range devStats {
|
||||
for t, value := range stats {
|
||||
key := direction + "_" + t
|
||||
if _, ok := netStatsMetrics[key]; !ok {
|
||||
netStatsMetrics[key] = prometheus.NewGauge()
|
||||
c.registry.Register(
|
||||
"node_network_"+key,
|
||||
t+" "+direction+" from /proc/net/dev",
|
||||
prometheus.NilLabels,
|
||||
netStatsMetrics[key],
|
||||
)
|
||||
}
|
||||
updates++
|
||||
v, err := strconv.ParseFloat(value, 64)
|
||||
if err != nil {
|
||||
return updates, fmt.Errorf("Invalid value %s in netstats: %s", value, err)
|
||||
}
|
||||
netStatsMetrics[key].Set(map[string]string{"device": dev}, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
return updates, err
|
||||
}
|
||||
|
||||
func getNetStats() (map[string]map[string]map[string]string, error) {
|
||||
file, err := os.Open(procNetDev)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return parseNetStats(file)
|
||||
}
|
||||
|
||||
func parseNetStats(r io.ReadCloser) (map[string]map[string]map[string]string, error) {
|
||||
defer r.Close()
|
||||
netStats := map[string]map[string]map[string]string{}
|
||||
netStats["transmit"] = map[string]map[string]string{}
|
||||
netStats["receive"] = map[string]map[string]string{}
|
||||
|
||||
scanner := bufio.NewScanner(r)
|
||||
scanner.Scan() // skip first header
|
||||
scanner.Scan()
|
||||
parts := strings.Split(string(scanner.Text()), "|")
|
||||
if len(parts) != 3 { // interface + receive + transmit
|
||||
return nil, fmt.Errorf("Invalid header line in %s: %s",
|
||||
procNetDev, scanner.Text())
|
||||
}
|
||||
header := strings.Fields(parts[1])
|
||||
for scanner.Scan() {
|
||||
parts := strings.Fields(string(scanner.Text()))
|
||||
if len(parts) != 2*len(header)+1 {
|
||||
return nil, fmt.Errorf("Invalid line in %s: %s",
|
||||
procNetDev, scanner.Text())
|
||||
}
|
||||
|
||||
dev := parts[0][:len(parts[0])-1]
|
||||
receive, err := parseNetDevLine(parts[1:len(header)+1], header)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
transmit, err := parseNetDevLine(parts[len(header)+1:], header)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
netStats["transmit"][dev] = transmit
|
||||
netStats["receive"][dev] = receive
|
||||
}
|
||||
return netStats, nil
|
||||
}
|
||||
|
||||
// parseNetDevLine pairs every value in parts with the header name at the same
// position and returns the pairs as a name -> value map. The returned error
// is always nil.
func parseNetDevLine(parts []string, header []string) (map[string]string, error) {
	stats := make(map[string]string, len(parts))
	for idx := 0; idx < len(parts); idx++ {
		stats[header[idx]] = parts[idx]
	}
	return stats, nil
}
|
|
@ -3,12 +3,12 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"github.com/golang/glog"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/soundcloud/go-runit/runit"
|
||||
)
|
||||
|
||||
type runitCollector struct {
|
||||
name string
|
||||
config Config
|
||||
state prometheus.Gauge
|
||||
stateDesired prometheus.Gauge
|
||||
|
@ -16,12 +16,11 @@ type runitCollector struct {
|
|||
}
|
||||
|
||||
func init() {
|
||||
Factories = append(Factories, NewRunitCollector)
|
||||
Factories["runit"] = NewRunitCollector
|
||||
}
|
||||
|
||||
func NewRunitCollector(config Config, registry prometheus.Registry) (Collector, error) {
|
||||
c := runitCollector{
|
||||
name: "runit_collector",
|
||||
config: config,
|
||||
state: prometheus.NewGauge(),
|
||||
stateDesired: prometheus.NewGauge(),
|
||||
|
@ -52,8 +51,6 @@ func NewRunitCollector(config Config, registry prometheus.Registry) (Collector,
|
|||
return &c, nil
|
||||
}
|
||||
|
||||
func (c *runitCollector) Name() string { return c.name }
|
||||
|
||||
func (c *runitCollector) Update() (updates int, err error) {
|
||||
services, err := runit.GetServices("/etc/service")
|
||||
if err != nil {
|
||||
|
@ -63,11 +60,11 @@ func (c *runitCollector) Update() (updates int, err error) {
|
|||
for _, service := range services {
|
||||
status, err := service.Status()
|
||||
if err != nil {
|
||||
debug(c.Name(), "Couldn't get status for %s: %s, skipping...", service.Name, err)
|
||||
glog.V(1).Infof("Couldn't get status for %s: %s, skipping...", service.Name, err)
|
||||
continue
|
||||
}
|
||||
|
||||
debug(c.Name(), "%s is %d on pid %d for %d seconds", service.Name, status.State, status.Pid, status.Duration)
|
||||
glog.V(1).Infof("%s is %d on pid %d for %d seconds", service.Name, status.State, status.Pid, status.Duration)
|
||||
labels := map[string]string{
|
||||
"service": service.Name,
|
||||
}
|
||||
|
|
|
@ -9,22 +9,25 @@ import (
|
|||
"os"
|
||||
"os/signal"
|
||||
"runtime/pprof"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/golang/glog"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/exp"
|
||||
"github.com/prometheus/node_exporter/collector"
|
||||
)
|
||||
|
||||
var (
|
||||
configFile = flag.String("config", "node_exporter.conf", "config file.")
|
||||
memProfile = flag.String("memprofile", "", "write memory profile to this file")
|
||||
listeningAddress = flag.String("listen", ":8080", "address to listen on")
|
||||
interval = flag.Duration("interval", 60*time.Second, "refresh interval")
|
||||
scrapeDurations = prometheus.NewDefaultHistogram()
|
||||
metricsUpdated = prometheus.NewGauge()
|
||||
configFile = flag.String("config", "node_exporter.conf", "config file.")
|
||||
memProfile = flag.String("memprofile", "", "write memory profile to this file")
|
||||
listeningAddress = flag.String("listen", ":8080", "address to listen on")
|
||||
enabledCollectors = flag.String("enabledCollectors", "attributes,diskstats,loadavg,meminfo,netdev", "comma seperated list of collectors to use")
|
||||
interval = flag.Duration("interval", 60*time.Second, "refresh interval")
|
||||
scrapeDurations = prometheus.NewDefaultHistogram()
|
||||
metricsUpdated = prometheus.NewGauge()
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
@ -38,9 +41,9 @@ func main() {
|
|||
registry.Register("node_exporter_scrape_duration_seconds", "node_exporter: Duration of a scrape job.", prometheus.NilLabels, scrapeDurations)
|
||||
registry.Register("node_exporter_metrics_updated", "node_exporter: Number of metrics updated.", prometheus.NilLabels, metricsUpdated)
|
||||
|
||||
log.Printf("Registered collectors:")
|
||||
for _, c := range collectors {
|
||||
log.Print(" - ", c.Name())
|
||||
glog.Infof("Enabled collectors:")
|
||||
for n, _ := range collectors {
|
||||
glog.Infof(" - %s", n)
|
||||
}
|
||||
|
||||
sigHup := make(chan os.Signal)
|
||||
|
@ -50,7 +53,7 @@ func main() {
|
|||
|
||||
go serveStatus(registry)
|
||||
|
||||
log.Printf("Starting initial collection")
|
||||
glog.Infof("Starting initial collection")
|
||||
collect(collectors)
|
||||
|
||||
tick := time.Tick(*interval)
|
||||
|
@ -61,17 +64,17 @@ func main() {
|
|||
if err != nil {
|
||||
log.Fatalf("Couldn't load config and collectors: %s", err)
|
||||
}
|
||||
log.Printf("Reloaded collectors and config")
|
||||
glog.Infof("Reloaded collectors and config")
|
||||
tick = time.Tick(*interval)
|
||||
|
||||
case <-tick:
|
||||
log.Printf("Starting new interval")
|
||||
glog.Infof("Starting new interval")
|
||||
collect(collectors)
|
||||
|
||||
case <-sigUsr1:
|
||||
log.Printf("got signal")
|
||||
glog.Infof("got signal")
|
||||
if *memProfile != "" {
|
||||
log.Printf("Writing memory profile to %s", *memProfile)
|
||||
glog.Infof("Writing memory profile to %s", *memProfile)
|
||||
f, err := os.Create(*memProfile)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
|
@ -84,25 +87,29 @@ func main() {
|
|||
|
||||
}
|
||||
|
||||
func loadCollectors(file string, registry prometheus.Registry) ([]collector.Collector, error) {
|
||||
collectors := []collector.Collector{}
|
||||
func loadCollectors(file string, registry prometheus.Registry) (map[string]collector.Collector, error) {
|
||||
collectors := map[string]collector.Collector{}
|
||||
config, err := getConfig(file)
|
||||
if err != nil {
|
||||
log.Fatalf("Couldn't read config %s: %s", file, err)
|
||||
}
|
||||
for _, fn := range collector.Factories {
|
||||
for _, name := range strings.Split(*enabledCollectors, ",") {
|
||||
fn, ok := collector.Factories[name]
|
||||
if !ok {
|
||||
log.Fatalf("Collector '%s' not available", name)
|
||||
}
|
||||
c, err := fn(*config, registry)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
collectors = append(collectors, c)
|
||||
collectors[name] = c
|
||||
}
|
||||
return collectors, nil
|
||||
}
|
||||
|
||||
func getConfig(file string) (*collector.Config, error) {
|
||||
config := &collector.Config{}
|
||||
log.Printf("Reading config %s", *configFile)
|
||||
glog.Infof("Reading config %s", *configFile)
|
||||
bytes, err := ioutil.ReadFile(*configFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -115,31 +122,31 @@ func serveStatus(registry prometheus.Registry) {
|
|||
http.ListenAndServe(*listeningAddress, exp.DefaultCoarseMux)
|
||||
}
|
||||
|
||||
func collect(collectors []collector.Collector) {
|
||||
func collect(collectors map[string]collector.Collector) {
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(len(collectors))
|
||||
for _, c := range collectors {
|
||||
go func(c collector.Collector) {
|
||||
Execute(c)
|
||||
for n, c := range collectors {
|
||||
go func(n string, c collector.Collector) {
|
||||
Execute(n, c)
|
||||
wg.Done()
|
||||
}(c)
|
||||
}(n, c)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func Execute(c collector.Collector) {
|
||||
func Execute(name string, c collector.Collector) {
|
||||
begin := time.Now()
|
||||
updates, err := c.Update()
|
||||
duration := time.Since(begin)
|
||||
|
||||
label := map[string]string{
|
||||
"collector": c.Name(),
|
||||
"collector": name,
|
||||
}
|
||||
if err != nil {
|
||||
log.Printf("ERROR: %s failed after %fs: %s", c.Name(), duration.Seconds(), err)
|
||||
glog.Infof("ERROR: %s failed after %fs: %s", name, duration.Seconds(), err)
|
||||
label["result"] = "error"
|
||||
} else {
|
||||
log.Printf("OK: %s success after %fs.", c.Name(), duration.Seconds())
|
||||
glog.Infof("OK: %s success after %fs.", name, duration.Seconds())
|
||||
label["result"] = "success"
|
||||
}
|
||||
scrapeDurations.Add(label, duration.Seconds())
|
||||
|
|
Loading…
Reference in New Issue