Change stats to follow naming guidelines.
Diskstats: Split out metrics, keep the 'device' label. Meminfo: Split out metrics, one each; convert kB to bytes. Netstats: Split out metrics, keep the 'device' label. Interrupts: Stays the same; not perfect, but it should rarely be used. Loadavg: Make it clear that it is the 1m loadavg. Last seen: It is not clear that this belongs in the node exporter, as it is more a user thing than a machine thing; changed to absolute time rather than relative. All stats now have the appropriate counter/gauge type.
pull/11/head
parent
bf39ec5bf1
commit
6cdaf0e969
|
@ -9,6 +9,7 @@ import (
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
@ -24,24 +25,37 @@ const (
|
||||||
procDiskStats = "/proc/diskstats"
|
procDiskStats = "/proc/diskstats"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
type diskStat struct {
|
||||||
diskStatsHeader = []string{
|
name string
|
||||||
"reads_completed", "reads_merged",
|
metric prometheus.Metric
|
||||||
"sectors_read", "read_time_ms",
|
documentation string
|
||||||
"writes_completed", "writes_merged",
|
|
||||||
"sectors_written", "write_time_ms",
|
|
||||||
"io_now", "io_time_ms", "io_time_weighted",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
// Docs from https://www.kernel.org/doc/Documentation/iostats.txt
|
||||||
|
diskStatsMetrics = []diskStat{
|
||||||
|
{"reads_completed", prometheus.NewCounter(), "# of reads completed"},
|
||||||
|
{"reads_merged", prometheus.NewCounter(), "# of reads merged"},
|
||||||
|
{"sectors_read", prometheus.NewCounter(), "# of sectors read"},
|
||||||
|
{"read_time_ms", prometheus.NewCounter(), "# of milliseconds spent reading"},
|
||||||
|
{"writes_completed", prometheus.NewCounter(), "# of writes completed"},
|
||||||
|
{"writes_merged", prometheus.NewCounter(), "# of writes merges"},
|
||||||
|
{"sectors_written", prometheus.NewCounter(), "# of sectors written"},
|
||||||
|
{"write_time_ms", prometheus.NewCounter(), "# of milliseconds spent writing"},
|
||||||
|
{"io_now", prometheus.NewGauge(), "# of I/Os currently in progress"},
|
||||||
|
{"io_time_ms", prometheus.NewCounter(), "# of milliseconds spent doing I/Os"},
|
||||||
|
{"io_time_weighted", prometheus.NewCounter(), "weighted # of milliseconds spent doing I/Os"},
|
||||||
|
}
|
||||||
|
lastSeen = prometheus.NewGauge()
|
||||||
|
load1 = prometheus.NewGauge()
|
||||||
|
attributes = prometheus.NewGauge()
|
||||||
|
memInfoMetrics = map[string]prometheus.Gauge{}
|
||||||
|
netStatsMetrics = map[string]prometheus.Gauge{}
|
||||||
|
interruptsMetric = prometheus.NewCounter()
|
||||||
)
|
)
|
||||||
|
|
||||||
type nativeCollector struct {
|
type nativeCollector struct {
|
||||||
loadAvg prometheus.Gauge
|
registry prometheus.Registry
|
||||||
attributes prometheus.Gauge
|
|
||||||
lastSeen prometheus.Gauge
|
|
||||||
memInfo prometheus.Gauge
|
|
||||||
interrupts prometheus.Counter
|
|
||||||
netStats prometheus.Counter
|
|
||||||
diskStats prometheus.Counter
|
|
||||||
name string
|
name string
|
||||||
config Config
|
config Config
|
||||||
}
|
}
|
||||||
|
@ -56,87 +70,69 @@ func NewNativeCollector(config Config, registry prometheus.Registry) (Collector,
|
||||||
c := nativeCollector{
|
c := nativeCollector{
|
||||||
name: "native_collector",
|
name: "native_collector",
|
||||||
config: config,
|
config: config,
|
||||||
loadAvg: prometheus.NewGauge(),
|
registry: registry,
|
||||||
attributes: prometheus.NewGauge(),
|
|
||||||
lastSeen: prometheus.NewGauge(),
|
|
||||||
memInfo: prometheus.NewGauge(),
|
|
||||||
interrupts: prometheus.NewCounter(),
|
|
||||||
netStats: prometheus.NewCounter(),
|
|
||||||
diskStats: prometheus.NewCounter(),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
registry.Register(
|
registry.Register(
|
||||||
"node_load",
|
"node_load1",
|
||||||
"node_exporter: system load.",
|
"1m load average",
|
||||||
prometheus.NilLabels,
|
prometheus.NilLabels,
|
||||||
c.loadAvg,
|
load1,
|
||||||
)
|
)
|
||||||
|
|
||||||
registry.Register(
|
registry.Register(
|
||||||
"node_last_login_seconds",
|
"node_last_login_time",
|
||||||
"node_exporter: seconds since last login.",
|
"The time of the last login",
|
||||||
prometheus.NilLabels,
|
prometheus.NilLabels,
|
||||||
c.lastSeen,
|
lastSeen,
|
||||||
)
|
)
|
||||||
|
|
||||||
registry.Register(
|
registry.Register(
|
||||||
"node_attributes",
|
"node_attributes",
|
||||||
"node_exporter: system attributes.",
|
"node_exporter attributes",
|
||||||
prometheus.NilLabels,
|
prometheus.NilLabels,
|
||||||
c.attributes,
|
attributes,
|
||||||
)
|
|
||||||
|
|
||||||
registry.Register(
|
|
||||||
"node_mem",
|
|
||||||
"node_exporter: memory details.",
|
|
||||||
prometheus.NilLabels,
|
|
||||||
c.memInfo,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
registry.Register(
|
registry.Register(
|
||||||
"node_interrupts",
|
"node_interrupts",
|
||||||
"node_exporter: interrupt details.",
|
"Interrupt details from /proc/interrupts",
|
||||||
prometheus.NilLabels,
|
prometheus.NilLabels,
|
||||||
c.interrupts,
|
interruptsMetric,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
for _, v := range diskStatsMetrics {
|
||||||
registry.Register(
|
registry.Register(
|
||||||
"node_net",
|
"node_disk_"+v.name,
|
||||||
"node_exporter: network stats.",
|
v.documentation,
|
||||||
prometheus.NilLabels,
|
prometheus.NilLabels,
|
||||||
c.netStats,
|
v.metric,
|
||||||
)
|
|
||||||
|
|
||||||
registry.Register(
|
|
||||||
"node_disk",
|
|
||||||
"node_exporter: disk stats.",
|
|
||||||
prometheus.NilLabels,
|
|
||||||
c.diskStats,
|
|
||||||
)
|
)
|
||||||
|
}
|
||||||
return &c, nil
|
return &c, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *nativeCollector) Name() string { return c.name }
|
func (c *nativeCollector) Name() string { return c.name }
|
||||||
|
|
||||||
func (c *nativeCollector) Update() (updates int, err error) {
|
func (c *nativeCollector) Update() (updates int, err error) {
|
||||||
last, err := getSecondsSinceLastLogin()
|
last, err := getLastLoginTime()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return updates, fmt.Errorf("Couldn't get last seen: %s", err)
|
return updates, fmt.Errorf("Couldn't get last seen: %s", err)
|
||||||
}
|
}
|
||||||
updates++
|
updates++
|
||||||
debug(c.Name(), "Set node_last_login_seconds: %f", last)
|
debug(c.Name(), "Set node_last_login_time: %f", last)
|
||||||
c.lastSeen.Set(nil, last)
|
lastSeen.Set(nil, last)
|
||||||
|
|
||||||
load, err := getLoad()
|
load, err := getLoad1()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return updates, fmt.Errorf("Couldn't get load: %s", err)
|
return updates, fmt.Errorf("Couldn't get load: %s", err)
|
||||||
}
|
}
|
||||||
updates++
|
updates++
|
||||||
debug(c.Name(), "Set node_load: %f", load)
|
debug(c.Name(), "Set node_load: %f", load)
|
||||||
c.loadAvg.Set(nil, load)
|
load1.Set(nil, load)
|
||||||
|
|
||||||
debug(c.Name(), "Set node_attributes{%v}: 1", c.config.Attributes)
|
debug(c.Name(), "Set node_attributes{%v}: 1", c.config.Attributes)
|
||||||
c.attributes.Set(c.config.Attributes, 1)
|
attributes.Set(c.config.Attributes, 1)
|
||||||
|
|
||||||
memInfo, err := getMemInfo()
|
memInfo, err := getMemInfo()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -144,12 +140,17 @@ func (c *nativeCollector) Update() (updates int, err error) {
|
||||||
}
|
}
|
||||||
debug(c.Name(), "Set node_mem: %#v", memInfo)
|
debug(c.Name(), "Set node_mem: %#v", memInfo)
|
||||||
for k, v := range memInfo {
|
for k, v := range memInfo {
|
||||||
updates++
|
if _, ok := memInfoMetrics[k]; !ok {
|
||||||
fv, err := strconv.ParseFloat(v, 64)
|
memInfoMetrics[k] = prometheus.NewGauge()
|
||||||
if err != nil {
|
c.registry.Register(
|
||||||
return updates, fmt.Errorf("Invalid value in meminfo: %s", err)
|
"node_memory_"+k,
|
||||||
|
k+" from /proc/meminfo",
|
||||||
|
prometheus.NilLabels,
|
||||||
|
memInfoMetrics[k],
|
||||||
|
)
|
||||||
}
|
}
|
||||||
c.memInfo.Set(map[string]string{"type": k}, fv)
|
updates++
|
||||||
|
memInfoMetrics[k].Set(nil, v)
|
||||||
}
|
}
|
||||||
|
|
||||||
interrupts, err := getInterrupts()
|
interrupts, err := getInterrupts()
|
||||||
|
@ -169,7 +170,7 @@ func (c *nativeCollector) Update() (updates int, err error) {
|
||||||
"info": interrupt.info,
|
"info": interrupt.info,
|
||||||
"devices": interrupt.devices,
|
"devices": interrupt.devices,
|
||||||
}
|
}
|
||||||
c.interrupts.Set(labels, fv)
|
interruptsMetric.Set(labels, fv)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -180,17 +181,22 @@ func (c *nativeCollector) Update() (updates int, err error) {
|
||||||
for direction, devStats := range netStats {
|
for direction, devStats := range netStats {
|
||||||
for dev, stats := range devStats {
|
for dev, stats := range devStats {
|
||||||
for t, value := range stats {
|
for t, value := range stats {
|
||||||
|
key := direction + "_" + t
|
||||||
|
if _, ok := netStatsMetrics[key]; !ok {
|
||||||
|
netStatsMetrics[key] = prometheus.NewGauge()
|
||||||
|
c.registry.Register(
|
||||||
|
"node_network_"+key,
|
||||||
|
t+" "+direction+" from /proc/net/dev",
|
||||||
|
prometheus.NilLabels,
|
||||||
|
netStatsMetrics[key],
|
||||||
|
)
|
||||||
|
}
|
||||||
updates++
|
updates++
|
||||||
v, err := strconv.ParseFloat(value, 64)
|
v, err := strconv.ParseFloat(value, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return updates, fmt.Errorf("Invalid value %s in interrupts: %s", value, err)
|
return updates, fmt.Errorf("Invalid value %s in netstats: %s", value, err)
|
||||||
}
|
}
|
||||||
labels := map[string]string{
|
netStatsMetrics[key].Set(map[string]string{"device": dev}, v)
|
||||||
"device": dev,
|
|
||||||
"direction": direction,
|
|
||||||
"type": t,
|
|
||||||
}
|
|
||||||
c.netStats.Set(labels, v)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -206,14 +212,20 @@ func (c *nativeCollector) Update() (updates int, err error) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return updates, fmt.Errorf("Invalid value %s in diskstats: %s", value, err)
|
return updates, fmt.Errorf("Invalid value %s in diskstats: %s", value, err)
|
||||||
}
|
}
|
||||||
labels := map[string]string{"device": dev, "type": k}
|
labels := map[string]string{"device": dev}
|
||||||
c.diskStats.Set(labels, v)
|
counter, ok := diskStatsMetrics[k].metric.(prometheus.Counter)
|
||||||
|
if ok {
|
||||||
|
counter.Set(labels, v)
|
||||||
|
} else {
|
||||||
|
var gauge = diskStatsMetrics[k].metric.(prometheus.Gauge)
|
||||||
|
gauge.Set(labels, v)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return updates, err
|
return updates, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func getLoad() (float64, error) {
|
func getLoad1() (float64, error) {
|
||||||
data, err := ioutil.ReadFile(procLoad)
|
data, err := ioutil.ReadFile(procLoad)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, err
|
return 0, err
|
||||||
|
@ -230,7 +242,7 @@ func parseLoad(data string) (float64, error) {
|
||||||
return load, nil
|
return load, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getSecondsSinceLastLogin() (float64, error) {
|
func getLastLoginTime() (float64, error) {
|
||||||
who := exec.Command("who", "/var/log/wtmp", "-l", "-u", "-s")
|
who := exec.Command("who", "/var/log/wtmp", "-l", "-u", "-s")
|
||||||
|
|
||||||
output, err := who.StdoutPipe()
|
output, err := who.StdoutPipe()
|
||||||
|
@ -277,10 +289,10 @@ func getSecondsSinceLastLogin() (float64, error) {
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return float64(time.Now().Sub(last).Seconds()), nil
|
return float64(last.Unix()), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getMemInfo() (map[string]string, error) {
|
func getMemInfo() (map[string]float64, error) {
|
||||||
file, err := os.Open(procMemInfo)
|
file, err := os.Open(procMemInfo)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -288,23 +300,29 @@ func getMemInfo() (map[string]string, error) {
|
||||||
return parseMemInfo(file)
|
return parseMemInfo(file)
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseMemInfo(r io.ReadCloser) (map[string]string, error) {
|
func parseMemInfo(r io.ReadCloser) (map[string]float64, error) {
|
||||||
defer r.Close()
|
defer r.Close()
|
||||||
memInfo := map[string]string{}
|
memInfo := map[string]float64{}
|
||||||
scanner := bufio.NewScanner(r)
|
scanner := bufio.NewScanner(r)
|
||||||
|
re := regexp.MustCompile("\\((.*)\\)")
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := scanner.Text()
|
line := scanner.Text()
|
||||||
parts := strings.Fields(string(line))
|
parts := strings.Fields(string(line))
|
||||||
key := ""
|
fv, err := strconv.ParseFloat(parts[1], 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("Invalid value in meminfo: %s", err)
|
||||||
|
}
|
||||||
switch len(parts) {
|
switch len(parts) {
|
||||||
case 2: // no unit
|
case 2: // no unit
|
||||||
key = parts[0][:len(parts[0])-1] // remove trailing : from key
|
case 3: // has unit, we presume kB
|
||||||
case 3: // has unit
|
fv *= 1024
|
||||||
key = fmt.Sprintf("%s_%s", parts[0][:len(parts[0])-1], parts[2])
|
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("Invalid line in %s: %s", procMemInfo, line)
|
return nil, fmt.Errorf("Invalid line in %s: %s", procMemInfo, line)
|
||||||
}
|
}
|
||||||
memInfo[key] = parts[1]
|
key := parts[0][:len(parts[0])-1] // remove trailing : from key
|
||||||
|
// Active(anon) -> Active_anon
|
||||||
|
key = re.ReplaceAllString(key, "_${1}")
|
||||||
|
memInfo[key] = fv
|
||||||
}
|
}
|
||||||
return memInfo, nil
|
return memInfo, nil
|
||||||
|
|
||||||
|
@ -409,7 +427,7 @@ func parseNetDevLine(parts []string, header []string) (map[string]string, error)
|
||||||
return devStats, nil
|
return devStats, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getDiskStats() (map[string]map[string]string, error) {
|
func getDiskStats() (map[string]map[int]string, error) {
|
||||||
file, err := os.Open(procDiskStats)
|
file, err := os.Open(procDiskStats)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -417,19 +435,19 @@ func getDiskStats() (map[string]map[string]string, error) {
|
||||||
return parseDiskStats(file)
|
return parseDiskStats(file)
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseDiskStats(r io.ReadCloser) (map[string]map[string]string, error) {
|
func parseDiskStats(r io.ReadCloser) (map[string]map[int]string, error) {
|
||||||
defer r.Close()
|
defer r.Close()
|
||||||
diskStats := map[string]map[string]string{}
|
diskStats := map[string]map[int]string{}
|
||||||
scanner := bufio.NewScanner(r)
|
scanner := bufio.NewScanner(r)
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
parts := strings.Fields(string(scanner.Text()))
|
parts := strings.Fields(string(scanner.Text()))
|
||||||
if len(parts) != len(diskStatsHeader)+3 { // we strip major, minor and dev
|
if len(parts) != len(diskStatsMetrics)+3 { // we strip major, minor and dev
|
||||||
return nil, fmt.Errorf("Invalid line in %s: %s", procDiskStats, scanner.Text())
|
return nil, fmt.Errorf("Invalid line in %s: %s", procDiskStats, scanner.Text())
|
||||||
}
|
}
|
||||||
dev := parts[2]
|
dev := parts[2]
|
||||||
diskStats[dev] = map[string]string{}
|
diskStats[dev] = map[int]string{}
|
||||||
for i, v := range parts[3:] {
|
for i, v := range parts[3:] {
|
||||||
diskStats[dev][diskStatsHeader[i]] = v
|
diskStats[dev][i] = v
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return diskStats, nil
|
return diskStats, nil
|
||||||
|
|
Loading…
Reference in New Issue