Browse Source

Merge pull request #7 from prometheus/add-more-metrics

Refactor node_exporter
pull/9/head
Johannes 'fish' Ziemke 11 years ago
parent
commit
0f7604c3cd
  1. 21
      collector/collector.go
  2. 38
      collector/fixtures/diskstats
  3. 31
      collector/fixtures/interrupts
  4. 1
      collector/fixtures/loadavg
  5. 42
      collector/fixtures/meminfo
  6. 8
      collector/fixtures/net-dev
  7. 0
      collector/ganglia/format.go
  8. 13
      collector/gmond_collector.go
  9. 5
      collector/helper.go
  10. 72
      collector/native_collector.go
  11. 118
      collector/native_collector_test.go
  12. 8
      collector/runit_collector.go
  13. 167
      exporter/exporter.go
  14. 1
      node_exporter.conf
  15. 129
      node_exporter.go

21
collector/collector.go

@ -0,0 +1,21 @@
// Exporter is a prometheus exporter using multiple Factories to collect and export system metrics.
package collector
import (
"github.com/prometheus/client_golang/prometheus"
)
// Factories holds the constructor for every available collector; each
// collector implementation appends its own factory here from an init().
var Factories []func(Config, prometheus.Registry) (Collector, error)
// Collector is the interface a collector has to implement.
type Collector interface {
// Update gets new metrics and exposes them via the prometheus registry,
// returning the number of metrics updated.
Update() (n int, err error)
// Name returns the name of the collector.
Name() string
}
// Config carries the user-supplied settings shared by all collectors.
type Config struct {
// Attributes are static key/value labels read from the JSON config file.
Attributes map[string]string `json:"attributes"`
}

38
collector/fixtures/diskstats

@ -0,0 +1,38 @@
1 0 ram0 0 0 0 0 0 0 0 0 0 0 0
1 1 ram1 0 0 0 0 0 0 0 0 0 0 0
1 2 ram2 0 0 0 0 0 0 0 0 0 0 0
1 3 ram3 0 0 0 0 0 0 0 0 0 0 0
1 4 ram4 0 0 0 0 0 0 0 0 0 0 0
1 5 ram5 0 0 0 0 0 0 0 0 0 0 0
1 6 ram6 0 0 0 0 0 0 0 0 0 0 0
1 7 ram7 0 0 0 0 0 0 0 0 0 0 0
1 8 ram8 0 0 0 0 0 0 0 0 0 0 0
1 9 ram9 0 0 0 0 0 0 0 0 0 0 0
1 10 ram10 0 0 0 0 0 0 0 0 0 0 0
1 11 ram11 0 0 0 0 0 0 0 0 0 0 0
1 12 ram12 0 0 0 0 0 0 0 0 0 0 0
1 13 ram13 0 0 0 0 0 0 0 0 0 0 0
1 14 ram14 0 0 0 0 0 0 0 0 0 0 0
1 15 ram15 0 0 0 0 0 0 0 0 0 0 0
7 0 loop0 0 0 0 0 0 0 0 0 0 0 0
7 1 loop1 0 0 0 0 0 0 0 0 0 0 0
7 2 loop2 0 0 0 0 0 0 0 0 0 0 0
7 3 loop3 0 0 0 0 0 0 0 0 0 0 0
7 4 loop4 0 0 0 0 0 0 0 0 0 0 0
7 5 loop5 0 0 0 0 0 0 0 0 0 0 0
7 6 loop6 0 0 0 0 0 0 0 0 0 0 0
7 7 loop7 0 0 0 0 0 0 0 0 0 0 0
8 0 sda 25354637 34367663 1003346126 18492372 28444756 11134226 505697032 63877960 0 9653880 82621804
8 1 sda1 250 0 2000 36 0 0 0 0 0 36 36
8 2 sda2 246 0 1968 32 0 0 0 0 0 32 32
8 3 sda3 340 13 2818 52 11 8 152 8 0 56 60
8 4 sda4 25353629 34367650 1003337964 18492232 27448755 11134218 505696880 61593380 0 7576432 80332428
252 0 dm-0 59910002 0 1003337218 46229572 39231014 0 505696880 1158557800 0 11325968 1206301256
252 1 dm-1 388 0 3104 84 74 0 592 0 0 76 84
252 2 dm-2 11571 0 308350 6536 153522 0 5093416 122884 0 65400 129416
252 3 dm-3 3870 0 3870 104 0 0 0 0 0 16 104
252 4 dm-4 392 0 1034 28 38 0 137 16 0 24 44
252 5 dm-5 3729 0 84279 924 98918 0 1151688 104684 0 58848 105632
179 0 mmcblk0 192 3 1560 156 0 0 0 0 0 136 156
179 1 mmcblk0p1 17 3 160 24 0 0 0 0 0 24 24
179 2 mmcblk0p2 95 0 760 68 0 0 0 0 0 68 68

31
collector/fixtures/interrupts

@ -0,0 +1,31 @@
CPU0 CPU1 CPU2 CPU3
0: 18 0 0 0 IR-IO-APIC-edge timer
1: 17960 105 28 28 IR-IO-APIC-edge i8042
8: 1 0 0 0 IR-IO-APIC-edge rtc0
9: 398553 2320 824 863 IR-IO-APIC-fasteoi acpi
12: 380847 1021 240 198 IR-IO-APIC-edge i8042
16: 328511 322879 293782 351412 IR-IO-APIC-fasteoi ehci_hcd:usb1, mmc0
23: 1451445 3333499 1092032 2644609 IR-IO-APIC-fasteoi ehci_hcd:usb2
40: 0 0 0 0 DMAR_MSI-edge dmar0
41: 0 0 0 0 DMAR_MSI-edge dmar1
42: 378324 1734637 440240 2434308 IR-PCI-MSI-edge xhci_hcd
43: 7434032 8092205 6478877 7492252 IR-PCI-MSI-edge ahci
44: 140636 226313 347 633 IR-PCI-MSI-edge i915
45: 4 22 0 0 IR-PCI-MSI-edge mei_me
46: 43078464 130 460171 290 IR-PCI-MSI-edge iwlwifi
47: 350 224 0 0 IR-PCI-MSI-edge snd_hda_intel
NMI: 47 5031 6211 4968 Non-maskable interrupts
LOC: 174326351 135776678 168393257 130980079 Local timer interrupts
SPU: 0 0 0 0 Spurious interrupts
PMI: 47 5031 6211 4968 Performance monitoring interrupts
IWI: 1509379 2411776 1512975 2428828 IRQ work interrupts
RTR: 0 0 0 0 APIC ICR read retries
RES: 10847134 9111507 15999335 7457260 Rescheduling interrupts
CAL: 148554 157441 142912 155528 Function call interrupts
TLB: 10460334 9918429 10494258 10345022 TLB shootdowns
TRM: 0 0 0 0 Thermal event interrupts
THR: 0 0 0 0 Threshold APIC interrupts
MCE: 0 0 0 0 Machine check exceptions
MCP: 2406 2399 2399 2399 Machine check polls
ERR: 0
MIS: 0

1
collector/fixtures/loadavg

@ -0,0 +1 @@
0.21 0.37 0.39 1/719 19737

42
collector/fixtures/meminfo

@ -0,0 +1,42 @@
MemTotal: 3742148 kB
MemFree: 225472 kB
Buffers: 22040 kB
Cached: 930888 kB
SwapCached: 192504 kB
Active: 2233416 kB
Inactive: 1028728 kB
Active(anon): 2020004 kB
Inactive(anon): 883052 kB
Active(file): 213412 kB
Inactive(file): 145676 kB
Unevictable: 32 kB
Mlocked: 32 kB
SwapTotal: 4194300 kB
SwapFree: 3155360 kB
Dirty: 1052 kB
Writeback: 0 kB
AnonPages: 2244172 kB
Mapped: 239220 kB
Shmem: 593840 kB
Slab: 98932 kB
SReclaimable: 44772 kB
SUnreclaim: 54160 kB
KernelStack: 5800 kB
PageTables: 75212 kB
NFS_Unstable: 0 kB
Bounce: 0 kB
WritebackTmp: 0 kB
CommitLimit: 6065372 kB
Committed_AS: 7835436 kB
VmallocTotal: 34359738367 kB
VmallocUsed: 352840 kB
VmallocChunk: 34359338876 kB
HardwareCorrupted: 0 kB
AnonHugePages: 0 kB
HugePages_Total: 0
HugePages_Free: 0
HugePages_Rsvd: 0
HugePages_Surp: 0
Hugepagesize: 2048 kB
DirectMap4k: 185660 kB
DirectMap2M: 3698688 kB

8
collector/fixtures/net-dev

@ -0,0 +1,8 @@
Inter-| Receive | Transmit
face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed
tun0: 1888 24 0 0 0 0 0 0 67120 934 0 0 0 0 0 0
veth4B09XN: 648 8 0 0 0 0 0 0 1943284 10640 0 0 0 0 0 0
lo: 435303245 1832522 0 0 0 0 0 0 435303245 1832522 0 0 0 0 0 0
lxcbr0: 0 0 0 0 0 0 0 0 2630299 28339 0 0 0 0 0 0
wlan0: 10437182923 13899359 0 0 0 0 0 0 2851649360 11726200 0 0 0 0 0 0
docker0: 64910168 1065585 0 0 0 0 0 0 2681662018 1929779 0 0 0 0 0 0

0
exporter/ganglia/format.go → collector/ganglia/format.go

13
exporter/gmond_collector.go → collector/gmond_collector.go

@ -1,17 +1,18 @@
// +build ganglia
package exporter
package collector
import (
"bufio"
"encoding/xml"
"fmt"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/node_exporter/exporter/ganglia"
"io"
"net"
"regexp"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/node_exporter/collector/ganglia"
)
const (
@ -23,18 +24,18 @@ const (
type gmondCollector struct {
name string
Metrics map[string]prometheus.Gauge
config config
config Config
registry prometheus.Registry
}
func init() {
collectorFactories = append(collectorFactories, NewGmondCollector)
Factories = append(Factories, NewGmondCollector)
}
var illegalCharsRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
// Takes a config struct and prometheus registry and returns a new Collector scraping ganglia.
func NewGmondCollector(config config, registry prometheus.Registry) (Collector, error) {
func NewGmondCollector(config Config, registry prometheus.Registry) (Collector, error) {
c := gmondCollector{
name: "gmond_collector",
config: config,

5
exporter/helper.go → collector/helper.go

@ -1,12 +1,15 @@
package exporter
package collector
import (
"flag"
"fmt"
"log"
"strconv"
"strings"
)
var verbose = flag.Bool("verbose", false, "Verbose output.")
func debug(name string, format string, a ...interface{}) {
if *verbose {
f := fmt.Sprintf("%s: %s", name, format)

72
exporter/native_collector.go → collector/native_collector.go

@ -1,11 +1,10 @@
// +build !nonative
package exporter
package collector
import (
"bufio"
"fmt"
"github.com/prometheus/client_golang/prometheus"
"io"
"io/ioutil"
"os"
@ -13,6 +12,8 @@ import (
"strconv"
"strings"
"time"
"github.com/prometheus/client_golang/prometheus"
)
const (
@ -42,16 +43,16 @@ type nativeCollector struct {
netStats prometheus.Counter
diskStats prometheus.Counter
name string
config config
config Config
}
func init() {
collectorFactories = append(collectorFactories, NewNativeCollector)
Factories = append(Factories, NewNativeCollector)
}
// Takes a config struct and prometheus registry and returns a new Collector exposing
// load, seconds since last login and a list of tags as specified by config.
func NewNativeCollector(config config, registry prometheus.Registry) (Collector, error) {
func NewNativeCollector(config Config, registry prometheus.Registry) (Collector, error) {
c := nativeCollector{
name: "native_collector",
config: config,
@ -160,7 +161,7 @@ func (c *nativeCollector) Update() (updates int, err error) {
updates++
fv, err := strconv.ParseFloat(value, 64)
if err != nil {
return updates, fmt.Errorf("Invalid value in interrupts: %s", fv, err)
return updates, fmt.Errorf("Invalid value %s in interrupts: %s", value, err)
}
labels := map[string]string{
"CPU": strconv.Itoa(cpuNo),
@ -217,7 +218,11 @@ func getLoad() (float64, error) {
if err != nil {
return 0, err
}
parts := strings.Fields(string(data))
return parseLoad(string(data))
}
func parseLoad(data string) (float64, error) {
parts := strings.Fields(data)
load, err := strconv.ParseFloat(parts[0], 64)
if err != nil {
return 0, fmt.Errorf("Could not parse load '%s': %s", parts[0], err)
@ -276,13 +281,17 @@ func getSecondsSinceLastLogin() (float64, error) {
}
func getMemInfo() (map[string]string, error) {
memInfo := map[string]string{}
fh, err := os.Open(procMemInfo)
file, err := os.Open(procMemInfo)
if err != nil {
return nil, err
}
defer fh.Close()
scanner := bufio.NewScanner(fh)
return parseMemInfo(file)
}
func parseMemInfo(r io.ReadCloser) (map[string]string, error) {
defer r.Close()
memInfo := map[string]string{}
scanner := bufio.NewScanner(r)
for scanner.Scan() {
line := scanner.Text()
parts := strings.Fields(string(line))
@ -308,13 +317,17 @@ type interrupt struct {
}
func getInterrupts() (map[string]interrupt, error) {
interrupts := map[string]interrupt{}
fh, err := os.Open(procInterrupts)
file, err := os.Open(procInterrupts)
if err != nil {
return nil, err
}
defer fh.Close()
scanner := bufio.NewScanner(fh)
return parseInterrupts(file)
}
func parseInterrupts(r io.ReadCloser) (map[string]interrupt, error) {
defer r.Close()
interrupts := map[string]interrupt{}
scanner := bufio.NewScanner(r)
if !scanner.Scan() {
return nil, fmt.Errorf("%s empty", procInterrupts)
}
@ -343,15 +356,20 @@ func getInterrupts() (map[string]interrupt, error) {
}
func getNetStats() (map[string]map[string]map[string]string, error) {
netStats := map[string]map[string]map[string]string{}
netStats["transmit"] = map[string]map[string]string{}
netStats["receive"] = map[string]map[string]string{}
fh, err := os.Open(procNetDev)
file, err := os.Open(procNetDev)
if err != nil {
return nil, err
}
defer fh.Close()
scanner := bufio.NewScanner(fh)
return parseNetStats(file)
}
func parseNetStats(r io.ReadCloser) (map[string]map[string]map[string]string, error) {
defer r.Close()
netStats := map[string]map[string]map[string]string{}
netStats["transmit"] = map[string]map[string]string{}
netStats["receive"] = map[string]map[string]string{}
scanner := bufio.NewScanner(r)
scanner.Scan() // skip first header
scanner.Scan()
parts := strings.Split(string(scanner.Text()), "|")
@ -392,13 +410,17 @@ func parseNetDevLine(parts []string, header []string) (map[string]string, error)
}
func getDiskStats() (map[string]map[string]string, error) {
diskStats := map[string]map[string]string{}
fh, err := os.Open(procDiskStats)
file, err := os.Open(procDiskStats)
if err != nil {
return nil, err
}
defer fh.Close()
scanner := bufio.NewScanner(fh)
return parseDiskStats(file)
}
func parseDiskStats(r io.ReadCloser) (map[string]map[string]string, error) {
defer r.Close()
diskStats := map[string]map[string]string{}
scanner := bufio.NewScanner(r)
for scanner.Scan() {
parts := strings.Fields(string(scanner.Text()))
if len(parts) != len(diskStatsHeader)+3 { // we strip major, minor and dev

118
collector/native_collector_test.go

@ -0,0 +1,118 @@
package collector
import (
"io/ioutil"
"os"
"testing"
)
const (
loadExpected = 0.21
memTotalExpected = "3742148"
memDirectMap2MExpected = "3698688"
interruptsNmi1Expected = "5031"
netReceiveWlan0Bytes = "10437182923"
netTransmitTun0Packages = "934"
diskSda4ReadsCompleted = "25353629"
diskMmcIoTimeWeighted = "68"
testProcLoad = "fixtures/loadavg"
testProcMemInfo = "fixtures/meminfo"
testProcInterrupts = "fixtures/interrupts"
testProcNetDev = "fixtures/net-dev"
testProcDiskStats = "fixtures/diskstats"
)
// TestLoad checks that parseLoad extracts the one-minute load average
// from the loadavg fixture.
func TestLoad(t *testing.T) {
	raw, readErr := ioutil.ReadFile(testProcLoad)
	if readErr != nil {
		t.Fatal(readErr)
	}
	got, parseErr := parseLoad(string(raw))
	if parseErr != nil {
		t.Fatal(parseErr)
	}
	if got == loadExpected {
		return
	}
	t.Fatalf("Unexpected load: %f != %f", got, loadExpected)
}
// TestMemInfo checks that parseMemInfo picks up both the first
// (MemTotal) and the last (DirectMap2M) entries of the meminfo fixture.
func TestMemInfo(t *testing.T) {
	file, err := os.Open(testProcMemInfo)
	if err != nil {
		t.Fatal(err)
	}
	memInfo, err := parseMemInfo(file)
	if err != nil {
		t.Fatal(err)
	}
	if memInfo["MemTotal_kB"] != memTotalExpected {
		t.Fatalf("Unexpected memory: %s != %s", memInfo["MemTotal_kB"], memTotalExpected)
	}
	// BUG FIX: the failure message previously printed the MemTotal values
	// instead of the DirectMap2M values actually being compared.
	if memInfo["DirectMap2M_kB"] != memDirectMap2MExpected {
		t.Fatalf("Unexpected memory: %s != %s", memInfo["DirectMap2M_kB"], memDirectMap2MExpected)
	}
}
// TestInterrupts verifies parseInterrupts against the interrupts fixture
// by spot-checking the CPU1 counter of the NMI row.
func TestInterrupts(t *testing.T) {
	fh, openErr := os.Open(testProcInterrupts)
	if openErr != nil {
		t.Fatal(openErr)
	}
	parsed, parseErr := parseInterrupts(fh)
	if parseErr != nil {
		t.Fatal(parseErr)
	}
	got := parsed["NMI"].values[1]
	if got != interruptsNmi1Expected {
		t.Fatalf("Unexpected interrupts: %s != %s", got,
			interruptsNmi1Expected)
	}
}
// TestNetStats verifies parseNetStats against the net-dev fixture for one
// receive counter and one transmit counter.
func TestNetStats(t *testing.T) {
	fh, err := os.Open(testProcNetDev)
	if err != nil {
		t.Fatal(err)
	}
	stats, err := parseNetStats(fh)
	if err != nil {
		t.Fatal(err)
	}
	for _, tc := range []struct {
		direction, device, field, want string
	}{
		{"receive", "wlan0", "bytes", netReceiveWlan0Bytes},
		{"transmit", "tun0", "packets", netTransmitTun0Packages},
	} {
		if got := stats[tc.direction][tc.device][tc.field]; got != tc.want {
			t.Fatalf("Unexpected netstats: %s != %s", got,
				tc.want)
		}
	}
}
// TestDiskStats verifies parseDiskStats against the diskstats fixture by
// spot-checking two devices.
func TestDiskStats(t *testing.T) {
	fh, err := os.Open(testProcDiskStats)
	if err != nil {
		t.Fatal(err)
	}
	stats, err := parseDiskStats(fh)
	if err != nil {
		t.Fatal(err)
	}
	for _, tc := range []struct {
		device, field, want string
	}{
		{"sda4", "reads_completed", diskSda4ReadsCompleted},
		{"mmcblk0p2", "io_time_weighted", diskMmcIoTimeWeighted},
	} {
		if got := stats[tc.device][tc.field]; got != tc.want {
			t.Fatalf("Unexpected diskstats: %s != %s",
				got, tc.want)
		}
	}
}

8
exporter/runit_collector.go → collector/runit_collector.go

@ -1,6 +1,6 @@
// +build runit
package exporter
package collector
import (
"github.com/prometheus/client_golang/prometheus"
@ -9,17 +9,17 @@ import (
type runitCollector struct {
name string
config config
config Config
state prometheus.Gauge
stateDesired prometheus.Gauge
stateNormal prometheus.Gauge
}
func init() {
collectorFactories = append(collectorFactories, NewRunitCollector)
Factories = append(Factories, NewRunitCollector)
}
func NewRunitCollector(config config, registry prometheus.Registry) (Collector, error) {
func NewRunitCollector(config Config, registry prometheus.Registry) (Collector, error) {
c := runitCollector{
name: "runit_collector",
config: config,

167
exporter/exporter.go

@ -1,167 +0,0 @@
// Exporter is a prometheus exporter using multiple collectorFactories to collect and export system metrics.
package exporter
import (
"encoding/json"
"flag"
"fmt"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/exp"
"io/ioutil"
"log"
"net/http"
"os"
"os/signal"
"runtime/pprof"
"sync"
"syscall"
"time"
)
var verbose = flag.Bool("verbose", false, "Verbose output.")
var collectorFactories []func(config, prometheus.Registry) (Collector, error)
// Interface a collector has to implement.
type Collector interface {
// Get new metrics and expose them via prometheus registry.
Update() (n int, err error)
// Returns the name of the collector
Name() string
}
type config struct {
Attributes map[string]string `json:"attributes"`
ListeningAddress string `json:"listeningAddress"`
ScrapeInterval int `json:"scrapeInterval"`
}
// loadConfig re-reads the exporter's JSON config file into e.config.
// Called at startup (New) and again on SIGHUP from Loop.
func (e *exporter) loadConfig() (err error) {
log.Printf("Reading config %s", e.configFile)
bytes, err := ioutil.ReadFile(e.configFile)
if err != nil {
return
}
return json.Unmarshal(bytes, &e.config) // Make sure this is safe
}
// exporter bundles config, registry, instrumentation and the set of
// instantiated collectors driven by Loop.
type exporter struct {
configFile string
listeningAddress string // where serveStatus listens; defaults to ":8080"
scrapeInterval time.Duration // defaults to 60s; overridable via config
scrapeDurations prometheus.Histogram
metricsUpdated prometheus.Gauge
config config
registry prometheus.Registry
Collectors []Collector
MemProfile string // if non-empty, SIGUSR1 writes a heap profile here
}
// New takes the path to a config file and returns an exporter instance
// with defaults applied, the config loaded, one collector instantiated
// per registered factory, and the exporter's own instrumentation
// registered on a fresh registry.
func New(configFile string) (e exporter, err error) {
registry := prometheus.NewRegistry()
e = exporter{
configFile: configFile,
scrapeDurations: prometheus.NewDefaultHistogram(),
metricsUpdated: prometheus.NewGauge(),
listeningAddress: ":8080", // default; may be overridden by config below
scrapeInterval: 60 * time.Second, // default; may be overridden by config below
registry: registry,
}
err = e.loadConfig()
if err != nil {
return e, fmt.Errorf("Couldn't read config: %s", err)
}
// Any single factory failure aborts construction.
for _, fn := range collectorFactories {
c, err := fn(e.config, e.registry)
if err != nil {
return e, err
}
e.Collectors = append(e.Collectors, c)
}
// Non-zero config values override the defaults set above.
if e.config.ListeningAddress != "" {
e.listeningAddress = e.config.ListeningAddress
}
if e.config.ScrapeInterval != 0 {
e.scrapeInterval = time.Duration(e.config.ScrapeInterval) * time.Second
}
registry.Register("node_exporter_scrape_duration_seconds", "node_exporter: Duration of a scrape job.", prometheus.NilLabels, e.scrapeDurations)
registry.Register("node_exporter_metrics_updated", "node_exporter: Number of metrics updated.", prometheus.NilLabels, e.metricsUpdated)
return e, nil
}
// serveStatus exposes the registry's metrics over HTTP. It blocks, so it
// is started in its own goroutine by Loop.
// NOTE(review): the ListenAndServe error is discarded — a failed bind
// (e.g. port in use) goes unnoticed.
func (e *exporter) serveStatus() {
exp.Handle(prometheus.ExpositionResource, e.registry.Handler())
http.ListenAndServe(e.listeningAddress, exp.DefaultCoarseMux)
}
// Execute runs a single collector update, logging the outcome and
// recording duration and metric count in the exporter's instrumentation,
// labelled by collector name and result.
func (e *exporter) Execute(c Collector) {
begin := time.Now()
updates, err := c.Update()
duration := time.Since(begin)
label := map[string]string{
"collector": c.Name(),
}
if err != nil {
log.Printf("ERROR: %s failed after %fs: %s", c.Name(), duration.Seconds(), err)
label["result"] = "error"
} else {
log.Printf("OK: %s success after %fs.", c.Name(), duration.Seconds())
label["result"] = "success"
}
e.scrapeDurations.Add(label, duration.Seconds())
e.metricsUpdated.Set(label, float64(updates))
}
// Loop starts the HTTP status server and then runs every collector once
// per scrapeInterval. SIGHUP reloads the config; SIGUSR1 writes a heap
// profile to MemProfile (if set). Loop never returns.
func (e *exporter) Loop() {
sigHup := make(chan os.Signal)
sigUsr1 := make(chan os.Signal)
signal.Notify(sigHup, syscall.SIGHUP)
signal.Notify(sigUsr1, syscall.SIGUSR1)
go e.serveStatus()
tick := time.Tick(e.scrapeInterval)
for {
select {
case <-sigHup:
// Reload in place; on failure keep running with the old config.
err := e.loadConfig()
if err != nil {
log.Printf("Couldn't reload config: %s", err)
continue
}
log.Printf("Got new config")
tick = time.Tick(e.scrapeInterval) // pick up a changed interval
case <-tick:
log.Printf("Starting new scrape interval")
// Run all collectors concurrently and wait for the slowest.
wg := sync.WaitGroup{}
wg.Add(len(e.Collectors))
for _, c := range e.Collectors {
go func(c Collector) {
e.Execute(c)
wg.Done()
}(c)
}
wg.Wait()
case <-sigUsr1:
log.Printf("got signal")
if e.MemProfile != "" {
log.Printf("Writing memory profile to %s", e.MemProfile)
f, err := os.Create(e.MemProfile)
if err != nil {
log.Fatal(err)
}
pprof.WriteHeapProfile(f)
f.Close()
}
}
}
}

1
node_exporter.conf

@ -1,5 +1,4 @@
{
"scrapeInterval": 10,
"attributes" : {
"web-server" : "1",
"zone" : "a",

129
node_exporter.go

@ -1,27 +1,140 @@
package main
import (
"encoding/json"
"flag"
"io/ioutil"
"log"
"net/http"
"os"
"os/signal"
"runtime/pprof"
"sync"
"syscall"
"time"
"github.com/prometheus/node_exporter/exporter"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/exp"
"github.com/prometheus/node_exporter/collector"
)
var (
configFile = flag.String("config", "node_exporter.conf", "config file.")
memprofile = flag.String("memprofile", "", "write memory profile to this file")
configFile = flag.String("config", "node_exporter.conf", "config file.")
memProfile = flag.String("memprofile", "", "write memory profile to this file")
listeningAddress = flag.String("listen", ":8080", "address to listen on")
interval = flag.Duration("interval", 60*time.Second, "refresh interval")
scrapeDurations = prometheus.NewDefaultHistogram()
metricsUpdated = prometheus.NewGauge()
)
func main() {
flag.Parse()
exporter, err := exporter.New(*configFile)
registry := prometheus.NewRegistry()
collectors, err := loadCollectors(*configFile, registry)
if err != nil {
log.Fatalf("Couldn't instantiate exporter: %s", err)
log.Fatalf("Couldn't load config and collectors: %s", err)
}
registry.Register("node_exporter_scrape_duration_seconds", "node_exporter: Duration of a scrape job.", prometheus.NilLabels, scrapeDurations)
registry.Register("node_exporter_metrics_updated", "node_exporter: Number of metrics updated.", prometheus.NilLabels, metricsUpdated)
log.Printf("Registered collectors:")
for _, c := range exporter.Collectors {
for _, c := range collectors {
log.Print(" - ", c.Name())
}
exporter.Loop()
sigHup := make(chan os.Signal)
sigUsr1 := make(chan os.Signal)
signal.Notify(sigHup, syscall.SIGHUP)
signal.Notify(sigUsr1, syscall.SIGUSR1)
go serveStatus(registry)
tick := time.Tick(*interval)
for {
select {
case <-sigHup:
collectors, err = loadCollectors(*configFile, registry)
if err != nil {
log.Fatalf("Couldn't load config and collectors: %s", err)
}
log.Printf("Reloaded collectors and config")
tick = time.Tick(*interval)
case <-tick:
log.Printf("Starting new interval")
wg := sync.WaitGroup{}
wg.Add(len(collectors))
for _, c := range collectors {
go func(c collector.Collector) {
Execute(c)
wg.Done()
}(c)
}
wg.Wait()
case <-sigUsr1:
log.Printf("got signal")
if *memProfile != "" {
log.Printf("Writing memory profile to %s", *memProfile)
f, err := os.Create(*memProfile)
if err != nil {
log.Fatal(err)
}
pprof.WriteHeapProfile(f)
f.Close()
}
}
}
}
// loadCollectors reads the config from file and instantiates one
// collector per registered factory, all bound to the given registry.
func loadCollectors(file string, registry prometheus.Registry) ([]collector.Collector, error) {
	config, err := getConfig(file)
	if err != nil {
		// BUG FIX: this previously called log.Fatalf, bypassing the
		// error return that every caller already checks and handles.
		return nil, err
	}
	collectors := make([]collector.Collector, 0, len(collector.Factories))
	for _, fn := range collector.Factories {
		c, err := fn(*config, registry)
		if err != nil {
			return nil, err
		}
		collectors = append(collectors, c)
	}
	return collectors, nil
}
// getConfig parses the JSON configuration in file into a collector.Config.
func getConfig(file string) (*collector.Config, error) {
	config := &collector.Config{}
	// BUG FIX: previously this read the package-level *configFile flag
	// and ignored the file argument supplied by the caller.
	log.Printf("Reading config %s", file)
	bytes, err := ioutil.ReadFile(file)
	if err != nil {
		return nil, err
	}
	// config is already a pointer; no need to take its address again.
	return config, json.Unmarshal(bytes, config)
}
// serveStatus exposes the registry's metrics over HTTP. It blocks, so it
// is started in its own goroutine by main.
func serveStatus(registry prometheus.Registry) {
	exp.Handle(prometheus.ExpositionResource, registry.Handler())
	// BUG FIX: the ListenAndServe error (e.g. address already in use) was
	// silently dropped, leaving the process running without a listener.
	log.Fatal(http.ListenAndServe(*listeningAddress, exp.DefaultCoarseMux))
}
// Execute runs a single collector update, logging the outcome and
// recording duration and metric count in the package-level
// instrumentation, labelled by collector name and result.
func Execute(c collector.Collector) {
	start := time.Now()
	updates, err := c.Update()
	elapsed := time.Since(start)
	labels := map[string]string{
		"collector": c.Name(),
	}
	if err == nil {
		log.Printf("OK: %s success after %fs.", c.Name(), elapsed.Seconds())
		labels["result"] = "success"
	} else {
		log.Printf("ERROR: %s failed after %fs: %s", c.Name(), elapsed.Seconds(), err)
		labels["result"] = "error"
	}
	scrapeDurations.Add(labels, elapsed.Seconds())
	metricsUpdated.Set(labels, float64(updates))
}

Loading…
Cancel
Save