Report collector success/failure and duration per scrape. (#516)
This is in line with best practices, and also saves us 63 time series on a default Linux setup.
pull/513/head
parent
7426dc9460
commit
a02e469b07
|
@ -457,8 +457,6 @@ node_edac_uncorrectable_errors_total{controller="0"} 5
|
|||
node_entropy_available_bits 1337
|
||||
# HELP node_exporter_build_info A metric with a constant '1' value labeled by version, revision, branch, and goversion from which node_exporter was built.
|
||||
# TYPE node_exporter_build_info gauge
|
||||
# HELP node_exporter_scrape_duration_seconds node_exporter: Duration of a scrape job.
|
||||
# TYPE node_exporter_scrape_duration_seconds summary
|
||||
# HELP node_filefd_allocated File descriptor statistics: allocated.
|
||||
# TYPE node_filefd_allocated gauge
|
||||
node_filefd_allocated 1024
|
||||
|
@ -2072,6 +2070,35 @@ node_procs_blocked 0
|
|||
# HELP node_procs_running Number of processes in runnable state.
|
||||
# TYPE node_procs_running gauge
|
||||
node_procs_running 2
|
||||
# HELP node_scrape_collector_duration_seconds node_exporter: Duration of a collector scrape.
|
||||
# TYPE node_scrape_collector_duration_seconds gauge
|
||||
# HELP node_scrape_collector_success node_exporter: Whether a collector succeeded.
|
||||
# TYPE node_scrape_collector_success gauge
|
||||
node_scrape_collector_success{collector="bonding"} 1
|
||||
node_scrape_collector_success{collector="buddyinfo"} 1
|
||||
node_scrape_collector_success{collector="conntrack"} 1
|
||||
node_scrape_collector_success{collector="diskstats"} 1
|
||||
node_scrape_collector_success{collector="drbd"} 1
|
||||
node_scrape_collector_success{collector="edac"} 1
|
||||
node_scrape_collector_success{collector="entropy"} 1
|
||||
node_scrape_collector_success{collector="filefd"} 1
|
||||
node_scrape_collector_success{collector="hwmon"} 1
|
||||
node_scrape_collector_success{collector="infiniband"} 1
|
||||
node_scrape_collector_success{collector="ksmd"} 1
|
||||
node_scrape_collector_success{collector="loadavg"} 1
|
||||
node_scrape_collector_success{collector="mdadm"} 1
|
||||
node_scrape_collector_success{collector="megacli"} 1
|
||||
node_scrape_collector_success{collector="meminfo"} 1
|
||||
node_scrape_collector_success{collector="meminfo_numa"} 1
|
||||
node_scrape_collector_success{collector="mountstats"} 1
|
||||
node_scrape_collector_success{collector="netdev"} 1
|
||||
node_scrape_collector_success{collector="netstat"} 1
|
||||
node_scrape_collector_success{collector="nfs"} 1
|
||||
node_scrape_collector_success{collector="sockstat"} 1
|
||||
node_scrape_collector_success{collector="stat"} 1
|
||||
node_scrape_collector_success{collector="textfile"} 1
|
||||
node_scrape_collector_success{collector="wifi"} 1
|
||||
node_scrape_collector_success{collector="zfs"} 1
|
||||
# HELP node_sockstat_FRAG_inuse Number of FRAG sockets in state inuse.
|
||||
# TYPE node_sockstat_FRAG_inuse gauge
|
||||
node_sockstat_FRAG_inuse 0
|
||||
|
|
|
@ -35,7 +35,7 @@ cd "$(dirname $0)"
|
|||
port="$((10000 + (RANDOM % 10000)))"
|
||||
tmpdir=$(mktemp -d /tmp/node_exporter_e2e_test.XXXXXX)
|
||||
|
||||
skip_re="^(go_|node_exporter_|process_|node_textfile_mtime)"
|
||||
skip_re="^(go_|node_exporter_build_info|node_scrape_collector_duration_seconds|process_|node_textfile_mtime)"
|
||||
|
||||
keep=0; update=0; verbose=0
|
||||
while getopts 'hkuv' opt
|
||||
|
|
|
@ -36,14 +36,17 @@ const (
|
|||
)
|
||||
|
||||
var (
|
||||
scrapeDurations = prometheus.NewSummaryVec(
|
||||
prometheus.SummaryOpts{
|
||||
Namespace: collector.Namespace,
|
||||
Subsystem: "exporter",
|
||||
Name: "scrape_duration_seconds",
|
||||
Help: "node_exporter: Duration of a scrape job.",
|
||||
},
|
||||
[]string{"collector", "result"},
|
||||
scrapeDurationDesc = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(collector.Namespace, "scrape", "collector_duration_seconds"),
|
||||
"node_exporter: Duration of a collector scrape.",
|
||||
[]string{"collector"},
|
||||
nil,
|
||||
)
|
||||
scrapeSuccessDesc = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(collector.Namespace, "scrape", "collector_success"),
|
||||
"node_exporter: Whether a collector succeeded.",
|
||||
[]string{"collector"},
|
||||
nil,
|
||||
)
|
||||
)
|
||||
|
||||
|
@ -54,7 +57,8 @@ type NodeCollector struct {
|
|||
|
||||
// Describe implements the prometheus.Collector interface.
|
||||
func (n NodeCollector) Describe(ch chan<- *prometheus.Desc) {
|
||||
scrapeDurations.Describe(ch)
|
||||
ch <- scrapeDurationDesc
|
||||
ch <- scrapeSuccessDesc
|
||||
}
|
||||
|
||||
// Collect implements the prometheus.Collector interface.
|
||||
|
@ -68,7 +72,6 @@ func (n NodeCollector) Collect(ch chan<- prometheus.Metric) {
|
|||
}(name, c)
|
||||
}
|
||||
wg.Wait()
|
||||
scrapeDurations.Collect(ch)
|
||||
}
|
||||
|
||||
func filterAvailableCollectors(collectors string) string {
|
||||
|
@ -86,16 +89,17 @@ func execute(name string, c collector.Collector, ch chan<- prometheus.Metric) {
|
|||
begin := time.Now()
|
||||
err := c.Update(ch)
|
||||
duration := time.Since(begin)
|
||||
var result string
|
||||
var success float64
|
||||
|
||||
if err != nil {
|
||||
log.Errorf("ERROR: %s collector failed after %fs: %s", name, duration.Seconds(), err)
|
||||
result = "error"
|
||||
success = 0
|
||||
} else {
|
||||
log.Debugf("OK: %s collector succeeded after %fs.", name, duration.Seconds())
|
||||
result = "success"
|
||||
success = 1
|
||||
}
|
||||
scrapeDurations.WithLabelValues(name, result).Observe(duration.Seconds())
|
||||
ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, duration.Seconds(), name)
|
||||
ch <- prometheus.MustNewConstMetric(scrapeSuccessDesc, prometheus.GaugeValue, success, name)
|
||||
}
|
||||
|
||||
func loadCollectors(list string) (map[string]collector.Collector, error) {
|
||||
|
|
Loading…
Reference in New Issue