Better error handling when collecting CPU temps

Log why we couldn't collect the temperature. If the sysctl does not
exist (ENOENT), the CPU has no temperature information and no metric is
emitted; on any other error, set the metric to NaN.
pull/397/head
Dominik Honnef 2017-01-05 06:47:13 +01:00
parent 782eaee100
commit d827db8e17
1 changed file with 13 additions and 3 deletions

View File

@@ -17,10 +17,12 @@ package collector
import (
"fmt"
"math"
"strconv"
"unsafe"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
"golang.org/x/sys/unix"
)
@@ -130,10 +132,18 @@ func (c *statCollector) Update(ch chan<- prometheus.Metric) (err error) {
ch <- c.cpu.mustNewConstMetric(float64(t.idle), lcpu, "idle")
temp, err := unix.SysctlUint32(fmt.Sprintf("dev.cpu.%d.temperature", cpu))
if err == nil {
ftemp := float64(temp-2732) / 10
ch <- c.temp.mustNewConstMetric(ftemp, lcpu)
if err != nil {
if err == unix.ENOENT {
// No temperature information for this CPU
log.Debugf("no temperature information for CPU %d", cpu)
} else {
// Unexpected error
ch <- c.temp.mustNewConstMetric(math.NaN(), lcpu)
log.Errorf("failed to query CPU temperature for CPU %d: %s", cpu, err)
}
continue
}
ch <- c.temp.mustNewConstMetric(float64(temp-2732)/10, lcpu)
}
return err
}