Better error handling when collecting CPU temps
Log why we couldn't collect the temperature, and set metric to NaN if the CPU should support temperature collection but had an error.pull/397/head
parent
782eaee100
commit
d827db8e17
|
@ -17,10 +17,12 @@ package collector
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math"
|
||||||
"strconv"
|
"strconv"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
"github.com/prometheus/common/log"
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -130,10 +132,18 @@ func (c *statCollector) Update(ch chan<- prometheus.Metric) (err error) {
|
||||||
ch <- c.cpu.mustNewConstMetric(float64(t.idle), lcpu, "idle")
|
ch <- c.cpu.mustNewConstMetric(float64(t.idle), lcpu, "idle")
|
||||||
|
|
||||||
temp, err := unix.SysctlUint32(fmt.Sprintf("dev.cpu.%d.temperature", cpu))
|
temp, err := unix.SysctlUint32(fmt.Sprintf("dev.cpu.%d.temperature", cpu))
|
||||||
if err == nil {
|
if err != nil {
|
||||||
ftemp := float64(temp-2732) / 10
|
if err == unix.ENOENT {
|
||||||
ch <- c.temp.mustNewConstMetric(ftemp, lcpu)
|
// No temperature information for this CPU
|
||||||
|
log.Debugf("no temperature information for CPU %d", cpu)
|
||||||
|
} else {
|
||||||
|
// Unexpected error
|
||||||
|
ch <- c.temp.mustNewConstMetric(math.NaN(), lcpu)
|
||||||
|
log.Errorf("failed to query CPU temperature for CPU %d: %s", cpu, err)
|
||||||
|
}
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
ch <- c.temp.mustNewConstMetric(float64(temp-2732)/10, lcpu)
|
||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue