Exporter for machine metrics
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

277 lines
6.6 KiB

// Copyright 2023 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build !nocpu
// +build !nocpu
package collector
import (
"errors"
"math"
"regexp"
"sort"
"strconv"
"strings"
"unsafe"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"golang.org/x/sys/unix"
"howett.net/plist"
)
type clockinfo struct {
hz int32 // clock frequency
tick int32 // micro-seconds per hz tick
spare int32
stathz int32 // statistics clock frequency
profhz int32 // profiling clock frequency
}
type cputime struct {
user float64
nice float64
sys float64
intr float64
idle float64
}
type plistref struct {
pref_plist unsafe.Pointer
pref_len uint64
}
type sysmonValues struct {
CurValue int `plist:"cur-value"`
Description string `plist:"description"`
State string `plist:"state"`
Type string `plist:"type"`
}
type sysmonProperty []sysmonValues
type sysmonProperties map[string]sysmonProperty
func readBytes(ptr unsafe.Pointer, length uint64) []byte {
buf := make([]byte, length-1)
var i uint64
for ; i < length-1; i++ {
buf[i] = *(*byte)(unsafe.Pointer(uintptr(ptr) + uintptr(i)))
}
return buf
}
func ioctl(fd int, nr int64, typ byte, size uintptr, retptr unsafe.Pointer) error {
_, _, errno := unix.Syscall(
unix.SYS_IOCTL,
uintptr(fd),
// Some magicks derived from sys/ioccom.h.
uintptr((0x40000000|0x80000000)|
((int64(size)&(1<<13-1))<<16)|
(int64(typ)<<8)|
nr,
),
uintptr(retptr),
)
if errno != 0 {
return errno
}
return nil
}
func readSysmonProperties() (sysmonProperties, error) {
fd, err := unix.Open(rootfsFilePath("/dev/sysmon"), unix.O_RDONLY, 0777)
if err != nil {
return nil, err
}
defer unix.Close(fd)
var retptr plistref
if err = ioctl(fd, 0, 'E', unsafe.Sizeof(retptr), unsafe.Pointer(&retptr)); err != nil {
return nil, err
}
bytes := readBytes(retptr.pref_plist, retptr.pref_len)
var props sysmonProperties
if _, err = plist.Unmarshal(bytes, &props); err != nil {
return nil, err
}
return props, nil
}
func sortFilterSysmonProperties(props sysmonProperties, prefix string) []string {
var keys []string
for key := range props {
if !strings.HasPrefix(key, prefix) {
continue
}
keys = append(keys, key)
}
sort.Strings(keys)
return keys
}
func convertTemperatures(prop sysmonProperty, res map[int]float64) error {
for _, val := range prop {
if val.State == "invalid" || val.State == "unknown" || val.State == "" {
continue
}
re := regexp.MustCompile("^cpu([0-9]+) temperature$")
core := re.FindStringSubmatch(val.Description)[1]
ncore, _ := strconv.Atoi(core)
temperature := ((float64(uint64(val.CurValue))) / 1000000) - 273.15
res[ncore] = temperature
}
return nil
}
func getCPUTemperatures() (map[int]float64, error) {
res := make(map[int]float64)
// Read all properties
props, err := readSysmonProperties()
if err != nil {
return res, err
}
keys := sortFilterSysmonProperties(props, "coretemp")
for idx, _ := range keys {
convertTemperatures(props[keys[idx]], res)
}
return res, nil
}
func getCPUTimes() ([]cputime, error) {
const states = 5
clockb, err := unix.SysctlRaw("kern.clockrate")
if err != nil {
return nil, err
}
clock := *(*clockinfo)(unsafe.Pointer(&clockb[0]))
var cpufreq float64
if clock.stathz > 0 {
cpufreq = float64(clock.stathz)
} else {
cpufreq = float64(clock.hz)
}
ncpusb, err := unix.SysctlRaw("hw.ncpu")
if err != nil {
return nil, err
}
ncpus := *(*int)(unsafe.Pointer(&ncpusb[0]))
if ncpus < 1 {
return nil, errors.New("Invalid cpu number")
}
var times []float64
for ncpu := 0; ncpu < ncpus; ncpu++ {
cpb, err := unix.SysctlRaw("kern.cp_time", ncpu)
if err != nil {
return nil, err
}
for len(cpb) >= int(unsafe.Sizeof(int(0))) {
t := *(*int)(unsafe.Pointer(&cpb[0]))
times = append(times, float64(t)/cpufreq)
cpb = cpb[unsafe.Sizeof(int(0)):]
}
}
cpus := make([]cputime, len(times)/states)
for i := 0; i < len(times); i += states {
cpu := &cpus[i/states]
cpu.user = times[i]
cpu.nice = times[i+1]
cpu.sys = times[i+2]
cpu.intr = times[i+3]
cpu.idle = times[i+4]
}
return cpus, nil
}
type statCollector struct {
cpu typedDesc
temp typedDesc
logger log.Logger
}
func init() {
registerCollector("cpu", defaultEnabled, NewStatCollector)
}
// NewStatCollector returns a new Collector exposing CPU stats.
func NewStatCollector(logger log.Logger) (Collector, error) {
return &statCollector{
cpu: typedDesc{nodeCPUSecondsDesc, prometheus.CounterValue},
temp: typedDesc{prometheus.NewDesc(
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "temperature_celsius"),
"CPU temperature",
[]string{"cpu"}, nil,
), prometheus.GaugeValue},
logger: logger,
}, nil
}
// Expose CPU stats using sysctl.
func (c *statCollector) Update(ch chan<- prometheus.Metric) error {
// We want time spent per-cpu per CPUSTATE.
// CPUSTATES (number of CPUSTATES) is defined as 5U.
// Order: CP_USER | CP_NICE | CP_SYS | CP_IDLE | CP_INTR
// sysctl kern.cp_time.x provides CPUSTATES long integers:
// (space-separated list of the above variables, where
// x stands for the number of the CPU core)
//
// Each value is a counter incremented at frequency
// kern.clockrate.(stathz | hz)
//
// Look into sys/kern/kern_clock.c for details.
cpuTimes, err := getCPUTimes()
if err != nil {
return err
}
cpuTemperatures, err := getCPUTemperatures()
if err != nil {
return err
}
for cpu, t := range cpuTimes {
lcpu := strconv.Itoa(cpu)
ch <- c.cpu.mustNewConstMetric(float64(t.user), lcpu, "user")
ch <- c.cpu.mustNewConstMetric(float64(t.nice), lcpu, "nice")
ch <- c.cpu.mustNewConstMetric(float64(t.sys), lcpu, "system")
ch <- c.cpu.mustNewConstMetric(float64(t.intr), lcpu, "interrupt")
ch <- c.cpu.mustNewConstMetric(float64(t.idle), lcpu, "idle")
if temp, ok := cpuTemperatures[cpu]; ok {
ch <- c.temp.mustNewConstMetric(temp, lcpu)
} else {
level.Debug(c.logger).Log("msg", "no temperature information for CPU", "cpu", cpu)
ch <- c.temp.mustNewConstMetric(math.NaN(), lcpu)
}
}
return err
}