prometheusmetricshost-metricsmachine-metricsnode-metricsprocfsprometheus-exportersystem-informationsystem-metrics
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
237 lines
8.3 KiB
237 lines
8.3 KiB
// Copyright 2015 The Prometheus Authors |
|
// Licensed under the Apache License, Version 2.0 (the "License"); |
|
// you may not use this file except in compliance with the License. |
|
// You may obtain a copy of the License at |
|
// |
|
// http://www.apache.org/licenses/LICENSE-2.0 |
|
// |
|
// Unless required by applicable law or agreed to in writing, software |
|
// distributed under the License is distributed on an "AS IS" BASIS, |
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
// See the License for the specific language governing permissions and |
|
// limitations under the License. |
|
|
|
// +build !nocpu |
|
|
|
package collector |
|
|
|
import ( |
|
"fmt" |
|
"path/filepath" |
|
"strconv" |
|
|
|
"github.com/prometheus/client_golang/prometheus" |
|
"github.com/prometheus/common/log" |
|
"github.com/prometheus/procfs" |
|
"github.com/prometheus/procfs/sysfs" |
|
) |
|
|
|
type cpuCollector struct { |
|
cpu *prometheus.Desc |
|
cpuGuest *prometheus.Desc |
|
cpuFreq *prometheus.Desc |
|
cpuFreqMin *prometheus.Desc |
|
cpuFreqMax *prometheus.Desc |
|
cpuCoreThrottle *prometheus.Desc |
|
cpuPackageThrottle *prometheus.Desc |
|
} |
|
|
|
func init() { |
|
registerCollector("cpu", defaultEnabled, NewCPUCollector) |
|
} |
|
|
|
// NewCPUCollector returns a new Collector exposing kernel/system statistics. |
|
func NewCPUCollector() (Collector, error) { |
|
return &cpuCollector{ |
|
cpu: nodeCPUSecondsDesc, |
|
cpuGuest: prometheus.NewDesc( |
|
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "guest_seconds_total"), |
|
"Seconds the cpus spent in guests (VMs) for each mode.", |
|
[]string{"cpu", "mode"}, nil, |
|
), |
|
cpuFreq: prometheus.NewDesc( |
|
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "frequency_hertz"), |
|
"Current cpu thread frequency in hertz.", |
|
[]string{"cpu"}, nil, |
|
), |
|
cpuFreqMin: prometheus.NewDesc( |
|
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "frequency_min_hertz"), |
|
"Minimum cpu thread frequency in hertz.", |
|
[]string{"cpu"}, nil, |
|
), |
|
cpuFreqMax: prometheus.NewDesc( |
|
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "frequency_max_hertz"), |
|
"Maximum cpu thread frequency in hertz.", |
|
[]string{"cpu"}, nil, |
|
), |
|
cpuCoreThrottle: prometheus.NewDesc( |
|
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "core_throttles_total"), |
|
"Number of times this cpu core has been throttled.", |
|
[]string{"package", "core"}, nil, |
|
), |
|
cpuPackageThrottle: prometheus.NewDesc( |
|
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "package_throttles_total"), |
|
"Number of times this cpu package has been throttled.", |
|
[]string{"package"}, nil, |
|
), |
|
}, nil |
|
} |
|
|
|
// Update implements Collector and exposes cpu related metrics from /proc/stat and /sys/.../cpu/. |
|
func (c *cpuCollector) Update(ch chan<- prometheus.Metric) error { |
|
if err := c.updateStat(ch); err != nil { |
|
return err |
|
} |
|
if err := c.updateCPUfreq(ch); err != nil { |
|
return err |
|
} |
|
if err := c.updateThermalThrottle(ch); err != nil { |
|
return err |
|
} |
|
return nil |
|
} |
|
|
|
// updateCPUfreq reads /sys/devices/system/cpu/cpu* and expose cpu frequency statistics. |
|
func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error { |
|
fs, err := sysfs.NewFS(*sysPath) |
|
if err != nil { |
|
return fmt.Errorf("failed to open sysfs: %v", err) |
|
} |
|
|
|
cpuFreqs, err := fs.NewSystemCpufreq() |
|
if err != nil { |
|
return err |
|
} |
|
|
|
// sysfs cpufreq values are kHz, thus multiply by 1000 to export base units (hz). |
|
// See https://www.kernel.org/doc/Documentation/cpu-freq/user-guide.txt |
|
for _, stats := range cpuFreqs { |
|
ch <- prometheus.MustNewConstMetric( |
|
c.cpuFreq, |
|
prometheus.GaugeValue, |
|
float64(stats.CurrentFrequency)*1000.0, |
|
stats.Name, |
|
) |
|
ch <- prometheus.MustNewConstMetric( |
|
c.cpuFreqMin, |
|
prometheus.GaugeValue, |
|
float64(stats.MinimumFrequency)*1000.0, |
|
stats.Name, |
|
) |
|
ch <- prometheus.MustNewConstMetric( |
|
c.cpuFreqMax, |
|
prometheus.GaugeValue, |
|
float64(stats.MaximumFrequency)*1000.0, |
|
stats.Name, |
|
) |
|
} |
|
return nil |
|
} |
|
|
|
// updateThermalThrottle reads /sys/devices/system/cpu/cpu* and expose thermal throttle statistics. |
|
func (c *cpuCollector) updateThermalThrottle(ch chan<- prometheus.Metric) error { |
|
cpus, err := filepath.Glob(sysFilePath("devices/system/cpu/cpu[0-9]*")) |
|
if err != nil { |
|
return err |
|
} |
|
|
|
packageThrottles := make(map[uint64]uint64) |
|
packageCoreThrottles := make(map[uint64]map[uint64]uint64) |
|
|
|
// cpu loop |
|
for _, cpu := range cpus { |
|
// See |
|
// https://www.kernel.org/doc/Documentation/x86/topology.txt |
|
// https://www.kernel.org/doc/Documentation/cputopology.txt |
|
// https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-system-cpu |
|
var err error |
|
var physicalPackageID, coreID uint64 |
|
|
|
// topology/physical_package_id |
|
if physicalPackageID, err = readUintFromFile(filepath.Join(cpu, "topology", "physical_package_id")); err != nil { |
|
log.Debugf("CPU %v is missing physical_package_id", cpu) |
|
continue |
|
} |
|
// topology/core_id |
|
if coreID, err = readUintFromFile(filepath.Join(cpu, "topology", "core_id")); err != nil { |
|
log.Debugf("CPU %v is missing core_id", cpu) |
|
continue |
|
} |
|
|
|
// metric node_cpu_core_throttles_total |
|
// |
|
// We process this metric before the package throttles as there |
|
// are cpu+kernel combinations that only present core throttles |
|
// but no package throttles. |
|
// Seen e.g. on an Intel Xeon E5472 system with RHEL 6.9 kernel. |
|
if _, present := packageCoreThrottles[physicalPackageID]; !present { |
|
packageCoreThrottles[physicalPackageID] = make(map[uint64]uint64) |
|
} |
|
if _, present := packageCoreThrottles[physicalPackageID][coreID]; !present { |
|
// Read thermal_throttle/core_throttle_count only once |
|
if coreThrottleCount, err := readUintFromFile(filepath.Join(cpu, "thermal_throttle", "core_throttle_count")); err == nil { |
|
packageCoreThrottles[physicalPackageID][coreID] = coreThrottleCount |
|
} else { |
|
log.Debugf("CPU %v is missing core_throttle_count", cpu) |
|
} |
|
} |
|
|
|
// metric node_cpu_package_throttles_total |
|
if _, present := packageThrottles[physicalPackageID]; !present { |
|
// Read thermal_throttle/package_throttle_count only once |
|
if packageThrottleCount, err := readUintFromFile(filepath.Join(cpu, "thermal_throttle", "package_throttle_count")); err == nil { |
|
packageThrottles[physicalPackageID] = packageThrottleCount |
|
} else { |
|
log.Debugf("CPU %v is missing package_throttle_count", cpu) |
|
} |
|
} |
|
} |
|
|
|
for physicalPackageID, packageThrottleCount := range packageThrottles { |
|
ch <- prometheus.MustNewConstMetric(c.cpuPackageThrottle, |
|
prometheus.CounterValue, |
|
float64(packageThrottleCount), |
|
strconv.FormatUint(physicalPackageID, 10)) |
|
} |
|
|
|
for physicalPackageID, coreMap := range packageCoreThrottles { |
|
for coreID, coreThrottleCount := range coreMap { |
|
ch <- prometheus.MustNewConstMetric(c.cpuCoreThrottle, |
|
prometheus.CounterValue, |
|
float64(coreThrottleCount), |
|
strconv.FormatUint(physicalPackageID, 10), |
|
strconv.FormatUint(coreID, 10)) |
|
} |
|
} |
|
return nil |
|
} |
|
|
|
// updateStat reads /proc/stat through procfs and exports cpu related metrics. |
|
func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error { |
|
fs, err := procfs.NewFS(*procPath) |
|
if err != nil { |
|
return fmt.Errorf("failed to open procfs: %v", err) |
|
} |
|
stats, err := fs.NewStat() |
|
if err != nil { |
|
return err |
|
} |
|
|
|
for cpuID, cpuStat := range stats.CPU { |
|
cpuNum := fmt.Sprintf("%d", cpuID) |
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.User, cpuNum, "user") |
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Nice, cpuNum, "nice") |
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.System, cpuNum, "system") |
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Idle, cpuNum, "idle") |
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Iowait, cpuNum, "iowait") |
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.IRQ, cpuNum, "irq") |
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.SoftIRQ, cpuNum, "softirq") |
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Steal, cpuNum, "steal") |
|
|
|
// Guest CPU is also accounted for in cpuStat.User and cpuStat.Nice, expose these as separate metrics. |
|
ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.Guest, cpuNum, "user") |
|
ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.GuestNice, cpuNum, "nice") |
|
} |
|
|
|
return nil |
|
}
|
|
|