prometheusmetricshost-metricsmachine-metricsnode-metricsprocfsprometheus-exportersystem-informationsystem-metrics
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
180 lines
6.6 KiB
180 lines
6.6 KiB
// Copyright 2015 The Prometheus Authors |
|
// Licensed under the Apache License, Version 2.0 (the "License"); |
|
// you may not use this file except in compliance with the License. |
|
// You may obtain a copy of the License at |
|
// |
|
// http://www.apache.org/licenses/LICENSE-2.0 |
|
// |
|
// Unless required by applicable law or agreed to in writing, software |
|
// distributed under the License is distributed on an "AS IS" BASIS, |
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
// See the License for the specific language governing permissions and |
|
// limitations under the License. |
|
|
|
// +build !nocpu |
|
|
|
package collector |
|
|
|
import ( |
|
"fmt" |
|
"path/filepath" |
|
"strconv" |
|
|
|
"github.com/prometheus/client_golang/prometheus" |
|
"github.com/prometheus/common/log" |
|
"github.com/prometheus/procfs" |
|
) |
|
|
|
type cpuCollector struct { |
|
fs procfs.FS |
|
cpu *prometheus.Desc |
|
cpuGuest *prometheus.Desc |
|
cpuCoreThrottle *prometheus.Desc |
|
cpuPackageThrottle *prometheus.Desc |
|
} |
|
|
|
func init() { |
|
registerCollector("cpu", defaultEnabled, NewCPUCollector) |
|
} |
|
|
|
// NewCPUCollector returns a new Collector exposing kernel/system statistics. |
|
func NewCPUCollector() (Collector, error) { |
|
fs, err := procfs.NewFS(*procPath) |
|
if err != nil { |
|
return nil, fmt.Errorf("failed to open procfs: %v", err) |
|
} |
|
return &cpuCollector{ |
|
fs: fs, |
|
cpu: nodeCPUSecondsDesc, |
|
cpuGuest: prometheus.NewDesc( |
|
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "guest_seconds_total"), |
|
"Seconds the cpus spent in guests (VMs) for each mode.", |
|
[]string{"cpu", "mode"}, nil, |
|
), |
|
cpuCoreThrottle: prometheus.NewDesc( |
|
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "core_throttles_total"), |
|
"Number of times this cpu core has been throttled.", |
|
[]string{"package", "core"}, nil, |
|
), |
|
cpuPackageThrottle: prometheus.NewDesc( |
|
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "package_throttles_total"), |
|
"Number of times this cpu package has been throttled.", |
|
[]string{"package"}, nil, |
|
), |
|
}, nil |
|
} |
|
|
|
// Update implements Collector and exposes cpu related metrics from /proc/stat and /sys/.../cpu/. |
|
func (c *cpuCollector) Update(ch chan<- prometheus.Metric) error { |
|
if err := c.updateStat(ch); err != nil { |
|
return err |
|
} |
|
if err := c.updateThermalThrottle(ch); err != nil { |
|
return err |
|
} |
|
return nil |
|
} |
|
|
|
// updateThermalThrottle reads /sys/devices/system/cpu/cpu* and expose thermal throttle statistics. |
|
func (c *cpuCollector) updateThermalThrottle(ch chan<- prometheus.Metric) error { |
|
cpus, err := filepath.Glob(sysFilePath("devices/system/cpu/cpu[0-9]*")) |
|
if err != nil { |
|
return err |
|
} |
|
|
|
packageThrottles := make(map[uint64]uint64) |
|
packageCoreThrottles := make(map[uint64]map[uint64]uint64) |
|
|
|
// cpu loop |
|
for _, cpu := range cpus { |
|
// See |
|
// https://www.kernel.org/doc/Documentation/x86/topology.txt |
|
// https://www.kernel.org/doc/Documentation/cputopology.txt |
|
// https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-system-cpu |
|
var err error |
|
var physicalPackageID, coreID uint64 |
|
|
|
// topology/physical_package_id |
|
if physicalPackageID, err = readUintFromFile(filepath.Join(cpu, "topology", "physical_package_id")); err != nil { |
|
log.Debugf("CPU %v is missing physical_package_id", cpu) |
|
continue |
|
} |
|
// topology/core_id |
|
if coreID, err = readUintFromFile(filepath.Join(cpu, "topology", "core_id")); err != nil { |
|
log.Debugf("CPU %v is missing core_id", cpu) |
|
continue |
|
} |
|
|
|
// metric node_cpu_core_throttles_total |
|
// |
|
// We process this metric before the package throttles as there |
|
// are cpu+kernel combinations that only present core throttles |
|
// but no package throttles. |
|
// Seen e.g. on an Intel Xeon E5472 system with RHEL 6.9 kernel. |
|
if _, present := packageCoreThrottles[physicalPackageID]; !present { |
|
packageCoreThrottles[physicalPackageID] = make(map[uint64]uint64) |
|
} |
|
if _, present := packageCoreThrottles[physicalPackageID][coreID]; !present { |
|
// Read thermal_throttle/core_throttle_count only once |
|
if coreThrottleCount, err := readUintFromFile(filepath.Join(cpu, "thermal_throttle", "core_throttle_count")); err == nil { |
|
packageCoreThrottles[physicalPackageID][coreID] = coreThrottleCount |
|
} else { |
|
log.Debugf("CPU %v is missing core_throttle_count", cpu) |
|
} |
|
} |
|
|
|
// metric node_cpu_package_throttles_total |
|
if _, present := packageThrottles[physicalPackageID]; !present { |
|
// Read thermal_throttle/package_throttle_count only once |
|
if packageThrottleCount, err := readUintFromFile(filepath.Join(cpu, "thermal_throttle", "package_throttle_count")); err == nil { |
|
packageThrottles[physicalPackageID] = packageThrottleCount |
|
} else { |
|
log.Debugf("CPU %v is missing package_throttle_count", cpu) |
|
} |
|
} |
|
} |
|
|
|
for physicalPackageID, packageThrottleCount := range packageThrottles { |
|
ch <- prometheus.MustNewConstMetric(c.cpuPackageThrottle, |
|
prometheus.CounterValue, |
|
float64(packageThrottleCount), |
|
strconv.FormatUint(physicalPackageID, 10)) |
|
} |
|
|
|
for physicalPackageID, coreMap := range packageCoreThrottles { |
|
for coreID, coreThrottleCount := range coreMap { |
|
ch <- prometheus.MustNewConstMetric(c.cpuCoreThrottle, |
|
prometheus.CounterValue, |
|
float64(coreThrottleCount), |
|
strconv.FormatUint(physicalPackageID, 10), |
|
strconv.FormatUint(coreID, 10)) |
|
} |
|
} |
|
return nil |
|
} |
|
|
|
// updateStat reads /proc/stat through procfs and exports cpu related metrics. |
|
func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error { |
|
stats, err := c.fs.NewStat() |
|
if err != nil { |
|
return err |
|
} |
|
|
|
for cpuID, cpuStat := range stats.CPU { |
|
cpuNum := fmt.Sprintf("%d", cpuID) |
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.User, cpuNum, "user") |
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Nice, cpuNum, "nice") |
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.System, cpuNum, "system") |
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Idle, cpuNum, "idle") |
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Iowait, cpuNum, "iowait") |
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.IRQ, cpuNum, "irq") |
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.SoftIRQ, cpuNum, "softirq") |
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Steal, cpuNum, "steal") |
|
|
|
// Guest CPU is also accounted for in cpuStat.User and cpuStat.Nice, expose these as separate metrics. |
|
ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.Guest, cpuNum, "user") |
|
ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.GuestNice, cpuNum, "nice") |
|
} |
|
|
|
return nil |
|
}
|
|
|