k3s/vendor/github.com/google/cadvisor/metrics/prometheus_machine.go

350 lines
12 KiB
Go

// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import (
"strconv"
"github.com/google/cadvisor/container"
info "github.com/google/cadvisor/info/v1"
"github.com/prometheus/client_golang/prometheus"
"k8s.io/klog/v2"
)
// baseLabelsNames lists the label names attached to every collected machine
// metric, in the order their values are supplied at collection time.
var baseLabelsNames = []string{
	"machine_id",
	"system_uuid",
	"boot_id",
}
// Names of the extra labels attached to individual machine metrics.
const (
	prometheusModeLabelName     = "mode"
	prometheusTypeLabelName     = "type"
	prometheusLevelLabelName    = "level"
	prometheusNodeLabelName     = "node_id"
	prometheusCoreLabelName     = "core_id"
	prometheusThreadLabelName   = "thread_id"
	prometheusPageSizeLabelName = "page_size"
)

// Values used for the "mode" label of the NVM capacity metric.
const (
	nvmMemoryMode    = "memory_mode"
	nvmAppDirectMode = "app_direct_mode"
)

// Property selectors accepted by getMemoryByType.
const (
	memoryByTypeDimmCountKey    = "DimmCount"
	memoryByTypeDimmCapacityKey = "Capacity"
)

// emptyLabelValue fills a label position that has no value for a sample.
const emptyLabelValue = ""
// machineMetric describes a multi-dimensional metric used for exposing a
// certain type of machine statistic.
type machineMetric struct {
	// name is the metric name handed to prometheus.NewDesc.
	name string
	// help is the help text handed to prometheus.NewDesc.
	help string
	// valueType is the Prometheus value type used when samples are emitted.
	valueType prometheus.ValueType
	// extraLabels holds the label names appended after the base label names.
	extraLabels []string
	// condition, when non-nil, must return true for the metric to be collected.
	condition func(machineInfo *info.MachineInfo) bool
	// getValues produces the samples of this metric from the machine info.
	getValues func(machineInfo *info.MachineInfo) metricValues
}
// desc builds the Prometheus descriptor of the metric. Its variable label
// names are baseLabels followed by the metric's extraLabels.
//
// The label names are copied into a freshly allocated slice: appending
// directly to baseLabels could write into the caller's backing array when it
// has spare capacity, which would make descriptors built from the same
// baseLabels share (and overwrite) each other's label names.
func (metric *machineMetric) desc(baseLabels []string) *prometheus.Desc {
	labelNames := make([]string, 0, len(baseLabels)+len(metric.extraLabels))
	labelNames = append(labelNames, baseLabels...)
	labelNames = append(labelNames, metric.extraLabels...)
	return prometheus.NewDesc(metric.name, metric.help, labelNames, nil)
}
// PrometheusMachineCollector implements prometheus.Collector.
type PrometheusMachineCollector struct {
	// infoProvider supplies the machine info read on every collection.
	infoProvider infoProvider
	// errors is reset to 0 at the start of a collection and set to 1 when
	// fetching the machine info fails.
	errors prometheus.Gauge
	// machineMetrics holds the metric definitions evaluated on every collection.
	machineMetrics []machineMetric
}
// NewPrometheusMachineCollector returns a new PrometheusMachineCollector that
// reads machine info from i. The basic CPU, memory, DIMM and NVM metrics are
// always configured; the CPU topology metrics (caches, thread siblings, NUMA
// node memory and hugepages) are added only when includedMetrics contains
// container.CPUTopologyMetrics.
func NewPrometheusMachineCollector(i infoProvider, includedMetrics container.MetricSet) *PrometheusMachineCollector {
	// scalar wraps a single unlabeled sample stamped with the machine info timestamp.
	scalar := func(mi *info.MachineInfo, v float64) metricValues {
		return metricValues{{value: v, timestamp: mi.Timestamp}}
	}
	// hasMemoryByType gates the DIMM metrics on per-type memory data being present.
	hasMemoryByType := func(mi *info.MachineInfo) bool {
		return len(mi.MemoryByType) != 0
	}

	definitions := []machineMetric{
		{
			name:      "machine_cpu_physical_cores",
			help:      "Number of physical CPU cores.",
			valueType: prometheus.GaugeValue,
			getValues: func(mi *info.MachineInfo) metricValues {
				return scalar(mi, float64(mi.NumPhysicalCores))
			},
		},
		{
			name:      "machine_cpu_cores",
			help:      "Number of logical CPU cores.",
			valueType: prometheus.GaugeValue,
			getValues: func(mi *info.MachineInfo) metricValues {
				return scalar(mi, float64(mi.NumCores))
			},
		},
		{
			name:      "machine_cpu_sockets",
			help:      "Number of CPU sockets.",
			valueType: prometheus.GaugeValue,
			getValues: func(mi *info.MachineInfo) metricValues {
				return scalar(mi, float64(mi.NumSockets))
			},
		},
		{
			name:      "machine_memory_bytes",
			help:      "Amount of memory installed on the machine.",
			valueType: prometheus.GaugeValue,
			getValues: func(mi *info.MachineInfo) metricValues {
				return scalar(mi, float64(mi.MemoryCapacity))
			},
		},
		{
			name:        "machine_dimm_count",
			help:        "Number of RAM DIMM (all types memory modules) value labeled by dimm type.",
			valueType:   prometheus.GaugeValue,
			extraLabels: []string{prometheusTypeLabelName},
			condition:   hasMemoryByType,
			getValues: func(mi *info.MachineInfo) metricValues {
				return getMemoryByType(mi, memoryByTypeDimmCountKey)
			},
		},
		{
			name:        "machine_dimm_capacity_bytes",
			help:        "Total RAM DIMM capacity (all types memory modules) value labeled by dimm type.",
			valueType:   prometheus.GaugeValue,
			extraLabels: []string{prometheusTypeLabelName},
			condition:   hasMemoryByType,
			getValues: func(mi *info.MachineInfo) metricValues {
				return getMemoryByType(mi, memoryByTypeDimmCapacityKey)
			},
		},
		{
			name:        "machine_nvm_capacity",
			help:        "NVM capacity value labeled by NVM mode (memory mode or app direct mode).",
			valueType:   prometheus.GaugeValue,
			extraLabels: []string{prometheusModeLabelName},
			getValues: func(mi *info.MachineInfo) metricValues {
				memoryMode := metricValue{
					value:     float64(mi.NVMInfo.MemoryModeCapacity),
					labels:    []string{nvmMemoryMode},
					timestamp: mi.Timestamp,
				}
				appDirectMode := metricValue{
					value:     float64(mi.NVMInfo.AppDirectModeCapacity),
					labels:    []string{nvmAppDirectMode},
					timestamp: mi.Timestamp,
				}
				return metricValues{memoryMode, appDirectMode}
			},
		},
		{
			name:      "machine_nvm_avg_power_budget_watts",
			help:      "NVM power budget.",
			valueType: prometheus.GaugeValue,
			getValues: func(mi *info.MachineInfo) metricValues {
				return scalar(mi, float64(mi.NVMInfo.AvgPowerBudget))
			},
		},
	}

	if includedMetrics.Has(container.CPUTopologyMetrics) {
		definitions = append(definitions,
			machineMetric{
				name:      "machine_cpu_cache_capacity_bytes",
				help:      "Cache size in bytes assigned to NUMA node and CPU core.",
				valueType: prometheus.GaugeValue,
				extraLabels: []string{
					prometheusNodeLabelName,
					prometheusCoreLabelName,
					prometheusTypeLabelName,
					prometheusLevelLabelName,
				},
				getValues: getCaches,
			},
			machineMetric{
				name:      "machine_thread_siblings_count",
				help:      "Number of CPU thread siblings.",
				valueType: prometheus.GaugeValue,
				extraLabels: []string{
					prometheusNodeLabelName,
					prometheusCoreLabelName,
					prometheusThreadLabelName,
				},
				getValues: getThreadsSiblingsCount,
			},
			machineMetric{
				name:        "machine_node_memory_capacity_bytes",
				help:        "Amount of memory assigned to NUMA node.",
				valueType:   prometheus.GaugeValue,
				extraLabels: []string{prometheusNodeLabelName},
				getValues:   getNodeMemory,
			},
			machineMetric{
				name:        "machine_node_hugepages_count",
				help:        "Numer of hugepages assigned to NUMA node.",
				valueType:   prometheus.GaugeValue,
				extraLabels: []string{prometheusNodeLabelName, prometheusPageSizeLabelName},
				getValues:   getHugePagesCount,
			},
		)
	}

	scrapeError := prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: "machine",
		Name:      "scrape_error",
		Help:      "1 if there was an error while getting machine metrics, 0 otherwise.",
	})

	return &PrometheusMachineCollector{
		infoProvider:   i,
		errors:         scrapeError,
		machineMetrics: definitions,
	}
}
// Describe sends the descriptor of the scrape-error gauge, followed by one
// descriptor per configured machine metric, to ch. It implements
// prometheus.Collector.
func (collector *PrometheusMachineCollector) Describe(ch chan<- *prometheus.Desc) {
	collector.errors.Describe(ch)

	definitions := collector.machineMetrics
	for idx := range definitions {
		// Descriptors are built here with no base label names.
		ch <- definitions[idx].desc([]string{})
	}
}
// Collect gathers the machine metrics and sends them to ch, followed by the
// scrape-error gauge. The gauge is reset to 0 first so that it only reports
// a failure of the current collection. It implements prometheus.Collector.
func (collector *PrometheusMachineCollector) Collect(ch chan<- prometheus.Metric) {
	scrapeError := collector.errors

	scrapeError.Set(0)
	// collectMachineInfo flips the gauge to 1 when the machine info is unavailable.
	collector.collectMachineInfo(ch)
	scrapeError.Collect(ch)
}
// collectMachineInfo fetches the machine info from the info provider and sends
// one Prometheus metric per sample of every configured machine metric to ch.
//
// When the machine info cannot be fetched, the scrape-error gauge is set to 1,
// a warning is logged and nothing is sent. Metrics whose condition is set and
// returns false are skipped. Samples carrying a non-zero timestamp are sent
// with that timestamp attached.
func (collector *PrometheusMachineCollector) collectMachineInfo(ch chan<- prometheus.Metric) {
	machineInfo, err := collector.infoProvider.GetMachineInfo()
	if err != nil {
		collector.errors.Set(1)
		klog.Warningf("Couldn't get machine info: %s", err)
		return
	}

	baseLabelsValues := []string{machineInfo.MachineID, machineInfo.SystemUUID, machineInfo.BootID}

	for i := range collector.machineMetrics {
		metric := &collector.machineMetrics[i]
		if metric.condition != nil && !metric.condition(machineInfo) {
			continue
		}

		values := metric.getValues(machineInfo)
		if len(values) == 0 {
			continue
		}

		// The descriptor depends only on the metric definition, so it is built
		// once per metric instead of once per sample.
		desc := metric.desc(baseLabelsNames)
		hasExtraLabels := len(metric.extraLabels) != 0

		for _, metricValue := range values {
			// Every sample gets its own label slice so samples never share a
			// backing array.
			labelValues := make([]string, len(baseLabelsValues), len(baseLabelsValues)+len(metricValue.labels))
			copy(labelValues, baseLabelsValues)
			if hasExtraLabels {
				labelValues = append(labelValues, metricValue.labels...)
			}

			prometheusMetric := prometheus.MustNewConstMetric(desc,
				metric.valueType, metricValue.value, labelValues...)

			if metricValue.timestamp.IsZero() {
				ch <- prometheusMetric
			} else {
				ch <- prometheus.NewMetricWithTimestamp(metricValue.timestamp, prometheusMetric)
			}
		}
	}
}
// getMemoryByType returns one sample per entry of machineInfo.MemoryByType,
// labeled with the memory type and stamped with the machine info timestamp.
//
// property selects the reported field: memoryByTypeDimmCapacityKey reports the
// Capacity field and memoryByTypeDimmCountKey reports the DimmCount field. Any
// other value logs a warning and yields an empty result.
func getMemoryByType(machineInfo *info.MachineInfo, property string) metricValues {
	// property is the same for every entry, so it is validated and resolved
	// once up front rather than inside the loop.
	var reportCapacity bool
	switch property {
	case memoryByTypeDimmCapacityKey:
		reportCapacity = true
	case memoryByTypeDimmCountKey:
		reportCapacity = false
	default:
		klog.Warningf("Incorrect property name for MemoryByType, property %s", property)
		return metricValues{}
	}

	mValues := make(metricValues, 0, len(machineInfo.MemoryByType))
	for memoryType, memoryInfo := range machineInfo.MemoryByType {
		propertyValue := float64(memoryInfo.DimmCount)
		if reportCapacity {
			propertyValue = float64(memoryInfo.Capacity)
		}
		mValues = append(mValues, metricValue{
			value:     propertyValue,
			labels:    []string{memoryType},
			timestamp: machineInfo.Timestamp,
		})
	}
	return mValues
}
// getThreadsSiblingsCount returns one sample per thread found in the machine
// topology. The value is the number of threads of the core the thread belongs
// to; the labels are the node ID, the core ID and the thread ID.
func getThreadsSiblingsCount(machineInfo *info.MachineInfo) metricValues {
	samples := make(metricValues, 0, machineInfo.NumCores)
	for _, numaNode := range machineInfo.Topology {
		nodeLabel := strconv.Itoa(numaNode.Id)
		for _, cpuCore := range numaNode.Cores {
			coreLabel := strconv.Itoa(cpuCore.Id)
			// Every thread of a core reports the same sibling count.
			siblings := float64(len(cpuCore.Threads))
			for _, threadID := range cpuCore.Threads {
				sample := metricValue{
					value:     siblings,
					labels:    []string{nodeLabel, coreLabel, strconv.Itoa(threadID)},
					timestamp: machineInfo.Timestamp,
				}
				samples = append(samples, sample)
			}
		}
	}
	return samples
}
// getNodeMemory returns one sample per node of the machine topology. The
// value is the node's Memory field and the only label is the node ID.
func getNodeMemory(machineInfo *info.MachineInfo) metricValues {
	topology := machineInfo.Topology
	samples := make(metricValues, 0, len(topology))
	for _, numaNode := range topology {
		sample := metricValue{
			value:     float64(numaNode.Memory),
			labels:    []string{strconv.Itoa(numaNode.Id)},
			timestamp: machineInfo.Timestamp,
		}
		samples = append(samples, sample)
	}
	return samples
}
// getHugePagesCount returns one sample per hugepage entry of every node in
// the machine topology. The value is the entry's NumPages field; the labels
// are the node ID and the page size formatted in base 10.
func getHugePagesCount(machineInfo *info.MachineInfo) metricValues {
	samples := metricValues{}
	for _, numaNode := range machineInfo.Topology {
		nodeLabel := strconv.Itoa(numaNode.Id)
		for _, pages := range numaNode.HugePages {
			pageSizeLabel := strconv.FormatUint(pages.PageSize, 10)
			sample := metricValue{
				value:     float64(pages.NumPages),
				labels:    []string{nodeLabel, pageSizeLabel},
				timestamp: machineInfo.Timestamp,
			}
			samples = append(samples, sample)
		}
	}
	return samples
}
// getCaches returns one sample per cache found in the machine topology. For
// every node, the caches of each core are emitted first, labeled with the
// node ID, core ID, cache type and cache level; the caches attached to the
// node itself follow, with emptyLabelValue in the core ID position.
func getCaches(machineInfo *info.MachineInfo) metricValues {
	samples := metricValues{}
	for _, numaNode := range machineInfo.Topology {
		nodeLabel := strconv.Itoa(numaNode.Id)

		// Caches that belong to a specific core.
		for _, cpuCore := range numaNode.Cores {
			coreLabel := strconv.Itoa(cpuCore.Id)
			for _, coreCache := range cpuCore.Caches {
				levelLabel := strconv.Itoa(coreCache.Level)
				sample := metricValue{
					value:     float64(coreCache.Size),
					labels:    []string{nodeLabel, coreLabel, coreCache.Type, levelLabel},
					timestamp: machineInfo.Timestamp,
				}
				samples = append(samples, sample)
			}
		}

		// Caches attached to the node rather than to a single core.
		for _, nodeCache := range numaNode.Caches {
			levelLabel := strconv.Itoa(nodeCache.Level)
			sample := metricValue{
				value:     float64(nodeCache.Size),
				labels:    []string{nodeLabel, emptyLabelValue, nodeCache.Type, levelLabel},
				timestamp: machineInfo.Timestamp,
			}
			samples = append(samples, sample)
		}
	}
	return samples
}