Exporter for machine metrics
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

245 lines
7.1 KiB

// Copyright 2015 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package collector includes all individual collectors to gather and export system metrics.
package collector
import (
"errors"
"fmt"
"log/slog"
"sync"
"time"
"github.com/alecthomas/kingpin/v2"
"github.com/prometheus/client_golang/prometheus"
)
// Namespace defines the common namespace to be used by all metrics.
const namespace = "node"
var (
scrapeDurationDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "scrape", "collector_duration_seconds"),
"node_exporter: Duration of a collector scrape.",
[]string{"collector"},
nil,
)
scrapeSuccessDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "scrape", "collector_success"),
"node_exporter: Whether a collector succeeded.",
[]string{"collector"},
nil,
)
)
const (
defaultEnabled = true
defaultDisabled = false
)
var (
factories = make(map[string]func(logger *slog.Logger) (Collector, error))
initiatedCollectorsMtx = sync.Mutex{}
initiatedCollectors = make(map[string]Collector)
collectorState = make(map[string]*bool)
forcedCollectors = map[string]bool{} // collectors which have been explicitly enabled or disabled
)
func registerCollector(collector string, isDefaultEnabled bool, factory func(logger *slog.Logger) (Collector, error)) {
var helpDefaultState string
if isDefaultEnabled {
helpDefaultState = "enabled"
} else {
helpDefaultState = "disabled"
}
flagName := fmt.Sprintf("collector.%s", collector)
flagHelp := fmt.Sprintf("Enable the %s collector (default: %s).", collector, helpDefaultState)
defaultValue := fmt.Sprintf("%v", isDefaultEnabled)
flag := kingpin.Flag(flagName, flagHelp).Default(defaultValue).Action(collectorFlagAction(collector)).Bool()
collectorState[collector] = flag
factories[collector] = factory
}
// NodeCollector implements the prometheus.Collector interface.
type NodeCollector struct {
Collectors map[string]Collector
logger *slog.Logger
}
// DisableDefaultCollectors sets the collector state to false for all collectors which
// have not been explicitly enabled on the command line.
func DisableDefaultCollectors() {
for c := range collectorState {
if _, ok := forcedCollectors[c]; !ok {
*collectorState[c] = false
}
}
}
// collectorFlagAction generates a new action function for the given collector
// to track whether it has been explicitly enabled or disabled from the command line.
// A new action function is needed for each collector flag because the ParseContext
// does not contain information about which flag called the action.
// See: https://github.com/alecthomas/kingpin/issues/294
func collectorFlagAction(collector string) func(ctx *kingpin.ParseContext) error {
return func(ctx *kingpin.ParseContext) error {
forcedCollectors[collector] = true
return nil
}
}
// NewNodeCollector creates a new NodeCollector.
func NewNodeCollector(logger *slog.Logger, filters ...string) (*NodeCollector, error) {
f := make(map[string]bool)
for _, filter := range filters {
enabled, exist := collectorState[filter]
if !exist {
return nil, fmt.Errorf("missing collector: %s", filter)
}
if !*enabled {
return nil, fmt.Errorf("disabled collector: %s", filter)
}
f[filter] = true
}
collectors := make(map[string]Collector)
initiatedCollectorsMtx.Lock()
defer initiatedCollectorsMtx.Unlock()
for key, enabled := range collectorState {
if !*enabled || (len(f) > 0 && !f[key]) {
continue
}
if collector, ok := initiatedCollectors[key]; ok {
collectors[key] = collector
} else {
collector, err := factories[key](logger.With("collector", key))
if err != nil {
return nil, err
}
collectors[key] = collector
initiatedCollectors[key] = collector
}
}
return &NodeCollector{Collectors: collectors, logger: logger}, nil
}
// Describe implements the prometheus.Collector interface.
func (n NodeCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- scrapeDurationDesc
ch <- scrapeSuccessDesc
}
// Collect implements the prometheus.Collector interface.
func (n NodeCollector) Collect(ch chan<- prometheus.Metric) {
wg := sync.WaitGroup{}
wg.Add(len(n.Collectors))
for name, c := range n.Collectors {
go func(name string, c Collector) {
execute(name, c, ch, n.logger)
wg.Done()
}(name, c)
}
wg.Wait()
}
func execute(name string, c Collector, ch chan<- prometheus.Metric, logger *slog.Logger) {
begin := time.Now()
err := c.Update(ch)
duration := time.Since(begin)
var success float64
if err != nil {
if IsNoDataError(err) {
logger.Debug("collector returned no data", "name", name, "duration_seconds", duration.Seconds(), "err", err)
} else {
logger.Error("collector failed", "name", name, "duration_seconds", duration.Seconds(), "err", err)
}
success = 0
} else {
logger.Debug("collector succeeded", "name", name, "duration_seconds", duration.Seconds())
success = 1
}
ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, duration.Seconds(), name)
ch <- prometheus.MustNewConstMetric(scrapeSuccessDesc, prometheus.GaugeValue, success, name)
}
// Collector is the interface a collector has to implement.
type Collector interface {
// Get new metrics and expose them via prometheus registry.
Update(ch chan<- prometheus.Metric) error
}
type typedDesc struct {
desc *prometheus.Desc
valueType prometheus.ValueType
}
func (d *typedDesc) mustNewConstMetric(value float64, labels ...string) prometheus.Metric {
return prometheus.MustNewConstMetric(d.desc, d.valueType, value, labels...)
}
// ErrNoData indicates the collector found no data to collect, but had no other error.
var ErrNoData = errors.New("collector returned no data")
func IsNoDataError(err error) bool {
return err == ErrNoData
}
// pushMetric helps construct and convert a variety of value types into Prometheus float64 metrics.
func pushMetric(ch chan<- prometheus.Metric, fieldDesc *prometheus.Desc, name string, value interface{}, valueType prometheus.ValueType, labelValues ...string) {
var fVal float64
switch val := value.(type) {
case uint8:
fVal = float64(val)
case uint16:
fVal = float64(val)
case uint32:
fVal = float64(val)
case uint64:
fVal = float64(val)
case int64:
fVal = float64(val)
case *uint8:
if val == nil {
return
}
fVal = float64(*val)
case *uint16:
if val == nil {
return
}
fVal = float64(*val)
case *uint32:
if val == nil {
return
}
fVal = float64(*val)
case *uint64:
if val == nil {
return
}
fVal = float64(*val)
case *int64:
if val == nil {
return
}
fVal = float64(*val)
default:
return
}
ch <- prometheus.MustNewConstMetric(fieldDesc, valueType, fVal, labelValues...)
}