You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
246 lines
7.1 KiB
246 lines
7.1 KiB
// Copyright 2015 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
// Package collector includes all individual collectors to gather and export system metrics.
|
|
package collector
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"log/slog"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/alecthomas/kingpin/v2"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
)
|
|
|
|
// namespace is the common metric namespace; it is used as the first
// component of the fully-qualified names built for the scrape metrics.
const (
	namespace = "node"
)
|
|
|
|
var (
|
|
scrapeDurationDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "scrape", "collector_duration_seconds"),
|
|
"node_exporter: Duration of a collector scrape.",
|
|
[]string{"collector"},
|
|
nil,
|
|
)
|
|
scrapeSuccessDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "scrape", "collector_success"),
|
|
"node_exporter: Whether a collector succeeded.",
|
|
[]string{"collector"},
|
|
nil,
|
|
)
|
|
)
|
|
|
|
// Readable names for a collector's default state.
const (
	// defaultEnabled marks a collector as enabled by default.
	defaultEnabled = true
	// defaultDisabled marks a collector as disabled by default.
	defaultDisabled = false
)
|
|
|
|
var (
|
|
factories = make(map[string]func(logger *slog.Logger) (Collector, error))
|
|
initiatedCollectorsMtx = sync.Mutex{}
|
|
initiatedCollectors = make(map[string]Collector)
|
|
collectorState = make(map[string]*bool)
|
|
forcedCollectors = map[string]bool{} // collectors which have been explicitly enabled or disabled
|
|
)
|
|
|
|
func registerCollector(collector string, isDefaultEnabled bool, factory func(logger *slog.Logger) (Collector, error)) {
|
|
var helpDefaultState string
|
|
if isDefaultEnabled {
|
|
helpDefaultState = "enabled"
|
|
} else {
|
|
helpDefaultState = "disabled"
|
|
}
|
|
|
|
flagName := fmt.Sprintf("collector.%s", collector)
|
|
flagHelp := fmt.Sprintf("Enable the %s collector (default: %s).", collector, helpDefaultState)
|
|
defaultValue := fmt.Sprintf("%v", isDefaultEnabled)
|
|
|
|
flag := kingpin.Flag(flagName, flagHelp).Default(defaultValue).Action(collectorFlagAction(collector)).Bool()
|
|
collectorState[collector] = flag
|
|
|
|
factories[collector] = factory
|
|
}
|
|
|
|
// NodeCollector implements the prometheus.Collector interface.
|
|
type NodeCollector struct {
|
|
Collectors map[string]Collector
|
|
logger *slog.Logger
|
|
}
|
|
|
|
// DisableDefaultCollectors sets the collector state to false for all collectors which
|
|
// have not been explicitly enabled on the command line.
|
|
func DisableDefaultCollectors() {
|
|
for c := range collectorState {
|
|
if _, ok := forcedCollectors[c]; !ok {
|
|
*collectorState[c] = false
|
|
}
|
|
}
|
|
}
|
|
|
|
// collectorFlagAction generates a new action function for the given collector
|
|
// to track whether it has been explicitly enabled or disabled from the command line.
|
|
// A new action function is needed for each collector flag because the ParseContext
|
|
// does not contain information about which flag called the action.
|
|
// See: https://github.com/alecthomas/kingpin/issues/294
|
|
func collectorFlagAction(collector string) func(ctx *kingpin.ParseContext) error {
|
|
return func(ctx *kingpin.ParseContext) error {
|
|
forcedCollectors[collector] = true
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// NewNodeCollector creates a new NodeCollector.
|
|
func NewNodeCollector(logger *slog.Logger, filters ...string) (*NodeCollector, error) {
|
|
f := make(map[string]bool)
|
|
for _, filter := range filters {
|
|
enabled, exist := collectorState[filter]
|
|
if !exist {
|
|
return nil, fmt.Errorf("missing collector: %s", filter)
|
|
}
|
|
if !*enabled {
|
|
return nil, fmt.Errorf("disabled collector: %s", filter)
|
|
}
|
|
f[filter] = true
|
|
}
|
|
collectors := make(map[string]Collector)
|
|
initiatedCollectorsMtx.Lock()
|
|
defer initiatedCollectorsMtx.Unlock()
|
|
for key, enabled := range collectorState {
|
|
if !*enabled || (len(f) > 0 && !f[key]) {
|
|
continue
|
|
}
|
|
if collector, ok := initiatedCollectors[key]; ok {
|
|
collectors[key] = collector
|
|
} else {
|
|
collector, err := factories[key](logger.With("collector", key))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
collectors[key] = collector
|
|
initiatedCollectors[key] = collector
|
|
}
|
|
}
|
|
return &NodeCollector{Collectors: collectors, logger: logger}, nil
|
|
}
|
|
|
|
// Describe implements the prometheus.Collector interface.
|
|
func (n NodeCollector) Describe(ch chan<- *prometheus.Desc) {
|
|
ch <- scrapeDurationDesc
|
|
ch <- scrapeSuccessDesc
|
|
}
|
|
|
|
// Collect implements the prometheus.Collector interface.
|
|
func (n NodeCollector) Collect(ch chan<- prometheus.Metric) {
|
|
wg := sync.WaitGroup{}
|
|
wg.Add(len(n.Collectors))
|
|
for name, c := range n.Collectors {
|
|
go func(name string, c Collector) {
|
|
execute(name, c, ch, n.logger)
|
|
wg.Done()
|
|
}(name, c)
|
|
}
|
|
wg.Wait()
|
|
}
|
|
|
|
func execute(name string, c Collector, ch chan<- prometheus.Metric, logger *slog.Logger) {
|
|
begin := time.Now()
|
|
err := c.Update(ch)
|
|
duration := time.Since(begin)
|
|
var success float64
|
|
|
|
if err != nil {
|
|
if IsNoDataError(err) {
|
|
logger.Debug("collector returned no data", "name", name, "duration_seconds", duration.Seconds(), "err", err)
|
|
} else {
|
|
logger.Error("collector failed", "name", name, "duration_seconds", duration.Seconds(), "err", err)
|
|
}
|
|
success = 0
|
|
} else {
|
|
logger.Debug("collector succeeded", "name", name, "duration_seconds", duration.Seconds())
|
|
success = 1
|
|
}
|
|
ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, duration.Seconds(), name)
|
|
ch <- prometheus.MustNewConstMetric(scrapeSuccessDesc, prometheus.GaugeValue, success, name)
|
|
}
|
|
|
|
// Collector is the interface a collector has to implement.
|
|
type Collector interface {
|
|
// Get new metrics and expose them via prometheus registry.
|
|
Update(ch chan<- prometheus.Metric) error
|
|
}
|
|
|
|
type typedDesc struct {
|
|
desc *prometheus.Desc
|
|
valueType prometheus.ValueType
|
|
}
|
|
|
|
func (d *typedDesc) mustNewConstMetric(value float64, labels ...string) prometheus.Metric {
|
|
return prometheus.MustNewConstMetric(d.desc, d.valueType, value, labels...)
|
|
}
|
|
|
|
// ErrNoData indicates the collector found no data to collect, but had no other error.
var ErrNoData = errors.New("collector returned no data")

// IsNoDataError reports whether err is ErrNoData, either directly or wrapped
// anywhere in its error chain (for example via fmt.Errorf with %w).
// It returns false for a nil error.
func IsNoDataError(err error) bool {
	// errors.Is walks the Unwrap chain, so a collector that adds context to
	// ErrNoData is still recognised instead of being treated as a failure.
	return errors.Is(err, ErrNoData)
}
|
|
|
|
// pushMetric helps construct and convert a variety of value types into Prometheus float64 metrics.
|
|
func pushMetric(ch chan<- prometheus.Metric, fieldDesc *prometheus.Desc, name string, value interface{}, valueType prometheus.ValueType, labelValues ...string) {
|
|
var fVal float64
|
|
switch val := value.(type) {
|
|
case uint8:
|
|
fVal = float64(val)
|
|
case uint16:
|
|
fVal = float64(val)
|
|
case uint32:
|
|
fVal = float64(val)
|
|
case uint64:
|
|
fVal = float64(val)
|
|
case int64:
|
|
fVal = float64(val)
|
|
case *uint8:
|
|
if val == nil {
|
|
return
|
|
}
|
|
fVal = float64(*val)
|
|
case *uint16:
|
|
if val == nil {
|
|
return
|
|
}
|
|
fVal = float64(*val)
|
|
case *uint32:
|
|
if val == nil {
|
|
return
|
|
}
|
|
fVal = float64(*val)
|
|
case *uint64:
|
|
if val == nil {
|
|
return
|
|
}
|
|
fVal = float64(*val)
|
|
case *int64:
|
|
if val == nil {
|
|
return
|
|
}
|
|
fVal = float64(*val)
|
|
default:
|
|
return
|
|
}
|
|
|
|
ch <- prometheus.MustNewConstMetric(fieldDesc, valueType, fVal, labelValues...)
|
|
}
|