|
|
|
// Copyright 2016 The Prometheus Authors
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package scrape
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
|
|
)
|
|
|
|
|
|
|
|
// scrapeMetrics bundles every self-observability metric of the scrape
// subsystem. All metrics are created and registered once in
// newScrapeMetrics and unregistered together via Unregister. The section
// comments below say which component increments/observes each group.
type scrapeMetrics struct {
	// reg is kept so Unregister can remove every collector again.
	reg prometheus.Registerer

	// Used by Manager.
	targetMetadataCache *MetadataMetricsCollector
	targetScrapePools prometheus.Counter
	targetScrapePoolsFailed prometheus.Counter

	// Used by scrapePool.
	targetReloadIntervalLength *prometheus.SummaryVec
	targetScrapePoolReloads prometheus.Counter
	targetScrapePoolReloadsFailed prometheus.Counter
	targetScrapePoolSyncsCounter *prometheus.CounterVec
	targetScrapePoolExceededTargetLimit prometheus.Counter
	targetScrapePoolTargetLimit *prometheus.GaugeVec
	targetScrapePoolTargetsAdded *prometheus.GaugeVec
	targetScrapePoolSymbolTableItems *prometheus.GaugeVec
	targetSyncIntervalLength *prometheus.SummaryVec
	targetSyncFailed *prometheus.CounterVec

	// Used by targetScraper.
	targetScrapeExceededBodySizeLimit prometheus.Counter

	// Used by scrapeCache.
	targetScrapeCacheFlushForced prometheus.Counter

	// Used by scrapeLoop.
	targetIntervalLength *prometheus.SummaryVec
	targetScrapeSampleLimit prometheus.Counter
	targetScrapeSampleDuplicate prometheus.Counter
	targetScrapeSampleOutOfOrder prometheus.Counter
	targetScrapeSampleOutOfBounds prometheus.Counter
	targetScrapeExemplarOutOfOrder prometheus.Counter
	targetScrapePoolExceededLabelLimits prometheus.Counter
	targetScrapeNativeHistogramBucketLimit prometheus.Counter
}
|
|
|
|
|
|
|
|
func newScrapeMetrics(reg prometheus.Registerer) (*scrapeMetrics, error) {
|
|
|
|
sm := &scrapeMetrics{reg: reg}
|
|
|
|
|
|
|
|
// Manager metrics.
|
|
|
|
sm.targetMetadataCache = &MetadataMetricsCollector{
|
|
|
|
CacheEntries: prometheus.NewDesc(
|
|
|
|
"prometheus_target_metadata_cache_entries",
|
|
|
|
"Total number of metric metadata entries in the cache",
|
|
|
|
[]string{"scrape_job"},
|
|
|
|
nil,
|
|
|
|
),
|
|
|
|
CacheBytes: prometheus.NewDesc(
|
|
|
|
"prometheus_target_metadata_cache_bytes",
|
|
|
|
"The number of bytes that are currently used for storing metric metadata in the cache",
|
|
|
|
[]string{"scrape_job"},
|
|
|
|
nil,
|
|
|
|
),
|
|
|
|
// TargetsGatherer should be set later, because it's a circular dependency.
|
|
|
|
// newScrapeMetrics() is called by NewManager(), while also TargetsGatherer is the new Manager.
|
|
|
|
}
|
|
|
|
|
|
|
|
sm.targetScrapePools = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrape_pools_total",
|
|
|
|
Help: "Total number of scrape pool creation attempts.",
|
|
|
|
},
|
|
|
|
)
|
|
|
|
sm.targetScrapePoolsFailed = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrape_pools_failed_total",
|
|
|
|
Help: "Total number of scrape pool creations that failed.",
|
|
|
|
},
|
|
|
|
)
|
|
|
|
|
|
|
|
// Used by scrapePool.
|
|
|
|
sm.targetReloadIntervalLength = prometheus.NewSummaryVec(
|
|
|
|
prometheus.SummaryOpts{
|
|
|
|
Name: "prometheus_target_reload_length_seconds",
|
|
|
|
Help: "Actual interval to reload the scrape pool with a given configuration.",
|
|
|
|
Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
|
|
|
|
},
|
|
|
|
[]string{"interval"},
|
|
|
|
)
|
|
|
|
sm.targetScrapePoolReloads = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrape_pool_reloads_total",
|
|
|
|
Help: "Total number of scrape pool reloads.",
|
|
|
|
},
|
|
|
|
)
|
|
|
|
sm.targetScrapePoolReloadsFailed = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrape_pool_reloads_failed_total",
|
|
|
|
Help: "Total number of failed scrape pool reloads.",
|
|
|
|
},
|
|
|
|
)
|
|
|
|
sm.targetScrapePoolExceededTargetLimit = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrape_pool_exceeded_target_limit_total",
|
|
|
|
Help: "Total number of times scrape pools hit the target limit, during sync or config reload.",
|
|
|
|
},
|
|
|
|
)
|
|
|
|
sm.targetScrapePoolTargetLimit = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
|
|
|
Name: "prometheus_target_scrape_pool_target_limit",
|
|
|
|
Help: "Maximum number of targets allowed in this scrape pool.",
|
|
|
|
},
|
|
|
|
[]string{"scrape_job"},
|
|
|
|
)
|
|
|
|
sm.targetScrapePoolTargetsAdded = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
|
|
|
Name: "prometheus_target_scrape_pool_targets",
|
|
|
|
Help: "Current number of targets in this scrape pool.",
|
|
|
|
},
|
|
|
|
[]string{"scrape_job"},
|
|
|
|
)
|
|
|
|
sm.targetScrapePoolSymbolTableItems = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
|
|
|
Name: "prometheus_target_scrape_pool_symboltable_items",
|
|
|
|
Help: "Current number of symbols in table for this scrape pool.",
|
|
|
|
},
|
|
|
|
[]string{"scrape_job"},
|
|
|
|
)
|
|
|
|
sm.targetScrapePoolSyncsCounter = prometheus.NewCounterVec(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrape_pool_sync_total",
|
|
|
|
Help: "Total number of syncs that were executed on a scrape pool.",
|
|
|
|
},
|
|
|
|
[]string{"scrape_job"},
|
|
|
|
)
|
|
|
|
sm.targetSyncIntervalLength = prometheus.NewSummaryVec(
|
|
|
|
prometheus.SummaryOpts{
|
|
|
|
Name: "prometheus_target_sync_length_seconds",
|
|
|
|
Help: "Actual interval to sync the scrape pool.",
|
|
|
|
Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
|
|
|
|
},
|
|
|
|
[]string{"scrape_job"},
|
|
|
|
)
|
|
|
|
sm.targetSyncFailed = prometheus.NewCounterVec(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_sync_failed_total",
|
|
|
|
Help: "Total number of target sync failures.",
|
|
|
|
},
|
|
|
|
[]string{"scrape_job"},
|
|
|
|
)
|
|
|
|
|
|
|
|
// Used by targetScraper.
|
|
|
|
sm.targetScrapeExceededBodySizeLimit = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrapes_exceeded_body_size_limit_total",
|
|
|
|
Help: "Total number of scrapes that hit the body size limit",
|
|
|
|
},
|
|
|
|
)
|
|
|
|
|
|
|
|
// Used by scrapeCache.
|
|
|
|
sm.targetScrapeCacheFlushForced = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrapes_cache_flush_forced_total",
|
|
|
|
Help: "How many times a scrape cache was flushed due to getting big while scrapes are failing.",
|
|
|
|
},
|
|
|
|
)
|
|
|
|
|
|
|
|
// Used by scrapeLoop.
|
|
|
|
sm.targetIntervalLength = prometheus.NewSummaryVec(
|
|
|
|
prometheus.SummaryOpts{
|
|
|
|
Name: "prometheus_target_interval_length_seconds",
|
|
|
|
Help: "Actual intervals between scrapes.",
|
|
|
|
Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
|
|
|
|
},
|
|
|
|
[]string{"interval"},
|
|
|
|
)
|
|
|
|
sm.targetScrapeSampleLimit = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrapes_exceeded_sample_limit_total",
|
|
|
|
Help: "Total number of scrapes that hit the sample limit and were rejected.",
|
|
|
|
},
|
|
|
|
)
|
|
|
|
sm.targetScrapeSampleDuplicate = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrapes_sample_duplicate_timestamp_total",
|
|
|
|
Help: "Total number of samples rejected due to duplicate timestamps but different values.",
|
|
|
|
},
|
|
|
|
)
|
|
|
|
sm.targetScrapeSampleOutOfOrder = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrapes_sample_out_of_order_total",
|
|
|
|
Help: "Total number of samples rejected due to not being out of the expected order.",
|
|
|
|
},
|
|
|
|
)
|
|
|
|
sm.targetScrapeSampleOutOfBounds = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrapes_sample_out_of_bounds_total",
|
|
|
|
Help: "Total number of samples rejected due to timestamp falling outside of the time bounds.",
|
|
|
|
},
|
|
|
|
)
|
|
|
|
sm.targetScrapePoolExceededLabelLimits = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrape_pool_exceeded_label_limits_total",
|
|
|
|
Help: "Total number of times scrape pools hit the label limits, during sync or config reload.",
|
|
|
|
},
|
|
|
|
)
|
|
|
|
sm.targetScrapeNativeHistogramBucketLimit = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrapes_exceeded_native_histogram_bucket_limit_total",
|
|
|
|
Help: "Total number of scrapes that hit the native histogram bucket limit and were rejected.",
|
|
|
|
},
|
|
|
|
)
|
|
|
|
sm.targetScrapeExemplarOutOfOrder = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrapes_exemplar_out_of_order_total",
|
|
|
|
Help: "Total number of exemplar rejected due to not being out of the expected order.",
|
|
|
|
},
|
|
|
|
)
|
|
|
|
|
|
|
|
for _, collector := range []prometheus.Collector{
|
|
|
|
// Used by Manager.
|
|
|
|
sm.targetMetadataCache,
|
|
|
|
sm.targetScrapePools,
|
|
|
|
sm.targetScrapePoolsFailed,
|
|
|
|
// Used by scrapePool.
|
|
|
|
sm.targetReloadIntervalLength,
|
|
|
|
sm.targetScrapePoolReloads,
|
|
|
|
sm.targetScrapePoolReloadsFailed,
|
|
|
|
sm.targetSyncIntervalLength,
|
|
|
|
sm.targetScrapePoolSyncsCounter,
|
|
|
|
sm.targetScrapePoolExceededTargetLimit,
|
|
|
|
sm.targetScrapePoolTargetLimit,
|
|
|
|
sm.targetScrapePoolTargetsAdded,
|
|
|
|
sm.targetScrapePoolSymbolTableItems,
|
|
|
|
sm.targetSyncFailed,
|
|
|
|
// Used by targetScraper.
|
|
|
|
sm.targetScrapeExceededBodySizeLimit,
|
|
|
|
// Used by scrapeCache.
|
|
|
|
sm.targetScrapeCacheFlushForced,
|
|
|
|
// Used by scrapeLoop.
|
|
|
|
sm.targetIntervalLength,
|
|
|
|
sm.targetScrapeSampleLimit,
|
|
|
|
sm.targetScrapeSampleDuplicate,
|
|
|
|
sm.targetScrapeSampleOutOfOrder,
|
|
|
|
sm.targetScrapeSampleOutOfBounds,
|
|
|
|
sm.targetScrapeExemplarOutOfOrder,
|
|
|
|
sm.targetScrapePoolExceededLabelLimits,
|
|
|
|
sm.targetScrapeNativeHistogramBucketLimit,
|
|
|
|
} {
|
|
|
|
err := reg.Register(collector)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to register scrape metrics: %w", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return sm, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sm *scrapeMetrics) setTargetMetadataCacheGatherer(gatherer TargetsGatherer) {
|
|
|
|
sm.targetMetadataCache.TargetsGatherer = gatherer
|
|
|
|
}
|
|
|
|
|
|
|
|
// Unregister unregisters all metrics.
|
|
|
|
func (sm *scrapeMetrics) Unregister() {
|
|
|
|
sm.reg.Unregister(sm.targetMetadataCache)
|
|
|
|
sm.reg.Unregister(sm.targetScrapePools)
|
|
|
|
sm.reg.Unregister(sm.targetScrapePoolsFailed)
|
|
|
|
sm.reg.Unregister(sm.targetReloadIntervalLength)
|
|
|
|
sm.reg.Unregister(sm.targetScrapePoolReloads)
|
|
|
|
sm.reg.Unregister(sm.targetScrapePoolReloadsFailed)
|
|
|
|
sm.reg.Unregister(sm.targetSyncIntervalLength)
|
|
|
|
sm.reg.Unregister(sm.targetScrapePoolSyncsCounter)
|
|
|
|
sm.reg.Unregister(sm.targetScrapePoolExceededTargetLimit)
|
|
|
|
sm.reg.Unregister(sm.targetScrapePoolTargetLimit)
|
|
|
|
sm.reg.Unregister(sm.targetScrapePoolTargetsAdded)
|
|
|
|
sm.reg.Unregister(sm.targetScrapePoolSymbolTableItems)
|
|
|
|
sm.reg.Unregister(sm.targetSyncFailed)
|
|
|
|
sm.reg.Unregister(sm.targetScrapeExceededBodySizeLimit)
|
|
|
|
sm.reg.Unregister(sm.targetScrapeCacheFlushForced)
|
|
|
|
sm.reg.Unregister(sm.targetIntervalLength)
|
|
|
|
sm.reg.Unregister(sm.targetScrapeSampleLimit)
|
|
|
|
sm.reg.Unregister(sm.targetScrapeSampleDuplicate)
|
|
|
|
sm.reg.Unregister(sm.targetScrapeSampleOutOfOrder)
|
|
|
|
sm.reg.Unregister(sm.targetScrapeSampleOutOfBounds)
|
|
|
|
sm.reg.Unregister(sm.targetScrapeExemplarOutOfOrder)
|
|
|
|
sm.reg.Unregister(sm.targetScrapePoolExceededLabelLimits)
|
|
|
|
sm.reg.Unregister(sm.targetScrapeNativeHistogramBucketLimit)
|
|
|
|
}
|
|
|
|
|
|
|
|
// TargetsGatherer provides the currently active scrape targets, keyed by
// scrape job name. Per the note in newScrapeMetrics, the scrape Manager is
// the implementation and is wired in after construction via
// setTargetMetadataCacheGatherer.
type TargetsGatherer interface {
	TargetsActive() map[string][]*Target
}
|
|
|
|
|
|
|
|
// MetadataMetricsCollector is a Custom Collector for the metadata cache metrics.
// It computes the per-job entry counts and byte sizes on the fly from the
// active targets in Collect rather than maintaining its own state.
type MetadataMetricsCollector struct {
	// CacheEntries describes the per-scrape-job metadata entry count metric.
	CacheEntries *prometheus.Desc
	// CacheBytes describes the per-scrape-job metadata byte size metric.
	CacheBytes *prometheus.Desc
	// TargetsGatherer supplies the active targets; it may be nil until
	// set (see setTargetMetadataCacheGatherer), in which case Collect
	// emits nothing.
	TargetsGatherer TargetsGatherer
}
|
|
|
|
|
|
|
|
// Describe sends the metrics descriptions to the channel.
|
|
|
|
func (mc *MetadataMetricsCollector) Describe(ch chan<- *prometheus.Desc) {
|
|
|
|
ch <- mc.CacheEntries
|
|
|
|
ch <- mc.CacheBytes
|
|
|
|
}
|
|
|
|
|
|
|
|
// Collect creates and sends the metrics for the metadata cache.
|
|
|
|
func (mc *MetadataMetricsCollector) Collect(ch chan<- prometheus.Metric) {
|
|
|
|
if mc.TargetsGatherer == nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
for tset, targets := range mc.TargetsGatherer.TargetsActive() {
|
|
|
|
var size, length int
|
|
|
|
for _, t := range targets {
|
|
|
|
size += t.SizeMetadata()
|
|
|
|
length += t.LengthMetadata()
|
|
|
|
}
|
|
|
|
|
|
|
|
ch <- prometheus.MustNewConstMetric(
|
|
|
|
mc.CacheEntries,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
float64(length),
|
|
|
|
tset,
|
|
|
|
)
|
|
|
|
|
|
|
|
ch <- prometheus.MustNewConstMetric(
|
|
|
|
mc.CacheBytes,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
float64(size),
|
|
|
|
tset,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|