Browse Source

scraping: limit detail on dropped targets, to save memory (#12647)

It's possible (quite common on Kubernetes) to have a service discovery
return thousands of targets then drop most of them in relabel rules.
The main place this data is used is to display in the web UI, where
you don't want thousands of lines of display.

The new limit is `keep_dropped_targets`, which defaults to 0 (no limit)
for backwards-compatibility.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
pull/12684/head
Bryan Boreham 1 year ago committed by GitHub
parent
commit
1e3fef6ab0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 9
      config/config.go
  2. 8
      docs/configuration/configuration.md
  3. 1
      docs/querying/api.md
  4. 5
      documentation/examples/prometheus-kubernetes.yml
  5. 13
      scrape/manager.go
  6. 18
      scrape/scrape.go
  7. 1
      scrape/scrape_test.go
  8. 9
      web/api/v1/api.go
  9. 11
      web/api/v1/api_test.go
  10. 5
      web/api/v1/errors_test.go
  11. 19
      web/ui/react-app/src/pages/serviceDiscovery/Services.tsx

9
config/config.go

@ -409,6 +409,9 @@ type GlobalConfig struct {
// More than this label value length post metric-relabeling will cause the // More than this label value length post metric-relabeling will cause the
// scrape to fail. 0 means no limit. // scrape to fail. 0 means no limit.
LabelValueLengthLimit uint `yaml:"label_value_length_limit,omitempty"` LabelValueLengthLimit uint `yaml:"label_value_length_limit,omitempty"`
// Keep no more than this many dropped targets per job.
// 0 means no limit.
KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"`
} }
// SetDirectory joins any relative file paths with dir. // SetDirectory joins any relative file paths with dir.
@ -514,6 +517,9 @@ type ScrapeConfig struct {
// More than this many buckets in a native histogram will cause the scrape to // More than this many buckets in a native histogram will cause the scrape to
// fail. // fail.
NativeHistogramBucketLimit uint `yaml:"native_histogram_bucket_limit,omitempty"` NativeHistogramBucketLimit uint `yaml:"native_histogram_bucket_limit,omitempty"`
// Keep no more than this many dropped targets per job.
// 0 means no limit.
KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"`
// We cannot do proper Go type embedding below as the parser will then parse // We cannot do proper Go type embedding below as the parser will then parse
// values arbitrarily into the overflow maps of further-down types. // values arbitrarily into the overflow maps of further-down types.
@ -608,6 +614,9 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error {
if c.LabelValueLengthLimit == 0 { if c.LabelValueLengthLimit == 0 {
c.LabelValueLengthLimit = globalConfig.LabelValueLengthLimit c.LabelValueLengthLimit = globalConfig.LabelValueLengthLimit
} }
if c.KeepDroppedTargets == 0 {
c.KeepDroppedTargets = globalConfig.KeepDroppedTargets
}
return nil return nil
} }

8
docs/configuration/configuration.md

@ -106,6 +106,10 @@ global:
# change in the future. # change in the future.
[ target_limit: <int> | default = 0 ] [ target_limit: <int> | default = 0 ]
# Limit per scrape config on the number of targets dropped by relabeling
# that will be kept in memory. 0 means no limit.
[ keep_dropped_targets: <int> | default = 0 ]
# Rule files specifies a list of globs. Rules and alerts are read from # Rule files specifies a list of globs. Rules and alerts are read from
# all matching files. # all matching files.
rule_files: rule_files:
@ -415,6 +419,10 @@ metric_relabel_configs:
# change in the future. # change in the future.
[ target_limit: <int> | default = 0 ] [ target_limit: <int> | default = 0 ]
# Per-job limit on the number of targets dropped by relabeling
# that will be kept in memory. 0 means no limit.
[ keep_dropped_targets: <int> | default = 0 ]
# Limit on total number of positive and negative buckets allowed in a single # Limit on total number of positive and negative buckets allowed in a single
# native histogram. If this is exceeded, the entire scrape will be treated as # native histogram. If this is exceeded, the entire scrape will be treated as
# failed. 0 means no limit. # failed. 0 means no limit.

1
docs/querying/api.md

@ -543,6 +543,7 @@ GET /api/v1/targets
``` ```
Both the active and dropped targets are part of the response by default. Both the active and dropped targets are part of the response by default.
Dropped targets are subject to the `keep_dropped_targets` limit, if set.
`labels` represents the label set after relabeling has occurred. `labels` represents the label set after relabeling has occurred.
`discoveredLabels` represent the unmodified labels retrieved during service discovery before relabeling has occurred. `discoveredLabels` represent the unmodified labels retrieved during service discovery before relabeling has occurred.

5
documentation/examples/prometheus-kubernetes.yml

@ -8,6 +8,11 @@
# If you are using Kubernetes 1.7.2 or earlier, please take note of the comments # If you are using Kubernetes 1.7.2 or earlier, please take note of the comments
# for the kubernetes-cadvisor job; you will need to edit or remove this job. # for the kubernetes-cadvisor job; you will need to edit or remove this job.
# Keep at most 100 sets of details of targets dropped by relabeling.
# This information is used to display in the UI for troubleshooting.
global:
keep_dropped_targets: 100
# Scrape config for API servers. # Scrape config for API servers.
# #
# Kubernetes exposes API servers as endpoints to the default/kubernetes # Kubernetes exposes API servers as endpoints to the default/kubernetes

13
scrape/manager.go

@ -357,7 +357,7 @@ func (m *Manager) TargetsActive() map[string][]*Target {
return targets return targets
} }
// TargetsDropped returns the dropped targets during relabelling. // TargetsDropped returns the dropped targets during relabelling, subject to KeepDroppedTargets limit.
func (m *Manager) TargetsDropped() map[string][]*Target { func (m *Manager) TargetsDropped() map[string][]*Target {
m.mtxScrape.Lock() m.mtxScrape.Lock()
defer m.mtxScrape.Unlock() defer m.mtxScrape.Unlock()
@ -368,3 +368,14 @@ func (m *Manager) TargetsDropped() map[string][]*Target {
} }
return targets return targets
} }
// TargetsDroppedCounts returns, for each scrape pool, the total number of
// targets dropped during relabelling. Unlike TargetsDropped, the counts are
// not capped by the KeepDroppedTargets limit.
func (m *Manager) TargetsDroppedCounts() map[string]int {
	m.mtxScrape.Lock()
	defer m.mtxScrape.Unlock()

	counts := make(map[string]int, len(m.scrapePools))
	for tset, sp := range m.scrapePools {
		// Go through the accessor so the counter is read while holding the
		// pool's targetMtx rather than being read directly from the field.
		counts[tset] = sp.DroppedTargetsCount()
	}
	return counts
}

18
scrape/scrape.go

@ -242,8 +242,9 @@ type scrapePool struct {
targetMtx sync.Mutex targetMtx sync.Mutex
// activeTargets and loops must always be synchronized to have the same // activeTargets and loops must always be synchronized to have the same
// set of hashes. // set of hashes.
activeTargets map[uint64]*Target activeTargets map[uint64]*Target
droppedTargets []*Target droppedTargets []*Target // Subject to KeepDroppedTargets limit.
droppedTargetsCount int // Count of all dropped targets.
// Constructor for new scrape loops. This is settable for testing convenience. // Constructor for new scrape loops. This is settable for testing convenience.
newLoop func(scrapeLoopOptions) loop newLoop func(scrapeLoopOptions) loop
@ -354,12 +355,19 @@ func (sp *scrapePool) ActiveTargets() []*Target {
return tActive return tActive
} }
// DroppedTargets returns the dropped targets, subject to the KeepDroppedTargets limit.
func (sp *scrapePool) DroppedTargets() []*Target { func (sp *scrapePool) DroppedTargets() []*Target {
sp.targetMtx.Lock() sp.targetMtx.Lock()
defer sp.targetMtx.Unlock() defer sp.targetMtx.Unlock()
return sp.droppedTargets return sp.droppedTargets
} }
// DroppedTargetsCount returns the pool's dropped-target counter, read while
// holding targetMtx.
func (sp *scrapePool) DroppedTargetsCount() int {
	sp.targetMtx.Lock()
	n := sp.droppedTargetsCount
	sp.targetMtx.Unlock()
	return n
}
// stop terminates all scrape loops and returns after they all terminated. // stop terminates all scrape loops and returns after they all terminated.
func (sp *scrapePool) stop() { func (sp *scrapePool) stop() {
sp.mtx.Lock() sp.mtx.Lock()
@ -506,6 +514,7 @@ func (sp *scrapePool) Sync(tgs []*targetgroup.Group) {
var targets []*Target var targets []*Target
lb := labels.NewBuilder(labels.EmptyLabels()) lb := labels.NewBuilder(labels.EmptyLabels())
sp.droppedTargets = []*Target{} sp.droppedTargets = []*Target{}
sp.droppedTargetsCount = 0
for _, tg := range tgs { for _, tg := range tgs {
targets, failures := TargetsFromGroup(tg, sp.config, sp.noDefaultPort, targets, lb) targets, failures := TargetsFromGroup(tg, sp.config, sp.noDefaultPort, targets, lb)
for _, err := range failures { for _, err := range failures {
@ -520,7 +529,10 @@ func (sp *scrapePool) Sync(tgs []*targetgroup.Group) {
case nonEmpty: case nonEmpty:
all = append(all, t) all = append(all, t)
case !t.discoveredLabels.IsEmpty(): case !t.discoveredLabels.IsEmpty():
sp.droppedTargets = append(sp.droppedTargets, t) if sp.config.KeepDroppedTargets != 0 && uint(len(sp.droppedTargets)) < sp.config.KeepDroppedTargets {
sp.droppedTargets = append(sp.droppedTargets, t)
}
sp.droppedTargetsCount++
} }
} }
} }

1
scrape/scrape_test.go

@ -88,6 +88,7 @@ func TestDroppedTargetsList(t *testing.T) {
SourceLabels: model.LabelNames{"job"}, SourceLabels: model.LabelNames{"job"},
}, },
}, },
KeepDroppedTargets: 1,
} }
tgs = []*targetgroup.Group{ tgs = []*targetgroup.Group{
{ {

9
web/api/v1/api.go

@ -100,6 +100,7 @@ type ScrapePoolsRetriever interface {
type TargetRetriever interface { type TargetRetriever interface {
TargetsActive() map[string][]*scrape.Target TargetsActive() map[string][]*scrape.Target
TargetsDropped() map[string][]*scrape.Target TargetsDropped() map[string][]*scrape.Target
TargetsDroppedCounts() map[string]int
} }
// AlertmanagerRetriever provides a list of all/dropped AlertManager URLs. // AlertmanagerRetriever provides a list of all/dropped AlertManager URLs.
@ -898,8 +899,9 @@ type DroppedTarget struct {
// TargetDiscovery has all the active targets. // TargetDiscovery has all the active targets.
type TargetDiscovery struct { type TargetDiscovery struct {
ActiveTargets []*Target `json:"activeTargets"` ActiveTargets []*Target `json:"activeTargets"`
DroppedTargets []*DroppedTarget `json:"droppedTargets"` DroppedTargets []*DroppedTarget `json:"droppedTargets"`
DroppedTargetCounts map[string]int `json:"droppedTargetCounts"`
} }
// GlobalURLOptions contains fields used for deriving the global URL for local targets. // GlobalURLOptions contains fields used for deriving the global URL for local targets.
@ -1039,6 +1041,9 @@ func (api *API) targets(r *http.Request) apiFuncResult {
} else { } else {
res.ActiveTargets = []*Target{} res.ActiveTargets = []*Target{}
} }
if showDropped {
res.DroppedTargetCounts = api.targetRetriever(r.Context()).TargetsDroppedCounts()
}
if showDropped { if showDropped {
targetsDropped := api.targetRetriever(r.Context()).TargetsDropped() targetsDropped := api.targetRetriever(r.Context()).TargetsDropped()
droppedKeys, numTargets := sortKeys(targetsDropped) droppedKeys, numTargets := sortKeys(targetsDropped)

11
web/api/v1/api_test.go

@ -137,6 +137,14 @@ func (t testTargetRetriever) TargetsDropped() map[string][]*scrape.Target {
return t.droppedTargets return t.droppedTargets
} }
// TargetsDroppedCounts reports, for every key in the fake's droppedTargets
// map, how many targets are stored under that key.
func (t testTargetRetriever) TargetsDroppedCounts() map[string]int {
	counts := map[string]int{}
	for pool := range t.droppedTargets {
		counts[pool] = len(t.droppedTargets[pool])
	}
	return counts
}
func (t *testTargetRetriever) SetMetadataStoreForTargets(identifier string, metadata scrape.MetricMetadataStore) error { func (t *testTargetRetriever) SetMetadataStoreForTargets(identifier string, metadata scrape.MetricMetadataStore) error {
targets, ok := t.activeTargets[identifier] targets, ok := t.activeTargets[identifier]
@ -1384,6 +1392,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E
}, },
}, },
}, },
DroppedTargetCounts: map[string]int{"blackbox": 1},
}, },
}, },
{ {
@ -1436,6 +1445,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E
}, },
}, },
}, },
DroppedTargetCounts: map[string]int{"blackbox": 1},
}, },
}, },
{ {
@ -1498,6 +1508,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E
}, },
}, },
}, },
DroppedTargetCounts: map[string]int{"blackbox": 1},
}, },
}, },
// With a matching metric. // With a matching metric.

5
web/api/v1/errors_test.go

@ -229,6 +229,11 @@ func (DummyTargetRetriever) TargetsDropped() map[string][]*scrape.Target {
return map[string][]*scrape.Target{} return map[string][]*scrape.Target{}
} }
// TargetsDroppedCounts implements TargetRetriever. The dummy tracks no
// targets, so it always hands back a nil map.
func (DummyTargetRetriever) TargetsDroppedCounts() map[string]int {
	var counts map[string]int
	return counts
}
// DummyAlertmanagerRetriever implements AlertmanagerRetriever. // DummyAlertmanagerRetriever implements AlertmanagerRetriever.
type DummyAlertmanagerRetriever struct{} type DummyAlertmanagerRetriever struct{}

19
web/ui/react-app/src/pages/serviceDiscovery/Services.tsx

@ -14,6 +14,7 @@ import SearchBar from '../../components/SearchBar';
interface ServiceMap { interface ServiceMap {
activeTargets: Target[]; activeTargets: Target[];
droppedTargets: DroppedTarget[]; droppedTargets: DroppedTarget[];
droppedTargetCounts: Record<string, number>;
} }
export interface TargetLabels { export interface TargetLabels {
@ -34,7 +35,7 @@ const droppedTargetKVSearch = new KVSearch<DroppedTarget>({
export const processSummary = ( export const processSummary = (
activeTargets: Target[], activeTargets: Target[],
droppedTargets: DroppedTarget[] droppedTargetCounts: Record<string, number>
): Record<string, { active: number; total: number }> => { ): Record<string, { active: number; total: number }> => {
const targets: Record<string, { active: number; total: number }> = {}; const targets: Record<string, { active: number; total: number }> = {};
@ -50,15 +51,15 @@ export const processSummary = (
targets[name].total++; targets[name].total++;
targets[name].active++; targets[name].active++;
} }
for (const target of droppedTargets) { for (const name in targets) {
const { job: name } = target.discoveredLabels;
if (!targets[name]) { if (!targets[name]) {
targets[name] = { targets[name] = {
total: 0, total: droppedTargetCounts[name],
active: 0, active: 0,
}; };
} else {
targets[name].total += droppedTargetCounts[name];
} }
targets[name].total++;
} }
return targets; return targets;
@ -94,10 +95,10 @@ export const processTargets = (activeTargets: Target[], droppedTargets: DroppedT
return labels; return labels;
}; };
export const ServiceDiscoveryContent: FC<ServiceMap> = ({ activeTargets, droppedTargets }) => { export const ServiceDiscoveryContent: FC<ServiceMap> = ({ activeTargets, droppedTargets, droppedTargetCounts }) => {
const [activeTargetList, setActiveTargetList] = useState(activeTargets); const [activeTargetList, setActiveTargetList] = useState(activeTargets);
const [droppedTargetList, setDroppedTargetList] = useState(droppedTargets); const [droppedTargetList, setDroppedTargetList] = useState(droppedTargets);
const [targetList, setTargetList] = useState(processSummary(activeTargets, droppedTargets)); const [targetList, setTargetList] = useState(processSummary(activeTargets, droppedTargetCounts));
const [labelList, setLabelList] = useState(processTargets(activeTargets, droppedTargets)); const [labelList, setLabelList] = useState(processTargets(activeTargets, droppedTargets));
const handleSearchChange = useCallback( const handleSearchChange = useCallback(
@ -118,9 +119,9 @@ export const ServiceDiscoveryContent: FC<ServiceMap> = ({ activeTargets, dropped
const defaultValue = useMemo(getQuerySearchFilter, []); const defaultValue = useMemo(getQuerySearchFilter, []);
useEffect(() => { useEffect(() => {
setTargetList(processSummary(activeTargetList, droppedTargetList)); setTargetList(processSummary(activeTargetList, droppedTargetCounts));
setLabelList(processTargets(activeTargetList, droppedTargetList)); setLabelList(processTargets(activeTargetList, droppedTargetList));
}, [activeTargetList, droppedTargetList]); }, [activeTargetList, droppedTargetList, droppedTargetCounts]);
return ( return (
<> <>

Loading…
Cancel
Save