Browse Source

scraping: limit detail on dropped targets, to save memory (#12647)

It's possible (quite common on Kubernetes) to have a service discovery
return thousands of targets then drop most of them in relabel rules.
The main place this data is used is to display in the web UI, where
you don't want thousands of lines of display.

The new limit is `keep_dropped_targets`, which defaults to 0 (no limit)
for backwards-compatibility.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
pull/12684/head
Bryan Boreham 1 year ago committed by GitHub
parent
commit
1e3fef6ab0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 9
      config/config.go
  2. 8
      docs/configuration/configuration.md
  3. 1
      docs/querying/api.md
  4. 5
      documentation/examples/prometheus-kubernetes.yml
  5. 13
      scrape/manager.go
  6. 18
      scrape/scrape.go
  7. 1
      scrape/scrape_test.go
  8. 9
      web/api/v1/api.go
  9. 11
      web/api/v1/api_test.go
  10. 5
      web/api/v1/errors_test.go
  11. 19
      web/ui/react-app/src/pages/serviceDiscovery/Services.tsx

9
config/config.go

@ -409,6 +409,9 @@ type GlobalConfig struct {
// More than this label value length post metric-relabeling will cause the
// scrape to fail. 0 means no limit.
LabelValueLengthLimit uint `yaml:"label_value_length_limit,omitempty"`
// Keep no more than this many dropped targets per job.
// 0 means no limit.
KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"`
}
// SetDirectory joins any relative file paths with dir.
@ -514,6 +517,9 @@ type ScrapeConfig struct {
// More than this many buckets in a native histogram will cause the scrape to
// fail.
NativeHistogramBucketLimit uint `yaml:"native_histogram_bucket_limit,omitempty"`
// Keep no more than this many dropped targets per job.
// 0 means no limit.
KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"`
// We cannot do proper Go type embedding below as the parser will then parse
// values arbitrarily into the overflow maps of further-down types.
@ -608,6 +614,9 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error {
if c.LabelValueLengthLimit == 0 {
c.LabelValueLengthLimit = globalConfig.LabelValueLengthLimit
}
if c.KeepDroppedTargets == 0 {
c.KeepDroppedTargets = globalConfig.KeepDroppedTargets
}
return nil
}

8
docs/configuration/configuration.md

@ -106,6 +106,10 @@ global:
# change in the future.
[ target_limit: <int> | default = 0 ]
# Limit per scrape config on the number of targets dropped by relabeling
# that will be kept in memory. 0 means no limit.
[ keep_dropped_targets: <int> | default = 0 ]
# Rule files specifies a list of globs. Rules and alerts are read from
# all matching files.
rule_files:
@ -415,6 +419,10 @@ metric_relabel_configs:
# change in the future.
[ target_limit: <int> | default = 0 ]
# Per-job limit on the number of targets dropped by relabeling
# that will be kept in memory. 0 means no limit.
[ keep_dropped_targets: <int> | default = 0 ]
# Limit on total number of positive and negative buckets allowed in a single
# native histogram. If this is exceeded, the entire scrape will be treated as
# failed. 0 means no limit.

1
docs/querying/api.md

@ -543,6 +543,7 @@ GET /api/v1/targets
```
Both the active and dropped targets are part of the response by default.
Dropped targets are subject to the `keep_dropped_targets` limit, if set.
`labels` represents the label set after relabeling has occurred.
`discoveredLabels` represent the unmodified labels retrieved during service discovery before relabeling has occurred.

5
documentation/examples/prometheus-kubernetes.yml

@ -8,6 +8,11 @@
# If you are using Kubernetes 1.7.2 or earlier, please take note of the comments
# for the kubernetes-cadvisor job; you will need to edit or remove this job.
# Keep details of at most 100 targets per job that were dropped by relabeling.
# This information is displayed in the UI for troubleshooting.
global:
keep_dropped_targets: 100
# Scrape config for API servers.
#
# Kubernetes exposes API servers as endpoints to the default/kubernetes

13
scrape/manager.go

@ -357,7 +357,7 @@ func (m *Manager) TargetsActive() map[string][]*Target {
return targets
}
// TargetsDropped returns the dropped targets during relabelling.
// TargetsDropped returns the dropped targets during relabelling, subject to KeepDroppedTargets limit.
func (m *Manager) TargetsDropped() map[string][]*Target {
m.mtxScrape.Lock()
defer m.mtxScrape.Unlock()
@ -368,3 +368,14 @@ func (m *Manager) TargetsDropped() map[string][]*Target {
}
return targets
}
// TargetsDroppedCounts returns, for each scrape pool, the total number of
// targets dropped during relabelling. Unlike TargetsDropped, the counts are
// not capped by the KeepDroppedTargets limit.
func (m *Manager) TargetsDroppedCounts() map[string]int {
	m.mtxScrape.Lock()
	defer m.mtxScrape.Unlock()

	counts := make(map[string]int, len(m.scrapePools))
	for tset, sp := range m.scrapePools {
		// Go through the accessor so the read happens under sp.targetMtx,
		// the same mutex DroppedTargets and DroppedTargetsCount take.
		counts[tset] = sp.DroppedTargetsCount()
	}
	return counts
}

18
scrape/scrape.go

@ -242,8 +242,9 @@ type scrapePool struct {
targetMtx sync.Mutex
// activeTargets and loops must always be synchronized to have the same
// set of hashes.
activeTargets map[uint64]*Target
droppedTargets []*Target
activeTargets map[uint64]*Target
droppedTargets []*Target // Subject to KeepDroppedTargets limit.
droppedTargetsCount int // Count of all dropped targets.
// Constructor for new scrape loops. This is settable for testing convenience.
newLoop func(scrapeLoopOptions) loop
@ -354,12 +355,19 @@ func (sp *scrapePool) ActiveTargets() []*Target {
return tActive
}
// DroppedTargets returns the dropped targets retained by the pool; the list
// is subject to the KeepDroppedTargets limit.
func (sp *scrapePool) DroppedTargets() []*Target {
	sp.targetMtx.Lock()
	dropped := sp.droppedTargets
	sp.targetMtx.Unlock()
	return dropped
}
// DroppedTargetsCount returns the count of all dropped targets, read while
// holding targetMtx.
func (sp *scrapePool) DroppedTargetsCount() int {
	sp.targetMtx.Lock()
	total := sp.droppedTargetsCount
	sp.targetMtx.Unlock()
	return total
}
// stop terminates all scrape loops and returns after they all terminated.
func (sp *scrapePool) stop() {
sp.mtx.Lock()
@ -506,6 +514,7 @@ func (sp *scrapePool) Sync(tgs []*targetgroup.Group) {
var targets []*Target
lb := labels.NewBuilder(labels.EmptyLabels())
sp.droppedTargets = []*Target{}
sp.droppedTargetsCount = 0
for _, tg := range tgs {
targets, failures := TargetsFromGroup(tg, sp.config, sp.noDefaultPort, targets, lb)
for _, err := range failures {
@ -520,7 +529,10 @@ func (sp *scrapePool) Sync(tgs []*targetgroup.Group) {
case nonEmpty:
all = append(all, t)
case !t.discoveredLabels.IsEmpty():
sp.droppedTargets = append(sp.droppedTargets, t)
// A KeepDroppedTargets of 0 means "no limit": keep every dropped target.
// Otherwise retain details only until the configured cap is reached.
if sp.config.KeepDroppedTargets == 0 || uint(len(sp.droppedTargets)) < sp.config.KeepDroppedTargets {
	sp.droppedTargets = append(sp.droppedTargets, t)
}
sp.droppedTargetsCount++
}
}
}

1
scrape/scrape_test.go

@ -88,6 +88,7 @@ func TestDroppedTargetsList(t *testing.T) {
SourceLabels: model.LabelNames{"job"},
},
},
KeepDroppedTargets: 1,
}
tgs = []*targetgroup.Group{
{

9
web/api/v1/api.go

@ -100,6 +100,7 @@ type ScrapePoolsRetriever interface {
type TargetRetriever interface {
TargetsActive() map[string][]*scrape.Target
TargetsDropped() map[string][]*scrape.Target
TargetsDroppedCounts() map[string]int
}
// AlertmanagerRetriever provides a list of all/dropped AlertManager URLs.
@ -898,8 +899,9 @@ type DroppedTarget struct {
// TargetDiscovery has all the active targets, the retained dropped targets and the dropped-target counts.
type TargetDiscovery struct {
ActiveTargets []*Target `json:"activeTargets"`
DroppedTargets []*DroppedTarget `json:"droppedTargets"`
ActiveTargets []*Target `json:"activeTargets"`
DroppedTargets []*DroppedTarget `json:"droppedTargets"`
DroppedTargetCounts map[string]int `json:"droppedTargetCounts"`
}
// GlobalURLOptions contains fields used for deriving the global URL for local targets.
@ -1039,6 +1041,9 @@ func (api *API) targets(r *http.Request) apiFuncResult {
} else {
res.ActiveTargets = []*Target{}
}
if showDropped {
res.DroppedTargetCounts = api.targetRetriever(r.Context()).TargetsDroppedCounts()
}
if showDropped {
targetsDropped := api.targetRetriever(r.Context()).TargetsDropped()
droppedKeys, numTargets := sortKeys(targetsDropped)

11
web/api/v1/api_test.go

@ -137,6 +137,14 @@ func (t testTargetRetriever) TargetsDropped() map[string][]*scrape.Target {
return t.droppedTargets
}
// TargetsDroppedCounts reports, for each key of droppedTargets, how many
// dropped targets the test retriever holds under that key.
func (t testTargetRetriever) TargetsDroppedCounts() map[string]int {
	counts := make(map[string]int)
	for pool := range t.droppedTargets {
		counts[pool] = len(t.droppedTargets[pool])
	}
	return counts
}
func (t *testTargetRetriever) SetMetadataStoreForTargets(identifier string, metadata scrape.MetricMetadataStore) error {
targets, ok := t.activeTargets[identifier]
@ -1384,6 +1392,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E
},
},
},
DroppedTargetCounts: map[string]int{"blackbox": 1},
},
},
{
@ -1436,6 +1445,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E
},
},
},
DroppedTargetCounts: map[string]int{"blackbox": 1},
},
},
{
@ -1498,6 +1508,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E
},
},
},
DroppedTargetCounts: map[string]int{"blackbox": 1},
},
},
// With a matching metric.

5
web/api/v1/errors_test.go

@ -229,6 +229,11 @@ func (DummyTargetRetriever) TargetsDropped() map[string][]*scrape.Target {
return map[string][]*scrape.Target{}
}
// TargetsDroppedCounts implements targetRetriever; the dummy tracks no
// dropped targets and always yields a nil map.
func (DummyTargetRetriever) TargetsDroppedCounts() map[string]int {
	var counts map[string]int // the zero value of a map is nil
	return counts
}
// DummyAlertmanagerRetriever implements AlertmanagerRetriever.
type DummyAlertmanagerRetriever struct{}

19
web/ui/react-app/src/pages/serviceDiscovery/Services.tsx

@ -14,6 +14,7 @@ import SearchBar from '../../components/SearchBar';
interface ServiceMap {
activeTargets: Target[];
droppedTargets: DroppedTarget[];
droppedTargetCounts: Record<string, number>;
}
export interface TargetLabels {
@ -34,7 +35,7 @@ const droppedTargetKVSearch = new KVSearch<DroppedTarget>({
export const processSummary = (
activeTargets: Target[],
droppedTargets: DroppedTarget[]
droppedTargetCounts: Record<string, number>
): Record<string, { active: number; total: number }> => {
const targets: Record<string, { active: number; total: number }> = {};
@ -50,15 +51,15 @@ export const processSummary = (
targets[name].total++;
targets[name].active++;
}
for (const target of droppedTargets) {
const { job: name } = target.discoveredLabels;
for (const name in targets) {
if (!targets[name]) {
targets[name] = {
total: 0,
total: droppedTargetCounts[name],
active: 0,
};
} else {
targets[name].total += droppedTargetCounts[name];
}
targets[name].total++;
}
return targets;
@ -94,10 +95,10 @@ export const processTargets = (activeTargets: Target[], droppedTargets: DroppedT
return labels;
};
export const ServiceDiscoveryContent: FC<ServiceMap> = ({ activeTargets, droppedTargets }) => {
export const ServiceDiscoveryContent: FC<ServiceMap> = ({ activeTargets, droppedTargets, droppedTargetCounts }) => {
const [activeTargetList, setActiveTargetList] = useState(activeTargets);
const [droppedTargetList, setDroppedTargetList] = useState(droppedTargets);
const [targetList, setTargetList] = useState(processSummary(activeTargets, droppedTargets));
const [targetList, setTargetList] = useState(processSummary(activeTargets, droppedTargetCounts));
const [labelList, setLabelList] = useState(processTargets(activeTargets, droppedTargets));
const handleSearchChange = useCallback(
@ -118,9 +119,9 @@ export const ServiceDiscoveryContent: FC<ServiceMap> = ({ activeTargets, dropped
const defaultValue = useMemo(getQuerySearchFilter, []);
useEffect(() => {
setTargetList(processSummary(activeTargetList, droppedTargetList));
setTargetList(processSummary(activeTargetList, droppedTargetCounts));
setLabelList(processTargets(activeTargetList, droppedTargetList));
}, [activeTargetList, droppedTargetList]);
}, [activeTargetList, droppedTargetList, droppedTargetCounts]);
return (
<>

Loading…
Cancel
Save