Merge pull request #56389 from jpbetz/metrics-memory-fix

Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Reduce memory footprint of admission metrics

Fix #56061

Remove `SummaryVec` from all but the top level metric and reduce buckets for the histogram from 7 to 5.

For a small test cluster, the `Individual Memory Usage: kube-system kube-apiserver-kubernetes-master` grafana metric suggests showed the memory footprint of apiserver at 655MB before this change and 415MB after, suggesting this accounts for the vast majority of the ~200MB of memory increase found in #56061.

```release-note
None
```
pull/6/head
Kubernetes Submit Queue 2017-11-27 00:05:50 -08:00 committed by GitHub
commit 02a7c12cbd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 31 additions and 21 deletions

View File

@ -32,7 +32,8 @@ const (
)
var (
latencyBuckets = prometheus.ExponentialBuckets(25000, 2.0, 7)
// Use buckets ranging from 25 ms to ~2.5 seconds.
latencyBuckets = prometheus.ExponentialBuckets(25000, 2.5, 5)
latencySummaryMaxAge = 5 * time.Hour
// Metrics provides access to all admission metrics.
@ -112,17 +113,17 @@ func newAdmissionMetrics() *AdmissionMetrics {
// Each step is identified by a distinct type label value.
step := newMetricSet("step",
[]string{"type", "operation", "group", "version", "resource", "subresource", "rejected"},
"Admission sub-step %s, broken out for each operation and API resource and step type (validate or admit).")
"Admission sub-step %s, broken out for each operation and API resource and step type (validate or admit).", true)
// Built-in admission controller metrics. Each admission controller is identified by name.
controller := newMetricSet("controller",
[]string{"name", "type", "operation", "group", "version", "resource", "subresource", "rejected"},
"Admission controller %s, identified by name and broken out for each operation and API resource and type (validate or admit).")
"Admission controller %s, identified by name and broken out for each operation and API resource and type (validate or admit).", false)
// Admission webhook metrics. Each webhook is identified by name.
webhook := newMetricSet("webhook",
[]string{"name", "type", "operation", "group", "version", "resource", "subresource", "rejected"},
"Admission webhook %s, identified by name and broken out for each operation and API resource and type (validate or admit).")
"Admission webhook %s, identified by name and broken out for each operation and API resource and type (validate or admit).", false)
step.mustRegister()
controller.mustRegister()
@ -159,7 +160,21 @@ type metricSet struct {
latenciesSummary *prometheus.SummaryVec
}
func newMetricSet(name string, labels []string, helpTemplate string) *metricSet {
func newMetricSet(name string, labels []string, helpTemplate string, hasSummary bool) *metricSet {
var summary *prometheus.SummaryVec
if hasSummary {
summary = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: fmt.Sprintf("%s_admission_latencies_seconds_summary", name),
Help: fmt.Sprintf(helpTemplate, "latency summary"),
MaxAge: latencySummaryMaxAge,
},
labels,
)
}
return &metricSet{
latencies: prometheus.NewHistogramVec(
prometheus.HistogramOpts{
@ -171,34 +186,32 @@ func newMetricSet(name string, labels []string, helpTemplate string) *metricSet
},
labels,
),
latenciesSummary: prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: fmt.Sprintf("%s_admission_latencies_seconds_summary", name),
Help: fmt.Sprintf(helpTemplate, "latency summary"),
MaxAge: latencySummaryMaxAge,
},
labels,
),
latenciesSummary: summary,
}
}
// MustRegister registers all the prometheus metrics in the metricSet.
func (m *metricSet) mustRegister() {
prometheus.MustRegister(m.latencies)
prometheus.MustRegister(m.latenciesSummary)
if m.latenciesSummary != nil {
prometheus.MustRegister(m.latenciesSummary)
}
}
// Reset resets all the prometheus metrics in the metricSet.
func (m *metricSet) reset() {
m.latencies.Reset()
m.latenciesSummary.Reset()
if m.latenciesSummary != nil {
m.latenciesSummary.Reset()
}
}
// Observe records an observed admission event to all metrics in the metricSet.
func (m *metricSet) observe(elapsed time.Duration, labels ...string) {
elapsedMicroseconds := float64(elapsed / time.Microsecond)
m.latencies.WithLabelValues(labels...).Observe(elapsedMicroseconds)
m.latenciesSummary.WithLabelValues(labels...).Observe(elapsedMicroseconds)
if m.latenciesSummary != nil {
m.latenciesSummary.WithLabelValues(labels...).Observe(elapsedMicroseconds)
}
}

View File

@ -69,11 +69,9 @@ func TestObserveAdmissionController(t *testing.T) {
"rejected": "false",
}
expectHistogramCountTotal(t, "apiserver_admission_controller_admission_latencies_seconds", wantLabels, 1)
expectFindMetric(t, "apiserver_admission_controller_admission_latencies_seconds_summary", wantLabels)
wantLabels["type"] = "validate"
expectHistogramCountTotal(t, "apiserver_admission_controller_admission_latencies_seconds", wantLabels, 1)
expectFindMetric(t, "apiserver_admission_controller_admission_latencies_seconds_summary", wantLabels)
}
func TestObserveWebhook(t *testing.T) {
@ -90,7 +88,6 @@ func TestObserveWebhook(t *testing.T) {
"rejected": "false",
}
expectHistogramCountTotal(t, "apiserver_admission_webhook_admission_latencies_seconds", wantLabels, 1)
expectFindMetric(t, "apiserver_admission_webhook_admission_latencies_seconds_summary", wantLabels)
}
func TestWithMetrics(t *testing.T) {