Merge pull request #15428 from prometheus/beorn7/metrics

notifier: fix increment of metric prometheus_notifications_errors_total
pull/15468/head
Björn Rabenstein 2024-11-26 17:38:59 +01:00 committed by GitHub
commit b9dd95f499
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 7 additions and 6 deletions

View File

@ -2,6 +2,7 @@
## unreleased ## unreleased
* [CHANGE] Notifier: Increment the prometheus_notifications_errors_total metric by the number of affected alerts rather than by one per batch of affected alerts. #15428
* [ENHANCEMENT] OTLP receiver: Convert also metric metadata. #15416 * [ENHANCEMENT] OTLP receiver: Convert also metric metadata. #15416
## 3.0.0 / 2024-11-14 ## 3.0.0 / 2024-11-14

View File

@ -84,8 +84,8 @@
severity: 'warning', severity: 'warning',
}, },
annotations: { annotations: {
summary: 'Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.', summary: 'More than 1% of alerts sent by Prometheus to a specific Alertmanager were affected by errors.',
description: '{{ printf "%%.1f" $value }}%% errors while sending alerts from Prometheus %(prometheusName)s to Alertmanager {{$labels.alertmanager}}.' % $._config, description: '{{ printf "%%.1f" $value }}%% of alerts sent by Prometheus %(prometheusName)s to Alertmanager {{$labels.alertmanager}} were affected by errors.' % $._config,
}, },
}, },
{ {

View File

@ -160,7 +160,7 @@ func newAlertMetrics(r prometheus.Registerer, queueCap int, queueLen, alertmanag
Namespace: namespace, Namespace: namespace,
Subsystem: subsystem, Subsystem: subsystem,
Name: "errors_total", Name: "errors_total",
Help: "Total number of errors sending alert notifications.", Help: "Total number of sent alerts affected by errors.",
}, },
[]string{alertmanagerLabel}, []string{alertmanagerLabel},
), ),
@ -619,13 +619,13 @@ func (n *Manager) sendAll(alerts ...*Alert) bool {
go func(ctx context.Context, client *http.Client, url string, payload []byte, count int) { go func(ctx context.Context, client *http.Client, url string, payload []byte, count int) {
if err := n.sendOne(ctx, client, url, payload); err != nil { if err := n.sendOne(ctx, client, url, payload); err != nil {
n.logger.Error("Error sending alert", "alertmanager", url, "count", count, "err", err) n.logger.Error("Error sending alerts", "alertmanager", url, "count", count, "err", err)
n.metrics.errors.WithLabelValues(url).Inc() n.metrics.errors.WithLabelValues(url).Add(float64(count))
} else { } else {
numSuccess.Inc() numSuccess.Inc()
} }
n.metrics.latency.WithLabelValues(url).Observe(time.Since(begin).Seconds()) n.metrics.latency.WithLabelValues(url).Observe(time.Since(begin).Seconds())
n.metrics.sent.WithLabelValues(url).Add(float64(len(amAlerts))) n.metrics.sent.WithLabelValues(url).Add(float64(count))
wg.Done() wg.Done()
}(ctx, ams.client, am.url().String(), payload, len(amAlerts)) }(ctx, ams.client, am.url().String(), payload, len(amAlerts))