From 1af81dc5c9f67227aaf7d404b0ffe05355790513 Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Mon, 3 Jan 2022 11:13:48 +0100 Subject: [PATCH] Update sent timestamp when write irrecoverably fails. We have an alert that fires when prometheus_remote_storage_highest_timestamp_in_seconds - prometheus_remote_storage_queue_highest_sent_timestamp_seconds becomes too high. But we have an agent that fires this when the remote "rate-limits" the user. This is because prometheus_remote_storage_queue_highest_sent_timestamp_seconds doesn't get updated when the remote sends a 429. I think we should update the metrics, and the change I made makes sense. Because if the requests fails because of connectivity issues, etc. we will never exit the `sendWriteRequestWithBackoff` function. It only exits the function when there is a non-recoverable error, like a bad status code, and in that case, I think the metric needs to be updated. Signed-off-by: Goutham Veeramachaneni --- storage/remote/queue_manager.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/storage/remote/queue_manager.go b/storage/remote/queue_manager.go index 49de2c8a3..05ead4712 100644 --- a/storage/remote/queue_manager.go +++ b/storage/remote/queue_manager.go @@ -1311,12 +1311,11 @@ func (s *shards) sendSamplesWithBackoff(ctx context.Context, samples []prompb.Ti } err = sendWriteRequestWithBackoff(ctx, s.qm.cfg, s.qm.logger, attemptStore, onRetry) - if err != nil { - return err - } + s.qm.metrics.sentBytesTotal.Add(float64(reqSize)) s.qm.metrics.highestSentTimestamp.Set(float64(highest / 1000)) - return nil + + return err } func sendWriteRequestWithBackoff(ctx context.Context, cfg config.QueueConfig, l log.Logger, attempt func(int) error, onRetry func()) error {