diff --git a/tsdb/wal/wal.go b/tsdb/wal/wal.go index b0b693174..510326b88 100644 --- a/tsdb/wal/wal.go +++ b/tsdb/wal/wal.go @@ -187,6 +187,7 @@ type walMetrics struct { truncateFail prometheus.Counter truncateTotal prometheus.Counter currentSegment prometheus.Gauge + writesFailed prometheus.Counter } func newWALMetrics(w *WAL, r prometheus.Registerer) *walMetrics { @@ -217,6 +218,10 @@ func newWALMetrics(w *WAL, r prometheus.Registerer) *walMetrics { Name: "prometheus_tsdb_wal_segment_current", Help: "WAL segment index that TSDB is currently writing to.", }) + m.writesFailed = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "prometheus_tsdb_wal_writes_failed_total", + Help: "Total number of WAL writes that failed.", + }) if r != nil { r.MustRegister( @@ -226,6 +231,7 @@ func newWALMetrics(w *WAL, r prometheus.Registerer) *walMetrics { m.truncateFail, m.truncateTotal, m.currentSegment, + m.writesFailed, ) } @@ -575,6 +581,7 @@ func (w *WAL) Log(recs ...[]byte) error { // a bit of extra logic here frees them from that overhead. for i, r := range recs { if err := w.log(r, i == len(recs)-1); err != nil { + w.metrics.writesFailed.Inc() return err } }