mirror of https://github.com/prometheus/prometheus
fix the "failed compaction" metric. (#613)
Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
pull/5805/head
parent
13c80a5979
commit
882162d5b9
|
@ -84,7 +84,6 @@ type LeveledCompactor struct {
|
|||
type compactorMetrics struct {
|
||||
ran prometheus.Counter
|
||||
populatingBlocks prometheus.Gauge
|
||||
failed prometheus.Counter
|
||||
overlappingBlocks prometheus.Counter
|
||||
duration prometheus.Histogram
|
||||
chunkSize prometheus.Histogram
|
||||
|
@ -103,10 +102,6 @@ func newCompactorMetrics(r prometheus.Registerer) *compactorMetrics {
|
|||
Name: "prometheus_tsdb_compaction_populating_block",
|
||||
Help: "Set to 1 when a block is currently being written to the disk.",
|
||||
})
|
||||
m.failed = prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: "prometheus_tsdb_compactions_failed_total",
|
||||
Help: "Total number of compactions that failed for the partition.",
|
||||
})
|
||||
m.overlappingBlocks = prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: "prometheus_tsdb_vertical_compactions_total",
|
||||
Help: "Total number of compactions done on overlapping blocks.",
|
||||
|
@ -136,7 +131,6 @@ func newCompactorMetrics(r prometheus.Registerer) *compactorMetrics {
|
|||
r.MustRegister(
|
||||
m.ran,
|
||||
m.populatingBlocks,
|
||||
m.failed,
|
||||
m.overlappingBlocks,
|
||||
m.duration,
|
||||
m.chunkRange,
|
||||
|
@ -541,9 +535,6 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blocks ...BlockRe
|
|||
if err := os.RemoveAll(tmp); err != nil {
|
||||
level.Error(c.logger).Log("msg", "removed tmp folder after failed compaction", "err", err.Error())
|
||||
}
|
||||
if err != nil {
|
||||
c.metrics.failed.Inc()
|
||||
}
|
||||
c.metrics.ran.Inc()
|
||||
c.metrics.duration.Observe(time.Since(t).Seconds())
|
||||
}(time.Now())
|
||||
|
|
|
@ -1042,6 +1042,7 @@ func TestDeleteCompactionBlockAfterFailedReload(t *testing.T) {
|
|||
|
||||
testutil.Equals(t, 0.0, prom_testutil.ToFloat64(db.metrics.reloadsFailed), "initial 'failed db reload' count metrics mismatch")
|
||||
testutil.Equals(t, 0.0, prom_testutil.ToFloat64(db.compactor.(*LeveledCompactor).metrics.ran), "initial `compactions` count metric mismatch")
|
||||
testutil.Equals(t, 0.0, prom_testutil.ToFloat64(db.metrics.compactionsFailed), "initial `compactions failed` count metric mismatch")
|
||||
|
||||
// Do the compaction and check the metrics.
|
||||
// Compaction should succeed, but the reload should fail and
|
||||
|
@ -1049,6 +1050,8 @@ func TestDeleteCompactionBlockAfterFailedReload(t *testing.T) {
|
|||
testutil.NotOk(t, db.compact())
|
||||
testutil.Equals(t, 1.0, prom_testutil.ToFloat64(db.metrics.reloadsFailed), "'failed db reload' count metrics mismatch")
|
||||
testutil.Equals(t, 1.0, prom_testutil.ToFloat64(db.compactor.(*LeveledCompactor).metrics.ran), "`compaction` count metric mismatch")
|
||||
testutil.Equals(t, 1.0, prom_testutil.ToFloat64(db.metrics.compactionsFailed), "`compactions failed` count metric mismatch")
|
||||
|
||||
actBlocks, err = blockDirs(db.Dir())
|
||||
testutil.Ok(t, err)
|
||||
testutil.Equals(t, expBlocks, len(actBlocks)-1, "block count should be the same as before the compaction") // -1 to exclude the corrupted block.
|
||||
|
|
11
db.go
11
db.go
|
@ -147,6 +147,7 @@ type dbMetrics struct {
|
|||
reloads prometheus.Counter
|
||||
reloadsFailed prometheus.Counter
|
||||
compactionsTriggered prometheus.Counter
|
||||
compactionsFailed prometheus.Counter
|
||||
timeRetentionCount prometheus.Counter
|
||||
compactionsSkipped prometheus.Counter
|
||||
startTime prometheus.GaugeFunc
|
||||
|
@ -191,6 +192,10 @@ func newDBMetrics(db *DB, r prometheus.Registerer) *dbMetrics {
|
|||
Name: "prometheus_tsdb_compactions_triggered_total",
|
||||
Help: "Total number of triggered compactions for the partition.",
|
||||
})
|
||||
m.compactionsFailed = prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: "prometheus_tsdb_compactions_failed_total",
|
||||
Help: "Total number of compactions that failed for the partition.",
|
||||
})
|
||||
m.timeRetentionCount = prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: "prometheus_tsdb_time_retentions_total",
|
||||
Help: "The number of times that blocks were deleted because the maximum time limit was exceeded.",
|
||||
|
@ -231,6 +236,7 @@ func newDBMetrics(db *DB, r prometheus.Registerer) *dbMetrics {
|
|||
m.reloadsFailed,
|
||||
m.timeRetentionCount,
|
||||
m.compactionsTriggered,
|
||||
m.compactionsFailed,
|
||||
m.startTime,
|
||||
m.tombCleanTimer,
|
||||
m.blocksBytes,
|
||||
|
@ -411,6 +417,11 @@ func (a dbAppender) Commit() error {
|
|||
func (db *DB) compact() (err error) {
|
||||
db.cmtx.Lock()
|
||||
defer db.cmtx.Unlock()
|
||||
defer func() {
|
||||
if err != nil {
|
||||
db.metrics.compactionsFailed.Inc()
|
||||
}
|
||||
}()
|
||||
// Check whether we have pending head blocks that are ready to be persisted.
|
||||
// They have the highest priority.
|
||||
for {
|
||||
|
|
Loading…
Reference in New Issue