mirror of https://github.com/prometheus/prometheus
Introduced some options for compactor concurrency (#66)
* Tool for CLI compactions.
* Use concurrency when populating symbols for multiple blocks.
* Use concurrency when writing to multiple output blocks.

Signed-off-by: Peter Štibraný <pstibrany@gmail.com>
parent 415354aeb8
commit cc9bc8fe9f
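
For orientation, a hypothetical invocation of the new tool (the binary name and block directory paths are illustrative, not from the commit; the flags are defined in the file below):

    ./compact -output-dir=/tmp/out -shard-count=4 -max-closing-blocks=2 -symbol-flushers=4 ./01BLOCKA ./01BLOCKB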
@@ -0,0 +1,96 @@
package main

import (
    "context"
    "flag"
    "log"
    "os"
    "os/signal"
    "runtime/pprof"
    "syscall"

    golog "github.com/go-kit/log"

    "github.com/prometheus/prometheus/tsdb"
)

func main() {
    var (
        outputDir        string
        shardCount       int
        cpuProf          string
        segmentSizeMB    int64
        maxClosingBlocks int
        symbolFlushers   int
    )

    flag.StringVar(&outputDir, "output-dir", ".", "Output directory for new block(s)")
    flag.StringVar(&cpuProf, "cpuprofile", "", "Where to store CPU profile (if not empty)")
    flag.IntVar(&shardCount, "shard-count", 1, "Number of shards for splitting")
    flag.Int64Var(&segmentSizeMB, "segment-file-size", 512, "Size of segment file (in MiB)")
    flag.IntVar(&maxClosingBlocks, "max-closing-blocks", 2, "Number of blocks that can close at once during split compaction")
    flag.IntVar(&symbolFlushers, "symbol-flushers", 4, "Number of symbol flushers used during split compaction")

    flag.Parse()
    logger := golog.NewLogfmtLogger(os.Stderr)

    var blockDirs []string
    var blocks []*tsdb.Block
    for _, d := range flag.Args() {
        s, err := os.Stat(d)
        if err != nil {
            log.Fatalln(err)
        }
        if !s.IsDir() {
            log.Fatalln("not a directory:", d)
        }

        blockDirs = append(blockDirs, d)

        b, err := tsdb.OpenBlock(logger, d, nil)
        if err != nil {
            log.Fatalln("failed to open block:", d, err)
        }

        blocks = append(blocks, b)
        defer b.Close()
    }

    if len(blockDirs) == 0 {
        log.Fatalln("no blocks to compact")
    }
    if cpuProf != "" {
        f, err := os.Create(cpuProf)
        if err != nil {
            log.Fatalln(err)
        }

        log.Println("writing CPU profile to", cpuProf)
        if err := pprof.StartCPUProfile(f); err != nil {
            log.Fatalln(err)
        }

        defer pprof.StopCPUProfile()
    }
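
    // The compaction context is cancelled on SIGINT/SIGTERM, so an
    // interrupted run can stop cleanly.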
    ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
    defer cancel()

    c, err := tsdb.NewLeveledCompactorWithChunkSize(ctx, nil, logger, []int64{0}, nil, segmentSizeMB*1024*1024, nil)
    if err != nil {
        log.Fatalln("creating compactor", err)
    }
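
    // Concurrency options: how many output blocks may be closing at once,
    // and how many goroutines flush symbols during split compaction.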
    opts := tsdb.DefaultConcurrencyOptions()
    opts.MaxClosingBlocks = maxClosingBlocks
    opts.SymbolsFlushersCount = symbolFlushers
    c.SetConcurrencyOptions(opts)

    _, err = c.CompactWithSplitting(outputDir, blockDirs, blocks, uint64(shardCount))
    if err != nil {
        log.Fatalln("compacting", err)
    }
}
@@ -0,0 +1,166 @@
package tsdb

import (
    "context"
    "fmt"

    "github.com/pkg/errors"
    "go.uber.org/atomic"
    "golang.org/x/sync/semaphore"

    "github.com/prometheus/prometheus/model/labels"
    "github.com/prometheus/prometheus/storage"
    "github.com/prometheus/prometheus/tsdb/chunkenc"
    "github.com/prometheus/prometheus/tsdb/chunks"
)
// asyncBlockWriter runs a background goroutine that writes series and chunks to the block asynchronously.
type asyncBlockWriter struct {
    chunkPool chunkenc.Pool // Where to return chunks after writing.

    chunkw ChunkWriter
    indexw IndexWriter

    closeSemaphore *semaphore.Weighted

    seriesChan chan seriesToWrite
    finishedCh chan asyncBlockWriterResult

    closed bool
    result asyncBlockWriterResult
}

type asyncBlockWriterResult struct {
    stats BlockStats
    err   error
}

type seriesToWrite struct {
    lbls labels.Labels
    chks []chunks.Meta
}
func newAsyncBlockWriter(chunkPool chunkenc.Pool, chunkw ChunkWriter, indexw IndexWriter, closeSema *semaphore.Weighted) *asyncBlockWriter {
    bw := &asyncBlockWriter{
        chunkPool:      chunkPool,
        chunkw:         chunkw,
        indexw:         indexw,
        seriesChan:     make(chan seriesToWrite, 64),
        finishedCh:     make(chan asyncBlockWriterResult, 1),
        closeSemaphore: closeSema,
    }

    go bw.loop()
    return bw
}
// loop does the writes. The return value is only used by the defer statement,
// which sends it to finishedCh before closing that channel.
func (bw *asyncBlockWriter) loop() (res asyncBlockWriterResult) {
    defer func() {
        bw.finishedCh <- res
        close(bw.finishedCh)
    }()

    stats := BlockStats{}
    ref := storage.SeriesRef(0)
    for sw := range bw.seriesChan {
        if err := bw.chunkw.WriteChunks(sw.chks...); err != nil {
            return asyncBlockWriterResult{err: errors.Wrap(err, "write chunks")}
        }
        if err := bw.indexw.AddSeries(ref, sw.lbls, sw.chks...); err != nil {
            return asyncBlockWriterResult{err: errors.Wrap(err, "add series")}
        }

        stats.NumChunks += uint64(len(sw.chks))
        stats.NumSeries++
        for _, chk := range sw.chks {
            stats.NumSamples += uint64(chk.Chunk.NumSamples())
        }

        for _, chk := range sw.chks {
            if err := bw.chunkPool.Put(chk.Chunk); err != nil {
                return asyncBlockWriterResult{err: errors.Wrap(err, "put chunk")}
            }
        }
        ref++
    }

    err := bw.closeSemaphore.Acquire(context.Background(), 1)
    if err != nil {
        return asyncBlockWriterResult{err: errors.Wrap(err, "failed to acquire semaphore before closing writers")}
    }
    defer bw.closeSemaphore.Release(1)

    // If everything went fine with writing so far, close writers.
    if err := bw.chunkw.Close(); err != nil {
        return asyncBlockWriterResult{err: errors.Wrap(err, "closing chunk writer")}
    }
    if err := bw.indexw.Close(); err != nil {
        return asyncBlockWriterResult{err: errors.Wrap(err, "closing index writer")}
    }

    return asyncBlockWriterResult{stats: stats}
}
func (bw *asyncBlockWriter) addSeries(lbls labels.Labels, chks []chunks.Meta) error {
    select {
    case bw.seriesChan <- seriesToWrite{lbls: lbls, chks: chks}:
        return nil
    case result, ok := <-bw.finishedCh:
        if ok {
            bw.result = result
        }
        return fmt.Errorf("asyncBlockWriter is no longer running")
    }
}
func (bw *asyncBlockWriter) closeAsync() {
    if !bw.closed {
        bw.closed = true

        close(bw.seriesChan)
    }
}
func (bw *asyncBlockWriter) waitFinished() (BlockStats, error) {
    // Wait for flusher to finish.
    result, ok := <-bw.finishedCh
    if ok {
        bw.result = result
    }

    return bw.result.stats, bw.result.err
}
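
// preventDoubleCloseIndexWriter wraps an IndexWriter so that only the first
// Close call is forwarded to the wrapped writer; subsequent calls are no-ops.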
type preventDoubleCloseIndexWriter struct {
    IndexWriter

    closed atomic.Bool
}

func newPreventDoubleCloseIndexWriter(iw IndexWriter) *preventDoubleCloseIndexWriter {
    return &preventDoubleCloseIndexWriter{IndexWriter: iw}
}

func (p *preventDoubleCloseIndexWriter) Close() error {
    if p.closed.CAS(false, true) {
        return p.IndexWriter.Close()
    }
    return nil
}
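
// preventDoubleCloseChunkWriter does the same for a ChunkWriter: Close is
// forwarded at most once.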
type preventDoubleCloseChunkWriter struct {
    ChunkWriter

    closed atomic.Bool
}

func newPreventDoubleCloseChunkWriter(cw ChunkWriter) *preventDoubleCloseChunkWriter {
    return &preventDoubleCloseChunkWriter{ChunkWriter: cw}
}

func (p *preventDoubleCloseChunkWriter) Close() error {
    if p.closed.CAS(false, true) {
        return p.ChunkWriter.Close()
    }
    return nil
}
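
To make the producer/consumer flow concrete, here is a minimal sketch (not part of this commit) of how compaction code might drive the asyncBlockWriter; the writers, chunk pool, and series are assumed to be prepared by the caller, and error handling is abbreviated:

func exampleDriveAsyncBlockWriter(chunkw ChunkWriter, indexw IndexWriter, pool chunkenc.Pool, series []seriesToWrite) (BlockStats, error) {
    // Allow at most two output blocks in their closing phase at once; the
    // CLI tool's max-closing-blocks option configures this kind of limit.
    sema := semaphore.NewWeighted(2)

    // Wrap the writers so that neither the background loop nor the caller's
    // cleanup path can close them twice.
    bw := newAsyncBlockWriter(pool,
        newPreventDoubleCloseChunkWriter(chunkw),
        newPreventDoubleCloseIndexWriter(indexw),
        sema)

    for _, s := range series {
        // addSeries hands work to the background goroutine and fails fast
        // if that goroutine has already stopped with an error.
        if err := bw.addSeries(s.lbls, s.chks); err != nil {
            break // waitFinished below surfaces the underlying error.
        }
    }

    bw.closeAsync()          // No more series; let the loop drain and close the writers.
    return bw.waitFinished() // Block until writing has fully finished.
}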