prometheus/tsdb/head.go

// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package tsdb

import (
	"fmt"
	"math"
	"runtime"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/go-kit/kit/log"
	"github.com/go-kit/kit/log/level"
	"github.com/oklog/ulid"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/prometheus/tsdb/chunkenc"
	"github.com/prometheus/prometheus/tsdb/chunks"
	"github.com/prometheus/prometheus/tsdb/encoding"
	"github.com/prometheus/prometheus/tsdb/index"
	"github.com/prometheus/prometheus/tsdb/labels"
	"github.com/prometheus/prometheus/tsdb/record"
	"github.com/prometheus/prometheus/tsdb/tombstones"
	"github.com/prometheus/prometheus/tsdb/wal"
)

var (
	// ErrNotFound is the sentinel error reported when a requested resource
	// could not be located.
	ErrNotFound = errors.New("not found")

	// ErrOutOfOrderSample is the sentinel error reported when an appended
	// sample carries a timestamp older than the most recent sample.
	ErrOutOfOrderSample = errors.New("out of order sample")

	// ErrAmendSample is the sentinel error reported when an appended sample
	// has the timestamp of the most recent sample but a different value.
	ErrAmendSample = errors.New("amending sample")

	// ErrOutOfBounds is the sentinel error reported when an appended sample
	// lies outside of the writable time range.
	ErrOutOfBounds = errors.New("out of bounds")

	// emptyTombstoneReader is a tombstone reader that is never populated by
	// the head. Head and rangeHead hand it out from Tombstones().
	emptyTombstoneReader = tombstones.NewMemTombstones()
)

// Head handles reads and writes of time series data within a time window.
//
// It holds series and their samples in memory and, when a WAL is configured,
// records appended series, samples and tombstones in it.
type Head struct {
	// chunkRange is validated by NewHead to be at least 1. Half of it is used
	// as the appendable window in appender(), and compactable() compares the
	// head's time span against 1.5 times this value.
	chunkRange int64
	metrics    *headMetrics
	// wal may be nil; every use in this file checks for nil first.
	wal        *wal.WAL
	logger     log.Logger
	// Pools of reusable []record.RefSample, []*memSeries and []byte buffers.
	appendPool sync.Pool
	seriesPool sync.Pool
	bytesPool  sync.Pool
	// numSeries is read and modified with sync/atomic only.
	numSeries  uint64

	// minTime, maxTime and minValidTime are read and written with sync/atomic
	// once the head is in use.
	minTime, maxTime int64 // Current min and max of the samples included in the head.
	minValidTime     int64 // Mint allowed to be added to the head. It shouldn't be lower than the maxt of the last persisted block.
	// lastSeriesID is raised to the highest series ref seen while replaying the WAL.
	lastSeriesID     uint64

	// All series addressable by their ID or hash.
	series *stripeSeries

	// symMtx is held by gc while it swaps in freshly rebuilt symbols and values.
	symMtx  sync.RWMutex
	symbols map[string]struct{}
	values  map[string]stringset // label names to possible values

	// deletedMtx guards deleted.
	deletedMtx sync.Mutex
	deleted    map[uint64]int // Deleted series, and what WAL segment they must be kept until.

	postings *index.MemPostings // postings lists for terms
}

// headMetrics bundles the Prometheus collectors that describe the state and
// the activity of a Head. The collectors are created, and optionally
// registered, by newHeadMetrics.
type headMetrics struct {
	activeAppenders         prometheus.Gauge
	series                  prometheus.GaugeFunc
	seriesCreated           prometheus.Counter
	seriesRemoved           prometheus.Counter
	seriesNotFound          prometheus.Counter
	chunks                  prometheus.Gauge
	chunksCreated           prometheus.Counter
	chunksRemoved           prometheus.Counter
	gcDuration              prometheus.Summary
	minTime                 prometheus.GaugeFunc
	maxTime                 prometheus.GaugeFunc
	samplesAppended         prometheus.Counter
	walTruncateDuration     prometheus.Summary
	walCorruptionsTotal     prometheus.Counter
	headTruncateFail        prometheus.Counter
	headTruncateTotal       prometheus.Counter
	checkpointDeleteFail    prometheus.Counter
	checkpointDeleteTotal   prometheus.Counter
	checkpointCreationFail  prometheus.Counter
	checkpointCreationTotal prometheus.Counter
}

// newHeadMetrics builds the collectors describing h. The series, min time and
// max time gauges are computed on demand from h. When r is non-nil, every
// collector is registered with it; registration panics on failure.
func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
	m := &headMetrics{
		activeAppenders: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "prometheus_tsdb_head_active_appenders",
			Help: "Number of currently active appender transactions",
		}),
		series: prometheus.NewGaugeFunc(prometheus.GaugeOpts{
			Name: "prometheus_tsdb_head_series",
			Help: "Total number of series in the head block.",
		}, func() float64 {
			return float64(h.NumSeries())
		}),
		seriesCreated: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "prometheus_tsdb_head_series_created_total",
			Help: "Total number of series created in the head",
		}),
		seriesRemoved: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "prometheus_tsdb_head_series_removed_total",
			Help: "Total number of series removed in the head",
		}),
		seriesNotFound: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "prometheus_tsdb_head_series_not_found_total",
			Help: "Total number of requests for series that were not found.",
		}),
		chunks: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "prometheus_tsdb_head_chunks",
			Help: "Total number of chunks in the head block.",
		}),
		chunksCreated: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "prometheus_tsdb_head_chunks_created_total",
			Help: "Total number of chunks created in the head",
		}),
		chunksRemoved: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "prometheus_tsdb_head_chunks_removed_total",
			Help: "Total number of chunks removed in the head",
		}),
		gcDuration: prometheus.NewSummary(prometheus.SummaryOpts{
			Name:       "prometheus_tsdb_head_gc_duration_seconds",
			Help:       "Runtime of garbage collection in the head block.",
			Objectives: map[float64]float64{},
		}),
		maxTime: prometheus.NewGaugeFunc(prometheus.GaugeOpts{
			Name: "prometheus_tsdb_head_max_time",
			Help: "Maximum timestamp of the head block. The unit is decided by the library consumer.",
		}, func() float64 {
			return float64(h.MaxTime())
		}),
		minTime: prometheus.NewGaugeFunc(prometheus.GaugeOpts{
			Name: "prometheus_tsdb_head_min_time",
			Help: "Minimum time bound of the head block. The unit is decided by the library consumer.",
		}, func() float64 {
			return float64(h.MinTime())
		}),
		walTruncateDuration: prometheus.NewSummary(prometheus.SummaryOpts{
			Name:       "prometheus_tsdb_wal_truncate_duration_seconds",
			Help:       "Duration of WAL truncation.",
			Objectives: map[float64]float64{},
		}),
		walCorruptionsTotal: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "prometheus_tsdb_wal_corruptions_total",
			Help: "Total number of WAL corruptions.",
		}),
		samplesAppended: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "prometheus_tsdb_head_samples_appended_total",
			Help: "Total number of appended samples.",
		}),
		headTruncateFail: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "prometheus_tsdb_head_truncations_failed_total",
			Help: "Total number of head truncations that failed.",
		}),
		headTruncateTotal: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "prometheus_tsdb_head_truncations_total",
			Help: "Total number of head truncations attempted.",
		}),
		checkpointDeleteFail: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "prometheus_tsdb_checkpoint_deletions_failed_total",
			Help: "Total number of checkpoint deletions that failed.",
		}),
		checkpointDeleteTotal: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "prometheus_tsdb_checkpoint_deletions_total",
			Help: "Total number of checkpoint deletions attempted.",
		}),
		checkpointCreationFail: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "prometheus_tsdb_checkpoint_creations_failed_total",
			Help: "Total number of checkpoint creations that failed.",
		}),
		checkpointCreationTotal: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "prometheus_tsdb_checkpoint_creations_total",
			Help: "Total number of checkpoint creations attempted.",
		}),
	}

	// Without a registerer the collectors are still usable, just not exposed.
	if r == nil {
		return m
	}

	collectors := []prometheus.Collector{
		m.activeAppenders,
		m.chunks,
		m.chunksCreated,
		m.chunksRemoved,
		m.series,
		m.seriesCreated,
		m.seriesRemoved,
		m.seriesNotFound,
		m.minTime,
		m.maxTime,
		m.gcDuration,
		m.walTruncateDuration,
		m.walCorruptionsTotal,
		m.samplesAppended,
		m.headTruncateFail,
		m.headTruncateTotal,
		m.checkpointDeleteFail,
		m.checkpointDeleteTotal,
		m.checkpointCreationFail,
		m.checkpointCreationTotal,
	}
	r.MustRegister(collectors...)

	return m
}

// NewHead creates an empty head that logs to the given WAL.
//
// A nil logger is replaced by a no-op logger. The WAL is stored as passed;
// the rest of this file treats a nil WAL as "no WAL configured". A chunkRange
// below 1 is rejected with an error. Metrics are registered with r when it is
// non-nil.
func NewHead(r prometheus.Registerer, l log.Logger, wal *wal.WAL, chunkRange int64) (*Head, error) {
	if chunkRange < 1 {
		return nil, errors.Errorf("invalid chunk range %d", chunkRange)
	}
	if l == nil {
		l = log.NewNopLogger()
	}

	head := &Head{
		chunkRange: chunkRange,
		logger:     l,
		wal:        wal,
		// The time bounds start out inverted, which marks the head as not
		// yet initialized with any timestamp.
		minTime:  math.MaxInt64,
		maxTime:  math.MinInt64,
		series:   newStripeSeries(),
		postings: index.NewUnorderedMemPostings(),
		symbols:  map[string]struct{}{},
		values:   map[string]stringset{},
		deleted:  map[uint64]int{},
	}
	head.metrics = newHeadMetrics(head, r)

	return head, nil
}

// processWALSamples consumes batches of samples from input, appends them to
// their series in the head and forwards every batch on output once it has been
// handled. output is closed when input is closed and drained.
//
// Samples with a timestamp below minValidTime are skipped. Samples whose
// series reference is unknown to the head are skipped as well and counted in
// the returned unknownRefs. After the input is exhausted, the head's min/max
// time is updated with the range of the samples that were appended.
func (h *Head) processWALSamples(
	minValidTime int64,
	input <-chan []record.RefSample, output chan<- []record.RefSample,
) (unknownRefs uint64) {
	defer close(output)

	// Worker-local lookup cache, mitigating lock contention in getByID.
	cache := make(map[uint64]*memSeries)

	lo, hi := int64(math.MaxInt64), int64(math.MinInt64)

	for batch := range input {
		for _, smpl := range batch {
			if smpl.T < minValidTime {
				continue
			}
			// The cache never stores nil, so a miss is the only way to get here
			// without a usable series.
			series, cached := cache[smpl.Ref]
			if !cached {
				series = h.series.getByID(smpl.Ref)
				if series == nil {
					unknownRefs++
					continue
				}
				cache[smpl.Ref] = series
			}
			if _, newChunk := series.append(smpl.T, smpl.V); newChunk {
				h.metrics.chunksCreated.Inc()
				h.metrics.chunks.Inc()
			}
			if smpl.T > hi {
				hi = smpl.T
			}
			if smpl.T < lo {
				lo = smpl.T
			}
		}
		output <- batch
	}
	h.updateMinMaxTime(lo, hi)

	return unknownRefs
}

// updateMinMaxTime widens the head's time range so that it includes
// [mint, maxt]. The minimum is only ever lowered and the maximum only ever
// raised. Each bound is updated with a compare-and-swap that is retried until
// it succeeds or the stored value already covers the requested one.
func (h *Head) updateMinMaxTime(mint, maxt int64) {
	for cur := h.MinTime(); mint < cur; cur = h.MinTime() {
		if atomic.CompareAndSwapInt64(&h.minTime, cur, mint) {
			break
		}
	}
	for cur := h.MaxTime(); maxt > cur; cur = h.MaxTime() {
		if atomic.CompareAndSwapInt64(&h.maxTime, cur, maxt) {
			break
		}
	}
}

// loadWAL replays all records readable from r into the head.
//
// Series records create the series in memory. If a series with the same labels
// already exists under a different ref, the mapping from the record's ref to
// the existing ref is stored in multiRef, and later sample records are
// rewritten through it. Sample records are sharded by series ref over
// GOMAXPROCS workers running processWALSamples. Tombstone records are collected
// and applied through chunkRewrite once all samples have been processed.
//
// A record that cannot be decoded, or that has an unknown type, yields a
// *wal.CorruptionErr carrying the segment and offset reported by r. Errors of
// the reader itself and errors from applying tombstones are returned wrapped.
// References to unknown series are not an error; they are counted and logged
// as a warning.
func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) {
	// Track number of samples that referenced a series we don't know about
	// for error reporting.
	var unknownRefs uint64

	// Start workers that each process samples for a partition of the series ID space.
	// They are connected through a ring of channels which ensures that all sample batches
	// read from the WAL are processed in order.
	var (
		wg      sync.WaitGroup
		n       = runtime.GOMAXPROCS(0)
		inputs  = make([]chan []record.RefSample, n)
		outputs = make([]chan []record.RefSample, n)
	)
	wg.Add(n)

	defer func() {
		// For CorruptionErr ensure to terminate all workers before exiting.
		// Corruption errors are returned from inside the read loop, i.e. before
		// the regular shutdown below has closed the input channels.
		if _, ok := err.(*wal.CorruptionErr); ok {
			for i := 0; i < n; i++ {
				close(inputs[i])
				for range outputs[i] {
				}
			}
			wg.Wait()
		}
	}()

	for i := 0; i < n; i++ {
		outputs[i] = make(chan []record.RefSample, 300)
		inputs[i] = make(chan []record.RefSample, 300)

		go func(input <-chan []record.RefSample, output chan<- []record.RefSample) {
			unknown := h.processWALSamples(h.minValidTime, input, output)
			atomic.AddUint64(&unknownRefs, unknown)
			wg.Done()
		}(inputs[i], outputs[i])
	}

	var (
		dec       record.Decoder
		series    []record.RefSeries
		samples   []record.RefSample
		tstones   []tombstones.Stone
		allStones = tombstones.NewMemTombstones()
		shards    = make([][]record.RefSample, n)
	)
	defer func() {
		if err := allStones.Close(); err != nil {
			level.Warn(h.logger).Log("msg", "closing  memTombstones during wal read", "err", err)
		}
	}()
	for r.Next() {
		series, samples, tstones = series[:0], samples[:0], tstones[:0]
		rec := r.Record()

		switch dec.Type(rec) {
		case record.Series:
			series, err = dec.Series(rec, series)
			if err != nil {
				return &wal.CorruptionErr{
					Err:     errors.Wrap(err, "decode series"),
					Segment: r.Segment(),
					Offset:  r.Offset(),
				}
			}
			for _, s := range series {
				ms, created := h.getOrCreateWithID(s.Ref, s.Labels.Hash(), s.Labels)

				if !created {
					// There's already a different ref for this series.
					multiRef[s.Ref] = ms.ref
				}

				if h.lastSeriesID < s.Ref {
					h.lastSeriesID = s.Ref
				}
			}
		case record.Samples:
			samples, err = dec.Samples(rec, samples)
			s := samples
			if err != nil {
				return &wal.CorruptionErr{
					Err:     errors.Wrap(err, "decode samples"),
					Segment: r.Segment(),
					Offset:  r.Offset(),
				}
			}
			// We split up the samples into chunks of 5000 samples or less.
			// With O(300 * #cores) in-flight sample batches, large scrapes could otherwise
			// cause thousands of very large in flight buffers occupying large amounts
			// of unused memory.
			for len(samples) > 0 {
				m := 5000
				if len(samples) < m {
					m = len(samples)
				}
				// Reuse a buffer a worker has already handed back, if any is
				// available without blocking; otherwise start from a nil slice.
				for i := 0; i < n; i++ {
					var buf []record.RefSample
					select {
					case buf = <-outputs[i]:
					default:
					}
					shards[i] = buf[:0]
				}
				for _, sam := range samples[:m] {
					if ref, ok := multiRef[sam.Ref]; ok {
						sam.Ref = ref
					}
					mod := sam.Ref % uint64(n)
					shards[mod] = append(shards[mod], sam)
				}
				for i := 0; i < n; i++ {
					inputs[i] <- shards[i]
				}
				samples = samples[m:]
			}
			samples = s // Keep whole slice for reuse.
		case record.Tombstones:
			tstones, err = dec.Tombstones(rec, tstones)
			if err != nil {
				return &wal.CorruptionErr{
					Err:     errors.Wrap(err, "decode tombstones"),
					Segment: r.Segment(),
					Offset:  r.Offset(),
				}
			}
			for _, s := range tstones {
				for _, itv := range s.Intervals {
					// Intervals that end before the valid time range cannot
					// affect any sample that is kept.
					if itv.Maxt < h.minValidTime {
						continue
					}
					if m := h.series.getByID(s.Ref); m == nil {
						unknownRefs++
						continue
					}
					allStones.AddInterval(s.Ref, itv)
				}
			}
		default:
			return &wal.CorruptionErr{
				Err:     errors.Errorf("invalid record type %v", dec.Type(rec)),
				Segment: r.Segment(),
				Offset:  r.Offset(),
			}
		}
	}

	// Signal termination to each worker and wait for it to close its output channel.
	for i := 0; i < n; i++ {
		close(inputs[i])
		for range outputs[i] {
		}
	}
	wg.Wait()

	if r.Err() != nil {
		return errors.Wrap(r.Err(), "read records")
	}

	// Wrap the error returned by Iter itself. r.Err() is known to be nil at
	// this point, and wrapping a nil error yields nil, which would silently
	// drop a failure to apply the tombstones.
	if err := allStones.Iter(func(ref uint64, dranges tombstones.Intervals) error {
		return h.chunkRewrite(ref, dranges)
	}); err != nil {
		return errors.Wrap(err, "deleting samples from tombstones")
	}

	if unknownRefs > 0 {
		level.Warn(h.logger).Log("msg", "unknown series references", "count", unknownRefs)
	}
	return nil
}

// Init loads data from the write ahead log and prepares the head for writes.
// It should be called before using an appender so that it
// limits the ingested samples to the head min valid time.
//
// minValidTime is stored on the head before the replay starts. If a checkpoint
// exists it is replayed first, followed by every WAL segment after the
// checkpoint up to the last one. Without a WAL, Init only runs the deferred
// head GC and postings ordering.
func (h *Head) Init(minValidTime int64) error {
	h.minValidTime = minValidTime
	// Deferred calls run in reverse order: gc runs first, EnsureOrder last.
	defer h.postings.EnsureOrder()
	defer h.gc() // After loading the wal remove the obsolete data from the head.

	if h.wal == nil {
		return nil
	}

	level.Info(h.logger).Log("msg", "replaying WAL, this may take awhile")
	// Backfill the checkpoint first if it exists.
	dir, startFrom, err := wal.LastCheckpoint(h.wal.Dir())
	if err != nil && err != record.ErrNotFound {
		return errors.Wrap(err, "find last checkpoint")
	}
	// multiRef is shared across all loadWAL calls so that ref remappings found
	// in earlier records apply to samples in later segments.
	multiRef := map[uint64]uint64{}
	// err is either nil or record.ErrNotFound here; nil means a checkpoint exists.
	if err == nil {
		sr, err := wal.NewSegmentsReader(dir)
		if err != nil {
			return errors.Wrap(err, "open checkpoint")
		}
		defer func() {
			if err := sr.Close(); err != nil {
				level.Warn(h.logger).Log("msg", "error while closing the wal segments reader", "err", err)
			}
		}()

		// A corrupted checkpoint is a hard error for now and requires user
		// intervention. There's likely little data that can be recovered anyway.
		if err := h.loadWAL(wal.NewReader(sr), multiRef); err != nil {
			return errors.Wrap(err, "backfill checkpoint")
		}
		// Continue with the segment following the one reported for the checkpoint.
		startFrom++
		level.Info(h.logger).Log("msg", "WAL checkpoint loaded")
	}

	// Find the last segment.
	_, last, err := h.wal.Segments()
	if err != nil {
		return errors.Wrap(err, "finding WAL segments")
	}

	// Backfill segments from the most recent checkpoint onwards.
	for i := startFrom; i <= last; i++ {
		s, err := wal.OpenReadSegment(wal.SegmentName(h.wal.Dir(), i))
		if err != nil {
			return errors.Wrap(err, fmt.Sprintf("open WAL segment: %d", i))
		}

		sr := wal.NewSegmentBufReader(s)
		err = h.loadWAL(wal.NewReader(sr), multiRef)
		// Close the reader before looking at the replay error so that it is
		// released on every path; a close failure is only logged.
		if err := sr.Close(); err != nil {
			level.Warn(h.logger).Log("msg", "error while closing the wal segments reader", "err", err)
		}
		// The replay error is returned unwrapped.
		if err != nil {
			return err
		}
		level.Info(h.logger).Log("msg", "WAL segment loaded", "segment", i, "maxSegment", last)
	}

	return nil
}

// Truncate removes old data before mint from the head.
//
// It raises the head's min time and min valid time to mint, garbage collects
// the in-memory data and, if a WAL is configured, starts a new segment,
// checkpoints the lower third of the existing segments and truncates the
// segments covered by the checkpoint. It is a no-op when the head's min time
// is already at or above mint. Every non-nil error return increments the
// failed-truncation counter.
func (h *Head) Truncate(mint int64) (err error) {
	defer func() {
		if err != nil {
			h.metrics.headTruncateFail.Inc()
		}
	}()
	// A min time of math.MaxInt64 is the value NewHead sets, i.e. no time
	// bound has been established yet.
	initialize := h.MinTime() == math.MaxInt64

	if h.MinTime() >= mint && !initialize {
		return nil
	}
	atomic.StoreInt64(&h.minTime, mint)
	atomic.StoreInt64(&h.minValidTime, mint)

	// Ensure that max time is at least as high as min time.
	for h.MaxTime() < mint {
		atomic.CompareAndSwapInt64(&h.maxTime, h.MaxTime(), mint)
	}

	// This was an initial call to Truncate after loading blocks on startup.
	// We haven't read back the WAL yet, so do not attempt to truncate it.
	if initialize {
		return nil
	}

	h.metrics.headTruncateTotal.Inc()
	start := time.Now()

	h.gc()
	level.Info(h.logger).Log("msg", "head GC completed", "duration", time.Since(start))
	h.metrics.gcDuration.Observe(time.Since(start).Seconds())

	if h.wal == nil {
		return nil
	}
	// Restart the clock so that the WAL truncation duration excludes the GC.
	start = time.Now()

	first, last, err := h.wal.Segments()
	if err != nil {
		return errors.Wrap(err, "get segment range")
	}
	// Start a new segment, so low ingestion volume TSDB don't have more WAL than
	// needed.
	err = h.wal.NextSegment()
	if err != nil {
		return errors.Wrap(err, "next segment")
	}
	last-- // Never consider last segment for checkpoint.
	if last < 0 {
		return nil // no segments yet.
	}
	// The lower third of segments should contain mostly obsolete samples.
	// If we have less than three segments, it's not worth checkpointing yet.
	last = first + (last-first)/3
	if last <= first {
		return nil
	}

	// keep reports whether a series ID is still present in the head or is
	// tracked in h.deleted. It is passed to wal.Checkpoint below.
	keep := func(id uint64) bool {
		if h.series.getByID(id) != nil {
			return true
		}
		h.deletedMtx.Lock()
		_, ok := h.deleted[id]
		h.deletedMtx.Unlock()
		return ok
	}
	h.metrics.checkpointCreationTotal.Inc()
	if _, err = wal.Checkpoint(h.wal, first, last, keep, mint); err != nil {
		h.metrics.checkpointCreationFail.Inc()
		return errors.Wrap(err, "create checkpoint")
	}
	if err := h.wal.Truncate(last + 1); err != nil {
		// If truncating fails, we'll just try again at the next checkpoint.
		// Leftover segments will just be ignored in the future if there's a checkpoint
		// that supersedes them.
		level.Error(h.logger).Log("msg", "truncating segments failed", "err", err)
	}

	// The checkpoint is written and the segments before it are truncated, so we
	// no longer need to track deleted series that are before it.
	h.deletedMtx.Lock()
	for ref, segment := range h.deleted {
		if segment < first {
			delete(h.deleted, ref)
		}
	}
	h.deletedMtx.Unlock()

	h.metrics.checkpointDeleteTotal.Inc()
	if err := wal.DeleteCheckpoints(h.wal.Dir(), last); err != nil {
		// Leftover old checkpoints do not cause problems down the line beyond
		// occupying disk space.
		// They will just be ignored since a higher checkpoint exists.
		level.Error(h.logger).Log("msg", "delete old checkpoints", "err", err)
		h.metrics.checkpointDeleteFail.Inc()
	}
	h.metrics.walTruncateDuration.Observe(time.Since(start).Seconds())

	level.Info(h.logger).Log("msg", "WAL checkpoint complete",
		"first", first, "last", last, "duration", time.Since(start))

	return nil
}

// initTime sets the head's time bounds from the first timestamp t. It only has
// an effect while the min time still holds its initial value math.MaxInt64,
// which is the case for a completely fresh head with an empty WAL.
// It reports whether this call performed the initialization.
func (h *Head) initTime(t int64) (initialized bool) {
	if atomic.CompareAndSwapInt64(&h.minTime, math.MaxInt64, t) {
		// Raise the max time to at least the min time just set. The swap is a
		// no-op when a concurrent appender has already stored a higher value.
		atomic.CompareAndSwapInt64(&h.maxTime, math.MinInt64, t)
		return true
	}
	return false
}

// rangeHead exposes a Head through readers that are bounded by mint and maxt.
type rangeHead struct {
	head       *Head
	mint, maxt int64
}

// Index returns an index reader over the head for the configured time range.
// The error is always nil.
func (h *rangeHead) Index() (IndexReader, error) {
	ir := h.head.indexRange(h.mint, h.maxt)
	return ir, nil
}

// Chunks returns a chunk reader over the head for the configured time range.
// The error is always nil.
func (h *rangeHead) Chunks() (ChunkReader, error) {
	cr := h.head.chunksRange(h.mint, h.maxt)
	return cr, nil
}

// Tombstones returns the shared empty tombstone reader.
// The error is always nil.
func (h *rangeHead) Tombstones() (tombstones.Reader, error) {
	return emptyTombstoneReader, nil
}

// MinTime returns the lower bound of the range.
func (h *rangeHead) MinTime() int64 {
	return h.mint
}

// MaxTime returns the upper bound of the range.
func (h *rangeHead) MaxTime() int64 {
	return h.maxt
}

// NumSeries returns the number of series of the underlying head; the count is
// not restricted to the range.
func (h *rangeHead) NumSeries() uint64 {
	return h.head.NumSeries()
}

// Meta returns block meta information built from the range bounds, the ULID
// of the underlying head and the head's series count.
func (h *rangeHead) Meta() BlockMeta {
	meta := BlockMeta{
		MinTime: h.mint,
		MaxTime: h.maxt,
	}
	meta.ULID = h.head.Meta().ULID
	meta.Stats = BlockStats{NumSeries: h.head.NumSeries()}
	return meta
}

// initAppender is a helper to initialize the time bounds of the head
// upon the first sample it receives.
//
// app stays nil until the first call to Add, which initializes the head's time
// bounds and creates the underlying appender that all later calls delegate to.
type initAppender struct {
	app  Appender
	head *Head
}

// Add appends a sample for the series identified by lset. The first call
// initializes the head's time bounds with t and creates the underlying
// appender; every call is then delegated to that appender.
func (a *initAppender) Add(lset labels.Labels, t int64, v float64) (uint64, error) {
	if a.app == nil {
		a.head.initTime(t)
		a.app = a.head.appender()
	}
	return a.app.Add(lset, t, v)
}

// AddFast appends a sample by series reference through the underlying
// appender. Before the first Add no underlying appender exists, and
// ErrNotFound is returned.
func (a *initAppender) AddFast(ref uint64, t int64, v float64) error {
	if a.app != nil {
		return a.app.AddFast(ref, t, v)
	}
	return ErrNotFound
}

// Commit commits the underlying appender. If Add was never called there is
// nothing to commit and nil is returned.
func (a *initAppender) Commit() error {
	if a.app == nil {
		// Head.Appender incremented the active appenders gauge when it handed
		// out this appender. No underlying appender exists that would
		// decrement it on commit, so it has to be balanced here.
		a.head.metrics.activeAppenders.Dec()
		return nil
	}
	return a.app.Commit()
}

// Rollback rolls back the underlying appender. If Add was never called there
// is nothing to roll back and nil is returned.
func (a *initAppender) Rollback() error {
	if a.app == nil {
		// Head.Appender incremented the active appenders gauge when it handed
		// out this appender. No underlying appender exists that would
		// decrement it on rollback, so it has to be balanced here.
		a.head.metrics.activeAppenders.Dec()
		return nil
	}
	return a.app.Rollback()
}

// Appender returns a new Appender on the database and counts it as active.
//
// While the head has no time bounds yet (its min time still holds the initial
// math.MaxInt64), an initAppender is returned that takes the first appended
// timestamp as the base.
func (h *Head) Appender() Appender {
	h.metrics.activeAppenders.Inc()

	if h.MinTime() != math.MaxInt64 {
		return h.appender()
	}
	return &initAppender{head: h}
}

// appender creates a headAppender with empty time bounds and pooled sample
// buffers.
func (h *Head) appender() *headAppender {
	// The lowest accepted timestamp is whichever is greater: the head's min
	// valid time or the start of the appendable window, i.e. half a chunk
	// range before the head's max time. This ensures that no samples are added
	// within the compaction window, avoiding races.
	lowest := max(atomic.LoadInt64(&h.minValidTime), h.MaxTime()-h.chunkRange/2)

	app := &headAppender{
		head:         h,
		minValidTime: lowest,
		mint:         math.MaxInt64,
		maxt:         math.MinInt64,
	}
	app.samples = h.getAppendBuffer()
	app.sampleSeries = h.getSeriesBuffer()
	return app
}

// max returns the larger of a and b.
func max(a, b int64) int64 {
	if b >= a {
		return b
	}
	return a
}

// getAppendBuffer returns a sample buffer from the pool, or a new empty one
// with capacity 512 when the pool has none.
func (h *Head) getAppendBuffer() []record.RefSample {
	if pooled := h.appendPool.Get(); pooled != nil {
		return pooled.([]record.RefSample)
	}
	return make([]record.RefSample, 0, 512)
}

// putAppendBuffer hands b back to the sample buffer pool, truncated to zero
// length so that its capacity can be reused.
func (h *Head) putAppendBuffer(b []record.RefSample) {
	emptied := b[:0]
	//lint:ignore SA6002 safe to ignore and actually fixing it has some performance penalty.
	h.appendPool.Put(emptied)
}

// getSeriesBuffer returns a series pointer buffer from the pool, or a new
// empty one with capacity 512 when the pool has none.
func (h *Head) getSeriesBuffer() []*memSeries {
	if pooled := h.seriesPool.Get(); pooled != nil {
		return pooled.([]*memSeries)
	}
	return make([]*memSeries, 0, 512)
}

// putSeriesBuffer hands b back to the series buffer pool, truncated to zero
// length so that its capacity can be reused.
func (h *Head) putSeriesBuffer(b []*memSeries) {
	emptied := b[:0]
	//lint:ignore SA6002 safe to ignore and actually fixing it has some performance penalty.
	h.seriesPool.Put(emptied)
}

// getBytesBuffer returns a byte buffer from the pool, or a new empty one with
// capacity 1024 when the pool has none.
func (h *Head) getBytesBuffer() []byte {
	if pooled := h.bytesPool.Get(); pooled != nil {
		return pooled.([]byte)
	}
	return make([]byte, 0, 1024)
}

// putBytesBuffer hands b back to the byte buffer pool, truncated to zero
// length so that its capacity can be reused.
func (h *Head) putBytesBuffer(b []byte) {
	emptied := b[:0]
	//lint:ignore SA6002 safe to ignore and actually fixing it has some performance penalty.
	h.bytesPool.Put(emptied)
}

// headAppender collects series and samples for a Head and applies them on
// Commit. Samples are validated when they are added but only written to the
// WAL and appended to their series when the appender is committed.
type headAppender struct {
	head         *Head
	minValidTime int64 // No samples below this timestamp are allowed.
	mint, maxt   int64 // Lowest and highest timestamp among the accepted samples.

	// series holds the records of series that were created through this appender.
	series       []record.RefSeries
	// samples and sampleSeries are parallel slices: sampleSeries[i] is the
	// in-memory series that samples[i] belongs to.
	samples      []record.RefSample
	sampleSeries []*memSeries
}

// Add queues a sample for the series identified by lset, creating the series
// in the head if it does not exist yet. Labels with empty values are dropped
// from lset first.
//
// It returns ErrOutOfBounds with a zero reference when t is below the
// appender's minimum valid time. Otherwise it returns the series reference
// together with the result of AddFast; the reference is returned even when
// AddFast fails.
func (a *headAppender) Add(lset labels.Labels, t int64, v float64) (uint64, error) {
	if t < a.minValidTime {
		return 0, ErrOutOfBounds
	}

	// Ensure no empty labels have gotten through.
	lset = lset.WithoutEmpty()

	ms, isNew := a.head.getOrCreate(lset.Hash(), lset)
	if isNew {
		// Remember the new series so that log() can write it to the WAL.
		rec := record.RefSeries{Ref: ms.ref, Labels: lset}
		a.series = append(a.series, rec)
	}

	err := a.AddFast(ms.ref, t, v)
	return ms.ref, err
}

// AddFast queues a sample for the series with the given reference.
//
// It returns ErrOutOfBounds when t is below the appender's minimum valid time,
// a wrapped ErrNotFound when the head has no series for ref, and whatever
// error the series' appendable check reports. On success the series is marked
// as having a pending commit and the sample is buffered until Commit.
func (a *headAppender) AddFast(ref uint64, t int64, v float64) error {
	if t < a.minValidTime {
		return ErrOutOfBounds
	}

	ms := a.head.series.getByID(ref)
	if ms == nil {
		return errors.Wrap(ErrNotFound, "unknown series")
	}

	ms.Lock()
	err := ms.appendable(t, v)
	if err == nil {
		ms.pendingCommit = true
	}
	ms.Unlock()
	if err != nil {
		return err
	}

	// Track the time range covered by the samples of this appender.
	if t < a.mint {
		a.mint = t
	}
	if t > a.maxt {
		a.maxt = t
	}

	a.samples = append(a.samples, record.RefSample{Ref: ref, T: t, V: v})
	a.sampleSeries = append(a.sampleSeries, ms)
	return nil
}

// log writes the appender's new series and its buffered samples to the WAL,
// series first. It is a no-op when the head has no WAL. Empty series or sample
// lists are not logged.
func (a *headAppender) log() error {
	w := a.head.wal
	if w == nil {
		return nil
	}

	scratch := a.head.getBytesBuffer()
	// The closure reads scratch when it runs, so the buffer returned to the
	// pool is the one last produced by the encoder.
	defer func() { a.head.putBytesBuffer(scratch) }()

	var enc record.Encoder

	if len(a.series) > 0 {
		rec := enc.Series(a.series, scratch)
		scratch = rec[:0]

		if err := w.Log(rec); err != nil {
			return errors.Wrap(err, "log series")
		}
	}
	if len(a.samples) > 0 {
		rec := enc.Samples(a.samples, scratch)
		scratch = rec[:0]

		if err := w.Log(rec); err != nil {
			return errors.Wrap(err, "log samples")
		}
	}
	return nil
}

// Commit writes the appender's series and samples to the WAL and then appends
// the samples to their in-memory series.
//
// If writing to the WAL fails, the wrapped error is returned and no sample is
// appended. In every case the appender stops counting as active and its
// buffers are returned to their pools.
func (a *headAppender) Commit() error {
	defer a.head.metrics.activeAppenders.Dec()
	defer a.head.putAppendBuffer(a.samples)
	defer a.head.putSeriesBuffer(a.sampleSeries)

	if err := a.log(); err != nil {
		return errors.Wrap(err, "write to WAL")
	}

	// Start from the number of buffered samples and discount every sample
	// that the series did not accept.
	appended := len(a.samples)
	for i := range a.samples {
		smpl, ms := a.samples[i], a.sampleSeries[i]

		ms.Lock()
		ok, newChunk := ms.append(smpl.T, smpl.V)
		ms.pendingCommit = false
		ms.Unlock()

		if !ok {
			appended--
		}
		if newChunk {
			a.head.metrics.chunks.Inc()
			a.head.metrics.chunksCreated.Inc()
		}
	}

	a.head.metrics.samplesAppended.Add(float64(appended))
	a.head.updateMinMaxTime(a.mint, a.maxt)

	return nil
}

// Rollback discards the buffered samples without appending them.
//
// The pending-commit mark is cleared on every series that had a sample
// buffered, and both the sample and the series buffer are returned to their
// pools. Series created through this appender are still written to the WAL.
// The returned error is the result of that WAL write.
func (a *headAppender) Rollback() error {
	a.head.metrics.activeAppenders.Dec()
	var series *memSeries
	for i := range a.samples {
		series = a.sampleSeries[i]
		series.Lock()
		series.pendingCommit = false
		series.Unlock()
	}
	a.head.putAppendBuffer(a.samples)
	// Return the series buffer as well, as Commit does; otherwise it is
	// dropped and the pool has to allocate a fresh one.
	a.head.putSeriesBuffer(a.sampleSeries)

	// Series are created in the head memory regardless of rollback. Thus we have
	// to log them to the WAL in any case.
	a.samples = nil
	a.sampleSeries = nil
	return a.log()
}

// Delete all samples in the range of [mint, maxt] for series that satisfy the given
// label matchers.
//
// The range is clamped to the head's current time range and, per series, to the
// time range of that series. The affected chunks are rewritten in memory
// through chunkRewrite. If a WAL is configured, the deletions are also logged
// as tombstones so that they are applied again when the WAL is replayed.
// A series that is no longer present in the head when it is looked up is
// skipped.
func (h *Head) Delete(mint, maxt int64, ms ...labels.Matcher) error {
	// Do not delete anything beyond the currently valid range.
	mint, maxt = clampInterval(mint, maxt, h.MinTime(), h.MaxTime())

	ir := h.indexRange(mint, maxt)

	p, err := PostingsForMatchers(ir, ms...)
	if err != nil {
		return errors.Wrap(err, "select series")
	}

	var stones []tombstones.Stone
	dirty := false
	for p.Next() {
		ref := p.At()

		series := h.series.getByID(ref)
		if series == nil {
			// The postings can still reference a series that has been removed
			// from the head in the meantime. There is nothing left to delete.
			level.Debug(h.logger).Log("msg", "series not found in head during delete", "ref", ref)
			continue
		}

		t0, t1 := series.minTime(), series.maxTime()
		// NOTE(review): math.MinInt64 presumably marks a series without
		// samples; confirm against memSeries.minTime/maxTime.
		if t0 == math.MinInt64 || t1 == math.MinInt64 {
			continue
		}
		// Delete only until the current values and not beyond.
		t0, t1 = clampInterval(mint, maxt, t0, t1)
		if h.wal != nil {
			stones = append(stones, tombstones.Stone{Ref: ref, Intervals: tombstones.Intervals{{Mint: t0, Maxt: t1}}})
		}
		if err := h.chunkRewrite(ref, tombstones.Intervals{{Mint: t0, Maxt: t1}}); err != nil {
			return errors.Wrap(err, "delete samples")
		}
		dirty = true
	}
	if p.Err() != nil {
		return p.Err()
	}
	var enc record.Encoder
	if h.wal != nil {
		// Although we don't store the stones in the head
		// we need to write them to the WAL to mark these as deleted
		// after a restart while loading the WAL.
		if err := h.wal.Log(enc.Tombstones(stones, nil)); err != nil {
			return err
		}
	}
	if dirty {
		h.gc()
	}

	return nil
}

// chunkRewrite rewrites the chunks of the series ref so that the samples in
// the deleted ranges are removed.
//
// The series is reset and every sample yielded by an iterator over its
// previous chunks and dranges is appended again. It is a no-op when dranges is
// empty, when the series is not present in the head, or when the series has no
// chunks. The returned error is currently always nil.
func (h *Head) chunkRewrite(ref uint64, dranges tombstones.Intervals) (err error) {
	if len(dranges) == 0 {
		return nil
	}

	ms := h.series.getByID(ref)
	if ms == nil {
		// The series is not (or no longer) in the head, so there are no
		// chunks to rewrite. Without this check, Lock would dereference nil.
		return nil
	}
	ms.Lock()
	defer ms.Unlock()
	if len(ms.chunks) == 0 {
		return nil
	}

	// Iterate over the full time range covered by the existing chunks.
	// NOTE(review): the iterator presumably skips samples inside dranges;
	// confirm against newChunkSeriesIterator.
	metas := ms.chunksMetas()
	mint, maxt := metas[0].MinTime, metas[len(metas)-1].MaxTime
	it := newChunkSeriesIterator(metas, dranges, mint, maxt)

	ms.reset()
	for it.Next() {
		t, v := it.At()
		ok, _ := ms.append(t, v)
		if !ok {
			level.Warn(h.logger).Log("msg", "failed to add sample during delete")
		}
	}

	return nil
}

// gc removes data before the minimum timestamp from the head.
//
// It drops old chunks and fully obsolete series, updates the related metrics
// and the series count, removes the deleted series from the postings, records
// until which WAL segment the deleted series must be kept, and finally
// rebuilds the symbol and label value indices from the remaining postings.
func (h *Head) gc() {
	// Only data strictly lower than this timestamp must be deleted.
	mint := h.MinTime()

	// Drop old chunks and remember series IDs and hashes if they can be
	// deleted entirely.
	deleted, chunksRemoved := h.series.gc(mint)
	seriesRemoved := len(deleted)

	h.metrics.seriesRemoved.Add(float64(seriesRemoved))
	h.metrics.chunksRemoved.Add(float64(chunksRemoved))
	h.metrics.chunks.Sub(float64(chunksRemoved))
	// Using AddUint64 to subtract series removed.
	// See: https://golang.org/pkg/sync/atomic/#AddUint64.
	atomic.AddUint64(&h.numSeries, ^uint64(seriesRemoved-1))

	// Remove deleted series IDs from the postings lists.
	h.postings.Delete(deleted)

	if h.wal != nil {
		// The error from Segments is deliberately ignored here; last is used
		// as returned.
		_, last, _ := h.wal.Segments()
		h.deletedMtx.Lock()
		// Keep series records until we're past segment 'last'
		// because the WAL will still have samples records with
		// this ref ID. If we didn't keep these series records then
		// on start up when we replay the WAL, or any other code
		// that reads the WAL, wouldn't be able to use those
		// samples since we would have no labels for that ref ID.
		for ref := range deleted {
			h.deleted[ref] = last
		}
		h.deletedMtx.Unlock()
	}

	// Rebuild symbols and label value indices from what is left in the postings terms.
	// The previous sizes are only used as capacity hints for the new maps.
	symbols := make(map[string]struct{}, len(h.symbols))
	values := make(map[string]stringset, len(h.values))

	if err := h.postings.Iter(func(t labels.Label, _ index.Postings) error {
		symbols[t.Name] = struct{}{}
		symbols[t.Value] = struct{}{}

		ss, ok := values[t.Name]
		if !ok {
			ss = stringset{}
			values[t.Name] = ss
		}
		ss.set(t.Value)
		return nil
	}); err != nil {
		// This should never happen, as the iteration function only returns nil.
		panic(err)
	}

	// Swap in the rebuilt indices under the symbol lock.
	h.symMtx.Lock()

	h.symbols = symbols
	h.values = values

	h.symMtx.Unlock()
}

// Tombstones returns the shared empty tombstone reader. The head applies
// deletions to its chunks directly instead of keeping tombstones.
// The error is always nil.
func (h *Head) Tombstones() (tombstones.Reader, error) {
	return emptyTombstoneReader, nil
}

// Index returns an IndexReader over the head that is not restricted by a
// caller-supplied time range. The error is always nil.
func (h *Head) Index() (IndexReader, error) {
	ir := h.indexRange(math.MinInt64, math.MaxInt64)
	return ir, nil
}

// indexRange returns an index reader for the range [mint, maxt]. The lower
// bound is raised to the head's current min time if it lies below it.
func (h *Head) indexRange(mint, maxt int64) *headIndexReader {
	lower := mint
	if headMin := h.MinTime(); lower < headMin {
		lower = headMin
	}
	return &headIndexReader{head: h, mint: lower, maxt: maxt}
}

// Chunks returns a ChunkReader over the head that is not restricted by a
// caller-supplied time range. The error is always nil.
func (h *Head) Chunks() (ChunkReader, error) {
	cr := h.chunksRange(math.MinInt64, math.MaxInt64)
	return cr, nil
}

// chunksRange returns a chunk reader for the range [mint, maxt]. The lower
// bound is raised to the head's current min time if it lies below it.
func (h *Head) chunksRange(mint, maxt int64) *headChunkReader {
	lower := mint
	if headMin := h.MinTime(); lower < headMin {
		lower = headMin
	}
	return &headChunkReader{head: h, mint: lower, maxt: maxt}
}

// NumSeries returns the number of active series in the head.
// The counter is read atomically.
func (h *Head) NumSeries() uint64 {
	return atomic.LoadUint64(&h.numSeries)
}

// Meta returns meta information about the head. Time bounds and series count
// reflect the head's state at the time of the call, so repeated calls can
// return different results. The ULID is a fixed placeholder value.
func (h *Head) Meta() BlockMeta {
	var id ulid.ULID
	copy(id[:], "______head______")

	meta := BlockMeta{ULID: id}
	meta.MinTime = h.MinTime()
	meta.MaxTime = h.MaxTime()
	meta.Stats = BlockStats{NumSeries: h.NumSeries()}
	return meta
}

// MinTime returns the lowest time bound on visible data in the head, read
// atomically.
func (h *Head) MinTime() int64 {
	mint := atomic.LoadInt64(&h.minTime)
	return mint
}

// MaxTime returns the highest timestamp seen in data of the head, read
// atomically.
func (h *Head) MaxTime() int64 {
	maxt := atomic.LoadInt64(&h.maxTime)
	return maxt
}

// compactable reports whether the head spans enough time to be compacted:
// the distance between MaxTime and MinTime must exceed chunkRange/2*3
// (integer arithmetic), i.e. roughly 1.5 chunk ranges. The extra half range
// acts as a buffer for the appendable window.
func (h *Head) compactable() bool {
	span := h.MaxTime() - h.MinTime()
	threshold := h.chunkRange / 2 * 3
	return span > threshold
}

// Close closes the head's WAL and returns the WAL's close error. A head
// without a WAL has nothing to close and returns nil.
func (h *Head) Close() error {
	if w := h.wal; w != nil {
		return w.Close()
	}
	return nil
}

// headChunkReader serves chunks out of the head. Chunk only returns chunks
// that overlap the closed interval [mint, maxt].
type headChunkReader struct {
	head       *Head
	mint, maxt int64
}

// Close is a no-op and always returns nil.
func (h *headChunkReader) Close() error {
	return nil
}

// packChunkID combines a series ID and a per-series chunk ID into a single
// 8 byte reference: the series ID occupies the upper 40 bits and the chunk ID
// the lower 24 bits.
// It panics if the seriesID exceeds 5 bytes or the chunk ID 3 bytes.
func packChunkID(seriesID, chunkID uint64) uint64 {
	const (
		chunkBits   = 24
		maxSeriesID = 1<<40 - 1
		maxChunkID  = 1<<chunkBits - 1
	)
	switch {
	case seriesID > maxSeriesID:
		panic("series ID exceeds 5 bytes")
	case chunkID > maxChunkID:
		panic("chunk ID exceeds 3 bytes")
	}
	return seriesID<<chunkBits | chunkID
}

// unpackChunkID splits a packed reference into its series ID (upper 40 bits)
// and chunk ID (lower 24 bits).
func unpackChunkID(id uint64) (seriesID, chunkID uint64) {
	const chunkBits = 24
	seriesID = id >> chunkBits
	chunkID = id & (1<<chunkBits - 1)
	return seriesID, chunkID
}

// Chunk resolves a packed chunk reference to the chunk it names.
// ErrNotFound is returned when the series cannot be found, when the series no
// longer holds a chunk with that ID, or when the chunk does not overlap the
// reader's [mint, maxt] interval.
func (h *headChunkReader) Chunk(ref uint64) (chunkenc.Chunk, error) {
	seriesID, chunkID := unpackChunkID(ref)
	id := int(chunkID)

	series := h.head.series.getByID(seriesID)
	if series == nil {
		// The series has been garbage collected.
		return nil, ErrNotFound
	}

	// Look the chunk up and check its bounds while holding the series lock.
	series.Lock()
	mc := series.chunk(id)
	visible := mc != nil && mc.OverlapsClosedInterval(h.mint, h.maxt)
	series.Unlock()

	if !visible {
		// The chunk has been garbage collected or is outside the
		// specified range.
		return nil, ErrNotFound
	}

	return &safeChunk{Chunk: mc.chunk, s: series, cid: id}, nil
}

// safeChunk pairs a chunk with the series that owns it and the chunk's ID
// within that series, so that Iterator can be created through the series
// while holding the series lock.
type safeChunk struct {
	chunkenc.Chunk
	s   *memSeries
	cid int
}

// Iterator returns an iterator for the wrapped chunk, obtained from the
// owning series while its lock is held. reuseIter is passed through to the
// series so it may be recycled.
func (c *safeChunk) Iterator(reuseIter chunkenc.Iterator) chunkenc.Iterator {
	series := c.s

	series.Lock()
	iter := series.iterator(c.cid, reuseIter)
	series.Unlock()

	return iter
}

// headIndexReader answers index queries against the head. Series only
// exposes chunks that overlap the closed interval [mint, maxt].
type headIndexReader struct {
	head       *Head
	mint, maxt int64
}

// Close is a no-op and always returns nil.
func (h *headIndexReader) Close() error {
	return nil
}

// Symbols returns a copy of the head's symbol set, taken under the symbol
// read lock. The returned map is independent of the head's own map. The error
// is always nil.
func (h *headIndexReader) Symbols() (map[string]struct{}, error) {
	head := h.head

	head.symMtx.RLock()
	defer head.symMtx.RUnlock()

	snapshot := make(map[string]struct{}, len(head.symbols))
	for sym := range head.symbols {
		snapshot[sym] = struct{}{}
	}
	return snapshot, nil
}

// LabelValues returns the sorted values recorded for a single label name.
// Exactly one name must be given; any other count yields
// encoding.ErrInvalidSize.
func (h *headIndexReader) LabelValues(names ...string) (index.StringTuples, error) {
	if len(names) != 1 {
		return nil, encoding.ErrInvalidSize
	}
	name := names[0]

	// Copy the values out under the read lock; sorting happens afterwards.
	h.head.symMtx.RLock()
	known := h.head.values[name]
	vals := make([]string, 0, len(known))
	for v := range known {
		vals = append(vals, v)
	}
	h.head.symMtx.RUnlock()

	sort.Strings(vals)
	return index.NewStringTuples(vals, len(names))
}

// LabelNames returns the sorted label names present in the head's value
// index, leaving out the empty name. The error is always nil.
func (h *headIndexReader) LabelNames() ([]string, error) {
	h.head.symMtx.RLock()
	defer h.head.symMtx.RUnlock()

	names := make([]string, 0, len(h.head.values))
	for name := range h.head.values {
		if name != "" {
			names = append(names, name)
		}
	}
	sort.Strings(names)
	return names, nil
}

// Postings returns the postings list iterator for the label pair, as looked
// up in the head's in-memory postings. The error is always nil.
func (h *headIndexReader) Postings(name, value string) (index.Postings, error) {
	list := h.head.postings.Get(name, value)
	return list, nil
}

// SortedPostings expands p and returns a postings list with the same series
// ordered by their label sets. IDs that no longer resolve to a series are
// skipped and logged at debug level. If p reports an error, an error postings
// wrapping it is returned instead.
func (h *headIndexReader) SortedPostings(p index.Postings) index.Postings {
	found := make([]*memSeries, 0, 128)

	// Resolve every series exactly once.
	for p.Next() {
		ms := h.head.series.getByID(p.At())
		if ms != nil {
			found = append(found, ms)
			continue
		}
		level.Debug(h.head.logger).Log("msg", "looked up series not found")
	}
	if err := p.Err(); err != nil {
		return index.ErrPostings(errors.Wrap(err, "expand postings"))
	}

	sort.Slice(found, func(a, b int) bool {
		return labels.Compare(found[a].lset, found[b].lset) < 0
	})

	// Turn the ordered series back into a list of references.
	refs := make([]uint64, len(found))
	for i, ms := range found {
		refs[i] = ms.ref
	}
	return index.NewListPostings(refs)
}

// Series fills lbls and chks with the labels and chunk metas of the series
// identified by ref, reusing the backing storage of both slices. Only chunks
// overlapping the reader's [mint, maxt] interval are listed. An unknown ref
// increments the seriesNotFound metric and yields ErrNotFound.
func (h *headIndexReader) Series(ref uint64, lbls *labels.Labels, chks *[]chunks.Meta) error {
	s := h.head.series.getByID(ref)
	if s == nil {
		h.head.metrics.seriesNotFound.Inc()
		return ErrNotFound
	}
	*lbls = append((*lbls)[:0], s.lset...)

	s.Lock()
	defer s.Unlock()

	*chks = (*chks)[:0]

	for pos, mc := range s.chunks {
		// Do not expose chunks that are outside of the specified range.
		if !mc.OverlapsClosedInterval(h.mint, h.maxt) {
			continue
		}

		meta := chunks.Meta{
			MinTime: mc.minTime,
			MaxTime: mc.maxTime,
			Ref:     packChunkID(s.ref, uint64(s.chunkID(pos))),
		}
		// The head chunk is still being appended to, so report it as open.
		if mc == s.headChunk {
			meta.MaxTime = math.MaxInt64
		}
		*chks = append(*chks, meta)
	}

	return nil
}

// LabelIndices returns one single-element slice per label name in the head's
// value index, in map iteration order. The result is never nil and the error
// is always nil.
func (h *headIndexReader) LabelIndices() ([][]string, error) {
	h.head.symMtx.RLock()
	defer h.head.symMtx.RUnlock()

	indices := make([][]string, 0, len(h.head.values))
	for name := range h.head.values {
		indices = append(indices, []string{name})
	}
	return indices, nil
}

// getOrCreate returns the series for the given hash and label set, creating
// it when it does not exist yet. The boolean reports whether a new series was
// created.
func (h *Head) getOrCreate(hash uint64, lset labels.Labels) (*memSeries, bool) {
	// Calling getOrCreateWithID alone would be semantically sufficient, but it
	// would build a new series on every sample inserted via Add(), which
	// causes allocations and makes our series IDs rather random and harder to
	// compress in postings. Check for an existing series first.
	if existing := h.series.getByHash(hash, lset); existing != nil {
		return existing, false
	}

	// Optimistically assume that we are the first one to create the series.
	nextID := atomic.AddUint64(&h.lastSeriesID, 1)
	return h.getOrCreateWithID(nextID, hash, lset)
}

// getOrCreateWithID registers a new series with the given ID unless a series
// with the same label set already exists, in which case the existing one is
// returned with false. For a newly created series it bumps the creation
// metric and series counter, adds the series to the postings, and records its
// label names and values in the symbol and value indices.
func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*memSeries, bool) {
	s, created := h.series.getOrSet(hash, newMemSeries(lset, id, h.chunkRange))
	if !created {
		return s, false
	}

	h.metrics.seriesCreated.Inc()
	atomic.AddUint64(&h.numSeries, 1)

	h.postings.Add(id, lset)

	h.symMtx.Lock()
	defer h.symMtx.Unlock()

	for _, lbl := range lset {
		vals, known := h.values[lbl.Name]
		if !known {
			vals = stringset{}
			h.values[lbl.Name] = vals
		}
		vals.set(lbl.Value)

		h.symbols[lbl.Name] = struct{}{}
		h.symbols[lbl.Value] = struct{}{}
	}

	return s, true
}

// seriesHashmap is a simple hashmap for memSeries by their label set. It is built
// on top of a regular hashmap and holds a slice of series per hash to resolve
// hash collisions; entries within a slice are told apart by comparing label sets.
// Its methods require the hash to be submitted with it to avoid re-computations
// throughout the code.
type seriesHashmap map[uint64][]*memSeries

// get returns the series stored under hash whose label set equals lset, or
// nil if there is none.
func (m seriesHashmap) get(hash uint64, lset labels.Labels) *memSeries {
	bucket := m[hash]
	for i := range bucket {
		if candidate := bucket[i]; candidate.lset.Equals(lset) {
			return candidate
		}
	}
	return nil
}

// set stores s under hash. An existing entry with an equal label set is
// replaced in place; otherwise s is appended to the hash's bucket.
func (m seriesHashmap) set(hash uint64, s *memSeries) {
	bucket := m[hash]
	for i := range bucket {
		if bucket[i].lset.Equals(s.lset) {
			bucket[i] = s
			return
		}
	}
	m[hash] = append(bucket, s)
}

// del removes every series stored under hash whose label set equals lset.
// The survivors are collected into a fresh slice; when none remain the hash
// key itself is deleted from the map.
func (m seriesHashmap) del(hash uint64, lset labels.Labels) {
	var kept []*memSeries
	for _, candidate := range m[hash] {
		if candidate.lset.Equals(lset) {
			continue
		}
		kept = append(kept, candidate)
	}

	if len(kept) > 0 {
		m[hash] = kept
		return
	}
	delete(m, hash)
}

// stripeSeries locks modulo ranges of IDs and hashes to reduce lock contention.
// The locks are padded to not be on the same cache line. Filling the padded space
// with the maps was profiled to be slower – likely due to the additional pointer
// dereferences.
type stripeSeries struct {
	// series holds the series by ID; the stripe index is id & stripeMask.
	series [stripeSize]map[uint64]*memSeries
	// hashes holds the series by label hash; the stripe index is hash & stripeMask.
	hashes [stripeSize]seriesHashmap
	// locks[i] guards both series[i] and hashes[i].
	locks [stripeSize]stripeLock
}

const (
	// stripeSize is the number of stripes in a stripeSeries. It is a power of
	// two so that stripeMask can select a stripe with a bitwise AND.
	stripeSize = 1 << 14
	// stripeMask maps an ID or hash to its stripe index via `x & stripeMask`.
	stripeMask = stripeSize - 1
)

// stripeLock is the read/write lock used for one stripe of a stripeSeries.
type stripeLock struct {
	sync.RWMutex
	// Padding to avoid multiple locks being on the same cache line.
	_ [40]byte
}

// newStripeSeries returns a stripeSeries in which every stripe has an empty,
// ready-to-use ID map and hash map.
func newStripeSeries() *stripeSeries {
	s := new(stripeSeries)
	for i := 0; i < stripeSize; i++ {
		s.series[i] = make(map[uint64]*memSeries)
		s.hashes[i] = make(seriesHashmap)
	}
	return s
}

// gc garbage collects old chunks that are strictly before mint and removes
// series entirely that have no chunks left.
// It returns the set of IDs of the removed series and the total number of
// chunks that were dropped across all series.
func (s *stripeSeries) gc(mint int64) (map[uint64]struct{}, int) {
	var (
		deleted  = map[uint64]struct{}{}
		rmChunks = 0
	)
	// Run through all series and truncate old chunks. Mark those with no
	// chunks left as deleted and store their ID.
	// Stripes are processed one at a time, each under its write lock.
	for i := 0; i < stripeSize; i++ {
		s.locks[i].Lock()

		for hash, all := range s.hashes[i] {
			for _, series := range all {
				series.Lock()
				rmChunks += series.truncateChunksBefore(mint)

				// Keep series that still have chunks or that have samples
				// waiting to be committed.
				if len(series.chunks) > 0 || series.pendingCommit {
					series.Unlock()
					continue
				}

				// The series is gone entirely. We need to keep the series lock
				// and make sure we have acquired the stripe locks for hash and ID of the
				// series alike.
				// If we don't hold them all, there's a very small chance that a series receives
				// samples again while we are half-way into deleting it.
				j := int(series.ref & stripeMask)

				// The ID may live in a different stripe than the hash; only
				// take the second lock when it is not the one already held.
				if i != j {
					s.locks[j].Lock()
				}

				deleted[series.ref] = struct{}{}
				s.hashes[i].del(hash, series.lset)
				delete(s.series[j], series.ref)

				if i != j {
					s.locks[j].Unlock()
				}

				series.Unlock()
			}
		}

		s.locks[i].Unlock()
	}

	return deleted, rmChunks
}

// getByID returns the series registered under id, or nil if there is none.
// The lookup runs under the read lock of the stripe selected by the ID.
func (s *stripeSeries) getByID(id uint64) *memSeries {
	stripe := id & stripeMask

	s.locks[stripe].RLock()
	defer s.locks[stripe].RUnlock()

	return s.series[stripe][id]
}

// getByHash returns the series stored under hash whose label set equals lset,
// or nil if there is none. The lookup runs under the read lock of the stripe
// selected by the hash.
func (s *stripeSeries) getByHash(hash uint64, lset labels.Labels) *memSeries {
	stripe := hash & stripeMask

	s.locks[stripe].RLock()
	defer s.locks[stripe].RUnlock()

	return s.hashes[stripe].get(hash, lset)
}

// getOrSet registers series under the given hash unless a series with an
// equal label set is already stored there. It returns the series that ends up
// registered and whether the passed series was the one inserted.
func (s *stripeSeries) getOrSet(hash uint64, series *memSeries) (*memSeries, bool) {
	i := hash & stripeMask

	s.locks[i].Lock()

	// Another series with the same labels already exists: hand that one back.
	if prev := s.hashes[i].get(hash, series.lset); prev != nil {
		s.locks[i].Unlock()
		return prev, false
	}
	s.hashes[i].set(hash, series)
	s.locks[i].Unlock()

	// The ID may map to a different stripe than the hash. The hash stripe lock
	// has been released above, so the two locks are never held together here.
	i = series.ref & stripeMask

	s.locks[i].Lock()
	s.series[i][series.ref] = series
	s.locks[i].Unlock()

	return series, true
}

// sample is a single timestamp/value pair.
type sample struct {
	t int64
	v float64
}

// T returns the sample's timestamp.
func (s sample) T() int64 {
	return s.t
}

// V returns the sample's value.
func (s sample) V() float64 {
	return s.v
}

// memSeries is the in-memory representation of a series. None of its methods
// are goroutine safe and it is the caller's responsibility to lock it.
type memSeries struct {
	sync.Mutex

	ref          uint64        // ID of the series.
	lset         labels.Labels // Label set identifying the series.
	chunks       []*memChunk   // Chunks currently held, oldest first.
	headChunk    *memChunk     // Last element of chunks, or nil when there are none.
	chunkRange   int64         // Range used to compute the initial upper bound of a new chunk.
	firstChunkID int           // Chunk ID of chunks[0]; advanced when old chunks are truncated.

	nextAt        int64 // Timestamp at which to cut the next chunk.
	sampleBuf     [4]sample // The four most recently appended samples, newest at index 3.
	pendingCommit bool // Whether there are samples waiting to be committed to this series.

	app chunkenc.Appender // Current appender for the chunk.
}

// newMemSeries returns an empty series with the given labels, ID and chunk
// range. nextAt starts at math.MinInt64; all other fields are zero.
func newMemSeries(lset labels.Labels, id uint64, chunkRange int64) *memSeries {
	return &memSeries{
		ref:        id,
		lset:       lset,
		chunkRange: chunkRange,
		nextAt:     math.MinInt64,
	}
}

// minTime returns the minimum time of the oldest chunk, or math.MinInt64
// when the series holds no chunks.
func (s *memSeries) minTime() int64 {
	if len(s.chunks) > 0 {
		return s.chunks[0].minTime
	}
	return math.MinInt64
}

// maxTime returns the maximum time of the head chunk, or math.MinInt64 when
// the series has no head chunk.
func (s *memSeries) maxTime() int64 {
	if hc := s.head(); hc != nil {
		return hc.maxTime
	}
	return math.MinInt64
}

// cut starts a new XOR chunk beginning at mint, makes it the head chunk and
// installs an appender for it. It panics if the chunk cannot provide an
// appender. The new chunk is returned.
func (s *memSeries) cut(mint int64) *memChunk {
	fresh := &memChunk{
		chunk:   chunkenc.NewXORChunk(),
		minTime: mint,
		maxTime: math.MinInt64,
	}
	s.chunks = append(s.chunks, fresh)
	s.headChunk = fresh

	// Upper bound on when the next chunk must be started. An earlier
	// timestamp may be chosen dynamically at a later point.
	s.nextAt = rangeForTimestamp(mint, s.chunkRange)

	appender, err := fresh.chunk.Appender()
	if err != nil {
		panic(err)
	}
	s.app = appender

	return fresh
}

// chunksMetas returns one chunks.Meta per chunk of the series, in order,
// carrying the chunk itself and its min and max time.
func (s *memSeries) chunksMetas() []chunks.Meta {
	metas := make([]chunks.Meta, len(s.chunks))
	for i, mc := range s.chunks {
		metas[i] = chunks.Meta{
			Chunk:   mc.chunk,
			MinTime: mc.minTime,
			MaxTime: mc.maxTime,
		}
	}
	return metas
}

// reset re-initialises every field of the memSeries except 'lset', 'ref'
// and 'chunkRange', so that it looks as it would right after
// 'newMemSeries(...)'.
func (s *memSeries) reset() {
	s.chunks, s.headChunk = nil, nil
	s.app = nil
	s.firstChunkID = 0
	s.nextAt = math.MinInt64
	s.pendingCommit = false
	s.sampleBuf = [4]sample{}
}

// appendable checks whether the sample (t, v) may be appended to the series.
// It returns ErrOutOfOrderSample when t is older than the head chunk's
// maxTime, ErrAmendSample when t equals it but v differs bitwise from the
// most recent buffered value, and nil otherwise.
func (s *memSeries) appendable(t int64, v float64) error {
	hc := s.head()
	if hc == nil {
		return nil
	}

	switch {
	case t > hc.maxTime:
		return nil
	case t < hc.maxTime:
		return ErrOutOfOrderSample
	}

	// Same timestamp as the latest sample. We are allowing exact duplicates
	// as we can encounter them in valid cases like federation and erroring
	// out at that time would be extremely noisy.
	last := s.sampleBuf[3].v
	if math.Float64bits(last) != math.Float64bits(v) {
		return ErrAmendSample
	}
	return nil
}

// chunk returns the chunk with the given chunk ID, or nil when the ID lies
// outside the chunks currently held by the series.
func (s *memSeries) chunk(id int) *memChunk {
	if pos := id - s.firstChunkID; pos >= 0 && pos < len(s.chunks) {
		return s.chunks[pos]
	}
	return nil
}

// chunkID converts a position in s.chunks into the corresponding chunk ID.
func (s *memSeries) chunkID(pos int) int {
	return s.firstChunkID + pos
}

// truncateChunksBefore removes the leading chunks of the series that have no
// timestamp at or after mint and returns how many were removed. Chunk IDs of
// the remaining chunks are unchanged because firstChunkID is advanced by the
// same amount. The head chunk is re-pointed at the last remaining chunk, or
// set to nil when none remain.
func (s *memSeries) truncateChunksBefore(mint int64) (removed int) {
	// Count the leading run of chunks that end before mint.
	for removed < len(s.chunks) && s.chunks[removed].maxTime < mint {
		removed++
	}

	s.chunks = append(s.chunks[:0], s.chunks[removed:]...)
	s.firstChunkID += removed

	s.headChunk = nil
	if n := len(s.chunks); n > 0 {
		s.headChunk = s.chunks[n-1]
	}

	return removed
}

// append adds the sample (t, v) to the series.
// success is false when the sample was rejected because t is not greater than
// the head chunk's maxTime. chunkCreated reports whether a new chunk was cut
// during the call, which can be true even when success is false.
func (s *memSeries) append(t int64, v float64) (success, chunkCreated bool) {
	// Based on Gorilla white papers this offers near-optimal compression ratio
	// so anything bigger than this has diminishing returns and increases
	// the time range within which we have to decompress all samples.
	const samplesPerChunk = 120

	c := s.head()

	// No head chunk yet: start the first one at this sample's timestamp.
	if c == nil {
		c = s.cut(t)
		chunkCreated = true
	}
	numSamples := c.chunk.NumSamples()

	// Out of order sample, or a sample at the same timestamp as the latest one.
	if c.maxTime >= t {
		return false, chunkCreated
	}
	// If we reach 25% of a chunk's desired sample count, set a definitive time
	// at which to start the next chunk.
	// At latest it must happen at the timestamp set when the chunk was cut.
	if numSamples == samplesPerChunk/4 {
		s.nextAt = computeChunkEndTime(c.minTime, c.maxTime, s.nextAt)
	}
	// The sample falls at or past the cut-off: it opens a new chunk.
	if t >= s.nextAt {
		c = s.cut(t)
		chunkCreated = true
	}
	s.app.Append(t, v)

	c.maxTime = t

	// Shift the buffer of the last four samples and store the new one at the end.
	s.sampleBuf[0] = s.sampleBuf[1]
	s.sampleBuf[1] = s.sampleBuf[2]
	s.sampleBuf[2] = s.sampleBuf[3]
	s.sampleBuf[3] = sample{t: t, v: v}

	return true, chunkCreated
}

// computeChunkEndTime estimates the end timestamp of a chunk from its start,
// the timestamp of its latest sample and the upper bound up to which data may
// be inserted. It assumes that the chunk is 1/4 full at cur: the range from
// start to max is divided into as many equal parts as chunks of the projected
// length fit into it, and the end of the first part is returned. When not
// even one projected chunk fits, max is returned.
func computeChunkEndTime(start, cur, max int64) int64 {
	span := max - start
	parts := span / ((cur - start + 1) * 4)
	if parts != 0 {
		return start + span/parts
	}
	return max
}

// iterator returns an iterator over the chunk with the given chunk ID. it is
// an optional iterator whose storage may be reused. A chunk ID the series no
// longer holds yields a no-op iterator. For the last chunk of the series the
// returned iterator is a memSafeIterator that serves the final samples from a
// copy of the sample buffer.
func (s *memSeries) iterator(id int, it chunkenc.Iterator) chunkenc.Iterator {
	c := s.chunk(id)
	// TODO(fabxc): Work around! A querier may have retrieved a pointer to a series' chunk,
	// which got then garbage collected before it got accessed.
	// We must ensure to not garbage collect as long as any readers still hold a reference.
	if c == nil {
		return chunkenc.NewNopIterator()
	}

	// Any chunk other than the last one is iterated directly.
	if id-s.firstChunkID < len(s.chunks)-1 {
		return c.chunk.Iterator(it)
	}
	// Serve the last 4 samples for the last chunk from the sample buffer
	// as their compressed bytes may be mutated by added samples.
	// When the caller passed in a memSafeIterator, re-initialise and reuse it.
	if msIter, ok := it.(*memSafeIterator); ok {
		msIter.Iterator = c.chunk.Iterator(msIter.Iterator)
		msIter.i = -1
		msIter.total = c.chunk.NumSamples()
		msIter.buf = s.sampleBuf
		return msIter
	}
	return &memSafeIterator{
		Iterator: c.chunk.Iterator(it),
		i:        -1,
		total:    c.chunk.NumSamples(),
		buf:      s.sampleBuf,
	}
}

// head returns the series' head chunk, which is nil when the series holds no
// chunks.
func (s *memSeries) head() *memChunk {
	return s.headChunk
}

// memChunk is a chunk held in memory together with the time range it covers.
type memChunk struct {
	chunk            chunkenc.Chunk
	minTime, maxTime int64
}

// OverlapsClosedInterval reports whether the chunk's [minTime, maxTime] range
// shares at least one timestamp with the closed interval [mint, maxt].
func (mc *memChunk) OverlapsClosedInterval(mint, maxt int64) bool {
	startsInTime := mc.minTime <= maxt
	return startsInTime && mc.maxTime >= mint
}

// memSafeIterator wraps a chunk iterator and serves the last four samples of
// the chunk from buf instead of from the wrapped iterator.
type memSafeIterator struct {
	chunkenc.Iterator

	i     int       // Index of the current sample; -1 before the first call to Next.
	total int       // Number of samples in the chunk when the iterator was set up.
	buf   [4]sample // Copy of the series' sample buffer taken when the iterator was set up.
}

// Next advances to the next sample and reports whether there is one. While
// more than four samples remain (counting the new position) the wrapped
// iterator is advanced; the last four positions are served from the buffer
// and do not touch the wrapped iterator.
func (it *memSafeIterator) Next() bool {
	next := it.i + 1
	if next >= it.total {
		return false
	}
	it.i = next

	if it.total-next <= 4 {
		return true
	}
	return it.Iterator.Next()
}

// At returns the timestamp and value at the current position. Positions among
// the last four samples are read from the buffer; earlier ones come from the
// wrapped iterator.
func (it *memSafeIterator) At() (int64, float64) {
	remaining := it.total - it.i
	if remaining > 4 {
		return it.Iterator.At()
	}
	buffered := it.buf[4-remaining]
	return buffered.t, buffered.v
}

// stringset is a set of strings backed by a map.
type stringset map[string]struct{}

// set adds s to the set; adding a member twice has no further effect.
func (ss stringset) set(s string) {
	var present struct{}
	ss[s] = present
}

// String returns the members in sorted order, joined by commas.
func (ss stringset) String() string {
	sorted := ss.slice()
	return strings.Join(sorted, ",")
}

// slice returns the members as a sorted slice. The result is never nil.
func (ss stringset) slice() []string {
	members := make([]string, 0, len(ss))
	for member := range ss {
		members = append(members, member)
	}
	sort.Strings(members)
	return members
}
-												Add liecence file and headers

											
										
										
											8 years ago
+								// Copyright 2017 The Prometheus Authors
 								// Licensed under the Apache License, Version 2.0 (the "License");
 								// you may not use this file except in compliance with the License.
 								// You may obtain a copy of the License at
 								//
 								// http://www.apache.org/licenses/LICENSE-2.0
 								//
 								// Unless required by applicable law or agreed to in writing, software
 								// distributed under the License is distributed on an "AS IS" BASIS,
 								// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 								// See the License for the specific language governing permissions and
 								// limitations under the License.
-												Add new interfaces and skeleton

											
										
										
											8 years ago
+								package tsdb
 								import (
-												Always create a new clean segment when starting the WAL. (#608)

* Always create a new clean segment when starting the WAL.
* Ensure we flush the last page after repairing and before recreating the
new segment in Repair.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											6 years ago
+									"fmt"
-												Fix last timestamp initialization

This initializes the chunkDesc's last timestamp to the minimum
value so initial samples with a timestamp of 0 (e.g. in tests)
are not accidentally dropped.

											
										
										
											8 years ago
+									"math"
-												wal: parallelize sample processing

											
										
										
											7 years ago
+									"runtime"
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+									"sort"
-												Move index and chunk encoders to own packages

											
										
										
											7 years ago
+									"strings"
-												Add new interfaces and skeleton

											
										
										
											8 years ago
+									"sync"
-												Count writer references on head blocks

											
										
										
											8 years ago
+									"sync/atomic"
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									"time"
-												Switch append refs to string

											
										
										
											8 years ago
-												Periodically fsync WAL, make head cut async

											
										
										
											8 years ago
+									"github.com/go-kit/kit/log"
-												Add levels to all log lines.

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

											
										
										
											7 years ago
+									"github.com/go-kit/kit/log/level"
-												Open db in Read only mode (#588)

* Added db read only open mode and use it for the tsdb cli.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											5 years ago
+									"github.com/oklog/ulid"
-												Move stats into meta.json file, cleanup, docs

											
										
										
											8 years ago
+									"github.com/pkg/errors"
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									"github.com/prometheus/client_golang/prometheus"
-												Cleanup after merging tsdb into prometheus

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											5 years ago
+									"github.com/prometheus/prometheus/tsdb/chunkenc"
 									"github.com/prometheus/prometheus/tsdb/chunks"
 									"github.com/prometheus/prometheus/tsdb/encoding"
 									"github.com/prometheus/prometheus/tsdb/index"
 									"github.com/prometheus/prometheus/tsdb/labels"
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+									"github.com/prometheus/prometheus/tsdb/record"
 									"github.com/prometheus/prometheus/tsdb/tombstones"
-												Cleanup after merging tsdb into prometheus

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											5 years ago
+									"github.com/prometheus/prometheus/tsdb/wal"
-												Add new interfaces and skeleton

											
										
										
											8 years ago
+								)
-												Write to WAL before appending to memory storage

											
										
										
											8 years ago
+								var (
 									// ErrNotFound is returned if a looked up resource was not found.
-												Fix races and add comments on remaining ones

											
										
										
											8 years ago
+									ErrNotFound = errors.Errorf("not found")
-												Write to WAL before appending to memory storage

											
										
										
											8 years ago
 									// ErrOutOfOrderSample is returned if an appended sample has a
-												Update comment for ErrOutOfOrderSample (#563)

Signed-off-by: zhulongcheng <zhulongcheng.me@gmail.com>
											
										
										
											6 years ago
+									// timestamp smaller than the most recent sample.
-												Write to WAL before appending to memory storage

											
										
										
											8 years ago
+									ErrOutOfOrderSample = errors.New("out of order sample")
 									// ErrAmendSample is returned if an appended sample has the same timestamp
 									// as the most recent sample but a different value.
 									ErrAmendSample = errors.New("amending sample")
 									// ErrOutOfBounds is returned if an appended sample is out of the
 									// writable time range.
 									ErrOutOfBounds = errors.New("out of bounds")
-												Dont store stones in head, delete samples directly

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
 									// emptyTombstoneReader is a no-op Tombstone Reader.
 									// This is used by head to satisfy the Tombstones() function call.
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+									emptyTombstoneReader = tombstones.NewMemTombstones()
-												Write to WAL before appending to memory storage

											
										
										
											8 years ago
+								)
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								// Head handles reads and writes of time series data within a time window.
 								type Head struct {
 									chunkRange int64
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									metrics    *headMetrics
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									wal        *wal.WAL
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									logger     log.Logger
 									appendPool sync.Pool
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+									seriesPool sync.Pool
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									bytesPool  sync.Pool
-												Open db in Read only mode (#588)

* Added db read only open mode and use it for the tsdb cli.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											5 years ago
+									numSeries  uint64
-												Move stats into meta.json file, cleanup, docs

											
										
										
											8 years ago
-												no overlapping on compaction when an existing block is not within default boundaries. (#461)

closes https://github.com/prometheus/prometheus/issues/4643

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+									minTime, maxTime int64 // Current min and max of the samples included in the head.
 									minValidTime     int64 // Mint allowed to be added to the head. It shouldn't be lower than the maxt of the last persisted block.
-												Change series ID from uint32 to uint64

											
										
										
											7 years ago
+									lastSeriesID     uint64
-												Count writer references on head blocks

											
										
										
											8 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									// All series addressable by their ID or hash.
 									series *stripeSeries
-												Consolidate mem index into HeadBlock

											
										
										
											8 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									symMtx  sync.RWMutex
 									symbols map[string]struct{}
 									values  map[string]stringset // label names to possible values
-												Keep series that are still in WAL in checkpoints (#577)

If all the samples are deleted for a series,
we should still keep the series in the WAL as
anything else reading the WAL will still care
about it in order to understand the samples.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
											
										
										
											6 years ago
+									deletedMtx sync.Mutex
 									deleted    map[uint64]int // Deleted series, and what WAL segment they must be kept until.
-												Move index and chunk encoders to own packages

											
										
										
											7 years ago
+									postings *index.MemPostings // postings lists for terms
-												Add new interfaces and skeleton

											
										
										
											8 years ago
+								}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+								type headMetrics struct {
-												Fix review comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+									activeAppenders         prometheus.Gauge
-												Open db in Read only mode (#588)

* Added db read only open mode and use it for the tsdb cli.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											5 years ago
+									series                  prometheus.GaugeFunc
-												Fix review comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+									seriesCreated           prometheus.Counter
 									seriesRemoved           prometheus.Counter
 									seriesNotFound          prometheus.Counter
 									chunks                  prometheus.Gauge
 									chunksCreated           prometheus.Counter
 									chunksRemoved           prometheus.Counter
 									gcDuration              prometheus.Summary
 									minTime                 prometheus.GaugeFunc
 									maxTime                 prometheus.GaugeFunc
 									samplesAppended         prometheus.Counter
 									walTruncateDuration     prometheus.Summary
-												re-add the missing prometheus_tsdb_wal_corruptions_total (#473)

closes https://github.com/prometheus/tsdb/issues/471

after implementing the new WAL this metric was missing so adding it again.
Also added it in a test to make sure it works as expected.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+									walCorruptionsTotal     prometheus.Counter
-												Fix review comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+									headTruncateFail        prometheus.Counter
 									headTruncateTotal       prometheus.Counter
 									checkpointDeleteFail    prometheus.Counter
 									checkpointDeleteTotal   prometheus.Counter
 									checkpointCreationFail  prometheus.Counter
 									checkpointCreationTotal prometheus.Counter
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+								}
 								func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
 									m := &headMetrics{}
 									m.activeAppenders = prometheus.NewGauge(prometheus.GaugeOpts{
-												Revert "Remove `prometheus_` prefix from metrics"

This reverts commit 98fe30438ce2f33372fda366fc8205f4b86bfc5c.

After some discussion, it was concluded that we want the full
`prometheus_tsdb_...` prefix hardcoded in the library.

Signed-off-by: beorn7 <beorn@soundcloud.com>

											
										
										
											6 years ago
+										Name: "prometheus_tsdb_head_active_appenders",
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										Help: "Number of currently active appender transactions",
 									})
-												Open db in Read only mode (#588)

* Added db read only open mode and use it for the tsdb cli.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											5 years ago
+									m.series = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
-												Revert "Remove `prometheus_` prefix from metrics"

This reverts commit 98fe30438ce2f33372fda366fc8205f4b86bfc5c.

After some discussion, it was concluded that we want the full
`prometheus_tsdb_...` prefix hardcoded in the library.

Signed-off-by: beorn7 <beorn@soundcloud.com>

											
										
										
											6 years ago
+										Name: "prometheus_tsdb_head_series",
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										Help: "Total number of series in the head block.",
-												Open db in Read only mode (#588)

* Added db read only open mode and use it for the tsdb cli.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											5 years ago
+									}, func() float64 {
 										return float64(h.NumSeries())
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									})
-												Changes in series names (and types) exposed (#376)

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>
											
										
										
											6 years ago
+									m.seriesCreated = prometheus.NewCounter(prometheus.CounterOpts{
-												Revert "Remove `prometheus_` prefix from metrics"

This reverts commit 98fe30438ce2f33372fda366fc8205f4b86bfc5c.

After some discussion, it was concluded that we want the full
`prometheus_tsdb_...` prefix hardcoded in the library.

Signed-off-by: beorn7 <beorn@soundcloud.com>

											
										
										
											6 years ago
+										Name: "prometheus_tsdb_head_series_created_total",
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										Help: "Total number of series created in the head",
 									})
-												Changes in series names (and types) exposed (#376)

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>
											
										
										
											6 years ago
+									m.seriesRemoved = prometheus.NewCounter(prometheus.CounterOpts{
-												Revert "Remove `prometheus_` prefix from metrics"

This reverts commit 98fe30438ce2f33372fda366fc8205f4b86bfc5c.

After some discussion, it was concluded that we want the full
`prometheus_tsdb_...` prefix hardcoded in the library.

Signed-off-by: beorn7 <beorn@soundcloud.com>

											
										
										
											6 years ago
+										Name: "prometheus_tsdb_head_series_removed_total",
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										Help: "Total number of series removed in the head",
 									})
-												head: track number of series not found errors in metric

											
										
										
											7 years ago
+									m.seriesNotFound = prometheus.NewCounter(prometheus.CounterOpts{
-												Revert "Remove `prometheus_` prefix from metrics"

This reverts commit 98fe30438ce2f33372fda366fc8205f4b86bfc5c.

After some discussion, it was concluded that we want the full
`prometheus_tsdb_...` prefix hardcoded in the library.

Signed-off-by: beorn7 <beorn@soundcloud.com>

											
										
										
											6 years ago
+										Name: "prometheus_tsdb_head_series_not_found_total",
-												head: track number of series not found errors in metric

											
										
										
											7 years ago
+										Help: "Total number of requests for series that were not found.",
 									})
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									m.chunks = prometheus.NewGauge(prometheus.GaugeOpts{
-												Revert "Remove `prometheus_` prefix from metrics"

This reverts commit 98fe30438ce2f33372fda366fc8205f4b86bfc5c.

After some discussion, it was concluded that we want the full
`prometheus_tsdb_...` prefix hardcoded in the library.

Signed-off-by: beorn7 <beorn@soundcloud.com>

											
										
										
											6 years ago
+										Name: "prometheus_tsdb_head_chunks",
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										Help: "Total number of chunks in the head block.",
 									})
-												Changes in series names (and types) exposed (#376)

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>
											
										
										
											6 years ago
+									m.chunksCreated = prometheus.NewCounter(prometheus.CounterOpts{
-												Revert "Remove `prometheus_` prefix from metrics"

This reverts commit 98fe30438ce2f33372fda366fc8205f4b86bfc5c.

After some discussion, it was concluded that we want the full
`prometheus_tsdb_...` prefix hardcoded in the library.

Signed-off-by: beorn7 <beorn@soundcloud.com>

											
										
										
											6 years ago
+										Name: "prometheus_tsdb_head_chunks_created_total",
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										Help: "Total number of chunks created in the head",
 									})
-												Changes in series names (and types) exposed (#376)

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>
											
										
										
											6 years ago
+									m.chunksRemoved = prometheus.NewCounter(prometheus.CounterOpts{
-												Revert "Remove `prometheus_` prefix from metrics"

This reverts commit 98fe30438ce2f33372fda366fc8205f4b86bfc5c.

After some discussion, it was concluded that we want the full
`prometheus_tsdb_...` prefix hardcoded in the library.

Signed-off-by: beorn7 <beorn@soundcloud.com>

											
										
										
											6 years ago
+										Name: "prometheus_tsdb_head_chunks_removed_total",
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										Help: "Total number of chunks removed in the head",
 									})
 									m.gcDuration = prometheus.NewSummary(prometheus.SummaryOpts{
-												Make objectives of Summaries explicit

With the next release of client_golang, Summaries will not have
objectives by default.

As it turns out, for prometheus_tsdb_head_gc_duration_seconds and
prometheus_tsdb_wal_truncate_duration_seconds, the objective-less
default makes more sense then the current default.

To make sure we do the right thing before and after the upcoming
release of client_golang, I have set the objectives explicitly
wherever that was not the case so far:

- prometheus_tsdb_head_gc_duration_seconds and
  prometheus_tsdb_wal_truncate_duration_seconds now have no objectives
  explicitly.
- prometheus_tsdb_wal_fsync_duration_seconds now explicitly uses the
  previous default objectives.

Signed-off-by: beorn7 <beorn@grafana.com>

											
										
										
											6 years ago
+										Name:       "prometheus_tsdb_head_gc_duration_seconds",
 										Help:       "Runtime of garbage collection in the head block.",
 										Objectives: map[float64]float64{},
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									})
-												Fix innocuous typo in variable names

This change fixes the variable names holding the tsdb_head_max_time and
tsdb_head_min_time metrics. It is a cosmetic change to improve the
code readability as the metric values are taken from the correct
variables.

											
										
										
											7 years ago
+									m.maxTime = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
-												Revert "Remove `prometheus_` prefix from metrics"

This reverts commit 98fe30438ce2f33372fda366fc8205f4b86bfc5c.

After some discussion, it was concluded that we want the full
`prometheus_tsdb_...` prefix hardcoded in the library.

Signed-off-by: beorn7 <beorn@soundcloud.com>

											
										
										
											6 years ago
+										Name: "prometheus_tsdb_head_max_time",
-												remove opaque metrics (#457)

* more descriptive help text for the head metrics unit

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>

											
										
										
											6 years ago
+										Help: "Maximum timestamp of the head block. The unit is decided by the library consumer.",
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									}, func() float64 {
 										return float64(h.MaxTime())
 									})
-												Fix innocuous typo in variable names

This change fixes the variable names holding the tsdb_head_max_time and
tsdb_head_min_time metrics. It is a cosmetic change to improve the
code readability as the metric values are taken from the correct
variables.

											
										
										
											7 years ago
+									m.minTime = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
-												Revert "Remove `prometheus_` prefix from metrics"

This reverts commit 98fe30438ce2f33372fda366fc8205f4b86bfc5c.

After some discussion, it was concluded that we want the full
`prometheus_tsdb_...` prefix hardcoded in the library.

Signed-off-by: beorn7 <beorn@soundcloud.com>

											
										
										
											6 years ago
+										Name: "prometheus_tsdb_head_min_time",
-												remove opaque metrics (#457)

* more descriptive help text for the head metrics unit

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>

											
										
										
											6 years ago
+										Help: "Minimum time bound of the head block. The unit is decided by the library consumer.",
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									}, func() float64 {
 										return float64(h.MinTime())
 									})
 									m.walTruncateDuration = prometheus.NewSummary(prometheus.SummaryOpts{
-												Make objectives of Summaries explicit

With the next release of client_golang, Summaries will not have
objectives by default.

As it turns out, for prometheus_tsdb_head_gc_duration_seconds and
prometheus_tsdb_wal_truncate_duration_seconds, the objective-less
default makes more sense then the current default.

To make sure we do the right thing before and after the upcoming
release of client_golang, I have set the objectives explicitly
wherever that was not the case so far:

- prometheus_tsdb_head_gc_duration_seconds and
  prometheus_tsdb_wal_truncate_duration_seconds now have no objectives
  explicitly.
- prometheus_tsdb_wal_fsync_duration_seconds now explicitly uses the
  previous default objectives.

Signed-off-by: beorn7 <beorn@grafana.com>

											
										
										
											6 years ago
+										Name:       "prometheus_tsdb_wal_truncate_duration_seconds",
 										Help:       "Duration of WAL truncation.",
 										Objectives: map[float64]float64{},
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									})
-												re-add the missing prometheus_tsdb_wal_corruptions_total (#473)

closes https://github.com/prometheus/tsdb/issues/471

after implementing the new WAL this metric was missing so adding it again.
Also added it in a test to make sure it works as expected.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+									m.walCorruptionsTotal = prometheus.NewCounter(prometheus.CounterOpts{
 										Name: "prometheus_tsdb_wal_corruptions_total",
 										Help: "Total number of WAL corruptions.",
 									})
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									m.samplesAppended = prometheus.NewCounter(prometheus.CounterOpts{
-												Revert "Remove `prometheus_` prefix from metrics"

This reverts commit 98fe30438ce2f33372fda366fc8205f4b86bfc5c.

After some discussion, it was concluded that we want the full
`prometheus_tsdb_...` prefix hardcoded in the library.

Signed-off-by: beorn7 <beorn@soundcloud.com>

											
										
										
											6 years ago
+										Name: "prometheus_tsdb_head_samples_appended_total",
-												Typo in prometheus_tsdb_head_samples_appended_total description (#188)


											
										
										
											7 years ago
+										Help: "Total number of appended samples.",
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									})
-												Fix review comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+									m.headTruncateFail = prometheus.NewCounter(prometheus.CounterOpts{
 										Name: "prometheus_tsdb_head_truncations_failed_total",
 										Help: "Total number of head truncations that failed.",
 									})
 									m.headTruncateTotal = prometheus.NewCounter(prometheus.CounterOpts{
 										Name: "prometheus_tsdb_head_truncations_total",
 										Help: "Total number of head truncations attempted.",
 									})
-												Add new metrics.

1. 'prometheus_tsdb_wal_truncate_fail' for failed WAL truncation.
2. 'prometheus_tsdb_checkpoint_delete_fail' for failed old checkpoint delete.

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+									m.checkpointDeleteFail = prometheus.NewCounter(prometheus.CounterOpts{
-												Fix review comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+										Name: "prometheus_tsdb_checkpoint_deletions_failed_total",
 										Help: "Total number of checkpoint deletions that failed.",
 									})
 									m.checkpointDeleteTotal = prometheus.NewCounter(prometheus.CounterOpts{
 										Name: "prometheus_tsdb_checkpoint_deletions_total",
 										Help: "Total number of checkpoint deletions attempted.",
 									})
 									m.checkpointCreationFail = prometheus.NewCounter(prometheus.CounterOpts{
 										Name: "prometheus_tsdb_checkpoint_creations_failed_total",
 										Help: "Total number of checkpoint creations that failed.",
 									})
 									m.checkpointCreationTotal = prometheus.NewCounter(prometheus.CounterOpts{
 										Name: "prometheus_tsdb_checkpoint_creations_total",
 										Help: "Total number of checkpoint creations attempted.",
-												Add new metrics.

1. 'prometheus_tsdb_wal_truncate_fail' for failed WAL truncation.
2. 'prometheus_tsdb_checkpoint_delete_fail' for failed old checkpoint delete.

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+									})
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
 									if r != nil {
 										r.MustRegister(
 											m.activeAppenders,
 											m.chunks,
 											m.chunksCreated,
 											m.chunksRemoved,
 											m.series,
 											m.seriesCreated,
 											m.seriesRemoved,
-												head: track number of series not found errors in metric

											
										
										
											7 years ago
+											m.seriesNotFound,
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+											m.minTime,
 											m.maxTime,
 											m.gcDuration,
 											m.walTruncateDuration,
-												re-add the missing prometheus_tsdb_wal_corruptions_total (#473)

closes https://github.com/prometheus/tsdb/issues/471

after implementing the new WAL this metric was missing so adding it again.
Also added it in a test to make sure it works as expected.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+											m.walCorruptionsTotal,
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+											m.samplesAppended,
-												Fix review comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+											m.headTruncateFail,
 											m.headTruncateTotal,
-												Add new metrics.

1. 'prometheus_tsdb_wal_truncate_fail' for failed WAL truncation.
2. 'prometheus_tsdb_checkpoint_delete_fail' for failed old checkpoint delete.

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+											m.checkpointDeleteFail,
-												Fix review comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+											m.checkpointDeleteTotal,
 											m.checkpointCreationFail,
 											m.checkpointCreationTotal,
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										)
 									}
 									return m
 								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								// NewHead opens the head block in dir.
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+								func NewHead(r prometheus.Registerer, l log.Logger, wal *wal.WAL, chunkRange int64) (*Head, error) {
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									if l == nil {
 										l = log.NewNopLogger()
 									}
 									if chunkRange < 1 {
 										return nil, errors.Errorf("invalid chunk range %d", chunkRange)
 									}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									h := &Head{
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										wal:        wal,
 										logger:     l,
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+										chunkRange: chunkRange,
-												Properly initialize head time

This fixes various issues when initializing the head time range
under different starting conditions.

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+										minTime:    math.MaxInt64,
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+										maxTime:    math.MinInt64,
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+										series:     newStripeSeries(),
-												Implement Delete on HeadBlock

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

											
										
										
											8 years ago
+										values:     map[string]stringset{},
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of the the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
+										symbols:    map[string]struct{}{},
-												Move index and chunk encoders to own packages

											
										
										
											7 years ago
+										postings:   index.NewUnorderedMemPostings(),
-												Keep series that are still in WAL in checkpoints (#577)

If all the samples are deleted for a series,
we should still keep the series in the WAL as
anything else reading the WAL will still care
about it in order to understand the samples.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
											
										
										
											6 years ago
+										deleted:    map[uint64]int{},
-												Fix races

											
										
										
											8 years ago
+									}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									h.metrics = newHeadMetrics(h, r)
-												Filter WAL data in Head, misc fixes

											
										
										
											7 years ago
+									return h, nil
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+								}
-												wal: parallelize sample processing

											
										
										
											7 years ago
+								// processWALSamples adds a partition of samples it receives to the head and passes
 								// them on to other workers.
 								// Samples before the mint timestamp are discarded.
 								func (h *Head) processWALSamples(
-												Properly initialize head time

This fixes various issues when initializing the head time range
under different starting conditions.

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									minValidTime int64,
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+									input <-chan []record.RefSample, output chan<- []record.RefSample,
-												wal: parallelize sample processing

											
										
										
											7 years ago
+								) (unknownRefs uint64) {
 									defer close(output)
-												Keep local cache of ids.

With the various goroutines running, the locking
in getByID is notable. This cuts cpu usage by ~25%
and walltime by ~20%.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>

											
										
										
											6 years ago
+									// Mitigate lock contention in getByID.
 									refSeries := map[uint64]*memSeries{}
-												Properly initialize head time

This fixes various issues when initializing the head time range
under different starting conditions.

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									mint, maxt := int64(math.MaxInt64), int64(math.MinInt64)
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
-												wal: parallelize sample processing

											
										
										
											7 years ago
+									for samples := range input {
 										for _, s := range samples {
-												Only send WAL read workers the samples they need.

Calculating the modulus in each worker was a hotspot,
and meant that you had more work to do the more cores you had.
This cuts CPU usage (on my 8 core, 4 real core machine) by
33%, and walltime by 3%

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>

											
										
										
											6 years ago
+											if s.T < minValidTime {
-												wal: parallelize sample processing

											
										
										
											7 years ago
+												continue
 											}
-												Keep local cache of ids.

With the various goroutines running, the locking
in getByID is notable. This cuts cpu usage by ~25%
and walltime by ~20%.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>

											
										
										
											6 years ago
+											ms := refSeries[s.Ref]
-												wal: parallelize sample processing

											
										
										
											7 years ago
+											if ms == nil {
-												Keep local cache of ids.

With the various goroutines running, the locking
in getByID is notable. This cuts cpu usage by ~25%
and walltime by ~20%.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>

											
										
										
											6 years ago
+												ms = h.series.getByID(s.Ref)
 												if ms == nil {
 													unknownRefs++
 													continue
 												}
 												refSeries[s.Ref] = ms
-												wal: parallelize sample processing

											
										
										
											7 years ago
+											}
 											_, chunkCreated := ms.append(s.T, s.V)
 											if chunkCreated {
 												h.metrics.chunksCreated.Inc()
 												h.metrics.chunks.Inc()
 											}
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+											if s.T > maxt {
 												maxt = s.T
 											}
-												Properly initialize head time

This fixes various issues when initializing the head time range
under different starting conditions.

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+											if s.T < mint {
 												mint = s.T
 											}
-												wal: parallelize sample processing

											
										
										
											7 years ago
+										}
 										output <- samples
 									}
-												Properly initialize head time

This fixes various issues when initializing the head time range
under different starting conditions.

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									h.updateMinMaxTime(mint, maxt)
 									return unknownRefs
 								}
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
-												Properly initialize head time

This fixes various issues when initializing the head time range
under different starting conditions.

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+								func (h *Head) updateMinMaxTime(mint, maxt int64) {
 									for {
 										lt := h.MinTime()
 										if mint >= lt {
 											break
 										}
 										if atomic.CompareAndSwapInt64(&h.minTime, lt, mint) {
 											break
 										}
 									}
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									for {
 										ht := h.MaxTime()
 										if maxt <= ht {
 											break
 										}
 										if atomic.CompareAndSwapInt64(&h.maxTime, ht, maxt) {
 											break
 										}
 									}
-												wal: parallelize sample processing

											
										
										
											7 years ago
+								}
-												move the wal repair logic in db.Open (#633)

* move the wal repair logic in db.Open

This is to allow opening a wal in a read-only mode without triggering a
repair.

Signed-off-by: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com>
											
										
										
											6 years ago
+								func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) {
-												Use boolean function instead of postings to drop WAL series

There is no guarantee or requirement for WAL writers to only add
series entries in increasing order of IDs. A postings list cannot look
back and thus unordered WAL entries would skip over IDs to not truncate
from the WAL.
We replace it with a simple boolean check function that does not require
order.

											
										
										
											7 years ago
+									// Track number of samples that referenced a series we don't know about
 									// for error reporting.
-												wal: parallelize sample processing

											
										
										
											7 years ago
+									var unknownRefs uint64
 									// Start workers that each process samples for a partition of the series ID space.
 									// They are connected through a ring of channels which ensures that all sample batches
 									// read from the WAL are processed in order.
 									var (
-												Remove unnecessary lock in loadWAL (#6107)

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>
											
										
										
											5 years ago
+										wg      sync.WaitGroup
 										n       = runtime.GOMAXPROCS(0)
 										inputs  = make([]chan []record.RefSample, n)
 										outputs = make([]chan []record.RefSample, n)
-												wal: parallelize sample processing

											
										
										
											7 years ago
+									)
-												Ensure workers terminated fully before reading unknownRefs

											
										
										
											7 years ago
+									wg.Add(n)
-												move the wal repair logic in db.Open (#633)

* move the wal repair logic in db.Open

This is to allow opening a wal in a read oly mode without triggering a
repair.

Signed-off-by: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com>
											
										
										
											6 years ago
+									defer func() {
 										// For CorruptionErr ensure to terminate all workers before exiting.
 										if _, ok := err.(*wal.CorruptionErr); ok {
 											for i := 0; i < n; i++ {
 												close(inputs[i])
 												for range outputs[i] {
 												}
 											}
 											wg.Wait()
 										}
 									}()
-												wal: parallelize sample processing

											
										
										
											7 years ago
+									for i := 0; i < n; i++ {
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+										outputs[i] = make(chan []record.RefSample, 300)
 										inputs[i] = make(chan []record.RefSample, 300)
-												wal: parallelize sample processing

											
										
										
											7 years ago
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+										go func(input <-chan []record.RefSample, output chan<- []record.RefSample) {
-												no overlapping on compaction when an existing block is not within default boundaries. (#461)

closes https://github.com/prometheus/prometheus/issues/4643

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+											unknown := h.processWALSamples(h.minValidTime, input, output)
-												wal: parallelize sample processing

											
										
										
											7 years ago
+											atomic.AddUint64(&unknownRefs, unknown)
-												Ensure workers terminated fully before reading unknownRefs

											
										
										
											7 years ago
+											wg.Done()
-												Only send WAL read workers the samples they need.

Calculating the modulus in each worker was a hotspot,
and meant that you had more work to do the more cores you had.
This cuts CPU usage (on my 8 core, 4 real core machine) by
33%, and walltime by 3%

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>

											
										
										
											6 years ago
+										}(inputs[i], outputs[i])
-												wal: parallelize sample processing

											
										
										
											7 years ago
+									}
-												Use boolean function instead of postings to drop WAL series

There is not guarantee or requirement for WAL writers to only add
series entries in increasing order of IDs. A postings list cannot look
back and thus unordered WAL entries would skip over IDs to not truncate
from the WAL.
We replace it with a simple boolean check function that does not require
order.

											
										
										
											7 years ago
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									var (
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+										dec       record.Decoder
 										series    []record.RefSeries
 										samples   []record.RefSample
 										tstones   []tombstones.Stone
 										allStones = tombstones.NewMemTombstones()
-												Allocate the shards only once while reading WAL (#6093)

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>
											
										
										
											5 years ago
+										shards    = make([][]record.RefSample, n)
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									)
-												move the wal repair logic in db.Open (#633)

* move the wal repair logic in db.Open

This is to allow opening a wal in a read oly mode without triggering a
repair.

Signed-off-by: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com>
											
										
										
											6 years ago
+									defer func() {
 										if err := allStones.Close(); err != nil {
 											level.Warn(h.logger).Log("msg", "closing  memTombstones during wal read", "err", err)
 										}
 									}()
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									for r.Next() {
 										series, samples, tstones = series[:0], samples[:0], tstones[:0]
 										rec := r.Record()
 										switch dec.Type(rec) {
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+										case record.Series:
-												Actually reuse samples in loadWAL across records.

This cuts walltime by 2.5X and CPU by 2X

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>

											
										
										
											6 years ago
+											series, err = dec.Series(rec, series)
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+											if err != nil {
-												repair wal when the record cannot be decoded (#453)

* repair wal when the record cannot be decoded

Currently repair is run only when the error happens in the reader.

A corruption can occur after the record is read and when it is decoded.
This change wraps the error at decoding as a CorruptionErr as this error
is expected to trigger a repair.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+												return &wal.CorruptionErr{
 													Err:     errors.Wrap(err, "decode series"),
 													Segment: r.Segment(),
 													Offset:  r.Offset(),
 												}
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+											}
 											for _, s := range series {
-												Handle multiple refs for the same series when WAL reading. (#623)

This can happen if a given series is created/truncated/recreated.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
											
										
										
											6 years ago
+												series, created := h.getOrCreateWithID(s.Ref, s.Labels.Hash(), s.Labels)
 												if !created {
 													// There's already a different ref for this series.
 													multiRef[s.Ref] = series.ref
 												}
-												Create series with ID recorded in WAL when reading it back

											
										
										
											7 years ago
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+												if h.lastSeriesID < s.Ref {
 													h.lastSeriesID = s.Ref
 												}
-												Create series with ID recorded in WAL when reading it back

											
										
										
											7 years ago
+											}
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+										case record.Samples:
-												Actually reuse samples in loadWAL across records.

This cuts walltime by 2.5X and CPU by 2X

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>

											
										
										
											6 years ago
+											samples, err = dec.Samples(rec, samples)
 											s := samples
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+											if err != nil {
-												repair wal when the record cannot be decoded (#453)

* repair wal when the record cannot be decoded

Currently repair is run only when the error happens in the reader.

A corruption can occur after the record is read and when it is decoded.
This change wraps the error at decoding as a CorruptionErr as this error
is expected to trigger a repair.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+												return &wal.CorruptionErr{
 													Err:     errors.Wrap(err, "decode samples"),
 													Segment: r.Segment(),
 													Offset:  r.Offset(),
 												}
-												head: limit WAL sample processing batch size

											
										
										
											7 years ago
+											}
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+											// We split up the samples into chunks of 5000 samples or less.
 											// With O(300 * #cores) in-flight sample batches, large scrapes could otherwise
 											// cause thousands of very large in flight buffers occupying large amounts
 											// of unused memory.
 											for len(samples) > 0 {
-												Only send WAL read workers the samples they need.

Calculating the modulus in each worker was a hotspot,
and meant that you had more work to do the more cores you had.
This cuts CPU usage (on my 8 core, 4 real core machine) by
33%, and walltime by 3%

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>

											
										
										
											6 years ago
+												m := 5000
 												if len(samples) < m {
 													m = len(samples)
 												}
 												for i := 0; i < n; i++ {
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+													var buf []record.RefSample
-												Only send WAL read workers the samples they need.

Calculating the modulus in each worker was a hotspot,
and meant that you had more work to do the more cores you had.
This cuts CPU usage (on my 8 core, 4 real core machine) by
33%, and walltime by 3%

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>

											
										
										
											6 years ago
+													select {
 													case buf = <-outputs[i]:
 													default:
 													}
 													shards[i] = buf[:0]
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+												}
-												Only send WAL read workers the samples they need.

Calculating the modulus in each worker was a hotspot,
and meant that you had more work to do the more cores you had.
This cuts CPU usage (on my 8 core, 4 real core machine) by
33%, and walltime by 3%

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>

											
										
										
											6 years ago
+												for _, sam := range samples[:m] {
-												Handle multiple refs for the same series when WAL reading. (#623)

This can happen if a given series is created/truncated/recreated.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
											
										
										
											6 years ago
+													if r, ok := multiRef[sam.Ref]; ok {
 														sam.Ref = r
 													}
-												Only send WAL read workers the samples they need.

Calculating the modulus in each worker was a hotspot,
and meant that you had more work to do the more cores you had.
This cuts CPU usage (on my 8 core, 4 real core machine) by
33%, and walltime by 3%

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>

											
										
										
											6 years ago
+													mod := sam.Ref % uint64(n)
 													shards[mod] = append(shards[mod], sam)
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+												}
-												Only send WAL read workers the samples they need.

Calculating the modulus in each worker was a hotspot,
and meant that you had more work to do the more cores you had.
This cuts CPU usage (on my 8 core, 4 real core machine) by
33%, and walltime by 3%

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>

											
										
										
											6 years ago
+												for i := 0; i < n; i++ {
 													inputs[i] <- shards[i]
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+												}
-												Only send WAL read workers the samples they need.

Calculating the modulus in each worker was a hotspot,
and meant that you had more work to do the more cores you had.
This cuts CPU usage (on my 8 core, 4 real core machine) by
33%, and walltime by 3%

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>

											
										
										
											6 years ago
+												samples = samples[m:]
-												head: limit WAL sample processing batch size

											
										
										
											7 years ago
+											}
-												Actually reuse samples in loadWAL across records.

This cuts walltime by 2.5X and CPU by 2X

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>

											
										
										
											6 years ago
+											samples = s // Keep whole slice for reuse.
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+										case record.Tombstones:
-												Actually reuse samples in loadWAL across records.

This cuts walltime by 2.5X and CPU by 2X

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>

											
										
										
											6 years ago
+											tstones, err = dec.Tombstones(rec, tstones)
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+											if err != nil {
-												repair wal when the record cannot be decoded (#453)

* repair wal when the record cannot be decoded

Currently repair is run only when the error happens in the reader.

A corruption can occur after the record is read and when it is decoded.
This change wraps the error at decoding as a CorruptionErr as this error
is expected to trigger a repair.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+												return &wal.CorruptionErr{
 													Err:     errors.Wrap(err, "decode tombstones"),
 													Segment: r.Segment(),
 													Offset:  r.Offset(),
 												}
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+											}
 											for _, s := range tstones {
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+												for _, itv := range s.Intervals {
-												no overlapping on compaction when an existing block is not within default boundaries. (#461)

closes https://github.com/prometheus/prometheus/issues/4643

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+													if itv.Maxt < h.minValidTime {
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+														continue
 													}
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+													if m := h.series.getByID(s.Ref); m == nil {
-												Don't crash on an unknown tombstone ref. (#604)

Fixes https://github.com/prometheus/prometheus/issues/5562

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
											
										
										
											6 years ago
+														unknownRefs++
 														continue
 													}
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+													allStones.AddInterval(s.Ref, itv)
-												Filter WAL data in Head, misc fixes

											
										
										
											7 years ago
+												}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+											}
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+										default:
-												repair wal when the record cannot be decoded (#453)

* repair wal when the record cannot be decoded

Currently repair is run only when the error happens in the reader.

A corruption can occur after the record is read and when it is decoded.
This change wraps the error at decoding as a CorruptionErr as this error
is expected to trigger a repair.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+											return &wal.CorruptionErr{
 												Err:     errors.Errorf("invalid record type %v", dec.Type(rec)),
 												Segment: r.Segment(),
 												Offset:  r.Offset(),
 											}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										}
 									}
-												Use boolean function instead of postings to drop WAL series

There is not guarantee or requirement for WAL writers to only add
series entries in increasing order of IDs. A postings list cannot look
back and thus unordered WAL entries would skip over IDs to not truncate
from the WAL.
We replace it with a simple boolean check function that does not require
order.

											
										
										
											7 years ago
-												Only send WAL read workers the samples they need.

Calculating the modulus in each worker was a hotspot,
and meant that you had more work to do the more cores you had.
This cuts CPU usage (on my 8 core, 4 real core machine) by
33%, and walltime by 3%

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>

											
										
										
											6 years ago
+									// Signal termination to each worker and wait for it to close its output channel.
 									for i := 0; i < n; i++ {
 										close(inputs[i])
 										for range outputs[i] {
 										}
-												wal: parallelize sample processing

											
										
										
											7 years ago
+									}
-												Ensure workers terminated fully before reading unknownRefs

											
										
										
											7 years ago
+									wg.Wait()
-												move the wal repair logic in db.Open (#633)

* move the wal repair logic in db.Open

This is to allow opening a wal in a read oly mode without triggering a
repair.

Signed-off-by: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com>
											
										
										
											6 years ago
+									if r.Err() != nil {
 										return errors.Wrap(r.Err(), "read records")
 									}
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+									if err := allStones.Iter(func(ref uint64, dranges tombstones.Intervals) error {
-												Dont store stones in head, delete samples directly

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+										return h.chunkRewrite(ref, dranges)
 									}); err != nil {
 										return errors.Wrap(r.Err(), "deleting samples from tombstones")
 									}
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									if unknownRefs > 0 {
 										level.Warn(h.logger).Log("msg", "unknown series references", "count", unknownRefs)
 									}
 									return nil
 								}
 								// Init loads data from the write ahead log and prepares the head for writes.
-												no overlapping on compaction when an existing block is not within default boundaries. (#461)

closes https://github.com/prometheus/prometheus/issues/4643

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+								// It should be called before using an appender so that
 								// it limits the ingested samples to the head min valid time.
 								func (h *Head) Init(minValidTime int64) error {
 									h.minValidTime = minValidTime
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									defer h.postings.EnsureOrder()
-												no overlapping on compaction when an existing block is not within default boundaries. (#461)

closes https://github.com/prometheus/prometheus/issues/4643

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+									defer h.gc() // After loading the wal remove the obsolete data from the head.
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
 									if h.wal == nil {
 										return nil
 									}
-												Add logging during WAL replay

Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>

											
										
										
											5 years ago
+									level.Info(h.logger).Log("msg", "replaying WAL, this may take awhile")
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									// Backfill the checkpoint first if it exists.
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+									dir, startFrom, err := wal.LastCheckpoint(h.wal.Dir())
 									if err != nil && err != record.ErrNotFound {
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+										return errors.Wrap(err, "find last checkpoint")
 									}
-												Handle multiple refs for the same series when WAL reading. (#623)

This can happen if a given series is created/truncated/recreated.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
											
										
										
											6 years ago
+									multiRef := map[uint64]uint64{}
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									if err == nil {
-												refactor NewSegmentsRangeReader to take multi WAL ranges (#449)

* refactor NewSegmentsRangeReader to take multi WAL ranges

In case of an error when checkpointing the WAL the error doesn't show
the exact WAL index that is corrupted. This is because it uses
MultiReader to read multiple WAL files.
This refactoring allows the NewSegmentsRangeReader to take more than a
single WAL range and it reads all of the ranges by iterating each one.

this changes the logs from
create checkpoint: read segments: corruption after 4841144384 bytes:...
to
create checkpoint: read segments: corruption in segment
data/wal/00017351 at 123142208: ...

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>

											
										
										
											6 years ago
+										sr, err := wal.NewSegmentsReader(dir)
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+										if err != nil {
 											return errors.Wrap(err, "open checkpoint")
 										}
-												move the wal repair logic in db.Open (#633)

* move the wal repair logic in db.Open

This is to allow opening a wal in a read-only mode without triggering a
repair.

Signed-off-by: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com>
											
										
										
											6 years ago
+										defer func() {
 											if err := sr.Close(); err != nil {
 												level.Warn(h.logger).Log("msg", "error while closing the wal segments reader", "err", err)
 											}
 										}()
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
 										// A corrupted checkpoint is a hard error for now and requires user
 										// intervention. There's likely little data that can be recovered anyway.
-												Handle multiple refs for the same series when WAL reading. (#623)

This can happen if a given series is created/truncated/recreated.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
											
										
										
											6 years ago
+										if err := h.loadWAL(wal.NewReader(sr), multiRef); err != nil {
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+											return errors.Wrap(err, "backfill checkpoint")
 										}
-												more descriptive var names and some more logging. (#405)

* more descriptive checkpoint var names and some more logging.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+										startFrom++
-												Add logging during WAL replay

Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>

											
										
										
											5 years ago
+										level.Info(h.logger).Log("msg", "WAL checkpoint loaded")
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									}
-												Always create a new clean segment when starting the WAL. (#608)

* Always create a new clean segment when starting the WAL.
* Ensure we flush the last page after repairing and before recreating the
new segment in Repair.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											6 years ago
+									// Find the last segment.
 									_, last, err := h.wal.Segments()
-												wal: parallelize sample processing

											
										
										
											7 years ago
+									if err != nil {
-												Always create a new clean segment when starting the WAL. (#608)

* Always create a new clean segment when starting the WAL.
* Ensure we flush the last page after repairing and before recreating the
new segment in Repair.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											6 years ago
+										return errors.Wrap(err, "finding WAL segments")
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									}
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
-												Always create a new clean segment when starting the WAL. (#608)

* Always create a new clean segment when starting the WAL.
* Ensure we flush the last page after repairing and before recreating the
new segment in Repair.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											6 years ago
+									// Backfill segments from the most recent checkpoint onwards.
 									for i := startFrom; i <= last; i++ {
 										s, err := wal.OpenReadSegment(wal.SegmentName(h.wal.Dir(), i))
 										if err != nil {
 											return errors.Wrap(err, fmt.Sprintf("open WAL segment: %d", i))
 										}
 										sr := wal.NewSegmentBufReader(s)
-												Handle multiple refs for the same series when WAL reading. (#623)

This can happen if a given series is created/truncated/recreated.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
											
										
										
											6 years ago
+										err = h.loadWAL(wal.NewReader(sr), multiRef)
-												move the wal repair logic in db.Open (#633)

* move the wal repair logic in db.Open

This is to allow opening a WAL in read-only mode without triggering a
repair.

Signed-off-by: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com>
											
										
										
											6 years ago
+										if err := sr.Close(); err != nil {
 											level.Warn(h.logger).Log("msg", "error while closing the wal segments reader", "err", err)
-												Always create a new clean segment when starting the WAL. (#608)

* Always create a new clean segment when starting the WAL.
* Ensure we flush the last page after repairing and before recreating the
new segment in Repair.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											6 years ago
+										}
-												move the wal repair logic in db.Open (#633)

* move the wal repair logic in db.Open

This is to allow opening a WAL in read-only mode without triggering a
repair.

Signed-off-by: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com>
											
										
										
											6 years ago
+										if err != nil {
 											return err
-												Always create a new clean segment when starting the WAL. (#608)

* Always create a new clean segment when starting the WAL.
* Ensure we flush the last page after repairing and before recreating the
new segment in Repair.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											6 years ago
+										}
-												Add logging during WAL replay

Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>

											
										
										
											5 years ago
+										level.Info(h.logger).Log("msg", "WAL segment loaded", "segment", i, "maxSegment", last)
-												wal: parallelize sample processing

											
										
										
											7 years ago
+									}
-												Always create a new clean segment when starting the WAL. (#608)

* Always create a new clean segment when starting the WAL.
* Ensure we flush the last page after repairing and before recreating the
new segment in Repair.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											6 years ago
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									return nil
-												Make WAL for HeadBlock composeable.

											
										
										
											8 years ago
+								}
-												Handle compaction trigger and reinitializing in DB

											
										
										
											8 years ago
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+								// Truncate removes old data before mint from the head.
-												Fix review comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+								func (h *Head) Truncate(mint int64) (err error) {
 									defer func() {
 										if err != nil {
 											h.metrics.headTruncateFail.Inc()
 										}
 									}()
-												Properly initialize head time

This fixes various issues when initializing the head time range
under different starting conditions.

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									initialize := h.MinTime() == math.MaxInt64
-												Filter WAL data in Head, misc fixes

											
										
										
											7 years ago
-												Properly initialize head time

This fixes various issues when initializing the head time range
under different starting conditions.

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									if h.MinTime() >= mint && !initialize {
-												Add tests for GC and chunk truncation

											
										
										
											7 years ago
+										return nil
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									}
 									atomic.StoreInt64(&h.minTime, mint)
-												fix race for minValidTime (#479)

it happens when truncating the WAL and another goroutine creates a new
Appender()

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+									atomic.StoreInt64(&h.minValidTime, mint)
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
-												Fix min/max time handling and concurrent crc32 usage

											
										
										
											7 years ago
+									// Ensure that max time is at least as high as min time.
 									for h.MaxTime() < mint {
 										atomic.CompareAndSwapInt64(&h.maxTime, h.MaxTime(), mint)
 									}
-												Filter WAL data in Head, misc fixes

											
										
										
											7 years ago
+									// This was an initial call to Truncate after loading blocks on startup.
 									// We haven't read back the WAL yet, so do not attempt to truncate it.
 									if initialize {
 										return nil
 									}
-												Fix review comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+									h.metrics.headTruncateTotal.Inc()
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									start := time.Now()
 									h.gc()
-												Add levels to all log lines.

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

											
										
										
											7 years ago
+									level.Info(h.logger).Log("msg", "head GC completed", "duration", time.Since(start))
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									h.metrics.gcDuration.Observe(time.Since(start).Seconds())
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									if h.wal == nil {
 										return nil
 									}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									start = time.Now()
-												more descriptive var names and some more logging. (#405)

* more descriptive checkpoint var names and some more logging.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+									first, last, err := h.wal.Segments()
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									if err != nil {
 										return errors.Wrap(err, "get segment range")
 									}
-												Start a new WAL segement on head truncation. (#605)

This reduces disk space usage to not be a minimum of 3 128MB files
in small setups. This will possibly also help debug wal data issues,
by making things a bit more deterministic.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
											
										
										
											6 years ago
+									// Start a new segment, so low ingestion volume TSDB don't have more WAL than
 									// needed.
 									err = h.wal.NextSegment()
 									if err != nil {
 										return errors.Wrap(err, "next segment")
 									}
-												more descriptive var names and some more logging. (#405)

* more descriptive checkpoint var names and some more logging.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+									last-- // Never consider last segment for checkpoint.
 									if last < 0 {
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+										return nil // no segments yet.
 									}
 									// The lower third of segments should contain mostly obsolete samples.
-												Address comments

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									// If we have less than three segments, it's not worth checkpointing yet.
-												more descriptive var names and some more logging. (#405)

* more descriptive checkpoint var names and some more logging.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+									last = first + (last-first)/3
 									if last <= first {
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+										return nil
 									}
-												Use boolean function instead of postings to drop WAL series

There is not guarantee or requirement for WAL writers to only add
series entries in increasing order of IDs. A postings list cannot look
back and thus unordered WAL entries would skip over IDs to not truncate
from the WAL.
We replace it with a simple boolean check function that does not require
order.

											
										
										
											7 years ago
+									keep := func(id uint64) bool {
-												Keep series that are still in WAL in checkpoints (#577)

If all the samples are deleted for a series,
we should still keep the series in the WAL as
anything else reading the WAL will still care
about it in order to understand the samples.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
											
										
										
											6 years ago
+										if h.series.getByID(id) != nil {
 											return true
 										}
 										h.deletedMtx.Lock()
 										_, ok := h.deleted[id]
 										h.deletedMtx.Unlock()
 										return ok
-												[WIP]: WAL implementation

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

											
										
										
											7 years ago
+									}
-												Fix review comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+									h.metrics.checkpointCreationTotal.Inc()
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+									if _, err = wal.Checkpoint(h.wal, first, last, keep, mint); err != nil {
-												Fix review comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+										h.metrics.checkpointCreationFail.Inc()
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+										return errors.Wrap(err, "create checkpoint")
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									}
-												more descriptive var names and some more logging. (#405)

* more descriptive checkpoint var names and some more logging.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+									if err := h.wal.Truncate(last + 1); err != nil {
-												Fix review comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+										// If truncating fails, we'll just try again at the next checkpoint.
 										// Leftover segments will just be ignored in the future if there's a checkpoint
 										// that supersedes them.
 										level.Error(h.logger).Log("msg", "truncating segments failed", "err", err)
 									}
-												Keep series that are still in WAL in checkpoints (#577)

If all the samples are deleted for a series,
we should still keep the series in the WAL as
anything else reading the WAL will still care
about it in order to understand the samples.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
											
										
										
											6 years ago
 									// The checkpoint is written and segments before it is truncated, so we no
 									// longer need to track deleted series that are before it.
 									h.deletedMtx.Lock()
 									for ref, segment := range h.deleted {
 										if segment < first {
 											delete(h.deleted, ref)
 										}
 									}
 									h.deletedMtx.Unlock()
-												Fix review comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+									h.metrics.checkpointDeleteTotal.Inc()
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+									if err := wal.DeleteCheckpoints(h.wal.Dir(), last); err != nil {
-												Fix review comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+										// Leftover old checkpoints do not cause problems down the line beyond
 										// occupying disk space.
 										// They will just be ignored since a higher checkpoint exists.
 										level.Error(h.logger).Log("msg", "delete old checkpoints", "err", err)
 										h.metrics.checkpointDeleteFail.Inc()
 									}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									h.metrics.walTruncateDuration.Observe(time.Since(start).Seconds())
-												Add tests for GC and chunk truncation

											
										
										
											7 years ago
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									level.Info(h.logger).Log("msg", "WAL checkpoint complete",
-												more descriptive var names and some more logging. (#405)

* more descriptive checkpoint var names and some more logging.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+										"first", first, "last", last, "duration", time.Since(start))
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
-												Add tests for GC and chunk truncation

											
										
										
											7 years ago
+									return nil
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+								}
 								// initTime initializes a head with the first timestamp. This only needs to be called
-												Fixs typo: "compltely" to "completely" (#470)

Fix a small typo.
											
										
										
											6 years ago
+								// for a completely fresh head with an empty WAL.
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+								// Returns true if the initialization took an effect.
 								func (h *Head) initTime(t int64) (initialized bool) {
-												Properly initialize head time

This fixes various issues when initializing the head time range
under different starting conditions.

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									if !atomic.CompareAndSwapInt64(&h.minTime, math.MaxInt64, t) {
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										return false
 									}
-												Fix min/max time handling and concurrent crc32 usage

											
										
										
											7 years ago
+									// Ensure that max time is initialized to at least the min time we just set.
 									// Concurrent appenders may already have set it to a higher value.
 									atomic.CompareAndSwapInt64(&h.maxTime, math.MinInt64, t)
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									return true
 								}
-												Add more verbose error handling for closing, reduce locking

This commit introduces error returns in various places and is explicit
about closing persisted blocks.
{Index,Chunk,Tombstone}Readers are more consistent about their Close()
method. Whenever a reader is retrieved, the corresponding close method
must eventually be called. We use this to track pending readers against
persisted blocks.

Querier's against the DB no longer hold a read lock for their entire
lifecycle. This avoids long running queriers to starve new ones when we
have to acquire a write lock when reloading blocks.

											
										
										
											7 years ago
+								type rangeHead struct {
 									head       *Head
 									mint, maxt int64
 								}
 								// Index returns the head's index reader for the mint/maxt range of h.
 								// The returned error is always nil.
 								func (h *rangeHead) Index() (IndexReader, error) {
 									ir := h.head.indexRange(h.mint, h.maxt)
 									return ir, nil
 								}
 								// Chunks returns the head's chunk reader for the mint/maxt range of h.
 								// The returned error is always nil.
 								func (h *rangeHead) Chunks() (ChunkReader, error) {
 									cr := h.head.chunksRange(h.mint, h.maxt)
 									return cr, nil
 								}
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+								func (h *rangeHead) Tombstones() (tombstones.Reader, error) {
-												Dont store stones in head, delete samples directly

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+									return emptyTombstoneReader, nil
-												Add more verbose error handling for closing, reduce locking

This commit introduces error returns in various places and is explicit
about closing persisted blocks.
{Index,Chunk,Tombstone}Readers are more consistent about their Close()
method. Whenever a reader is retrieved, the corresponding close method
must eventually be called. We use this to track pending readers against
persisted blocks.

Querier's against the DB no longer hold a read lock for their entire
lifecycle. This avoids long running queriers to starve new ones when we
have to acquire a write lock when reloading blocks.

											
										
										
											7 years ago
+								}
-												Vertical query merging and compaction (#370)

* Vertical series iterator

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Select overlapped blocks first in compactor Plan()

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Added vertical compaction

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Code cleanup and comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Fix review comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Fix tests

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Add benchmark for compaction

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Perform vertical compaction only when blocks are overlapping.

Actions for vertical compaction:
* Sorting chunk metas
* Calling chunks.MergeOverlappingChunks on the chunks

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Benchmark for vertical compaction

* BenchmarkNormalCompaction => BenchmarkCompaction
* Moved the benchmark from db_test.go to compact_test.go

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Benchmark for query iterator and seek for non overlapping blocks

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Vertical query merge only for overlapping blocks

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Simplify logging in Compact(...)

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Updated CHANGELOG.md

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Calculate overlapping inside populateBlock

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* MinTime and MaxTime for BlockReader.

Using this to find overlapping blocks in populateBlock()

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Sort blocks w.r.t. MinTime in reload()

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Log about overlapping in LeveledCompactor.write() instead of returning bool

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Log about overlapping inside LeveledCompactor.populateBlock()

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Fix review comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Refactor createBlock to take optional []Series

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* review1

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>

* Updated CHANGELOG and minor nits

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* nits

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Updated CHANGELOG

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Refactor iterator and seek benchmarks for Querier.

Also has as overlapping blocks.

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Additional test case

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* genSeries takes optional labels. Updated BenchmarkQueryIterator and BenchmarkQuerySeek.

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Split genSeries into genSeries and populateSeries

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Check error in benchmark

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Fix review comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Warn about overlapping blocks in reload()

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+								func (h *rangeHead) MinTime() int64 {
 									return h.mint
 								}
 								// MaxTime returns the upper bound of the time range h was created with.
 								func (h *rangeHead) MaxTime() int64 {
 									upper := h.maxt
 									return upper
 								}
-												Open db in Read only mode (#588)

* Added db read only open mode and use it for the tsdb cli.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											5 years ago
+								func (h *rangeHead) NumSeries() uint64 {
 									return h.head.NumSeries()
 								}
 								// Meta builds a BlockMeta for the range: the time bounds come from h, the
 								// ULID from the underlying head's Meta, and the stats carry only the series
 								// count.
 								func (h *rangeHead) Meta() BlockMeta {
 									var meta BlockMeta
 									meta.MinTime = h.MinTime()
 									meta.MaxTime = h.MaxTime()
 									meta.ULID = h.head.Meta().ULID
 									meta.Stats = BlockStats{NumSeries: h.NumSeries()}
 									return meta
 								}
-												Fix typos in comments (#254)

a the -> the
timestmap -> timestamp
badded -> padded
its -> it is
callers -> caller's
											
										
										
											7 years ago
+								// initAppender is a helper to initialize the time bounds of the head
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+								// upon the first sample it receives.
 								type initAppender struct {
 									app  Appender
 									head *Head
 								}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+								func (a *initAppender) Add(lset labels.Labels, t int64, v float64) (uint64, error) {
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									if a.app != nil {
 										return a.app.Add(lset, t, v)
 									}
-												Remove defer statement in hot path

											
										
										
											7 years ago
+									a.head.initTime(t)
 									a.app = a.head.appender()
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									return a.app.Add(lset, t, v)
 								}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+								func (a *initAppender) AddFast(ref uint64, t int64, v float64) error {
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									if a.app == nil {
 										return ErrNotFound
 									}
 									return a.app.AddFast(ref, t, v)
 								}
 								// Commit forwards to the wrapped appender. It is a no-op returning nil when
 								// no appender has been created yet.
 								func (a *initAppender) Commit() error {
 									if a.app != nil {
 										return a.app.Commit()
 									}
 									return nil
 								}
 								// Rollback forwards to the wrapped appender. It is a no-op returning nil
 								// when no appender has been created yet.
 								func (a *initAppender) Rollback() error {
 									if a.app != nil {
 										return a.app.Rollback()
 									}
 									return nil
 								}
 								// Appender returns a new Appender on the database.
 								func (h *Head) Appender() Appender {
 									h.metrics.activeAppenders.Inc()
 									// The head cache might not have a starting point yet. The init appender
 									// picks up the first appended timestamp as the base.
-												Properly initialize head time

This fixes various issues when initializing the head time range
under different starting conditions.

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									if h.MinTime() == math.MaxInt64 {
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										return &initAppender{head: h}
 									}
 									return h.appender()
 								}
 								func (h *Head) appender() *headAppender {
 									return &headAppender{
-												no overlapping on compaction when an existing block is not within default boundaries. (#461)

closes https://github.com/prometheus/prometheus/issues/4643

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+										head: h,
 										// Set the minimum valid time to whichever is greater the head min valid time or the compaciton window.
 										// This ensures that no samples will be added within the compaction window to avoid races.
-												fix race for minValidTime (#479)

it happens when truncating the WAL and another goroutine creates a new
Appender()

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+										minValidTime: max(atomic.LoadInt64(&h.minValidTime), h.MaxTime()-h.chunkRange/2),
-												Properly initialize head time

This fixes various issues when initializing the head time range
under different starting conditions.

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+										mint:         math.MaxInt64,
 										maxt:         math.MinInt64,
 										samples:      h.getAppendBuffer(),
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+										sampleSeries: h.getSeriesBuffer(),
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									}
 								}
-												no overlapping on compaction when an existing block is not within default boundaries. (#461)

closes https://github.com/prometheus/prometheus/issues/4643

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+								func max(a, b int64) int64 {
 									if a > b {
 										return a
 									}
 									return b
 								}
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+								func (h *Head) getAppendBuffer() []record.RefSample {
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									b := h.appendPool.Get()
 									if b == nil {
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+										return make([]record.RefSample, 0, 512)
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									}
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+									return b.([]record.RefSample)
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+								}
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+								func (h *Head) putAppendBuffer(b []record.RefSample) {
-												fix static check errors (#475)

fix the tests for `check_license` and `staticcheck`

the static check also found some actual bugs.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+									//lint:ignore SA6002 safe to ignore and actually fixing it has some performance penalty.
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									h.appendPool.Put(b[:0])
 								}
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+								func (h *Head) getSeriesBuffer() []*memSeries {
 									b := h.seriesPool.Get()
 									if b == nil {
 										return make([]*memSeries, 0, 512)
 									}
 									return b.([]*memSeries)
 								}
 								// putSeriesBuffer truncates b to length zero and hands it back to h.seriesPool.
 								func (h *Head) putSeriesBuffer(b []*memSeries) {
 									b = b[:0]
 									//lint:ignore SA6002 safe to ignore and actually fixing it has some performance penalty.
 									h.seriesPool.Put(b)
 								}
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+								func (h *Head) getBytesBuffer() []byte {
 									b := h.bytesPool.Get()
 									if b == nil {
 										return make([]byte, 0, 1024)
 									}
 									return b.([]byte)
 								}
 								func (h *Head) putBytesBuffer(b []byte) {
-												fix static check errors (#475)

fix the tests for `check_license` and `staticcheck`

the static check also found some actual bugs.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+									//lint:ignore SA6002 safe to ignore and actually fixing it has some performance penalty.
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									h.bytesPool.Put(b[:0])
 								}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+								type headAppender struct {
-												Properly initialize head time

This fixes various issues when initializing the head time range
under different starting conditions.

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									head         *Head
 									minValidTime int64 // No samples below this timestamp are allowed.
 									mint, maxt   int64
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+									series       []record.RefSeries
 									samples      []record.RefSample
 									sampleSeries []*memSeries
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+								}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+								func (a *headAppender) Add(lset labels.Labels, t int64, v float64) (uint64, error) {
-												Properly initialize head time

This fixes various issues when initializing the head time range
under different starting conditions.

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									if t < a.minValidTime {
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+										return 0, ErrOutOfBounds
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									}
-												Correctly handle empty labels. (#594)

Currently a time series with empty labels is not treated the same
as one with missing labels. Currently this can only come from
ALERTS&ALERT_FOR_STATE so it's unlikely anyone has actually hit it.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
											
										
										
											6 years ago
+									// Ensure no empty labels have gotten through.
 									lset = lset.WithoutEmpty()
-												Simplify series create logic in head

											
										
										
											7 years ago
+									s, created := a.head.getOrCreate(lset.Hash(), lset)
 									if created {
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+										a.series = append(a.series, record.RefSeries{
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+											Ref:    s.ref,
 											Labels: lset,
 										})
 									}
 									return s.ref, a.AddFast(s.ref, t, v)
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+								}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+								func (a *headAppender) AddFast(ref uint64, t int64, v float64) error {
-												Fix race condition between gc and committing (#378)

Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>
											
										
										
											6 years ago
+									if t < a.minValidTime {
 										return ErrOutOfBounds
 									}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
-												Fix race condition between gc and committing (#378)

Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>
											
										
										
											6 years ago
+									s := a.head.series.getByID(ref)
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									if s == nil {
 										return errors.Wrap(ErrNotFound, "unknown series")
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									}
-												Fix various races

											
										
										
											7 years ago
+									s.Lock()
-												Fix race condition between gc and committing (#378)

Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>
											
										
										
											6 years ago
+									if err := s.appendable(t, v); err != nil {
 										s.Unlock()
-												Finish old WAL segment async, default to no fsync

We were still fsyncing while holding the write lock when we cut a new
segment. Given we cannot do anything but logging errors, we might just
as well complete segments asynchronously.

There's no realistic use case where one would fsync after every WAL
entry, thus make the default of a flush interval of 0 to never fsync
which is a much more likely use case.

											
										
										
											7 years ago
+										return err
 									}
-												Fix race condition between gc and committing (#378)

Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>
											
										
										
											6 years ago
+									s.pendingCommit = true
 									s.Unlock()
-												Properly initialize head time

This fixes various issues when initializing the head time range
under different starting conditions.

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									if t < a.mint {
 										a.mint = t
 									}
-												head: Rename highTimestamp to maxt

`maxt` seems more consistent with `mint` and other uses of `maxt`
elsewhere in the code, if I've understood the intent correctly.

											
										
										
											7 years ago
+									if t > a.maxt {
 										a.maxt = t
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									}
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+									a.samples = append(a.samples, record.RefSample{
 										Ref: ref,
 										T:   t,
 										V:   v,
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									})
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+									a.sampleSeries = append(a.sampleSeries, s)
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									return nil
 								}
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+								func (a *headAppender) log() error {
 									if a.head.wal == nil {
 										return nil
 									}
 									buf := a.head.getBytesBuffer()
 									defer func() { a.head.putBytesBuffer(buf) }()
 									var rec []byte
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+									var enc record.Encoder
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
 									if len(a.series) > 0 {
 										rec = enc.Series(a.series, buf)
 										buf = rec[:0]
 										if err := a.head.wal.Log(rec); err != nil {
 											return errors.Wrap(err, "log series")
 										}
 									}
 									if len(a.samples) > 0 {
 										rec = enc.Samples(a.samples, buf)
 										buf = rec[:0]
 										if err := a.head.wal.Log(rec); err != nil {
 											return errors.Wrap(err, "log samples")
 										}
 									}
 									return nil
 								}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+								func (a *headAppender) Commit() error {
-												Log series on rollback

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									defer a.head.metrics.activeAppenders.Dec()
 									defer a.head.putAppendBuffer(a.samples)
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+									defer a.head.putSeriesBuffer(a.sampleSeries)
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									if err := a.log(); err != nil {
 										return errors.Wrap(err, "write to WAL")
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									total := len(a.samples)
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+									var series *memSeries
 									for i, s := range a.samples {
 										series = a.sampleSeries[i]
 										series.Lock()
 										ok, chunkCreated := series.append(s.T, s.V)
 										series.pendingCommit = false
 										series.Unlock()
-												Fix various races

											
										
										
											7 years ago
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										if !ok {
 											total--
 										}
 										if chunkCreated {
 											a.head.metrics.chunks.Inc()
 											a.head.metrics.chunksCreated.Inc()
 										}
 									}
 									a.head.metrics.samplesAppended.Add(float64(total))
-												Properly initialize head time

This fixes various issues when initializing the head time range
under different starting conditions.

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									a.head.updateMinMaxTime(a.mint, a.maxt)
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
 									return nil
 								}
 								func (a *headAppender) Rollback() error {
 									a.head.metrics.activeAppenders.Dec()
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+									var series *memSeries
 									for i := range a.samples {
 										series = a.sampleSeries[i]
 										series.Lock()
 										series.pendingCommit = false
 										series.Unlock()
-												Fix race condition between gc and committing (#378)

Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>
											
										
										
											6 years ago
+									}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									a.head.putAppendBuffer(a.samples)
-												Log series on rollback

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									// Series are created in the head memory regardless of rollback. Thus we have
 									// to log them to the WAL in any case.
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									a.samples = nil
 									return a.log()
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+								}
 								// Delete all samples in the range of [mint, maxt] for series that satisfy the given
 								// label matchers.
 								func (h *Head) Delete(mint, maxt int64, ms ...labels.Matcher) error {
 									// Do not delete anything beyond the currently valid range.
 									mint, maxt = clampInterval(mint, maxt, h.MinTime(), h.MaxTime())
 									ir := h.indexRange(mint, maxt)
-												Select series with label unset for != and !~

Fixes https://github.com/prometheus/prometheus/issues/3575

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
Signed-off-by: Goutham Veeramachaneni <gouthamve@gmail.com>

											
										
										
											7 years ago
+									p, err := PostingsForMatchers(ir, ms...)
-												Add explicit error to Querier.Select

This has been a frequent source of debugging pain since errors are
potentially delayed to a much later point. They bubble up in an
unrelated execution path.

											
										
										
											7 years ago
+									if err != nil {
 										return errors.Wrap(err, "select series")
 									}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+									var stones []tombstones.Stone
-												Dont store stones in head, delete samples directly

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+									dirty := false
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									for p.Next() {
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+										series := h.series.getByID(p.At())
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
-												Fix crash when a series has no block

											
										
										
											7 years ago
+										t0, t1 := series.minTime(), series.maxTime()
 										if t0 == math.MinInt64 || t1 == math.MinInt64 {
 											continue
 										}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										// Delete only until the current values and not beyond.
-												Fix crash when a series has no block

											
										
										
											7 years ago
+										t0, t1 = clampInterval(mint, maxt, t0, t1)
-												Dont store stones in head, delete samples directly

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+										if h.wal != nil {
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+											stones = append(stones, tombstones.Stone{Ref: p.At(), Intervals: tombstones.Intervals{{Mint: t0, Maxt: t1}}})
-												Dont store stones in head, delete samples directly

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+										}
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+										if err := h.chunkRewrite(p.At(), tombstones.Intervals{{Mint: t0, Maxt: t1}}); err != nil {
-												Dont store stones in head, delete samples directly

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+											return errors.Wrap(err, "delete samples")
 										}
 										dirty = true
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									}
 									if p.Err() != nil {
 										return p.Err()
 									}
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+									var enc record.Encoder
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									if h.wal != nil {
-												Dont store stones in head, delete samples directly

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+										// Although we don't store the stones in the head
-												Fix a typo in head.go (#553)

Signed-off-by: zhulongcheng <zhulongcheng.me@gmail.com>
											
										
										
											6 years ago
+										// we need to write them to the WAL to mark these as deleted
 										// after a restart while loading the WAL.
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+										if err := h.wal.Log(enc.Tombstones(stones, nil)); err != nil {
 											return err
 										}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									}
-												Dont store stones in head, delete samples directly

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+									if dirty {
 										h.gc()
 									}
 									return nil
 								}
 								// chunkRewrite re-writes the chunks which overlap with deleted ranges
 								// and removes the samples in the deleted ranges.
 								// Chunks are deleted if no samples are left at the end.
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+								func (h *Head) chunkRewrite(ref uint64, dranges tombstones.Intervals) (err error) {
-												Dont store stones in head, delete samples directly

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+									if len(dranges) == 0 {
 										return nil
 									}
 									ms := h.series.getByID(ref)
 									ms.Lock()
 									defer ms.Unlock()
 									if len(ms.chunks) == 0 {
 										return nil
 									}
 									metas := ms.chunksMetas()
 									mint, maxt := metas[0].MinTime, metas[len(metas)-1].MaxTime
 									it := newChunkSeriesIterator(metas, dranges, mint, maxt)
 									ms.reset()
 									for it.Next() {
 										t, v := it.At()
 										ok, _ := ms.append(t, v)
 										if !ok {
 											level.Warn(h.logger).Log("msg", "failed to add sample during delete")
 										}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									}
-												Dont store stones in head, delete samples directly

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									return nil
 								}
-												Fix typos in comments (#254)

a the -> the
timestmap -> timestamp
badded -> padded
its -> it is
callers -> caller's
											
										
										
											7 years ago
+								// gc removes data before the minimum timestamp from the head.
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+								func (h *Head) gc() {
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									// Only data strictly lower than this timestamp must be deleted.
 									mint := h.MinTime()
-												Properly track and write meta file

											
										
										
											8 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									// Drop old chunks and remember series IDs and hashes if they can be
 									// deleted entirely.
 									deleted, chunksRemoved := h.series.gc(mint)
 									seriesRemoved := len(deleted)
-												Trigger reload correctly on interrupted compaction

											
										
										
											8 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									h.metrics.seriesRemoved.Add(float64(seriesRemoved))
 									h.metrics.chunksRemoved.Add(float64(chunksRemoved))
 									h.metrics.chunks.Sub(float64(chunksRemoved))
-												fix spelling mistakes in docs (#5952)

Signed-off-by: hwdef <hwdef97@gmail.com>
											
										
										
											5 years ago
+									// Using AddUint64 to subtract series removed.
-												Open db in Read only mode (#588)

* Added db read only open mode and use it for the tsdb cli.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											5 years ago
+									// See: https://golang.org/pkg/sync/atomic/#AddUint64.
 									atomic.AddUint64(&h.numSeries, ^uint64(seriesRemoved-1))
-												Add separate head mutex

Introduce a separate mutex for the head blocks to avoid a race where
a post-compaction reload may run between switching the DB's base mutex
to create a new head block in an appender.

											
										
										
											8 years ago
-												Move index and chunk encoders to own packages

											
										
										
											7 years ago
+									// Remove deleted series IDs from the postings lists.
 									h.postings.Delete(deleted)
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
-												Keep series that are still in WAL in checkpoints (#577)

If all the samples are deleted for a series,
we should still keep the series in the WAL as
anything else reading the WAL will still care
about it in order to understand the samples.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
											
										
										
											6 years ago
+									if h.wal != nil {
 										_, last, _ := h.wal.Segments()
 										h.deletedMtx.Lock()
 										// Keep series records until we're past segment 'last'
 										// because the WAL will still have samples records with
 										// this ref ID. If we didn't keep these series records then
 										// on start up when we replay the WAL, or any other code
 										// that reads the WAL, wouldn't be able to use those
 										// samples since we would have no labels for that ref ID.
 										for ref := range deleted {
 											h.deleted[ref] = last
 										}
 										h.deletedMtx.Unlock()
 									}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									// Rebuild symbols and label value indices from what is left in the postings terms.
-												Make Head.symbols map with size hint (#552)

To reduce the number of times the map is resized

Signed-off-by: zhulongcheng <zhulongcheng.me@gmail.com>
											
										
										
											6 years ago
+									symbols := make(map[string]struct{}, len(h.symbols))
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									values := make(map[string]stringset, len(h.values))
-												Initial implementation of HeadBlock Snapshots

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

											
										
										
											8 years ago
-												Handle a bunch of unchecked errors (#365)

As discovered by "gosec".

Signed-off-by: Julius Volz <julius.volz@gmail.com>
											
										
										
											6 years ago
+									if err := h.postings.Iter(func(t labels.Label, _ index.Postings) error {
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+										symbols[t.Name] = struct{}{}
 										symbols[t.Value] = struct{}{}
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of this, the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+										ss, ok := values[t.Name]
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+										if !ok {
 											ss = stringset{}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+											values[t.Name] = ss
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+										}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+										ss.set(t.Value)
-												Move index and chunk encoders to own packages

											
										
										
											7 years ago
+										return nil
-												Handle a bunch of unchecked errors (#365)

As discovered by "gosec".

Signed-off-by: Julius Volz <julius.volz@gmail.com>
											
										
										
											6 years ago
+									}); err != nil {
 										// This should never happen, as the iteration function only returns nil.
 										panic(err)
 									}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
 									h.symMtx.Lock()
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									h.symbols = symbols
 									h.values = values
-												Add various metrics

											
										
										
											7 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									h.symMtx.Unlock()
-												Add Queryable interface to Block

This adds the Queryable interface to the Block interface. Head and
persisted blocks now implement their own Querier() method and thus
isolate customization (e.g. remapPostings) more cleanly.

											
										
										
											8 years ago
+								}
-												Add more verbose error handling for closing, reduce locking

This commit introduces error returns in various places and is explicit
about closing persisted blocks.
{Index,Chunk,Tombstone}Readers are more consistent about their Close()
method. Whenever a reader is retrieved, the corresponding close method
must eventually be called. We use this to track pending readers against
persisted blocks.

Querier's against the DB no longer hold a read lock for their entire
lifecycle. This avoids long running queriers to starve new ones when we
have to acquire a write lock when reloading blocks.

											
										
										
											7 years ago
+								// Tombstones returns a new reader over the head's tombstones
-												Move WAL watcher code to tsdb/wal package. (#5999)

* Move WAL watcher code to tsdb/wal package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix tests after moving WAL watcher code.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Lint fixes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

											
										
										
											5 years ago
+								func (h *Head) Tombstones() (tombstones.Reader, error) {
-												Dont store stones in head, delete samples directly

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+									return emptyTombstoneReader, nil
-												Compact head block early

Let older head blocks be compacted once the newest one has samples at
50% of its total range. This allows the memory of the compacted blocks
to be released and garbage collected before a new head block gets
created. Thereby the number of head blocks is 1 or 2 instead of 2 or 3
and memory spikes are reduced.

											
										
										
											8 years ago
+								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								// Index returns an IndexReader against the block.
-												Add more verbose error handling for closing, reduce locking

This commit introduces error returns in various places and is explicit
about closing persisted blocks.
{Index,Chunk,Tombstone}Readers are more consistent about their Close()
method. Whenever a reader is retrieved, the corresponding close method
must eventually be called. We use this to track pending readers against
persisted blocks.

Querier's against the DB no longer hold a read lock for their entire
lifecycle. This avoids long running queriers to starve new ones when we
have to acquire a write lock when reloading blocks.

											
										
										
											7 years ago
+								func (h *Head) Index() (IndexReader, error) {
 									return h.indexRange(math.MinInt64, math.MaxInt64), nil
-												Add composed Block interfaces, remove head generation

This adds more lower-level interfaces which are used to compose
to different Block interfaces.
The DB only uses interfaces instead of explicit persistedBlock and
headBlock. The headBlock generation property is dropped as the use-case
can be implemented using block sequence numbers.

											
										
										
											8 years ago
+								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								func (h *Head) indexRange(mint, maxt int64) *headIndexReader {
 									if hmin := h.MinTime(); hmin > mint {
 										mint = hmin
-												Use buffer pool for head appenders

											
										
										
											8 years ago
+									}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									return &headIndexReader{head: h, mint: mint, maxt: maxt}
-												Expose series references to clients

This exposes a reference number of a series represented by a label set
to clients.
Subsequent samples can be directly added via the reference rather than
repeatedly passing in the full labels. This drastically speeds up the
append process.

The appender chain uses different sections of the reference number for
assignment to child appenders and invalidating reference numbers as
necessary.

Clients can either pass out reference numbers themselves or have their
own optimized lookup, i.e. by directly associating unparsed metric
descriptors strings with reference numbers.

											
										
										
											8 years ago
+								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								// Chunks returns a ChunkReader against the block.
-												Add more verbose error handling for closing, reduce locking

This commit introduces error returns in various places and is explicit
about closing persisted blocks.
{Index,Chunk,Tombstone}Readers are more consistent about their Close()
method. Whenever a reader is retrieved, the corresponding close method
must eventually be called. We use this to track pending readers against
persisted blocks.

Querier's against the DB no longer hold a read lock for their entire
lifecycle. This avoids long running queriers to starve new ones when we
have to acquire a write lock when reloading blocks.

											
										
										
											7 years ago
+								func (h *Head) Chunks() (ChunkReader, error) {
 									return h.chunksRange(math.MinInt64, math.MaxInt64), nil
-												Expose series references to clients

This exposes a reference number of a series represented by a label set
to clients.
Subsequent samples can be directly added via the reference rather than
repeatedly passing in the full labels. This drastically speeds up the
append process.

The appender chain uses different sections of the reference number for
assignment to child appenders and invalidating reference numbers as
necessary.

Clients can either pass out reference numbers themselves or have their
own optimized lookup, i.e. by directly associating unparsed metric
descriptors strings with reference numbers.

											
										
										
											8 years ago
+								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								func (h *Head) chunksRange(mint, maxt int64) *headChunkReader {
 									if hmin := h.MinTime(); hmin > mint {
 										mint = hmin
-												Expose series references to clients

This exposes a reference number of a series represented by a label set
to clients.
Subsequent samples can be directly added via the reference rather than
repeatedly passing in the full labels. This drastically speeds up the
append process.

The appender chain uses different sections of the reference number for
assignment to child appenders and invalidating reference numbers as
necessary.

Clients can either pass out reference numbers themselves or have their
own optimized lookup, i.e. by directly associating unparsed metric
descriptors strings with reference numbers.

											
										
										
											8 years ago
+									}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									return &headChunkReader{head: h, mint: mint, maxt: maxt}
-												Expose series references to clients

This exposes a reference number of a series represented by a label set
to clients.
Subsequent samples can be directly added via the reference rather than
repeatedly passing in the full labels. This drastically speeds up the
append process.

The appender chain uses different sections of the reference number for
assignment to child appenders and invalidating reference numbers as
necessary.

Clients can either pass out reference numbers themselves or have their
own optimized lookup, i.e. by directly associating unparsed metric
descriptors strings with reference numbers.

											
										
										
											8 years ago
+								}
-												Open db in Read only mode (#588)

* Added db read only open mode and use it for the tsdb cli.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											5 years ago
+								// NumSeries returns the number of active series in the head.
 								func (h *Head) NumSeries() uint64 {
 									// Read the counter atomically; it is adjusted with atomic.AddUint64
 									// elsewhere in this file.
 									current := atomic.LoadUint64(&h.numSeries)
 									return current
 								}
 								// Meta builds a BlockMeta describing the head.
 								// The time bounds and series count are read at call time, so
 								// successive calls may return different values.
 								func (h *Head) Meta() BlockMeta {
 									// Fixed 16-byte placeholder identifier used for the head.
 									var headID [16]byte
 									copy(headID[:], "______head______")

 									// Read the dynamic values in the same order as the fields are listed.
 									lo := h.MinTime()
 									hi := h.MaxTime()
 									stats := BlockStats{NumSeries: h.NumSeries()}

 									meta := BlockMeta{
 										MinTime: lo,
 										MaxTime: hi,
 										ULID:    ulid.ULID(headID),
 										Stats:   stats,
 									}
 									return meta
 								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								// MinTime returns the lowest time bound on visible data in the head.
 								func (h *Head) MinTime() int64 {
 									return atomic.LoadInt64(&h.minTime)
-												Expose series references to clients

This exposes a reference number of a series represented by a label set
to clients.
Subsequent samples can be directly added via the reference rather than
repeatedly passing in the full labels. This drastically speeds up the
append process.

The appender chain uses different sections of the reference number for
assignment to child appenders and invalidating reference numbers as
necessary.

Clients can either pass out reference numbers themselves or have their
own optimized lookup, i.e. by directly associating unparsed metric
descriptors strings with reference numbers.

											
										
										
											8 years ago
+								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								// MaxTime returns the highest timestamp seen in data of the head.
 								func (h *Head) MaxTime() int64 {
 									return atomic.LoadInt64(&h.maxTime)
-												Expose series references to clients

This exposes a reference number of a series represented by a label set
to clients.
Subsequent samples can be directly added via the reference rather than
repeatedly passing in the full labels. This drastically speeds up the
append process.

The appender chain uses different sections of the reference number for
assignment to child appenders and invalidating reference numbers as
necessary.

Clients can either pass out reference numbers themselves or have their
own optimized lookup, i.e. by directly associating unparsed metric
descriptors strings with reference numbers.

											
										
										
											8 years ago
+								}
-												Add Head.compactable method (#542)

* Add Head.compactable method

Signed-off-by: zhulongcheng <zhulongcheng.me@gmail.com>
											
										
										
											6 years ago
+								// compactable returns whether the head has a compactable range.
 								// The head has a compactable range when the head time range is 1.5 times the chunk range.
 								// The 0.5 acts as a buffer of the appendable window.
 								func (h *Head) compactable() bool {
 									// Width of the time range currently covered by the head.
 									span := h.MaxTime() - h.MinTime()
 									// Integer arithmetic: the chunk range is halved first, then tripled.
 									limit := h.chunkRange / 2 * 3
 									return span > limit
 								}
-												Close WAL when closing the DB

Also, the `wal` field of the `DB` was not used anywhere, so this removes
it.

											
										
										
											7 years ago
+								// Close flushes the WAL and closes the head.
 								func (h *Head) Close() error {
-												Integrate new WAL and checkpoints

Remove the old WAL and drop in the new one

Signed-off-by: Fabian Reinartz <freinartz@google.com>

											
										
										
											7 years ago
+									if h.wal == nil {
 										return nil
 									}
-												Close WAL when closing the DB

Also, the `wal` field of the `DB` was not used anywhere, so this removes
it.

											
										
										
											7 years ago
+									return h.wal.Close()
 								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								type headChunkReader struct {
 									head       *Head
 									mint, maxt int64
-												Expose series references to clients

This exposes a reference number of a series represented by a label set
to clients.
Subsequent samples can be directly added via the reference rather than
repeatedly passing in the full labels. This drastically speeds up the
append process.

The appender chain uses different sections of the reference number for
assignment to child appenders and invalidating reference numbers as
necessary.

Clients can either pass out reference numbers themselves or have their
own optimized lookup, i.e. by directly associating unparsed metric
descriptors strings with reference numbers.

											
										
										
											8 years ago
+								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								func (h *headChunkReader) Close() error {
-												Expose series references to clients

This exposes a reference number of a series represented by a label set
to clients.
Subsequent samples can be directly added via the reference rather than
repeatedly passing in the full labels. This drastically speeds up the
append process.

The appender chain uses different sections of the reference number for
assignment to child appenders and invalidating reference numbers as
necessary.

Clients can either pass out reference numbers themselves or have their
own optimized lookup, i.e. by directly associating unparsed metric
descriptors strings with reference numbers.

											
										
										
											8 years ago
+									return nil
 								}
-												Change series ID from uint32 to uint64

											
										
										
											7 years ago
+								// packChunkID packs a seriesID and a chunkID within it into a global 8 byte ID.
 								// It panics if the seriesID exceeds 5 bytes or the chunk ID 3 bytes.
 								func packChunkID(seriesID, chunkID uint64) uint64 {
 									const (
 										// Number of low bits reserved for the chunk ID (3 bytes).
 										chunkBits = 24
 										// Largest series ID that fits in the remaining 5 bytes.
 										maxSeriesID = uint64(1)<<40 - 1
 										// Largest chunk ID that fits in 3 bytes.
 										maxChunkID = uint64(1)<<chunkBits - 1
 									)
 									// The series ID is validated first, so it wins when both are out of range.
 									switch {
 									case seriesID > maxSeriesID:
 										panic("series ID exceeds 5 bytes")
 									case chunkID > maxChunkID:
 										panic("chunk ID exceeds 3 bytes")
 									}
 									// Series ID occupies the high 40 bits, chunk ID the low 24 bits.
 									return seriesID<<chunkBits | chunkID
 								}
 								func unpackChunkID(id uint64) (seriesID, chunkID uint64) {
 									// The low 24 bits carry the chunk ID; everything above is the series ID.
 									const chunkBits = 24
 									seriesID = id >> chunkBits
 									chunkID = id & (uint64(1)<<chunkBits - 1)
 									return seriesID, chunkID
 								}
-												Add stats serialization, load querier of all blocks

											
										
										
											8 years ago
+								// Chunk returns the chunk for the reference number.
-												Move index and chunk encoders to own packages

											
										
										
											7 years ago
+								func (h *headChunkReader) Chunk(ref uint64) (chunkenc.Chunk, error) {
-												Change series ID from uint32 to uint64

											
										
										
											7 years ago
+									sid, cid := unpackChunkID(ref)
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
 									s := h.head.series.getByID(sid)
-												Make sure gc'ed chunks are handled properly

Signed-off-by: Goutham Veeramachaneni <gouthamve@gmail.com>

											
										
										
											7 years ago
+									// This means that the series has been garbage collected.
 									if s == nil {
 										return nil, ErrNotFound
 									}
-												Use separate lock for series creation

This uses the head block's own lock to only lock if new series were
encountered.
In the general append case we just need to hold a

											
										
										
											8 years ago
-												Fix various races

											
										
										
											7 years ago
+									s.Lock()
-												Change series ID from uint32 to uint64

											
										
										
											7 years ago
+									c := s.chunk(int(cid))
-												Make sure gc'ed chunks are handled properly

Signed-off-by: Goutham Veeramachaneni <gouthamve@gmail.com>

											
										
										
											7 years ago
-												Make interval overlap comparisons more explicit

Blocks are half-open intervals [a, b), while all other intervals
(chunks, head, ...) are closed intervals [a, b].

Make that distinction explicit by defining `OverlapsClosedInterval()`
methods for blocks and chunks, and using them in place of the more
generic `intervalOverlap()` function.

This change also fixes `db.Querier()` and `db.Delete()`, which could
previously return one extraneous block at the end of the specified
interval.

Signed-off-by: Benoît Knecht <benoit.knecht@fsfe.org>

											
										
										
											6 years ago
+									// This means that the chunk has been garbage collected or is outside
 									// the specified range.
 									if c == nil || !c.OverlapsClosedInterval(h.mint, h.maxt) {
-												Make sure gc'ed chunks are handled properly

Signed-off-by: Goutham Veeramachaneni <gouthamve@gmail.com>

											
										
										
											7 years ago
+										s.Unlock()
 										return nil, ErrNotFound
 									}
-												Fix various races

											
										
										
											7 years ago
+									s.Unlock()
-												Support multiple chunk files in read path

											
										
										
											8 years ago
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									return &safeChunk{
 										Chunk: c.chunk,
 										s:     s,
-												Change series ID from uint32 to uint64

											
										
										
											7 years ago
+										cid:   int(cid),
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									}, nil
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+								}
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceding ones while samples
are added

											
										
										
											8 years ago
+								type safeChunk struct {
-												Move index and chunk encoders to own packages

											
										
										
											7 years ago
+									chunkenc.Chunk
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									s   *memSeries
 									cid int
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceding ones while samples
are added

											
										
										
											8 years ago
+								}
-												Reuse Chunk Iterator (#642)

* Reset method for chunkenc.Iterator

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Reset method only for XORIterator

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Use Reset(...) in querier.go

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Reuse deletedIterator

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Another way of reusing chunk iterators

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Unexport xorIterator

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Fix memSeries.iterator(...)

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Add some comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											5 years ago
+								func (c *safeChunk) Iterator(reuseIter chunkenc.Iterator) chunkenc.Iterator {
-												Fix various races

											
										
										
											7 years ago
+									c.s.Lock()
-												Reuse Chunk Iterator (#642)

* Reset method for chunkenc.Iterator

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Reset method only for XORIterator

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Use Reset(...) in querier.go

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Reuse deletedIterator

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Another way of reusing chunk iterators

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Unexport xorIterator

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Fix memSeries.iterator(...)

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Add some comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											5 years ago
+									it := c.s.iterator(c.cid, reuseIter)
-												Fix various races

											
										
										
											7 years ago
+									c.s.Unlock()
 									return it
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceding ones while samples
are added

											
										
										
											8 years ago
+								}
-												Unexport HeadBlock, export Block interface

											
										
										
											8 years ago
+								type headIndexReader struct {
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									head       *Head
 									mint, maxt int64
 								}
 								func (h *headIndexReader) Close() error {
 									return nil
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of this, the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
+								}
 								func (h *headIndexReader) Symbols() (map[string]struct{}, error) {
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									h.head.symMtx.RLock()
 									defer h.head.symMtx.RUnlock()
 									res := make(map[string]struct{}, len(h.head.symbols))
 									for s := range h.head.symbols {
 										res[s] = struct{}{}
 									}
 									return res, nil
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+								}
 								// LabelValues returns the possible label values
-												Move index and chunk encoders to own packages

											
										
										
											7 years ago
+								func (h *headIndexReader) LabelValues(names ...string) (index.StringTuples, error) {
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+									if len(names) != 1 {
-												Merge encoding_helpers.go to tsdbutil (#526)

remove duplicate encoding helper funcs and move to own package so they can be reused.

Signed-off-by: naivewong <867245430@qq.com>

											
										
										
											6 years ago
+										return nil, encoding.ErrInvalidSize
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+									}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									h.head.symMtx.RLock()
-												Reduce allocations for queries on `HEAD` (#417)

Some benchmarks for HEAD and allocate the correct slice size in LabelValues , we already know what it'll be

This is ~15% time improvement, and ~25% allocation improvement:


```
benchmark                             old ns/op     new ns/op     delta
BenchmarkHeadPostingForMatchers-4     74452         63514         -14.69%

benchmark                             old allocs     new allocs     delta
BenchmarkHeadPostingForMatchers-4     20             13             -35.00%

benchmark                             old bytes     new bytes     delta
BenchmarkHeadPostingForMatchers-4     5425          3137          -42.18%
```

Signed-off-by: Thomas Jackson <jacksontj.89@gmail.com>

											
										
										
											6 years ago
+									sl := make([]string, 0, len(h.head.values[names[0]]))
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									for s := range h.head.values[names[0]] {
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+										sl = append(sl, s)
 									}
-												Reduce allocations for queries on `HEAD` (#417)

Some benchmarks for HEAD and allocate the correct slice size in LabelValues , we already know what it'll be

This is ~15% time improvement, and ~25% allocation improvement:


```
benchmark                             old ns/op     new ns/op     delta
BenchmarkHeadPostingForMatchers-4     74452         63514         -14.69%

benchmark                             old allocs     new allocs     delta
BenchmarkHeadPostingForMatchers-4     20             13             -35.00%

benchmark                             old bytes     new bytes     delta
BenchmarkHeadPostingForMatchers-4     5425          3137          -42.18%
```

Signed-off-by: Thomas Jackson <jacksontj.89@gmail.com>

											
										
										
											6 years ago
+									h.head.symMtx.RUnlock()
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+									sort.Strings(sl)
-												Move index and chunk encoders to own packages

											
										
										
											7 years ago
+									return index.NewStringTuples(sl, len(names))
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+								}
-												LabelNames() method to get all unique label names (#369)

* LabelNames() method to get all unique label names

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>
											
										
										
											6 years ago
+								// LabelNames returns all the unique label names present in the head.
 								func (h *headIndexReader) LabelNames() ([]string, error) {
 									// Hold the read lock for the whole scan of h.head.values.
 									h.head.symMtx.RLock()
 									defer h.head.symMtx.RUnlock()
 									// Collect every key of h.head.values except the empty name.
 									names := make([]string, 0, len(h.head.values))
 									for n := range h.head.values {
 										if n != "" {
 											names = append(names, n)
 										}
 									}
 									// Sort so that callers receive the names in ascending order.
 									sort.Strings(names)
 									return names, nil
 								}
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+								// Postings returns the postings list iterator for the label pair.
-												Move index and chunk encoders to own packages

											
										
										
											7 years ago
+								func (h *headIndexReader) Postings(name, value string) (index.Postings, error) {
 									return h.head.postings.Get(name, value), nil
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+								}
-												Move index and chunk encoders to own packages

											
										
										
											7 years ago
+								func (h *headIndexReader) SortedPostings(p index.Postings) index.Postings {
-												Move series fetches out of inner loop of SortedPostings. (#485)

With 1M series:

Before:
BenchmarkHeadPostingForMatchers-8              1        3501996117 ns/op 61311520 B/op         78 allocs/op

After:
BenchmarkHeadPostingForMatchers-8              1        1403072952 ns/op 69261568 B/op         72 allocs/op

This works out as 3X faster, as the above time includes other things.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
											
										
										
											6 years ago
+									series := make([]*memSeries, 0, 128)
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of this, the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
-												Move series fetches out of inner loop of SortedPostings. (#485)

With 1M series:

Before:
BenchmarkHeadPostingForMatchers-8              1        3501996117 ns/op 61311520 B/op         78 allocs/op

After:
BenchmarkHeadPostingForMatchers-8              1        1403072952 ns/op 69261568 B/op         72 allocs/op

This works out as 3X faster, as the above time includes other things.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
											
										
										
											6 years ago
+									// Fetch all the series only once.
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of this, the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
+									for p.Next() {
-												Move series fetches out of inner loop of SortedPostings. (#485)

With 1M series:

Before:
BenchmarkHeadPostingForMatchers-8              1        3501996117 ns/op 61311520 B/op         78 allocs/op

After:
BenchmarkHeadPostingForMatchers-8              1        1403072952 ns/op 69261568 B/op         72 allocs/op

This works out as 3X faster, as the above time includes other things.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
											
										
										
											6 years ago
+										s := h.head.series.getByID(p.At())
 										if s == nil {
 											level.Debug(h.head.logger).Log("msg", "looked up series not found")
 										} else {
 											series = append(series, s)
 										}
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of this, the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
+									}
 									if err := p.Err(); err != nil {
-												Move index and chunk encoders to own packages

											
										
										
											7 years ago
+										return index.ErrPostings(errors.Wrap(err, "expand postings"))
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of this, the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
+									}
-												Move series fetches out of inner loop of SortedPostings. (#485)

With 1M series:

Before:
BenchmarkHeadPostingForMatchers-8              1        3501996117 ns/op 61311520 B/op         78 allocs/op

After:
BenchmarkHeadPostingForMatchers-8              1        1403072952 ns/op 69261568 B/op         72 allocs/op

This works out as 3X faster, as the above time includes other things.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
											
										
										
											6 years ago
+									sort.Slice(series, func(i, j int) bool {
 										return labels.Compare(series[i].lset, series[j].lset) < 0
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of this, the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
+									})
-												Move series fetches out of inner loop of SortedPostings. (#485)

With 1M series:

Before:
BenchmarkHeadPostingForMatchers-8              1        3501996117 ns/op 61311520 B/op         78 allocs/op

After:
BenchmarkHeadPostingForMatchers-8              1        1403072952 ns/op 69261568 B/op         72 allocs/op

This works out as 3X faster, as the above time includes other things.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
											
										
										
											6 years ago
 									// Convert back to list.
 									ep := make([]uint64, 0, len(series))
 									for _, p := range series {
 										ep = append(ep, p.ref)
 									}
-												Move index and chunk encoders to own packages

											
										
										
											7 years ago
+									return index.NewListPostings(ep)
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of this, the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
+								}
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+								// Series returns the series for the given reference.
-												Move index and chunk encoders to own packages

											
										
										
											7 years ago
+								func (h *headIndexReader) Series(ref uint64, lbls *labels.Labels, chks *[]chunks.Meta) error {
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									s := h.head.series.getByID(ref)
-												Use separate lock for series creation

This uses the head block's own lock to only lock if new series were
encountered.
In the general append case we just need to hold a

											
										
										
											8 years ago
-												Switch append refs to string

											
										
										
											8 years ago
+									if s == nil {
-												head: track number of series not found errors in metric

											
										
										
											7 years ago
+										h.head.metrics.seriesNotFound.Inc()
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of this, the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
+										return ErrNotFound
-												Switch append refs to string

											
										
										
											8 years ago
+									}
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of this, the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
+									*lbls = append((*lbls)[:0], s.lset...)
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
-												Fix various races

											
										
										
											7 years ago
+									s.Lock()
 									defer s.Unlock()
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of this, the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
+									*chks = (*chks)[:0]
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+									for i, c := range s.chunks {
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+										// Do not expose chunks that are outside of the specified range.
-												Make interval overlap comparisons more explicit

Blocks are half-open intervals [a, b), while all other intervals
(chunks, head, ...) are closed intervals [a, b].

Make that distinction explicit by defining `OverlapsClosedInterval()`
methods for blocks and chunks, and using them in place of the more
generic `intervalOverlap()` function.

This change also fixes `db.Querier()` and `db.Delete()`, which could
previously return one extraneous block at the end of the specified
interval.

Signed-off-by: Benoît Knecht <benoit.knecht@fsfe.org>

											
										
										
											6 years ago
+										if !c.OverlapsClosedInterval(h.mint, h.maxt) {
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+											continue
 										}
-												Re-encode chunks that are still being appended to when snapshoti… (#641)

* re encode all head chunks outside the time range.

Signed-off-by: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com>
											
										
										
											5 years ago
+										// Set the head chunks as open (being appended to).
 										maxTime := c.maxTime
 										if s.headChunk == c {
 											maxTime = math.MaxInt64
 										}
-												Move index and chunk encoders to own packages

											
										
										
											7 years ago
+										*chks = append(*chks, chunks.Meta{
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+											MinTime: c.minTime,
-												Re-encode chunks that are still being appended to when snapshoti… (#641)

* re encode all head chunks outside the time range.

Signed-off-by: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com>
											
										
										
											5 years ago
+											MaxTime: maxTime,
-												Change series ID from uint32 to uint64

											
										
										
											7 years ago
+											Ref:     packChunkID(s.ref, uint64(s.chunkID(i))),
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+										})
-												Consolidate persistence and compaction

											
										
										
											8 years ago
+									}
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of this, the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
+									return nil
-												Modify IndexReader API to accommodate compaction

This changes the IndexReader API to expose plain labels
and chunk meta information instead of a Series interface.
Dropping of irrelevant chunks is moved into the querier.

A LabelIndices method is added to query for existing label
value indices.

											
										
										
											8 years ago
+								}
-												Unexport HeadBlock, export Block interface

											
										
										
											8 years ago
+								func (h *headIndexReader) LabelIndices() ([][]string, error) {
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									h.head.symMtx.RLock()
 									defer h.head.symMtx.RUnlock()
-												Modify IndexReader API to accommodate compaction

This changes the IndexReader API to expose plain labels
and chunk meta information instead of a Series interface.
Dropping of irrelevant chunks is moved into the querier.

A LabelIndices method is added to query for existing label
value indices.

											
										
										
											8 years ago
+									res := [][]string{}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									for s := range h.head.values {
-												Modify IndexReader API to accommodate compaction

This changes the IndexReader API to expose plain labels
and chunk meta information instead of a Series interface.
Dropping of irrelevant chunks is moved into the querier.

A LabelIndices method is added to query for existing label
value indices.

											
										
										
											8 years ago
+										res = append(res, []string{s})
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+									}
-												Modify IndexReader API to accommodate compaction

This changes the IndexReader API to expose plain labels
and chunk meta information instead of a Series interface.
Dropping of irrelevant chunks is moved into the querier.

A LabelIndices method is added to query for existing label
value indices.

											
										
										
											8 years ago
+									return res, nil
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+								}
-												Simplify series create logic in head

											
										
										
											7 years ago
+								func (h *Head) getOrCreate(hash uint64, lset labels.Labels) (*memSeries, bool) {
 									// Just using `getOrSet` below would be semantically sufficient, but we'd create
 									// a new series on every sample inserted via Add(), which causes allocations
 									// and makes our series IDs rather random and harder to compress in postings.
 									s := h.series.getByHash(hash, lset)
 									if s != nil {
 										return s, false
 									}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									// Optimistically assume that we are the first one to create the series.
-												Change series ID from uint32 to uint64

											
										
										
											7 years ago
+									id := atomic.AddUint64(&h.lastSeriesID, 1)
-												Create series with ID recorded in WAL when reading it back

											
										
										
											7 years ago
 									return h.getOrCreateWithID(id, hash, lset)
 								}
 								func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*memSeries, bool) {
 									s := newMemSeries(lset, id, h.chunkRange)
-												Consolidate mem index into HeadBlock

											
										
										
											8 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									s, created := h.series.getOrSet(hash, s)
 									if !created {
-												Simplify series create logic in head

											
										
										
											7 years ago
+										return s, false
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									}
-												Simplify series create logic in head

											
										
										
											7 years ago
+									h.metrics.seriesCreated.Inc()
-												Open db in Read only mode (#588)

* Added db read only open mode and use it for the tsdb cli.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											5 years ago
+									atomic.AddUint64(&h.numSeries, 1)
-												Simplify series create logic in head

											
										
										
											7 years ago
-												Move index and chunk encoders to own packages

											
										
										
											7 years ago
+									h.postings.Add(id, lset)
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
 									h.symMtx.Lock()
 									defer h.symMtx.Unlock()
-												Consolidate mem index into HeadBlock

											
										
										
											8 years ago
 									for _, l := range lset {
 										valset, ok := h.values[l.Name]
 										if !ok {
 											valset = stringset{}
 											h.values[l.Name] = valset
 										}
 										valset.set(l.Value)
-												Consolidate persistence and compaction

											
										
										
											8 years ago
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of this, the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
+										h.symbols[l.Name] = struct{}{}
 										h.symbols[l.Value] = struct{}{}
-												Consolidate mem index into HeadBlock

											
										
										
											8 years ago
+									}
-												Consolidate persistence and compaction

											
										
										
											8 years ago
-												Simplify series create logic in head

											
										
										
											7 years ago
+									return s, true
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+								}
 								// seriesHashmap is a simple hashmap for memSeries by their label set. It is built
 								// on top of a regular hashmap and holds a slice of series to resolve hash collisions.
 								// Keys are label-set hashes; each value lists every series stored under that hash.
 								// Its methods require the hash to be submitted with it to avoid re-computations throughout
 								// the code.
 								type seriesHashmap map[uint64][]*memSeries
-												Add new interfaces and skeleton

											
										
										
											8 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+								func (m seriesHashmap) get(hash uint64, lset labels.Labels) *memSeries {
 									for _, s := range m[hash] {
 										if s.lset.Equals(lset) {
 											return s
 										}
 									}
 									return nil
 								}
 								// set stores s in the bucket for hash. If the bucket already contains a series
 								// whose label set equals s.lset, that entry is overwritten in place; otherwise
 								// s is appended to the bucket.
 								func (m seriesHashmap) set(hash uint64, s *memSeries) {
 									bucket := m[hash]
 									for idx := range bucket {
 										if !bucket[idx].lset.Equals(s.lset) {
 											continue
 										}
 										bucket[idx] = s
 										return
 									}
 									m[hash] = append(bucket, s)
 								}
 								// del removes every series in the bucket for hash whose label set equals lset.
 								// The surviving series are collected into a fresh slice; when none survive the
 								// bucket's key is deleted from the map altogether.
 								func (m seriesHashmap) del(hash uint64, lset labels.Labels) {
 									var kept []*memSeries
 									for _, cand := range m[hash] {
 										if cand.lset.Equals(lset) {
 											continue
 										}
 										kept = append(kept, cand)
 									}
 									if len(kept) > 0 {
 										m[hash] = kept
 										return
 									}
 									delete(m, hash)
 								}
 								// stripeSeries locks modulo ranges of IDs and hashes to reduce lock contention.
-												Fix typos in comments (#254)

a the -> the
timestmap -> timestamp
badded -> padded
its -> it is
callers -> caller's
											
										
										
											7 years ago
+								// The locks are padded to not be on the same cache line. Filling the padded space
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+								// with the maps was profiled to be slower – likely due to the additional pointer
 								// dereferences.
 								type stripeSeries struct {
 									// series maps a series ref to its memSeries; the stripe is chosen by ref & stripeMask.
 									series [stripeSize]map[uint64]*memSeries
 									// hashes maps a label-set hash to its series; the stripe is chosen by hash & stripeMask.
 									hashes [stripeSize]seriesHashmap
 									// locks[i] guards both series[i] and hashes[i].
 									locks  [stripeSize]stripeLock
 								}
 								const (
 									// stripeSize is the number of stripes (maps and locks) in a stripeSeries.
 									// It has to stay a power of two because stripeMask is derived from it.
 									stripeSize = 1 << 14
 									// stripeMask is ANDed with an ID or hash to select the stripe index.
 									stripeMask = stripeSize - 1
 								)
 								// stripeLock is a read-write mutex followed by padding bytes.
 								type stripeLock struct {
 									sync.RWMutex
 									// Padding to avoid multiple locks being on the same cache line.
 									// NOTE(review): 40 bytes presumably rounds the struct up to a 64-byte cache
 									// line given the size of sync.RWMutex on 64-bit platforms; confirm.
 									_ [40]byte
 								}
 								func newStripeSeries() *stripeSeries {
 									s := &stripeSeries{}
 									for i := range s.series {
 										s.series[i] = map[uint64]*memSeries{}
 									}
 									for i := range s.hashes {
 										s.hashes[i] = seriesHashmap{}
 									}
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+									return s
-												Add new interfaces and skeleton

											
										
										
											8 years ago
+								}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+								// gc garbage collects old chunks that are strictly before mint and removes
 								// series entirely that have no chunks left.
 								func (s *stripeSeries) gc(mint int64) (map[uint64]struct{}, int) {
 									var (
 										deleted  = map[uint64]struct{}{}
 										rmChunks = 0
 									)
 									// Run through all series and truncate old chunks. Mark those with no
-												Filter WAL data in Head, misc fixes

											
										
										
											7 years ago
+									// chunks left as deleted and store their ID.
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									for i := 0; i < stripeSize; i++ {
 										s.locks[i].Lock()
 										for hash, all := range s.hashes[i] {
 											for _, series := range all {
-												Fix various races

											
										
										
											7 years ago
+												series.Lock()
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+												rmChunks += series.truncateChunksBefore(mint)
-												Fix race condition between gc and committing (#378)

Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>
											
										
										
											6 years ago
+												if len(series.chunks) > 0 || series.pendingCommit {
-												Fix various races

											
										
										
											7 years ago
+													series.Unlock()
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+													continue
 												}
 												// The series is gone entirely. We need to keep the series lock
 												// and make sure we have acquired the stripe locks for hash and ID of the
 												// series alike.
 												// If we don't hold them all, there's a very small chance that a series receives
 												// samples again while we are half-way into deleting it.
 												j := int(series.ref & stripeMask)
 												if i != j {
 													s.locks[j].Lock()
 												}
 												deleted[series.ref] = struct{}{}
 												s.hashes[i].del(hash, series.lset)
 												delete(s.series[j], series.ref)
 												if i != j {
 													s.locks[j].Unlock()
 												}
-												Fix various races

											
										
										
											7 years ago
+												series.Unlock()
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+											}
 										}
 										s.locks[i].Unlock()
 									}
 									return deleted, rmChunks
 								}
 								// getByID looks up the series stored under the given ID. The stripe owning the
 								// ID is read-locked for the duration of the map access. It returns nil when no
 								// series is registered under id.
 								func (s *stripeSeries) getByID(id uint64) *memSeries {
 									stripe := id & stripeMask
 									lock := &s.locks[stripe]

 									lock.RLock()
 									found := s.series[stripe][id]
 									lock.RUnlock()

 									return found
 								}
 								// getByHash looks up the series with the given label set in the stripe selected
 								// by hash. The stripe is read-locked for the duration of the lookup. It returns
 								// nil when no series with an equal label set is stored under hash.
 								func (s *stripeSeries) getByHash(hash uint64, lset labels.Labels) *memSeries {
 									stripe := hash & stripeMask
 									lock := &s.locks[stripe]

 									lock.RLock()
 									found := s.hashes[stripe].get(hash, lset)
 									lock.RUnlock()

 									return found
 								}
 								func (s *stripeSeries) getOrSet(hash uint64, series *memSeries) (*memSeries, bool) {
 									i := hash & stripeMask
 									s.locks[i].Lock()
 									if prev := s.hashes[i].get(hash, series.lset); prev != nil {
-												Add missing unlock on early return

											
										
										
											7 years ago
+										s.locks[i].Unlock()
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+										return prev, false
 									}
 									s.hashes[i].set(hash, series)
 									s.locks[i].Unlock()
 									i = series.ref & stripeMask
 									s.locks[i].Lock()
 									s.series[i][series.ref] = series
 									s.locks[i].Unlock()
 									return series, true
 								}
-												Move BufferedSeriesIterator in own package

This functionality is useful for a lot of clients but not relevant to
the TSDB's core features.

											
										
										
											8 years ago
+								type sample struct {
 									t int64
 									v float64
 								}
-												refactor util funcs to allow re-usage. (#419)

* refactor util funcs to allow reusage.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+								func (s sample) T() int64 {
 									return s.t
 								}
 								// V returns the value of the sample.
 								func (s sample) V() float64 {
 									return s.v
 								}
-												Fix various races

											
										
										
											7 years ago
+								// memSeries is the in-memory representation of a series. None of its methods
-												Fix typos in comments (#254)

a the -> the
timestmap -> timestamp
badded -> padded
its -> it is
callers -> caller's
											
										
										
											7 years ago
+								// are goroutine safe and it is the caller's responsibility to lock it.
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+								type memSeries struct {
-												Fix various races

											
										
										
											7 years ago
+									sync.Mutex
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceeding ones while samples
are added

											
										
										
											8 years ago
-												Change series ID from uint32 to uint64

											
										
										
											7 years ago
+									ref          uint64
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									lset         labels.Labels
 									chunks       []*memChunk
-												Precalculate memSeries.head

This is read far more than it changes.
This cuts ~14% off walltme and ~27% off CPU for WAL reading.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>

											
										
										
											6 years ago
+									headChunk    *memChunk
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									chunkRange   int64
 									firstChunkID int
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceeding ones while samples
are added

											
										
										
											8 years ago
-												Fix race condition between gc and committing (#378)

Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>
											
										
										
											6 years ago
+									nextAt        int64 // Timestamp at which to cut the next chunk.
 									sampleBuf     [4]sample
 									pendingCommit bool // Whether there are samples waiting to be committed to this series.
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceeding ones while samples
are added

											
										
										
											8 years ago
-												Move index and chunk encoders to own packages

											
										
										
											7 years ago
+									app chunkenc.Appender // Current appender for the chunk.
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceeding ones while samples
are added

											
										
										
											8 years ago
+								}
-												Dont store stones in head, delete samples directly

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+								func newMemSeries(lset labels.Labels, id uint64, chunkRange int64) *memSeries {
 									s := &memSeries{
 										lset:       lset,
 										ref:        id,
 										chunkRange: chunkRange,
 										nextAt:     math.MinInt64,
 									}
 									return s
 								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								func (s *memSeries) minTime() int64 {
-												Fix crash when a series has no block

											
										
										
											7 years ago
+									if len(s.chunks) == 0 {
 										return math.MinInt64
 									}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									return s.chunks[0].minTime
 								}
 								func (s *memSeries) maxTime() int64 {
-												Fix crash when a series has no block

											
										
										
											7 years ago
+									c := s.head()
 									if c == nil {
 										return math.MinInt64
 									}
 									return c.maxTime
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								}
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+								func (s *memSeries) cut(mint int64) *memChunk {
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+									c := &memChunk{
-												Move index and chunk encoders to own packages

											
										
										
											7 years ago
+										chunk:   chunkenc.NewXORChunk(),
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+										minTime: mint,
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+										maxTime: math.MinInt64,
 									}
 									s.chunks = append(s.chunks, c)
-												Precalculate memSeries.head

This is read far more than it changes.
This cuts ~14% off walltme and ~27% off CPU for WAL reading.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>

											
										
										
											6 years ago
+									s.headChunk = c
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
-												Ensure near-empty chunks end at correct boundary

We were determining a chunk's end time once it was one quarter full to
compute it so all chunks have uniform number of samples.
This accidentally skipped the case where series started near the end of
a chunk range/block and never reached that threshold. As a result they
got persisted but were continued across the range.

This resulted in corrupted persisted data.

											
										
										
											7 years ago
+									// Set upper bound on when the next chunk must be started. An earlier timestamp
 									// may be chosen dynamically at a later point.
-												no overlapping on compaction when an existing block is not within default boundaries. (#461)

closes https://github.com/prometheus/prometheus/issues/4643

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+									s.nextAt = rangeForTimestamp(mint, s.chunkRange)
-												Ensure near-empty chunks end at correct boundary

We were determining a chunk's end time once it was one quarter full to
compute it so all chunks have uniform number of samples.
This accidentally skipped the case where series started near the end of
a chunk range/block and never reached that threshold. As a result they
got persisted but were continued across the range.

This resulted in corrupted persisted data.

											
										
										
											7 years ago
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+									app, err := c.chunk.Appender()
 									if err != nil {
 										panic(err)
 									}
 									s.app = app
 									return c
 								}
-												Dont store stones in head, delete samples directly

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+								func (s *memSeries) chunksMetas() []chunks.Meta {
 									metas := make([]chunks.Meta, 0, len(s.chunks))
 									for _, chk := range s.chunks {
 										metas = append(metas, chunks.Meta{Chunk: chk.chunk, MinTime: chk.minTime, MaxTime: chk.maxTime})
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+									}
-												Dont store stones in head, delete samples directly

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											6 years ago
+									return metas
 								}
 								// reset re-initialises all the variables in the memSeries except 'lset', 'ref',
 								// and 'chunkRange', like how it would appear after 'newMemSeries(...)'.
 								func (s *memSeries) reset() {
 									s.chunks = nil
 									s.headChunk = nil
 									s.firstChunkID = 0
 									s.nextAt = math.MinInt64
 									s.sampleBuf = [4]sample{}
 									s.pendingCommit = false
 									s.app = nil
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								// appendable checks whether the given sample is valid for appending to the series.
 								func (s *memSeries) appendable(t int64, v float64) error {
-												Add tests for GC and chunk truncation

											
										
										
											7 years ago
+									c := s.head()
 									if c == nil {
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+										return nil
 									}
 									if t > c.maxTime {
 										return nil
 									}
 									if t < c.maxTime {
 										return ErrOutOfOrderSample
 									}
 									// We are allowing exact duplicates as we can encounter them in valid cases
 									// like federation and erroring out at that time would be extremely noisy.
-												Use sampleBuf instead of maintaining lastValue. (#444)

This cuts the size of memSize by 8B.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
											
										
										
											6 years ago
+									if math.Float64bits(s.sampleBuf[3].v) != math.Float64bits(v) {
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+										return ErrAmendSample
 									}
 									return nil
 								}
 								func (s *memSeries) chunk(id int) *memChunk {
-												Add tests for GC and chunk truncation

											
										
										
											7 years ago
+									ix := id - s.firstChunkID
 									if ix < 0 || ix >= len(s.chunks) {
 										return nil
 									}
 									return s.chunks[ix]
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								}
 								// chunkID returns the chunk ID for the chunk at index pos in s.chunks, which is
 								// pos offset by firstChunkID. It is the inverse of the index computation in chunk.
 								func (s *memSeries) chunkID(pos int) int {
 									return pos + s.firstChunkID
 								}
 								// truncateChunksBefore removes all chunks from the series that have no timestamp
 								// at or after mint. Chunk IDs remain unchanged.
-												Add various metrics

											
										
										
											7 years ago
+								func (s *memSeries) truncateChunksBefore(mint int64) (removed int) {
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									var k int
 									for i, c := range s.chunks {
 										if c.maxTime >= mint {
 											break
 										}
 										k = i + 1
 									}
 									s.chunks = append(s.chunks[:0], s.chunks[k:]...)
 									s.firstChunkID += k
-												Precalculate memSeries.head

This is read far more than it changes.
This cuts ~14% off walltme and ~27% off CPU for WAL reading.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>

											
										
										
											6 years ago
+									if len(s.chunks) == 0 {
 										s.headChunk = nil
 									} else {
 										s.headChunk = s.chunks[len(s.chunks)-1]
 									}
-												Add various metrics

											
										
										
											7 years ago
 									return k
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								}
 								// append adds the sample (t, v) to the series.
-												Add various metrics

											
										
										
											7 years ago
+								func (s *memSeries) append(t int64, v float64) (success, chunkCreated bool) {
-												comments about the 120samples const and link to Gorilla papers. (#423)

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
											
										
										
											6 years ago
+									// Based on Gorilla white papers this offers near-optimal compression ratio
 									// so anything bigger than this has diminishing returns and increases
 									// the time range within which we have to decompress all samples.
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+									const samplesPerChunk = 120
-												Add tests for GC and chunk truncation

											
										
										
											7 years ago
+									c := s.head()
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
-												Add tests for GC and chunk truncation

											
										
										
											7 years ago
+									if c == nil {
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+										c = s.cut(t)
-												Add various metrics

											
										
										
											7 years ago
+										chunkCreated = true
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+									}
-												wal: parallelize sample processing

											
										
										
											7 years ago
+									numSamples := c.chunk.NumSamples()
-												Ensure near-empty chunks end at correct boundary

We were determining a chunk's end time once it was one quarter full to
compute it so all chunks have uniform number of samples.
This accidentally skipped the case where series started near the end of
a chunk range/block and never reached that threshold. As a result they
got persisted but were continued across the range.

This resulted in corrupted persisted data.

											
										
										
											7 years ago
+									// Out of order sample.
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+									if c.maxTime >= t {
-												Add various metrics

											
										
										
											7 years ago
+										return false, chunkCreated
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+									}
-												Ensure near-empty chunks end at correct boundary

We were determining a chunk's end time once it was one quarter full to
compute it so all chunks have uniform number of samples.
This accidentally skipped the case where series started near the end of
a chunk range/block and never reached that threshold. As a result they
got persisted but were continued across the range.

This resulted in corrupted persisted data.

											
										
										
											7 years ago
+									// If we reach 25% of a chunk's desired sample count, set a definitive time
 									// at which to start the next chunk.
 									// At latest it must happen at the timestamp set when the chunk was cut.
 									if numSamples == samplesPerChunk/4 {
 										s.nextAt = computeChunkEndTime(c.minTime, c.maxTime, s.nextAt)
 									}
 									if t >= s.nextAt {
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+										c = s.cut(t)
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										chunkCreated = true
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceeding ones while samples
are added

											
										
										
											8 years ago
+									}
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunks to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+									s.app.Append(t, v)
 									c.maxTime = t
 									s.sampleBuf[0] = s.sampleBuf[1]
 									s.sampleBuf[1] = s.sampleBuf[2]
 									s.sampleBuf[2] = s.sampleBuf[3]
 									s.sampleBuf[3] = sample{t: t, v: v}
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceding ones while samples
are added

											
										
										
											8 years ago
-												Add various metrics

											
										
										
											7 years ago
+									return true, chunkCreated
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceding ones while samples
are added

											
										
										
											8 years ago
+								}
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+								// computeChunkEndTime estimates the end timestamp based on the beginning of a chunk,
 								// its current timestamp and the upper bound up to which we insert data.
 								// It assumes that the time range is 1/4 full.
 								func computeChunkEndTime(start, cur, max int64) int64 {
 									// Time covered so far (inclusive of cur), scaled by 4 because the
 									// chunk is assumed to be one quarter full at this point.
 									span := (cur - start + 1) * 4
 									if span <= 0 {
 										// cur lies before start, so there is no rate to extrapolate from.
 										// Dividing by a zero span would panic and a negative span would
 										// produce an end time before start; fall back to the upper bound.
 										return max
 									}
 									// Number of equally sized chunks that fit between start and max.
 									n := (max - start) / span
 									if n <= 0 {
 										// Less than one full chunk fits (or max precedes start):
 										// the chunk simply ends at the upper bound.
 										return max
 									}
 									return start + (max-start)/n
 								}
-												Reuse Chunk Iterator (#642)

* Reset method for chunkenc.Iterator

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Reset method only for XORIterator

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Use Reset(...) in querier.go

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Reuse deletedIterator

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Another way of reusing chunk iterators

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Unexport xorIterator

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Fix memSeries.iterator(...)

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Add some comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											5 years ago
+								func (s *memSeries) iterator(id int, it chunkenc.Iterator) chunkenc.Iterator {
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									c := s.chunk(id)
-												Return nop iterator for invalid chunk references

											
										
										
											7 years ago
+									// TODO(fabxc): Work around! A querier may have retrieved a pointer to a series' chunk,
 									// which got then garbage collected before it got accessed.
 									// We must ensure to not garbage collect as long as any readers still hold a reference.
 									if c == nil {
-												Move index and chunk encoders to own packages

											
										
										
											7 years ago
+										return chunkenc.NewNopIterator()
-												Return nop iterator for invalid chunk references

											
										
										
											7 years ago
+									}
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunks to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									if id-s.firstChunkID < len(s.chunks)-1 {
-												Reuse Chunk Iterator (#642)

* Reset method for chunkenc.Iterator

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Reset method only for XORIterator

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Use Reset(...) in querier.go

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Reuse deletedIterator

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Another way of reusing chunk iterators

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Unexport xorIterator

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Fix memSeries.iterator(...)

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Add some comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											5 years ago
+										return c.chunk.Iterator(it)
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunks to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+									}
-												Misc fixes (#285)

* Fix typo in head.go

pralellize -> paralellize

* Remove commented out code

It's dead code, remove it.

* Correct reference to sample buffer

											
										
										
											7 years ago
+									// Serve the last 4 samples for the last chunk from the sample buffer
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									// as their compressed bytes may be mutated by added samples.
-												Reuse Chunk Iterator (#642)

* Reset method for chunkenc.Iterator

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Reset method only for XORIterator

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Use Reset(...) in querier.go

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Reuse deletedIterator

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Another way of reusing chunk iterators

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Unexport xorIterator

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Fix memSeries.iterator(...)

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Add some comments

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

											
										
										
											5 years ago
+									if msIter, ok := it.(*memSafeIterator); ok {
 										msIter.Iterator = c.chunk.Iterator(msIter.Iterator)
 										msIter.i = -1
 										msIter.total = c.chunk.NumSamples()
 										msIter.buf = s.sampleBuf
 										return msIter
 									}
 									return &memSafeIterator{
 										Iterator: c.chunk.Iterator(it),
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceding ones while samples
are added

											
										
										
											8 years ago
+										i:        -1,
-												Add tests for GC and chunk truncation

											
										
										
											7 years ago
+										total:    c.chunk.NumSamples(),
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunks to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+										buf:      s.sampleBuf,
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceding ones while samples
are added

											
										
										
											8 years ago
+									}
 								}
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunks to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+								func (s *memSeries) head() *memChunk {
-												Precalculate memSeries.head

This is read far more than it changes.
This cuts ~14% off wall time and ~27% off CPU for WAL reading.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>

											
										
										
											6 years ago
+									return s.headChunk
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunks to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+								}
 								type memChunk struct {
-												Move index and chunk encoders to own packages

											
										
										
											7 years ago
+									chunk            chunkenc.Chunk
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunks to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+									minTime, maxTime int64
 								}
-												Make interval overlap comparisons more explicit

Blocks are half-open intervals [a, b), while all other intervals
(chunks, head, ...) are closed intervals [a, b].

Make that distinction explicit by defining `OverlapsClosedInterval()`
methods for blocks and chunks, and using them in place of the more
generic `intervalOverlap()` function.

This change also fixes `db.Querier()` and `db.Delete()`, which could
previously return one extraneous block at the end of the specified
interval.

Signed-off-by: Benoît Knecht <benoit.knecht@fsfe.org>

											
										
										
											6 years ago
+								// Returns true if the chunk overlaps [mint, maxt].
 								func (mc *memChunk) OverlapsClosedInterval(mint, maxt int64) bool {
 									// Both ranges are closed, so touching endpoints count as overlap:
 									// the chunk must start no later than maxt and end no earlier than mint.
 									startsInTime := mc.minTime <= maxt
 									endsLateEnough := mc.maxTime >= mint
 									return startsInTime && endsLateEnough
 								}
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceding ones while samples
are added

											
										
										
											8 years ago
+								type memSafeIterator struct {
-												Move index and chunk encoders to own packages

											
										
										
											7 years ago
+									chunkenc.Iterator
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceding ones while samples
are added

											
										
										
											8 years ago
 									i     int
 									total int
 									buf   [4]sample
 								}
 								// Next advances to the following sample and reports whether one exists.
 								// The wrapped iterator is only advanced while more than 4 samples remain;
 								// the final 4 positions are tracked by index alone and read from buf by At.
 								func (it *memSafeIterator) Next() bool {
 									next := it.i + 1
 									if next >= it.total {
 										return false
 									}
 									it.i = next
 									if it.total-next <= 4 {
 										// Within the buffered tail: nothing to decode.
 										return true
 									}
 									return it.Iterator.Next()
 								}
 								// At returns the timestamp and value at the current position. The last 4
 								// samples are taken from buf; everything before them comes from the
 								// wrapped iterator.
 								func (it *memSafeIterator) At() (int64, float64) {
 									remaining := it.total - it.i
 									if remaining <= 4 {
 										// remaining == 4 maps to buf[0], remaining == 1 to buf[3].
 										buffered := it.buf[4-remaining]
 										return buffered.t, buffered.v
 									}
 									return it.Iterator.At()
 								}
-												Move index and chunk encoders to own packages

											
										
										
											7 years ago
 								// stringset is a set of strings keyed by the string itself.
 								type stringset map[string]struct{}

 								// set inserts s into the set; inserting an existing member is a no-op.
 								func (ss stringset) set(s string) {
 									var present struct{}
 									ss[s] = present
 								}

 								// String renders the members in sorted order, separated by commas.
 								func (ss stringset) String() string {
 									members := ss.slice()
 									return strings.Join(members, ",")
 								}

 								// slice returns the members as a freshly allocated, sorted slice.
 								// Sorting makes the result independent of map iteration order.
 								func (ss stringset) slice() []string {
 									members := make([]string, len(ss))
 									idx := 0
 									for member := range ss {
 										members[idx] = member
 										idx++
 									}
 									sort.Strings(members)
 									return members
 								}