prometheus/head.go

// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package tsdb

import (
	"math"
	"runtime"
	"sort"
	"sync"
	"sync/atomic"
	"time"

	"github.com/go-kit/kit/log"
	"github.com/go-kit/kit/log/level"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/tsdb/chunks"
	"github.com/prometheus/tsdb/labels"
)

var (
	// ErrNotFound is returned if a looked up resource was not found.
	ErrNotFound = errors.Errorf("not found")

	// ErrOutOfOrderSample is returned if an appended sample has a
	// timestamp smaller than the most recent sample.
	ErrOutOfOrderSample = errors.New("out of order sample")

	// ErrAmendSample is returned if an appended sample has the same timestamp
	// as the most recent sample but a different value.
	ErrAmendSample = errors.New("amending sample")

	// ErrOutOfBounds is returned if an appended sample is out of the
	// writable time range.
	ErrOutOfBounds = errors.New("out of bounds")
)

// Head handles reads and writes of time series data within a time window.
type Head struct {
	chunkRange int64
	metrics    *headMetrics
	wal        WAL
	logger     log.Logger
	// appendPool recycles the sample buffers handed out to appenders.
	appendPool sync.Pool

	// minTime and maxTime are accessed through sync/atomic. Both start out
	// as math.MinInt64, which marks a head that has not been initialized yet.
	minTime, maxTime int64
	// lastSeriesID is the highest series ID handed out or restored so far.
	lastSeriesID uint64

	// All series addressable by their ID or hash.
	series *stripeSeries

	// symMtx guards symbols and values.
	symMtx  sync.RWMutex
	symbols map[string]struct{}
	values  map[string]stringset // label names to possible values

	postings *memPostings // postings lists for terms

	tombstones memTombstones
}

// headMetrics bundles the instruments through which the head reports on
// itself. The instances are created in newHeadMetrics.
//
// NOTE(review): chunksCreated and chunksRemoved are exported under "_total"
// names but are typed as gauges; confirm whether counters were intended.
type headMetrics struct {
	activeAppenders     prometheus.Gauge
	series              prometheus.Gauge
	seriesCreated       prometheus.Counter
	seriesRemoved       prometheus.Counter
	seriesNotFound      prometheus.Counter
	chunks              prometheus.Gauge
	chunksCreated       prometheus.Gauge
	chunksRemoved       prometheus.Gauge
	gcDuration          prometheus.Summary
	minTime             prometheus.GaugeFunc
	maxTime             prometheus.GaugeFunc
	samplesAppended     prometheus.Counter
	walTruncateDuration prometheus.Summary
}

// newHeadMetrics creates all instruments of the head and, if r is non-nil,
// registers them with it. Registration panics on conflicts (MustRegister).
//
// The min/max time gauges are evaluated lazily at collection time by calling
// back into h.
func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
	m := &headMetrics{}

	m.activeAppenders = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "prometheus_tsdb_head_active_appenders",
		Help: "Number of currently active appender transactions",
	})
	m.series = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "prometheus_tsdb_head_series",
		Help: "Total number of series in the head block.",
	})
	// The created/removed totals only ever grow, so they are real counters.
	// Building them with NewGauge would expose them with the wrong metric type.
	m.seriesCreated = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "prometheus_tsdb_head_series_created_total",
		Help: "Total number of series created in the head",
	})
	m.seriesRemoved = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "prometheus_tsdb_head_series_removed_total",
		Help: "Total number of series removed in the head",
	})
	m.seriesNotFound = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "prometheus_tsdb_head_series_not_found",
		Help: "Total number of requests for series that were not found.",
	})
	m.chunks = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "prometheus_tsdb_head_chunks",
		Help: "Total number of chunks in the head block.",
	})
	m.chunksCreated = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "prometheus_tsdb_head_chunks_created_total",
		Help: "Total number of chunks created in the head",
	})
	m.chunksRemoved = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "prometheus_tsdb_head_chunks_removed_total",
		Help: "Total number of chunks removed in the head",
	})
	m.gcDuration = prometheus.NewSummary(prometheus.SummaryOpts{
		Name: "prometheus_tsdb_head_gc_duration_seconds",
		Help: "Runtime of garbage collection in the head block.",
	})
	m.maxTime = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
		Name: "prometheus_tsdb_head_max_time",
		Help: "Maximum timestamp of the head block.",
	}, func() float64 {
		return float64(h.MaxTime())
	})
	m.minTime = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
		Name: "prometheus_tsdb_head_min_time",
		Help: "Minimum time bound of the head block.",
	}, func() float64 {
		return float64(h.MinTime())
	})
	m.walTruncateDuration = prometheus.NewSummary(prometheus.SummaryOpts{
		Name: "prometheus_tsdb_wal_truncate_duration_seconds",
		Help: "Duration of WAL truncation.",
	})
	m.samplesAppended = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "prometheus_tsdb_head_samples_appended_total",
		Help: "Total number of appended samples.",
	})

	if r != nil {
		r.MustRegister(
			m.activeAppenders,
			m.chunks,
			m.chunksCreated,
			m.chunksRemoved,
			m.series,
			m.seriesCreated,
			m.seriesRemoved,
			m.seriesNotFound,
			m.minTime,
			m.maxTime,
			m.gcDuration,
			m.walTruncateDuration,
			m.samplesAppended,
		)
	}
	return m
}

// NewHead builds an empty, uninitialized head.
//
// A nil logger is replaced by a no-op logger and a nil WAL by NopWAL().
// chunkRange must be at least 1, otherwise an error is returned. Metrics are
// registered with r unless r is nil.
func NewHead(r prometheus.Registerer, l log.Logger, wal WAL, chunkRange int64) (*Head, error) {
	if l == nil {
		l = log.NewNopLogger()
	}
	if wal == nil {
		wal = NopWAL()
	}
	if chunkRange < 1 {
		return nil, errors.Errorf("invalid chunk range %d", chunkRange)
	}

	head := &Head{}
	head.wal = wal
	head.logger = l
	head.chunkRange = chunkRange
	// math.MinInt64 on both bounds marks the head as not yet initialized.
	head.minTime = math.MinInt64
	head.maxTime = math.MinInt64
	head.series = newStripeSeries()
	head.values = map[string]stringset{}
	head.symbols = map[string]struct{}{}
	head.postings = newUnorderedMemPostings()
	head.tombstones = memTombstones{}
	head.metrics = newHeadMetrics(head, r)

	return head, nil
}

// processWALSamples is the body of one WAL replay worker. It drains input,
// appends every sample that belongs to its partition (Ref modulo total) and
// is not older than mint, and then forwards the untouched batch to output.
// output is closed once input has been drained.
// The return value counts samples whose series reference was unknown.
func (h *Head) processWALSamples(
	mint int64,
	partition, total uint64,
	input <-chan []RefSample, output chan<- []RefSample,
) (unknownRefs uint64) {
	defer close(output)

	for batch := range input {
		for _, smpl := range batch {
			// Too old for the head.
			if smpl.T < mint {
				continue
			}
			// Owned by another worker.
			if smpl.Ref%total != partition {
				continue
			}
			series := h.series.getByID(smpl.Ref)
			if series == nil {
				unknownRefs++
				continue
			}
			if _, cut := series.append(smpl.T, smpl.V); cut {
				h.metrics.chunksCreated.Inc()
				h.metrics.chunks.Inc()
			}
		}
		output <- batch
	}
	return unknownRefs
}

// ReadWAL initializes the head by consuming the write ahead log.
// Series records are applied synchronously; sample records are fanned out to
// one worker per GOMAXPROCS, each responsible for a partition of the series
// ID space. Samples and tombstone intervals before the head's current
// minimum time are dropped.
// It returns the wrapped reader error, if any, after all workers have stopped.
func (h *Head) ReadWAL() error {
	// Postings are added unordered during replay and sorted once at the end.
	defer h.postings.ensureOrder()

	r := h.wal.Reader()
	mint := h.MinTime()

	// Track number of samples that referenced a series we don't know about
	// for error reporting.
	var unknownRefs uint64

	// Start workers that each process samples for a partition of the series ID space.
	// They are connected through a ring of channels which ensures that all sample batches
	// read from the WAL are processed in order.
	var (
		wg         sync.WaitGroup
		n          = runtime.GOMAXPROCS(0)
		firstInput = make(chan []RefSample, 300)
		input      = firstInput
	)
	wg.Add(n)

	for i := 0; i < n; i++ {
		output := make(chan []RefSample, 300)

		go func(i int, input <-chan []RefSample, output chan<- []RefSample) {
			unknown := h.processWALSamples(mint, uint64(i), uint64(n), input, output)
			atomic.AddUint64(&unknownRefs, unknown)
			wg.Done()
		}(i, input, output)

		// The output feeds the next worker goroutine. For the last worker,
		// it feeds the initial input again to reuse the RefSample slices.
		input = output
	}

	// TODO(fabxc): series entries spread between samples can starve the sample workers.
	// Even with buffered channels, this can impact startup time with lots of series churn.
	// We must not parallelize series creation itself but could make the indexing asynchronous.
	seriesFunc := func(series []RefSeries) {
		for _, s := range series {
			h.getOrCreateWithID(s.Ref, s.Labels.Hash(), s.Labels)

			// Keep the ID counter ahead of every ID restored from the WAL.
			if h.lastSeriesID < s.Ref {
				h.lastSeriesID = s.Ref
			}
		}
	}
	samplesFunc := func(samples []RefSample) {
		// We split up the samples into chunks of 5000 samples or less.
		// With O(300 * #cores) in-flight sample batches, large scrapes could otherwise
		// cause thousands of very large in flight buffers occupying large amounts
		// of unused memory.
		for len(samples) > 0 {
			n := 5000
			if len(samples) < n {
				n = len(samples)
			}
			// Reuse a buffer that already passed through the whole ring if one
			// is available; input is the last worker's output at this point.
			var buf []RefSample
			select {
			case buf = <-input:
			default:
			}
			firstInput <- append(buf[:0], samples[:n]...)
			samples = samples[n:]
		}
	}
	deletesFunc := func(stones []Stone) {
		for _, s := range stones {
			for _, itv := range s.intervals {
				// Intervals ending before the head's start no longer apply.
				if itv.Maxt < mint {
					continue
				}
				h.tombstones.add(s.ref, itv)
			}
		}
	}

	err := r.Read(seriesFunc, samplesFunc, deletesFunc)

	// Signal termination to first worker and wait for last one to close its output channel.
	close(firstInput)
	for range input {
	}
	wg.Wait()

	if err != nil {
		return errors.Wrap(err, "consume WAL")
	}
	if unknownRefs > 0 {
		level.Warn(h.logger).Log("msg", "unknown series references in WAL samples", "count", unknownRefs)
	}
	return nil
}

// Truncate removes all data before mint from the head block and truncates its WAL.
//
// It is a no-op if the head's minimum time is already at or beyond mint. On
// the very first call for an uninitialized head only the time bounds are
// moved; garbage collection and WAL truncation are skipped because the WAL
// has not been replayed yet.
//
// A failing WAL truncation is logged and not returned; Truncate currently
// always returns nil.
func (h *Head) Truncate(mint int64) error {
	initialize := h.MinTime() == math.MinInt64

	if h.MinTime() >= mint {
		return nil
	}
	atomic.StoreInt64(&h.minTime, mint)

	// Ensure that max time is at least as high as min time.
	// The value compared against mint must also be the expected value of the
	// CAS. Re-reading maxTime for the CAS could pick up a higher value stored
	// by a concurrent appender and then replace it with the lower mint.
	for {
		maxt := h.MaxTime()
		if maxt >= mint {
			break
		}
		if atomic.CompareAndSwapInt64(&h.maxTime, maxt, mint) {
			break
		}
	}

	// This was an initial call to Truncate after loading blocks on startup.
	// We haven't read back the WAL yet, so do not attempt to truncate it.
	if initialize {
		return nil
	}

	start := time.Now()

	h.gc()
	level.Info(h.logger).Log("msg", "head GC completed", "duration", time.Since(start))
	h.metrics.gcDuration.Observe(time.Since(start).Seconds())

	start = time.Now()

	// Only records of series that survived the GC are kept in the WAL.
	keep := func(id uint64) bool {
		return h.series.getByID(id) != nil
	}
	if err := h.wal.Truncate(mint, keep); err == nil {
		level.Info(h.logger).Log("msg", "WAL truncation completed", "duration", time.Since(start))
	} else {
		level.Error(h.logger).Log("msg", "WAL truncation failed", "err", err, "duration", time.Since(start))
	}
	h.metrics.walTruncateDuration.Observe(time.Since(start).Seconds())

	return nil
}

// initTime sets the time bounds of a head that has not seen any data yet,
// i.e. a completely fresh head with an empty WAL. The lower bound becomes the
// start of the chunk range containing t, the upper bound becomes t.
// It reports whether this call performed the initialization.
func (h *Head) initTime(t int64) (initialized bool) {
	// An uninitialized head carries math.MinInt64 in both time bounds.
	lower, _ := rangeForTimestamp(t, h.chunkRange)

	if swapped := atomic.CompareAndSwapInt64(&h.minTime, math.MinInt64, lower); swapped {
		// Only fill in the max time if nobody raised it in the meantime;
		// concurrent appenders may already have stored a higher value.
		atomic.CompareAndSwapInt64(&h.maxTime, math.MinInt64, t)
		return true
	}
	return false
}

// rangeHead is a view on a Head whose index and chunk readers are restricted
// to the time range [mint, maxt].
type rangeHead struct {
	head       *Head
	mint, maxt int64
}

// Index returns an index reader on the underlying head limited to the
// view's time range. The error is always nil.
func (h *rangeHead) Index() (IndexReader, error) {
	return h.head.indexRange(h.mint, h.maxt), nil
}

// Chunks returns a chunk reader on the underlying head limited to the
// view's time range. The error is always nil.
func (h *rangeHead) Chunks() (ChunkReader, error) {
	return h.head.chunksRange(h.mint, h.maxt), nil
}

// Tombstones returns the tombstones of the underlying head. They are not
// restricted to the view's time range. The error is always nil.
func (h *rangeHead) Tombstones() (TombstoneReader, error) {
	return h.head.tombstones, nil
}

// initAppender is a helper to initialize the time bounds of the head
// upon the first sample it receives.
// app stays nil until the first call to Add, which creates the real appender
// that all later calls are delegated to.
type initAppender struct {
	app  Appender
	head *Head
}

// Add appends a sample through the wrapped appender. On the first call the
// head's time bounds are initialized from t and the real appender is created.
func (a *initAppender) Add(lset labels.Labels, t int64, v float64) (uint64, error) {
	if a.app == nil {
		a.head.initTime(t)
		a.app = a.head.appender()
	}
	return a.app.Add(lset, t, v)
}

// AddFast delegates to the wrapped appender. Before the first Add there is
// no appender and no reference can be valid, so ErrNotFound is returned.
func (a *initAppender) AddFast(ref uint64, t int64, v float64) error {
	if app := a.app; app != nil {
		return app.AddFast(ref, t, v)
	}
	return ErrNotFound
}

// Commit commits the wrapped appender. If no sample was ever added there is
// nothing to commit, but the active-appender gauge incremented by
// Head.Appender must still be released; the wrapped appender does that
// itself in the delegating case.
func (a *initAppender) Commit() error {
	if a.app == nil {
		a.head.metrics.activeAppenders.Dec()
		return nil
	}
	return a.app.Commit()
}

// Rollback rolls back the wrapped appender. If no sample was ever added there
// is nothing to roll back, but the active-appender gauge incremented by
// Head.Appender must still be released; the wrapped appender does that
// itself in the delegating case.
func (a *initAppender) Rollback() error {
	if a.app == nil {
		a.head.metrics.activeAppenders.Dec()
		return nil
	}
	return a.app.Rollback()
}

// Appender returns a new Appender on the database and counts it as active.
func (h *Head) Appender() Appender {
	h.metrics.activeAppenders.Inc()

	if h.MinTime() != math.MinInt64 {
		return h.appender()
	}
	// The head has no starting point yet. The init appender takes the first
	// appended timestamp as the base before handing over to a real appender.
	return &initAppender{head: h}
}

// appender creates a headAppender that accepts samples no older than half a
// chunk range before the head's current maximum time. Its sample buffer is
// taken from the append pool.
func (h *Head) appender() *headAppender {
	app := &headAppender{head: h, highTimestamp: math.MinInt64}
	app.mint = h.MaxTime() - h.chunkRange/2
	app.samples = h.getAppendBuffer()
	return app
}

// getAppendBuffer returns a sample buffer from the pool, or a fresh one with
// room for 512 samples if the pool is empty.
func (h *Head) getAppendBuffer() []RefSample {
	if pooled := h.appendPool.Get(); pooled != nil {
		return pooled.([]RefSample)
	}
	return make([]RefSample, 0, 512)
}

// putAppendBuffer hands b back to the pool, truncated to length zero so its
// capacity can be reused by the next appender.
func (h *Head) putAppendBuffer(b []RefSample) {
	h.appendPool.Put(b[:0])
}

// headAppender collects series and samples of one append transaction and
// applies them to the head on Commit.
type headAppender struct {
	head *Head
	// mint is the lowest timestamp this appender accepts.
	mint int64

	// series holds the series created by this appender; they are logged to
	// the WAL on Commit.
	series []RefSeries
	// samples holds the pending samples; the buffer comes from the head's pool.
	samples []RefSample
	// highTimestamp is the highest timestamp among the pending samples.
	highTimestamp int64
}

// Add queues the sample (t, v) for the series identified by lset, creating
// the series in the head if it does not exist yet. It returns the series
// reference for use with AddFast. Samples older than the appender's lower
// bound are rejected with ErrOutOfBounds before any series is created.
func (a *headAppender) Add(lset labels.Labels, t int64, v float64) (uint64, error) {
	if t < a.mint {
		return 0, ErrOutOfBounds
	}

	series, isNew := a.head.getOrCreate(lset.Hash(), lset)
	if isNew {
		// Remember new series so Commit can write them to the WAL.
		a.series = append(a.series, RefSeries{Ref: series.ref, Labels: lset})
	}

	err := a.AddFast(series.ref, t, v)
	return series.ref, err
}

// AddFast queues the sample (t, v) for the series with the given reference.
// It fails with a wrapped ErrNotFound for an unknown reference, with the
// error reported by the series if the sample cannot be appended to it, and
// with ErrOutOfBounds if t is below the appender's lower bound.
func (a *headAppender) AddFast(ref uint64, t int64, v float64) error {
	series := a.head.series.getByID(ref)
	if series == nil {
		return errors.Wrap(ErrNotFound, "unknown series")
	}

	series.Lock()
	err := series.appendable(t, v)
	series.Unlock()

	switch {
	case err != nil:
		return err
	case t < a.mint:
		return ErrOutOfBounds
	}

	if a.highTimestamp < t {
		a.highTimestamp = t
	}
	a.samples = append(a.samples, RefSample{Ref: ref, T: t, V: v, series: series})

	return nil
}

// Commit writes the pending series and samples to the WAL and then appends
// the samples to their in-memory series. Afterwards the head's maximum time
// is raised to the highest committed timestamp if necessary.
// The appender is released through Rollback in every case.
func (a *headAppender) Commit() error {
	defer a.Rollback()

	wal := a.head.wal
	if err := wal.LogSeries(a.series); err != nil {
		return err
	}
	if err := wal.LogSamples(a.samples); err != nil {
		return errors.Wrap(err, "WAL log samples")
	}

	// Samples the series refuses at this point are not counted as appended.
	var rejected int

	for i := range a.samples {
		smpl := &a.samples[i]

		smpl.series.Lock()
		ok, cut := smpl.series.append(smpl.T, smpl.V)
		smpl.series.Unlock()

		if !ok {
			rejected++
		}
		if cut {
			a.head.metrics.chunks.Inc()
			a.head.metrics.chunksCreated.Inc()
		}
	}

	a.head.metrics.samplesAppended.Add(float64(len(a.samples) - rejected))

	// Raise the head's max time; retry while other appenders race with us
	// and our timestamp is still the higher one.
	for ht := a.head.MaxTime(); a.highTimestamp > ht; ht = a.head.MaxTime() {
		if atomic.CompareAndSwapInt64(&a.head.maxTime, ht, a.highTimestamp) {
			break
		}
	}

	return nil
}

// Rollback releases the appender: it is no longer counted as active and its
// sample buffer goes back to the head's pool. The error is always nil.
func (a *headAppender) Rollback() error {
	head := a.head

	head.metrics.activeAppenders.Dec()
	head.putAppendBuffer(a.samples)

	return nil
}

// Delete all samples in the range of [mint, maxt] for series that satisfy the given
// label matchers.
//
// The range is clamped to the head's valid time range and, per series, to the
// time range the series actually holds data for. The resulting tombstones are
// logged to the WAL before they are applied in memory.
// Series that disappeared since the postings lookup or that hold no chunks
// are skipped as there is nothing to delete for them.
func (h *Head) Delete(mint, maxt int64, ms ...labels.Matcher) error {
	// Do not delete anything beyond the currently valid range.
	mint, maxt = clampInterval(mint, maxt, h.MinTime(), h.MaxTime())

	ir := h.indexRange(mint, maxt)

	p, absent, err := PostingsForMatchers(ir, ms...)
	if err != nil {
		return errors.Wrap(err, "select series")
	}

	var stones []Stone

Outer:
	for p.Next() {
		series := h.series.getByID(p.At())
		if series == nil {
			// The series was garbage collected after the postings were read.
			continue
		}

		for _, abs := range absent {
			if series.lset.Get(abs) != "" {
				continue Outer
			}
		}

		// The chunk list is mutated by appends and GC, so read the series
		// bounds under its lock. A series without chunks has no bounds and
		// minTime/maxTime would panic on it.
		series.Lock()
		if len(series.chunks) == 0 {
			series.Unlock()
			continue
		}
		smin, smax := series.minTime(), series.maxTime()
		series.Unlock()

		// Delete only until the current values and not beyond.
		t0, t1 := clampInterval(mint, maxt, smin, smax)
		stones = append(stones, Stone{p.At(), Intervals{{t0, t1}}})
	}

	if p.Err() != nil {
		return p.Err()
	}
	if err := h.wal.LogDeletes(stones); err != nil {
		return err
	}
	for _, s := range stones {
		h.tombstones.add(s.ref, s.intervals[0])
	}
	return nil
}

// gc removes data before the minimum timestamp from the head.
// It drops old chunks and empty series, updates the series/chunk metrics,
// removes the deleted series from all postings lists and finally rebuilds
// the symbol and label value indices from the remaining postings terms.
func (h *Head) gc() {
	// Only data strictly lower than this timestamp must be deleted.
	mint := h.MinTime()

	// Drop old chunks and remember series IDs and hashes if they can be
	// deleted entirely.
	deleted, chunksRemoved := h.series.gc(mint)
	seriesRemoved := len(deleted)

	h.metrics.seriesRemoved.Add(float64(seriesRemoved))
	h.metrics.series.Sub(float64(seriesRemoved))
	h.metrics.chunksRemoved.Add(float64(chunksRemoved))
	h.metrics.chunks.Sub(float64(chunksRemoved))

	// Remove deleted series IDs from the postings lists. First do a collection
	// run where we rebuild all postings that have something to delete
	h.postings.mtx.RLock()

	// replEntry is the filtered replacement for one postings list. idx is the
	// length of the list at collection time, so entries appended afterwards
	// can be carried over when the replacement is installed.
	type replEntry struct {
		idx int
		l   []uint64
	}
	collected := map[labels.Label]replEntry{}

	for t, p := range h.postings.m {
		repl := replEntry{idx: len(p)}

		for i, id := range p {
			if _, ok := deleted[id]; ok {
				// First ID that got deleted, initialize replacement with
				// all remaining IDs so far.
				if repl.l == nil {
					repl.l = make([]uint64, 0, len(p))
					repl.l = append(repl.l, p[:i]...)
				}
				continue
			}
			// Only add to the replacement once we know we have to do it.
			if repl.l != nil {
				repl.l = append(repl.l, id)
			}
		}
		if repl.l != nil {
			collected[t] = repl
		}
	}

	h.postings.mtx.RUnlock()

	// Replace all postings that have changed. Append all IDs that may have
	// been added while we switched locks.
	h.postings.mtx.Lock()

	for t, repl := range collected {
		l := append(repl.l, h.postings.m[t][repl.idx:]...)

		// Terms without any remaining series are dropped entirely.
		if len(l) > 0 {
			h.postings.m[t] = l
		} else {
			delete(h.postings.m, t)
		}
	}

	h.postings.mtx.Unlock()

	// Rebuild symbols and label value indices from what is left in the postings terms.
	h.postings.mtx.RLock()

	symbols := make(map[string]struct{})
	values := make(map[string]stringset, len(h.values))

	for t := range h.postings.m {
		symbols[t.Name] = struct{}{}
		symbols[t.Value] = struct{}{}

		ss, ok := values[t.Name]
		if !ok {
			ss = stringset{}
			values[t.Name] = ss
		}
		ss.set(t.Value)
	}

	h.postings.mtx.RUnlock()

	// Swap in the rebuilt indices under the symbol lock.
	h.symMtx.Lock()

	h.symbols = symbols
	h.values = values

	h.symMtx.Unlock()
}

// Tombstones returns a reader over the head's tombstones.
// The error is always nil.
func (h *Head) Tombstones() (TombstoneReader, error) {
	return h.tombstones, nil
}

// Index returns an IndexReader against the block.
// The reader is requested for the full int64 time range, which indexRange
// clamps to the head's minimum time. The error is always nil.
func (h *Head) Index() (IndexReader, error) {
	return h.indexRange(math.MinInt64, math.MaxInt64), nil
}

// indexRange returns an index reader for [mint, maxt]. The lower bound is
// raised to the head's minimum time if it lies before it.
func (h *Head) indexRange(mint, maxt int64) *headIndexReader {
	lower := h.MinTime()
	if lower < mint {
		lower = mint
	}
	return &headIndexReader{head: h, mint: lower, maxt: maxt}
}

// Chunks returns a ChunkReader against the block.
// The reader is requested for the full int64 time range, which chunksRange
// clamps to the head's minimum time. The error is always nil.
func (h *Head) Chunks() (ChunkReader, error) {
	return h.chunksRange(math.MinInt64, math.MaxInt64), nil
}

// chunksRange returns a chunk reader for [mint, maxt]. The lower bound is
// raised to the head's minimum time if it lies before it.
func (h *Head) chunksRange(mint, maxt int64) *headChunkReader {
	lower := h.MinTime()
	if lower < mint {
		lower = mint
	}
	return &headChunkReader{head: h, mint: lower, maxt: maxt}
}

// MinTime returns the lowest time bound on visible data in the head.
// The value is read atomically; it is math.MinInt64 while the head is
// uninitialized.
func (h *Head) MinTime() int64 {
	return atomic.LoadInt64(&h.minTime)
}

// MaxTime returns the highest timestamp seen in data of the head.
// The value is read atomically; it is math.MinInt64 while the head is
// uninitialized.
func (h *Head) MaxTime() int64 {
	return atomic.LoadInt64(&h.maxTime)
}

// Close closes the head by closing its WAL and returns the WAL's error.
// NOTE(review): flushing is presumably part of the WAL's Close; confirm
// against the WAL implementation.
func (h *Head) Close() error {
	return h.wal.Close()
}

// headChunkReader serves chunks of the head that overlap the time range
// [mint, maxt].
type headChunkReader struct {
	head       *Head
	mint, maxt int64
}

// Close is a no-op; the reader holds no resources of its own.
func (h *headChunkReader) Close() error {
	return nil
}

// packChunkID combines a series ID and a series-local chunk ID into a single
// 8 byte reference: the series ID occupies the upper 5 bytes, the chunk ID
// the lower 3 bytes.
// It panics if either ID does not fit into its share of the reference.
func packChunkID(seriesID, chunkID uint64) uint64 {
	const (
		maxSeriesID = 1<<40 - 1
		maxChunkID  = 1<<24 - 1
	)
	switch {
	case seriesID > maxSeriesID:
		panic("series ID exceeds 5 bytes")
	case chunkID > maxChunkID:
		panic("chunk ID exceeds 3 bytes")
	}
	return seriesID<<24 | chunkID
}

// unpackChunkID splits a packed chunk reference into the series ID held in
// its upper 5 bytes and the chunk ID held in its lower 3 bytes.
func unpackChunkID(id uint64) (seriesID, chunkID uint64) {
	const chunkMask = 1<<24 - 1

	seriesID = id >> 24
	chunkID = id & chunkMask
	return seriesID, chunkID
}

// Chunk returns the chunk for the reference number.
//
// ErrNotFound is returned if the referenced series or chunk no longer exists
// in the head (for example because it was garbage collected after the
// reference was handed out) or if the chunk lies outside of the reader's
// time range.
func (h *headChunkReader) Chunk(ref uint64) (chunks.Chunk, error) {
	sid, cid := unpackChunkID(ref)

	s := h.head.series.getByID(sid)
	// The series is gone; locking a nil series would panic.
	if s == nil {
		return nil, ErrNotFound
	}

	s.Lock()
	c := s.chunk(int(cid))
	// The chunk is gone; its bounds must not be read.
	if c == nil {
		s.Unlock()
		return nil, ErrNotFound
	}
	mint, maxt := c.minTime, c.maxTime
	s.Unlock()

	// Do not expose chunks that are outside of the specified range.
	if !intervalOverlap(mint, maxt, h.mint, h.maxt) {
		return nil, ErrNotFound
	}
	return &safeChunk{
		Chunk: c.chunk,
		s:     s,
		cid:   int(cid),
	}, nil
}

// safeChunk wraps a head chunk together with the series it belongs to and its
// chunk ID, so that iterators can be created under the series lock.
type safeChunk struct {
	chunks.Chunk
	s   *memSeries
	cid int
}

// Iterator returns an iterator over the chunk's samples. It is obtained from
// the owning series while holding the series lock.
func (c *safeChunk) Iterator() chunks.Iterator {
	series := c.s

	series.Lock()
	defer series.Unlock()

	return series.iterator(c.cid)
}

// func (c *safeChunk) Appender() (chunks.Appender, error) { panic("illegal") }
// func (c *safeChunk) Bytes() []byte                      { panic("illegal") }
// func (c *safeChunk) Encoding() chunks.Encoding          { panic("illegal") }

// headIndexReader serves index data of the head. Chunk metadata returned by
// Series is limited to chunks overlapping [mint, maxt].
type headIndexReader struct {
	head       *Head
	mint, maxt int64
}

// Close is a no-op; the reader holds no resources of its own.
func (h *headIndexReader) Close() error {
	return nil
}

// Symbols returns a copy of the head's symbol set, taken under the symbol
// read lock. The error is always nil.
func (h *headIndexReader) Symbols() (map[string]struct{}, error) {
	head := h.head

	head.symMtx.RLock()
	defer head.symMtx.RUnlock()

	snapshot := make(map[string]struct{}, len(head.symbols))
	for sym := range head.symbols {
		snapshot[sym] = struct{}{}
	}
	return snapshot, nil
}

// LabelValues returns the sorted values known for a label name.
// Exactly one name must be given, otherwise errInvalidSize is returned.
func (h *headIndexReader) LabelValues(names ...string) (StringTuples, error) {
	if len(names) != 1 {
		return nil, errInvalidSize
	}
	name := names[0]

	h.head.symMtx.RLock()
	defer h.head.symMtx.RUnlock()

	var vals []string
	for val := range h.head.values[name] {
		vals = append(vals, val)
	}
	sort.Strings(vals)

	// One name was requested, so every tuple has length one.
	return &stringTuples{l: 1, s: vals}, nil
}

// Postings returns the postings list iterator for the label pair.
// The lookup is delegated to the head's in-memory postings; the error is
// always nil.
func (h *headIndexReader) Postings(name, value string) (Postings, error) {
	return h.head.postings.get(name, value), nil
}

// SortedPostings expands p and returns its series IDs ordered by the label
// sets of the series they refer to. If p fails, the returned postings carry
// the wrapped error.
func (h *headIndexReader) SortedPostings(p Postings) Postings {
	ids := make([]uint64, 0, 128)
	for p.Next() {
		ids = append(ids, p.At())
	}
	if err := p.Err(); err != nil {
		return errPostings{err: errors.Wrap(err, "expand postings")}
	}

	series := h.head.series
	byLabels := func(i, j int) bool {
		left, right := series.getByID(ids[i]), series.getByID(ids[j])

		// A series may have vanished since the postings were read; such
		// pairs are reported as "not less".
		if left == nil || right == nil {
			level.Debug(h.head.logger).Log("msg", "looked up series not found")
			return false
		}
		return labels.Compare(left.lset, right.lset) < 0
	}
	sort.Slice(ids, byLabels)

	return newListPostings(ids)
}

// Series fills lbls with the label set and chks with the chunk metadata of
// the series with the given reference. Both destinations are reset and
// reused. Only chunks overlapping the reader's time range are reported.
// ErrNotFound is returned, and counted in the metrics, for unknown references.
func (h *headIndexReader) Series(ref uint64, lbls *labels.Labels, chks *[]ChunkMeta) error {
	series := h.head.series.getByID(ref)
	if series == nil {
		h.head.metrics.seriesNotFound.Inc()
		return ErrNotFound
	}
	*lbls = append((*lbls)[:0], series.lset...)

	series.Lock()
	defer series.Unlock()

	*chks = (*chks)[:0]

	for pos, c := range series.chunks {
		if intervalOverlap(c.minTime, c.maxTime, h.mint, h.maxt) {
			*chks = append(*chks, ChunkMeta{
				MinTime: c.minTime,
				MaxTime: c.maxTime,
				Ref:     packChunkID(series.ref, uint64(series.chunkID(pos))),
			})
		}
	}

	return nil
}

// LabelIndices returns one single-element name tuple for every label name
// the head has values for. The error is always nil.
func (h *headIndexReader) LabelIndices() ([][]string, error) {
	head := h.head

	head.symMtx.RLock()
	defer head.symMtx.RUnlock()

	indices := make([][]string, 0, len(head.values))
	for name := range head.values {
		indices = append(indices, []string{name})
	}
	return indices, nil
}

// getOrCreate returns the series for the label set, creating it under a
// freshly allocated ID if it does not exist. The boolean reports whether the
// series was created by this call.
func (h *Head) getOrCreate(hash uint64, lset labels.Labels) (*memSeries, bool) {
	// Going straight to getOrCreateWithID would be semantically sufficient,
	// but it would allocate a series and burn an ID on every sample added via
	// Add(), making IDs rather random and harder to compress in postings.
	if existing := h.series.getByHash(hash, lset); existing != nil {
		return existing, false
	}

	// Optimistically assume that we are the first one to create the series.
	nextID := atomic.AddUint64(&h.lastSeriesID, 1)

	return h.getOrCreateWithID(nextID, hash, lset)
}

// getOrCreateWithID registers a series with the given ID unless a series with
// the same label set already exists, in which case the existing one is
// returned together with false. For a new series the metrics, the postings
// and the symbol and label value indices are updated.
func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*memSeries, bool) {
	s, created := h.series.getOrSet(hash, newMemSeries(lset, id, h.chunkRange))
	if !created {
		return s, false
	}

	h.metrics.series.Inc()
	h.metrics.seriesCreated.Inc()

	h.postings.add(id, lset)

	h.symMtx.Lock()
	defer h.symMtx.Unlock()

	for _, lbl := range lset {
		known, ok := h.values[lbl.Name]
		if !ok {
			known = stringset{}
			h.values[lbl.Name] = known
		}
		known.set(lbl.Value)

		h.symbols[lbl.Name] = struct{}{}
		h.symbols[lbl.Value] = struct{}{}
	}

	return s, true
}

// seriesHashmap is a simple hashmap for memSeries by their label set. It is built
// on top of a regular hashmap and holds a slice of series to resolve hash collisions.
// Its methods require the hash to be submitted with it to avoid re-computations throughout
// the code.
// The methods do no locking of their own.
type seriesHashmap map[uint64][]*memSeries

// get returns the series stored under hash whose label set equals lset, or
// nil if there is none.
func (m seriesHashmap) get(hash uint64, lset labels.Labels) *memSeries {
	bucket := m[hash]
	for i := range bucket {
		if candidate := bucket[i]; candidate.lset.Equals(lset) {
			return candidate
		}
	}
	return nil
}

// set stores s under hash. An existing entry with an equal label set is
// replaced in place, otherwise s is added to the bucket.
func (m seriesHashmap) set(hash uint64, s *memSeries) {
	bucket := m[hash]
	for i := range bucket {
		if bucket[i].lset.Equals(s.lset) {
			bucket[i] = s
			return
		}
	}
	m[hash] = append(bucket, s)
}

// del removes the series with label set lset from the bucket of hash. The
// bucket is rebuilt without it and removed from the map once it is empty.
func (m seriesHashmap) del(hash uint64, lset labels.Labels) {
	var kept []*memSeries
	for _, s := range m[hash] {
		if s.lset.Equals(lset) {
			continue
		}
		kept = append(kept, s)
	}
	if len(kept) > 0 {
		m[hash] = kept
		return
	}
	delete(m, hash)
}

// stripeSeries locks modulo ranges of IDs and hashes to reduce lock contention.
// The locks are padded to not be on the same cache line. Filling the padded space
// with the maps was profiled to be slower – likely due to the additional pointer
// dereferences.
//
// series is indexed by series ID, hashes by label set hash; lock i guards
// both series[i] and hashes[i].
type stripeSeries struct {
	series [stripeSize]map[uint64]*memSeries
	hashes [stripeSize]seriesHashmap
	locks  [stripeSize]stripeLock
}

const (
	// stripeSize is the number of stripes; it is a power of two so that the
	// stripe of an ID or hash can be selected with stripeMask.
	stripeSize = 1 << 14
	// stripeMask selects the stripe index from an ID or hash.
	stripeMask = stripeSize - 1
)

// stripeLock is the lock guarding one stripe of a stripeSeries.
type stripeLock struct {
	sync.RWMutex
	// Padding to avoid multiple locks being on the same cache line.
	_ [40]byte
}

// newStripeSeries returns a stripeSeries with the ID and hash maps of all
// stripes allocated.
func newStripeSeries() *stripeSeries {
	s := new(stripeSeries)

	for i := 0; i < stripeSize; i++ {
		s.series[i] = make(map[uint64]*memSeries)
		s.hashes[i] = make(seriesHashmap)
	}
	return s
}

// gc garbage collects old chunks that are strictly before mint and removes
// series entirely that have no chunks left.
// It returns the set of IDs of the removed series and the total number of
// chunks that were dropped.
func (s *stripeSeries) gc(mint int64) (map[uint64]struct{}, int) {
	var (
		deleted  = map[uint64]struct{}{}
		rmChunks = 0
	)
	// Run through all series and truncate old chunks. Mark those with no
	// chunks left as deleted and store their ID.
	for i := 0; i < stripeSize; i++ {
		s.locks[i].Lock()

		// Series are visited through the hash index of the stripe.
		for hash, all := range s.hashes[i] {
			for _, series := range all {
				series.Lock()
				rmChunks += series.truncateChunksBefore(mint)

				if len(series.chunks) > 0 {
					series.Unlock()
					continue
				}

				// The series is gone entirely. We need to keep the series lock
				// and make sure we have acquired the stripe locks for hash and ID of the
				// series alike.
				// If we don't hold them all, there's a very small chance that a series receives
				// samples again while we are half-way into deleting it.
				j := int(series.ref & stripeMask)

				// Stripe i is already locked; only take a second lock if the
				// ID lives in a different stripe.
				if i != j {
					s.locks[j].Lock()
				}

				deleted[series.ref] = struct{}{}
				s.hashes[i].del(hash, series.lset)
				delete(s.series[j], series.ref)

				if i != j {
					s.locks[j].Unlock()
				}

				series.Unlock()
			}
		}

		s.locks[i].Unlock()
	}

	return deleted, rmChunks
}

// getByID returns the series with the given ID or nil if it is unknown.
// The lookup happens under the read lock of the ID's stripe.
func (s *stripeSeries) getByID(id uint64) *memSeries {
	stripe := id & stripeMask
	lock := &s.locks[stripe]

	lock.RLock()
	defer lock.RUnlock()

	return s.series[stripe][id]
}

// getByHash returns the series with the given label set or nil if it is
// unknown. The lookup happens under the read lock of the hash's stripe.
func (s *stripeSeries) getByHash(hash uint64, lset labels.Labels) *memSeries {
	stripe := hash & stripeMask
	lock := &s.locks[stripe]

	lock.RLock()
	defer lock.RUnlock()

	return s.hashes[stripe].get(hash, lset)
}

// getOrSet stores series unless a series with the same label set exists
// already. It returns the series that is stored afterwards and whether it is
// the one passed in. The hash index and the ID index are updated one after
// the other, each under the lock of its own stripe.
func (s *stripeSeries) getOrSet(hash uint64, series *memSeries) (*memSeries, bool) {
	hashStripe := hash & stripeMask

	s.locks[hashStripe].Lock()
	prev := s.hashes[hashStripe].get(hash, series.lset)
	if prev == nil {
		s.hashes[hashStripe].set(hash, series)
	}
	s.locks[hashStripe].Unlock()

	if prev != nil {
		return prev, false
	}

	idStripe := series.ref & stripeMask

	s.locks[idStripe].Lock()
	s.series[idStripe][series.ref] = series
	s.locks[idStripe].Unlock()

	return series, true
}

// sample is a single timestamp/value pair.
type sample struct {
	t int64
	v float64
}

// memSeries is the in-memory representation of a series. None of its methods
// are goroutine safe and it is the caller's responsibility to lock it.
type memSeries struct {
	sync.Mutex

	ref        uint64
	lset       labels.Labels
	chunks     []*memChunk
	chunkRange int64
	// firstChunkID is the ID of chunks[0]; it grows as old chunks are
	// truncated so that chunk IDs stay stable.
	firstChunkID int

	nextAt    int64 // timestamp at which to cut the next chunk.
	lastValue float64
	// sampleBuf holds the most recently appended samples, newest last.
	sampleBuf [4]sample

	app chunks.Appender // Current appender for the chunk.
}

// minTime returns the minimum time of the series' first chunk.
// It panics if the series has no chunks.
func (s *memSeries) minTime() int64 {
	return s.chunks[0].minTime
}

// maxTime returns the maximum time of the series' head chunk.
// It panics if the series has no chunks, as head() returns nil then.
func (s *memSeries) maxTime() int64 {
	return s.head().maxTime
}

// cut starts a new XOR chunk beginning at mint, makes it the series' head
// chunk and points the series' appender at it. It panics if the chunk cannot
// provide an appender.
func (s *memSeries) cut(mint int64) *memChunk {
	fresh := &memChunk{chunk: chunks.NewXORChunk(), minTime: mint, maxTime: math.MinInt64}
	s.chunks = append(s.chunks, fresh)

	// The end of the chunk range containing mint is the latest point at
	// which the next chunk has to be started. An earlier timestamp may be
	// chosen dynamically at a later point.
	_, upper := rangeForTimestamp(mint, s.chunkRange)
	s.nextAt = upper

	app, err := fresh.chunk.Appender()
	if err != nil {
		panic(err)
	}
	s.app = app

	return fresh
}

// newMemSeries returns a series without any chunks for the given label set
// and ID.
func newMemSeries(lset labels.Labels, id uint64, chunkRange int64) *memSeries {
	return &memSeries{
		ref:        id,
		lset:       lset,
		chunkRange: chunkRange,
		nextAt:     math.MinInt64,
	}
}

// appendable reports whether the sample (t, v) may be appended to the series.
// It returns ErrOutOfOrderSample if t lies before the newest sample and
// ErrAmendSample if t equals the newest timestamp but v differs bit-wise from
// the last value. A series without chunks accepts everything.
func (s *memSeries) appendable(t int64, v float64) error {
	last := s.head()
	if last == nil {
		return nil
	}

	switch {
	case t > last.maxTime:
		return nil
	case t < last.maxTime:
		return ErrOutOfOrderSample
	case math.Float64bits(s.lastValue) != math.Float64bits(v):
		// Same timestamp, different value.
		return ErrAmendSample
	}
	// Exact duplicates are allowed: they occur in valid cases like federation
	// and erroring out on them would be extremely noisy.
	return nil
}

// chunk returns the chunk with the given ID or nil if the ID is outside of
// the chunks currently held by the series.
func (s *memSeries) chunk(id int) *memChunk {
	if pos := id - s.firstChunkID; pos >= 0 && pos < len(s.chunks) {
		return s.chunks[pos]
	}
	return nil
}

// chunkID returns the ID of the chunk at position pos of the chunks slice.
// It is the inverse of the position computation done in chunk().
func (s *memSeries) chunkID(pos int) int {
	return pos + s.firstChunkID
}

// truncateChunksBefore removes the leading chunks of the series that have no
// timestamp at or after mint and returns how many were removed.
// Chunk IDs remain unchanged.
func (s *memSeries) truncateChunksBefore(mint int64) (removed int) {
	for removed < len(s.chunks) && s.chunks[removed].maxTime < mint {
		removed++
	}
	s.chunks = append(s.chunks[:0], s.chunks[removed:]...)
	s.firstChunkID += removed

	return removed
}

// append adds the sample (t, v) to the series, cutting a new chunk first if
// the series has none or if t reached the cut-over time. It reports whether
// the sample was stored and whether a chunk was created. Samples that are
// not newer than the head chunk's latest timestamp are rejected.
func (s *memSeries) append(t int64, v float64) (success, chunkCreated bool) {
	const samplesPerChunk = 120

	cur := s.head()
	if cur == nil {
		cur, chunkCreated = s.cut(t), true
	}
	numSamples := cur.chunk.NumSamples()

	// Out of order sample.
	if t <= cur.maxTime {
		return false, chunkCreated
	}
	// Once a quarter of the desired sample count is reached, fix the time at
	// which the next chunk starts. It never lies after the bound that was
	// set when the chunk was cut.
	if numSamples == samplesPerChunk/4 {
		s.nextAt = computeChunkEndTime(cur.minTime, cur.maxTime, s.nextAt)
	}
	if s.nextAt <= t {
		cur, chunkCreated = s.cut(t), true
	}
	s.app.Append(t, v)

	cur.maxTime = t
	s.lastValue = v

	// Shift the buffer of recent samples left by one and add the new one.
	copy(s.sampleBuf[:3], s.sampleBuf[1:])
	s.sampleBuf[3] = sample{t: t, v: v}

	return true, chunkCreated
}

// computeChunkEndTime estimates the end timestamp of a chunk from its start,
// the timestamp it has reached so far and the upper bound up to which data is
// inserted. It assumes that the chunk is 1/4 full at cur.
func computeChunkEndTime(start, cur, max int64) int64 {
	span := max - start
	elapsed := cur - start + 1

	// Number of chunks of the projected size that fit into the span.
	if parts := span / (elapsed * 4); parts != 0 {
		return start + span/parts
	}
	return max
}

// iterator returns an iterator over the samples of the chunk with the given ID.
func (s *memSeries) iterator(id int) chunks.Iterator {
	c := s.chunk(id)
	if c == nil {
		// TODO(fabxc): Workaround! A querier may have retrieved a pointer to
		// a series' chunk which was then garbage collected before being accessed.
		// We must ensure not to garbage collect as long as any reader still
		// holds a reference.
		return chunks.NewNopIterator()
	}

	// Every chunk except the last one is iterated directly.
	if pos, last := id-s.firstChunkID, len(s.chunks)-1; pos < last {
		return c.chunk.Iterator()
	}

	// For the last chunk, the final 4 samples are served from the series
	// buffer because their compressed bytes may be mutated by added samples.
	inner := c.chunk.Iterator()
	return &memSafeIterator{
		Iterator: inner,
		i:        -1,
		total:    c.chunk.NumSamples(),
		buf:      s.sampleBuf,
	}
}

// head returns the last chunk of the series, or nil if the series currently
// holds no chunks.
func (s *memSeries) head() *memChunk {
	if n := len(s.chunks); n > 0 {
		return s.chunks[n-1]
	}
	return nil
}

// memChunk pairs a chunk with the time range it covers.
type memChunk struct {
	// chunk holds the sample data.
	chunk chunks.Chunk
	// minTime is the start of the chunk's range; presumably the timestamp the
	// chunk was cut at (set outside this section, verify against cut).
	minTime int64
	// maxTime is set to the timestamp of each sample appended to the chunk.
	maxTime int64
}

// memSafeIterator wraps a chunk iterator and serves the last four samples of
// the chunk from a copied sample buffer rather than from the wrapped iterator.
type memSafeIterator struct {
	chunks.Iterator

	// i is the index of the current sample; memSeries.iterator starts it at -1.
	i     int
	// total is the number of samples in the chunk when the iterator was created.
	total int
	// buf is a copy of the series' buffer of most recently appended samples.
	buf   [4]sample
}

// Next advances the iterator by one sample and reports whether a sample is
// available. The wrapped iterator is only advanced while more than four
// samples remain; the final four positions are tracked by index alone.
func (it *memSafeIterator) Next() bool {
	next := it.i + 1
	if next >= it.total {
		return false
	}
	it.i = next
	if it.total-next <= 4 {
		// Within the buffered tail; At reads from buf.
		return true
	}
	return it.Iterator.Next()
}

// At returns the timestamp and value of the current sample. While more than
// four samples remain it defers to the wrapped iterator; otherwise the sample
// is taken from buf, indexed from the end of the chunk.
func (it *memSafeIterator) At() (int64, float64) {
	remaining := it.total - it.i
	if remaining > 4 {
		return it.Iterator.At()
	}
	smp := it.buf[4-remaining]
	return smp.t, smp.v
}
-												Add liecence file and headers

											
										
										
											8 years ago
+								// Copyright 2017 The Prometheus Authors
 								// Licensed under the Apache License, Version 2.0 (the "License");
 								// you may not use this file except in compliance with the License.
 								// You may obtain a copy of the License at
 								//
 								// http://www.apache.org/licenses/LICENSE-2.0
 								//
 								// Unless required by applicable law or agreed to in writing, software
 								// distributed under the License is distributed on an "AS IS" BASIS,
 								// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 								// See the License for the specific language governing permissions and
 								// limitations under the License.
-												Add new interfaces and skeleton

											
										
										
											8 years ago
+								package tsdb
 								import (
-												Fix last timestamp initialization

This initializes the chunkDesc's last timestamp to the minimum
value so initial samples with a timestamp of 0 (e.g. in tests)
are not accidentally dropped.

											
										
										
											8 years ago
+									"math"
-												wal: parallelize sample processing

											
										
										
											7 years ago
+									"runtime"
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+									"sort"
-												Add new interfaces and skeleton

											
										
										
											8 years ago
+									"sync"
-												Count writer references on head blocks

											
										
										
											8 years ago
+									"sync/atomic"
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									"time"
-												Switch append refs to string

											
										
										
											8 years ago
-												Periodically fsync WAL, make head cut async

											
										
										
											8 years ago
+									"github.com/go-kit/kit/log"
-												Add levels to all log lines.

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

											
										
										
											7 years ago
+									"github.com/go-kit/kit/log/level"
-												Move stats into meta.json file, cleanup, docs

											
										
										
											8 years ago
+									"github.com/pkg/errors"
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									"github.com/prometheus/client_golang/prometheus"
-												Adjust import names to new repository organisation

											
										
										
											8 years ago
+									"github.com/prometheus/tsdb/chunks"
 									"github.com/prometheus/tsdb/labels"
-												Add new interfaces and skeleton

											
										
										
											8 years ago
+								)
-												Write to WAL before appending to memory storage

											
										
										
											8 years ago
+								var (
 									// ErrNotFound is returned if a looked up resource was not found.
-												Fix races and add comments on remaining ones

											
										
										
											8 years ago
+									ErrNotFound = errors.Errorf("not found")
-												Write to WAL before appending to memory storage

											
										
										
											8 years ago
 									// ErrOutOfOrderSample is returned if an appended sample has a
 									// timestamp smaller than the most recent sample.
 									ErrOutOfOrderSample = errors.New("out of order sample")
 									// ErrAmendSample is returned if an appended sample has the same timestamp
 									// as the most recent sample but a different value.
 									ErrAmendSample = errors.New("amending sample")
 									// ErrOutOfBounds is returned if an appended sample is out of the
 									// writable time range.
 									ErrOutOfBounds = errors.New("out of bounds")
 								)
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								// Head handles reads and writes of time series data within a time window.
 								type Head struct {
 									chunkRange int64
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									metrics    *headMetrics
 									wal        WAL
 									logger     log.Logger
 									appendPool sync.Pool
-												Move stats into meta.json file, cleanup, docs

											
										
										
											8 years ago
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									minTime, maxTime int64
-												Change series ID from uint32 to uint64

											
										
										
											7 years ago
+									lastSeriesID     uint64
-												Count writer references on head blocks

											
										
										
											8 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									// All series addressable by their ID or hash.
 									series *stripeSeries
-												Consolidate mem index into HeadBlock

											
										
										
											8 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									symMtx  sync.RWMutex
 									symbols map[string]struct{}
 									values  map[string]stringset // label names to possible values
 									postings *memPostings // postings lists for terms
-												Bucket samples before appending.

This pre-sorts samples into buckets before appending them to reduce
locking of shards.

											
										
										
											8 years ago
-												Refactor tombstone reader types

											
										
										
											7 years ago
+									tombstones memTombstones
-												Add new interfaces and skeleton

											
										
										
											8 years ago
+								}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+								type headMetrics struct {
 									activeAppenders     prometheus.Gauge
 									series              prometheus.Gauge
 									seriesCreated       prometheus.Counter
 									seriesRemoved       prometheus.Counter
-												head: track number of series not found errors in metric

											
										
										
											7 years ago
+									seriesNotFound      prometheus.Counter
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									chunks              prometheus.Gauge
 									chunksCreated       prometheus.Gauge
 									chunksRemoved       prometheus.Gauge
 									gcDuration          prometheus.Summary
 									minTime             prometheus.GaugeFunc
 									maxTime             prometheus.GaugeFunc
 									samplesAppended     prometheus.Counter
 									walTruncateDuration prometheus.Summary
 								}
 								func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
 									m := &headMetrics{}
 									m.activeAppenders = prometheus.NewGauge(prometheus.GaugeOpts{
-												Prefix all metrics with `prometheus_*`

											
										
										
											7 years ago
+										Name: "prometheus_tsdb_head_active_appenders",
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										Help: "Number of currently active appender transactions",
 									})
 									m.series = prometheus.NewGauge(prometheus.GaugeOpts{
-												Prefix all metrics with `prometheus_*`

											
										
										
											7 years ago
+										Name: "prometheus_tsdb_head_series",
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										Help: "Total number of series in the head block.",
 									})
 									m.seriesCreated = prometheus.NewGauge(prometheus.GaugeOpts{
-												Prefix all metrics with `prometheus_*`

											
										
										
											7 years ago
+										Name: "prometheus_tsdb_head_series_created_total",
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										Help: "Total number of series created in the head",
 									})
 									m.seriesRemoved = prometheus.NewGauge(prometheus.GaugeOpts{
-												Prefix all metrics with `prometheus_*`

											
										
										
											7 years ago
+										Name: "prometheus_tsdb_head_series_removed_total",
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										Help: "Total number of series removed in the head",
 									})
-												head: track number of series not found errors in metric

											
										
										
											7 years ago
+									m.seriesNotFound = prometheus.NewCounter(prometheus.CounterOpts{
-												Prefix all metrics with `prometheus_*`

											
										
										
											7 years ago
+										Name: "prometheus_tsdb_head_series_not_found",
-												head: track number of series not found errors in metric

											
										
										
											7 years ago
+										Help: "Total number of requests for series that were not found.",
 									})
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									m.chunks = prometheus.NewGauge(prometheus.GaugeOpts{
-												Prefix all metrics with `prometheus_*`

											
										
										
											7 years ago
+										Name: "prometheus_tsdb_head_chunks",
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										Help: "Total number of chunks in the head block.",
 									})
 									m.chunksCreated = prometheus.NewGauge(prometheus.GaugeOpts{
-												Prefix all metrics with `prometheus_*`

											
										
										
											7 years ago
+										Name: "prometheus_tsdb_head_chunks_created_total",
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										Help: "Total number of chunks created in the head",
 									})
 									m.chunksRemoved = prometheus.NewGauge(prometheus.GaugeOpts{
-												Prefix all metrics with `prometheus_*`

											
										
										
											7 years ago
+										Name: "prometheus_tsdb_head_chunks_removed_total",
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										Help: "Total number of chunks removed in the head",
 									})
 									m.gcDuration = prometheus.NewSummary(prometheus.SummaryOpts{
-												Prefix all metrics with `prometheus_*`

											
										
										
											7 years ago
+										Name: "prometheus_tsdb_head_gc_duration_seconds",
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										Help: "Runtime of garbage collection in the head block.",
 									})
-												Fix innocuous typo in variable names

This change fixes the variable names holding the tsdb_head_max_time and
tsdb_head_min_time metrics. It is a cosmetic change to improve the
code readability as the metric values are taken from the correct
variables.

											
										
										
											7 years ago
+									m.maxTime = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
-												Prefix all metrics with `prometheus_*`

											
										
										
											7 years ago
+										Name: "prometheus_tsdb_head_max_time",
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										Help: "Maximum timestamp of the head block.",
 									}, func() float64 {
 										return float64(h.MaxTime())
 									})
-												Fix innocuous typo in variable names

This change fixes the variable names holding the tsdb_head_max_time and
tsdb_head_min_time metrics. It is a cosmetic change to improve the
code readability as the metric values are taken from the correct
variables.

											
										
										
											7 years ago
+									m.minTime = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
-												Prefix all metrics with `prometheus_*`

											
										
										
											7 years ago
+										Name: "prometheus_tsdb_head_min_time",
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										Help: "Minimum time bound of the head block.",
 									}, func() float64 {
 										return float64(h.MinTime())
 									})
 									m.walTruncateDuration = prometheus.NewSummary(prometheus.SummaryOpts{
-												Prefix all metrics with `prometheus_*`

											
										
										
											7 years ago
+										Name: "prometheus_tsdb_wal_truncate_duration_seconds",
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										Help: "Duration of WAL truncation.",
 									})
 									m.samplesAppended = prometheus.NewCounter(prometheus.CounterOpts{
-												Prefix all metrics with `prometheus_*`

											
										
										
											7 years ago
+										Name: "prometheus_tsdb_head_samples_appended_total",
-												Typo in prometheus_tsdb_head_samples_appended_total description (#188)


											
										
										
											7 years ago
+										Help: "Total number of appended samples.",
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									})
 									if r != nil {
 										r.MustRegister(
 											m.activeAppenders,
 											m.chunks,
 											m.chunksCreated,
 											m.chunksRemoved,
 											m.series,
 											m.seriesCreated,
 											m.seriesRemoved,
-												head: track number of series not found errors in metric

											
										
										
											7 years ago
+											m.seriesNotFound,
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+											m.minTime,
 											m.maxTime,
 											m.gcDuration,
 											m.walTruncateDuration,
 											m.samplesAppended,
 										)
 									}
 									return m
 								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								// NewHead opens the head block in dir.
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+								func NewHead(r prometheus.Registerer, l log.Logger, wal WAL, chunkRange int64) (*Head, error) {
 									if l == nil {
 										l = log.NewNopLogger()
 									}
 									if wal == nil {
-												Filter WAL data in Head, misc fixes

											
										
										
											7 years ago
+										wal = NopWAL()
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									}
 									if chunkRange < 1 {
 										return nil, errors.Errorf("invalid chunk range %d", chunkRange)
 									}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									h := &Head{
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										wal:        wal,
 										logger:     l,
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+										chunkRange: chunkRange,
-												Filter WAL data in Head, misc fixes

											
										
										
											7 years ago
+										minTime:    math.MinInt64,
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+										maxTime:    math.MinInt64,
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+										series:     newStripeSeries(),
-												Implement Delete on HeadBlock

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

											
										
										
											8 years ago
+										values:     map[string]stringset{},
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of this, the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
+										symbols:    map[string]struct{}{},
-												Load postings in batch on startup

This allows to insert IDs to postings out of order until
a trigger function is called. This avoids the insertion sort we usually
do which can be very costly since WAL entries are more out of order than
regular adds.

											
										
										
											7 years ago
+										postings:   newUnorderedMemPostings(),
-												Refactor tombstone reader types

											
										
										
											7 years ago
+										tombstones: memTombstones{},
-												Fix races

											
										
										
											8 years ago
+									}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									h.metrics = newHeadMetrics(h, r)
-												Filter WAL data in Head, misc fixes

											
										
										
											7 years ago
+									return h, nil
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+								}
-												wal: parallelize sample processing

											
										
										
											7 years ago
+								// processWALSamples adds a partition of samples it receives to the head and passes
 								// them on to other workers.
 								// Samples before the mint timestamp are discarded.
 								func (h *Head) processWALSamples(
 									mint int64,
 									partition, total uint64,
 									input <-chan []RefSample, output chan<- []RefSample,
 								) (unknownRefs uint64) {
 									defer close(output)
 									for samples := range input {
 										for _, s := range samples {
 											if s.T < mint || s.Ref%total != partition {
 												continue
 											}
 											ms := h.series.getByID(s.Ref)
 											if ms == nil {
 												unknownRefs++
 												continue
 											}
 											_, chunkCreated := ms.append(s.T, s.V)
 											if chunkCreated {
 												h.metrics.chunksCreated.Inc()
 												h.metrics.chunks.Inc()
 											}
 										}
 										output <- samples
 									}
 									return unknownRefs
 								}
-												Simplify series create logic in head

											
										
										
											7 years ago
+								// ReadWAL initializes the head by consuming the write ahead log.
-												Filter WAL data in Head, misc fixes

											
										
										
											7 years ago
+								func (h *Head) ReadWAL() error {
-												Load postings in batch on startup

This allows to insert IDs to postings out of order until
a trigger function is called. This avoids the insertion sort we usually
do which can be very costly since WAL entries are more out of order than
regular adds.

											
										
										
											7 years ago
+									defer h.postings.ensureOrder()
-												WAL refactoring and truncation fixes and test

											
										
										
											7 years ago
+									r := h.wal.Reader()
-												Filter WAL data in Head, misc fixes

											
										
										
											7 years ago
+									mint := h.MinTime()
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
-												Use boolean function instead of postings to drop WAL series

There is no guarantee or requirement for WAL writers to only add
series entries in increasing order of IDs. A postings list cannot look
back and thus unordered WAL entries would skip over IDs to not truncate
from the WAL.
We replace it with a simple boolean check function that does not require
order.

											
										
										
											7 years ago
+									// Track number of samples that referenced a series we don't know about
 									// for error reporting.
-												wal: parallelize sample processing

											
										
										
											7 years ago
+									var unknownRefs uint64
 									// Start workers that each process samples for a partition of the series ID space.
 									// They are connected through a ring of channels which ensures that all sample batches
 									// read from the WAL are processed in order.
 									var (
-												Ensure workers terminated fully before reading unknownRefs

											
										
										
											7 years ago
+										wg         sync.WaitGroup
-												wal: parallelize sample processing

											
										
										
											7 years ago
+										n          = runtime.GOMAXPROCS(0)
 										firstInput = make(chan []RefSample, 300)
 										input      = firstInput
 									)
-												Ensure workers terminated fully before reading unknownRefs

											
										
										
											7 years ago
+									wg.Add(n)
-												wal: parallelize sample processing

											
										
										
											7 years ago
+									for i := 0; i < n; i++ {
 										output := make(chan []RefSample, 300)
 										go func(i int, input <-chan []RefSample, output chan<- []RefSample) {
 											unknown := h.processWALSamples(mint, uint64(i), uint64(n), input, output)
 											atomic.AddUint64(&unknownRefs, unknown)
-												Ensure workers terminated fully before reading unknownRefs

											
										
										
											7 years ago
+											wg.Done()
-												wal: parallelize sample processing

											
										
										
											7 years ago
+										}(i, input, output)
-												Use boolean function instead of postings to drop WAL series

There is no guarantee or requirement for WAL writers to only add
series entries in increasing order of IDs. A postings list cannot look
back and thus unordered WAL entries would skip over IDs to not truncate
from the WAL.
We replace it with a simple boolean check function that does not require
order.

											
										
										
											7 years ago
-												wal: parallelize sample processing

											
										
										
											7 years ago
+										// The output feeds the next worker goroutine. For the last worker,
 										// it feeds the initial input again to reuse the RefSample slices.
 										input = output
 									}
-												Use boolean function instead of postings to drop WAL series

There is no guarantee or requirement for WAL writers to only add
series entries in increasing order of IDs. A postings list cannot look
back and thus unordered WAL entries would skip over IDs to not truncate
from the WAL.
We replace it with a simple boolean check function that does not require
order.

											
										
										
											7 years ago
-												wal: parallelize sample processing

											
										
										
											7 years ago
+									// TODO(fabxc): series entries spread between samples can starve the sample workers.
 									// Even with buffered channels, this can impact startup time with lots of series churn.
 									// We must not parallelize series creation itself but could make the indexing asynchronous.
-												wal: decode and process in separate threads.

											
										
										
											7 years ago
+									seriesFunc := func(series []RefSeries) {
-												[WIP]: WAL implementation

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

											
										
										
											7 years ago
+										for _, s := range series {
-												Create series with ID recorded in WAL when reading it back

											
										
										
											7 years ago
+											h.getOrCreateWithID(s.Ref, s.Labels.Hash(), s.Labels)
 											if h.lastSeriesID < s.Ref {
 												h.lastSeriesID = s.Ref
 											}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										}
 									}
-												wal: decode and process in separate threads.

											
										
										
											7 years ago
+									samplesFunc := func(samples []RefSample) {
-												head: limit WAL sample processing batch size

											
										
										
											7 years ago
+										// We split up the samples into chunks of 5000 samples or less.
 										// With O(300 * #cores) in-flight sample batches, large scrapes could otherwise
 										// cause thousands of very large in flight buffers occupying large amounts
 										// of unused memory.
 										for len(samples) > 0 {
 											n := 5000
 											if len(samples) < n {
 												n = len(samples)
 											}
 											var buf []RefSample
 											select {
 											case buf = <-input:
 											default:
 											}
 											firstInput <- append(buf[:0], samples[:n]...)
 											samples = samples[n:]
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										}
 									}
-												wal: decode and process in separate threads.

											
										
										
											7 years ago
+									deletesFunc := func(stones []Stone) {
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										for _, s := range stones {
 											for _, itv := range s.intervals {
-												Filter WAL data in Head, misc fixes

											
										
										
											7 years ago
+												if itv.Maxt < mint {
 													continue
 												}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+												h.tombstones.add(s.ref, itv)
 											}
 										}
 									}
-												wal: parallelize sample processing

											
										
										
											7 years ago
+									err := r.Read(seriesFunc, samplesFunc, deletesFunc)
-												Use boolean function instead of postings to drop WAL series

There is no guarantee or requirement for WAL writers to only add
series entries in increasing order of IDs. A postings list cannot look
back and thus unordered WAL entries would skip over IDs to not truncate
from the WAL.
We replace it with a simple boolean check function that does not require
order.

											
										
										
											7 years ago
-												wal: parallelize sample processing

											
										
										
											7 years ago
+									// Signal termination to first worker and wait for last one to close its output channel.
 									close(firstInput)
 									for range input {
 									}
-												Ensure workers terminated fully before reading unknownRefs

											
										
										
											7 years ago
+									wg.Wait()
-												wal: parallelize sample processing

											
										
										
											7 years ago
+									if err != nil {
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										return errors.Wrap(err, "consume WAL")
 									}
-												wal: parallelize sample processing

											
										
										
											7 years ago
+									if unknownRefs > 0 {
 										level.Warn(h.logger).Log("msg", "unknown series references in WAL samples", "count", unknownRefs)
 									}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									return nil
-												Make WAL for HeadBlock composeable.

											
										
										
											8 years ago
+								}
-												Handle compaction trigger and reinitializing in DB

											
										
										
											8 years ago
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+								// Truncate removes all data before mint from the head block and truncates its WAL.
-												Add tests for GC and chunk truncation

											
										
										
											7 years ago
+								func (h *Head) Truncate(mint int64) error {
-												Filter WAL data in Head, misc fixes

											
										
										
											7 years ago
+									initialize := h.MinTime() == math.MinInt64
-												Fix min/max time handling and concurrent crc32 usage

											
										
										
											7 years ago
+									if h.MinTime() >= mint {
-												Add tests for GC and chunk truncation

											
										
										
											7 years ago
+										return nil
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									}
 									atomic.StoreInt64(&h.minTime, mint)
-												Fix min/max time handling and concurrent crc32 usage

											
										
										
											7 years ago
+									// Ensure that max time is at least as high as min time.
 									for h.MaxTime() < mint {
 										atomic.CompareAndSwapInt64(&h.maxTime, h.MaxTime(), mint)
 									}
-												Filter WAL data in Head, misc fixes

											
										
										
											7 years ago
+									// This was an initial call to Truncate after loading blocks on startup.
 									// We haven't read back the WAL yet, so do not attempt to truncate it.
 									if initialize {
 										return nil
 									}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									start := time.Now()
 									h.gc()
-												Add levels to all log lines.

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

											
										
										
											7 years ago
+									level.Info(h.logger).Log("msg", "head GC completed", "duration", time.Since(start))
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									h.metrics.gcDuration.Observe(time.Since(start).Seconds())
 									start = time.Now()
-												Use boolean function instead of postings to drop WAL series

There is not guarantee or requirement for WAL writers to only add
series entries in increasing order of IDs. A postings list cannot look
back and thus unordered WAL entries would skip over IDs to not truncate
from the WAL.
We replace it with a simple boolean check function that does not require
order.

											
										
										
											7 years ago
+									keep := func(id uint64) bool {
 										return h.series.getByID(id) != nil
-												[WIP]: WAL implementation

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

											
										
										
											7 years ago
+									}
-												Use boolean function instead of postings to drop WAL series

There is not guarantee or requirement for WAL writers to only add
series entries in increasing order of IDs. A postings list cannot look
back and thus unordered WAL entries would skip over IDs to not truncate
from the WAL.
We replace it with a simple boolean check function that does not require
order.

											
										
										
											7 years ago
+									if err := h.wal.Truncate(mint, keep); err == nil {
-												Add levels to all log lines.

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

											
										
										
											7 years ago
+										level.Info(h.logger).Log("msg", "WAL truncation completed", "duration", time.Since(start))
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									} else {
-												Add levels to all log lines.

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

											
										
										
											7 years ago
+										level.Error(h.logger).Log("msg", "WAL truncation failed", "err", err, "duration", time.Since(start))
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									}
 									h.metrics.walTruncateDuration.Observe(time.Since(start).Seconds())
-												Add tests for GC and chunk truncation

											
										
										
											7 years ago
 									return nil
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+								}
 								// initTime initializes a head with the first timestamp. This only needs to be called
 								// for a compltely fresh head with an empty WAL.
 								// Returns true if the initialization took an effect.
 								func (h *Head) initTime(t int64) (initialized bool) {
 									// In the init state, the head has a high timestamp of math.MinInt64.
 									mint, _ := rangeForTimestamp(t, h.chunkRange)
-												Fix min/max time handling and concurrent crc32 usage

											
										
										
											7 years ago
+									if !atomic.CompareAndSwapInt64(&h.minTime, math.MinInt64, mint) {
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										return false
 									}
-												Fix min/max time handling and concurrent crc32 usage

											
										
										
											7 years ago
+									// Ensure that max time is initialized to at least the min time we just set.
 									// Concurrent appenders may already have set it to a higher value.
 									atomic.CompareAndSwapInt64(&h.maxTime, math.MinInt64, t)
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									return true
 								}
-												Add more verbose error handling for closing, reduce locking

This commit introduces error returns in various places and is explicit
about closing persisted blocks.
{Index,Chunk,Tombstone}Readers are more consistent about their Close()
method. Whenever a reader is retrieved, the corresponding close method
must eventually be called. We use this to track pending readers against
persisted blocks.

Querier's against the DB no longer hold a read lock for their entire
lifecycle. This avoids long running queriers to starve new ones when we
have to acquire a write lock when reloading blocks.

											
										
										
											7 years ago
+								type rangeHead struct {
 									head       *Head
 									mint, maxt int64
 								}
 								// Index returns the head's index reader for the range held by r.
 								// The returned error is always nil.
 								func (r *rangeHead) Index() (IndexReader, error) {
 									ir := r.head.indexRange(r.mint, r.maxt)
 									return ir, nil
 								}
 								// Chunks returns the head's chunk reader for the range held by r.
 								// The returned error is always nil.
 								func (r *rangeHead) Chunks() (ChunkReader, error) {
 									cr := r.head.chunksRange(r.mint, r.maxt)
 									return cr, nil
 								}
 								// Tombstones returns the tombstones of the underlying head. The time range
 								// held by r is not applied here. The returned error is always nil.
 								func (r *rangeHead) Tombstones() (TombstoneReader, error) {
 									stones := r.head.tombstones
 									return stones, nil
 								}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+								// initAppender is a helper to initialize the time bounds of a the head
 								// upon the first sample it receives.
 								type initAppender struct {
 									app  Appender
 									head *Head
 								}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+								func (a *initAppender) Add(lset labels.Labels, t int64, v float64) (uint64, error) {
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									if a.app != nil {
 										return a.app.Add(lset, t, v)
 									}
-												Remove defer statement in hot path

											
										
										
											7 years ago
+									a.head.initTime(t)
 									a.app = a.head.appender()
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									return a.app.Add(lset, t, v)
 								}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+								func (a *initAppender) AddFast(ref uint64, t int64, v float64) error {
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									if a.app == nil {
 										return ErrNotFound
 									}
 									return a.app.AddFast(ref, t, v)
 								}
 								// Commit commits the underlying appender. If no sample was ever added there
 								// is nothing to commit and nil is returned.
 								func (a *initAppender) Commit() error {
 									if a.app == nil {
 										// Head.Appender incremented the active appenders gauge when it handed
 										// out this appender. No head appender exists to decrement it on
 										// release, so do it here to keep the gauge balanced.
 										a.head.metrics.activeAppenders.Dec()
 										return nil
 									}
 									return a.app.Commit()
 								}
 								// Rollback rolls back the underlying appender. If no sample was ever added
 								// there is nothing to roll back and nil is returned.
 								func (a *initAppender) Rollback() error {
 									if a.app == nil {
 										// Head.Appender incremented the active appenders gauge when it handed
 										// out this appender. No head appender exists to decrement it on
 										// release, so do it here to keep the gauge balanced.
 										a.head.metrics.activeAppenders.Dec()
 										return nil
 									}
 									return a.app.Rollback()
 								}
 								// Appender returns a new Appender on the database.
 								func (h *Head) Appender() Appender {
 									h.metrics.activeAppenders.Inc()
 									// The head cache might not have a starting point yet. The init appender
 									// picks up the first appended timestamp as the base.
-												Fix min/max time handling and concurrent crc32 usage

											
										
										
											7 years ago
+									if h.MinTime() == math.MinInt64 {
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										return &initAppender{head: h}
 									}
 									return h.appender()
 								}
 								// appender builds a headAppender for h. Its lower time bound is half a chunk
 								// range below the head's current max time, and its sample buffer is taken
 								// from the head's buffer pool.
 								func (h *Head) appender() *headAppender {
 									app := &headAppender{head: h, highTimestamp: math.MinInt64}
 									app.mint = h.MaxTime() - h.chunkRange/2
 									app.samples = h.getAppendBuffer()
 									return app
 								}
 								// getAppendBuffer returns a sample buffer from the pool, or a freshly
 								// allocated one with capacity 512 when the pool has none to offer.
 								func (h *Head) getAppendBuffer() []RefSample {
 									pooled := h.appendPool.Get()
 									if pooled != nil {
 										return pooled.([]RefSample)
 									}
 									return make([]RefSample, 0, 512)
 								}
 								// putAppendBuffer hands b back to the pool, truncated to zero length so its
 								// capacity can be reused.
 								func (h *Head) putAppendBuffer(b []RefSample) {
 									emptied := b[:0]
 									h.appendPool.Put(emptied)
 								}
 								type headAppender struct {
 									head *Head
 									mint int64
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									series        []RefSeries
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									samples       []RefSample
 									highTimestamp int64
 								}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+								func (a *headAppender) Add(lset labels.Labels, t int64, v float64) (uint64, error) {
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									if t < a.mint {
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+										return 0, ErrOutOfBounds
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									}
-												Simplify series create logic in head

											
										
										
											7 years ago
+									s, created := a.head.getOrCreate(lset.Hash(), lset)
 									if created {
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+										a.series = append(a.series, RefSeries{
 											Ref:    s.ref,
 											Labels: lset,
 										})
 									}
 									return s.ref, a.AddFast(s.ref, t, v)
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+								}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+								func (a *headAppender) AddFast(ref uint64, t int64, v float64) error {
 									s := a.head.series.getByID(ref)
 									if s == nil {
 										return errors.Wrap(ErrNotFound, "unknown series")
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									}
-												Fix various races

											
										
										
											7 years ago
+									s.Lock()
-												Finish old WAL segment async, default to no fsync

We were still fsyncing while holding the write lock when we cut a new
segment. Given we cannot do anything but logging errors, we might just
as well complete segments asynchronously.

There's not realistic use case where one would fsync after every WAL
entry, thus make the default of a flush interval of 0 to never fsync
which is a much more likely use case.

											
										
										
											7 years ago
+									err := s.appendable(t, v)
-												Fix various races

											
										
										
											7 years ago
+									s.Unlock()
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
-												Finish old WAL segment async, default to no fsync

We were still fsyncing while holding the write lock when we cut a new
segment. Given we cannot do anything but logging errors, we might just
as well complete segments asynchronously.

There's not realistic use case where one would fsync after every WAL
entry, thus make the default of a flush interval of 0 to never fsync
which is a much more likely use case.

											
										
										
											7 years ago
+									if err != nil {
 										return err
 									}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									if t < a.mint {
 										return ErrOutOfBounds
 									}
 									if t > a.highTimestamp {
 										a.highTimestamp = t
 									}
 									a.samples = append(a.samples, RefSample{
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+										Ref:    ref,
 										T:      t,
 										V:      v,
 										series: s,
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									})
 									return nil
 								}
 								func (a *headAppender) Commit() error {
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									defer a.Rollback()
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									if err := a.head.wal.LogSeries(a.series); err != nil {
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										return err
 									}
 									if err := a.head.wal.LogSamples(a.samples); err != nil {
 										return errors.Wrap(err, "WAL log samples")
 									}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									total := len(a.samples)
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
 									for _, s := range a.samples {
-												Fix various races

											
										
										
											7 years ago
+										s.series.Lock()
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+										ok, chunkCreated := s.series.append(s.T, s.V)
-												Fix various races

											
										
										
											7 years ago
+										s.series.Unlock()
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										if !ok {
 											total--
 										}
 										if chunkCreated {
 											a.head.metrics.chunks.Inc()
 											a.head.metrics.chunksCreated.Inc()
 										}
 									}
 									a.head.metrics.samplesAppended.Add(float64(total))
 									for {
 										ht := a.head.MaxTime()
 										if a.highTimestamp <= ht {
 											break
 										}
 										if atomic.CompareAndSwapInt64(&a.head.maxTime, ht, a.highTimestamp) {
 											break
 										}
 									}
 									return nil
 								}
 								// Rollback releases the appender: it decrements the active appenders gauge
 								// and returns the sample buffer to the head's pool. It always returns nil.
 								func (a *headAppender) Rollback() error {
 									head := a.head
 									head.metrics.activeAppenders.Dec()
 									head.putAppendBuffer(a.samples)
 									return nil
 								}
 								// Delete all samples in the range of [mint, maxt] for series that satisfy the given
 								// label matchers.
 								func (h *Head) Delete(mint, maxt int64, ms ...labels.Matcher) error {
 									// Do not delete anything beyond the currently valid range.
 									mint, maxt = clampInterval(mint, maxt, h.MinTime(), h.MaxTime())
 									ir := h.indexRange(mint, maxt)
-												Expose ChunkSeriesSet and lookups methods.

											
										
										
											7 years ago
+									p, absent, err := PostingsForMatchers(ir, ms...)
-												Add explicit error to Querier.Select

This has been a frequent source of debugging pain since errors are
potentially delayed to a much later point. They bubble up in an
unrelated execution path.

											
										
										
											7 years ago
+									if err != nil {
 										return errors.Wrap(err, "select series")
 									}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
 									var stones []Stone
 								Outer:
 									for p.Next() {
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+										series := h.series.getByID(p.At())
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
 										for _, abs := range absent {
 											if series.lset.Get(abs) != "" {
 												continue Outer
 											}
 										}
 										// Delete only until the current values and not beyond.
 										t0, t1 := clampInterval(mint, maxt, series.minTime(), series.maxTime())
 										stones = append(stones, Stone{p.At(), Intervals{{t0, t1}}})
 									}
 									if p.Err() != nil {
 										return p.Err()
 									}
 									if err := h.wal.LogDeletes(stones); err != nil {
 										return err
 									}
 									for _, s := range stones {
 										h.tombstones.add(s.ref, s.intervals[0])
 									}
 									return nil
 								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								// gc removes data before the minimum timestamp from the head.
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+								func (h *Head) gc() {
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									// Only data strictly lower than this timestamp must be deleted.
 									mint := h.MinTime()
-												Properly track and write meta file

											
										
										
											8 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									// Drop old chunks and remember series IDs and hashes if they can be
 									// deleted entirely.
 									deleted, chunksRemoved := h.series.gc(mint)
 									seriesRemoved := len(deleted)
-												Trigger reload correctly on interrupted compaction

											
										
										
											8 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									h.metrics.seriesRemoved.Add(float64(seriesRemoved))
 									h.metrics.series.Sub(float64(seriesRemoved))
 									h.metrics.chunksRemoved.Add(float64(chunksRemoved))
 									h.metrics.chunks.Sub(float64(chunksRemoved))
-												Add separate head mutex

Introduce a seperate mutex for the head blocks to avoid a race where
a post-compaction reload may run between switching the DB's base mutex
to create a new head block in an appender.

											
										
										
											8 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									// Remove deleted series IDs from the postings lists. First do a collection
 									// run where we rebuild all postings that have something to delete
 									h.postings.mtx.RLock()
-												Remove unreturned locks, detect writes on closed heads

											
										
										
											8 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									type replEntry struct {
 										idx int
 										l   []uint64
-												Actually close olds blocks in reloadBlocks

This fixes a bug leaking memory because blocks were not actually closed
as the closing call references the initial, empty slice

											
										
										
											8 years ago
+									}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									collected := map[labels.Label]replEntry{}
-												Move stats into meta.json file, cleanup, docs

											
										
										
											8 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									for t, p := range h.postings.m {
 										repl := replEntry{idx: len(p)}
 										for i, id := range p {
 											if _, ok := deleted[id]; ok {
 												// First ID that got deleted, initialize replacement with
 												// all remaining IDs so far.
 												if repl.l == nil {
 													repl.l = make([]uint64, 0, len(p))
 													repl.l = append(repl.l, p[:i]...)
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+												}
 												continue
 											}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+											// Only add to the replacement once we know we have to do it.
 											if repl.l != nil {
 												repl.l = append(repl.l, id)
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+											}
 										}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+										if repl.l != nil {
 											collected[t] = repl
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+										}
-												Make HeadBlock use WAL.

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

											
										
										
											8 years ago
+									}
-												Implement Delete on HeadBlock

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

											
										
										
											8 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									h.postings.mtx.RUnlock()
-												Make HeadBlock use WAL.

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

											
										
										
											8 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									// Replace all postings that have changed. Append all IDs that may have
 									// been added while we switched locks.
 									h.postings.mtx.Lock()
-												Initial implentation of Deletes on persistedBlock

Very much a WIP

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

											
										
										
											8 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									for t, repl := range collected {
 										l := append(repl.l, h.postings.m[t][repl.idx:]...)
 										if len(l) > 0 {
 											h.postings.m[t] = l
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+										} else {
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+											delete(h.postings.m, t)
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+										}
-												Refactor compactor

											
										
										
											7 years ago
+									}
-												Initial implementation of HeadBlock Snapshots

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

											
										
										
											8 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									h.postings.mtx.Unlock()
 									// Rebuild symbols and label value indices from what is left in the postings terms.
 									h.postings.mtx.RLock()
-												Fix race in symbol table re-creation

											
										
										
											7 years ago
+									symbols := make(map[string]struct{})
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									values := make(map[string]stringset, len(h.values))
-												Initial implementation of HeadBlock Snapshots

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>

											
										
										
											8 years ago
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									for t := range h.postings.m {
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+										symbols[t.Name] = struct{}{}
 										symbols[t.Value] = struct{}{}
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of the the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+										ss, ok := values[t.Name]
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+										if !ok {
 											ss = stringset{}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+											values[t.Name] = ss
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+										}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+										ss.set(t.Value)
-												Add Queryable interface to Block

This adds the Queryable interface to the Block interface. Head and
persisted blocks now implement their own Querier() method and thus
isolate customization (e.g. remapPostings) more cleanly.

											
										
										
											8 years ago
+									}
-												Fix wrong comparison in head block resorting

											
										
										
											8 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									h.postings.mtx.RUnlock()
 									h.symMtx.Lock()
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									h.symbols = symbols
 									h.values = values
-												Add various metrics

											
										
										
											7 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									h.symMtx.Unlock()
-												Add Queryable interface to Block

This adds the Queryable interface to the Block interface. Head and
persisted blocks now implement their own Querier() method and thus
isolate customization (e.g. remapPostings) more cleanly.

											
										
										
											8 years ago
+								}
-												Add more verbose error handling for closing, reduce locking

This commit introduces error returns in various places and is explicit
about closing persisted blocks.
{Index,Chunk,Tombstone}Readers are more consistent about their Close()
method. Whenever a reader is retrieved, the corresponding close method
must eventually be called. We use this to track pending readers against
persisted blocks.

Querier's against the DB no longer hold a read lock for their entire
lifecycle. This avoids long running queriers to starve new ones when we
have to acquire a write lock when reloading blocks.

											
										
										
											7 years ago
+								// Tombstones returns a new reader over the head's tombstones
 								func (h *Head) Tombstones() (TombstoneReader, error) {
 									return h.tombstones, nil
-												Compact head block early

Let older head blocks be compacted once the newest once has samples at
50% of its total range. This allows the memory of the compacted blocks
to be released and garbage collected before a new head block gets
created. Thereby the number of head blocks is 1 or 2 instead of 2 or 3
and memory spikes are reduced.

											
										
										
											8 years ago
+								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								// Index returns an IndexReader against the block.
-												Add more verbose error handling for closing, reduce locking

This commit introduces error returns in various places and is explicit
about closing persisted blocks.
{Index,Chunk,Tombstone}Readers are more consistent about their Close()
method. Whenever a reader is retrieved, the corresponding close method
must eventually be called. We use this to track pending readers against
persisted blocks.

Querier's against the DB no longer hold a read lock for their entire
lifecycle. This avoids long running queriers to starve new ones when we
have to acquire a write lock when reloading blocks.

											
										
										
											7 years ago
+								func (h *Head) Index() (IndexReader, error) {
 									return h.indexRange(math.MinInt64, math.MaxInt64), nil
-												Add composed Block interfaces, remove head generation

This adds more lower-leve interfaces which are used to compose
to different Block interfaces.
The DB only uses interfaces instead of explicit persistedBlock and
headBlock. The headBlock generation property is dropped as the use-case
can be implemented using block sequence numbers.

											
										
										
											8 years ago
+								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								func (h *Head) indexRange(mint, maxt int64) *headIndexReader {
 									if hmin := h.MinTime(); hmin > mint {
 										mint = hmin
-												Use buffer pool for head appenders

											
										
										
											8 years ago
+									}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									return &headIndexReader{head: h, mint: mint, maxt: maxt}
-												Expose series references to clients

This exposes a reference number of a series represented by a label set
to clients.
Subsequent samples can be directly added via the reference rather than
repeatedly passing in the full labels. This drasitcally speeds up the
append process.

The appender chain uses different sections of the reference number for
assignment to child appenders and invalidating reference numbers as
necessary.

Clients can either pass out reference numbers themselves or have their
own optimized lookup, i.e. by directly associating unparsed metric
descriptors strings with reference numbers.

											
										
										
											8 years ago
+								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								// Chunks returns a ChunkReader against the block.
-												Add more verbose error handling for closing, reduce locking

This commit introduces error returns in various places and is explicit
about closing persisted blocks.
{Index,Chunk,Tombstone}Readers are more consistent about their Close()
method. Whenever a reader is retrieved, the corresponding close method
must eventually be called. We use this to track pending readers against
persisted blocks.

Querier's against the DB no longer hold a read lock for their entire
lifecycle. This avoids long running queriers to starve new ones when we
have to acquire a write lock when reloading blocks.

											
										
										
											7 years ago
+								func (h *Head) Chunks() (ChunkReader, error) {
 									return h.chunksRange(math.MinInt64, math.MaxInt64), nil
-												Expose series references to clients

This exposes a reference number of a series represented by a label set
to clients.
Subsequent samples can be directly added via the reference rather than
repeatedly passing in the full labels. This drasitcally speeds up the
append process.

The appender chain uses different sections of the reference number for
assignment to child appenders and invalidating reference numbers as
necessary.

Clients can either pass out reference numbers themselves or have their
own optimized lookup, i.e. by directly associating unparsed metric
descriptors strings with reference numbers.

											
										
										
											8 years ago
+								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								func (h *Head) chunksRange(mint, maxt int64) *headChunkReader {
 									if hmin := h.MinTime(); hmin > mint {
 										mint = hmin
-												Expose series references to clients

This exposes a reference number of a series represented by a label set
to clients.
Subsequent samples can be directly added via the reference rather than
repeatedly passing in the full labels. This drasitcally speeds up the
append process.

The appender chain uses different sections of the reference number for
assignment to child appenders and invalidating reference numbers as
necessary.

Clients can either pass out reference numbers themselves or have their
own optimized lookup, i.e. by directly associating unparsed metric
descriptors strings with reference numbers.

											
										
										
											8 years ago
+									}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									return &headChunkReader{head: h, mint: mint, maxt: maxt}
-												Expose series references to clients

This exposes a reference number of a series represented by a label set
to clients.
Subsequent samples can be directly added via the reference rather than
repeatedly passing in the full labels. This drasitcally speeds up the
append process.

The appender chain uses different sections of the reference number for
assignment to child appenders and invalidating reference numbers as
necessary.

Clients can either pass out reference numbers themselves or have their
own optimized lookup, i.e. by directly associating unparsed metric
descriptors strings with reference numbers.

											
										
										
											8 years ago
+								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								// MinTime returns the lowest time bound on visible data in the head.
 								func (h *Head) MinTime() int64 {
 									return atomic.LoadInt64(&h.minTime)
-												Expose series references to clients

This exposes a reference number of a series represented by a label set
to clients.
Subsequent samples can be directly added via the reference rather than
repeatedly passing in the full labels. This drasitcally speeds up the
append process.

The appender chain uses different sections of the reference number for
assignment to child appenders and invalidating reference numbers as
necessary.

Clients can either pass out reference numbers themselves or have their
own optimized lookup, i.e. by directly associating unparsed metric
descriptors strings with reference numbers.

											
										
										
											8 years ago
+								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								// MaxTime returns the highest timestamp seen in data of the head.
 								func (h *Head) MaxTime() int64 {
 									return atomic.LoadInt64(&h.maxTime)
-												Expose series references to clients

This exposes a reference number of a series represented by a label set
to clients.
Subsequent samples can be directly added via the reference rather than
repeatedly passing in the full labels. This drasitcally speeds up the
append process.

The appender chain uses different sections of the reference number for
assignment to child appenders and invalidating reference numbers as
necessary.

Clients can either pass out reference numbers themselves or have their
own optimized lookup, i.e. by directly associating unparsed metric
descriptors strings with reference numbers.

											
										
										
											8 years ago
+								}
-												Close WAL when closing the DB

Also, the `wal` field of the `DB` was not used anywhere, so this removes
it.

											
										
										
											7 years ago
+								// Close flushes the WAL and closes the head.
 								func (h *Head) Close() error {
 									// Closing the head only closes its WAL; that result is
 									// returned to the caller unchanged.
 									err := h.wal.Close()
 									return err
 								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								type headChunkReader struct {
 									head       *Head
 									mint, maxt int64
-												Expose series references to clients

This exposes a reference number of a series represented by a label set
to clients.
Subsequent samples can be directly added via the reference rather than
repeatedly passing in the full labels. This drasitcally speeds up the
append process.

The appender chain uses different sections of the reference number for
assignment to child appenders and invalidating reference numbers as
necessary.

Clients can either pass out reference numbers themselves or have their
own optimized lookup, i.e. by directly associating unparsed metric
descriptors strings with reference numbers.

											
										
										
											8 years ago
+								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								func (h *headChunkReader) Close() error {
-												Expose series references to clients

This exposes a reference number of a series represented by a label set
to clients.
Subsequent samples can be directly added via the reference rather than
repeatedly passing in the full labels. This drasitcally speeds up the
append process.

The appender chain uses different sections of the reference number for
assignment to child appenders and invalidating reference numbers as
necessary.

Clients can either pass out reference numbers themselves or have their
own optimized lookup, i.e. by directly associating unparsed metric
descriptors strings with reference numbers.

											
										
										
											8 years ago
+									return nil
 								}
-												Change series ID from uint32 to uint64

											
										
										
											7 years ago
+								// packChunkID packs a seriesID and a chunkID within it into a global 8 byte ID.
 								// It panics if the seriesID exceeds 5 bytes or the chunk ID 3 bytes.
 								func packChunkID(seriesID, chunkID uint64) uint64 {
 									const (
 										// chunkBits is the width of the chunk part (3 bytes).
 										chunkBits = 24
 										// The series part takes the remaining 5 bytes.
 										maxSeriesID = 1<<40 - 1
 										maxChunkID  = 1<<chunkBits - 1
 									)
 									// The series ID is validated first, so it decides the panic
 									// message when both values are out of range.
 									switch {
 									case seriesID > maxSeriesID:
 										panic("series ID exceeds 5 bytes")
 									case chunkID > maxChunkID:
 										panic("chunk ID exceeds 3 bytes")
 									}
 									// Series ID in the upper 40 bits, chunk ID in the lower 24.
 									return seriesID<<chunkBits | chunkID
 								}
 								// unpackChunkID splits a packed 8 byte ID into its two parts: the
 								// upper 40 bits are returned as the series ID and the lower 24 bits
 								// as the chunk ID.
 								func unpackChunkID(id uint64) (seriesID, chunkID uint64) {
 									const chunkBits = 24
 									seriesID = id >> chunkBits
 									chunkID = id & (1<<chunkBits - 1)
 									return seriesID, chunkID
 								}
-												Add stats serialization, load querier of all blocks

											
										
										
											8 years ago
+								// Chunk returns the chunk for the reference number.
-												Segment chunk file

This adds write path support for segmented chunk data files.
Files of 512MB are pre-allocated and written to. If the file size
is exceeded, the next file is started. On completion, files
are truncated to their final size.

											
										
										
											8 years ago
+								func (h *headChunkReader) Chunk(ref uint64) (chunks.Chunk, error) {
-												Change series ID from uint32 to uint64

											
										
										
											7 years ago
+									sid, cid := unpackChunkID(ref)
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
 									s := h.head.series.getByID(sid)
-												Use separate lock for series creation

This uses the head block's own lock to only lock if new series were
encountered.
In the general append case we just need to hold a

											
										
										
											8 years ago
-												Fix various races

											
										
										
											7 years ago
+									s.Lock()
-												Change series ID from uint32 to uint64

											
										
										
											7 years ago
+									c := s.chunk(int(cid))
-												Access chunk time range while holding lock

											
										
										
											7 years ago
+									mint, maxt := c.minTime, c.maxTime
-												Fix various races

											
										
										
											7 years ago
+									s.Unlock()
-												Support multiple chunk files in read path

											
										
										
											8 years ago
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									// Do not expose chunks that are outside of the specified range.
-												Access chunk time range while holding lock

											
										
										
											7 years ago
+									if c == nil || !intervalOverlap(mint, maxt, h.mint, h.maxt) {
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+										return nil, ErrNotFound
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+									}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									return &safeChunk{
 										Chunk: c.chunk,
 										s:     s,
-												Change series ID from uint32 to uint64

											
										
										
											7 years ago
+										cid:   int(cid),
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									}, nil
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+								}
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceeding ones while samples
are added

											
										
										
											8 years ago
+								type safeChunk struct {
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+									chunks.Chunk
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									s   *memSeries
 									cid int
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceeding ones while samples
are added

											
										
										
											8 years ago
+								}
 								func (c *safeChunk) Iterator() chunks.Iterator {
-												Fix various races

											
										
										
											7 years ago
+									c.s.Lock()
 									it := c.s.iterator(c.cid)
 									c.s.Unlock()
 									return it
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceeding ones while samples
are added

											
										
										
											8 years ago
+								}
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+								// func (c *safeChunk) Appender() (chunks.Appender, error) { panic("illegal") }
 								// func (c *safeChunk) Bytes() []byte                      { panic("illegal") }
 								// func (c *safeChunk) Encoding() chunks.Encoding          { panic("illegal") }
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceding ones while samples
are added

											
										
										
											8 years ago
-												Unexport HeadBlock, export Block interface

											
										
										
											8 years ago
+								type headIndexReader struct {
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									head       *Head
 									mint, maxt int64
 								}
 								func (h *headIndexReader) Close() error {
 									return nil
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of this, the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
+								}
 								func (h *headIndexReader) Symbols() (map[string]struct{}, error) {
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									h.head.symMtx.RLock()
 									defer h.head.symMtx.RUnlock()
 									res := make(map[string]struct{}, len(h.head.symbols))
 									for s := range h.head.symbols {
 										res[s] = struct{}{}
 									}
 									return res, nil
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+								}
 								// LabelValues returns the possible label values
-												Unexport HeadBlock, export Block interface

											
										
										
											8 years ago
+								func (h *headIndexReader) LabelValues(names ...string) (StringTuples, error) {
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+									if len(names) != 1 {
 										return nil, errInvalidSize
 									}
 									var sl []string
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									h.head.symMtx.RLock()
 									defer h.head.symMtx.RUnlock()
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
amangement and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									for s := range h.head.values[names[0]] {
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+										sl = append(sl, s)
 									}
 									sort.Strings(sl)
-												Add postings wrapper that emits head postings in label set order

This adds a position mapper that takes series from a head block
in the order they were appended and creates a mapping representing
them in order of their label sets.

Write-repair of the postings list would cause very expensive writing.
Hence, we keep them as they are and only apply the position mapping
at the very end, after a postings list has been sufficiently reduced
through intersections etc.

											
										
										
											8 years ago
+									return &stringTuples{l: len(names), s: sl}, nil
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+								}
 								// Postings returns the postings list iterator for the label pair.
-												Unexport HeadBlock, export Block interface

											
										
										
											8 years ago
+								func (h *headIndexReader) Postings(name, value string) (Postings, error) {
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									return h.head.postings.get(name, value), nil
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+								}
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of this, the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
+								func (h *headIndexReader) SortedPostings(p Postings) Postings {
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									ep := make([]uint64, 0, 128)
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of this, the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
 									for p.Next() {
 										ep = append(ep, p.At())
 									}
 									if err := p.Err(); err != nil {
 										return errPostings{err: errors.Wrap(err, "expand postings")}
 									}
 									sort.Slice(ep, func(i, j int) bool {
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+										a := h.head.series.getByID(ep[i])
 										b := h.head.series.getByID(ep[j])
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+										if a == nil || b == nil {
-												Clarify postings index semantics, handle staleness

The postings list index may point to series that no longer
exist during garbage collection. This clarifies that this is valid
behavior.
It would be possible, though more complex, to always keep them in sync.
However, series existence means nothing in itself as the queried time
range defines whether there's actual data. Thus our definition is sane
overall as long as drift is kept small.

											
										
										
											7 years ago
+											level.Debug(h.head.logger).Log("msg", "looked up series not found")
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+											return false
 										}
 										return labels.Compare(a.lset, b.lset) < 0
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of this, the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
+									})
 									return newListPostings(ep)
 								}
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+								// Series returns the series for the given reference.
-												Change series ID from uint32 to uint64

											
										
										
											7 years ago
+								func (h *headIndexReader) Series(ref uint64, lbls *labels.Labels, chks *[]ChunkMeta) error {
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									s := h.head.series.getByID(ref)
-												Use separate lock for series creation

This uses the head block's own lock to only lock if new series were
encountered.
In the general append case we just need to hold a

											
										
										
											8 years ago
-												Switch append refs to string

											
										
										
											8 years ago
+									if s == nil {
-												head: track number of series not found errors in metric

											
										
										
											7 years ago
+										h.head.metrics.seriesNotFound.Inc()
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of this, the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
+										return ErrNotFound
-												Switch append refs to string

											
										
										
											8 years ago
+									}
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of this, the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
+									*lbls = append((*lbls)[:0], s.lset...)
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
-												Fix various races

											
										
										
											7 years ago
+									s.Lock()
 									defer s.Unlock()
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of this, the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
+									*chks = (*chks)[:0]
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+									for i, c := range s.chunks {
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+										// Do not expose chunks that are outside of the specified range.
 										if !intervalOverlap(c.minTime, c.maxTime, h.mint, h.maxt) {
 											continue
 										}
-												Don't allocate ChunkMetas, reuse postings slices

											
										
										
											7 years ago
+										*chks = append(*chks, ChunkMeta{
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+											MinTime: c.minTime,
 											MaxTime: c.maxTime,
-												Change series ID from uint32 to uint64

											
										
										
											7 years ago
+											Ref:     packChunkID(s.ref, uint64(s.chunkID(i))),
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+										})
-												Consolidate persistence and compaction

											
										
										
											8 years ago
+									}
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of this, the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
+									return nil
-												Modify IndexReader API to accommodate compaction

This changes the IndexReader API to expose plain labels
and chunk meta information instead of a Series interface.
Dropping of irrelevant chunks is moved into the querier.

A LabelIndices method is added to query for existing label
value indices.

											
										
										
											8 years ago
+								}
-												Unexport HeadBlock, export Block interface

											
										
										
											8 years ago
+								func (h *headIndexReader) LabelIndices() ([][]string, error) {
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									h.head.symMtx.RLock()
 									defer h.head.symMtx.RUnlock()
-												Use separate lock for series creation

This uses the head block's own lock to only lock if new series were
encountered.
In the general append case we just need to hold a

											
										
										
											8 years ago
-												Modify IndexReader API to accommodate compaction

This changes the IndexReader API to expose plain labels
and chunk meta information instead of a Series interface.
Dropping of irrelevant chunks is moved into the querier.

A LabelIndices method is added to query for existing label
value indices.

											
										
										
											8 years ago
+									res := [][]string{}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									for s := range h.head.values {
-												Modify IndexReader API to accommodate compaction

This changes the IndexReader API to expose plain labels
and chunk meta information instead of a Series interface.
Dropping of irrelevant chunks is moved into the querier.

A LabelIndices method is added to query for existing label
value indices.

											
										
										
											8 years ago
+										res = append(res, []string{s})
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+									}
-												Modify IndexReader API to accommodate compaction

This changes the IndexReader API to expose plain labels
and chunk meta information instead of a Series interface.
Dropping of irrelevant chunks is moved into the querier.

A LabelIndices method is added to query for existing label
value indices.

											
										
										
											8 years ago
+									return res, nil
-												Misc fixes for initial Prometheus integration

											
										
										
											8 years ago
+								}
-												Simplify series create logic in head

											
										
										
											7 years ago
+								func (h *Head) getOrCreate(hash uint64, lset labels.Labels) (*memSeries, bool) {
 									// Just using `getOrSet` below would be semantically sufficient, but we'd create
 									// a new series on every sample inserted via Add(), which causes allocations
 									// and makes our series IDs rather random and harder to compress in postings.
 									s := h.series.getByHash(hash, lset)
 									if s != nil {
 										return s, false
 									}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									// Optimistically assume that we are the first one to create the series.
-												Change series ID from uint32 to uint64

											
										
										
											7 years ago
+									id := atomic.AddUint64(&h.lastSeriesID, 1)
-												Create series with ID recorded in WAL when reading it back

											
										
										
											7 years ago
 									return h.getOrCreateWithID(id, hash, lset)
 								}
 								func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*memSeries, bool) {
 									s := newMemSeries(lset, id, h.chunkRange)
-												Consolidate mem index into HeadBlock

											
										
										
											8 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									s, created := h.series.getOrSet(hash, s)
 									if !created {
-												Simplify series create logic in head

											
										
										
											7 years ago
+										return s, false
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									}
-												Simplify series create logic in head

											
										
										
											7 years ago
+									h.metrics.series.Inc()
 									h.metrics.seriesCreated.Inc()
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									h.postings.add(id, lset)
 									h.symMtx.Lock()
 									defer h.symMtx.Unlock()
-												Consolidate mem index into HeadBlock

											
										
										
											8 years ago
 									for _, l := range lset {
 										valset, ok := h.values[l.Name]
 										if !ok {
 											valset = stringset{}
 											h.values[l.Name] = valset
 										}
 										valset.set(l.Value)
-												Consolidate persistence and compaction

											
										
										
											8 years ago
-												Persist series without allocating the full set

Change index persistence for series to not be accumulated in memory
before being written as one large batch. `Labels` and `ChunkMeta`
objects are reused.
This cuts down memory spikes during compaction of multiple blocks
significantly.

As part of the the Index{Reader,Writer} now have an explicit notion of
symbols and series must be inserted in order.

											
										
										
											7 years ago
+										h.symbols[l.Name] = struct{}{}
 										h.symbols[l.Value] = struct{}{}
-												Consolidate mem index into HeadBlock

											
										
										
											8 years ago
+									}
-												Consolidate persistence and compaction

											
										
										
											8 years ago
-												Simplify series create logic in head

											
										
										
											7 years ago
+									return s, true
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+								}
 								// seriesHashmap is a simple hashmap for memSeries by their label set. It is built
 								// on top of a regular hashmap and holds a slice of series to resolve hash collisions.
 								// Its methods require the hash to be submitted with it to avoid re-computations throughout
 								// the code.
 								//
 								// The key is the hash of a label set; the value lists every series whose label
 								// set produced that hash. The methods perform no locking of their own.
 								type seriesHashmap map[uint64][]*memSeries
-												Add new interfaces and skeleton

											
										
										
											8 years ago
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+								// get returns the series stored under hash whose label set equals lset,
 								// or nil if no such series exists.
 								func (m seriesHashmap) get(hash uint64, lset labels.Labels) *memSeries {
 									bucket := m[hash]
 									for idx := range bucket {
 										// Several series may share a hash; compare label sets to pick the right one.
 										if candidate := bucket[idx]; candidate.lset.Equals(lset) {
 											return candidate
 										}
 									}
 									return nil
 								}
 								// set stores s under hash. If a series with an equal label set is already
 								// present in the bucket it is overwritten in place, otherwise s is appended.
 								func (m seriesHashmap) set(hash uint64, s *memSeries) {
 									bucket := m[hash]
 									for idx := range bucket {
 										if !bucket[idx].lset.Equals(s.lset) {
 											continue
 										}
 										// Same label set: replace the existing entry.
 										bucket[idx] = s
 										return
 									}
 									m[hash] = append(bucket, s)
 								}
 								// del removes every series stored under hash whose label set equals lset.
 								// The map entry for hash is dropped entirely once no series remain for it.
 								func (m seriesHashmap) del(hash uint64, lset labels.Labels) {
 									var kept []*memSeries
 									for _, candidate := range m[hash] {
 										if candidate.lset.Equals(lset) {
 											continue
 										}
 										kept = append(kept, candidate)
 									}
 									if len(kept) > 0 {
 										m[hash] = kept
 										return
 									}
 									// Nothing left for this hash; remove the key so the map does not grow unbounded.
 									delete(m, hash)
 								}
 								// stripeSeries locks modulo ranges of IDs and hashes to reduce lock contention.
 								// The locks are padded to not be on the same cache line. Filling the padded space
 								// with the maps was profiled to be slower – likely due to the additional pointer
 								// dereferences.
 								type stripeSeries struct {
 									// series holds, per stripe, the series keyed by their ID.
 									series [stripeSize]map[uint64]*memSeries
 									// hashes holds, per stripe, the series keyed by the hash of their label set.
 									hashes [stripeSize]seriesHashmap
 									// locks[i] guards series[i] and hashes[i]. A series' ID and hash may
 									// fall into different stripes.
 									locks  [stripeSize]stripeLock
 								}
 								const (
 									// stripeSize is the number of stripes (map shards and locks) in a stripeSeries.
 									// It is a power of two so that a stripe can be selected with a bitwise AND.
 									stripeSize = 1 << 14
 									// stripeMask maps an ID or hash onto its stripe index via `x & stripeMask`.
 									stripeMask = stripeSize - 1
 								)
 								// stripeLock is the read-write lock used for a single stripe of a stripeSeries.
 								type stripeLock struct {
 									sync.RWMutex
 									// Padding to avoid multiple locks being on the same cache line.
 									// NOTE(review): 40 bytes presumably rounds the struct up to a 64-byte
 									// cache line – confirm against the size of sync.RWMutex.
 									_ [40]byte
 								}
 								func newStripeSeries() *stripeSeries {
 									s := &stripeSeries{}
 									for i := range s.series {
 										s.series[i] = map[uint64]*memSeries{}
 									}
 									for i := range s.hashes {
 										s.hashes[i] = seriesHashmap{}
 									}
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+									return s
-												Add new interfaces and skeleton

											
										
										
											8 years ago
+								}
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+								// gc garbage collects old chunks that are strictly before mint and removes
 								// series entirely that have no chunks left.
 								func (s *stripeSeries) gc(mint int64) (map[uint64]struct{}, int) {
 									var (
 										deleted  = map[uint64]struct{}{}
 										rmChunks = 0
 									)
 									// Run through all series and truncate old chunks. Mark those with no
-												Filter WAL data in Head, misc fixes

											
										
										
											7 years ago
+									// chunks left as deleted and store their ID.
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+									for i := 0; i < stripeSize; i++ {
 										s.locks[i].Lock()
 										for hash, all := range s.hashes[i] {
 											for _, series := range all {
-												Fix various races

											
										
										
											7 years ago
+												series.Lock()
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+												rmChunks += series.truncateChunksBefore(mint)
 												if len(series.chunks) > 0 {
-												Fix various races

											
										
										
											7 years ago
+													series.Unlock()
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+													continue
 												}
 												// The series is gone entirely. We need to keep the series lock
 												// and make sure we have acquired the stripe locks for hash and ID of the
 												// series alike.
 												// If we don't hold them all, there's a very small chance that a series receives
 												// samples again while we are half-way into deleting it.
 												j := int(series.ref & stripeMask)
 												if i != j {
 													s.locks[j].Lock()
 												}
 												deleted[series.ref] = struct{}{}
 												s.hashes[i].del(hash, series.lset)
 												delete(s.series[j], series.ref)
 												if i != j {
 													s.locks[j].Unlock()
 												}
-												Fix various races

											
										
										
											7 years ago
+												series.Unlock()
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+											}
 										}
 										s.locks[i].Unlock()
 									}
 									return deleted, rmChunks
 								}
 								// getByID returns the series stored under id, or nil if the ID stripe has
 								// no entry for it. The stripe is selected by masking id with stripeMask and
 								// its lock is held in read mode (RLock) for the duration of the map lookup.
 								func (s *stripeSeries) getByID(id uint64) *memSeries {
 									stripe := id & stripeMask
 									s.locks[stripe].RLock()
 									defer s.locks[stripe].RUnlock()
 									return s.series[stripe][id]
 								}
 								// getByHash looks up a series by the hash of its label set. The stripe is
 								// selected by masking hash with stripeMask and its lock is held in read mode
 								// (RLock) while the stripe's hash table is queried with both hash and lset.
 								// The result is whatever that table's get returns for the pair
 								// (presumably nil when no matching series exists, as getOrSet's nil check suggests).
 								func (s *stripeSeries) getByHash(hash uint64, lset labels.Labels) *memSeries {
 									stripe := hash & stripeMask
 									s.locks[stripe].RLock()
 									defer s.locks[stripe].RUnlock()
 									return s.hashes[stripe].get(hash, lset)
 								}
 								func (s *stripeSeries) getOrSet(hash uint64, series *memSeries) (*memSeries, bool) {
 									i := hash & stripeMask
 									s.locks[i].Lock()
 									if prev := s.hashes[i].get(hash, series.lset); prev != nil {
-												Add missing unlock on early return

											
										
										
											7 years ago
+										s.locks[i].Unlock()
-												Replace single head lock with granular locks

This adds various new locks to replace the single big lock on
the head. All parts now must be COW as they may be held by clients
after initial retrieval.
Series by ID and hashes are now held in a stripe lock to reduce
contention and total holding time during GC. This should reduce
starvation of readers.

											
										
										
											7 years ago
+										return prev, false
 									}
 									s.hashes[i].set(hash, series)
 									s.locks[i].Unlock()
 									i = series.ref & stripeMask
 									s.locks[i].Lock()
 									s.series[i][series.ref] = series
 									s.locks[i].Unlock()
 									return series, true
 								}
-												Move BufferedSeriesIterator in own package

This functionality is useful for a lot of clients but not relevant to
the TSDB's core features.

											
										
										
											8 years ago
+								type sample struct {
 									t int64
 									v float64
 								}
-												Fix various races

											
										
										
											7 years ago
+								// memSeries is the in-memory representation of a series. None of its methods
 								// are goroutine safe and it is the caller's responsibility to lock it.
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+								type memSeries struct {
-												Fix various races

											
										
										
											7 years ago
+									sync.Mutex
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceding ones while samples
are added

											
										
										
											8 years ago
-												Change series ID from uint32 to uint64

											
										
										
											7 years ago
+									ref          uint64
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									lset         labels.Labels
 									chunks       []*memChunk
 									chunkRange   int64
 									firstChunkID int
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceding ones while samples
are added

											
										
										
											8 years ago
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+									nextAt    int64 // timestamp at which to cut the next chunk.
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+									lastValue float64
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceding ones while samples
are added

											
										
										
											8 years ago
+									sampleBuf [4]sample
-												Remove Partitioned* code

											
										
										
											8 years ago
+									app chunks.Appender // Current appender for the chunk.
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceding ones while samples
are added

											
										
										
											8 years ago
+								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								func (s *memSeries) minTime() int64 {
 									return s.chunks[0].minTime
 								}
 								// maxTime returns the maxTime of the series' head chunk, i.e. the last
 								// entry of s.chunks. A series that currently holds no chunks has no head
 								// chunk (head returns nil); in that case math.MinInt64 is returned rather
 								// than dereferencing nil. This is the same value cut assigns to the maxTime
 								// of a chunk that has not received a sample yet.
 								func (s *memSeries) maxTime() int64 {
 									c := s.head()
 									if c == nil {
 										return math.MinInt64
 									}
 									return c.maxTime
 								}
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+								func (s *memSeries) cut(mint int64) *memChunk {
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+									c := &memChunk{
 										chunk:   chunks.NewXORChunk(),
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+										minTime: mint,
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+										maxTime: math.MinInt64,
 									}
 									s.chunks = append(s.chunks, c)
-												Ensure near-empty chunks end at correct boundary

We were determining a chunk's end time once it was one quarter full to
compute it so all chunks have uniform number of samples.
This accidentally skipped the case where series started near the end of
a chunk range/block and never reached that threshold. As a result they
got persisted but were continued across the range.

This resulted in corrupted persisted data.

											
										
										
											7 years ago
+									// Set upper bound on when the next chunk must be started. An earlier timestamp
 									// may be chosen dynamically at a later point.
 									_, s.nextAt = rangeForTimestamp(mint, s.chunkRange)
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+									app, err := c.chunk.Appender()
 									if err != nil {
 										panic(err)
 									}
 									s.app = app
 									return c
 								}
-												Change series ID from uint32 to uint64

											
										
										
											7 years ago
+								func newMemSeries(lset labels.Labels, id uint64, chunkRange int64) *memSeries {
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+									s := &memSeries{
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+										lset:       lset,
 										ref:        id,
 										chunkRange: chunkRange,
 										nextAt:     math.MinInt64,
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+									}
 									return s
 								}
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								// appendable checks whether the given sample is valid for appending to the series.
 								func (s *memSeries) appendable(t int64, v float64) error {
-												Add tests for GC and chunk truncation

											
										
										
											7 years ago
+									c := s.head()
 									if c == nil {
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+										return nil
 									}
 									if t > c.maxTime {
 										return nil
 									}
 									if t < c.maxTime {
 										return ErrOutOfOrderSample
 									}
 									// We are allowing exact duplicates as we can encounter them in valid cases
 									// like federation and erroring out at that time would be extremely noisy.
 									if math.Float64bits(s.lastValue) != math.Float64bits(v) {
 										return ErrAmendSample
 									}
 									return nil
 								}
 								func (s *memSeries) chunk(id int) *memChunk {
-												Add tests for GC and chunk truncation

											
										
										
											7 years ago
+									ix := id - s.firstChunkID
 									if ix < 0 || ix >= len(s.chunks) {
 										return nil
 									}
 									return s.chunks[ix]
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								}
 								// chunkID converts pos, an index into s.chunks, into the corresponding
 								// chunk ID by offsetting it with s.firstChunkID.
 								func (s *memSeries) chunkID(pos int) int {
 									id := s.firstChunkID + pos
 									return id
 								}
 								// truncateChunksBefore removes all chunks from the series that have no timestamp
 								// at or after mint. Chunk IDs remain unchanged.
-												Add various metrics

											
										
										
											7 years ago
+								func (s *memSeries) truncateChunksBefore(mint int64) (removed int) {
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+									var k int
 									for i, c := range s.chunks {
 										if c.maxTime >= mint {
 											break
 										}
 										k = i + 1
 									}
 									s.chunks = append(s.chunks[:0], s.chunks[k:]...)
 									s.firstChunkID += k
-												Add various metrics

											
										
										
											7 years ago
 									return k
-												Remove multiple heads

This changes the structure to a single WAL backed by a single head
block.
Parts of the head block can be compacted. This relieves us from any head
management and greatly simplifies any consistency and isolation concerns
by just having a single head.

											
										
										
											7 years ago
+								}
 								// append adds the sample (t, v) to the series.
-												Add various metrics

											
										
										
											7 years ago
+								func (s *memSeries) append(t int64, v float64) (success, chunkCreated bool) {
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+									const samplesPerChunk = 120
-												Add tests for GC and chunk truncation

											
										
										
											7 years ago
+									c := s.head()
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
-												Add tests for GC and chunk truncation

											
										
										
											7 years ago
+									if c == nil {
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+										c = s.cut(t)
-												Add various metrics

											
										
										
											7 years ago
+										chunkCreated = true
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+									}
-												wal: parallelize sample processing

											
										
										
											7 years ago
+									numSamples := c.chunk.NumSamples()
-												Ensure near-empty chunks end at correct boundary

We were determining a chunk's end time once it was one quarter full to
compute it so all chunks have uniform number of samples.
This accidentally skipped the case where series started near the end of
a chunk range/block and never reached that threshold. As a result they
got persisted but were continued across the range.

This resulted in corrupted persisted data.

											
										
										
											7 years ago
+									// Out of order sample.
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+									if c.maxTime >= t {
-												Add various metrics

											
										
										
											7 years ago
+										return false, chunkCreated
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+									}
-												Ensure near-empty chunks end at correct boundary

We were determining a chunk's end time once it was one quarter full to
compute it so all chunks have uniform number of samples.
This accidentally skipped the case where series started near the end of
a chunk range/block and never reached that threshold. As a result they
got persisted but were continued across the range.

This resulted in corrupted persisted data.

											
										
										
											7 years ago
+									// If we reach 25% of a chunk's desired sample count, set a definitive time
 									// at which to start the next chunk.
 									// At latest it must happen at the timestamp set when the chunk was cut.
 									if numSamples == samplesPerChunk/4 {
 										s.nextAt = computeChunkEndTime(c.minTime, c.maxTime, s.nextAt)
 									}
 									if t >= s.nextAt {
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+										c = s.cut(t)
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+										chunkCreated = true
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceding ones while samples
are added

											
										
										
											8 years ago
+									}
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+									s.app.Append(t, v)
 									c.maxTime = t
 									s.lastValue = v
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceding ones while samples
are added

											
										
										
											8 years ago
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+									s.sampleBuf[0] = s.sampleBuf[1]
 									s.sampleBuf[1] = s.sampleBuf[2]
 									s.sampleBuf[2] = s.sampleBuf[3]
 									s.sampleBuf[3] = sample{t: t, v: v}
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceding ones while samples
are added

											
										
										
											8 years ago
-												Add various metrics

											
										
										
											7 years ago
+									return true, chunkCreated
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceding ones while samples
are added

											
										
										
											8 years ago
+								}
-												Improve heuristic to spread chunks across block

											
										
										
											8 years ago
+								// computeChunkEndTime estimates the end timestamp based on the beginning of a chunk,
 								// its current timestamp and the upper bound up to which we insert data.
 								// It assumes that the time range is 1/4 full.
 								func computeChunkEndTime(start, cur, max int64) int64 {
 									span := max - start
 									// The elapsed part (cur-start+1) is taken to be a quarter of a chunk, so
 									// four times that is the projected chunk length; n is how many such
 									// chunks fit into the remaining span (integer division).
 									n := span / (4 * (cur - start + 1))
 									if n != 0 {
 										// Split the span evenly into n chunks and end the current one after
 										// the first share.
 										return start + span/n
 									}
 									// Not even one projected chunk fits: keep the upper bound.
 									return max
 								}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+								func (s *memSeries) iterator(id int) chunks.Iterator {
 									c := s.chunk(id)
-												Return nop iterator for invalid chunk references

											
										
										
											7 years ago
+									// TODO(fabxc): Work around! A querier may have retrieved a pointer to a series' chunk,
 									// which then got garbage collected before it was accessed.
 									// We must ensure to not garbage collect as long as any readers still hold a reference.
 									if c == nil {
 										return chunks.NewNopIterator()
 									}
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									if id-s.firstChunkID < len(s.chunks)-1 {
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+										return c.chunk.Iterator()
 									}
-												Refactor WAL into Head and misc improvements

											
										
										
											7 years ago
+									// Serve the last 4 samples for the last chunk from the series buffer
 									// as their compressed bytes may be mutated by added samples.
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceding ones while samples
are added

											
										
										
											8 years ago
+									it := &memSafeIterator{
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+										Iterator: c.chunk.Iterator(),
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceding ones while samples
are added

											
										
										
											8 years ago
+										i:        -1,
-												Add tests for GC and chunk truncation

											
										
										
											7 years ago
+										total:    c.chunk.NumSamples(),
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+										buf:      s.sampleBuf,
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceding ones while samples
are added

											
										
										
											8 years ago
+									}
 									return it
 								}
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+								func (s *memSeries) head() *memChunk {
-												Add tests for GC and chunk truncation

											
										
										
											7 years ago
+									if len(s.chunks) == 0 {
 										return nil
 									}
-												Replace single head chunk per series with memSeries

This adds a memory series holding several chunk to replace
the single head chunk per series so far.
This is necessary for uniform maximum chunk sizes in cases
where some series have higher frequency samples than others.

											
										
										
											8 years ago
+									return s.chunks[len(s.chunks)-1]
 								}
 								// memChunk pairs a chunk with the time range tracked for it in memory.
 								type memChunk struct {
 									// chunk is the underlying chunk; the series obtains its appender and
 									// iterators from it.
 									chunk chunks.Chunk
 									// minTime is the timestamp passed to cut when the chunk was created.
 									minTime int64
 									// maxTime starts out as math.MinInt64 in cut and is set to the timestamp
 									// of every sample appended afterwards.
 									maxTime int64
 								}
-												Make concurrent head chunk reads safe, fix misc races

This adds a 4 sample buffer to every head chunk. The XOR
compression scheme may edit bytes in place. The minimum size
of a sample is 2 bits. So keeping the last 4 samples in an in-memory
buffer makes it safe to query the preceding ones while samples
are added

											
										
										
											8 years ago
+								type memSafeIterator struct {
 									chunks.Iterator
 									i     int
 									total int
 									buf   [4]sample
 								}
 								// Next advances the iterator by one sample and reports whether a sample is
 								// available. Positions are counted against total; while more than four
 								// samples remain the embedded chunk iterator is advanced, the final four
 								// positions are only counted here and served from buf by At.
 								func (it *memSafeIterator) Next() bool {
 									next := it.i + 1
 									if next >= it.total {
 										return false
 									}
 									it.i = next
 									if remaining := it.total - next; remaining <= 4 {
 										return true
 									}
 									return it.Iterator.Next()
 								}
 								// At returns the timestamp and value at the current position. While more
 								// than four samples remain (counting the current one) the embedded chunk
 								// iterator is consulted; otherwise the sample is read from buf, whose last
 								// slot corresponds to the final position.
 								func (it *memSafeIterator) At() (int64, float64) {
 									remaining := it.total - it.i
 									if remaining <= 4 {
 										buffered := it.buf[4-remaining]
 										return buffered.t, buffered.v
 									}
 									return it.Iterator.At()
 								}