prometheus/storage/local/doubledelta.go

// Copyright 2014 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package local

import (
	"encoding/binary"
	"fmt"
	"io"
	"math"
	"sort"

	clientmodel "github.com/prometheus/client_golang/model"

	"github.com/prometheus/prometheus/storage/metric"
)

// The 37-byte header of a delta-encoded chunk looks like:
//
// - used buf bytes:           2 bytes
// - time double-delta bytes:  1 bytes
// - value double-delta bytes: 1 bytes
// - is integer:               1 byte
// - base time:                8 bytes
// - base value:               8 bytes
// - base time delta:          8 bytes
// - base value delta:         8 bytes
const (
	doubleDeltaHeaderBytes = 37

	doubleDeltaHeaderBufLenOffset         = 0
	doubleDeltaHeaderTimeBytesOffset      = 2
	doubleDeltaHeaderValueBytesOffset     = 3
	doubleDeltaHeaderIsIntOffset          = 4
	doubleDeltaHeaderBaseTimeOffset       = 5
	doubleDeltaHeaderBaseValueOffset      = 13
	doubleDeltaHeaderBaseTimeDeltaOffset  = 21
	doubleDeltaHeaderBaseValueDeltaOffset = 29
)

// A doubleDeltaEncodedChunk adaptively stores sample timestamps and values with
// a double-delta encoding of various types (int, float) and bit widths. A base
// value and timestamp and a base delta for each is saved in the header. The
// payload consists of double-deltas, i.e. deviations from the values and
// timestamps calculated by applying the base value and time and the base deltas.
// However, once 8 bytes would be needed to encode a double-delta value, a
// fall-back to the absolute numbers happens (so that timestamps are saved
// directly as int64 and values as float64).
// doubleDeltaEncodedChunk implements the chunk interface.
type doubleDeltaEncodedChunk []byte

// newDoubleDeltaEncodedChunk returns a newly allocated doubleDeltaEncodedChunk.
func newDoubleDeltaEncodedChunk(tb, vb deltaBytes, isInt bool, length int) *doubleDeltaEncodedChunk {
	if tb < 1 {
		panic("need at least 1 time delta byte")
	}
	if length < doubleDeltaHeaderBytes+16 {
		panic(fmt.Errorf(
			"chunk length %d bytes is insufficient, need at least %d",
			length, doubleDeltaHeaderBytes+16,
		))
	}
	c := make(doubleDeltaEncodedChunk, doubleDeltaHeaderIsIntOffset+1, length)

	c[doubleDeltaHeaderTimeBytesOffset] = byte(tb)
	c[doubleDeltaHeaderValueBytesOffset] = byte(vb)
	if vb < d8 && isInt { // Only use int for fewer than 8 value double-delta bytes.
		c[doubleDeltaHeaderIsIntOffset] = 1
	} else {
		c[doubleDeltaHeaderIsIntOffset] = 0
	}
	return &c
}

// add implements chunk.
func (c doubleDeltaEncodedChunk) add(s *metric.SamplePair) []chunk {
	if c.len() == 0 {
		return c.addFirstSample(s)
	}

	tb := c.timeBytes()
	vb := c.valueBytes()

	if c.len() == 1 {
		return c.addSecondSample(s, tb, vb)
	}

	remainingBytes := cap(c) - len(c)
	sampleSize := c.sampleSize()

	// Do we generally have space for another sample in this chunk? If not,
	// overflow into a new one.
	if remainingBytes < sampleSize {
		overflowChunks := newChunk().add(s)
		return []chunk{&c, overflowChunks[0]}
	}

	projectedTime := c.baseTime() + clientmodel.Timestamp(c.len())*c.baseTimeDelta()
	ddt := s.Timestamp - projectedTime

	projectedValue := c.baseValue() + clientmodel.SampleValue(c.len())*c.baseValueDelta()
	ddv := s.Value - projectedValue

	ntb, nvb, nInt := tb, vb, c.isInt()
	// If the new sample is incompatible with the current encoding, reencode the
	// existing chunk data into new chunk(s).
	if c.isInt() && !isInt64(ddv) {
		// int->float.
		nvb = d4
		nInt = false
	} else if !c.isInt() && vb == d4 && projectedValue+clientmodel.SampleValue(float32(ddv)) != s.Value {
		// float32->float64.
		nvb = d8
	} else {
		if tb < d8 {
			// Maybe more bytes for timestamp.
			ntb = max(tb, bytesNeededForSignedTimestampDelta(ddt))
		}
		if c.isInt() && vb < d8 {
			// Maybe more bytes for sample value.
			nvb = max(vb, bytesNeededForIntegerSampleValueDelta(ddv))
		}
	}
	if tb != ntb || vb != nvb || c.isInt() != nInt {
		if len(c)*2 < cap(c) {
			return transcodeAndAdd(newDoubleDeltaEncodedChunk(ntb, nvb, nInt, cap(c)), &c, s)
		}
		// Chunk is already half full. Better create a new one and save the transcoding efforts.
		overflowChunks := newChunk().add(s)
		return []chunk{&c, overflowChunks[0]}
	}

	offset := len(c)
	c = c[:offset+sampleSize]

	switch tb {
	case d1:
		c[offset] = byte(ddt)
	case d2:
		binary.LittleEndian.PutUint16(c[offset:], uint16(ddt))
	case d4:
		binary.LittleEndian.PutUint32(c[offset:], uint32(ddt))
	case d8:
		// Store the absolute value (no delta) in case of d8.
		binary.LittleEndian.PutUint64(c[offset:], uint64(s.Timestamp))
	default:
		panic("invalid number of bytes for time delta")
	}

	offset += int(tb)

	if c.isInt() {
		switch vb {
		case d0:
			// No-op. Constant delta is stored as base value.
		case d1:
			c[offset] = byte(ddv)
		case d2:
			binary.LittleEndian.PutUint16(c[offset:], uint16(ddv))
		case d4:
			binary.LittleEndian.PutUint32(c[offset:], uint32(ddv))
		// d8 must not happen. Those samples are encoded as float64.
		default:
			panic("invalid number of bytes for integer delta")
		}
	} else {
		switch vb {
		case d4:
			binary.LittleEndian.PutUint32(c[offset:], math.Float32bits(float32(ddv)))
		case d8:
			// Store the absolute value (no delta) in case of d8.
			binary.LittleEndian.PutUint64(c[offset:], math.Float64bits(float64(s.Value)))
		default:
			panic("invalid number of bytes for floating point delta")
		}
	}
	return []chunk{&c}
}

// clone implements chunk.
func (c doubleDeltaEncodedChunk) clone() chunk {
	clone := make(doubleDeltaEncodedChunk, len(c), cap(c))
	copy(clone, c)
	return &clone
}

// firstTime implements chunk.
func (c doubleDeltaEncodedChunk) firstTime() clientmodel.Timestamp {
	return c.baseTime()
}

// newIterator implements chunk.
func (c *doubleDeltaEncodedChunk) newIterator() chunkIterator {
	return &doubleDeltaEncodedChunkIterator{
		c:      *c,
		len:    c.len(),
		baseT:  c.baseTime(),
		baseΔT: c.baseTimeDelta(),
		baseV:  c.baseValue(),
		baseΔV: c.baseValueDelta(),
		tBytes: c.timeBytes(),
		vBytes: c.valueBytes(),
		isInt:  c.isInt(),
	}
}

// marshal implements chunk.
func (c doubleDeltaEncodedChunk) marshal(w io.Writer) error {
	if len(c) > math.MaxUint16 {
		panic("chunk buffer length would overflow a 16 bit uint.")
	}
	binary.LittleEndian.PutUint16(c[doubleDeltaHeaderBufLenOffset:], uint16(len(c)))

	n, err := w.Write(c[:cap(c)])
	if err != nil {
		return err
	}
	if n != cap(c) {
		return fmt.Errorf("wanted to write %d bytes, wrote %d", len(c), n)
	}
	return nil
}

// unmarshal implements chunk.
func (c *doubleDeltaEncodedChunk) unmarshal(r io.Reader) error {
	*c = (*c)[:cap(*c)]
	if _, err := io.ReadFull(r, *c); err != nil {
		return err
	}
	*c = (*c)[:binary.LittleEndian.Uint16((*c)[doubleDeltaHeaderBufLenOffset:])]
	return nil
}

// unmarshalFromBuf implements chunk.
func (c *doubleDeltaEncodedChunk) unmarshalFromBuf(buf []byte) {
	*c = (*c)[:cap(*c)]
	copy(*c, buf)
	*c = (*c)[:binary.LittleEndian.Uint16((*c)[doubleDeltaHeaderBufLenOffset:])]
}

// encoding implements chunk.
func (c doubleDeltaEncodedChunk) encoding() chunkEncoding { return doubleDelta }

func (c doubleDeltaEncodedChunk) baseTime() clientmodel.Timestamp {
	return clientmodel.Timestamp(
		binary.LittleEndian.Uint64(
			c[doubleDeltaHeaderBaseTimeOffset:],
		),
	)
}

func (c doubleDeltaEncodedChunk) baseValue() clientmodel.SampleValue {
	return clientmodel.SampleValue(
		math.Float64frombits(
			binary.LittleEndian.Uint64(
				c[doubleDeltaHeaderBaseValueOffset:],
			),
		),
	)
}

func (c doubleDeltaEncodedChunk) baseTimeDelta() clientmodel.Timestamp {
	if len(c) < doubleDeltaHeaderBaseTimeDeltaOffset+8 {
		return 0
	}
	return clientmodel.Timestamp(
		binary.LittleEndian.Uint64(
			c[doubleDeltaHeaderBaseTimeDeltaOffset:],
		),
	)
}

func (c doubleDeltaEncodedChunk) baseValueDelta() clientmodel.SampleValue {
	if len(c) < doubleDeltaHeaderBaseValueDeltaOffset+8 {
		return 0
	}
	return clientmodel.SampleValue(
		math.Float64frombits(
			binary.LittleEndian.Uint64(
				c[doubleDeltaHeaderBaseValueDeltaOffset:],
			),
		),
	)
}

func (c doubleDeltaEncodedChunk) timeBytes() deltaBytes {
	return deltaBytes(c[doubleDeltaHeaderTimeBytesOffset])
}

func (c doubleDeltaEncodedChunk) valueBytes() deltaBytes {
	return deltaBytes(c[doubleDeltaHeaderValueBytesOffset])
}

func (c doubleDeltaEncodedChunk) sampleSize() int {
	return int(c.timeBytes() + c.valueBytes())
}

func (c doubleDeltaEncodedChunk) len() int {
	if len(c) <= doubleDeltaHeaderIsIntOffset+1 {
		return 0
	}
	if len(c) <= doubleDeltaHeaderBaseValueOffset+8 {
		return 1
	}
	return (len(c)-doubleDeltaHeaderBytes)/c.sampleSize() + 2
}

func (c doubleDeltaEncodedChunk) isInt() bool {
	return c[doubleDeltaHeaderIsIntOffset] == 1
}

// addFirstSample is a helper method only used by c.add(). It adds timestamp and
// value as base time and value.
func (c doubleDeltaEncodedChunk) addFirstSample(s *metric.SamplePair) []chunk {
	c = c[:doubleDeltaHeaderBaseValueOffset+8]
	binary.LittleEndian.PutUint64(
		c[doubleDeltaHeaderBaseTimeOffset:],
		uint64(s.Timestamp),
	)
	binary.LittleEndian.PutUint64(
		c[doubleDeltaHeaderBaseValueOffset:],
		math.Float64bits(float64(s.Value)),
	)
	return []chunk{&c}
}

// addSecondSample is a helper method only used by c.add(). It calculates the
// base delta from the provided sample and adds it to the chunk.
func (c doubleDeltaEncodedChunk) addSecondSample(s *metric.SamplePair, tb, vb deltaBytes) []chunk {
	baseTimeDelta := s.Timestamp - c.baseTime()
	if baseTimeDelta < 0 {
		// TODO(beorn7): We ignore this irregular case for now. Once
		// https://github.com/prometheus/prometheus/issues/481 is
		// fixed, we should panic here instead.
		return []chunk{&c}
	}
	c = c[:doubleDeltaHeaderBytes]
	if tb >= d8 || bytesNeededForUnsignedTimestampDelta(baseTimeDelta) >= d8 {
		// If already the base delta needs d8 (or we are at d8
		// already, anyway), we better encode this timestamp
		// directly rather than as a delta and switch everything
		// to d8.
		c[doubleDeltaHeaderTimeBytesOffset] = byte(d8)
		binary.LittleEndian.PutUint64(
			c[doubleDeltaHeaderBaseTimeDeltaOffset:],
			uint64(s.Timestamp),
		)
	} else {
		binary.LittleEndian.PutUint64(
			c[doubleDeltaHeaderBaseTimeDeltaOffset:],
			uint64(baseTimeDelta),
		)
	}
	baseValue := c.baseValue()
	baseValueDelta := s.Value - baseValue
	if vb >= d8 || baseValue+baseValueDelta != s.Value {
		// If we can't reproduce the original sample value (or
		// if we are at d8 already, anyway), we better encode
		// this value directly rather than as a delta and switch
		// everything to d8.
		c[doubleDeltaHeaderValueBytesOffset] = byte(d8)
		c[doubleDeltaHeaderIsIntOffset] = 0
		binary.LittleEndian.PutUint64(
			c[doubleDeltaHeaderBaseValueDeltaOffset:],
			math.Float64bits(float64(s.Value)),
		)
	} else {
		binary.LittleEndian.PutUint64(
			c[doubleDeltaHeaderBaseValueDeltaOffset:],
			math.Float64bits(float64(baseValueDelta)),
		)
	}
	return []chunk{&c}
}

// doubleDeltaEncodedChunkIterator implements chunkIterator.
type doubleDeltaEncodedChunkIterator struct {
	c              doubleDeltaEncodedChunk
	len            int
	baseT, baseΔT  clientmodel.Timestamp
	baseV, baseΔV  clientmodel.SampleValue
	tBytes, vBytes deltaBytes
	isInt          bool
}

// length implements chunkIterator.
func (it *doubleDeltaEncodedChunkIterator) length() int { return it.len }

// valueAtTime implements chunkIterator.
func (it *doubleDeltaEncodedChunkIterator) valueAtTime(t clientmodel.Timestamp) metric.Values {
	i := sort.Search(it.len, func(i int) bool {
		return !it.timestampAtIndex(i).Before(t)
	})

	switch i {
	case 0:
		return metric.Values{metric.SamplePair{
			Timestamp: it.timestampAtIndex(0),
			Value:     it.sampleValueAtIndex(0),
		}}
	case it.len:
		return metric.Values{metric.SamplePair{
			Timestamp: it.timestampAtIndex(it.len - 1),
			Value:     it.sampleValueAtIndex(it.len - 1),
		}}
	default:
		ts := it.timestampAtIndex(i)
		if ts.Equal(t) {
			return metric.Values{metric.SamplePair{
				Timestamp: ts,
				Value:     it.sampleValueAtIndex(i),
			}}
		}
		return metric.Values{
			metric.SamplePair{
				Timestamp: it.timestampAtIndex(i - 1),
				Value:     it.sampleValueAtIndex(i - 1),
			},
			metric.SamplePair{
				Timestamp: ts,
				Value:     it.sampleValueAtIndex(i),
			},
		}
	}
}

// rangeValues implements chunkIterator.
func (it *doubleDeltaEncodedChunkIterator) rangeValues(in metric.Interval) metric.Values {
	oldest := sort.Search(it.len, func(i int) bool {
		return !it.timestampAtIndex(i).Before(in.OldestInclusive)
	})

	newest := sort.Search(it.len, func(i int) bool {
		return it.timestampAtIndex(i).After(in.NewestInclusive)
	})

	if oldest == it.len {
		return nil
	}

	result := make(metric.Values, 0, newest-oldest)
	for i := oldest; i < newest; i++ {
		result = append(result, metric.SamplePair{
			Timestamp: it.timestampAtIndex(i),
			Value:     it.sampleValueAtIndex(i),
		})
	}
	return result
}

// contains implements chunkIterator.
func (it *doubleDeltaEncodedChunkIterator) contains(t clientmodel.Timestamp) bool {
	return !t.Before(it.baseT) && !t.After(it.timestampAtIndex(it.len-1))
}

// values implements chunkIterator.
func (it *doubleDeltaEncodedChunkIterator) values() <-chan *metric.SamplePair {
	valuesChan := make(chan *metric.SamplePair)
	go func() {
		for i := 0; i < it.len; i++ {
			valuesChan <- &metric.SamplePair{
				Timestamp: it.timestampAtIndex(i),
				Value:     it.sampleValueAtIndex(i),
			}
		}
		close(valuesChan)
	}()
	return valuesChan
}

// timestampAtIndex implements chunkIterator.
func (it *doubleDeltaEncodedChunkIterator) timestampAtIndex(idx int) clientmodel.Timestamp {
	if idx == 0 {
		return it.baseT
	}
	if idx == 1 {
		// If time bytes are at d8, the time is saved directly rather
		// than as a difference.
		if it.tBytes == d8 {
			return it.baseΔT
		}
		return it.baseT + it.baseΔT
	}

	offset := doubleDeltaHeaderBytes + (idx-2)*int(it.tBytes+it.vBytes)

	switch it.tBytes {
	case d1:
		return it.baseT +
			clientmodel.Timestamp(idx)*it.baseΔT +
			clientmodel.Timestamp(int8(it.c[offset]))
	case d2:
		return it.baseT +
			clientmodel.Timestamp(idx)*it.baseΔT +
			clientmodel.Timestamp(int16(binary.LittleEndian.Uint16(it.c[offset:])))
	case d4:
		return it.baseT +
			clientmodel.Timestamp(idx)*it.baseΔT +
			clientmodel.Timestamp(int32(binary.LittleEndian.Uint32(it.c[offset:])))
	case d8:
		// Take absolute value for d8.
		return clientmodel.Timestamp(binary.LittleEndian.Uint64(it.c[offset:]))
	default:
		panic("Invalid number of bytes for time delta")
	}
}

// lastTimestamp implements chunkIterator.
func (it *doubleDeltaEncodedChunkIterator) lastTimestamp() clientmodel.Timestamp {
	return it.timestampAtIndex(it.len - 1)
}

// sampleValueAtIndex implements chunkIterator.
func (it *doubleDeltaEncodedChunkIterator) sampleValueAtIndex(idx int) clientmodel.SampleValue {
	if idx == 0 {
		return it.baseV
	}
	if idx == 1 {
		// If value bytes are at d8, the value is saved directly rather
		// than as a difference.
		if it.vBytes == d8 {
			return it.baseΔV
		}
		return it.baseV + it.baseΔV
	}

	offset := doubleDeltaHeaderBytes + (idx-2)*int(it.tBytes+it.vBytes) + int(it.tBytes)

	if it.isInt {
		switch it.vBytes {
		case d0:
			return it.baseV +
				clientmodel.SampleValue(idx)*it.baseΔV
		case d1:
			return it.baseV +
				clientmodel.SampleValue(idx)*it.baseΔV +
				clientmodel.SampleValue(int8(it.c[offset]))
		case d2:
			return it.baseV +
				clientmodel.SampleValue(idx)*it.baseΔV +
				clientmodel.SampleValue(int16(binary.LittleEndian.Uint16(it.c[offset:])))
		case d4:
			return it.baseV +
				clientmodel.SampleValue(idx)*it.baseΔV +
				clientmodel.SampleValue(int32(binary.LittleEndian.Uint32(it.c[offset:])))
		// No d8 for ints.
		default:
			panic("Invalid number of bytes for integer delta")
		}
	} else {
		switch it.vBytes {
		case d4:
			return it.baseV +
				clientmodel.SampleValue(idx)*it.baseΔV +
				clientmodel.SampleValue(math.Float32frombits(binary.LittleEndian.Uint32(it.c[offset:])))
		case d8:
			// Take absolute value for d8.
			return clientmodel.SampleValue(math.Float64frombits(binary.LittleEndian.Uint64(it.c[offset:])))
		default:
			panic("Invalid number of bytes for floating point delta")
		}
	}
}

// lastSampleValue implements chunkIterator.
func (it *doubleDeltaEncodedChunkIterator) lastSampleValue() clientmodel.SampleValue {
	return it.sampleValueAtIndex(it.len - 1)
}