From d51a5344cd6be0fa058af5e85c4fb42d7737d9ff Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 22 Jan 2024 21:39:24 +0000 Subject: [PATCH] labels: new version de-duplicating strings in SymbolTables The individual strings for label names and values are held in a table, and each Labels value is a run of varint-encoded indexes into that table. When creating new labels, a sync.Mutex is locked around reads and writes. When reading labels, there is no locking because the table of strings used by those labels is immutable. Signed-off-by: Bryan Boreham --- model/labels/labels_dedupelabels.go | 808 ++++++++++++++++++++++++++ model/labels/sharding_dedupelabels.go | 52 ++ 2 files changed, 860 insertions(+) create mode 100644 model/labels/labels_dedupelabels.go create mode 100644 model/labels/sharding_dedupelabels.go diff --git a/model/labels/labels_dedupelabels.go b/model/labels/labels_dedupelabels.go new file mode 100644 index 000000000..db6c56b33 --- /dev/null +++ b/model/labels/labels_dedupelabels.go @@ -0,0 +1,808 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build dedupelabels + +package labels + +import ( + "bytes" + "strings" + "sync" + "unsafe" + + "github.com/cespare/xxhash/v2" + "golang.org/x/exp/slices" +) + +// Labels is implemented by a SymbolTable and string holding name/value +// pairs encoded as indexes into the table in varint encoding. +// Names are in alphabetical order. +type Labels struct { + syms *nameTable + data string +} + +// Split SymbolTable into the part used by Labels and the part used by Builder. Only the latter needs the map. + +// This part is used by Labels. All fields are immutable after construction. +type nameTable struct { + byNum []string // This slice header is never changed, even while we are building the symbol table. + symbolTable *SymbolTable // If we need to use it in a Builder. +} + +// SymbolTable is used to map strings into numbers so they can be packed together. +type SymbolTable struct { + mx sync.Mutex + *nameTable + nextNum int + byName map[string]int +} + +const defaultSymbolTableSize = 1024 + +func NewSymbolTable() *SymbolTable { + t := &SymbolTable{ + nameTable: &nameTable{byNum: make([]string, defaultSymbolTableSize)}, + byName: make(map[string]int, defaultSymbolTableSize), + } + t.nameTable.symbolTable = t + return t +} + +func (t *SymbolTable) Len() int { + t.mx.Lock() + defer t.mx.Unlock() + return len(t.byName) +} + +// ToNum maps a string to an integer, adding the string to the table if it is not already there. +// Note: copies the string before adding, in case the caller passed part of +// a buffer that should not be kept alive by this SymbolTable. +func (t *SymbolTable) ToNum(name string) int { + t.mx.Lock() + defer t.mx.Unlock() + return t.toNumUnlocked(name) +} + +func (t *SymbolTable) toNumUnlocked(name string) int { + if i, found := t.byName[name]; found { + return i + } + i := t.nextNum + if t.nextNum == cap(t.byNum) { + // Name table is full; copy to a new one. Don't touch the existing slice. + oldSlice := t.byNum + t.nameTable = &nameTable{byNum: make([]string, cap(oldSlice)*2), symbolTable: t} + copy(t.nameTable.byNum, oldSlice) + } + name = strings.Clone(name) + t.byNum[i] = name + t.byName[name] = i + t.nextNum++ + return i +} + +func (t *SymbolTable) checkNum(name string) (int, bool) { + t.mx.Lock() + defer t.mx.Unlock() + i, bool := t.byName[name] + return i, bool +} + +// ToName maps an integer to a string. +func (t *nameTable) ToName(num int) string { + return t.byNum[num] +} + +func decodeVarint(data string, index int) (int, int) { + // Fast-path for common case of a single byte, value 0..127. + b := data[index] + index++ + if b < 0x80 { + return int(b), index + } + size := int(b & 0x7F) + for shift := uint(7); ; shift += 7 { + // Just panic if we go of the end of data, since all Labels strings are constructed internally and + // malformed data indicates a bug, or memory corruption. + b := data[index] + index++ + size |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + return size, index +} + +func decodeString(t *nameTable, data string, index int) (string, int) { + var num int + num, index = decodeVarint(data, index) + return t.ToName(num), index +} + +// Bytes returns ls as a byte slice. +// It uses non-printing characters and so should not be used for printing. +func (ls Labels) Bytes(buf []byte) []byte { + b := bytes.NewBuffer(buf[:0]) + for i := 0; i < len(ls.data); { + if i > 0 { + b.WriteByte(seps[0]) + } + var name, value string + name, i = decodeString(ls.syms, ls.data, i) + value, i = decodeString(ls.syms, ls.data, i) + b.WriteString(name) + b.WriteByte(seps[0]) + b.WriteString(value) + } + return b.Bytes() +} + +// IsZero implements yaml.IsZeroer - if we don't have this then 'omitempty' fields are always omitted. +func (ls Labels) IsZero() bool { + return len(ls.data) == 0 +} + +// MatchLabels returns a subset of Labels that matches/does not match with the provided label names based on the 'on' boolean. +// If on is set to true, it returns the subset of labels that match with the provided label names and its inverse when 'on' is set to false. +// TODO: This is only used in printing an error message +func (ls Labels) MatchLabels(on bool, names ...string) Labels { + b := NewBuilder(ls) + if on { + b.Keep(names...) + } else { + b.Del(MetricName) + b.Del(names...) + } + return b.Labels() +} + +// Hash returns a hash value for the label set. +// Note: the result is not guaranteed to be consistent across different runs of Prometheus. +func (ls Labels) Hash() uint64 { + // Use xxhash.Sum64(b) for fast path as it's faster. + b := make([]byte, 0, 1024) + for pos := 0; pos < len(ls.data); { + name, newPos := decodeString(ls.syms, ls.data, pos) + value, newPos := decodeString(ls.syms, ls.data, newPos) + if len(b)+len(name)+len(value)+2 >= cap(b) { + // If labels entry is 1KB+, hash the rest of them via Write(). + h := xxhash.New() + _, _ = h.Write(b) + for pos < len(ls.data) { + name, pos = decodeString(ls.syms, ls.data, pos) + value, pos = decodeString(ls.syms, ls.data, pos) + _, _ = h.WriteString(name) + _, _ = h.Write(seps) + _, _ = h.WriteString(value) + _, _ = h.Write(seps) + } + return h.Sum64() + } + + b = append(b, name...) + b = append(b, seps[0]) + b = append(b, value...) + b = append(b, seps[0]) + pos = newPos + } + return xxhash.Sum64(b) +} + +// HashForLabels returns a hash value for the labels matching the provided names. +// 'names' have to be sorted in ascending order. +func (ls Labels) HashForLabels(b []byte, names ...string) (uint64, []byte) { + b = b[:0] + j := 0 + for i := 0; i < len(ls.data); { + var name, value string + name, i = decodeString(ls.syms, ls.data, i) + value, i = decodeString(ls.syms, ls.data, i) + for j < len(names) && names[j] < name { + j++ + } + if j == len(names) { + break + } + if name == names[j] { + b = append(b, name...) + b = append(b, seps[0]) + b = append(b, value...) + b = append(b, seps[0]) + } + } + + return xxhash.Sum64(b), b +} + +// HashWithoutLabels returns a hash value for all labels except those matching +// the provided names. +// 'names' have to be sorted in ascending order. +func (ls Labels) HashWithoutLabels(b []byte, names ...string) (uint64, []byte) { + b = b[:0] + j := 0 + for i := 0; i < len(ls.data); { + var name, value string + name, i = decodeString(ls.syms, ls.data, i) + value, i = decodeString(ls.syms, ls.data, i) + for j < len(names) && names[j] < name { + j++ + } + if name == MetricName || (j < len(names) && name == names[j]) { + continue + } + b = append(b, name...) + b = append(b, seps[0]) + b = append(b, value...) + b = append(b, seps[0]) + } + return xxhash.Sum64(b), b +} + +// BytesWithLabels is just as Bytes(), but only for labels matching names. +// 'names' have to be sorted in ascending order. +func (ls Labels) BytesWithLabels(buf []byte, names ...string) []byte { + b := bytes.NewBuffer(buf[:0]) + j := 0 + for pos := 0; pos < len(ls.data); { + lName, newPos := decodeString(ls.syms, ls.data, pos) + lValue, newPos := decodeString(ls.syms, ls.data, newPos) + for j < len(names) && names[j] < lName { + j++ + } + if j == len(names) { + break + } + if lName == names[j] { + if b.Len() > 1 { + b.WriteByte(seps[0]) + } + b.WriteString(lName) + b.WriteByte(seps[0]) + b.WriteString(lValue) + } + pos = newPos + } + return b.Bytes() +} + +// BytesWithoutLabels is just as Bytes(), but only for labels not matching names. +// 'names' have to be sorted in ascending order. +func (ls Labels) BytesWithoutLabels(buf []byte, names ...string) []byte { + b := bytes.NewBuffer(buf[:0]) + j := 0 + for pos := 0; pos < len(ls.data); { + lName, newPos := decodeString(ls.syms, ls.data, pos) + lValue, newPos := decodeString(ls.syms, ls.data, newPos) + for j < len(names) && names[j] < lName { + j++ + } + if j == len(names) || lName != names[j] { + if b.Len() > 1 { + b.WriteByte(seps[0]) + } + b.WriteString(lName) + b.WriteByte(seps[0]) + b.WriteString(lValue) + } + pos = newPos + } + return b.Bytes() +} + +// Copy returns a copy of the labels. +func (ls Labels) Copy() Labels { + buf := append([]byte{}, ls.data...) + return Labels{syms: ls.syms, data: yoloString(buf)} +} + +// Get returns the value for the label with the given name. +// Returns an empty string if the label doesn't exist. +func (ls Labels) Get(name string) string { + if name == "" { // Avoid crash in loop if someone asks for "". + return "" // Prometheus does not store blank label names. + } + for i := 0; i < len(ls.data); { + var lName, lValue string + lName, i = decodeString(ls.syms, ls.data, i) + if lName == name { + lValue, _ = decodeString(ls.syms, ls.data, i) + return lValue + } else if lName[0] > name[0] { // Stop looking if we've gone past. + break + } + _, i = decodeVarint(ls.data, i) + } + return "" +} + +// Has returns true if the label with the given name is present. +func (ls Labels) Has(name string) bool { + if name == "" { // Avoid crash in loop if someone asks for "". + return false // Prometheus does not store blank label names. + } + for i := 0; i < len(ls.data); { + var lName string + lName, i = decodeString(ls.syms, ls.data, i) + if lName == name { + return true + } else if lName[0] > name[0] { // Stop looking if we've gone past. + break + } + _, i = decodeVarint(ls.data, i) + } + return false +} + +// HasDuplicateLabelNames returns whether ls has duplicate label names. +// It assumes that the labelset is sorted. +func (ls Labels) HasDuplicateLabelNames() (string, bool) { + prevNum := -1 + for i := 0; i < len(ls.data); { + var lNum int + lNum, i = decodeVarint(ls.data, i) + _, i = decodeVarint(ls.data, i) + if lNum == prevNum { + return ls.syms.ToName(lNum), true + } + prevNum = lNum + } + return "", false +} + +// WithoutEmpty returns the labelset without empty labels. +// May return the same labelset. +func (ls Labels) WithoutEmpty() Labels { + if ls.IsEmpty() { + return ls + } + // Idea: have a constant symbol for blank, then we don't have to look it up. + blank, ok := ls.syms.symbolTable.checkNum("") + if !ok { // Symbol table has no entry for blank - none of the values can be blank. + return ls + } + for pos := 0; pos < len(ls.data); { + _, newPos := decodeVarint(ls.data, pos) + lValue, newPos := decodeVarint(ls.data, newPos) + if lValue != blank { + pos = newPos + continue + } + // Do not copy the slice until it's necessary. + // TODO: could optimise the case where all blanks are at the end. + // Note: we size the new buffer on the assumption there is exactly one blank value. + buf := make([]byte, pos, pos+(len(ls.data)-newPos)) + copy(buf, ls.data[:pos]) // copy the initial non-blank labels + pos = newPos // move past the first blank value + for pos < len(ls.data) { + var newPos int + _, newPos = decodeVarint(ls.data, pos) + lValue, newPos = decodeVarint(ls.data, newPos) + if lValue != blank { + buf = append(buf, ls.data[pos:newPos]...) + } + pos = newPos + } + return Labels{syms: ls.syms, data: yoloString(buf)} + } + return ls +} + +// Equal returns whether the two label sets are equal. +func Equal(a, b Labels) bool { + if a.syms == b.syms { + return a.data == b.data + } + + la, lb := len(a.data), len(b.data) + ia, ib := 0, 0 + for ia < la && ib < lb { + var aValue, bValue string + aValue, ia = decodeString(a.syms, a.data, ia) + bValue, ib = decodeString(b.syms, b.data, ib) + if aValue != bValue { + return false + } + } + if ia != la || ib != lb { + return false + } + return true +} + +// EmptyLabels returns an empty Labels value, for convenience. +func EmptyLabels() Labels { + return Labels{} +} + +func yoloString(b []byte) string { + return *((*string)(unsafe.Pointer(&b))) +} + +// New returns a sorted Labels from the given labels. +// The caller has to guarantee that all label names are unique. +// Note this function is not efficient; should not be used in performance-critical places. +func New(ls ...Label) Labels { + slices.SortFunc(ls, func(a, b Label) int { return strings.Compare(a.Name, b.Name) }) + syms := NewSymbolTable() + var stackSpace [16]int + size, nums := mapLabelsToNumbers(syms, ls, stackSpace[:]) + buf := make([]byte, size) + marshalNumbersToSizedBuffer(nums, buf) + return Labels{syms: syms.nameTable, data: yoloString(buf)} +} + +// FromStrings creates new labels from pairs of strings. +func FromStrings(ss ...string) Labels { + if len(ss)%2 != 0 { + panic("invalid number of strings") + } + ls := make([]Label, 0, len(ss)/2) + for i := 0; i < len(ss); i += 2 { + ls = append(ls, Label{Name: ss[i], Value: ss[i+1]}) + } + + return New(ls...) +} + +// Compare compares the two label sets. +// The result will be 0 if a==b, <0 if a < b, and >0 if a > b. +func Compare(a, b Labels) int { + la, lb := len(a.data), len(b.data) + ia, ib := 0, 0 + for ia < la && ib < lb { + var aName, bName string + aName, ia = decodeString(a.syms, a.data, ia) + bName, ib = decodeString(b.syms, b.data, ib) + if aName != bName { + if aName < bName { + return -1 + } + return 1 + } + var aValue, bValue string + aValue, ia = decodeString(a.syms, a.data, ia) + bValue, ib = decodeString(b.syms, b.data, ib) + if aValue != bValue { + if aValue < bValue { + return -1 + } + return 1 + } + } + // If all labels so far were in common, the set with fewer labels comes first. + return (la - ia) - (lb - ib) +} + +// Copy labels from b on top of whatever was in ls previously, reusing memory or expanding if needed. +func (ls *Labels) CopyFrom(b Labels) { + *ls = b // Straightforward memberwise copy is all we need. +} + +// IsEmpty returns true if ls represents an empty set of labels. +func (ls Labels) IsEmpty() bool { + return len(ls.data) == 0 +} + +// Len returns the number of labels; it is relatively slow. +func (ls Labels) Len() int { + count := 0 + for i := 0; i < len(ls.data); { + _, i = decodeVarint(ls.data, i) + _, i = decodeVarint(ls.data, i) + count++ + } + return count +} + +// Range calls f on each label. +func (ls Labels) Range(f func(l Label)) { + for i := 0; i < len(ls.data); { + var lName, lValue string + lName, i = decodeString(ls.syms, ls.data, i) + lValue, i = decodeString(ls.syms, ls.data, i) + f(Label{Name: lName, Value: lValue}) + } +} + +// Validate calls f on each label. If f returns a non-nil error, then it returns that error cancelling the iteration. +func (ls Labels) Validate(f func(l Label) error) error { + for i := 0; i < len(ls.data); { + var lName, lValue string + lName, i = decodeString(ls.syms, ls.data, i) + lValue, i = decodeString(ls.syms, ls.data, i) + err := f(Label{Name: lName, Value: lValue}) + if err != nil { + return err + } + } + return nil +} + +// InternStrings calls intern on every string value inside ls, replacing them with what it returns. +func (ls *Labels) InternStrings(intern func(string) string) { + // TODO: remove these calls as there is nothing to do. +} + +// ReleaseStrings calls release on every string value inside ls. +func (ls Labels) ReleaseStrings(release func(string)) { + // TODO: remove these calls as there is nothing to do. +} + +// DropMetricName returns Labels with "__name__" removed. +func (ls Labels) DropMetricName() Labels { + for i := 0; i < len(ls.data); { + lName, i2 := decodeString(ls.syms, ls.data, i) + _, i2 = decodeVarint(ls.data, i2) + if lName == MetricName { + if i == 0 { // Make common case fast with no allocations. + ls.data = ls.data[i2:] + } else { + ls.data = ls.data[:i] + ls.data[i2:] + } + break + } else if lName[0] > MetricName[0] { // Stop looking if we've gone past. + break + } + i = i2 + } + return ls +} + +// Builder allows modifying Labels. +type Builder struct { + syms *SymbolTable + nums []int + base Labels + del []string + add []Label +} + +// NewBuilderWithSymbolTable returns a new LabelsBuilder not based on any labels, but with the SymbolTable. +func NewBuilderWithSymbolTable(s *SymbolTable) *Builder { + return &Builder{ + syms: s, + } +} + +// Reset clears all current state for the builder. +func (b *Builder) Reset(base Labels) { + if base.syms != nil { // If base has a symbol table, use that. + b.syms = base.syms.symbolTable + } else if b.syms == nil { // Or continue using previous symbol table in builder. + b.syms = NewSymbolTable() // Don't do this in performance-sensitive code. + } + + b.base = base + b.del = b.del[:0] + b.add = b.add[:0] + base.Range(func(l Label) { + if l.Value == "" { + b.del = append(b.del, l.Name) + } + }) +} + +// Labels returns the labels from the builder. +// If no modifications were made, the original labels are returned. +func (b *Builder) Labels() Labels { + if len(b.del) == 0 && len(b.add) == 0 { + return b.base + } + + slices.SortFunc(b.add, func(a, b Label) int { return strings.Compare(a.Name, b.Name) }) + slices.Sort(b.del) + a, d, newSize := 0, 0, 0 + + newSize, b.nums = mapLabelsToNumbers(b.syms, b.add, b.nums) + bufSize := len(b.base.data) + newSize + buf := make([]byte, 0, bufSize) + for pos := 0; pos < len(b.base.data); { + oldPos := pos + var lName string + lName, pos = decodeString(b.base.syms, b.base.data, pos) + _, pos = decodeVarint(b.base.data, pos) + for d < len(b.del) && b.del[d] < lName { + d++ + } + if d < len(b.del) && b.del[d] == lName { + continue // This label has been deleted. + } + for ; a < len(b.add) && b.add[a].Name < lName; a++ { + buf = appendLabelTo(b.nums[a*2], b.nums[a*2+1], buf) // Insert label that was not in the base set. + } + if a < len(b.add) && b.add[a].Name == lName { + buf = appendLabelTo(b.nums[a*2], b.nums[a*2+1], buf) + a++ + continue // This label has been replaced. + } + buf = append(buf, b.base.data[oldPos:pos]...) // If base had a symbol-table we are using it, so we don't need to look up these symbols. + } + // We have come to the end of the base set; add any remaining labels. + for ; a < len(b.add); a++ { + buf = appendLabelTo(b.nums[a*2], b.nums[a*2+1], buf) + } + return Labels{syms: b.syms.nameTable, data: yoloString(buf)} +} + +func marshalNumbersToSizedBuffer(nums []int, data []byte) int { + i := len(data) + for index := len(nums) - 1; index >= 0; index-- { + i = encodeVarint(data, i, nums[index]) + } + return len(data) - i +} + +func sizeVarint(x uint64) (n int) { + // Most common case first + if x < 1<<7 { + return 1 + } + if x >= 1<<56 { + return 9 + } + if x >= 1<<28 { + x >>= 28 + n = 4 + } + if x >= 1<<14 { + x >>= 14 + n += 2 + } + if x >= 1<<7 { + n++ + } + return n + 1 +} + +func encodeVarintSlow(data []byte, offset int, v uint64) int { + offset -= sizeVarint(v) + base := offset + for v >= 1<<7 { + data[offset] = uint8(v&0x7f | 0x80) + v >>= 7 + offset++ + } + data[offset] = uint8(v) + return base +} + +// Special code for the common case that a value is less than 128 +func encodeVarint(data []byte, offset, v int) int { + if v < 1<<7 { + offset-- + data[offset] = uint8(v) + return offset + } + return encodeVarintSlow(data, offset, uint64(v)) +} + +// Map all the strings in lbls to the symbol table; return the total size required to hold them and all the individual mappings. +func mapLabelsToNumbers(t *SymbolTable, lbls []Label, buf []int) (totalSize int, nums []int) { + nums = buf[:0] + t.mx.Lock() + defer t.mx.Unlock() + // we just encode name/value/name/value, without any extra tags or length bytes + for _, m := range lbls { + // strings are encoded as a single varint, the index into the symbol table. + i := t.toNumUnlocked(m.Name) + nums = append(nums, i) + totalSize += sizeVarint(uint64(i)) + i = t.toNumUnlocked(m.Value) + nums = append(nums, i) + totalSize += sizeVarint(uint64(i)) + } + return totalSize, nums +} + +func appendLabelTo(nameNum, valueNum int, buf []byte) []byte { + size := sizeVarint(uint64(nameNum)) + sizeVarint(uint64(valueNum)) + sizeRequired := len(buf) + size + if cap(buf) >= sizeRequired { + buf = buf[:sizeRequired] + } else { + bufSize := cap(buf) + // Double size of buffer each time it needs to grow, to amortise copying cost. + for bufSize < sizeRequired { + bufSize = bufSize*2 + 1 + } + newBuf := make([]byte, sizeRequired, bufSize) + copy(newBuf, buf) + buf = newBuf + } + i := sizeRequired + i = encodeVarint(buf, i, valueNum) + i = encodeVarint(buf, i, nameNum) + return buf +} + +// ScratchBuilder allows efficient construction of a Labels from scratch. +type ScratchBuilder struct { + syms *SymbolTable + nums []int + add []Label + output Labels + overwriteBuffer []byte +} + +// NewScratchBuilder creates a ScratchBuilder initialized for Labels with n entries. +// Warning: expensive; don't call in tight loops. +func NewScratchBuilder(n int) ScratchBuilder { + return ScratchBuilder{syms: NewSymbolTable(), add: make([]Label, 0, n)} +} + +// NewScratchBuilderWithSymbolTable creates a ScratchBuilder initialized for Labels with n entries. +func NewScratchBuilderWithSymbolTable(s *SymbolTable, n int) ScratchBuilder { + return ScratchBuilder{syms: s, add: make([]Label, 0, n)} +} + +func (b *ScratchBuilder) SetSymbolTable(s *SymbolTable) { + b.syms = s +} + +func (b *ScratchBuilder) Reset() { + b.add = b.add[:0] + b.output = EmptyLabels() +} + +// Add a name/value pair. +// Note if you Add the same name twice you will get a duplicate label, which is invalid. +func (b *ScratchBuilder) Add(name, value string) { + b.add = append(b.add, Label{Name: name, Value: value}) +} + +// Add a name/value pair, using []byte instead of string to reduce memory allocations. +// The values must remain live until Labels() is called. +func (b *ScratchBuilder) UnsafeAddBytes(name, value []byte) { + b.add = append(b.add, Label{Name: yoloString(name), Value: yoloString(value)}) +} + +// Sort the labels added so far by name. +func (b *ScratchBuilder) Sort() { + slices.SortFunc(b.add, func(a, b Label) int { return strings.Compare(a.Name, b.Name) }) +} + +// Assign is for when you already have a Labels which you want this ScratchBuilder to return. +func (b *ScratchBuilder) Assign(l Labels) { + b.output = l +} + +// Labels returns the name/value pairs added as a Labels object. Calling Add() after Labels() has no effect. +// Note: if you want them sorted, call Sort() first. +func (b *ScratchBuilder) Labels() Labels { + if b.output.IsEmpty() { + var size int + size, b.nums = mapLabelsToNumbers(b.syms, b.add, b.nums) + buf := make([]byte, size) + marshalNumbersToSizedBuffer(b.nums, buf) + b.output = Labels{syms: b.syms.nameTable, data: yoloString(buf)} + } + return b.output +} + +// Write the newly-built Labels out to ls, reusing an internal buffer. +// Callers must ensure that there are no other references to ls, or any strings fetched from it. +func (b *ScratchBuilder) Overwrite(ls *Labels) { + var size int + size, b.nums = mapLabelsToNumbers(b.syms, b.add, b.nums) + if size <= cap(b.overwriteBuffer) { + b.overwriteBuffer = b.overwriteBuffer[:size] + } else { + b.overwriteBuffer = make([]byte, size) + } + marshalNumbersToSizedBuffer(b.nums, b.overwriteBuffer) + ls.syms = b.syms.nameTable + ls.data = yoloString(b.overwriteBuffer) +} diff --git a/model/labels/sharding_dedupelabels.go b/model/labels/sharding_dedupelabels.go new file mode 100644 index 000000000..a4ff28512 --- /dev/null +++ b/model/labels/sharding_dedupelabels.go @@ -0,0 +1,52 @@ +// Copyright 2020 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build dedupelabels + +package labels + +import ( + "github.com/cespare/xxhash/v2" +) + +// StableHash is a labels hashing implementation which is guaranteed to not change over time. +// This function should be used whenever labels hashing backward compatibility must be guaranteed. +func StableHash(ls Labels) uint64 { + // Use xxhash.Sum64(b) for fast path as it's faster. + b := make([]byte, 0, 1024) + for pos := 0; pos < len(ls.data); { + name, newPos := decodeString(ls.syms, ls.data, pos) + value, newPos := decodeString(ls.syms, ls.data, newPos) + if len(b)+len(name)+len(value)+2 >= cap(b) { + // If labels entry is 1KB+, hash the rest of them via Write(). + h := xxhash.New() + _, _ = h.Write(b) + for pos < len(ls.data) { + name, pos = decodeString(ls.syms, ls.data, pos) + value, pos = decodeString(ls.syms, ls.data, pos) + _, _ = h.WriteString(name) + _, _ = h.Write(seps) + _, _ = h.WriteString(value) + _, _ = h.Write(seps) + } + return h.Sum64() + } + + b = append(b, name...) + b = append(b, seps[0]) + b = append(b, value...) + b = append(b, seps[0]) + pos = newPos + } + return xxhash.Sum64(b) +}