[ENHANCEMENT] TSDB: Improve calculation of space used by labels (#13880)

* [ENHANCEMENT] TSDB: Improve calculation of space used by labels

The labels for each series in the Head take up some space in the
Postings index, but far more space in the `memSeries` structure.

Instead of having the Postings index calculate this overhead, which is
a layering violation, have the caller pass in a function to do it.

Provide three implementations of this function for the three Labels
versions.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
pull/15679/head
Bryan Boreham 2024-12-16 09:42:52 +00:00 committed by GitHub
parent 4d93f4217f
commit ac4f8a5e23
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 24 additions and 6 deletions

View File

@ -19,6 +19,7 @@ import (
"bytes"
"slices"
"strings"
"unsafe"
"github.com/cespare/xxhash/v2"
)
@ -488,3 +489,8 @@ func (b *ScratchBuilder) Labels() Labels {
// Overwrite replaces the contents of ls with the entries in b.add.
// The destination is truncated to zero length first, so its existing
// backing array is reused when it has enough capacity.
func (b *ScratchBuilder) Overwrite(ls *Labels) {
	dst := (*ls)[:0]
	dst = append(dst, b.add...)
	*ls = dst
}
// SizeOfLabels estimates the number of bytes needed to hold n copies of the
// label name=value. Each copy is costed as the byte length of both strings
// plus the size of a string header for each of them.
func SizeOfLabels(name, value string, n uint64) uint64 {
	nameBytes := uint64(len(name)) + uint64(unsafe.Sizeof(name))
	valueBytes := uint64(len(value)) + uint64(unsafe.Sizeof(value))
	perCopy := nameBytes + valueBytes
	return perCopy * n
}

View File

@ -815,3 +815,8 @@ func (b *ScratchBuilder) Overwrite(ls *Labels) {
ls.syms = b.syms.nameTable
ls.data = yoloString(b.overwriteBuffer)
}
// SizeOfLabels estimates the number of bytes needed to hold n copies of the
// label name=value. The text of the name and value is counted once, and each
// copy adds a fixed 4 bytes on top of that.
func SizeOfLabels(name, value string, n uint64) uint64 {
	const bytesPerCopy = 4 // Assuming most symbol-table entries are 2 bytes long.
	textBytes := uint64(len(name) + len(value))
	return textBytes + bytesPerCopy*n
}

View File

@ -691,3 +691,8 @@ func NewScratchBuilderWithSymbolTable(_ *SymbolTable, n int) ScratchBuilder {
// SetSymbolTable ignores its argument and does nothing; the body is
// intentionally empty in this implementation.
func (b *ScratchBuilder) SetSymbolTable(_ *SymbolTable) {
// no-op
}
// SizeOfLabels returns the approximate space required for n copies of a label.
func SizeOfLabels(name, value string, n uint64) uint64 {
return uint64(labelSize(&Label{Name: name, Value: value})) * n
}

View File

@ -1048,7 +1048,7 @@ func (h *Head) PostingsCardinalityStats(statsByLabelName string, limit int) *ind
return h.cardinalityCache
}
h.cardinalityCacheKey = cacheKey
h.cardinalityCache = h.postings.Stats(statsByLabelName, limit)
h.cardinalityCache = h.postings.Stats(statsByLabelName, limit, labels.SizeOfLabels)
h.lastPostingsStatsCall = time.Duration(time.Now().Unix()) * time.Second
return h.cardinalityCache

View File

@ -190,7 +190,8 @@ type PostingsStats struct {
}
// Stats calculates the cardinality statistics from postings.
func (p *MemPostings) Stats(label string, limit int) *PostingsStats {
// Caller can pass in a function which computes the space required for n series with a given label.
func (p *MemPostings) Stats(label string, limit int, labelSizeFunc func(string, string, uint64) uint64) *PostingsStats {
var size uint64
p.mtx.RLock()
@ -218,7 +219,7 @@ func (p *MemPostings) Stats(label string, limit int) *PostingsStats {
}
seriesCnt := uint64(len(values))
labelValuePairs.push(Stat{Name: n + "=" + name, Count: seriesCnt})
size += uint64(len(name)) * seriesCnt
size += labelSizeFunc(n, name, seriesCnt)
}
labelValueLength.push(Stat{Name: n, Count: size})
}

View File

@ -939,7 +939,7 @@ func BenchmarkPostings_Stats(b *testing.B) {
}
b.ResetTimer()
for n := 0; n < b.N; n++ {
p.Stats("__name__", 10)
p.Stats("__name__", 10, labels.SizeOfLabels)
}
}
@ -954,7 +954,8 @@ func TestMemPostingsStats(t *testing.T) {
p.Add(2, labels.FromStrings("label", "value1"))
// call the Stats method to calculate the cardinality statistics
stats := p.Stats("label", 10)
// passing a fake calculation so we get the same result regardless of compilation -tags.
stats := p.Stats("label", 10, func(name, value string, n uint64) uint64 { return uint64(len(name)+len(value)) * n })
// assert that the expected statistics were calculated
require.Equal(t, uint64(2), stats.CardinalityMetricsStats[0].Count)
@ -963,7 +964,7 @@ func TestMemPostingsStats(t *testing.T) {
require.Equal(t, uint64(3), stats.CardinalityLabelStats[0].Count)
require.Equal(t, "label", stats.CardinalityLabelStats[0].Name)
require.Equal(t, uint64(24), stats.LabelValueStats[0].Count)
require.Equal(t, uint64(44), stats.LabelValueStats[0].Count)
require.Equal(t, "label", stats.LabelValueStats[0].Name)
require.Equal(t, uint64(2), stats.LabelValuePairsStats[0].Count)