Merge pull request #14333 from bboreham/faster-dedupelabels

[PERF] Labels: faster encoding for -tags dedupelabels
pull/14368/head
Bryan Boreham 2024-06-30 12:26:48 +01:00 committed by GitHub
commit 675d02cd0b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 148 additions and 51 deletions

View File

@ -104,30 +104,39 @@ func (t *nameTable) ToName(num int) string {
return t.byNum[num] return t.byNum[num]
} }
// "Varint" in this file is non-standard: we encode small numbers (up to 32767) in 2 bytes,
// because we expect most Prometheus instances to have more than 127 unique strings.
// And we don't encode numbers that need more than 4 bytes, because we don't expect more than 536,870,912 unique strings.
func decodeVarint(data string, index int) (int, int) { func decodeVarint(data string, index int) (int, int) {
// Fast-path for common case of a single byte, value 0..127. b := int(data[index]) + int(data[index+1])<<8
b := data[index] index += 2
if b < 0x8000 {
return b, index
}
return decodeVarintRest(b, data, index)
}
func decodeVarintRest(b int, data string, index int) (int, int) {
value := int(b & 0x7FFF)
b = int(data[index])
index++ index++
if b < 0x80 { if b < 0x80 {
return int(b), index return value | (b << 15), index
} }
value := int(b & 0x7F)
for shift := uint(7); ; shift += 7 { value |= (b & 0x7f) << 15
// Just panic if we go off the end of data, since all Labels strings are constructed internally and b = int(data[index])
// malformed data indicates a bug, or memory corruption.
b := data[index]
index++ index++
value |= int(b&0x7F) << shift return value | (b << 22), index
if b < 0x80 {
break
}
}
return value, index
} }
func decodeString(t *nameTable, data string, index int) (string, int) { func decodeString(t *nameTable, data string, index int) (string, int) {
var num int // Copy decodeVarint here, because the Go compiler says it's too big to inline.
num, index = decodeVarint(data, index) num := int(data[index]) + int(data[index+1])<<8
index += 2
if num >= 0x8000 {
num, index = decodeVarintRest(num, data, index)
}
return t.ToName(num), index return t.ToName(num), index
} }
@ -321,7 +330,12 @@ func (ls Labels) Get(name string) string {
} else if lName[0] > name[0] { // Stop looking if we've gone past. } else if lName[0] > name[0] { // Stop looking if we've gone past.
break break
} }
_, i = decodeVarint(ls.data, i) // Copy decodeVarint here, because the Go compiler says it's too big to inline.
num := int(ls.data[i]) + int(ls.data[i+1])<<8
i += 2
if num >= 0x8000 {
_, i = decodeVarintRest(num, ls.data, i)
}
} }
return "" return ""
} }
@ -339,7 +353,12 @@ func (ls Labels) Has(name string) bool {
} else if lName[0] > name[0] { // Stop looking if we've gone past. } else if lName[0] > name[0] { // Stop looking if we've gone past.
break break
} }
_, i = decodeVarint(ls.data, i) // Copy decodeVarint here, because the Go compiler says it's too big to inline.
num := int(ls.data[i]) + int(ls.data[i+1])<<8
i += 2
if num >= 0x8000 {
_, i = decodeVarintRest(num, ls.data, i)
}
} }
return false return false
} }
@ -641,29 +660,24 @@ func marshalNumbersToSizedBuffer(nums []int, data []byte) int {
func sizeVarint(x uint64) (n int) { func sizeVarint(x uint64) (n int) {
// Most common case first // Most common case first
if x < 1<<7 { if x < 1<<15 {
return 1 return 2
} }
if x >= 1<<56 { if x < 1<<22 {
return 9 return 3
} }
if x >= 1<<28 { if x >= 1<<29 {
x >>= 28 panic("Number too large to represent")
n = 4
} }
if x >= 1<<14 { return 4
x >>= 14
n += 2
}
if x >= 1<<7 {
n++
}
return n + 1
} }
func encodeVarintSlow(data []byte, offset int, v uint64) int { func encodeVarintSlow(data []byte, offset int, v uint64) int {
offset -= sizeVarint(v) offset -= sizeVarint(v)
base := offset base := offset
data[offset] = uint8(v)
v >>= 8
offset++
for v >= 1<<7 { for v >= 1<<7 {
data[offset] = uint8(v&0x7f | 0x80) data[offset] = uint8(v&0x7f | 0x80)
v >>= 7 v >>= 7
@ -673,11 +687,12 @@ func encodeVarintSlow(data []byte, offset int, v uint64) int {
return base return base
} }
// Special code for the common case that a value is less than 128 // Special code for the common case that a value is less than 32768
func encodeVarint(data []byte, offset, v int) int { func encodeVarint(data []byte, offset, v int) int {
if v < 1<<7 { if v < 1<<15 {
offset-- offset -= 2
data[offset] = uint8(v) data[offset] = uint8(v)
data[offset+1] = uint8(v >> 8)
return offset return offset
} }
return encodeVarintSlow(data, offset, uint64(v)) return encodeVarintSlow(data, offset, uint64(v))

View File

@ -0,0 +1,50 @@
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build dedupelabels
package labels
import (
"testing"
"github.com/stretchr/testify/require"
)
// TestVarint encodes values on either side of each encoded-size boundary
// (2, 3 and 4 bytes) into the tail of a scratch buffer, checks the exact bytes
// written, and then checks that decodeVarint reads the same value back and
// finishes at the end of the buffer. It also checks that encoding 1<<29 panics.
func TestVarint(t *testing.T) {
	type varintCase struct {
		v        int
		expected []byte
	}

	var scratch [16]byte
	end := len(scratch)

	for _, tc := range []varintCase{
		{v: 0, expected: []byte{0x00, 0x00}},
		{v: 1, expected: []byte{0x01, 0x00}},
		{v: 2, expected: []byte{0x02, 0x00}},
		{v: 0x7FFF, expected: []byte{0xFF, 0x7F}},
		{v: 0x8000, expected: []byte{0x00, 0x80, 0x01}},
		{v: 0x8001, expected: []byte{0x01, 0x80, 0x01}},
		{v: 0x3FFFFF, expected: []byte{0xFF, 0xFF, 0x7F}},
		{v: 0x400000, expected: []byte{0x00, 0x80, 0x80, 0x01}},
		{v: 0x400001, expected: []byte{0x01, 0x80, 0x80, 0x01}},
		{v: 0x1FFFFFFF, expected: []byte{0xFF, 0xFF, 0xFF, 0x7F}},
	} {
		// encodeVarint is given the end offset and returns where the encoding starts.
		start := encodeVarint(scratch[:], end, tc.v)
		require.Equal(t, len(tc.expected), end-start)
		require.Equal(t, tc.expected, scratch[start:])

		// Decoding from the start offset must consume exactly the encoded bytes.
		decoded, next := decodeVarint(string(scratch[:]), start)
		require.Equal(t, tc.v, decoded)
		require.Equal(t, end, next)
	}

	// The first value past the largest test case must be rejected with a panic.
	require.Panics(t, func() { encodeVarint(scratch[:], end, 1<<29) })
}

View File

@ -466,6 +466,38 @@ func TestLabels_DropMetricName(t *testing.T) {
require.True(t, Equal(original, check)) require.True(t, Equal(original, check))
} }
// ScratchBuilderForBenchmark returns a ScratchBuilder that has already been
// used once to build 256 filler name/value pairs and has then been reset.
//
// Only relevant to -tags dedupelabels: the filler pairs stuff the symbol table
// before the real labels are added, to avoid having everything fitting into 1 byte.
func ScratchBuilderForBenchmark() ScratchBuilder {
	const fillerPairs = 256

	builder := NewScratchBuilder(fillerPairs)
	for n := 0; n < fillerPairs; n++ {
		name := fmt.Sprintf("name%d", n)
		value := fmt.Sprintf("value%d", n)
		builder.Add(name, value)
	}
	// Build once and discard the result, then clear the builder for the caller.
	builder.Labels()
	builder.Reset()
	return builder
}
// NewForBenchmark adds every label in ls to a builder obtained from
// ScratchBuilderForBenchmark, sorts the builder, and returns the resulting Labels.
func NewForBenchmark(ls ...Label) Labels {
	builder := ScratchBuilderForBenchmark()
	for i := range ls {
		builder.Add(ls[i].Name, ls[i].Value)
	}
	builder.Sort()
	return builder.Labels()
}
// FromStringsForBenchmark treats ss as alternating name, value strings, adds
// each pair to a builder obtained from ScratchBuilderForBenchmark, sorts the
// builder, and returns the resulting Labels.
// It panics if ss holds an odd number of strings.
func FromStringsForBenchmark(ss ...string) Labels {
	if len(ss)%2 == 1 {
		panic("invalid number of strings")
	}
	builder := ScratchBuilderForBenchmark()
	// Walk the value positions (1, 3, 5, ...); each name sits just before its value.
	for valueAt := 1; valueAt < len(ss); valueAt += 2 {
		builder.Add(ss[valueAt-1], ss[valueAt])
	}
	builder.Sort()
	return builder.Labels()
}
// BenchmarkLabels_Get was written to check whether a binary search can improve the performance vs the linear search implementation // BenchmarkLabels_Get was written to check whether a binary search can improve the performance vs the linear search implementation
// The results have shown that binary search would only be better when searching last labels in scenarios with more than 10 labels. // The results have shown that binary search would only be better when searching last labels in scenarios with more than 10 labels.
// In the following list, `old` is the linear search while `new` is the binary search implementation (without calling sort.Search, which performs even worse here) // In the following list, `old` is the linear search while `new` is the binary search implementation (without calling sort.Search, which performs even worse here)
@ -488,7 +520,7 @@ func BenchmarkLabels_Get(b *testing.B) {
} }
for _, size := range []int{5, 10, maxLabels} { for _, size := range []int{5, 10, maxLabels} {
b.Run(fmt.Sprintf("with %d labels", size), func(b *testing.B) { b.Run(fmt.Sprintf("with %d labels", size), func(b *testing.B) {
labels := New(allLabels[:size]...) labels := NewForBenchmark(allLabels[:size]...)
for _, scenario := range []struct { for _, scenario := range []struct {
desc, label string desc, label string
}{ }{
@ -520,33 +552,33 @@ var comparisonBenchmarkScenarios = []struct {
}{ }{
{ {
"equal", "equal",
FromStrings("a_label_name", "a_label_value", "another_label_name", "another_label_value"), FromStringsForBenchmark("a_label_name", "a_label_value", "another_label_name", "another_label_value"),
FromStrings("a_label_name", "a_label_value", "another_label_name", "another_label_value"), FromStringsForBenchmark("a_label_name", "a_label_value", "another_label_name", "another_label_value"),
}, },
{ {
"not equal", "not equal",
FromStrings("a_label_name", "a_label_value", "another_label_name", "another_label_value"), FromStringsForBenchmark("a_label_name", "a_label_value", "another_label_name", "another_label_value"),
FromStrings("a_label_name", "a_label_value", "another_label_name", "a_different_label_value"), FromStringsForBenchmark("a_label_name", "a_label_value", "another_label_name", "a_different_label_value"),
}, },
{ {
"different sizes", "different sizes",
FromStrings("a_label_name", "a_label_value", "another_label_name", "another_label_value"), FromStringsForBenchmark("a_label_name", "a_label_value", "another_label_name", "another_label_value"),
FromStrings("a_label_name", "a_label_value"), FromStringsForBenchmark("a_label_name", "a_label_value"),
}, },
{ {
"lots", "lots",
FromStrings("aaa", "bbb", "ccc", "ddd", "eee", "fff", "ggg", "hhh", "iii", "jjj", "kkk", "lll", "mmm", "nnn", "ooo", "ppp", "qqq", "rrz"), FromStringsForBenchmark("aaa", "bbb", "ccc", "ddd", "eee", "fff", "ggg", "hhh", "iii", "jjj", "kkk", "lll", "mmm", "nnn", "ooo", "ppp", "qqq", "rrz"),
FromStrings("aaa", "bbb", "ccc", "ddd", "eee", "fff", "ggg", "hhh", "iii", "jjj", "kkk", "lll", "mmm", "nnn", "ooo", "ppp", "qqq", "rrr"), FromStringsForBenchmark("aaa", "bbb", "ccc", "ddd", "eee", "fff", "ggg", "hhh", "iii", "jjj", "kkk", "lll", "mmm", "nnn", "ooo", "ppp", "qqq", "rrr"),
}, },
{ {
"real long equal", "real long equal",
FromStrings("__name__", "kube_pod_container_status_last_terminated_exitcode", "cluster", "prod-af-north-0", " container", "prometheus", "instance", "kube-state-metrics-0:kube-state-metrics:ksm", "job", "kube-state-metrics/kube-state-metrics", " namespace", "observability-prometheus", "pod", "observability-prometheus-0", "uid", "d3ec90b2-4975-4607-b45d-b9ad64bb417e"), FromStringsForBenchmark("__name__", "kube_pod_container_status_last_terminated_exitcode", "cluster", "prod-af-north-0", " container", "prometheus", "instance", "kube-state-metrics-0:kube-state-metrics:ksm", "job", "kube-state-metrics/kube-state-metrics", " namespace", "observability-prometheus", "pod", "observability-prometheus-0", "uid", "d3ec90b2-4975-4607-b45d-b9ad64bb417e"),
FromStrings("__name__", "kube_pod_container_status_last_terminated_exitcode", "cluster", "prod-af-north-0", " container", "prometheus", "instance", "kube-state-metrics-0:kube-state-metrics:ksm", "job", "kube-state-metrics/kube-state-metrics", " namespace", "observability-prometheus", "pod", "observability-prometheus-0", "uid", "d3ec90b2-4975-4607-b45d-b9ad64bb417e"), FromStringsForBenchmark("__name__", "kube_pod_container_status_last_terminated_exitcode", "cluster", "prod-af-north-0", " container", "prometheus", "instance", "kube-state-metrics-0:kube-state-metrics:ksm", "job", "kube-state-metrics/kube-state-metrics", " namespace", "observability-prometheus", "pod", "observability-prometheus-0", "uid", "d3ec90b2-4975-4607-b45d-b9ad64bb417e"),
}, },
{ {
"real long different end", "real long different end",
FromStrings("__name__", "kube_pod_container_status_last_terminated_exitcode", "cluster", "prod-af-north-0", " container", "prometheus", "instance", "kube-state-metrics-0:kube-state-metrics:ksm", "job", "kube-state-metrics/kube-state-metrics", " namespace", "observability-prometheus", "pod", "observability-prometheus-0", "uid", "d3ec90b2-4975-4607-b45d-b9ad64bb417e"), FromStringsForBenchmark("__name__", "kube_pod_container_status_last_terminated_exitcode", "cluster", "prod-af-north-0", " container", "prometheus", "instance", "kube-state-metrics-0:kube-state-metrics:ksm", "job", "kube-state-metrics/kube-state-metrics", " namespace", "observability-prometheus", "pod", "observability-prometheus-0", "uid", "d3ec90b2-4975-4607-b45d-b9ad64bb417e"),
FromStrings("__name__", "kube_pod_container_status_last_terminated_exitcode", "cluster", "prod-af-north-0", " container", "prometheus", "instance", "kube-state-metrics-0:kube-state-metrics:ksm", "job", "kube-state-metrics/kube-state-metrics", " namespace", "observability-prometheus", "pod", "observability-prometheus-0", "uid", "deadbeef-0000-1111-2222-b9ad64bb417e"), FromStringsForBenchmark("__name__", "kube_pod_container_status_last_terminated_exitcode", "cluster", "prod-af-north-0", " container", "prometheus", "instance", "kube-state-metrics-0:kube-state-metrics:ksm", "job", "kube-state-metrics/kube-state-metrics", " namespace", "observability-prometheus", "pod", "observability-prometheus-0", "uid", "deadbeef-0000-1111-2222-b9ad64bb417e"),
}, },
} }
@ -834,7 +866,7 @@ func BenchmarkBuilder(b *testing.B) {
} }
func BenchmarkLabels_Copy(b *testing.B) { func BenchmarkLabels_Copy(b *testing.B) {
l := New(benchmarkLabels...) l := NewForBenchmark(benchmarkLabels...)
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
l = l.Copy() l = l.Copy()