From 2ced2f6aeca39027ca0b2954d32cd67b014ef745 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Sun, 12 May 2024 17:41:07 +0100 Subject: [PATCH] [PERF] Labels: faster varint for dedupelabels Including tests. Signed-off-by: Bryan Boreham --- model/labels/labels_dedupelabels.go | 64 ++++++++++++------------ model/labels/labels_dedupelabels_test.go | 50 ++++++++++++++++++ 2 files changed, 81 insertions(+), 33 deletions(-) create mode 100644 model/labels/labels_dedupelabels_test.go diff --git a/model/labels/labels_dedupelabels.go b/model/labels/labels_dedupelabels.go index 972f5dc16..c0d84d02f 100644 --- a/model/labels/labels_dedupelabels.go +++ b/model/labels/labels_dedupelabels.go @@ -104,25 +104,27 @@ func (t *nameTable) ToName(num int) string { return t.byNum[num] } +// "Varint" in this file is non-standard: we encode small numbers (up to 32767) in 2 bytes, +// because we expect most Prometheus to have more than 127 unique strings. +// And we don't encode numbers larger than 4 bytes because we don't expect more than 536,870,912 unique strings. func decodeVarint(data string, index int) (int, int) { - // Fast-path for common case of a single byte, value 0..127. - b := data[index] + b := int(data[index]) + int(data[index+1])<<8 + index += 2 + if b < 0x8000 { + return b, index + } + + value := int(b & 0x7FFF) + b = int(data[index]) index++ if b < 0x80 { - return int(b), index - } - value := int(b & 0x7F) - for shift := uint(7); ; shift += 7 { - // Just panic if we go of the end of data, since all Labels strings are constructed internally and - // malformed data indicates a bug, or memory corruption. - b := data[index] - index++ - value |= int(b&0x7F) << shift - if b < 0x80 { - break - } + return value | (b << 15), index } - return value, index + + value |= (b & 0x7f) << 15 + b = int(data[index]) + index++ + return value | (b << 22), index } func decodeString(t *nameTable, data string, index int) (string, int) { @@ -641,29 +643,24 @@ func marshalNumbersToSizedBuffer(nums []int, data []byte) int { func sizeVarint(x uint64) (n int) { // Most common case first - if x < 1<<7 { - return 1 - } - if x >= 1<<56 { - return 9 + if x < 1<<15 { + return 2 } - if x >= 1<<28 { - x >>= 28 - n = 4 + if x < 1<<22 { + return 3 } - if x >= 1<<14 { - x >>= 14 - n += 2 + if x >= 1<<29 { + panic("Number too large to represent") } - if x >= 1<<7 { - n++ - } - return n + 1 + return 4 } func encodeVarintSlow(data []byte, offset int, v uint64) int { offset -= sizeVarint(v) base := offset + data[offset] = uint8(v) + v >>= 8 + offset++ for v >= 1<<7 { data[offset] = uint8(v&0x7f | 0x80) v >>= 7 @@ -673,11 +670,12 @@ func encodeVarintSlow(data []byte, offset int, v uint64) int { return base } -// Special code for the common case that a value is less than 128 +// Special code for the common case that a value is less than 32768 func encodeVarint(data []byte, offset, v int) int { - if v < 1<<7 { - offset-- + if v < 1<<15 { + offset -= 2 data[offset] = uint8(v) + data[offset+1] = uint8(v >> 8) return offset } return encodeVarintSlow(data, offset, uint64(v)) diff --git a/model/labels/labels_dedupelabels_test.go b/model/labels/labels_dedupelabels_test.go new file mode 100644 index 000000000..5ef9255c2 --- /dev/null +++ b/model/labels/labels_dedupelabels_test.go @@ -0,0 +1,50 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build dedupelabels + +package labels + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestVarint(t *testing.T) { + cases := []struct { + v int + expected []byte + }{ + {0, []byte{0, 0}}, + {1, []byte{1, 0}}, + {2, []byte{2, 0}}, + {0x7FFF, []byte{0xFF, 0x7F}}, + {0x8000, []byte{0x00, 0x80, 0x01}}, + {0x8001, []byte{0x01, 0x80, 0x01}}, + {0x3FFFFF, []byte{0xFF, 0xFF, 0x7F}}, + {0x400000, []byte{0x00, 0x80, 0x80, 0x01}}, + {0x400001, []byte{0x01, 0x80, 0x80, 0x01}}, + {0x1FFFFFFF, []byte{0xFF, 0xFF, 0xFF, 0x7F}}, + } + var buf [16]byte + for _, c := range cases { + n := encodeVarint(buf[:], len(buf), c.v) + require.Equal(t, len(c.expected), len(buf)-n) + require.Equal(t, c.expected, buf[n:]) + got, m := decodeVarint(string(buf[:]), n) + require.Equal(t, c.v, got) + require.Equal(t, len(buf), m) + } + require.Panics(t, func() { encodeVarint(buf[:], len(buf), 1<<29) }) +}