Coalesce series reads where we can.

When compacting, rather than reading every series in the index once per label name, process many label names in a single pass, but only when doing so won't use (much) more RAM than writing the special "all" postings index does.

original in-memory postings:
BenchmarkCompactionFromHead/labelnames=1,labelvalues=100000-4 1 1202383447 ns/op 158936496 B/op 1031511 allocs/op
BenchmarkCompactionFromHead/labelnames=10,labelvalues=10000-4 1 1141792706 ns/op 154453408 B/op 1093453 allocs/op
BenchmarkCompactionFromHead/labelnames=100,labelvalues=1000-4 1 1169288829 ns/op 161072336 B/op 1110021 allocs/op
BenchmarkCompactionFromHead/labelnames=1000,labelvalues=100-4 1 1115700103 ns/op 149480472 B/op 1129180 allocs/op
BenchmarkCompactionFromHead/labelnames=10000,labelvalues=10-4 1 1283813141 ns/op 162937800 B/op 1202771 allocs/op

before:
BenchmarkCompactionFromHead/labelnames=1,labelvalues=100000-4 1 1145195941 ns/op 131749984 B/op 834400 allocs/op
BenchmarkCompactionFromHead/labelnames=10,labelvalues=10000-4 1 1233526345 ns/op 127889416 B/op 897033 allocs/op
BenchmarkCompactionFromHead/labelnames=100,labelvalues=1000-4 1 1821942296 ns/op 131665648 B/op 914836 allocs/op
BenchmarkCompactionFromHead/labelnames=1000,labelvalues=100-4 1 8035568665 ns/op 123811832 B/op 934312 allocs/op
BenchmarkCompactionFromHead/labelnames=10000,labelvalues=10-4 1 71325926267 ns/op 140722648 B/op 1016824 allocs/op

after:
BenchmarkCompactionFromHead/labelnames=1,labelvalues=100000-4 1 1101429174 ns/op 129063496 B/op 832571 allocs/op
BenchmarkCompactionFromHead/labelnames=10,labelvalues=10000-4 1 1074466374 ns/op 124154888 B/op 894875 allocs/op
BenchmarkCompactionFromHead/labelnames=100,labelvalues=1000-4 1 1166510282 ns/op 128790648 B/op 912931 allocs/op
BenchmarkCompactionFromHead/labelnames=1000,labelvalues=100-4 1 1075013071 ns/op 120570696 B/op 933511 allocs/op
BenchmarkCompactionFromHead/labelnames=10000,labelvalues=10-4 1 1231673790 ns/op 138754288 B/op 1022791 allocs/op

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
5 years ago · 971dafdfbe
5 changed files with 85 additions and 32 deletions
--- a/tsdb/compact_test.go
+++ b/tsdb/compact_test.go
@ -860,6 +860,36 @@ func BenchmarkCompaction(b *testing.B) {
 	}
 }

+// BenchmarkCompactionFromHead times createBlockFromHead on a head populated
+// with totalSeries single-label series. Each sub-benchmark uses a different
+// split between the number of distinct label names and the number of values
+// per name, keeping their product equal to totalSeries.
+func BenchmarkCompactionFromHead(b *testing.B) {
+	dir, err := ioutil.TempDir("", "bench_compaction_from_head")
+	testutil.Ok(b, err)
+	defer func() {
+		testutil.Ok(b, os.RemoveAll(dir))
+	}()
+	totalSeries := 100000
+	for labelNames := 1; labelNames < totalSeries; labelNames *= 10 {
+		labelValues := totalSeries / labelNames
+		b.Run(fmt.Sprintf("labelnames=%d,labelvalues=%d", labelNames, labelValues), func(b *testing.B) {
+			h, err := NewHead(nil, nil, nil, 1000)
+			testutil.Ok(b, err)
+			// Close the head even when setup aborts, and surface its error.
+			defer func() {
+				testutil.Ok(b, h.Close())
+			}()
+			for ln := 0; ln < labelNames; ln++ {
+				app := h.Appender()
+				for lv := 0; lv < labelValues; lv++ {
+					// A failed append would silently shrink the data set being
+					// measured, so fail the benchmark instead of ignoring it.
+					_, err := app.Add(labels.FromStrings(fmt.Sprintf("%d", ln), fmt.Sprintf("%d%s%d", lv, postingsBenchSuffix, ln)), 0, 0)
+					testutil.Ok(b, err)
+				}
+				testutil.Ok(b, app.Commit())
+			}
+
+			// Exclude head population from the measurement.
+			b.ResetTimer()
+			b.ReportAllocs()
+			for i := 0; i < b.N; i++ {
+				createBlockFromHead(b, filepath.Join(dir, fmt.Sprintf("%d-%d", i, labelNames)), h)
+			}
+		})
+	}
+}
+
 // TestDisableAutoCompactions checks that we can
 // disable and enable the auto compaction.
 // This is needed for unit tests that rely on
--- a/tsdb/encoding/encoding.go
+++ b/tsdb/encoding/encoding.go
@ -268,7 +268,7 @@ func (d *Decbuf) Byte() byte {
 	return x
 }

-func (d *Decbuf) EatPadding() {
+func (d *Decbuf) ConsumePadding() {
 	if d.E != nil {
 		return
 	}
@ -279,7 +279,6 @@ func (d *Decbuf) EatPadding() {
 		d.E = ErrInvalidSize
 		return
 	}
-	return
 }

 func (d *Decbuf) Err() error  { return d.E }
--- a/tsdb/index/index.go
+++ b/tsdb/index/index.go
@ -126,7 +126,7 @@ type Writer struct {
 	reverseSymbols map[uint32]string
 	labelIndexes   []labelIndexHashEntry // label index offsets
 	postings       []postingsHashEntry   // postings lists offsets
-	labelNames     map[string]struct{}   // label names
+	labelNames     map[string]uint64     // label names, and their usage

 	// Hold last series to validate that clients insert new series in order.
 	lastSeries labels.Labels
@ -208,10 +208,8 @@ func NewWriter(fn string) (*Writer, error) {
 		buf2: encoding.Encbuf{B: make([]byte, 0, 1<<22)},

 		// Caches.
-		symbols:        make(map[string]uint32, 1<<13),
-		reverseSymbols: make(map[uint32]string, 1<<13),
-		labelNames:     make(map[string]struct{}, 1<<8),
-		crc32:          newCRC32(),
+		labelNames: make(map[string]uint64, 1<<8),
+		crc32:      newCRC32(),
 	}
 	if err := iw.writeMeta(); err != nil {
 		return nil, err
@ -337,7 +335,7 @@ func (w *Writer) AddSeries(ref uint64, lset labels.Labels, chunks ...chunks.Meta
 		if !ok {
 			return errors.Errorf("symbol entry for %q does not exist", l.Name)
 		}
-		w.labelNames[l.Name] = struct{}{}
+		w.labelNames[l.Name]++
 		w.buf2.PutUvarint32(index)

 		index, ok = w.symbols[l.Value]
@ -409,6 +407,7 @@ func (w *Writer) AddSymbols(sym map[string]struct{}) error {
 	}

 	w.symbols = make(map[string]uint32, len(symbols))
+	w.reverseSymbols = make(map[uint32]string, len(symbols))

 	for index, s := range symbols {
 		w.symbols[s] = uint32(index)
@ -596,7 +595,6 @@ func (w *Writer) writeTOC() error {
 }

 func (w *Writer) writePostings() error {
-
 	names := make([]string, 0, len(w.labelNames))
 	for n := range w.labelNames {
 		names = append(names, n)
@ -617,7 +615,7 @@ func (w *Writer) writePostings() error {
 	d := encoding.NewDecbufRaw(realByteSlice(f.Bytes()), int(w.toc.LabelIndices))
 	d.B = d.B[w.toc.Series:] // dec.Skip not merged yet
 	for d.Len() > 0 {
-		d.EatPadding()
+		d.ConsumePadding()
 		startPos := w.toc.LabelIndices - uint64(d.Len())
 		if startPos%16 != 0 {
 			return errors.Errorf("series not 16-byte aligned at %d", startPos)
@ -630,32 +628,53 @@ func (w *Writer) writePostings() error {
 			return nil
 		}
 	}
-	w.writePosting("", "", offsets)
+	if err := w.writePosting("", "", offsets); err != nil {
+		return err
+	}
+	maxPostings := uint64(len(offsets)) // No label name can have more postings than this.
+
+	for len(names) > 0 {
+		batchNames := []string{}
+		// c is the number of postings already claimed by names in this batch.
+		var c uint64
+		// Try to bunch up label names into one loop, but avoid
+		// using more memory than a single label name can.
+		for len(names) > 0 {
+			// Always accept the first name: an empty batch would consume no
+			// names and the enclosing loop would never terminate.
+			if len(batchNames) > 0 && w.labelNames[names[0]]+c > maxPostings {
+				break
+			}
+			batchNames = append(batchNames, names[0])
+			// Charge this name against the budget; without this the limit
+			// check never trips and every name ends up in one batch.
+			c += w.labelNames[names[0]]
+			names = names[1:]
+		}

-	for _, name := range names {
-		nameo := w.symbols[name]
-		postings := map[uint32][]uint32{}
+		nameSymbols := map[uint32]struct{}{}
+		for _, name := range batchNames {
+			nameSymbols[w.symbols[name]] = struct{}{}
+		}
+		// Label name -> label value -> positions.
+		postings := map[uint32]map[uint32][]uint32{}

 		d := encoding.NewDecbufRaw(realByteSlice(f.Bytes()), int(w.toc.LabelIndices))
-		d.B = d.B[w.toc.Series:] // dec.Skip not merged yet
+		d.Skip(int(w.toc.Series))
 		for d.Len() > 0 {
-			d.EatPadding()
+			d.ConsumePadding()
 			startPos := w.toc.LabelIndices - uint64(d.Len())
 			l := d.Uvarint() // Length of this series in bytes.
 			startLen := d.Len()

-			// See if this label name is in the series.
+			// See if label names we want are in the series.
 			numLabels := d.Uvarint()
 			for i := 0; i < numLabels; i++ {
 				lno := uint32(d.Uvarint())
 				lvo := uint32(d.Uvarint())

-				if lno == nameo {
-					if _, ok := postings[lvo]; !ok {
-						postings[lvo] = []uint32{}
+				if _, ok := nameSymbols[lno]; ok {
+					if _, ok := postings[lno]; !ok {
+						postings[lno] = map[uint32][]uint32{}
 					}
-					postings[lvo] = append(postings[lvo], uint32(startPos/16))
-					break
+					if _, ok := postings[lno][lvo]; !ok {
+						postings[lno][lvo] = []uint32{}
+					}
+					postings[lno][lvo] = append(postings[lno][lvo], uint32(startPos/16))
 				}
 			}
 			// Skip to next series. The 4 is for the CRC32.
@ -666,16 +685,20 @@ func (w *Writer) writePostings() error {
 			}
 		}

-		// Write out postings for this label name.
-		values := make([]uint32, 0, len(postings))
-		for v := range postings {
-			values = append(values, v)
+		for _, name := range batchNames {
+			// Write out postings for this label name.
+			values := make([]uint32, 0, len(postings[w.symbols[name]]))
+			for v := range postings[w.symbols[name]] {
+				values = append(values, v)

-		}
-		// Symbol numbers are in order, so the strings will also be in order.
-		sort.Sort(uint32slice(values))
-		for _, v := range values {
-			w.writePosting(name, w.reverseSymbols[v], postings[v])
+			}
+			// Symbol numbers are in order, so the strings will also be in order.
+			sort.Sort(uint32slice(values))
+			for _, v := range values {
+				if err := w.writePosting(name, w.reverseSymbols[v], postings[w.symbols[name]][v]); err != nil {
+					return err
+				}
+			}
 		}

 	}
--- a/tsdb/index/index_test.go
+++ b/tsdb/index/index_test.go
@ -211,6 +211,7 @@ func TestIndexRW_Postings(t *testing.T) {
 	testutil.Ok(t, iw.AddSeries(4, series[3]))

 	err = iw.WriteLabelIndex([]string{"a"}, []string{"1"})
+	testutil.Ok(t, err)
 	err = iw.WriteLabelIndex([]string{"b"}, []string{"1", "2", "3", "4"})
 	testutil.Ok(t, err)

@ -266,6 +267,7 @@ func TestPostingsMany(t *testing.T) {
 		testutil.Ok(t, iw.AddSeries(uint64(i), s))
 	}
 	err = iw.WriteLabelIndex([]string{"foo"}, []string{"bar"})
+	testutil.Ok(t, err)
 	testutil.Ok(t, iw.Close())

 	ir, err := NewFileReader(fn)
--- a/tsdb/mocks_test.go
+++ b/tsdb/mocks_test.go
@ -62,7 +62,6 @@ func (m *mockIndexWriter) AddSeries(ref uint64, l labels.Labels, chunks ...chunk
 }

 func (mockIndexWriter) WriteLabelIndex(names []string, values []string) error { return nil }
-func (mockIndexWriter) WritePostings() error                                  { return nil }
 func (mockIndexWriter) Close() error                                          { return nil }

 type mockBReader struct {