From c93e261466b11d8d2786bb41dca8e9573d66bb78 Mon Sep 17 00:00:00 2001 From: Brian Brazil Date: Fri, 2 Nov 2018 09:52:45 +0000 Subject: [PATCH] Reduce memory taken up by posting/symbol tables. Reuse the string already allocated for symbols in the posting tables. Use a slice for symbols in v2 format. Move symbol size logic into the index code. Avoid duplication of lookupSymbol logic. Signed-off-by: Brian Brazil --- block.go | 13 +---------- index/index.go | 62 +++++++++++++++++++++++++++++--------------------- 2 files changed, 37 insertions(+), 38 deletions(-) diff --git a/block.go b/block.go index 7081db077..e5a66bd9e 100644 --- a/block.go +++ b/block.go @@ -15,7 +15,6 @@ package tsdb import ( - "encoding/binary" "encoding/json" "io/ioutil" "os" @@ -278,23 +277,13 @@ func OpenBlock(dir string, pool chunkenc.Pool) (*Block, error) { return nil, err } - // Calculating symbol table size. - tmp := make([]byte, 8) - symTblSize := uint64(0) - for _, v := range ir.SymbolTable() { - // Size of varint length of the symbol. - symTblSize += uint64(binary.PutUvarint(tmp, uint64(len(v)))) - // Size of the symbol. - symTblSize += uint64(len(v)) - } - pb := &Block{ dir: dir, meta: *meta, chunkr: cr, indexr: ir, tombstones: tr, - symbolTableSize: symTblSize, + symbolTableSize: ir.SymbolTableSize(), } return pb, nil } diff --git a/index/index.go b/index/index.go index 133799ac9..ea131d941 100644 --- a/index/index.go +++ b/index/index.go @@ -549,8 +549,10 @@ type Reader struct { // block are always backed by true strings held in here rather than // strings that are backed by byte slices from the mmap'd index file. This // prevents memory faults when applications work with read symbols after - // the block has been unmapped. - symbols map[uint32]string + // the block has been unmapped. The older format has sparse indexes so a map + // must be used, but the new format is not so we can use a slice. + symbols map[uint32]string + symbolSlice []string dec *Decoder @@ -631,11 +633,21 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) { } var err error + // Use the strings already allocated by symbols, rather than + // re-allocating them again below. + symbols := make(map[string]string, len(r.symbols)+len(r.symbolSlice)) + for _, s := range r.symbols { + symbols[s] = s + } + for _, s := range r.symbolSlice { + symbols[s] = s + } + err = r.readOffsetTable(r.toc.labelIndicesTable, func(key []string, off uint64) error { if len(key) != 1 { return errors.Errorf("unexpected key length %d", len(key)) } - r.labels[key[0]] = off + r.labels[symbols[key[0]]] = off return nil }) if err != nil { @@ -645,14 +657,14 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) { if len(key) != 2 { return errors.Errorf("unexpected key length %d", len(key)) } - r.postings[labels.Label{Name: key[0], Value: key[1]}] = off + r.postings[labels.Label{Name: symbols[key[0]], Value: symbols[key[1]]}] = off return nil }) if err != nil { return nil, errors.Wrap(err, "read postings table") } - r.dec = &Decoder{symbols: r.symbols} + r.dec = &Decoder{lookupSymbol: r.lookupSymbol} return r, nil } @@ -777,18 +789,17 @@ func (r *Reader) readSymbols(off int) error { basePos = uint32(off) + 4 nextPos = basePos + uint32(origLen-d.len()) ) - if r.version == 2 { - nextPos = 0 + r.symbolSlice = make([]string, 0, cnt) } for d.err() == nil && d.len() > 0 && cnt > 0 { s := d.uvarintStr() - r.symbols[nextPos] = s if r.version == 2 { - nextPos++ + r.symbolSlice = append(r.symbolSlice, s) } else { + r.symbols[nextPos] = s nextPos = basePos + uint32(origLen-d.len()) } cnt-- @@ -828,6 +839,9 @@ func (r *Reader) Close() error { } func (r *Reader) lookupSymbol(o uint32) (string, error) { + if int(o) < len(r.symbolSlice) { + return r.symbolSlice[o], nil + } s, ok := r.symbols[o] if !ok { return "", errors.Errorf("unknown symbol offset %d", o) @@ -842,12 +856,22 @@ func (r *Reader) Symbols() (map[string]struct{}, error) { for _, s := range r.symbols { res[s] = struct{}{} } + for _, s := range r.symbolSlice { + res[s] = struct{}{} + } return res, nil } // SymbolTable returns the symbol table that is used to resolve symbol references. -func (r *Reader) SymbolTable() map[uint32]string { - return r.symbols +func (r *Reader) SymbolTableSize() uint64 { + var size int + for _, s := range r.symbols { + size += len(s) + 8 + } + for _, s := range r.symbolSlice { + size += len(s) + 8 + } + return uint64(size) } // LabelValues returns value tuples that exist for the given label name tuples. @@ -1031,21 +1055,7 @@ func (t *serializedStringTuples) At(i int) ([]string, error) { // It currently does not contain decoding methods for all entry types but can be extended // by them if there's demand. type Decoder struct { - symbols map[uint32]string -} - -func (dec *Decoder) lookupSymbol(o uint32) (string, error) { - s, ok := dec.symbols[o] - if !ok { - return "", errors.Errorf("unknown symbol offset %d", o) - } - return s, nil -} - -// SetSymbolTable set the symbol table to be used for lookups when decoding series -// and label indices -func (dec *Decoder) SetSymbolTable(t map[uint32]string) { - dec.symbols = t + lookupSymbol func(uint32) (string, error) } // Postings returns a postings list for b and its number of elements.