From 4fdf9b195c3a50fa3f0adf371875bdd9d78d25a2 Mon Sep 17 00:00:00 2001 From: Sunny Klair Date: Wed, 25 Oct 2017 18:12:13 -0400 Subject: [PATCH 1/4] Validate index TOC checksum on read --- Documentation/format/chunks.md | 2 +- block.go | 2 +- index.go | 17 +++++++++++++---- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/Documentation/format/chunks.md b/Documentation/format/chunks.md index 51ac1242c..3faae9b7d 100644 --- a/Documentation/format/chunks.md +++ b/Documentation/format/chunks.md @@ -2,7 +2,7 @@ The following describes the format of a single chunks file, which is created in the `chunks/` directory of a block. The maximum size per segment file is 512MiB. -Chunks in the files are referenced from the index by the in-file offset in the 4 LSB and the segment sequence number in the bigher 4 MSBs. +Chunks in the files are referenced from the index by the in-file offset in the 4 LSB and the segment sequence number in the higher 4 MSBs. ``` ┌────────────────────────────────────────┬──────────────────────┐ diff --git a/block.go b/block.go index 63b468f27..2bd1fd364 100644 --- a/block.go +++ b/block.go @@ -133,7 +133,7 @@ func writeMetaFile(dir string, meta *BlockMeta) error { return renameFile(tmp, path) } -// Block represents a directory of time series data covering a continous time range. +// Block represents a directory of time series data covering a continuous time range. type Block struct { mtx sync.RWMutex closing bool diff --git a/index.go b/index.go index c0680aa33..972aaa8bd 100644 --- a/index.go +++ b/index.go @@ -573,8 +573,9 @@ type indexReader struct { } var ( - errInvalidSize = fmt.Errorf("invalid size") - errInvalidFlag = fmt.Errorf("invalid flag") + errInvalidSize = fmt.Errorf("invalid size") + errInvalidFlag = fmt.Errorf("invalid flag") + errInvalidChecksum = fmt.Errorf("invalid checksum") ) // NewIndexReader returns a new IndexReader on the given directory. @@ -618,6 +619,11 @@ func newIndexReader(dir string) (*indexReader, error) { func (r *indexReader) readTOC() error { d := r.decbufAt(len(r.b) - indexTOCLen) + crc := newCRC32() + if _, err := crc.Write(d.get()[:d.len()-4]); err != nil { + return errors.Wrap(err, "write to hash") + } + r.toc.symbols = d.be64() r.toc.series = d.be64() r.toc.labelIndices = d.be64() @@ -625,9 +631,12 @@ func (r *indexReader) readTOC() error { r.toc.postings = d.be64() r.toc.postingsTable = d.be64() - // TODO(fabxc): validate checksum. + // Validate checksum + if d.be32() != crc.Sum32() { + return errors.Wrap(errInvalidChecksum, "TOC checksum") + } - return nil + return d.err() } func (r *indexReader) decbufAt(off int) decbuf { From ab02ea4de4195bd6c4c06f1c8a6e55796bbd428f Mon Sep 17 00:00:00 2001 From: Sunny Klair Date: Thu, 26 Oct 2017 13:48:31 -0400 Subject: [PATCH 2/4] Validate index offset table checksums on read --- index.go | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/index.go b/index.go index 972aaa8bd..cc0ad140c 100644 --- a/index.go +++ b/index.go @@ -619,11 +619,6 @@ func newIndexReader(dir string) (*indexReader, error) { func (r *indexReader) readTOC() error { d := r.decbufAt(len(r.b) - indexTOCLen) - crc := newCRC32() - if _, err := crc.Write(d.get()[:d.len()-4]); err != nil { - return errors.Wrap(err, "write to hash") - } - r.toc.symbols = d.be64() r.toc.series = d.be64() r.toc.labelIndices = d.be64() @@ -631,9 +626,8 @@ func (r *indexReader) readTOC() error { r.toc.postings = d.be64() r.toc.postingsTable = d.be64() - // Validate checksum - if d.be32() != crc.Sum32() { - return errors.Wrap(errInvalidChecksum, "TOC checksum") + if valid, err := r.validCrc(d.be32(), len(r.b)-indexTOCLen, indexTOCLen-4); !valid { + return errors.Wrap(err, "TOC checksum") } return d.err() @@ -646,6 +640,20 @@ func (r *indexReader) decbufAt(off int) decbuf { return decbuf{b: r.b[off:]} } +func (r *indexReader) validCrc(crc uint32, off, cnt int) (bool, error) { + c2 := newCRC32() + if len(r.b) < off+cnt { + return false, errInvalidSize + } + if _, err := c2.Write(r.b[off : off+cnt]); err != nil { + return false, errors.Wrap(err, "write to hash") + } + if c2.Sum32() != crc { + return false, errInvalidChecksum + } + return true, nil +} + // readSymbols reads the symbol table fully into memory and allocates proper strings for them. // Strings backed by the mmap'd memory would cause memory faults if applications keep using them // after the reader is closed. @@ -677,9 +685,10 @@ func (r *indexReader) readOffsetTable(off uint64) (map[string]uint32, error) { const sep = "\xff" var ( - d1 = r.decbufAt(int(off)) - d2 = d1.decbuf(d1.be32int()) - cnt = d2.be32() + d1 = r.decbufAt(int(off)) + tableLen = d1.be32int() + d2 = d1.decbuf(tableLen) + cnt = d2.be32() ) res := make(map[string]uint32, 512) @@ -696,7 +705,10 @@ func (r *indexReader) readOffsetTable(off uint64) (map[string]uint32, error) { cnt-- } - // TODO(fabxc): verify checksum from remainer of d1. + if valid, err := r.validCrc(d1.be32(), int(off)+4, tableLen); !valid { + return res, errors.Wrap(err, "offset table checksum") + } + return res, d2.err() } From b65dd43c5bb519de311f5428308bbc681d62402b Mon Sep 17 00:00:00 2001 From: Sunny Klair Date: Thu, 26 Oct 2017 15:34:31 -0400 Subject: [PATCH 3/4] Validate index series/postings/symbol table checksums on read --- index.go | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/index.go b/index.go index cc0ad140c..4d046de99 100644 --- a/index.go +++ b/index.go @@ -626,7 +626,7 @@ func (r *indexReader) readTOC() error { r.toc.postings = d.be64() r.toc.postingsTable = d.be64() - if valid, err := r.validCrc(d.be32(), len(r.b)-indexTOCLen, indexTOCLen-4); !valid { + if valid, err := r.checkCrc(d.be32(), len(r.b)-indexTOCLen, indexTOCLen-4); !valid { return errors.Wrap(err, "TOC checksum") } @@ -640,7 +640,7 @@ func (r *indexReader) decbufAt(off int) decbuf { return decbuf{b: r.b[off:]} } -func (r *indexReader) validCrc(crc uint32, off, cnt int) (bool, error) { +func (r *indexReader) checkCrc(crc uint32, off, cnt int) (bool, error) { c2 := newCRC32() if len(r.b) < off+cnt { return false, errInvalidSize @@ -663,7 +663,8 @@ func (r *indexReader) readSymbols(off int) error { } var ( d1 = r.decbufAt(int(off)) - d2 = d1.decbuf(d1.be32int()) + l = d1.be32int() + d2 = d1.decbuf(l) origLen = d2.len() cnt = d2.be32int() basePos = uint32(off) + 4 @@ -676,6 +677,9 @@ func (r *indexReader) readSymbols(off int) error { nextPos = basePos + uint32(origLen-d2.len()) cnt-- } + if valid, err := r.checkCrc(d1.be32(), int(off)+4, l); !valid { + return errors.Wrap(err, "symbol table checksum") + } return d2.err() } @@ -685,10 +689,10 @@ func (r *indexReader) readOffsetTable(off uint64) (map[string]uint32, error) { const sep = "\xff" var ( - d1 = r.decbufAt(int(off)) - tableLen = d1.be32int() - d2 = d1.decbuf(tableLen) - cnt = d2.be32() + d1 = r.decbufAt(int(off)) + l = d1.be32int() + d2 = d1.decbuf(l) + cnt = d2.be32() ) res := make(map[string]uint32, 512) @@ -705,7 +709,7 @@ func (r *indexReader) readOffsetTable(off uint64) (map[string]uint32, error) { cnt-- } - if valid, err := r.validCrc(d1.be32(), int(off)+4, tableLen); !valid { + if valid, err := r.checkCrc(d1.be32(), int(off)+4, l); !valid { return res, errors.Wrap(err, "offset table checksum") } @@ -765,7 +769,8 @@ func (r *indexReader) LabelValues(names ...string) (StringTuples, error) { } d1 := r.decbufAt(int(off)) - d2 := d1.decbuf(d1.be32int()) + l := d1.be32int() + d2 := d1.decbuf(l) nc := d2.be32int() d2.be32() // consume unused value entry count. @@ -774,7 +779,9 @@ func (r *indexReader) LabelValues(names ...string) (StringTuples, error) { return nil, errors.Wrap(d2.err(), "read label value index") } - // TODO(fabxc): verify checksum in 4 remaining bytes of d1. + if valid, err := r.checkCrc(d1.be32(), int(off)+4, l); !valid { + return nil, errors.Wrap(err, "read label values checksum") + } st := &serializedStringTuples{ l: nc, @@ -802,7 +809,9 @@ func (r *indexReader) LabelIndices() ([][]string, error) { func (r *indexReader) Series(ref uint64, lbls *labels.Labels, chks *[]ChunkMeta) error { d1 := r.decbufAt(int(ref)) - d2 := d1.decbuf(int(d1.uvarint())) + l := d1.uvarint() + sl := len(r.b[ref:]) - d1.len() // # bytes in l + d2 := d1.decbuf(l) *lbls = (*lbls)[:0] *chks = (*chks)[:0] @@ -865,7 +874,9 @@ func (r *indexReader) Series(ref uint64, lbls *labels.Labels, chks *[]ChunkMeta) }) } - // TODO(fabxc): verify CRC32. + if valid, err := r.checkCrc(d1.be32(), int(ref)+sl, l); !valid { + return errors.Wrap(err, "series checksum") + } return nil } @@ -880,7 +891,8 @@ func (r *indexReader) Postings(name, value string) (Postings, error) { } d1 := r.decbufAt(int(off)) - d2 := d1.decbuf(d1.be32int()) + l := d1.be32int() + d2 := d1.decbuf(l) d2.be32() // consume unused postings list length. @@ -888,7 +900,9 @@ func (r *indexReader) Postings(name, value string) (Postings, error) { return nil, errors.Wrap(d2.err(), "get postings bytes") } - // TODO(fabxc): read checksum from 4 remainer bytes of d1 and verify. + if valid, err := r.checkCrc(d1.be32(), int(off)+4, l); !valid { + return nil, errors.Wrap(err, "postings checksum") + } return newBigEndianPostings(d2.get()), nil } From aecac3b5211fe4341977ea527609a8efedea7647 Mon Sep 17 00:00:00 2001 From: Sunny Klair Date: Fri, 27 Oct 2017 12:29:59 -0400 Subject: [PATCH 4/4] Resuse single CRC for index checksum validation --- index.go | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/index.go b/index.go index 4d046de99..3a8e8bb4a 100644 --- a/index.go +++ b/index.go @@ -570,6 +570,8 @@ type indexReader struct { // prevents memory faults when applications work with read symbols after // the block has been unmapped. symbols map[uint32]string + + crc32 hash.Hash32 } var ( @@ -591,6 +593,7 @@ func newIndexReader(dir string) (*indexReader, error) { b: f.b, c: f, symbols: map[uint32]string{}, + crc32: newCRC32(), } // Verify magic number. @@ -626,7 +629,7 @@ func (r *indexReader) readTOC() error { r.toc.postings = d.be64() r.toc.postingsTable = d.be64() - if valid, err := r.checkCrc(d.be32(), len(r.b)-indexTOCLen, indexTOCLen-4); !valid { + if valid, err := r.checkCRC(d.be32(), len(r.b)-indexTOCLen, indexTOCLen-4); !valid { return errors.Wrap(err, "TOC checksum") } @@ -640,15 +643,15 @@ func (r *indexReader) decbufAt(off int) decbuf { return decbuf{b: r.b[off:]} } -func (r *indexReader) checkCrc(crc uint32, off, cnt int) (bool, error) { - c2 := newCRC32() +func (r *indexReader) checkCRC(crc uint32, off, cnt int) (bool, error) { + r.crc32.Reset() if len(r.b) < off+cnt { return false, errInvalidSize } - if _, err := c2.Write(r.b[off : off+cnt]); err != nil { + if _, err := r.crc32.Write(r.b[off : off+cnt]); err != nil { return false, errors.Wrap(err, "write to hash") } - if c2.Sum32() != crc { + if r.crc32.Sum32() != crc { return false, errInvalidChecksum } return true, nil @@ -677,7 +680,7 @@ func (r *indexReader) readSymbols(off int) error { nextPos = basePos + uint32(origLen-d2.len()) cnt-- } - if valid, err := r.checkCrc(d1.be32(), int(off)+4, l); !valid { + if valid, err := r.checkCRC(d1.be32(), int(off)+4, l); !valid { return errors.Wrap(err, "symbol table checksum") } return d2.err() @@ -709,7 +712,7 @@ func (r *indexReader) readOffsetTable(off uint64) (map[string]uint32, error) { cnt-- } - if valid, err := r.checkCrc(d1.be32(), int(off)+4, l); !valid { + if valid, err := r.checkCRC(d1.be32(), int(off)+4, l); !valid { return res, errors.Wrap(err, "offset table checksum") } @@ -779,7 +782,7 @@ func (r *indexReader) LabelValues(names ...string) (StringTuples, error) { return nil, errors.Wrap(d2.err(), "read label value index") } - if valid, err := r.checkCrc(d1.be32(), int(off)+4, l); !valid { + if valid, err := r.checkCRC(d1.be32(), int(off)+4, l); !valid { return nil, errors.Wrap(err, "read label values checksum") } @@ -874,7 +877,7 @@ func (r *indexReader) Series(ref uint64, lbls *labels.Labels, chks *[]ChunkMeta) }) } - if valid, err := r.checkCrc(d1.be32(), int(ref)+sl, l); !valid { + if valid, err := r.checkCRC(d1.be32(), int(ref)+sl, l); !valid { return errors.Wrap(err, "series checksum") } @@ -900,7 +903,7 @@ func (r *indexReader) Postings(name, value string) (Postings, error) { return nil, errors.Wrap(d2.err(), "get postings bytes") } - if valid, err := r.checkCrc(d1.be32(), int(off)+4, l); !valid { + if valid, err := r.checkCRC(d1.be32(), int(off)+4, l); !valid { return nil, errors.Wrap(err, "postings checksum") }