From 6ab483071a5d4dce43634f6eef2dce67d1f4740f Mon Sep 17 00:00:00 2001 From: naivewong <867245430@qq.com> Date: Tue, 11 Jun 2019 16:14:25 +0800 Subject: [PATCH] Improvement on postings intersection (#616) * improvement on postings intersection Signed-off-by: naivewong <867245430@qq.com> --- index/postings.go | 64 ++++++++++++------------- index/postings_test.go | 104 +++++++++++++++++++++++++++++++---------- 2 files changed, 111 insertions(+), 57 deletions(-) diff --git a/index/postings.go b/index/postings.go index bb7b5837a..cef2d886e 100644 --- a/index/postings.go +++ b/index/postings.go @@ -303,68 +303,68 @@ func Intersect(its ...Postings) Postings { if len(its) == 1 { return its[0] } - - l := len(its) / 2 - a := Intersect(its[:l]...) - b := Intersect(its[l:]...) - - if a == EmptyPostings() || b == EmptyPostings() { - return EmptyPostings() + for _, p := range its { + if p == EmptyPostings() { + return EmptyPostings() + } } - return newIntersectPostings(a, b) + + return newIntersectPostings(its...) } type intersectPostings struct { - a, b Postings - cur uint64 + arr []Postings + cur uint64 } -func newIntersectPostings(a, b Postings) *intersectPostings { - return &intersectPostings{a: a, b: b} +func newIntersectPostings(its ...Postings) *intersectPostings { + return &intersectPostings{arr: its} } func (it *intersectPostings) At() uint64 { return it.cur } -func (it *intersectPostings) doNext(id uint64) bool { +func (it *intersectPostings) doNext() bool { +Loop: for { - if !it.b.Seek(id) { - return false - } - if vb := it.b.At(); vb != id { - if !it.a.Seek(vb) { + for _, p := range it.arr { + if !p.Seek(it.cur) { return false } - id = it.a.At() - if vb != id { - continue + if p.At() > it.cur { + it.cur = p.At() + continue Loop } } - it.cur = id return true } } func (it *intersectPostings) Next() bool { - if !it.a.Next() { - return false + for _, p := range it.arr { + if !p.Next() { + return false + } + if p.At() > it.cur { + it.cur = p.At() + } } - return it.doNext(it.a.At()) + return it.doNext() } func (it *intersectPostings) Seek(id uint64) bool { - if !it.a.Seek(id) { - return false - } - return it.doNext(it.a.At()) + it.cur = id + return it.doNext() } func (it *intersectPostings) Err() error { - if it.a.Err() != nil { - return it.a.Err() + for _, p := range it.arr { + if p.Err() != nil { + return p.Err() + } } - return it.b.Err() + return nil } // Merge returns a new iterator over the union of the input iterators. diff --git a/index/postings_test.go b/index/postings_test.go index b8f760349..1eed1dbfb 100644 --- a/index/postings_test.go +++ b/index/postings_test.go @@ -221,36 +221,90 @@ func TestMultiIntersect(t *testing.T) { } func BenchmarkIntersect(t *testing.B) { - var a, b, c, d []uint64 + t.Run("LongPostings1", func(bench *testing.B) { + var a, b, c, d []uint64 - for i := 0; i < 10000000; i += 2 { - a = append(a, uint64(i)) - } - for i := 5000000; i < 5000100; i += 4 { - b = append(b, uint64(i)) - } - for i := 5090000; i < 5090600; i += 4 { - b = append(b, uint64(i)) - } - for i := 4990000; i < 5100000; i++ { - c = append(c, uint64(i)) - } - for i := 4000000; i < 6000000; i++ { - d = append(d, uint64(i)) - } + for i := 0; i < 10000000; i += 2 { + a = append(a, uint64(i)) + } + for i := 5000000; i < 5000100; i += 4 { + b = append(b, uint64(i)) + } + for i := 5090000; i < 5090600; i += 4 { + b = append(b, uint64(i)) + } + for i := 4990000; i < 5100000; i++ { + c = append(c, uint64(i)) + } + for i := 4000000; i < 6000000; i++ { + d = append(d, uint64(i)) + } - i1 := newListPostings(a...) - i2 := newListPostings(b...) - i3 := newListPostings(c...) - i4 := newListPostings(d...) + i1 := newListPostings(a...) + i2 := newListPostings(b...) + i3 := newListPostings(c...) + i4 := newListPostings(d...) + + bench.ResetTimer() + bench.ReportAllocs() + for i := 0; i < bench.N; i++ { + if _, err := ExpandPostings(Intersect(i1, i2, i3, i4)); err != nil { + bench.Fatal(err) + } + } + }) - t.ResetTimer() + t.Run("LongPostings2", func(bench *testing.B) { + var a, b, c, d []uint64 - for i := 0; i < t.N; i++ { - if _, err := ExpandPostings(Intersect(i1, i2, i3, i4)); err != nil { - t.Fatal(err) + for i := 0; i < 12500000; i++ { + a = append(a, uint64(i)) } - } + for i := 7500000; i < 12500000; i++ { + b = append(b, uint64(i)) + } + for i := 9000000; i < 20000000; i++ { + c = append(c, uint64(i)) + } + for i := 10000000; i < 12000000; i++ { + d = append(d, uint64(i)) + } + + i1 := newListPostings(a...) + i2 := newListPostings(b...) + i3 := newListPostings(c...) + i4 := newListPostings(d...) + + bench.ResetTimer() + bench.ReportAllocs() + for i := 0; i < bench.N; i++ { + if _, err := ExpandPostings(Intersect(i1, i2, i3, i4)); err != nil { + bench.Fatal(err) + } + } + }) + + // Many matchers(k >> n). + t.Run("ManyPostings", func(bench *testing.B) { + var its []Postings + + // 100000 matchers(k=100000). + for i := 0; i < 100000; i++ { + var temp []uint64 + for j := 1; j < 100; j++ { + temp = append(temp, uint64(j)) + } + its = append(its, newListPostings(temp...)) + } + + bench.ResetTimer() + bench.ReportAllocs() + for i := 0; i < bench.N; i++ { + if _, err := ExpandPostings(Intersect(its...)); err != nil { + bench.Fatal(err) + } + } + }) } func TestMultiMerge(t *testing.T) {