Browse Source

Improvement on postings intersection (#616)

* improvement on postings intersection

Signed-off-by: naivewong <867245430@qq.com>
pull/5805/head
naivewong 6 years ago committed by Krasi Georgiev
parent
commit
6ab483071a
  1. 64
      index/postings.go
  2. 104
      index/postings_test.go

64
index/postings.go

@ -303,68 +303,68 @@ func Intersect(its ...Postings) Postings {
if len(its) == 1 { if len(its) == 1 {
return its[0] return its[0]
} }
for _, p := range its {
l := len(its) / 2 if p == EmptyPostings() {
a := Intersect(its[:l]...) return EmptyPostings()
b := Intersect(its[l:]...) }
if a == EmptyPostings() || b == EmptyPostings() {
return EmptyPostings()
} }
return newIntersectPostings(a, b)
return newIntersectPostings(its...)
} }
type intersectPostings struct { type intersectPostings struct {
a, b Postings arr []Postings
cur uint64 cur uint64
} }
func newIntersectPostings(a, b Postings) *intersectPostings { func newIntersectPostings(its ...Postings) *intersectPostings {
return &intersectPostings{a: a, b: b} return &intersectPostings{arr: its}
} }
func (it *intersectPostings) At() uint64 { func (it *intersectPostings) At() uint64 {
return it.cur return it.cur
} }
func (it *intersectPostings) doNext(id uint64) bool { func (it *intersectPostings) doNext() bool {
Loop:
for { for {
if !it.b.Seek(id) { for _, p := range it.arr {
return false if !p.Seek(it.cur) {
}
if vb := it.b.At(); vb != id {
if !it.a.Seek(vb) {
return false return false
} }
id = it.a.At() if p.At() > it.cur {
if vb != id { it.cur = p.At()
continue continue Loop
} }
} }
it.cur = id
return true return true
} }
} }
func (it *intersectPostings) Next() bool { func (it *intersectPostings) Next() bool {
if !it.a.Next() { for _, p := range it.arr {
return false if !p.Next() {
return false
}
if p.At() > it.cur {
it.cur = p.At()
}
} }
return it.doNext(it.a.At()) return it.doNext()
} }
func (it *intersectPostings) Seek(id uint64) bool { func (it *intersectPostings) Seek(id uint64) bool {
if !it.a.Seek(id) { it.cur = id
return false return it.doNext()
}
return it.doNext(it.a.At())
} }
func (it *intersectPostings) Err() error { func (it *intersectPostings) Err() error {
if it.a.Err() != nil { for _, p := range it.arr {
return it.a.Err() if p.Err() != nil {
return p.Err()
}
} }
return it.b.Err() return nil
} }
// Merge returns a new iterator over the union of the input iterators. // Merge returns a new iterator over the union of the input iterators.

104
index/postings_test.go

@ -221,36 +221,90 @@ func TestMultiIntersect(t *testing.T) {
} }
func BenchmarkIntersect(t *testing.B) { func BenchmarkIntersect(t *testing.B) {
var a, b, c, d []uint64 t.Run("LongPostings1", func(bench *testing.B) {
var a, b, c, d []uint64
for i := 0; i < 10000000; i += 2 { for i := 0; i < 10000000; i += 2 {
a = append(a, uint64(i)) a = append(a, uint64(i))
} }
for i := 5000000; i < 5000100; i += 4 { for i := 5000000; i < 5000100; i += 4 {
b = append(b, uint64(i)) b = append(b, uint64(i))
} }
for i := 5090000; i < 5090600; i += 4 { for i := 5090000; i < 5090600; i += 4 {
b = append(b, uint64(i)) b = append(b, uint64(i))
} }
for i := 4990000; i < 5100000; i++ { for i := 4990000; i < 5100000; i++ {
c = append(c, uint64(i)) c = append(c, uint64(i))
} }
for i := 4000000; i < 6000000; i++ { for i := 4000000; i < 6000000; i++ {
d = append(d, uint64(i)) d = append(d, uint64(i))
} }
i1 := newListPostings(a...) i1 := newListPostings(a...)
i2 := newListPostings(b...) i2 := newListPostings(b...)
i3 := newListPostings(c...) i3 := newListPostings(c...)
i4 := newListPostings(d...) i4 := newListPostings(d...)
bench.ResetTimer()
bench.ReportAllocs()
for i := 0; i < bench.N; i++ {
if _, err := ExpandPostings(Intersect(i1, i2, i3, i4)); err != nil {
bench.Fatal(err)
}
}
})
t.ResetTimer() t.Run("LongPostings2", func(bench *testing.B) {
var a, b, c, d []uint64
for i := 0; i < t.N; i++ { for i := 0; i < 12500000; i++ {
if _, err := ExpandPostings(Intersect(i1, i2, i3, i4)); err != nil { a = append(a, uint64(i))
t.Fatal(err)
} }
} for i := 7500000; i < 12500000; i++ {
b = append(b, uint64(i))
}
for i := 9000000; i < 20000000; i++ {
c = append(c, uint64(i))
}
for i := 10000000; i < 12000000; i++ {
d = append(d, uint64(i))
}
i1 := newListPostings(a...)
i2 := newListPostings(b...)
i3 := newListPostings(c...)
i4 := newListPostings(d...)
bench.ResetTimer()
bench.ReportAllocs()
for i := 0; i < bench.N; i++ {
if _, err := ExpandPostings(Intersect(i1, i2, i3, i4)); err != nil {
bench.Fatal(err)
}
}
})
// Many matchers (k >> n): far more postings lists (k) than entries per list (n).
t.Run("ManyPostings", func(bench *testing.B) {
var its []Postings
// 100000 matchers (k=100000), each holding the 99 postings 1..99.
for i := 0; i < 100000; i++ {
var temp []uint64
for j := 1; j < 100; j++ {
temp = append(temp, uint64(j))
}
its = append(its, newListPostings(temp...))
}
bench.ResetTimer()
bench.ReportAllocs()
for i := 0; i < bench.N; i++ {
if _, err := ExpandPostings(Intersect(its...)); err != nil {
bench.Fatal(err)
}
}
})
} }
func TestMultiMerge(t *testing.T) { func TestMultiMerge(t *testing.T) {

Loading…
Cancel
Save