...
 
Commits (5)
  • Seebs's avatar
    enable instrumentation of de copies · 1a8c1fc6
    Seebs authored
    We had a local patch to improve performance under some
    workloads by reducing the frequency with which `de` objects
    get copied. This patch introduces a build-tag-guarded
    framework for enabling or disabling instrumentation of
    how many de objects are being copied; if it's not present,
    it's not used, but we still get at least an approximate
    sense of the performance of trivial Set operations for
    some workloads. If the build tag is added, we get numbers.
    1a8c1fc6
  • Seebs's avatar
    move more than one thing when overflowing · a479b9b5
    Seebs authored
    When overflowing one d into another, if we move only one object, we
    can end up performing a lot of extra copies; for instance, if we're
    moving things left, every time we move one item to the left, we
    end up copying all but one of the items in the current d -- which
    we already know is full when we're doing this. If we then want to
    insert another item, and copy to the left, we do it again.
    
    This patch adopts a balanced solution of trying to move up to
    half of the available items into the available space at a time.
    The actual number of items copied is usually roughly the same,
    but we do it significantly less frequently. As N gets large, and
    especially with large values of kd, this can become quite
    significant. The impact isn't huge when using interfaces,
    but it becomes more noticeable with concrete types. Using
    a `make generic` btree converted to explicitly use int keys
    and values:
    
    	Linear:
    	copied 1624997920 de for N=50000000, 165 ns/op =>
    	copied 549999648 de for N=100000000, 153 ns/op
    
    	Random:
    	copied 1148275965 de for N=20000000, 723 ns/op =>
    	copied 761915320 de for N=20000000, 656 ns/op
    a479b9b5
  • Seebs's avatar
    add some benchmark numbers for the change in overflow behavior · c5482817
    Seebs authored
    I have no idea why this changes performance of Next(), and even
    less why it changes performance of Next() but not Prev().
    c5482817
  • Seebs's avatar
    update authors/contributors · e6635026
    Seebs authored
    The actual logic was originally patched by Travis Turner
    at Pilosa, but he didn't feel it was enough work to merit
    a separate copyright/contribution acknowledgement.
    e6635026
  • Peter Seebach's avatar
    Merge branch 'seebs/decopy' into 'master' · be3cc8a0
    Peter Seebach authored
    Seebs/decopy
    
    See merge request !15
    be3cc8a0
......@@ -10,3 +10,4 @@
Jan Mercl <0xjnml@gmail.com>
Nexedi <jp@nexedi.com>
Peter Seebach <seebs@seebs.net>
......@@ -12,3 +12,4 @@ Dan Kortschak <dan.kortschak@adelaide.edu.au>
Jan Mercl <0xjnml@gmail.com>
Kirill Smelkov <kirr@nexedi.com>
Nikifor Seryakov <nikandfor@gmail.com>
Peter Seebach <seebs@seebs.net>
......@@ -9,6 +9,7 @@ import (
"fmt"
"io"
"math"
"math/rand"
"path"
"runtime"
"runtime/debug"
......@@ -153,6 +154,22 @@ func cmp(a, b interface{}) int {
return a.(int) - b.(int)
}
// copyBenchmark returns a benchmark body that performs b.N Set
// operations on a fresh tree, with keys produced by fn, and logs the
// number of de (d element) copies the run performed. The logged count
// is nonzero only when the b.instrumentation build tag is enabled;
// without it countCopies reports 0 and the benchmark still times Set.
func copyBenchmark(fn func(int) int) func(b *testing.B) {
	return func(b *testing.B) {
		tree := TreeNew(cmp)
		for n := 0; n < b.N; n++ {
			key := fn(n)
			tree.Set(key, key)
		}
		b.Logf("copied %d de for N=%d\n", tree.countCopies(), b.N)
	}
}
// BenchmarkDeCopies benchmarks Set under two key orders — strictly
// increasing ("Linear") and pseudo-random ("Random") — logging the de
// copy counts gathered when the b.instrumentation build tag is set.
func BenchmarkDeCopies(b *testing.B) {
	linear := func(i int) int { return i }
	random := func(int) int { return int(rand.Int63()) }
	b.Run("Linear", copyBenchmark(linear))
	b.Run("Random", copyBenchmark(random))
}
func TestGet0(t *testing.T) {
r := TreeNew(cmp)
if g, e := r.Len(), 0; g != e {
......
......@@ -62,6 +62,7 @@ type (
d [2*kd + 1]de
n *d
p *d
dTree
}
de struct { // d element
......@@ -95,6 +96,7 @@ type (
last *d
r interface{}
ver int64
treeInst
}
xe struct { // x element
......@@ -179,6 +181,7 @@ func (q *x) siblings(i int) (l, r *d) {
// -------------------------------------------------------------------------- d
func (l *d) mvL(r *d, c int) {
r.didCopy(r.c)
copy(l.d[l.c:], r.d[:c])
copy(r.d[:], r.d[c:r.c])
l.c += c
......@@ -186,6 +189,7 @@ func (l *d) mvL(r *d, c int) {
}
func (l *d) mvR(r *d, c int) {
l.didCopy(r.c + c)
copy(r.d[c:], r.d[:r.c])
copy(r.d[:c], l.d[l.c-c:])
r.c += c
......@@ -336,6 +340,7 @@ func (t *Tree) extract(q *d, i int) { // (r interface{} /*V*/) {
//r = q.d[i].v // prepared for Extract
q.c--
if i < q.c {
t.didCopy(q.c - i)
copy(q.d[i:], q.d[i+1:q.c+1])
}
q.d[q.c] = zde // GC
......@@ -418,8 +423,10 @@ func (t *Tree) Get(k interface{} /*K*/) (v interface{} /*V*/, ok bool) {
func (t *Tree) insert(q *d, i int, k interface{} /*K*/, v interface{} /*V*/) *d {
t.ver++
q.setTree(t)
c := q.c
if i < c {
t.didCopy(c - i)
copy(q.d[i+1:], q.d[i:c])
}
c++
......@@ -448,16 +455,70 @@ func (t *Tree) overflow(p *x, q *d, pi, i int, k interface{} /*K*/, v interface{
t.ver++
l, r := p.siblings(pi)
// s is the number of items to shift out of the full data container to
// allow for the new data item. This logic shifts by half the available
// space plus one. In the case where the new item is to be inserted within
// the calculated shift space, s is reduced to include only the
// data items up to the index of the new data item.
//
// This is more useful as kd gets larger, but only benefits inserts. It
// has no effect on gets, but may marginally affect Next performance
// (but not Prev). Omitted the 1e3/1.4 cases because they had >10%
// variance. The performance differences may be more noticeable if you
// have a non-interface key type, because that will generally be much
// faster overall, making the copies a bigger part of the workload.
// kd=32
// name old time/op new time/op delta
// DeCopies/Linear-8 281ns ± 4% 264ns ± 7% -5.99% (p=0.032 n=5+5)
// DeCopies/Random-8 1.08µs ± 7% 1.07µs ± 5% ~ (p=0.841 n=5+5)
// SetSeq1e5-8 19.9ms ± 2% 17.9ms ± 5% -9.81% (p=0.008 n=5+5)
// SetSeq1e6-8 230ms ± 2% 211ms ± 7% -8.52% (p=0.016 n=5+5)
// SetRnd1e5-8 44.7ms ± 1% 41.4ms ± 4% -7.51% (p=0.008 n=5+5)
// SetRnd1e6-8 827ms ± 2% 762ms ± 2% -7.92% (p=0.008 n=5+5)
// Next1e5-8 774µs ± 2% 763µs ± 1% ~ (p=0.222 n=5+5)
// Next1e6-8 7.89ms ± 2% 7.77ms ± 2% -1.54% (p=0.008 n=5+5)
// kd=128
// name old time/op new time/op delta
// DeCopies/Linear-8 304ns ± 2% 266ns ± 7% -12.31% (p=0.008 n=5+5)
// DeCopies/Random-8 1.28µs ± 2% 1.13µs ± 5% -11.11% (p=0.008 n=5+5)
// SetSeq1e5-8 22.9ms ± 3% 18.1ms ± 9% -20.98% (p=0.008 n=5+5)
// SetSeq1e6-8 257ms ± 2% 201ms ± 3% -21.81% (p=0.008 n=5+5)
// SetRnd1e5-8 56.0ms ± 0% 49.6ms ± 3% -11.41% (p=0.008 n=5+5)
// SetRnd1e6-8 1.13s ± 3% 1.04s ± 8% -7.62% (p=0.032 n=5+5)
// Next1e5-8 714µs ± 2% 755µs ± 2% +5.77% (p=0.008 n=5+5)
// Next1e6-8 7.48ms ± 1% 7.85ms ± 2% +4.95% (p=0.008 n=5+5)
// kd=256
// name old time/op new time/op delta
// DeCopies/Linear-8 350ns ± 1% 255ns ± 7% -27.16% (p=0.008 n=5+5)
// DeCopies/Random-8 1.61µs ± 1% 1.32µs ± 7% -18.28% (p=0.008 n=5+5)
// SetSeq1e5-8 27.7ms ± 2% 17.6ms ± 5% -36.60% (p=0.008 n=5+5)
// SetSeq1e6-8 302ms ± 2% 196ms ± 1% -35.21% (p=0.008 n=5+5)
// SetRnd1e5-8 74.9ms ± 2% 58.9ms ± 2% -21.43% (p=0.008 n=5+5)
// SetRnd1e6-8 1.43s ± 0% 1.19s ± 2% -16.45% (p=0.008 n=5+5)
// Next1e5-8 717µs ± 2% 754µs ± 2% +5.21% (p=0.008 n=5+5)
// Next1e6-8 7.40ms ± 2% 7.77ms ± 1% +4.99% (p=0.008 n=5+5)
if l != nil && l.c < 2*kd && i != 0 {
l.mvL(q, 1)
t.insert(q, i-1, k, v)
s := (2*kd-l.c)/2 + 1 // half plus one
if i < s {
s = i
}
l.mvL(q, s)
t.insert(q, i-s, k, v)
p.x[pi-1].k = q.d[0].k
return
}
if r != nil && r.c < 2*kd {
if i < 2*kd {
q.mvR(r, 1)
s := (2*kd-r.c)/2 + 1 // half plus one
if 2*kd-i < s {
s = 2*kd - i
}
q.mvR(r, s)
t.insert(q, i, k, v)
p.x[pi].k = r.d[0].k
return
......@@ -661,6 +722,7 @@ func (t *Tree) Put(k interface{} /*K*/, upd func(oldV interface{} /*V*/, exists
func (t *Tree) split(p *x, q *d, pi, i int, k interface{} /*K*/, v interface{} /*V*/) {
t.ver++
r := btDPool.Get().(*d)
r.setTree(t)
if q.n != nil {
r.n = q.n
r.n.p = r
......@@ -670,6 +732,7 @@ func (t *Tree) split(p *x, q *d, pi, i int, k interface{} /*K*/, v interface{} /
q.n = r
r.p = q
t.didCopy(kd)
copy(r.d[:], q.d[kd:2*kd])
for i := range q.d[kd:] {
q.d[kd+i] = zde
......
// +build b.instrumentation
package b
// dTree is embedded in every data page (d) when the b.instrumentation
// build tag is set. It carries a back-pointer to the owning Tree so a
// page can charge de-copy counts to the tree's counter.
type dTree struct {
t *Tree
}
// treeInst is embedded in Tree when the b.instrumentation build tag is
// set; it holds the running total of copied d elements (de).
type treeInst struct {
deCopied int64
}
// didCopy records that n d elements (de) were copied on behalf of the
// tree. Compiled in only under the b.instrumentation build tag.
func (t *Tree) didCopy(n int) {
t.deCopied += int64(n)
}
// didCopy records n copied d elements against the page's owning tree.
// NOTE(review): this assumes setTree was called on the page first;
// otherwise d.t is nil and this panics — confirm every d allocation
// (including pool reuse) sets the back-pointer.
func (d *d) didCopy(n int) {
d.t.deCopied += int64(n)
}
// countCopies reports the total number of d elements copied so far;
// the benchmarks use it to log copy counts.
func (t *Tree) countCopies() int64 {
return t.deCopied
}
// setTree wires the page's back-pointer to its owning tree so that
// d.didCopy can reach the tree's counter.
func (d *d) setTree(t *Tree) {
d.t = t
}
// +build !b.instrumentation
package b
// dTree is the no-op counterpart used when the b.instrumentation build
// tag is absent: data pages carry no extra fields and pay no space cost.
type dTree struct {
}
// treeInst adds no fields to Tree when instrumentation is disabled.
type treeInst struct {
}
// didCopy is a no-op when the b.instrumentation build tag is absent.
func (t *Tree) didCopy(n int) {
}
// didCopy is a no-op when the b.instrumentation build tag is absent.
func (d *d) didCopy(n int) {
}
// countCopies always reports zero when instrumentation is disabled.
func (t *Tree) countCopies() int64 {
return 0
}
// setTree is a no-op when the b.instrumentation build tag is absent.
func (d *d) setTree(t *Tree) {
}